diff options
449 files changed, 17380 insertions, 8544 deletions
diff --git a/Documentation/arch/s390/driver-model.rst b/Documentation/arch/s390/driver-model.rst index ad4bc2dbea43..ad18f129fb0b 100644 --- a/Documentation/arch/s390/driver-model.rst +++ b/Documentation/arch/s390/driver-model.rst @@ -244,7 +244,7 @@ information about the interrupt from the irb parameter. -------------------- The ccwgroup mechanism is designed to handle devices consisting of multiple ccw -devices, like lcs or ctc. +devices, like qeth or ctc. The ccw driver provides a 'group' attribute. Piping bus ids of ccw devices to this attributes creates a ccwgroup device consisting of these ccw devices (if diff --git a/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml b/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml index 97dd1a7c5ed2..73252fe56fe6 100644 --- a/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml +++ b/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml @@ -10,9 +10,6 @@ title: maintainers: - Marc Kleine-Budde <mkl@pengutronix.de> -allOf: - - $ref: can-controller.yaml# - properties: compatible: oneOf: @@ -28,6 +25,7 @@ properties: - fsl,vf610-flexcan - fsl,ls1021ar2-flexcan - fsl,lx2160ar1-flexcan + - nxp,s32g2-flexcan - items: - enum: - fsl,imx53-flexcan @@ -43,12 +41,21 @@ properties: - enum: - fsl,ls1028ar1-flexcan - const: fsl,lx2160ar1-flexcan + - items: + - enum: + - nxp,s32g3-flexcan + - const: nxp,s32g2-flexcan reg: maxItems: 1 interrupts: - maxItems: 1 + minItems: 1 + maxItems: 4 + + interrupt-names: + minItems: 1 + maxItems: 4 clocks: maxItems: 2 @@ -136,6 +143,35 @@ required: - reg - interrupts +allOf: + - $ref: can-controller.yaml# + - if: + properties: + compatible: + contains: + const: nxp,s32g2-flexcan + then: + properties: + interrupts: + items: + - description: Message Buffer interrupt for mailboxes 0-7 and Enhanced RX FIFO + - description: Device state change + - description: Bus Error detection + - description: Message Buffer interrupt for mailboxes 8-127 + interrupt-names: + items: + - const: mb-0 + - const: state + - const: berr + - const: mb-1 + required: + - interrupt-names + else: + properties: + interrupts: + maxItems: 1 + interrupt-names: false + additionalProperties: false examples: diff --git a/Documentation/devicetree/bindings/net/can/microchip,mcp251xfd.yaml b/Documentation/devicetree/bindings/net/can/microchip,mcp251xfd.yaml index 2a98b26630cb..c155c9c6db39 100644 --- a/Documentation/devicetree/bindings/net/can/microchip,mcp251xfd.yaml +++ b/Documentation/devicetree/bindings/net/can/microchip,mcp251xfd.yaml @@ -40,7 +40,7 @@ properties: microchip,rx-int-gpios: description: - GPIO phandle of GPIO connected to to INT1 pin of the MCP251XFD, which + GPIO phandle of GPIO connected to INT1 pin of the MCP251XFD, which signals a pending RX interrupt. maxItems: 1 diff --git a/Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml b/Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml index 4c78c546343f..d6c957a33b48 100644 --- a/Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml +++ b/Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml @@ -16,6 +16,7 @@ properties: compatible: oneOf: - const: brcm,bcm5325 + - const: brcm,bcm53101 - const: brcm,bcm53115 - const: brcm,bcm53125 - const: brcm,bcm53128 @@ -77,6 +78,7 @@ allOf: contains: enum: - brcm,bcm5325 + - brcm,bcm53101 - brcm,bcm53115 - brcm,bcm53125 - brcm,bcm53128 diff --git a/Documentation/devicetree/bindings/net/ethernet-phy.yaml b/Documentation/devicetree/bindings/net/ethernet-phy.yaml index 2c71454ae8e3..824bbe4333b7 100644 --- a/Documentation/devicetree/bindings/net/ethernet-phy.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-phy.yaml @@ -232,6 +232,12 @@ properties: PHY's that have configurable TX internal delays. If this property is present then the PHY applies the TX delay. + tx-amplitude-100base-tx-percent: + description: + Transmit amplitude gain applied for 100BASE-TX. 100% matches 2V + peak-to-peak specified in ANSI X3.263. When omitted, the PHYs default + will be left as is. + leds: type: object diff --git a/Documentation/devicetree/bindings/net/faraday,ftgmac100.yaml b/Documentation/devicetree/bindings/net/faraday,ftgmac100.yaml index 9bcbacb6640d..55d6a8379025 100644 --- a/Documentation/devicetree/bindings/net/faraday,ftgmac100.yaml +++ b/Documentation/devicetree/bindings/net/faraday,ftgmac100.yaml @@ -44,6 +44,9 @@ properties: phy-mode: enum: - rgmii + - rgmii-id + - rgmii-rxid + - rgmii-txid - rmii phy-handle: true diff --git a/Documentation/netlink/genetlink-c.yaml b/Documentation/netlink/genetlink-c.yaml index 9660ffb1ed6a..96fa1f1522ed 100644 --- a/Documentation/netlink/genetlink-c.yaml +++ b/Documentation/netlink/genetlink-c.yaml @@ -14,9 +14,10 @@ $defs: pattern: ^[0-9A-Za-z_-]+( - 1)?$ minimum: 0 len-or-limit: - # literal int or limit based on fixed-width type e.g. u8-min, u16-max, etc. + # literal int, const name, or limit based on fixed-width type + # e.g. u8-min, u16-max, etc. type: [ string, integer ] - pattern: ^[su](8|16|32|64)-(min|max)$ + pattern: ^[0-9A-Za-z_-]+$ minimum: 0 # Schema for specs @@ -160,7 +161,7 @@ properties: type: string type: &attr-type enum: [ unused, pad, flag, binary, - uint, sint, u8, u16, u32, u64, s32, s64, + uint, sint, u8, u16, u32, u64, s8, s16, s32, s64, string, nest, indexed-array, nest-type-value ] doc: description: Documentation of the attribute. diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml index 16380e12cabe..a8c5b521937d 100644 --- a/Documentation/netlink/genetlink-legacy.yaml +++ b/Documentation/netlink/genetlink-legacy.yaml @@ -14,9 +14,10 @@ $defs: pattern: ^[0-9A-Za-z_-]+( - 1)?$ minimum: 0 len-or-limit: - # literal int or limit based on fixed-width type e.g. u8-min, u16-max, etc. + # literal int, const name, or limit based on fixed-width type + # e.g. u8-min, u16-max, etc. type: [ string, integer ] - pattern: ^[su](8|16|32|64)-(min|max)$ + pattern: ^[0-9A-Za-z_-]+$ minimum: 0 # Schema for specs @@ -151,6 +152,9 @@ properties: the right formatting mechanism when displaying values of this type. enum: [ hex, mac, fddi, ipv4, ipv6, uuid ] + struct: + description: Name of the nested struct type. + type: string # End genetlink-legacy attribute-sets: @@ -203,7 +207,7 @@ properties: type: &attr-type description: The netlink attribute type enum: [ unused, pad, flag, binary, bitfield32, - uint, sint, u8, u16, u32, u64, s32, s64, + uint, sint, u8, u16, u32, u64, s8, s16, s32, s64, string, nest, indexed-array, nest-type-value ] doc: description: Documentation of the attribute. diff --git a/Documentation/netlink/genetlink.yaml b/Documentation/netlink/genetlink.yaml index b036227b46f1..40efbbad76ab 100644 --- a/Documentation/netlink/genetlink.yaml +++ b/Documentation/netlink/genetlink.yaml @@ -14,9 +14,10 @@ $defs: pattern: ^[0-9A-Za-z_-]+( - 1)?$ minimum: 0 len-or-limit: - # literal int or limit based on fixed-width type e.g. u8-min, u16-max, etc. + # literal int, const name, or limit based on fixed-width type + # e.g. u8-min, u16-max, etc. type: [ string, integer ] - pattern: ^[su](8|16|32|64)-(min|max)$ + pattern: ^[0-9A-Za-z_-]+$ minimum: 0 # Schema for specs @@ -123,7 +124,7 @@ properties: type: string type: &attr-type enum: [ unused, pad, flag, binary, - uint, sint, u8, u16, u32, u64, s32, s64, + uint, sint, u8, u16, u32, u64, s8, s16, s32, s64, string, nest, indexed-array, nest-type-value ] doc: description: Documentation of the attribute. diff --git a/Documentation/netlink/specs/conntrack.yaml b/Documentation/netlink/specs/conntrack.yaml new file mode 100644 index 000000000000..840dc4504216 --- /dev/null +++ b/Documentation/netlink/specs/conntrack.yaml @@ -0,0 +1,643 @@ +# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) + +name: conntrack +protocol: netlink-raw +protonum: 12 + +doc: + Netfilter connection tracking subsystem over nfnetlink + +definitions: + - + name: nfgenmsg + type: struct + members: + - + name: nfgen-family + type: u8 + - + name: version + type: u8 + - + name: res-id + byte-order: big-endian + type: u16 + - + name: nf-ct-tcp-flags-mask + type: struct + members: + - + name: flags + type: u8 + enum: nf-ct-tcp-flags + enum-as-flags: true + - + name: mask + type: u8 + enum: nf-ct-tcp-flags + enum-as-flags: true + - + name: nf-ct-tcp-flags + type: flags + entries: + - window-scale + - sack-perm + - close-init + - be-liberal + - unacked + - maxack + - challenge-ack + - simultaneous-open + - + name: nf-ct-tcp-state + type: enum + entries: + - none + - syn-sent + - syn-recv + - established + - fin-wait + - close-wait + - last-ack + - time-wait + - close + - syn-sent2 + - max + - ignore + - retrans + - unack + - timeout-max + - + name: nf-ct-sctp-state + type: enum + entries: + - none + - cloned + - cookie-wait + - cookie-echoed + - established + - shutdown-sent + - shutdown-received + - shutdown-ack-sent + - shutdown-heartbeat-sent + - + name: nf-ct-status + type: flags + entries: + - expected + - seen-reply + - assured + - confirmed + - src-nat + - dst-nat + - seq-adj + - src-nat-done + - dst-nat-done + - dying + - fixed-timeout + - template + - nat-clash + - helper + - offload + - hw-offload + +attribute-sets: + - + name: counter-attrs + attributes: + - + name: packets + type: u64 + byte-order: big-endian + - + name: bytes + type: u64 + byte-order: big-endian + - + name: packets-old + type: u32 + - + name: bytes-old + type: u32 + - + name: pad + type: pad + - + name: tuple-proto-attrs + attributes: + - + name: proto-num + type: u8 + doc: l4 protocol number + - + name: proto-src-port + type: u16 + byte-order: big-endian + doc: l4 source port + - + name: proto-dst-port + type: u16 + byte-order: big-endian + doc: l4 source port + - + name: proto-icmp-id + type: u16 + byte-order: big-endian + doc: l4 icmp id + - + name: proto-icmp-type + type: u8 + - + name: proto-icmp-code + type: u8 + - + name: proto-icmpv6-id + type: u16 + byte-order: big-endian + doc: l4 icmp id + - + name: proto-icmpv6-type + type: u8 + - + name: proto-icmpv6-code + type: u8 + - + name: tuple-ip-attrs + attributes: + - + name: ip-v4-src + type: u32 + byte-order: big-endian + display-hint: ipv4 + doc: ipv4 source address + - + name: ip-v4-dst + type: u32 + byte-order: big-endian + display-hint: ipv4 + doc: ipv4 destination address + - + name: ip-v6-src + type: binary + checks: + min-len: 16 + byte-order: big-endian + display-hint: ipv6 + doc: ipv6 source address + - + name: ip-v6-dst + type: binary + checks: + min-len: 16 + byte-order: big-endian + display-hint: ipv6 + doc: ipv6 destination address + - + name: tuple-attrs + attributes: + - + name: tuple-ip + type: nest + nested-attributes: tuple-ip-attrs + doc: conntrack l3 information + - + name: tuple-proto + type: nest + nested-attributes: tuple-proto-attrs + doc: conntrack l4 information + - + name: tuple-zone + type: u16 + byte-order: big-endian + doc: conntrack zone id + - + name: protoinfo-tcp-attrs + attributes: + - + name: tcp-state + type: u8 + enum: nf-ct-tcp-state + doc: tcp connection state + - + name: tcp-wscale-original + type: u8 + doc: window scaling factor in original direction + - + name: tcp-wscale-reply + type: u8 + doc: window scaling factor in reply direction + - + name: tcp-flags-original + type: binary + struct: nf-ct-tcp-flags-mask + - + name: tcp-flags-reply + type: binary + struct: nf-ct-tcp-flags-mask + - + name: protoinfo-dccp-attrs + attributes: + - + name: dccp-state + type: u8 + doc: dccp connection state + - + name: dccp-role + type: u8 + - + name: dccp-handshake-seq + type: u64 + byte-order: big-endian + - + name: dccp-pad + type: pad + - + name: protoinfo-sctp-attrs + attributes: + - + name: sctp-state + type: u8 + doc: sctp connection state + enum: nf-ct-sctp-state + - + name: vtag-original + type: u32 + byte-order: big-endian + - + name: vtag-reply + type: u32 + byte-order: big-endian + - + name: protoinfo-attrs + attributes: + - + name: protoinfo-tcp + type: nest + nested-attributes: protoinfo-tcp-attrs + doc: conntrack tcp state information + - + name: protoinfo-dccp + type: nest + nested-attributes: protoinfo-dccp-attrs + doc: conntrack dccp state information + - + name: protoinfo-sctp + type: nest + nested-attributes: protoinfo-sctp-attrs + doc: conntrack sctp state information + - + name: help-attrs + attributes: + - + name: help-name + type: string + doc: helper name + - + name: nat-proto-attrs + attributes: + - + name: nat-port-min + type: u16 + byte-order: big-endian + - + name: nat-port-max + type: u16 + byte-order: big-endian + - + name: nat-attrs + attributes: + - + name: nat-v4-minip + type: u32 + byte-order: big-endian + - + name: nat-v4-maxip + type: u32 + byte-order: big-endian + - + name: nat-v6-minip + type: binary + - + name: nat-v6-maxip + type: binary + - + name: nat-proto + type: nest + nested-attributes: nat-proto-attrs + - + name: seqadj-attrs + attributes: + - + name: correction-pos + type: u32 + byte-order: big-endian + - + name: offset-before + type: u32 + byte-order: big-endian + - + name: offset-after + type: u32 + byte-order: big-endian + - + name: secctx-attrs + attributes: + - + name: secctx-name + type: string + - + name: synproxy-attrs + attributes: + - + name: isn + type: u32 + byte-order: big-endian + - + name: its + type: u32 + byte-order: big-endian + - + name: tsoff + type: u32 + byte-order: big-endian + - + name: conntrack-attrs + attributes: + - + name: tuple-orig + type: nest + nested-attributes: tuple-attrs + doc: conntrack l3+l4 protocol information, original direction + - + name: tuple-reply + type: nest + nested-attributes: tuple-attrs + doc: conntrack l3+l4 protocol information, reply direction + - + name: status + type: u32 + byte-order: big-endian + enum: nf-ct-status + enum-as-flags: true + doc: conntrack flag bits + - + name: protoinfo + type: nest + nested-attributes: protoinfo-attrs + - + name: help + type: nest + nested-attributes: help-attrs + - + name: nat-src + type: nest + nested-attributes: nat-attrs + - + name: timeout + type: u32 + byte-order: big-endian + - + name: mark + type: u32 + byte-order: big-endian + - + name: counters-orig + type: nest + nested-attributes: counter-attrs + - + name: counters-reply + type: nest + nested-attributes: counter-attrs + - + name: use + type: u32 + byte-order: big-endian + - + name: id + type: u32 + byte-order: big-endian + - + name: nat-dst + type: nest + nested-attributes: nat-attrs + - + name: tuple-master + type: nest + nested-attributes: tuple-attrs + - + name: seq-adj-orig + type: nest + nested-attributes: seqadj-attrs + - + name: seq-adj-reply + type: nest + nested-attributes: seqadj-attrs + - + name: secmark + type: binary + doc: obsolete + - + name: zone + type: u16 + byte-order: big-endian + doc: conntrack zone id + - + name: secctx + type: nest + nested-attributes: secctx-attrs + - + name: timestamp + type: u64 + byte-order: big-endian + - + name: mark-mask + type: u32 + byte-order: big-endian + - + name: labels + type: binary + - + name: labels mask + type: binary + - + name: synproxy + type: nest + nested-attributes: synproxy-attrs + - + name: filter + type: nest + nested-attributes: tuple-attrs + - + name: status-mask + type: u32 + byte-order: big-endian + enum: nf-ct-status + enum-as-flags: true + doc: conntrack flag bits to change + - + name: timestamp-event + type: u64 + byte-order: big-endian + - + name: conntrack-stats-attrs + attributes: + - + name: searched + type: u32 + byte-order: big-endian + doc: obsolete + - + name: found + type: u32 + byte-order: big-endian + - + name: new + type: u32 + byte-order: big-endian + doc: obsolete + - + name: invalid + type: u32 + byte-order: big-endian + doc: obsolete + - + name: ignore + type: u32 + byte-order: big-endian + doc: obsolete + - + name: delete + type: u32 + byte-order: big-endian + doc: obsolete + - + name: delete-list + type: u32 + byte-order: big-endian + doc: obsolete + - + name: insert + type: u32 + byte-order: big-endian + - + name: insert-failed + type: u32 + byte-order: big-endian + - + name: drop + type: u32 + byte-order: big-endian + - + name: early-drop + type: u32 + byte-order: big-endian + - + name: error + type: u32 + byte-order: big-endian + - + name: search-restart + type: u32 + byte-order: big-endian + - + name: clash-resolve + type: u32 + byte-order: big-endian + - + name: chain-toolong + type: u32 + byte-order: big-endian + +operations: + enum-model: directional + list: + - + name: get + doc: get / dump entries + attribute-set: conntrack-attrs + fixed-header: nfgenmsg + do: + request: + value: 0x101 + attributes: + - tuple-orig + - tuple-reply + - zone + reply: + value: 0x100 + attributes: + - tuple-orig + - tuple-reply + - status + - protoinfo + - help + - nat-src + - nat-dst + - timeout + - mark + - counter-orig + - counter-reply + - use + - id + - nat-dst + - tuple-master + - seq-adj-orig + - seq-adj-reply + - zone + - secctx + - labels + - synproxy + dump: + request: + value: 0x101 + attributes: + - nfgen-family + - mark + - filter + - status + - zone + reply: + value: 0x100 + attributes: + - tuple-orig + - tuple-reply + - status + - protoinfo + - help + - nat-src + - nat-dst + - timeout + - mark + - counter-orig + - counter-reply + - use + - id + - nat-dst + - tuple-master + - seq-adj-orig + - seq-adj-reply + - zone + - secctx + - labels + - synproxy + - + name: get-stats + doc: dump pcpu conntrack stats + attribute-set: conntrack-stats-attrs + fixed-header: nfgenmsg + dump: + request: + value: 0x104 + reply: + value: 0x104 + attributes: + - searched + - found + - insert + - insert-failed + - drop + - early-drop + - error + - search-restart + - clash-resolve + - chain-toolong diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index cbb544bd6c84..766b82005d18 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -115,6 +115,9 @@ attribute-sets: type: u64 enum: xsk-flags - + name: io-uring-provider-info + attributes: [] + - name: page-pool attributes: - @@ -171,6 +174,11 @@ attribute-sets: name: dmabuf doc: ID of the dmabuf this page-pool is attached to. type: u32 + - + name: io-uring + doc: io-uring memory provider information. + type: nest + nested-attributes: io-uring-provider-info - name: page-pool-info subset-of: page-pool @@ -269,6 +277,9 @@ attribute-sets: processing, if event polling finds events type: uint - + name: xsk-info + attributes: [] + - name: queue attributes: - @@ -286,6 +297,9 @@ attribute-sets: - name: type doc: Queue type as rx, tx. Each queue type defines a separate ID space. + XDP TX queues allocated in the kernel are not linked to NAPIs and + thus not listed. AF_XDP queues will have more information set in + the xsk attribute. type: u32 enum: queue-type - @@ -296,7 +310,16 @@ attribute-sets: name: dmabuf doc: ID of the dmabuf attached to this queue, if any. type: u32 - + - + name: io-uring + doc: io_uring memory provider information. + type: nest + nested-attributes: io-uring-provider-info + - + name: xsk + doc: XSK information for this queue, if any. + type: nest + nested-attributes: xsk-info - name: qstats doc: | @@ -444,6 +467,8 @@ attribute-sets: name: tx-needs-csum doc: | Number of packets that required the device to calculate the checksum. + This counter includes the number of GSO wire packets for which device + calculated the L4 checksum. type: uint - name: tx-hw-gso-packets @@ -572,6 +597,7 @@ operations: - inflight-mem - detach-time - dmabuf + - io-uring dump: reply: *pp-reply config-cond: page-pool @@ -637,6 +663,8 @@ operations: - napi-id - ifindex - dmabuf + - io-uring + - xsk dump: request: attributes: diff --git a/Documentation/netlink/specs/nl80211.yaml b/Documentation/netlink/specs/nl80211.yaml new file mode 100644 index 000000000000..1ec49c3562cd --- /dev/null +++ b/Documentation/netlink/specs/nl80211.yaml @@ -0,0 +1,2000 @@ +# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) + +name: nl80211 +protocol: genetlink-legacy + +doc: + Netlink API for 802.11 wireless devices + +definitions: + - + name: commands + type: enum + entries: + - unspec + - get-wiphy + - set-wiphy + - new-wiphy + - del-wiphy + - get-interface + - set-interface + - new-interface + - del-interface + - get-key + - set-key + - new-key + - del-key + - get-beacon + - set-beacon + - new-beacon + - del-beacon + - get-station + - set-station + - new-station + - del-station + - get-mpath + - set-mpath + - new-mpath + - del-mpath + - set-bss + - set-reg + - req-set-reg + - get-mesh-config + - set-mesh-config + - set-mgmt-extra-ie + - get-reg + - get-scan + - trigger-scan + - new-scan-results + - scan-aborted + - reg-change + - authenticate + - associate + - deauthenticate + - disassociate + - michael-mic-failure + - reg-beacon-hint + - join-ibss + - leave-ibss + - testmode + - connect + - roam + - disconnect + - set-wiphy-netns + - get-survey + - new-survey-results + - set-pmksa + - del-pmksa + - flush-pmksa + - remain-on-channel + - cancel-remain-on-channel + - set-tx-bitrate-mask + - register-action + - action + - action-tx-status + - set-power-save + - get-power-save + - set-cqm + - notify-cqm + - set-channel + - set-wds-peer + - frame-wait-cancel + - join-mesh + - leave-mesh + - unprot-deauthenticate + - unprot-disassociate + - new-peer-candidate + - get-wowlan + - set-wowlan + - start-sched-scan + - stop-sched-scan + - sched-scan-results + - sched-scan-stopped + - set-rekey-offload + - pmksa-candidate + - tdls-oper + - tdls-mgmt + - unexpected-frame + - probe-client + - register-beacons + - unexpected-4-addr-frame + - set-noack-map + - ch-switch-notify + - start-p2p-device + - stop-p2p-device + - conn-failed + - set-mcast-rate + - set-mac-acl + - radar-detect + - get-protocol-features + - update-ft-ies + - ft-event + - crit-protocol-start + - crit-protocol-stop + - get-coalesce + - set-coalesce + - channel-switch + - vendor + - set-qos-map + - add-tx-ts + - del-tx-ts + - get-mpp + - join-ocb + - leave-ocb + - ch-switch-started-notify + - tdls-channel-switch + - tdls-cancel-channel-switch + - wiphy-reg-change + - abort-scan + - start-nan + - stop-nan + - add-nan-function + - del-nan-function + - change-nan-config + - nan-match + - set-multicast-to-unicast + - update-connect-params + - set-pmk + - del-pmk + - port-authorized + - reload-regdb + - external-auth + - sta-opmode-changed + - control-port-frame + - get-ftm-responder-stats + - peer-measurement-start + - peer-measurement-result + - peer-measurement-complete + - notify-radar + - update-owe-info + - probe-mesh-link + - set-tid-config + - unprot-beacon + - control-port-frame-tx-status + - set-sar-specs + - obss-color-collision + - color-change-request + - color-change-started + - color-change-aborted + - color-change-completed + - set-fils-aad + - assoc-comeback + - add-link + - remove-link + - add-link-sta + - modify-link-sta + - remove-link-sta + - set-hw-timestamp + - links-removed + - set-tid-to-link-mapping + - + name: feature-flags + type: flags + entries: + - sk-tx-status + - ht-ibss + - inactivity-timer + - cell-base-reg-hints + - p2p-device-needs-channel + - sae + - low-priority-scan + - scan-flush + - ap-scan + - vif-txpower + - need-obss-scan + - p2p-go-ctwin + - p2p-go-oppps + - reserved + - advertise-chan-limits + - full-ap-client-state + - userspace-mpm + - active-monitor + - ap-mode-chan-width-change + - ds-param-set-ie-in-probes + - wfa-tpc-ie-in-probes + - quiet + - tx-power-insertion + - ackto-estimation + - static-smps + - dynamic-smps + - supports-wmm-admission + - mac-on-create + - tdls-channel-switch + - scan-random-mac-addr + - sched-scan-random-mac-addr + - no-random-mac-addr + - + name: ieee80211-mcs-info + type: struct + members: + - + name: rx-mask + type: binary + len: 10 + - + name: rx-highest + type: u16 + byte-order: little-endian + - + name: tx-params + type: u8 + - + name: reserved + type: binary + len: 3 + - + name: ieee80211-vht-mcs-info + type: struct + members: + - + name: rx-mcs-map + type: u16 + byte-order: little-endian + - + name: rx-highest + type: u16 + byte-order: little-endian + - + name: tx-mcs-map + type: u16 + byte-order: little-endian + - + name: tx-highest + type: u16 + byte-order: little-endian + - + name: ieee80211-ht-cap + type: struct + members: + - + name: cap-info + type: u16 + byte-order: little-endian + - + name: ampdu-params-info + type: u8 + - + name: mcs + type: binary + struct: ieee80211-mcs-info + - + name: extended-ht-cap-info + type: u16 + byte-order: little-endian + - + name: tx-bf-cap-info + type: u32 + byte-order: little-endian + - + name: antenna-selection-info + type: u8 + - + name: channel-type + type: enum + entries: + - no-ht + - ht20 + - ht40minus + - ht40plus + - + name: sta-flag-update + type: struct + members: + - + name: mask + type: u32 + - + name: set + type: u32 + - + name: protocol-features + type: flags + entries: + - split-wiphy-dump + +attribute-sets: + - + name: nl80211-attrs + name-prefix: nl80211-attr- + enum-name: nl80211-attrs + attr-max-name: num-nl80211-attr + attributes: + - + name: wiphy + type: u32 + - + name: wiphy-name + type: string + - + name: ifindex + type: u32 + - + name: ifname + type: string + - + name: iftype + type: u32 + - + name: mac + type: binary + display-hint: mac + - + name: key-data + type: binary + - + name: key-idx + type: u8 + - + name: key-cipher + type: u32 + - + name: key-seq + type: binary + - + name: key-default + type: flag + - + name: beacon-interval + type: u32 + - + name: dtim-period + type: u32 + - + name: beacon-head + type: binary + - + name: beacon-tail + type: binary + - + name: sta-aid + type: u16 + - + name: sta-flags + type: binary # TODO: nest + - + name: sta-listen-interval + type: u16 + - + name: sta-supported-rates + type: binary + - + name: sta-vlan + type: u32 + - + name: sta-info + type: binary # TODO: nest + - + name: wiphy-bands + type: nest + nested-attributes: wiphy-bands + - + name: mntr-flags + type: binary # TODO: nest + - + name: mesh-id + type: binary + - + name: sta-plink-action + type: u8 + - + name: mpath-next-hop + type: binary + display-hint: mac + - + name: mpath-info + type: binary # TODO: nest + - + name: bss-cts-prot + type: u8 + - + name: bss-short-preamble + type: u8 + - + name: bss-short-slot-time + type: u8 + - + name: ht-capability + type: binary + - + name: supported-iftypes + type: nest + nested-attributes: supported-iftypes + - + name: reg-alpha2 + type: binary + - + name: reg-rules + type: binary # TODO: nest + - + name: mesh-config + type: binary # TODO: nest + - + name: bss-basic-rates + type: binary + - + name: wiphy-txq-params + type: binary # TODO: nest + - + name: wiphy-freq + type: u32 + - + name: wiphy-channel-type + type: u32 + enum: channel-type + - + name: key-default-mgmt + type: flag + - + name: mgmt-subtype + type: u8 + - + name: ie + type: binary + - + name: max-num-scan-ssids + type: u8 + - + name: scan-frequencies + type: binary # TODO: nest + - + name: scan-ssids + type: binary # TODO: nest + - + name: generation + type: u32 + - + name: bss + type: binary # TODO: nest + - + name: reg-initiator + type: u8 + - + name: reg-type + type: u8 + - + name: supported-commands + type: indexed-array + sub-type: u32 + enum: commands + - + name: frame + type: binary + - + name: ssid + type: binary + - + name: auth-type + type: u32 + - + name: reason-code + type: u16 + - + name: key-type + type: u32 + - + name: max-scan-ie-len + type: u16 + - + name: cipher-suites + type: binary + sub-type: u32 + display-hint: hex + - + name: freq-before + type: binary # TODO: nest + - + name: freq-after + type: binary # TODO: nest + - + name: freq-fixed + type: flag + - + name: wiphy-retry-short + type: u8 + - + name: wiphy-retry-long + type: u8 + - + name: wiphy-frag-threshold + type: u32 + - + name: wiphy-rts-threshold + type: u32 + - + name: timed-out + type: flag + - + name: use-mfp + type: u32 + - + name: sta-flags2 + type: binary + struct: sta-flag-update + - + name: control-port + type: flag + - + name: testdata + type: binary + - + name: privacy + type: flag + - + name: disconnected-by-ap + type: flag + - + name: status-code + type: u16 + - + name: cipher-suites-pairwise + type: binary + - + name: cipher-suite-group + type: u32 + - + name: wpa-versions + type: u32 + - + name: akm-suites + type: binary + - + name: req-ie + type: binary + - + name: resp-ie + type: binary + - + name: prev-bssid + type: binary + - + name: key + type: binary # TODO: nest + - + name: keys + type: binary # TODO: nest + - + name: pid + type: u32 + - + name: 4addr + type: u8 + - + name: survey-info + type: binary # TODO: nest + - + name: pmkid + type: binary + - + name: max-num-pmkids + type: u8 + - + name: duration + type: u32 + - + name: cookie + type: u64 + - + name: wiphy-coverage-class + type: u8 + - + name: tx-rates + type: binary # TODO: nest + - + name: frame-match + type: binary + - + name: ack + type: flag + - + name: ps-state + type: u32 + - + name: cqm + type: binary # TODO: nest + - + name: local-state-change + type: flag + - + name: ap-isolate + type: u8 + - + name: wiphy-tx-power-setting + type: u32 + - + name: wiphy-tx-power-level + type: u32 + - + name: tx-frame-types + type: nest + nested-attributes: iftype-attrs + - + name: rx-frame-types + type: nest + nested-attributes: iftype-attrs + - + name: frame-type + type: u16 + - + name: control-port-ethertype + type: flag + - + name: control-port-no-encrypt + type: flag + - + name: support-ibss-rsn + type: flag + - + name: wiphy-antenna-tx + type: u32 + - + name: wiphy-antenna-rx + type: u32 + - + name: mcast-rate + type: u32 + - + name: offchannel-tx-ok + type: flag + - + name: bss-ht-opmode + type: u16 + - + name: key-default-types + type: binary # TODO: nest + - + name: max-remain-on-channel-duration + type: u32 + - + name: mesh-setup + type: binary # TODO: nest + - + name: wiphy-antenna-avail-tx + type: u32 + - + name: wiphy-antenna-avail-rx + type: u32 + - + name: support-mesh-auth + type: flag + - + name: sta-plink-state + type: u8 + - + name: wowlan-triggers + type: binary # TODO: nest + - + name: wowlan-triggers-supported + type: nest + nested-attributes: wowlan-triggers-attrs + - + name: sched-scan-interval + type: u32 + - + name: interface-combinations + type: indexed-array + sub-type: nest + nested-attributes: if-combination-attributes + - + name: software-iftypes + type: nest + nested-attributes: supported-iftypes + - + name: rekey-data + type: binary # TODO: nest + - + name: max-num-sched-scan-ssids + type: u8 + - + name: max-sched-scan-ie-len + type: u16 + - + name: scan-supp-rates + type: binary # TODO: nest + - + name: hidden-ssid + type: u32 + - + name: ie-probe-resp + type: binary + - + name: ie-assoc-resp + type: binary + - + name: sta-wme + type: binary # TODO: nest + - + name: support-ap-uapsd + type: flag + - + name: roam-support + type: flag + - + name: sched-scan-match + type: binary # TODO: nest + - + name: max-match-sets + type: u8 + - + name: pmksa-candidate + type: binary # TODO: nest + - + name: tx-no-cck-rate + type: flag + - + name: tdls-action + type: u8 + - + name: tdls-dialog-token + type: u8 + - + name: tdls-operation + type: u8 + - + name: tdls-support + type: flag + - + name: tdls-external-setup + type: flag + - + name: device-ap-sme + type: u32 + - + name: dont-wait-for-ack + type: flag + - + name: feature-flags + type: u32 + enum: feature-flags + enum-as-flags: True + - + name: probe-resp-offload + type: u32 + - + name: probe-resp + type: binary + - + name: dfs-region + type: u8 + - + name: disable-ht + type: flag + - + name: ht-capability-mask + type: binary + struct: ieee80211-ht-cap + - + name: noack-map + type: u16 + - + name: inactivity-timeout + type: u16 + - + name: rx-signal-dbm + type: u32 + - + name: bg-scan-period + type: u16 + - + name: wdev + type: u64 + - + name: user-reg-hint-type + type: u32 + - + name: conn-failed-reason + type: u32 + - + name: auth-data + type: binary + - + name: vht-capability + type: binary + - + name: scan-flags + type: u32 + - + name: channel-width + type: u32 + - + name: center-freq1 + type: u32 + - + name: center-freq2 + type: u32 + - + name: p2p-ctwindow + type: u8 + - + name: p2p-oppps + type: u8 + - + name: local-mesh-power-mode + type: u32 + - + name: acl-policy + type: u32 + - + name: mac-addrs + type: binary # TODO: nest + - + name: mac-acl-max + type: u32 + - + name: radar-event + type: u32 + - + name: ext-capa + type: binary + - + name: ext-capa-mask + type: binary + - + name: sta-capability + type: u16 + - + name: sta-ext-capability + type: binary + - + name: protocol-features + type: u32 + enum: protocol-features + - + name: split-wiphy-dump + type: flag + - + name: disable-vht + type: flag + - + name: vht-capability-mask + type: binary + - + name: mdid + type: u16 + - + name: ie-ric + type: binary + - + name: crit-prot-id + type: u16 + - + name: max-crit-prot-duration + type: u16 + - + name: peer-aid + type: u16 + - + name: coalesce-rule + type: binary # TODO: nest + - + name: ch-switch-count + type: u32 + - + name: ch-switch-block-tx + type: flag + - + name: csa-ies + type: binary # TODO: nest + - + name: cntdwn-offs-beacon + type: binary + - + name: cntdwn-offs-presp + type: binary + - + name: rxmgmt-flags + type: binary + - + name: sta-supported-channels + type: binary + - + name: sta-supported-oper-classes + type: binary + - + name: handle-dfs + type: flag + - + name: support-5-mhz + type: flag + - + name: support-10-mhz + type: flag + - + name: opmode-notif + type: u8 + - + name: vendor-id + type: u32 + - + name: vendor-subcmd + type: u32 + - + name: vendor-data + type: binary + - + name: vendor-events + type: binary + - + name: qos-map + type: binary + - + name: mac-hint + type: binary + display-hint: mac + - + name: wiphy-freq-hint + type: u32 + - + name: max-ap-assoc-sta + type: u32 + - + name: tdls-peer-capability + type: u32 + - + name: socket-owner + type: flag + - + name: csa-c-offsets-tx + type: binary + - + name: max-csa-counters + type: u8 + - + name: tdls-initiator + type: flag + - + name: use-rrm + type: flag + - + name: wiphy-dyn-ack + type: flag + - + name: tsid + type: u8 + - + name: user-prio + type: u8 + - + name: admitted-time + type: u16 + - + name: smps-mode + type: u8 + - + name: oper-class + type: u8 + - + name: mac-mask + type: binary + display-hint: mac + - + name: wiphy-self-managed-reg + type: flag + - + name: ext-features + type: binary + - + name: survey-radio-stats + type: binary + - + name: netns-fd + type: u32 + - + name: sched-scan-delay + type: u32 + - + name: reg-indoor + type: flag + - + name: max-num-sched-scan-plans + type: u32 + - + name: max-scan-plan-interval + type: u32 + - + name: max-scan-plan-iterations + type: u32 + - + name: sched-scan-plans + type: binary # TODO: nest + - + name: pbss + type: flag + - + name: bss-select + type: binary # TODO: nest + - + name: sta-support-p2p-ps + type: u8 + - + name: pad + type: binary + - + name: iftype-ext-capa + type: binary # TODO: nest + - + name: mu-mimo-group-data + type: binary + - + name: mu-mimo-follow-mac-addr + type: binary + display-hint: mac + - + name: scan-start-time-tsf + type: u64 + - + name: scan-start-time-tsf-bssid + type: binary + - + name: measurement-duration + type: u16 + - + name: measurement-duration-mandatory + type: flag + - + name: mesh-peer-aid + type: u16 + - + name: nan-master-pref + type: u8 + - + name: bands + type: u32 + - + name: nan-func + type: binary # TODO: nest + - + name: nan-match + type: binary # TODO: nest + - + name: fils-kek + type: binary + - + name: fils-nonces + type: binary + - + name: multicast-to-unicast-enabled + type: flag + - + name: bssid + type: binary + display-hint: mac + - + name: sched-scan-relative-rssi + type: s8 + - + name: sched-scan-rssi-adjust + type: binary + - + name: timeout-reason + type: u32 + - + name: fils-erp-username + type: binary + - + name: fils-erp-realm + type: binary + - + name: fils-erp-next-seq-num + type: u16 + - + name: fils-erp-rrk + type: binary + - + name: fils-cache-id + type: binary + - + name: pmk + type: binary + - + name: sched-scan-multi + type: flag + - + name: sched-scan-max-reqs + type: u32 + - + name: want-1x-4way-hs + type: flag + - + name: pmkr0-name + type: binary + - + name: port-authorized + type: binary + - + name: external-auth-action + type: u32 + - + name: external-auth-support + type: flag + - + name: nss + type: u8 + - + name: ack-signal + type: s32 + - + name: control-port-over-nl80211 + type: flag + - + name: txq-stats + type: nest + nested-attributes: txq-stats-attrs + - + name: txq-limit + type: u32 + - + name: txq-memory-limit + type: u32 + - + name: txq-quantum + type: u32 + - + name: he-capability + type: binary + - + name: ftm-responder + type: binary # TODO: nest + - + name: ftm-responder-stats + type: binary # TODO: nest + - + name: timeout + type: u32 + - + name: peer-measurements + type: binary # TODO: nest + - + name: airtime-weight + type: u16 + - + name: sta-tx-power-setting + type: u8 + - + name: sta-tx-power + type: s16 + - + name: sae-password + type: binary + - + name: twt-responder + type: flag + - + name: he-obss-pd + type: binary # TODO: nest + - + name: wiphy-edmg-channels + type: u8 + - + name: wiphy-edmg-bw-config + type: u8 + - + name: vlan-id + type: u16 + - + name: he-bss-color + type: binary # TODO: nest + - + name: iftype-akm-suites + type: binary # TODO: nest + - + name: tid-config + type: binary # TODO: nest + - + name: control-port-no-preauth + type: flag + - + name: pmk-lifetime + type: u32 + - + name: pmk-reauth-threshold + type: u8 + - + name: receive-multicast + type: flag + - + name: wiphy-freq-offset + type: u32 + - + name: center-freq1-offset + type: u32 + - + name: scan-freq-khz + type: binary # TODO: nest + - + name: he-6ghz-capability + type: binary + - + name: fils-discovery + type: binary # TOOD: nest + - + name: unsol-bcast-probe-resp + type: binary # TOOD: nest + - + name: s1g-capability + type: binary + - + name: s1g-capability-mask + type: binary + - + name: sae-pwe + type: u8 + - + name: reconnect-requested + type: binary + - + name: sar-spec + type: nest + nested-attributes: sar-attributes + - + name: disable-he + type: flag + - + name: obss-color-bitmap + type: u64 + - + name: color-change-count + type: u8 + - + name: color-change-color + type: u8 + - + name: color-change-elems + type: binary # TODO: nest + - + name: mbssid-config + type: binary # TODO: nest + - + name: mbssid-elems + type: binary # TODO: nest + - + name: radar-background + type: flag + - + name: ap-settings-flags + type: u32 + - + name: eht-capability + type: binary + - + name: disable-eht + type: flag + - + name: mlo-links + type: binary # TODO: nest + - + name: mlo-link-id + type: u8 + - + name: mld-addr + type: binary + display-hint: mac + - + name: mlo-support + type: flag + - + name: max-num-akm-suites + type: binary + - + name: eml-capability + type: u16 + - + name: mld-capa-and-ops + type: u16 + - + name: tx-hw-timestamp + type: u64 + - + name: rx-hw-timestamp + type: u64 + - + name: td-bitmap + type: binary + - + name: punct-bitmap + type: u32 + - + name: max-hw-timestamp-peers + type: u16 + - + name: hw-timestamp-enabled + type: flag + - + name: ema-rnr-elems + type: binary # TODO: nest + - + name: mlo-link-disabled + type: flag + - + name: bss-dump-include-use-data + type: flag + - + name: mlo-ttlm-dlink + type: u16 + - + name: mlo-ttlm-ulink + type: u16 + - + name: assoc-spp-amsdu + type: flag + - + name: wiphy-radios + type: binary # TODO: nest + - + name: wiphy-interface-combinations + type: binary # TODO: nest + - + name: vif-radio-mask + type: u32 + - + name: frame-type-attrs + subset-of: nl80211-attrs + attributes: + - + name: frame-type + - + name: wiphy-bands + name-prefix: nl80211-band- + attr-max-name: num-nl80211-bands + attributes: + - + name: 2ghz + doc: 2.4 GHz ISM band + value: 0 + type: nest + nested-attributes: band-attrs + - + name: 5ghz + doc: around 5 GHz band (4.9 - 5.7 GHz) + type: nest + nested-attributes: band-attrs + - + name: 60ghz + doc: around 60 GHz band (58.32 - 69.12 GHz) + type: nest + nested-attributes: band-attrs + - + name: 6ghz + type: nest + nested-attributes: band-attrs + - + name: s1ghz + type: nest + nested-attributes: band-attrs + - + name: lc + type: nest + nested-attributes: band-attrs + - + name: band-attrs + enum-name: nl80211-band-attr + name-prefix: nl80211-band-attr- + attributes: + - + name: freqs + type: indexed-array + sub-type: nest + nested-attributes: frequency-attrs + - + name: rates + type: indexed-array + sub-type: nest + nested-attributes: bitrate-attrs + - + name: ht-mcs-set + type: binary + struct: ieee80211-mcs-info + - + name: ht-capa + type: u16 + - + name: ht-ampdu-factor + type: u8 + - + name: ht-ampdu-density + type: u8 + - + name: vht-mcs-set + type: binary + struct: ieee80211-vht-mcs-info + - + name: vht-capa + type: u32 + - + name: iftype-data + type: indexed-array + sub-type: nest + nested-attributes: iftype-data-attrs + - + name: edmg-channels + type: binary + - + name: edmg-bw-config + type: binary + - + name: s1g-mcs-nss-set + type: binary + - + name: s1g-capa + type: binary + - + name: bitrate-attrs + name-prefix: nl80211-bitrate-attr- + attributes: + - + name: rate + type: u32 + - + name: 2ghz-shortpreamble + type: flag + - + name: frequency-attrs + name-prefix: nl80211-frequency-attr- + attributes: + - + name: freq + type: u32 + - + name: disabled + type: flag + - + name: no-ir + type: flag + - + name: no-ibss + name-prefix: __nl80211-frequency-attr- + type: flag + - + name: radar + type: flag + - + name: max-tx-power + type: u32 + - + name: dfs-state + type: u32 + - + name: dfs-time + type: binary + - + name: no-ht40-minus + type: binary + - + name: no-ht40-plus + type: binary + - + name: no-80mhz + type: binary + - + name: no-160mhz + type: binary + - + name: dfs-cac-time + type: binary + - + name: indoor-only + type: binary + - + name: ir-concurrent + type: binary + - + name: no-20mhz + type: binary + - + name: no-10mhz + type: binary + - + name: wmm + type: indexed-array + sub-type: nest + nested-attributes: wmm-attrs + - + name: no-he + type: binary + - + name: offset + type: u32 + - + name: 1mhz + type: binary + - + name: 2mhz + type: binary + - + name: 4mhz + type: binary + - + name: 8mhz + type: binary + - + name: 16mhz + type: binary + - + name: no-320mhz + type: binary + - + name: no-eht + type: binary + - + name: psd + type: binary + - + name: dfs-concurrent + type: binary + - + name: no-6ghz-vlp-client + type: binary + - + name: no-6ghz-afc-client + type: binary + - + name: can-monitor + type: binary + - + name: allow-6ghz-vlp-ap + type: binary + - + name: if-combination-attributes + enum-name: nl80211-if-combination-attrs + name-prefix: nl80211-iface-comb- + attr-max-name: max-nl80211-iface-comb + attributes: + - + name: limits + type: indexed-array + sub-type: nest + nested-attributes: iface-limit-attributes + - + name: maxnum + type: u32 + - + name: sta-ap-bi-match + type: flag + - + name: num-channels + type: u32 + - + name: radar-detect-widths + type: u32 + - + name: radar-detect-regions + type: u32 + - + name: bi-min-gcd + type: u32 + - + name: iface-limit-attributes + enum-name: nl80211-iface-limit-attrs + name-prefix: nl80211-iface-limit- + attr-max-name: max-nl80211-iface-limit + attributes: + - + name: max + type: u32 + - + name: types + type: nest + nested-attributes: supported-iftypes + - + name: iftype-data-attrs + name-prefix: nl80211-band-iftype-attr- + attributes: + - + name: iftypes + type: binary + - + name: he-cap-mac + type: binary + - + name: he-cap-phy + type: binary + - + name: he-cap-mcs-set + type: binary + - + name: he-cap-ppe + type: binary + - + name: he-6ghz-capa + type: binary + - + name: vendor-elems + type: binary + - + name: eht-cap-mac + type: binary + - + name: eht-cap-phy + type: binary + - + name: eht-cap-mcs-set + type: binary + - + name: eht-cap-ppe + type: binary + - + name: iftype-attrs + enum-name: nl80211-iftype + name-prefix: nl80211-iftype- + attributes: + - + name: unspecified + type: nest + value: 0 + nested-attributes: frame-type-attrs + - + name: adhoc + type: nest + nested-attributes: frame-type-attrs + - + name: station + type: nest + nested-attributes: frame-type-attrs + - + name: ap + type: nest + nested-attributes: frame-type-attrs + - + name: ap-vlan + type: nest + nested-attributes: frame-type-attrs + - + name: wds + type: nest + nested-attributes: frame-type-attrs + - + name: monitor + type: nest + nested-attributes: frame-type-attrs + - + name: mesh-point + type: nest + nested-attributes: frame-type-attrs + - + name: p2p-client + type: nest + nested-attributes: frame-type-attrs + - + name: p2p-go + type: nest + nested-attributes: frame-type-attrs + - + name: p2p-device + type: nest + nested-attributes: frame-type-attrs + - + name: ocb + type: nest + nested-attributes: frame-type-attrs + - + name: nan + type: nest + nested-attributes: frame-type-attrs + - + name: sar-attributes + enum-name: nl80211-sar-attrs + name-prefix: nl80211-sar-attr- + attributes: + - + name: type + type: u32 + - + name: specs + type: indexed-array + sub-type: nest + nested-attributes: sar-specs + - + name: sar-specs + enum-name: nl80211-sar-specs-attrs + name-prefix: nl80211-sar-attr-specs- + attributes: + - + name: power + type: s32 + - + name: range-index + type: u32 + - + name: start-freq + type: u32 + - + name: end-freq + type: u32 + - + name: supported-iftypes + enum-name: nl80211-iftype + name-prefix: nl80211-iftype- + attributes: + - + name: adhoc + type: flag + - + name: station + type: flag + - + name: ap + type: flag + - + name: ap-vlan + type: flag + - + name: wds + type: flag + - + name: monitor + type: flag + - + name: mesh-point + type: flag + - + name: p2p-client + type: flag + - + name: p2p-go + type: flag + - + name: p2p-device + type: flag + - + name: ocb + type: flag + - + name: nan + type: flag + - + name: txq-stats-attrs + name-prefix: nl80211-txq-stats- + attributes: + - + name: backlog-bytes + type: u32 + - + name: backlog-packets + type: u32 + - + name: flows + type: u32 + - + name: drops + type: u32 + - + name: ecn-marks + type: u32 + - + name: overlimit + type: u32 + - + name: overmemory + type: u32 + - + name: collisions + type: u32 + - + name: tx-bytes + type: u32 + - + name: tx-packets + type: u32 + - + name: max-flows + type: u32 + - + name: wmm-attrs + enum-name: nl80211-wmm-rule + name-prefix: nl80211-wmmr- + attributes: + - + name: cw-min + type: u16 + - + name: cw-max + type: u16 + - + name: aifsn + type: u8 + - + name: txop + type: u16 + - + name: wowlan-triggers-attrs + enum-name: nl80211-wowlan-triggers + name-prefix: nl80211-wowlan-trig- + attr-max-name: max-nl80211-wowlan-trig + attributes: + - + name: any + type: flag + - + name: disconnect + type: flag + - + name: magic-pkt + type: flag + - + name: pkt-pattern + type: flag + - + name: gtk-rekey-supported + type: flag + - + name: gtk-rekey-failure + type: flag + - + name: eap-ident-request + type: flag + - + name: 4way-handshake + type: flag + - + name: rfkill-release + type: flag + - + name: wakeup-pkt-80211 + type: flag + - + name: wakeup-pkt-80211-len + type: flag + - + name: wakeup-pkt-8023 + type: flag + - + name: wakeup-pkt-8023-len + type: flag + - + name: tcp-connection + type: flag + - + name: wakeup-tcp-match + type: flag + - + name: wakeup-tcp-connlost + type: flag + - + name: wakeup-tcp-nomoretokens + type: flag + - + name: net-detect + type: flag + - + name: net-detect-results + type: flag + - + name: unprotected-deauth-disassoc + type: flag + +operations: + enum-model: directional + list: + - + name: get-wiphy + doc: | + Get information about a wiphy or dump a list of all wiphys. Requests to dump get-wiphy + should unconditionally include the split-wiphy-dump flag in the request. + attribute-set: nl80211-attrs + do: + request: + value: 1 + attributes: + - wiphy + - wdev + - ifindex + reply: + value: 3 + attributes: &wiphy-reply-attrs + - bands + - cipher-suites + - control-port-ethertype + - ext-capa + - ext-capa-mask + - ext-features + - feature-flags + - generation + - ht-capability-mask + - interface-combinations + - mac + - max-csa-counters + - max-match-sets + - max-num-akm-suites + - max-num-pmkids + - max-num-scan-ssids + - max-num-sched-scan-plans + - max-num-sched-scan-ssids + - max-remain-on-channel-duration + - max-scan-ie-len + - max-scan-plan-interval + - max-scan-plan-iterations + - max-sched-scan-ie-len + - offchannel-tx-ok + - rx-frame-types + - sar-spec + - sched-scan-max-reqs + - software-iftypes + - support-ap-uapsd + - supported-commands + - supported-iftypes + - tdls-external-setup + - tdls-support + - tx-frame-types + - txq-limit + - txq-memory-limit + - txq-quantum + - txq-stats + - vht-capability-mask + - wiphy + - wiphy-antenna-avail-rx + - wiphy-antenna-avail-tx + - wiphy-antenna-rx + - wiphy-antenna-tx + - wiphy-bands + - wiphy-coverage-class + - wiphy-frag-threshold + - wiphy-name + - wiphy-retry-long + - wiphy-retry-short + - wiphy-rts-threshold + - wowlan-triggers-supported + dump: + request: + attributes: + - wiphy + - wdev + - ifindex + - split-wiphy-dump + reply: + attributes: *wiphy-reply-attrs + - + name: get-interface + doc: Get information about an interface or dump a list of all interfaces + attribute-set: nl80211-attrs + do: + request: + value: 5 + attributes: + - ifname + reply: + value: 7 + attributes: &interface-reply-attrs + - ifname + - iftype + - ifindex + - wiphy + - wdev + - mac + - generation + - txq-stats + - 4addr + dump: + request: + attributes: + - ifname + reply: + attributes: *interface-reply-attrs + - + name: get-protocol-features + doc: Get information about supported protocol features + attribute-set: nl80211-attrs + do: + request: + value: 95 + attributes: + - protocol-features + reply: + value: 95 + attributes: + - protocol-features + +mcast-groups: + list: + - + name: config + - + name: scan + - + name: regulatory + - + name: mlme + - + name: vendor + - + name: nan + - + name: testmode diff --git a/Documentation/netlink/specs/rt_addr.yaml b/Documentation/netlink/specs/rt_addr.yaml index cbee1cedb177..5dd5469044c7 100644 --- a/Documentation/netlink/specs/rt_addr.yaml +++ b/Documentation/netlink/specs/rt_addr.yaml @@ -168,6 +168,29 @@ operations: reply: value: 20 attributes: *ifaddr-all + - + name: getmaddrs + doc: Get / dump IPv4/IPv6 multicast addresses. + attribute-set: addr-attrs + fixed-header: ifaddrmsg + do: + request: + value: 58 + attributes: + - ifa-family + - ifa-index + reply: + value: 58 + attributes: &mcaddr-attrs + - ifa-multicast + - ifa-cacheinfo + dump: + request: + value: 58 + - ifa-family + reply: + value: 58 + attributes: *mcaddr-attrs mcast-groups: list: diff --git a/Documentation/netlink/specs/rt_rule.yaml b/Documentation/netlink/specs/rt_rule.yaml index a9debac3058a..b30c924087fa 100644 --- a/Documentation/netlink/specs/rt_rule.yaml +++ b/Documentation/netlink/specs/rt_rule.yaml @@ -182,6 +182,14 @@ attribute-sets: type: u32 byte-order: big-endian display-hint: hex + - + name: sport-mask + type: u16 + display-hint: hex + - + name: dport-mask + type: u16 + display-hint: hex operations: enum-model: directional @@ -215,6 +223,8 @@ operations: - dscp - flowlabel - flowlabel-mask + - sport-mask + - dport-mask - name: newrule-ntf doc: Notify a rule creation diff --git a/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-driver.rst b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-driver.rst index 8bf411b857d4..5f3885e56f58 100644 --- a/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-driver.rst +++ b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-driver.rst @@ -70,7 +70,7 @@ the DPSW object that it will probe: Besides the configuration of the actual DPSW object, the dpaa2-switch driver will need the following DPAA2 objects: - * 1 DPMCP - A Management Command Portal object is needed for any interraction + * 1 DPMCP - A Management Command Portal object is needed for any interaction with the MC firmware. * 1 DPBP - A Buffer Pool is used for seeding buffers intended for the Rx path diff --git a/Documentation/networking/devlink/ice.rst b/Documentation/networking/devlink/ice.rst index e3972d03cea0..792e9f8c846a 100644 --- a/Documentation/networking/devlink/ice.rst +++ b/Documentation/networking/devlink/ice.rst @@ -69,6 +69,17 @@ Parameters To verify that value has been set: $ devlink dev param show pci/0000:16:00.0 name tx_scheduling_layers + * - ``msix_vec_per_pf_max`` + - driverinit + - Set the max MSI-X that can be used by the PF, rest can be utilized for + SRIOV. The range is from min value set in msix_vec_per_pf_min to + 2k/number of ports. + * - ``msix_vec_per_pf_min`` + - driverinit + - Set the min MSI-X that will be used by the PF. This value inform how many + MSI-X will be allocated statically. The range is from 2 to value set + in msix_vec_per_pf_max. + .. list-table:: Driver specific parameters implemented :widths: 5 5 90 diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst index 41618538fc70..7febe0aecd53 100644 --- a/Documentation/networking/devlink/mlx5.rst +++ b/Documentation/networking/devlink/mlx5.rst @@ -280,6 +280,10 @@ Description of the vnic counters: number of packets handled by the VNIC experiencing unexpected steering failure (at any point in steering flow owned by the VNIC, including the FDB for the eswitch owner). +- icm_consumption + amount of Interconnect Host Memory (ICM) consumed by the vnic in + granularity of 4KB. ICM is host memory allocated by SW upon HCA request + and is used for storing data structures that control HCA operation. User commands examples: diff --git a/Documentation/networking/devlink/sfc.rst b/Documentation/networking/devlink/sfc.rst index db64a1bd9733..0398d59ea184 100644 --- a/Documentation/networking/devlink/sfc.rst +++ b/Documentation/networking/devlink/sfc.rst @@ -5,7 +5,7 @@ sfc devlink support =================== This document describes the devlink features implemented by the ``sfc`` -device driver for the ef100 device. +device driver for the ef10 and ef100 devices. Info versions ============= @@ -18,6 +18,10 @@ The ``sfc`` driver reports the following versions * - Name - Type - Description + * - ``fw.bundle_id`` + - stored + - Version of the firmware "bundle" image that was last used to update + multiple components. * - ``fw.mgmt.suc`` - running - For boards where the management function is split between multiple @@ -55,3 +59,13 @@ The ``sfc`` driver reports the following versions * - ``fw.uefi`` - running - UEFI driver version (No UNDI support). + +Flash Update +============ + +The ``sfc`` driver implements support for flash update using the +``devlink-flash`` interface. It supports updating the device flash using a +combined flash image ("bundle") that contains multiple components (on ef10, +typically ``fw.mgmt``, ``fw.app``, ``fw.exprom`` and ``fw.uefi``). + +The driver does not support any overwrite mask flags. diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 363b4950d542..054561f8dcae 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -705,6 +705,8 @@ tcp_retries2 - INTEGER seconds and is a lower bound for the effective timeout. TCP will effectively time out at the first RTO which exceeds the hypothetical timeout. + If tcp_rto_max_ms is decreased, it is recommended to also + change tcp_retries2. RFC 1122 recommends at least 100 seconds for the timeout, which corresponds to a value of at least 8. @@ -1237,6 +1239,17 @@ tcp_rto_min_us - INTEGER Default: 200000 +tcp_rto_max_ms - INTEGER + Maximal TCP retransmission timeout (in ms). + Note that TCP_RTO_MAX_MS socket option has higher precedence. + + When changing tcp_rto_max_ms, it is important to understand + that tcp_retries2 might need a change. + + Possible Values: 1000 - 120,000 + + Default: 120,000 + UDP variables ============= diff --git a/Documentation/networking/j1939.rst b/Documentation/networking/j1939.rst index 544bad175aae..45f02efe3df5 100644 --- a/Documentation/networking/j1939.rst +++ b/Documentation/networking/j1939.rst @@ -66,6 +66,90 @@ the library exclusively, or by the in-kernel system exclusively. J1939 concepts ============== +Data Sent to the J1939 Stack +---------------------------- + +The data buffers sent to the J1939 stack from user space are not CAN frames +themselves. Instead, they are payloads that the J1939 stack converts into +proper CAN frames based on the size of the buffer and the type of transfer. The +size of the buffer influences how the stack processes the data and determines +the internal code path used for the transfer. + +**Handling of Different Buffer Sizes:** + +- **Buffers with a size of 8 bytes or less:** + + - These are handled as simple sessions internally within the stack. + + - The stack converts the buffer directly into a single CAN frame without + fragmentation. + + - This type of transfer does not require an actual client (receiver) on the + receiving side. + +- **Buffers up to 1785 bytes:** + + - These are automatically handled as J1939 Transport Protocol (TP) transfers. + + - Internally, the stack splits the buffer into multiple 8-byte CAN frames. + + - TP transfers can be unicast or broadcast. + + - **Broadcast TP:** Does not require a receiver on the other side and can be + used in broadcast scenarios. + + - **Unicast TP:** Requires an active receiver (client) on the other side to + acknowledge the transfer. + +- **Buffers from 1786 bytes up to 111 MiB:** + + - These are handled as ISO 11783 Extended Transport Protocol (ETP) transfers. + + - ETP transfers are used for larger payloads and are split into multiple CAN + frames internally. + + - **ETP transfers (unicast):** Require a receiver on the other side to + process the incoming data and acknowledge each step of the transfer. + + - ETP transfers cannot be broadcast like TP transfers, and always require a + receiver for operation. + +**Non-Blocking Operation with `MSG_DONTWAIT`:** + +The J1939 stack supports non-blocking operation when used in combination with +the `MSG_DONTWAIT` flag. In this mode, the stack attempts to take as much data +as the available memory for the socket allows. It returns the amount of data +that was successfully taken, and it is the responsibility of user space to +monitor this value and handle partial transfers. + +- If the stack cannot take the entire buffer, it returns the number of bytes + successfully taken, and user space should handle the remainder. + +- **Error handling:** When using `MSG_DONTWAIT`, the user must rely on the + error queue to detect transfer errors. See the **SO_J1939_ERRQUEUE** section + for details on how to subscribe to error notifications. Without the error + queue, there is no other way for user space to be notified of transfer errors + during non-blocking operations. + +**Behavior and Requirements:** + +- **Simple transfers (<= 8 bytes):** Do not require a receiver on the other + side, making them easy to send without needing address claiming or + coordination with a destination. + +- **Unicast TP/ETP:** Requires a receiver on the other side to complete the + transfer. The receiver must acknowledge the transfer for the session to + proceed successfully. + +- **Broadcast TP:** Allows sending data without a receiver, but only works for + TP transfers. ETP cannot be broadcast and always needs a receiving client. + +These different behaviors depend heavily on the size of the buffer provided to +the stack, and the appropriate transport mechanism (TP or ETP) is selected +based on the payload size. The stack automatically manages the fragmentation +and reassembly of large payloads and ensures that the correct CAN frames are +generated and transmitted for each session. + PGN --- @@ -338,6 +422,459 @@ with ``cmsg_level == SOL_J1939 && cmsg_type == SCM_J1939_DEST_ADDR``, } } +setsockopt(2) +^^^^^^^^^^^^^ + +The ``setsockopt(2)`` function is used to configure various socket-level +options for J1939 communication. The following options are supported: + +``SO_J1939_FILTER`` +~~~~~~~~~~~~~~~~~~~ + +The ``SO_J1939_FILTER`` option is essential when the default behavior of +``bind(2)`` and ``connect(2)`` is insufficient for specific use cases. By +default, ``bind(2)`` and ``connect(2)`` allow a socket to be associated with a +single unicast or broadcast address. However, there are scenarios where finer +control over the incoming messages is required, such as filtering by Parameter +Group Number (PGN) rather than by addresses. + +For example, in a system where multiple types of J1939 messages are being +transmitted, a process might only be interested in a subset of those messages, +such as specific PGNs, and not want to receive all messages destined for its +address or broadcast to the bus. + +By applying the ``SO_J1939_FILTER`` option, you can filter messages based on: + +- **Source Address (SA)**: Filter messages coming from specific source + addresses. + +- **Source Name**: Filter messages coming from ECUs with specific NAME + identifiers. + +- **Parameter Group Number (PGN)**: Focus on receiving messages with specific + PGNs, filtering out irrelevant ones. + +This filtering mechanism is particularly useful when: + +- You want to receive a subset of messages based on their PGNs, even if the + address is the same. + +- You need to handle both broadcast and unicast messages but only care about + certain message types or parameters. + +- The ``bind(2)`` and ``connect(2)`` functions only allow binding to a single + address, which might not be sufficient if the process needs to handle multiple + PGNs but does not want to open multiple sockets. + +To remove existing filters, you can pass ``optval == NULL`` or ``optlen == 0`` +to ``setsockopt(2)``. This will clear all currently set filters. If you want to +**update** the set of filters, you must pass the updated filter set to +``setsockopt(2)``, as the new filter set will **replace** the old one entirely. +This behavior ensures that any previous filter configuration is discarded and +only the new set is applied. + +Example of removing all filters: + +.. code-block:: c + + setsockopt(sock, SOL_CAN_J1939, SO_J1939_FILTER, NULL, 0); + +**Maximum number of filters:** The maximum amount of filters that can be +applied using ``SO_J1939_FILTER`` is defined by ``J1939_FILTER_MAX``, which is +set to 512. This means you can configure up to 512 individual filters to match +your specific filtering needs. + +Practical use case: **Monitoring Address Claiming** + +One practical use case is monitoring the J1939 address claiming process by +filtering for specific PGNs related to address claiming. This allows a process +to monitor and handle address claims without processing unrelated messages. + +Example: + +.. code-block:: c + + struct j1939_filter filt[] = { + { + .pgn = J1939_PGN_ADDRESS_CLAIMED, + .pgn_mask = J1939_PGN_PDU1_MAX, + }, { + .pgn = J1939_PGN_REQUEST, + .pgn_mask = J1939_PGN_PDU1_MAX, + }, { + .pgn = J1939_PGN_ADDRESS_COMMANDED, + .pgn_mask = J1939_PGN_MAX, + }, + }; + setsockopt(sock, SOL_CAN_J1939, SO_J1939_FILTER, &filt, sizeof(filt)); + +In this example, the socket will only receive messages with the PGNs related to +address claiming: ``J1939_PGN_ADDRESS_CLAIMED``, ``J1939_PGN_REQUEST``, and +``J1939_PGN_ADDRESS_COMMANDED``. This is particularly useful in scenarios where +you want to monitor and process address claims without being overwhelmed by +other traffic on the J1939 network. + +``SO_J1939_PROMISC`` +~~~~~~~~~~~~~~~~~~~~ + +The ``SO_J1939_PROMISC`` option enables socket-level promiscuous mode. When +this option is enabled, the socket will receive all J1939 traffic, regardless +of any filters set by ``bind()`` or ``connect()``. This is analogous to +enabling promiscuous mode for an Ethernet interface, where all traffic on the +network segment is captured. + +However, **`SO_J1939_FILTER` has a higher priority** compared to +``SO_J1939_PROMISC``. This means that even in promiscuous mode, you can reduce +the number of packets received by applying specific filters with +`SO_J1939_FILTER`. The filters will limit which packets are passed to the +socket, allowing for more refined traffic selection while promiscuous mode is +active. + +The acceptable value size for this option is ``sizeof(int)``, and the value is +only differentiated between `0` and non-zero. A value of `0` disables +promiscuous mode, while any non-zero value enables it. + +This combination can be useful for debugging or monitoring specific types of +traffic while still capturing a broad set of messages. + +Example: + +.. code-block:: c + + int value = 1; + setsockopt(sock, SOL_CAN_J1939, SO_J1939_PROMISC, &value, sizeof(value)); + +In this example, setting ``value`` to any non-zero value (e.g., `1`) enables +promiscuous mode, allowing the socket to receive all J1939 traffic on the +network. + +``SO_BROADCAST`` +~~~~~~~~~~~~~~~~ + +The ``SO_BROADCAST`` option enables the sending and receiving of broadcast +messages. By default, broadcast messages are disabled for J1939 sockets. When +this option is enabled, the socket will be allowed to send and receive +broadcast packets on the J1939 network. + +Due to the nature of the CAN bus as a shared medium, all messages transmitted +on the bus are visible to all participants. In the context of J1939, +broadcasting refers to using a specific destination address field, where the +destination address is set to a value that indicates the message is intended +for all participants (usually a global address such as 0xFF). Enabling the +broadcast option allows the socket to send and receive such broadcast messages. + +The acceptable value size for this option is ``sizeof(int)``, and the value is +only differentiated between `0` and non-zero. A value of `0` disables the +ability to send and receive broadcast messages, while any non-zero value +enables it. + +Example: + +.. code-block:: c + + int value = 1; + setsockopt(sock, SOL_SOCKET, SO_BROADCAST, &value, sizeof(value)); + +In this example, setting ``value`` to any non-zero value (e.g., `1`) enables +the socket to send and receive broadcast messages. + +``SO_J1939_SEND_PRIO`` +~~~~~~~~~~~~~~~~~~~~~~ + +The ``SO_J1939_SEND_PRIO`` option sets the priority of outgoing J1939 messages +for the socket. In J1939, messages can have different priorities, and lower +numerical values indicate higher priority. This option allows the user to +control the priority of messages sent from the socket by adjusting the priority +bits in the CAN identifier. + +The acceptable value **size** for this option is ``sizeof(int)``, and the value +is expected to be in the range of 0 to 7, where `0` is the highest priority, +and `7` is the lowest. By default, the priority is set to `6` if this option is +not explicitly configured. + +Note that the priority values `0` and `1` can only be set if the process has +the `CAP_NET_ADMIN` capability. These are reserved for high-priority traffic +and require administrative privileges. + +Example: + +.. code-block:: c + + int prio = 3; // Priority value between 0 (highest) and 7 (lowest) + setsockopt(sock, SOL_CAN_J1939, SO_J1939_SEND_PRIO, &prio, sizeof(prio)); + +In this example, the priority is set to `3`, meaning the outgoing messages will +be sent with a moderate priority level. + +``SO_J1939_ERRQUEUE`` +~~~~~~~~~~~~~~~~~~~~~ + +The ``SO_J1939_ERRQUEUE`` option enables the socket to receive error messages +from the error queue, providing diagnostic information about transmission +failures, protocol violations, or other issues that occur during J1939 +communication. Once this option is set, user space is required to handle +``MSG_ERRQUEUE`` messages. + +Setting ``SO_J1939_ERRQUEUE`` to ``0`` will purge any currently present error +messages in the error queue. When enabled, error messages can be retrieved +using the ``recvmsg(2)`` system call. + +When subscribing to the error queue, the following error events can be +accessed: + +- **``J1939_EE_INFO_TX_ABORT``**: Transmission abort errors. +- **``J1939_EE_INFO_RX_RTS``**: Reception of RTS (Request to Send) control + frames. +- **``J1939_EE_INFO_RX_DPO``**: Reception of data packets with Data Page Offset + (DPO). +- **``J1939_EE_INFO_RX_ABORT``**: Reception abort errors. + +The error queue can be used to correlate errors with specific message transfer +sessions using the session ID (``tskey``). The session ID is assigned via the +``SOF_TIMESTAMPING_OPT_ID`` flag, which is set by enabling the +``SO_TIMESTAMPING`` option. + +If ``SO_J1939_ERRQUEUE`` is activated, the user is required to pull messages +from the error queue, meaning that using plain ``recv(2)`` is not sufficient +anymore. The user must use ``recvmsg(2)`` with appropriate flags to handle +error messages. Failure to do so can result in the socket becoming blocked with +unprocessed error messages in the queue. + +It is **recommended** that ``SO_J1939_ERRQUEUE`` be used in combination with +``SO_TIMESTAMPING`` in most cases. This enables proper error handling along +with session tracking and timestamping, providing a more detailed analysis of +message transfers and errors. + +The acceptable value **size** for this option is ``sizeof(int)``, and the value +is only differentiated between ``0`` and non-zero. A value of ``0`` disables +error queue reception and purges any existing error messages, while any +non-zero value enables it. + +Example: + +.. code-block:: c + + int enable = 1; // Enable error queue reception + setsockopt(sock, SOL_CAN_J1939, SO_J1939_ERRQUEUE, &enable, sizeof(enable)); + + // Enable timestamping with session tracking via tskey + int timestamping = SOF_TIMESTAMPING_OPT_ID | SOF_TIMESTAMPING_TX_ACK | + SOF_TIMESTAMPING_TX_SCHED | + SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_OPT_CMSG; + setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, ×tamping, + sizeof(timestamping)); + +When enabled, error messages can be retrieved using ``recvmsg(2)``. By +combining ``SO_J1939_ERRQUEUE`` with ``SO_TIMESTAMPING`` (with +``SOF_TIMESTAMPING_OPT_ID`` and ``SOF_TIMESTAMPING_OPT_CMSG`` enabled), the +user can track message transfers, retrieve precise timestamps, and correlate +errors with specific sessions. + +For more information on enabling timestamps and session tracking, refer to the +`SO_TIMESTAMPING` section. + +``SO_TIMESTAMPING`` +~~~~~~~~~~~~~~~~~~~ + +The ``SO_TIMESTAMPING`` option allows the socket to receive timestamps for +various events related to message transmissions and receptions in J1939. This +option is often used in combination with ``SO_J1939_ERRQUEUE`` to provide +detailed diagnostic information, session tracking, and precise timing data for +message transfers. + +In J1939, all payloads provided by user space, regardless of size, are +processed by the kernel as **sessions**. This includes both single-frame +messages (up to 8 bytes) and multi-frame protocols such as the Transport +Protocol (TP) and Extended Transport Protocol (ETP). Even for small, +single-frame messages, the kernel creates a session to manage the transmission +and reception. The concept of sessions allows the kernel to manage various +aspects of the protocol, such as reassembling multi-frame messages and tracking +the status of transmissions. + +When receiving extended error messages from the error queue, the error +information is delivered through a `struct sock_extended_err`, accessible via +the control message (``cmsg``) retrieved using the ``recvmsg(2)`` system call. + +There are two typical origins for the extended error messages in J1939: + +1. ``serr->ee_origin == SO_EE_ORIGIN_TIMESTAMPING``: + + In this case, the `serr->ee_info` field will contain one of the following + timestamp types: + + - ``SCM_TSTAMP_SCHED``: This timestamp is valid for Extended Transport + Protocol (ETP) transfers and simple transfers (8 bytes or less). It + indicates when a message or set of frames has been scheduled for + transmission. + + - For simple transfers (8 bytes or less), it marks the point when the + message is queued and ready to be sent onto the CAN bus. + + - For ETP transfers, it is sent after receiving a CTS (Clear to Send) + frame on the sender side, indicating that a new set of frames has been + scheduled for transmission. + + - The Transport Protocol (TP) case is currently not implemented for this + timestamp. + + - On the receiver side, the counterpart to this event for ETP is + represented by the ``J1939_EE_INFO_RX_DPO`` message, which indicates the + reception of a Data Page Offset (DPO) control frame. + + - ``SCM_TSTAMP_ACK``: This timestamp indicates the acknowledgment of the + message or session. + + - For simple transfers (8 bytes or less), it marks when the message has + been sent and an echo confirmation has been received from the CAN + controller, indicating that the frame was transmitted onto the bus. + + - For multi-frame transfers (TP or ETP), it signifies that the entire + session has been acknowledged, typically after receiving the End of + Message Acknowledgment (EOMA) packet. + +2. ``serr->ee_origin == SO_EE_ORIGIN_LOCAL``: + + In this case, the `serr->ee_info` field will contain one of the following + J1939 stack-specific message types: + + - ``J1939_EE_INFO_TX_ABORT``: This message indicates that the transmission + of a message or session was aborted. The cause of the abort can come from + various sources: + + - **CAN stack failure**: The J1939 stack was unable to pass the frame to + the CAN framework for transmission. + + - **Echo failure**: The J1939 stack did not receive an echo confirmation + from the CAN controller, meaning the frame may not have been successfully + transmitted to the CAN bus. + + - **Protocol-level issues**: For multi-frame transfers (TP/ETP), this + could include protocol-related errors, such as an abort signaled by the + receiver or a timeout at the protocol level, which causes the session to + terminate prematurely. + + - The corresponding error code is stored in ``serr->ee_data`` + (``session->err`` on kernel side), providing additional details about + the specific reason for the abort. + + - ``J1939_EE_INFO_RX_RTS``: This message indicates that the J1939 stack has + received a Request to Send (RTS) control frame, signaling the start of a + multi-frame transfer using the Transport Protocol (TP) or Extended + Transport Protocol (ETP). + + - It informs the receiver that the sender is ready to transmit a + multi-frame message and includes details about the total message size + and the number of frames to be sent. + + - Statistics such as ``J1939_NLA_TOTAL_SIZE``, ``J1939_NLA_PGN``, + ``J1939_NLA_SRC_NAME``, and ``J1939_NLA_DEST_NAME`` are provided along + with the ``J1939_EE_INFO_RX_RTS`` message, giving detailed information + about the incoming transfer. + + - ``J1939_EE_INFO_RX_DPO``: This message indicates that the J1939 stack has + received a Data Page Offset (DPO) control frame, which is part of the + Extended Transport Protocol (ETP). + + - The DPO frame signals the continuation of an ETP multi-frame message by + indicating the offset position in the data being transferred. It helps + the receiver manage large data sets by identifying which portion of the + message is being received. + + - It is typically paired with a corresponding ``SCM_TSTAMP_SCHED`` event + on the sender side, which indicates when the next set of frames is + scheduled for transmission. + + - This event includes statistics such as ``J1939_NLA_BYTES_ACKED``, which + tracks the number of bytes acknowledged up to that point in the session. + + - ``J1939_EE_INFO_RX_ABORT``: This message indicates that the reception of a + multi-frame message (Transport Protocol or Extended Transport Protocol) has + been aborted. + + - The abort can be triggered by protocol-level errors such as timeouts, an + unexpected frame, or a specific abort request from the sender. + + - This message signals that the receiver cannot continue processing the + transfer, and the session is terminated. + + - The corresponding error code is stored in ``serr->ee_data`` + (``session->err`` on kernel side ), providing further details about the + reason for the abort, such as protocol violations or timeouts. + + - After receiving this message, the receiver discards the partially received + frames, and the multi-frame session is considered incomplete. + +In both cases, if ``SOF_TIMESTAMPING_OPT_ID`` is enabled, ``serr->ee_data`` +will be set to the session’s unique identifier (``session->tskey``). This +allows user space to track message transfers by their session identifier across +multiple frames or stages. + +In all other cases, ``serr->ee_errno`` will be set to ``ENOMSG``, except for +the ``J1939_EE_INFO_TX_ABORT`` and ``J1939_EE_INFO_RX_ABORT`` cases, where the +kernel sets ``serr->ee_data`` to the error stored in ``session->err``. All +protocol-specific errors are converted to standard kernel error values and +stored in ``session->err``. These error values are unified across system calls +and ``serr->ee_errno``. Some of the known error values are described in the +`Error Codes in the J1939 Stack` section. + +When the `J1939_EE_INFO_RX_RTS` message is provided, it will include the +following statistics for multi-frame messages (TP and ETP): + + - ``J1939_NLA_TOTAL_SIZE``: Total size of the message in the session. + - ``J1939_NLA_PGN``: Parameter Group Number (PGN) identifying the message type. + - ``J1939_NLA_SRC_NAME``: 64-bit name of the source ECU. + - ``J1939_NLA_DEST_NAME``: 64-bit name of the destination ECU. + - ``J1939_NLA_SRC_ADDR``: 8-bit source address of the sending ECU. + - ``J1939_NLA_DEST_ADDR``: 8-bit destination address of the receiving ECU. + +- For other messages (including single-frame messages), only the following + statistic is included: + + - ``J1939_NLA_BYTES_ACKED``: Number of bytes successfully acknowledged in the + session. + +The key flags for ``SO_TIMESTAMPING`` include: + +- ``SOF_TIMESTAMPING_OPT_ID``: Enables the use of a unique session identifier + (``tskey``) for each transfer. This identifier helps track message transfers + and errors as distinct sessions in user space. When this option is enabled, + ``serr->ee_data`` will be set to ``session->tskey``. + +- ``SOF_TIMESTAMPING_OPT_CMSG``: Sends timestamp information through control + messages (``struct scm_timestamping``), allowing the application to retrieve + timestamps alongside the data. + +- ``SOF_TIMESTAMPING_TX_SCHED``: Provides the timestamp for when a message is + scheduled for transmission (``SCM_TSTAMP_SCHED``). + +- ``SOF_TIMESTAMPING_TX_ACK``: Provides the timestamp for when a message + transmission is fully acknowledged (``SCM_TSTAMP_ACK``). + +- ``SOF_TIMESTAMPING_RX_SOFTWARE``: Provides timestamps for reception-related + events (e.g., ``J1939_EE_INFO_RX_RTS``, ``J1939_EE_INFO_RX_DPO``, + ``J1939_EE_INFO_RX_ABORT``). + +These flags enable detailed monitoring of message lifecycles, including +transmission scheduling, acknowledgments, reception timestamps, and gathering +detailed statistics about the communication session, especially for multi-frame +payloads like TP and ETP. + +Example: + +.. code-block:: c + + // Enable timestamping with various options, including session tracking and + // statistics + int sock_opt = SOF_TIMESTAMPING_OPT_CMSG | + SOF_TIMESTAMPING_TX_ACK | + SOF_TIMESTAMPING_TX_SCHED | + SOF_TIMESTAMPING_OPT_ID | + SOF_TIMESTAMPING_RX_SOFTWARE; + + setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &sock_opt, sizeof(sock_opt)); + + + Dynamic Addressing ------------------ @@ -458,3 +995,141 @@ Send: }; sendto(sock, dat, sizeof(dat), 0, (const struct sockaddr *)&saddr, sizeof(saddr)); + + +Error Codes in the J1939 Stack +------------------------------ + +This section lists all potential kernel error codes that can be exposed to user +space when interacting with the J1939 stack. It includes both standard error +codes and those derived from protocol-specific abort codes. + +- ``EAGAIN``: Operation would block; retry may succeed. One common reason is + that an active TP or ETP session exists, and an attempt was made to start a + new overlapping TP or ETP session between the same peers. + +- ``ENETDOWN``: Network is down. This occurs when the CAN interface is switched + to the "down" state. + +- ``ENOBUFS``: No buffer space available. This error occurs when the CAN + interface's transmit (TX) queue is full, and no more messages can be queued. + +- ``EOVERFLOW``: Value too large for defined data type. In J1939, this can + happen if the requested data lies outside of the queued buffer. For example, + if a CTS (Clear to Send) requests an offset not available in the kernel buffer + because user space did not provide enough data. + +- ``EBUSY``: Device or resource is busy. For example, this occurs if an + identical session is already active and the stack is unable to recover from + the condition. + +- ``EACCES``: Permission denied. This error can occur, for example, when + attempting to send broadcast messages, but the socket is not configured with + ``SO_BROADCAST``. + +- ``EADDRNOTAVAIL``: Address not available. This error occurs in cases such as: + + - When attempting to use ``getsockname(2)`` to retrieve the peer's address, + but the socket is not connected. + + - When trying to send data to or from a NAME, but address claiming for the + NAME was not performed or detected by the stack. + +- ``EBADFD``: File descriptor in bad state. This error can occur if: + + - Attempting to send data to an unbound socket. + + - The socket is bound but has no source name, and the source address is + ``J1939_NO_ADDR``. + + - The ``can_ifindex`` is incorrect. + +- ``EFAULT``: Bad address. Occurs mostly when the stack can't copy from or to a + sockptr, when there is insufficient data from user space, or when the buffer + provided by user space is not large enough for the requested data. + +- ``EINTR``: A signal occurred before any data was transmitted; see ``signal(7)``. + +- ``EINVAL``: Invalid argument passed. For example: + + - ``msg->msg_namelen`` is less than ``J1939_MIN_NAMELEN``. + + - ``addr->can_family`` is not equal to ``AF_CAN``. + + - An incorrect PGN was provided. + +- ``ENODEV``: No such device. This happens when the CAN network device cannot + be found for the provided ``can_ifindex`` or if ``can_ifindex`` is 0. + +- ``ENOMEM``: Out of memory. Typically related to issues with memory allocation + in the stack. + +- ``ENOPROTOOPT``: Protocol not available. This can occur when using + ``getsockopt(2)`` or ``setsockopt(2)`` if the requested socket option is not + available. + +- ``EDESTADDRREQ``: Destination address required. This error occurs: + + - In the case of ``connect(2)``, if the ``struct sockaddr *uaddr`` is ``NULL``. + + - In the case of ``send*(2)``, if there is an attempt to send an ETP message + to a broadcast address. + +- ``EDOM``: Argument out of domain. This error may happen if attempting to send + a TP or ETP message to a PGN that is reserved for control PGNs for TP or ETP + operations. + +- ``EIO``: I/O error. This can occur if the amount of data provided to the + socket for a TP or ETP session does not match the announced amount of data for + the session. + +- ``ENOENT``: No such file or directory. This can happen when the stack + attempts to transfer CTS or EOMA but cannot find a matching receiving socket + anymore. + +- ``ENOIOCTLCMD``: No ioctls are available for the socket layer. + +- ``EPERM``: Operation not permitted. For example, this can occur if a + requested action requires ``CAP_NET_ADMIN`` privileges. + +- ``ENETUNREACH``: Network unreachable. Most likely, this occurs when frames + cannot be transmitted to the CAN bus. + +- ``ETIME``: Timer expired. This can happen if a timeout occurs while + attempting to send a simple message, for example, when an echo message from + the controller is not received. + +- ``EPROTO``: Protocol error. + + - Used for various protocol-level errors in J1939, including: + + - Duplicate sequence number. + + - Unexpected EDPO or ECTS packet. + + - Invalid PGN or offset in EDPO/ECTS. + + - Number of EDPO packets exceeded CTS allowance. + + - Any other protocol-level error. + +- ``EMSGSIZE``: Message too long. + +- ``ENOMSG``: No message available. + +- ``EALREADY``: The ECU is already engaged in one or more connection-managed + sessions and cannot support another. + +- ``EHOSTUNREACH``: A timeout occurred, and the session was aborted. + +- ``EBADMSG``: CTS (Clear to Send) messages were received during an active data + transfer, causing an abort. + +- ``ENOTRECOVERABLE``: The maximum retransmission request limit was reached, + and the session cannot recover. + +- ``ENOTCONN``: An unexpected data transfer packet was received. + +- ``EILSEQ``: A bad sequence number was received, and the software could not + recover. + diff --git a/Documentation/networking/napi.rst b/Documentation/networking/napi.rst index f970a2be271a..d0e3953cae6a 100644 --- a/Documentation/networking/napi.rst +++ b/Documentation/networking/napi.rst @@ -171,12 +171,43 @@ a channel as an IRQ/NAPI which services queues of a given type. For example, a configuration of 1 ``rx``, 1 ``tx`` and 1 ``combined`` channel is expected to utilize 3 interrupts, 2 Rx and 2 Tx queues. +Persistent NAPI config +---------------------- + +Drivers often allocate and free NAPI instances dynamically. This leads to loss +of NAPI-related user configuration each time NAPI instances are reallocated. +The netif_napi_add_config() API prevents this loss of configuration by +associating each NAPI instance with a persistent NAPI configuration based on +a driver defined index value, like a queue number. + +Using this API allows for persistent NAPI IDs (among other settings), which can +be beneficial to userspace programs using ``SO_INCOMING_NAPI_ID``. See the +sections below for other NAPI configuration settings. + +Drivers should try to use netif_napi_add_config() whenever possible. + User API ======== User interactions with NAPI depend on NAPI instance ID. The instance IDs are only visible to the user thru the ``SO_INCOMING_NAPI_ID`` socket option. -It's not currently possible to query IDs used by a given device. + +Users can query NAPI IDs for a device or device queue using netlink. This can +be done programmatically in a user application or by using a script included in +the kernel source tree: ``tools/net/ynl/pyynl/cli.py``. + +For example, using the script to dump all of the queues for a device (which +will reveal each queue's NAPI ID): + +.. code-block:: bash + + $ kernel-source/tools/net/ynl/pyynl/cli.py \ + --spec Documentation/netlink/specs/netdev.yaml \ + --dump queue-get \ + --json='{"ifindex": 2}' + +See ``Documentation/netlink/specs/netdev.yaml`` for more details on +available operations and attributes. Software IRQ coalescing ----------------------- diff --git a/Documentation/networking/net_cachelines/inet_connection_sock.rst b/Documentation/networking/net_cachelines/inet_connection_sock.rst index 4a15627fc93b..b2401aa7c450 100644 --- a/Documentation/networking/net_cachelines/inet_connection_sock.rst +++ b/Documentation/networking/net_cachelines/inet_connection_sock.rst @@ -17,6 +17,7 @@ struct timer_list icsk_retransmit_timer read_mostly struct timer_list icsk_delack_timer read_mostly inet_csk_reset_xmit_timer,tcp_connect u32 icsk_rto read_write tcp_cwnd_validate,tcp_schedule_loss_probe,tcp_connect_init,tcp_connect,tcp_write_xmit,tcp_push_one u32 icsk_rto_min +u32 icsk_rto_max read_mostly tcp_reset_xmit_timer u32 icsk_delack_max u32 icsk_pmtu_cookie read_write tcp_sync_mss,tcp_current_mss,tcp_send_syn_data,tcp_connect_init,tcp_connect struct tcp_congestion_ops icsk_ca_ops read_write tcp_cwnd_validate,tcp_tso_segs,tcp_ca_dst_init,tcp_connect_init,tcp_connect,tcp_write_xmit diff --git a/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst b/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst index de0263302f16..6e7b20afd2d4 100644 --- a/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst +++ b/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst @@ -86,6 +86,7 @@ u8 sysctl_tcp_sack u8 sysctl_tcp_window_scaling tcp_syn_options,tcp_parse_options u8 sysctl_tcp_timestamps u8 sysctl_tcp_early_retrans read_mostly tcp_schedule_loss_probe(tcp_write_xmit) +u32 sysctl_tcp_rto_max_ms u8 sysctl_tcp_recovery tcp_fastretrans_alert u8 sysctl_tcp_thin_linear_timeouts tcp_retrans_timer(on_thin_streams) u8 sysctl_tcp_slow_start_after_idle unlikely(tcp_cwnd_validate-network-not-starved) diff --git a/Documentation/networking/netconsole.rst b/Documentation/networking/netconsole.rst index 94c4680fdf3e..84803c59968a 100644 --- a/Documentation/networking/netconsole.rst +++ b/Documentation/networking/netconsole.rst @@ -17,6 +17,8 @@ Release prepend support by Breno Leitao <leitao@debian.org>, Jul 7 2023 Userdata append support by Matthew Wood <thepacketgeek@gmail.com>, Jan 22 2024 +Sysdata append support by Breno Leitao <leitao@debian.org>, Jan 15 2025 + Please send bug reports to Matt Mackall <mpm@selenic.com> Satyam Sharma <satyam.sharma@gmail.com>, and Cong Wang <xiyou.wangcong@gmail.com> @@ -238,6 +240,49 @@ Delete `userdata` entries with `rmdir`:: It is recommended to not write user data values with newlines. +CPU number auto population in userdata +-------------------------------------- + +Inside the netconsole configfs hierarchy, there is a file called +`cpu_nr` under the `userdata` directory. This file is used to enable or disable +the automatic CPU number population feature. This feature automatically +populates the CPU number that is sending the message. + +To enable the CPU number auto-population:: + + echo 1 > /sys/kernel/config/netconsole/target1/userdata/cpu_nr + +When this option is enabled, the netconsole messages will include an additional +line in the userdata field with the format `cpu=<cpu_number>`. This allows the +receiver of the netconsole messages to easily differentiate and demultiplex +messages originating from different CPUs, which is particularly useful when +dealing with parallel log output. + +Example:: + + echo "This is a message" > /dev/kmsg + 12,607,22085407756,-;This is a message + cpu=42 + +In this example, the message was sent by CPU 42. + +.. note:: + + If the user has set a conflicting `cpu` key in the userdata dictionary, + both keys will be reported, with the kernel-populated entry appearing after + the user one. For example:: + + # User-defined CPU entry + mkdir -p /sys/kernel/config/netconsole/target1/userdata/cpu + echo "1" > /sys/kernel/config/netconsole/target1/userdata/cpu/value + + Output might look like:: + + 12,607,22085407756,-;This is a message + cpu=1 + cpu=42 # kernel-populated value + + Extended console: ================= diff --git a/MAINTAINERS b/MAINTAINERS index 3864d473f52f..b0cd54818bb3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -24159,7 +24159,7 @@ W: http://vtun.sourceforge.net/tun F: Documentation/networking/tuntap.rst F: arch/um/os-Linux/drivers/ F: drivers/net/tap.c -F: drivers/net/tun.c +F: drivers/net/tun* TURBOCHANNEL SUBSYSTEM M: "Maciej W. Rozycki" <macro@orcam.me.uk> diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index d9e705f4a697..bde6a496df5f 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -54,7 +54,6 @@ enum interruption_class { IRQIO_C70, IRQIO_TAP, IRQIO_VMR, - IRQIO_LCS, IRQIO_CTC, IRQIO_ADM, IRQIO_CSC, diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index ef7be599e1f7..7ca157ffab30 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -84,7 +84,6 @@ static const struct irq_class irqclass_sub_desc[] = { {.irq = IRQIO_C70, .name = "C70", .desc = "[I/O] 3270"}, {.irq = IRQIO_TAP, .name = "TAP", .desc = "[I/O] Tape"}, {.irq = IRQIO_VMR, .name = "VMR", .desc = "[I/O] Unit Record Devices"}, - {.irq = IRQIO_LCS, .name = "LCS", .desc = "[I/O] LCS"}, {.irq = IRQIO_CTC, .name = "CTC", .desc = "[I/O] CTC"}, {.irq = IRQIO_ADM, .name = "ADM", .desc = "[I/O] EADM Subchannel"}, {.irq = IRQIO_CSC, .name = "CSC", .desc = "[I/O] CHSC Subchannel"}, diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index ad50b77282f8..69ce1862eabe 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -498,8 +498,6 @@ static int irdma_save_msix_info(struct irdma_pci_f *rf) iw_qvlist->num_vectors = rf->msix_count; if (rf->msix_count <= num_online_cpus()) rf->msix_shared = true; - else if (rf->msix_count > num_online_cpus() + 1) - rf->msix_count = num_online_cpus() + 1; pmsix = rf->msix_entries; for (i = 0, ceq_idx = 0; i < rf->msix_count; i++, iw_qvinfo++) { diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c index 3f13200ff71b..1ee8969595d3 100644 --- a/drivers/infiniband/hw/irdma/main.c +++ b/drivers/infiniband/hw/irdma/main.c @@ -206,6 +206,43 @@ static void irdma_lan_unregister_qset(struct irdma_sc_vsi *vsi, ibdev_dbg(&iwdev->ibdev, "WS: LAN free_res for rdma qset failed.\n"); } +static int irdma_init_interrupts(struct irdma_pci_f *rf, struct ice_pf *pf) +{ + int i; + + rf->msix_count = num_online_cpus() + IRDMA_NUM_AEQ_MSIX; + rf->msix_entries = kcalloc(rf->msix_count, sizeof(*rf->msix_entries), + GFP_KERNEL); + if (!rf->msix_entries) + return -ENOMEM; + + for (i = 0; i < rf->msix_count; i++) + if (ice_alloc_rdma_qvector(pf, &rf->msix_entries[i])) + break; + + if (i < IRDMA_MIN_MSIX) { + for (; i > 0; i--) + ice_free_rdma_qvector(pf, &rf->msix_entries[i]); + + kfree(rf->msix_entries); + return -ENOMEM; + } + + rf->msix_count = i; + + return 0; +} + +static void irdma_deinit_interrupts(struct irdma_pci_f *rf, struct ice_pf *pf) +{ + int i; + + for (i = 0; i < rf->msix_count; i++) + ice_free_rdma_qvector(pf, &rf->msix_entries[i]); + + kfree(rf->msix_entries); +} + static void irdma_remove(struct auxiliary_device *aux_dev) { struct iidc_auxiliary_dev *iidc_adev = container_of(aux_dev, @@ -216,6 +253,7 @@ static void irdma_remove(struct auxiliary_device *aux_dev) irdma_ib_unregister_device(iwdev); ice_rdma_update_vsi_filter(pf, iwdev->vsi_num, false); + irdma_deinit_interrupts(iwdev->rf, pf); pr_debug("INIT: Gen2 PF[%d] device remove success\n", PCI_FUNC(pf->pdev->devfn)); } @@ -230,9 +268,7 @@ static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf rf->gen_ops.unregister_qset = irdma_lan_unregister_qset; rf->hw.hw_addr = pf->hw.hw_addr; rf->pcidev = pf->pdev; - rf->msix_count = pf->num_rdma_msix; rf->pf_id = pf->hw.pf_id; - rf->msix_entries = &pf->msix_entries[pf->rdma_base_vector]; rf->default_vsi.vsi_idx = vsi->vsi_num; rf->protocol_used = pf->rdma_mode & IIDC_RDMA_PROTOCOL_ROCEV2 ? IRDMA_ROCE_PROTOCOL_ONLY : IRDMA_IWARP_PROTOCOL_ONLY; @@ -281,6 +317,10 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_ irdma_fill_device_info(iwdev, pf, vsi); rf = iwdev->rf; + err = irdma_init_interrupts(rf, pf); + if (err) + goto err_init_interrupts; + err = irdma_ctrl_init_hw(rf); if (err) goto err_ctrl_init; @@ -311,6 +351,8 @@ err_ibreg: err_rt_init: irdma_ctrl_deinit_hw(rf); err_ctrl_init: + irdma_deinit_interrupts(rf, pf); +err_init_interrupts: kfree(iwdev->rf); ib_dealloc_device(&iwdev->ibdev); diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index 9f0ed6e84471..ef9a9b79d711 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -117,6 +117,9 @@ extern struct auxiliary_driver i40iw_auxiliary_drv; #define IRDMA_IRQ_NAME_STR_LEN (64) +#define IRDMA_NUM_AEQ_MSIX 1 +#define IRDMA_MIN_MSIX 2 + enum init_completion_state { INVALID_STATE = 0, INITIAL_STATE, diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index e45bba240cbc..f6d0628a36d9 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -432,9 +432,6 @@ static struct net_device *bond_ipsec_dev(struct xfrm_state *xs) struct bonding *bond; struct slave *slave; - if (!bond_dev) - return NULL; - bond = netdev_priv(bond_dev); if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) return NULL; diff --git a/drivers/net/can/c_can/c_can_platform.c b/drivers/net/can/c_can/c_can_platform.c index 399844809bbe..19c86b94a40e 100644 --- a/drivers/net/can/c_can/c_can_platform.c +++ b/drivers/net/can/c_can/c_can_platform.c @@ -269,30 +269,22 @@ static int c_can_plat_probe(struct platform_device *pdev) /* get the appropriate clk */ clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(clk)) { - ret = PTR_ERR(clk); - goto exit; - } + if (IS_ERR(clk)) + return PTR_ERR(clk); /* get the platform data */ irq = platform_get_irq(pdev, 0); - if (irq < 0) { - ret = irq; - goto exit; - } + if (irq < 0) + return irq; addr = devm_platform_get_and_ioremap_resource(pdev, 0, &mem); - if (IS_ERR(addr)) { - ret = PTR_ERR(addr); - goto exit; - } + if (IS_ERR(addr)) + return PTR_ERR(addr); /* allocate the c_can device */ dev = alloc_c_can_dev(drvdata->msg_obj_num); - if (!dev) { - ret = -ENOMEM; - goto exit; - } + if (!dev) + return -ENOMEM; priv = netdev_priv(dev); switch (drvdata->id) { @@ -324,33 +316,22 @@ static int c_can_plat_probe(struct platform_device *pdev) /* Check if we need custom RAMINIT via syscon. Mostly for TI * platforms. Only supported with DT boot. */ - if (np && of_property_read_bool(np, "syscon-raminit")) { + if (np && of_property_present(np, "syscon-raminit")) { + unsigned int args[2]; u32 id; struct c_can_raminit *raminit = &priv->raminit_sys; ret = -EINVAL; - raminit->syscon = syscon_regmap_lookup_by_phandle(np, - "syscon-raminit"); + raminit->syscon = syscon_regmap_lookup_by_phandle_args(np, + "syscon-raminit", + 2, args); if (IS_ERR(raminit->syscon)) { - /* can fail with -EPROBE_DEFER */ ret = PTR_ERR(raminit->syscon); - free_c_can_dev(dev); - return ret; - } - - if (of_property_read_u32_index(np, "syscon-raminit", 1, - &raminit->reg)) { - dev_err(&pdev->dev, - "couldn't get the RAMINIT reg. offset!\n"); goto exit_free_device; } - if (of_property_read_u32_index(np, "syscon-raminit", 2, - &id)) { - dev_err(&pdev->dev, - "couldn't get the CAN instance ID\n"); - goto exit_free_device; - } + raminit->reg = args[0]; + id = args[1]; if (id >= drvdata->raminit_num) { dev_err(&pdev->dev, @@ -396,8 +377,6 @@ exit_pm_runtime: pm_runtime_disable(priv->device); exit_free_device: free_c_can_dev(dev); -exit: - dev_err(&pdev->dev, "probe failed\n"); return ret; } diff --git a/drivers/net/can/flexcan/flexcan-core.c b/drivers/net/can/flexcan/flexcan-core.c index ac1a860986df..b347a1c93536 100644 --- a/drivers/net/can/flexcan/flexcan-core.c +++ b/drivers/net/can/flexcan/flexcan-core.c @@ -386,6 +386,16 @@ static const struct flexcan_devtype_data fsl_lx2160a_r1_devtype_data = { FLEXCAN_QUIRK_SUPPORT_RX_MAILBOX_RTR, }; +static const struct flexcan_devtype_data nxp_s32g2_devtype_data = { + .quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS | + FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_BROKEN_PERR_STATE | + FLEXCAN_QUIRK_USE_RX_MAILBOX | FLEXCAN_QUIRK_SUPPORT_FD | + FLEXCAN_QUIRK_SUPPORT_ECC | FLEXCAN_QUIRK_NR_IRQ_3 | + FLEXCAN_QUIRK_SUPPORT_RX_MAILBOX | + FLEXCAN_QUIRK_SUPPORT_RX_MAILBOX_RTR | + FLEXCAN_QUIRK_SECONDARY_MB_IRQ, +}; + static const struct can_bittiming_const flexcan_bittiming_const = { .name = DRV_NAME, .tseg1_min = 4, @@ -1762,14 +1772,25 @@ static int flexcan_open(struct net_device *dev) goto out_free_irq_boff; } + if (priv->devtype_data.quirks & FLEXCAN_QUIRK_SECONDARY_MB_IRQ) { + err = request_irq(priv->irq_secondary_mb, + flexcan_irq, IRQF_SHARED, dev->name, dev); + if (err) + goto out_free_irq_err; + } + flexcan_chip_interrupts_enable(dev); netif_start_queue(dev); return 0; + out_free_irq_err: + if (priv->devtype_data.quirks & FLEXCAN_QUIRK_NR_IRQ_3) + free_irq(priv->irq_err, dev); out_free_irq_boff: - free_irq(priv->irq_boff, dev); + if (priv->devtype_data.quirks & FLEXCAN_QUIRK_NR_IRQ_3) + free_irq(priv->irq_boff, dev); out_free_irq: free_irq(dev->irq, dev); out_can_rx_offload_disable: @@ -1794,6 +1815,9 @@ static int flexcan_close(struct net_device *dev) netif_stop_queue(dev); flexcan_chip_interrupts_disable(dev); + if (priv->devtype_data.quirks & FLEXCAN_QUIRK_SECONDARY_MB_IRQ) + free_irq(priv->irq_secondary_mb, dev); + if (priv->devtype_data.quirks & FLEXCAN_QUIRK_NR_IRQ_3) { free_irq(priv->irq_err, dev); free_irq(priv->irq_boff, dev); @@ -2041,6 +2065,7 @@ static const struct of_device_id flexcan_of_match[] = { { .compatible = "fsl,vf610-flexcan", .data = &fsl_vf610_devtype_data, }, { .compatible = "fsl,ls1021ar2-flexcan", .data = &fsl_ls1021a_r2_devtype_data, }, { .compatible = "fsl,lx2160ar1-flexcan", .data = &fsl_lx2160a_r1_devtype_data, }, + { .compatible = "nxp,s32g2-flexcan", .data = &nxp_s32g2_devtype_data, }, { /* sentinel */ }, }; MODULE_DEVICE_TABLE(of, flexcan_of_match); @@ -2187,6 +2212,14 @@ static int flexcan_probe(struct platform_device *pdev) } } + if (priv->devtype_data.quirks & FLEXCAN_QUIRK_SECONDARY_MB_IRQ) { + priv->irq_secondary_mb = platform_get_irq_byname(pdev, "mb-1"); + if (priv->irq_secondary_mb < 0) { + err = priv->irq_secondary_mb; + goto failed_platform_get_irq; + } + } + if (priv->devtype_data.quirks & FLEXCAN_QUIRK_SUPPORT_FD) { priv->can.ctrlmode_supported |= CAN_CTRLMODE_FD | CAN_CTRLMODE_FD_NON_ISO; diff --git a/drivers/net/can/flexcan/flexcan.h b/drivers/net/can/flexcan/flexcan.h index 4933d8c7439e..2cf886618c96 100644 --- a/drivers/net/can/flexcan/flexcan.h +++ b/drivers/net/can/flexcan/flexcan.h @@ -70,6 +70,10 @@ #define FLEXCAN_QUIRK_SUPPORT_RX_FIFO BIT(16) /* Setup stop mode with ATF SCMI protocol to support wakeup */ #define FLEXCAN_QUIRK_SETUP_STOP_MODE_SCMI BIT(17) +/* Device has two separate interrupt lines for two mailbox ranges, which + * both need to have an interrupt handler registered. + */ +#define FLEXCAN_QUIRK_SECONDARY_MB_IRQ BIT(18) struct flexcan_devtype_data { u32 quirks; /* quirks needed for different IP cores */ @@ -107,6 +111,7 @@ struct flexcan_priv { int irq_boff; int irq_err; + int irq_secondary_mb; /* IPC handle when setup stop mode by System Controller firmware(scfw) */ struct imx_sc_ipc *sc_ipc_handle; diff --git a/drivers/net/can/rockchip/rockchip_canfd-core.c b/drivers/net/can/rockchip/rockchip_canfd-core.c index d9a937ba126c..46201c126703 100644 --- a/drivers/net/can/rockchip/rockchip_canfd-core.c +++ b/drivers/net/can/rockchip/rockchip_canfd-core.c @@ -236,11 +236,6 @@ static void rkcanfd_chip_fifo_setup(struct rkcanfd_priv *priv) { u32 reg; - /* TXE FIFO */ - reg = rkcanfd_read(priv, RKCANFD_REG_RX_FIFO_CTRL); - reg |= RKCANFD_REG_RX_FIFO_CTRL_RX_FIFO_ENABLE; - rkcanfd_write(priv, RKCANFD_REG_RX_FIFO_CTRL, reg); - /* RX FIFO */ reg = rkcanfd_read(priv, RKCANFD_REG_RX_FIFO_CTRL); reg |= RKCANFD_REG_RX_FIFO_CTRL_RX_FIFO_ENABLE; diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index b6f4de375df7..3ccac6781b98 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -43,6 +43,9 @@ #define USB_XYLANTA_SAINT3_VENDOR_ID 0x16d0 #define USB_XYLANTA_SAINT3_PRODUCT_ID 0x0f30 +#define USB_CANNECTIVITY_VENDOR_ID 0x1209 +#define USB_CANNECTIVITY_PRODUCT_ID 0xca01 + /* Timestamp 32 bit timer runs at 1 MHz (1 µs tick). Worker accounts * for timer overflow (will be after ~71 minutes) */ @@ -1546,6 +1549,8 @@ static const struct usb_device_id gs_usb_table[] = { USB_ABE_CANDEBUGGER_FD_PRODUCT_ID, 0) }, { USB_DEVICE_INTERFACE_NUMBER(USB_XYLANTA_SAINT3_VENDOR_ID, USB_XYLANTA_SAINT3_PRODUCT_ID, 0) }, + { USB_DEVICE_INTERFACE_NUMBER(USB_CANNECTIVITY_VENDOR_ID, + USB_CANNECTIVITY_PRODUCT_ID, 0) }, {} /* Terminating entry */ }; diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 79dc77835681..61d164ffb3ae 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2410,6 +2410,19 @@ static const struct b53_chip_data b53_switch_chips[] = { .jumbo_size_reg = B53_JUMBO_MAX_SIZE, }, { + .chip_id = BCM53101_DEVICE_ID, + .dev_name = "BCM53101", + .vlans = 4096, + .enabled_ports = 0x11f, + .arl_bins = 4, + .arl_buckets = 512, + .vta_regs = B53_VTA_REGS, + .imp_port = 8, + .duplex_reg = B53_DUPLEX_STAT_GE, + .jumbo_pm_reg = B53_JUMBO_PORT_MASK, + .jumbo_size_reg = B53_JUMBO_MAX_SIZE, + }, + { .chip_id = BCM53115_DEVICE_ID, .dev_name = "BCM53115", .vlans = 4096, @@ -2789,6 +2802,7 @@ int b53_switch_detect(struct b53_device *dev) return ret; switch (id32) { + case BCM53101_DEVICE_ID: case BCM53115_DEVICE_ID: case BCM53125_DEVICE_ID: case BCM53128_DEVICE_ID: diff --git a/drivers/net/dsa/b53/b53_mdio.c b/drivers/net/dsa/b53/b53_mdio.c index 31d070bf161a..43a3b37b731b 100644 --- a/drivers/net/dsa/b53/b53_mdio.c +++ b/drivers/net/dsa/b53/b53_mdio.c @@ -374,6 +374,7 @@ static void b53_mdio_shutdown(struct mdio_device *mdiodev) static const struct of_device_id b53_of_match[] = { { .compatible = "brcm,bcm5325" }, + { .compatible = "brcm,bcm53101" }, { .compatible = "brcm,bcm53115" }, { .compatible = "brcm,bcm53125" }, { .compatible = "brcm,bcm53128" }, diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 9e9b5bc0c5d6..0166c37a13a7 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -66,6 +66,7 @@ enum { BCM5395_DEVICE_ID = 0x95, BCM5397_DEVICE_ID = 0x97, BCM5398_DEVICE_ID = 0x98, + BCM53101_DEVICE_ID = 0x53101, BCM53115_DEVICE_ID = 0x53115, BCM53125_DEVICE_ID = 0x53125, BCM53128_DEVICE_ID = 0x53128, @@ -188,6 +189,7 @@ static inline int is531x5(struct b53_device *dev) { return dev->chip_id == BCM53115_DEVICE_ID || dev->chip_id == BCM53125_DEVICE_ID || + dev->chip_id == BCM53101_DEVICE_ID || dev->chip_id == BCM53128_DEVICE_ID || dev->chip_id == BCM53134_DEVICE_ID; } diff --git a/drivers/net/dsa/b53/b53_serdes.c b/drivers/net/dsa/b53/b53_serdes.c index 4730982b6840..7460122f6abc 100644 --- a/drivers/net/dsa/b53/b53_serdes.c +++ b/drivers/net/dsa/b53/b53_serdes.c @@ -239,7 +239,6 @@ int b53_serdes_init(struct b53_device *dev, int port) pcs->dev = dev; pcs->lane = lane; pcs->pcs.ops = &b53_pcs_ops; - pcs->pcs.neg_mode = true; return 0; } diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 1c83af805209..8422262febaf 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2957,28 +2957,61 @@ static void mt753x_phylink_mac_link_up(struct phylink_config *config, mcr |= PMCR_FORCE_RX_FC_EN; } - if (mode == MLO_AN_PHY && phydev && phy_init_eee(phydev, false) >= 0) { - switch (speed) { - case SPEED_1000: - case SPEED_2500: - mcr |= PMCR_FORCE_EEE1G; - break; - case SPEED_100: - mcr |= PMCR_FORCE_EEE100; - break; - } - } - mt7530_set(priv, MT753X_PMCR_P(dp->index), mcr); } +static void mt753x_phylink_mac_disable_tx_lpi(struct phylink_config *config) +{ + struct dsa_port *dp = dsa_phylink_to_port(config); + struct mt7530_priv *priv = dp->ds->priv; + + mt7530_clear(priv, MT753X_PMCR_P(dp->index), + PMCR_FORCE_EEE1G | PMCR_FORCE_EEE100); +} + +static int mt753x_phylink_mac_enable_tx_lpi(struct phylink_config *config, + u32 timer, bool tx_clock_stop) +{ + struct dsa_port *dp = dsa_phylink_to_port(config); + struct mt7530_priv *priv = dp->ds->priv; + u32 val; + + /* If the timer is zero, then set LPI_MODE_EN, which allows the + * system to enter LPI mode immediately rather than waiting for + * the LPI threshold. + */ + if (!timer) + val = LPI_MODE_EN; + else if (FIELD_FIT(LPI_THRESH_MASK, timer)) + val = FIELD_PREP(LPI_THRESH_MASK, timer); + else + val = LPI_THRESH_MASK; + + mt7530_rmw(priv, MT753X_PMEEECR_P(dp->index), + LPI_THRESH_MASK | LPI_MODE_EN, val); + + mt7530_set(priv, MT753X_PMCR_P(dp->index), + PMCR_FORCE_EEE1G | PMCR_FORCE_EEE100); + + return 0; +} + static void mt753x_phylink_get_caps(struct dsa_switch *ds, int port, struct phylink_config *config) { struct mt7530_priv *priv = ds->priv; + u32 eeecr; config->mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE; + config->lpi_capabilities = MAC_100FD | MAC_1000FD | MAC_2500FD; + + eeecr = mt7530_read(priv, MT753X_PMEEECR_P(port)); + /* tx_lpi_timer should be in microseconds. The time units for + * LPI threshold are unspecified. + */ + config->lpi_timer_default = FIELD_GET(LPI_THRESH_MASK, eeecr); + priv->info->mac_port_get_caps(ds, port, config); } @@ -3071,7 +3104,6 @@ mt753x_setup(struct dsa_switch *ds) /* Initialise the PCS devices */ for (i = 0; i < priv->ds->num_ports; i++) { priv->pcs[i].pcs.ops = priv->info->pcs_ops; - priv->pcs[i].pcs.neg_mode = true; priv->pcs[i].priv = priv; priv->pcs[i].port = i; } @@ -3088,18 +3120,9 @@ mt753x_setup(struct dsa_switch *ds) static int mt753x_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e) { - struct mt7530_priv *priv = ds->priv; - u32 set, mask = LPI_THRESH_MASK | LPI_MODE_EN; - if (e->tx_lpi_timer > 0xFFF) return -EINVAL; - set = LPI_THRESH_SET(e->tx_lpi_timer); - if (!e->tx_lpi_enabled) - /* Force LPI Mode without a delay */ - set |= LPI_MODE_EN; - mt7530_rmw(priv, MT753X_PMEEECR_P(port), mask, set); - return 0; } @@ -3238,6 +3261,8 @@ static const struct phylink_mac_ops mt753x_phylink_mac_ops = { .mac_config = mt753x_phylink_mac_config, .mac_link_down = mt753x_phylink_mac_link_down, .mac_link_up = mt753x_phylink_mac_link_up, + .mac_disable_tx_lpi = mt753x_phylink_mac_disable_tx_lpi, + .mac_enable_tx_lpi = mt753x_phylink_mac_enable_tx_lpi, }; const struct mt753x_info mt753x_table[] = { diff --git a/drivers/net/dsa/mv88e6xxx/pcs-6185.c b/drivers/net/dsa/mv88e6xxx/pcs-6185.c index 75ed1fa500a5..af7e06d265f7 100644 --- a/drivers/net/dsa/mv88e6xxx/pcs-6185.c +++ b/drivers/net/dsa/mv88e6xxx/pcs-6185.c @@ -138,7 +138,6 @@ static int mv88e6185_pcs_init(struct mv88e6xxx_chip *chip, int port) mpcs->chip = chip; mpcs->port = port; mpcs->phylink_pcs.ops = &mv88e6185_phylink_pcs_ops; - mpcs->phylink_pcs.neg_mode = true; irq = mv88e6xxx_serdes_irq_mapping(chip, port); if (irq) { diff --git a/drivers/net/dsa/mv88e6xxx/pcs-6352.c b/drivers/net/dsa/mv88e6xxx/pcs-6352.c index 143fe21d1834..36993400837e 100644 --- a/drivers/net/dsa/mv88e6xxx/pcs-6352.c +++ b/drivers/net/dsa/mv88e6xxx/pcs-6352.c @@ -275,7 +275,6 @@ static struct marvell_c22_pcs *marvell_c22_pcs_alloc(struct device *dev, mpcs->mdio.bus = bus; mpcs->mdio.addr = addr; mpcs->phylink_pcs.ops = &marvell_c22_pcs_ops; - mpcs->phylink_pcs.neg_mode = true; return mpcs; } diff --git a/drivers/net/dsa/mv88e6xxx/pcs-639x.c b/drivers/net/dsa/mv88e6xxx/pcs-639x.c index 59f63d6beec8..5db17c0b77f5 100644 --- a/drivers/net/dsa/mv88e6xxx/pcs-639x.c +++ b/drivers/net/dsa/mv88e6xxx/pcs-639x.c @@ -565,9 +565,7 @@ static int mv88e6390_pcs_init(struct mv88e6xxx_chip *chip, int port) return -ENOMEM; mpcs->sgmii_pcs.ops = &mv88e639x_sgmii_pcs_ops; - mpcs->sgmii_pcs.neg_mode = true; mpcs->xg_pcs.ops = &mv88e6390_xg_pcs_ops; - mpcs->xg_pcs.neg_mode = true; if (chip->info->prod_num == MV88E6XXX_PORT_SWITCH_ID_PROD_6190X || chip->info->prod_num == MV88E6XXX_PORT_SWITCH_ID_PROD_6390X) @@ -945,9 +943,7 @@ static int mv88e6393x_pcs_init(struct mv88e6xxx_chip *chip, int port) return -ENOMEM; mpcs->sgmii_pcs.ops = &mv88e6393x_sgmii_pcs_ops; - mpcs->sgmii_pcs.neg_mode = true; mpcs->xg_pcs.ops = &mv88e6393x_xg_pcs_ops; - mpcs->xg_pcs.neg_mode = true; mpcs->supports_5g = true; err = mv88e6393x_erratum_4_6(mpcs); diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c index e8cb4da15dbe..a36b8b07030e 100644 --- a/drivers/net/dsa/qca/qca8k-8xxx.c +++ b/drivers/net/dsa/qca/qca8k-8xxx.c @@ -1634,7 +1634,6 @@ static void qca8k_setup_pcs(struct qca8k_priv *priv, struct qca8k_pcs *qpcs, int port) { qpcs->pcs.ops = &qca8k_pcs_ops; - qpcs->pcs.neg_mode = true; /* We don't have interrupts for link changes, so we need to poll */ qpcs->pcs.poll = true; diff --git a/drivers/net/dsa/rzn1_a5psw.c b/drivers/net/dsa/rzn1_a5psw.c index 66974379334a..31ea8130a495 100644 --- a/drivers/net/dsa/rzn1_a5psw.c +++ b/drivers/net/dsa/rzn1_a5psw.c @@ -1248,18 +1248,16 @@ static int a5psw_probe(struct platform_device *pdev) if (ret) goto clk_disable; - mdio = of_get_child_by_name(dev->of_node, "mdio"); - if (of_device_is_available(mdio)) { + mdio = of_get_available_child_by_name(dev->of_node, "mdio"); + if (mdio) { ret = a5psw_probe_mdio(a5psw, mdio); + of_node_put(mdio); if (ret) { - of_node_put(mdio); dev_err(dev, "Failed to register MDIO: %d\n", ret); goto hclk_disable; } } - of_node_put(mdio); - ds = &a5psw->ds; ds->dev = dev; ds->num_ports = A5PSW_PORTS_NUM; diff --git a/drivers/net/dsa/sja1105/sja1105_mdio.c b/drivers/net/dsa/sja1105/sja1105_mdio.c index 84b7169f2974..8d535c033cef 100644 --- a/drivers/net/dsa/sja1105/sja1105_mdio.c +++ b/drivers/net/dsa/sja1105/sja1105_mdio.c @@ -468,13 +468,10 @@ int sja1105_mdiobus_register(struct dsa_switch *ds) if (rc) return rc; - mdio_node = of_get_child_by_name(switch_node, "mdios"); + mdio_node = of_get_available_child_by_name(switch_node, "mdios"); if (!mdio_node) return 0; - if (!of_device_is_available(mdio_node)) - goto out_put_mdio_node; - if (regs->mdio_100base_tx != SJA1105_RSV_ADDR) { rc = sja1105_mdiobus_base_tx_register(priv, mdio_node); if (rc) @@ -487,7 +484,6 @@ int sja1105_mdiobus_register(struct dsa_switch *ds) goto err_free_base_tx_mdiobus; } -out_put_mdio_node: of_node_put(mdio_node); return 0; diff --git a/drivers/net/ethernet/actions/owl-emac.c b/drivers/net/ethernet/actions/owl-emac.c index 115f48b3342c..0a08da799255 100644 --- a/drivers/net/ethernet/actions/owl-emac.c +++ b/drivers/net/ethernet/actions/owl-emac.c @@ -1325,15 +1325,10 @@ static int owl_emac_mdio_init(struct net_device *netdev) struct device_node *mdio_node; int ret; - mdio_node = of_get_child_by_name(dev->of_node, "mdio"); + mdio_node = of_get_available_child_by_name(dev->of_node, "mdio"); if (!mdio_node) return -ENODEV; - if (!of_device_is_available(mdio_node)) { - ret = -ENODEV; - goto err_put_node; - } - priv->mii = devm_mdiobus_alloc(dev); if (!priv->mii) { ret = -ENOMEM; diff --git a/drivers/net/ethernet/apm/xgene-v2/mdio.c b/drivers/net/ethernet/apm/xgene-v2/mdio.c index eba06831aec2..6a17045a5f62 100644 --- a/drivers/net/ethernet/apm/xgene-v2/mdio.c +++ b/drivers/net/ethernet/apm/xgene-v2/mdio.c @@ -97,7 +97,6 @@ void xge_mdio_remove(struct net_device *ndev) int xge_mdio_config(struct net_device *ndev) { - __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; struct xge_pdata *pdata = netdev_priv(ndev); struct device *dev = &pdata->pdev->dev; struct mii_bus *mdio_bus; @@ -137,17 +136,12 @@ int xge_mdio_config(struct net_device *ndev) goto err; } - linkmode_set_bit_array(phy_10_100_features_array, - ARRAY_SIZE(phy_10_100_features_array), - mask); - linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, mask); - linkmode_set_bit(ETHTOOL_LINK_MODE_AUI_BIT, mask); - linkmode_set_bit(ETHTOOL_LINK_MODE_MII_BIT, mask); - linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, mask); - linkmode_set_bit(ETHTOOL_LINK_MODE_BNC_BIT, mask); - - linkmode_andnot(phydev->supported, phydev->supported, mask); - linkmode_copy(phydev->advertising, phydev->supported); + phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_10baseT_Half_BIT); + phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_10baseT_Full_BIT); + phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_100baseT_Half_BIT); + phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_100baseT_Full_BIT); + phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_1000baseT_Half_BIT); + pdata->phy_speed = SPEED_UNKNOWN; return 0; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c b/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c index 414b2e448d59..787ea91802e7 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c @@ -113,19 +113,9 @@ static const struct hwmon_ops aq_hwmon_ops = { .read_string = aq_hwmon_read_string, }; -static u32 aq_hwmon_temp_config[] = { - HWMON_T_INPUT | HWMON_T_LABEL, - HWMON_T_INPUT | HWMON_T_LABEL, - 0, -}; - -static const struct hwmon_channel_info aq_hwmon_temp = { - .type = hwmon_temp, - .config = aq_hwmon_temp_config, -}; - static const struct hwmon_channel_info * const aq_hwmon_info[] = { - &aq_hwmon_temp, + HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL), NULL, }; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h index f5901f8e3907..f6b990b7f5b4 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h @@ -226,7 +226,6 @@ struct __packed offload_info { struct offload_port_info ports; struct offload_ka_info kas; struct offload_rr_info rrs; - u8 buf[]; }; struct __packed hw_atl_utils_fw_rpc { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 7b8b5b39c7bb..15c57a06ecaf 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -55,6 +55,8 @@ #include <net/page_pool/helpers.h> #include <linux/align.h> #include <net/netdev_queues.h> +#include <net/netdev_rx_queue.h> +#include <linux/pci-tph.h> #include "bnxt_hsi.h" #include "bnxt.h" @@ -76,6 +78,7 @@ #define BNXT_DEF_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_HW | \ NETIF_MSG_TX_ERR) +MODULE_IMPORT_NS("NETDEV_INTERNAL"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Broadcom NetXtreme network driver"); @@ -3314,74 +3317,81 @@ poll_done: return work_done; } -static void bnxt_free_tx_skbs(struct bnxt *bp) +static void bnxt_free_one_tx_ring_skbs(struct bnxt *bp, + struct bnxt_tx_ring_info *txr, int idx) { int i, max_idx; struct pci_dev *pdev = bp->pdev; - if (!bp->tx_ring) - return; - max_idx = bp->tx_nr_pages * TX_DESC_CNT; - for (i = 0; i < bp->tx_nr_rings; i++) { - struct bnxt_tx_ring_info *txr = &bp->tx_ring[i]; - int j; - if (!txr->tx_buf_ring) + for (i = 0; i < max_idx;) { + struct bnxt_sw_tx_bd *tx_buf = &txr->tx_buf_ring[i]; + struct sk_buff *skb; + int j, last; + + if (idx < bp->tx_nr_rings_xdp && + tx_buf->action == XDP_REDIRECT) { + dma_unmap_single(&pdev->dev, + dma_unmap_addr(tx_buf, mapping), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); + xdp_return_frame(tx_buf->xdpf); + tx_buf->action = 0; + tx_buf->xdpf = NULL; + i++; continue; + } - for (j = 0; j < max_idx;) { - struct bnxt_sw_tx_bd *tx_buf = &txr->tx_buf_ring[j]; - struct sk_buff *skb; - int k, last; - - if (i < bp->tx_nr_rings_xdp && - tx_buf->action == XDP_REDIRECT) { - dma_unmap_single(&pdev->dev, - dma_unmap_addr(tx_buf, mapping), - dma_unmap_len(tx_buf, len), - DMA_TO_DEVICE); - xdp_return_frame(tx_buf->xdpf); - tx_buf->action = 0; - tx_buf->xdpf = NULL; - j++; - continue; - } + skb = tx_buf->skb; + if (!skb) { + i++; + continue; + } - skb = tx_buf->skb; - if (!skb) { - j++; - continue; - } + tx_buf->skb = NULL; - tx_buf->skb = NULL; + if (tx_buf->is_push) { + dev_kfree_skb(skb); + i += 2; + continue; + } - if (tx_buf->is_push) { - dev_kfree_skb(skb); - j += 2; - continue; - } + dma_unmap_single(&pdev->dev, + dma_unmap_addr(tx_buf, mapping), + skb_headlen(skb), + DMA_TO_DEVICE); - dma_unmap_single(&pdev->dev, - dma_unmap_addr(tx_buf, mapping), - skb_headlen(skb), - DMA_TO_DEVICE); + last = tx_buf->nr_frags; + i += 2; + for (j = 0; j < last; j++, i++) { + int ring_idx = i & bp->tx_ring_mask; + skb_frag_t *frag = &skb_shinfo(skb)->frags[j]; - last = tx_buf->nr_frags; - j += 2; - for (k = 0; k < last; k++, j++) { - int ring_idx = j & bp->tx_ring_mask; - skb_frag_t *frag = &skb_shinfo(skb)->frags[k]; - - tx_buf = &txr->tx_buf_ring[ring_idx]; - dma_unmap_page( - &pdev->dev, - dma_unmap_addr(tx_buf, mapping), - skb_frag_size(frag), DMA_TO_DEVICE); - } - dev_kfree_skb(skb); + tx_buf = &txr->tx_buf_ring[ring_idx]; + dma_unmap_page(&pdev->dev, + dma_unmap_addr(tx_buf, mapping), + skb_frag_size(frag), DMA_TO_DEVICE); } - netdev_tx_reset_queue(netdev_get_tx_queue(bp->dev, i)); + dev_kfree_skb(skb); + } + netdev_tx_reset_queue(netdev_get_tx_queue(bp->dev, idx)); +} + +static void bnxt_free_tx_skbs(struct bnxt *bp) +{ + int i; + + if (!bp->tx_ring) + return; + + for (i = 0; i < bp->tx_nr_rings; i++) { + struct bnxt_tx_ring_info *txr = &bp->tx_ring[i]; + + if (!txr->tx_buf_ring) + continue; + + bnxt_free_one_tx_ring_skbs(bp, txr, i); } } @@ -5565,6 +5575,8 @@ int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap, int bmap_size, if (bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY) flags |= FUNC_DRV_RGTR_REQ_FLAGS_ERROR_RECOVERY_SUPPORT | FUNC_DRV_RGTR_REQ_FLAGS_MASTER_SUPPORT; + if (bp->fw_cap & BNXT_FW_CAP_NPAR_1_2) + flags |= FUNC_DRV_RGTR_REQ_FLAGS_NPAR_1_2_SUPPORT; req->flags = cpu_to_le32(flags); req->ver_maj_8b = DRV_VER_MAJ; req->ver_min_8b = DRV_VER_MIN; @@ -6935,6 +6947,30 @@ static void bnxt_hwrm_ring_grp_free(struct bnxt *bp) hwrm_req_drop(bp, req); } +static void bnxt_set_rx_ring_params_p5(struct bnxt *bp, u32 ring_type, + struct hwrm_ring_alloc_input *req, + struct bnxt_ring_struct *ring) +{ + struct bnxt_ring_grp_info *grp_info = &bp->grp_info[ring->grp_idx]; + u32 enables = RING_ALLOC_REQ_ENABLES_RX_BUF_SIZE_VALID | + RING_ALLOC_REQ_ENABLES_NQ_RING_ID_VALID; + + if (ring_type == HWRM_RING_ALLOC_AGG) { + req->ring_type = RING_ALLOC_REQ_RING_TYPE_RX_AGG; + req->rx_ring_id = cpu_to_le16(grp_info->rx_fw_ring_id); + req->rx_buf_size = cpu_to_le16(BNXT_RX_PAGE_SIZE); + enables |= RING_ALLOC_REQ_ENABLES_RX_RING_ID_VALID; + } else { + req->rx_buf_size = cpu_to_le16(bp->rx_buf_use_size); + if (NET_IP_ALIGN == 2) + req->flags = + cpu_to_le16(RING_ALLOC_REQ_FLAGS_RX_SOP_PAD); + } + req->stat_ctx_id = cpu_to_le32(grp_info->fw_stats_ctx); + req->nq_ring_id = cpu_to_le16(grp_info->cp_fw_ring_id); + req->enables |= cpu_to_le32(enables); +} + static int hwrm_ring_alloc_send_msg(struct bnxt *bp, struct bnxt_ring_struct *ring, u32 ring_type, u32 map_index) @@ -6986,37 +7022,13 @@ static int hwrm_ring_alloc_send_msg(struct bnxt *bp, break; } case HWRM_RING_ALLOC_RX: - req->ring_type = RING_ALLOC_REQ_RING_TYPE_RX; - req->length = cpu_to_le32(bp->rx_ring_mask + 1); - if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { - u16 flags = 0; - - /* Association of rx ring with stats context */ - grp_info = &bp->grp_info[ring->grp_idx]; - req->rx_buf_size = cpu_to_le16(bp->rx_buf_use_size); - req->stat_ctx_id = cpu_to_le32(grp_info->fw_stats_ctx); - req->enables |= cpu_to_le32( - RING_ALLOC_REQ_ENABLES_RX_BUF_SIZE_VALID); - if (NET_IP_ALIGN == 2) - flags = RING_ALLOC_REQ_FLAGS_RX_SOP_PAD; - req->flags = cpu_to_le16(flags); - } - break; case HWRM_RING_ALLOC_AGG: - if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { - req->ring_type = RING_ALLOC_REQ_RING_TYPE_RX_AGG; - /* Association of agg ring with rx ring */ - grp_info = &bp->grp_info[ring->grp_idx]; - req->rx_ring_id = cpu_to_le16(grp_info->rx_fw_ring_id); - req->rx_buf_size = cpu_to_le16(BNXT_RX_PAGE_SIZE); - req->stat_ctx_id = cpu_to_le32(grp_info->fw_stats_ctx); - req->enables |= cpu_to_le32( - RING_ALLOC_REQ_ENABLES_RX_RING_ID_VALID | - RING_ALLOC_REQ_ENABLES_RX_BUF_SIZE_VALID); - } else { - req->ring_type = RING_ALLOC_REQ_RING_TYPE_RX; - } - req->length = cpu_to_le32(bp->rx_agg_ring_mask + 1); + req->ring_type = RING_ALLOC_REQ_RING_TYPE_RX; + req->length = (ring_type == HWRM_RING_ALLOC_RX) ? + cpu_to_le32(bp->rx_ring_mask + 1) : + cpu_to_le32(bp->rx_agg_ring_mask + 1); + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) + bnxt_set_rx_ring_params_p5(bp, ring_type, req, ring); break; case HWRM_RING_ALLOC_CMPL: req->ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL; @@ -7197,6 +7209,39 @@ static int bnxt_hwrm_rx_agg_ring_alloc(struct bnxt *bp, return 0; } +static int bnxt_hwrm_cp_ring_alloc_p5(struct bnxt *bp, + struct bnxt_cp_ring_info *cpr) +{ + const u32 type = HWRM_RING_ALLOC_CMPL; + struct bnxt_napi *bnapi = cpr->bnapi; + struct bnxt_ring_struct *ring; + u32 map_idx = bnapi->index; + int rc; + + ring = &cpr->cp_ring_struct; + ring->handle = BNXT_SET_NQ_HDL(cpr); + rc = hwrm_ring_alloc_send_msg(bp, ring, type, map_idx); + if (rc) + return rc; + bnxt_set_db(bp, &cpr->cp_db, type, map_idx, ring->fw_ring_id); + bnxt_db_cq(bp, &cpr->cp_db, cpr->cp_raw_cons); + return 0; +} + +static int bnxt_hwrm_tx_ring_alloc(struct bnxt *bp, + struct bnxt_tx_ring_info *txr, u32 tx_idx) +{ + struct bnxt_ring_struct *ring = &txr->tx_ring_struct; + const u32 type = HWRM_RING_ALLOC_TX; + int rc; + + rc = hwrm_ring_alloc_send_msg(bp, ring, type, tx_idx); + if (rc) + return rc; + bnxt_set_db(bp, &txr->tx_db, type, tx_idx, ring->fw_ring_id); + return 0; +} + static int bnxt_hwrm_ring_alloc(struct bnxt *bp) { bool agg_rings = !!(bp->flags & BNXT_FLAG_AGG_RINGS); @@ -7233,33 +7278,17 @@ static int bnxt_hwrm_ring_alloc(struct bnxt *bp) } } - type = HWRM_RING_ALLOC_TX; for (i = 0; i < bp->tx_nr_rings; i++) { struct bnxt_tx_ring_info *txr = &bp->tx_ring[i]; - struct bnxt_ring_struct *ring; - u32 map_idx; if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { - struct bnxt_cp_ring_info *cpr2 = txr->tx_cpr; - struct bnxt_napi *bnapi = txr->bnapi; - u32 type2 = HWRM_RING_ALLOC_CMPL; - - ring = &cpr2->cp_ring_struct; - ring->handle = BNXT_SET_NQ_HDL(cpr2); - map_idx = bnapi->index; - rc = hwrm_ring_alloc_send_msg(bp, ring, type2, map_idx); + rc = bnxt_hwrm_cp_ring_alloc_p5(bp, txr->tx_cpr); if (rc) goto err_out; - bnxt_set_db(bp, &cpr2->cp_db, type2, map_idx, - ring->fw_ring_id); - bnxt_db_cq(bp, &cpr2->cp_db, cpr2->cp_raw_cons); } - ring = &txr->tx_ring_struct; - map_idx = i; - rc = hwrm_ring_alloc_send_msg(bp, ring, type, map_idx); + rc = bnxt_hwrm_tx_ring_alloc(bp, txr, i); if (rc) goto err_out; - bnxt_set_db(bp, &txr->tx_db, type, map_idx, ring->fw_ring_id); } for (i = 0; i < bp->rx_nr_rings; i++) { @@ -7272,20 +7301,9 @@ static int bnxt_hwrm_ring_alloc(struct bnxt *bp) if (!agg_rings) bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { - struct bnxt_cp_ring_info *cpr2 = rxr->rx_cpr; - struct bnxt_napi *bnapi = rxr->bnapi; - u32 type2 = HWRM_RING_ALLOC_CMPL; - struct bnxt_ring_struct *ring; - u32 map_idx = bnapi->index; - - ring = &cpr2->cp_ring_struct; - ring->handle = BNXT_SET_NQ_HDL(cpr2); - rc = hwrm_ring_alloc_send_msg(bp, ring, type2, map_idx); + rc = bnxt_hwrm_cp_ring_alloc_p5(bp, rxr->rx_cpr); if (rc) goto err_out; - bnxt_set_db(bp, &cpr2->cp_db, type2, map_idx, - ring->fw_ring_id); - bnxt_db_cq(bp, &cpr2->cp_db, cpr2->cp_raw_cons); } } @@ -7353,6 +7371,23 @@ exit: return 0; } +static void bnxt_hwrm_tx_ring_free(struct bnxt *bp, + struct bnxt_tx_ring_info *txr, + bool close_path) +{ + struct bnxt_ring_struct *ring = &txr->tx_ring_struct; + u32 cmpl_ring_id; + + if (ring->fw_ring_id == INVALID_HW_RING_ID) + return; + + cmpl_ring_id = close_path ? bnxt_cp_ring_for_tx(bp, txr) : + INVALID_HW_RING_ID; + hwrm_ring_free_send_msg(bp, ring, RING_FREE_REQ_RING_TYPE_TX, + cmpl_ring_id); + ring->fw_ring_id = INVALID_HW_RING_ID; +} + static void bnxt_hwrm_rx_ring_free(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, bool close_path) @@ -7397,6 +7432,33 @@ static void bnxt_hwrm_rx_agg_ring_free(struct bnxt *bp, bp->grp_info[grp_idx].agg_fw_ring_id = INVALID_HW_RING_ID; } +static void bnxt_hwrm_cp_ring_free(struct bnxt *bp, + struct bnxt_cp_ring_info *cpr) +{ + struct bnxt_ring_struct *ring; + + ring = &cpr->cp_ring_struct; + if (ring->fw_ring_id == INVALID_HW_RING_ID) + return; + + hwrm_ring_free_send_msg(bp, ring, RING_FREE_REQ_RING_TYPE_L2_CMPL, + INVALID_HW_RING_ID); + ring->fw_ring_id = INVALID_HW_RING_ID; +} + +static void bnxt_clear_one_cp_ring(struct bnxt *bp, struct bnxt_cp_ring_info *cpr) +{ + struct bnxt_ring_struct *ring = &cpr->cp_ring_struct; + int i, size = ring->ring_mem.page_size; + + cpr->cp_raw_cons = 0; + cpr->toggle = 0; + + for (i = 0; i < bp->cp_nr_pages; i++) + if (cpr->cp_desc_ring[i]) + memset(cpr->cp_desc_ring[i], 0, size); +} + static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path) { u32 type; @@ -7405,20 +7467,8 @@ static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path) if (!bp->bnapi) return; - for (i = 0; i < bp->tx_nr_rings; i++) { - struct bnxt_tx_ring_info *txr = &bp->tx_ring[i]; - struct bnxt_ring_struct *ring = &txr->tx_ring_struct; - - if (ring->fw_ring_id != INVALID_HW_RING_ID) { - u32 cmpl_ring_id = bnxt_cp_ring_for_tx(bp, txr); - - hwrm_ring_free_send_msg(bp, ring, - RING_FREE_REQ_RING_TYPE_TX, - close_path ? cmpl_ring_id : - INVALID_HW_RING_ID); - ring->fw_ring_id = INVALID_HW_RING_ID; - } - } + for (i = 0; i < bp->tx_nr_rings; i++) + bnxt_hwrm_tx_ring_free(bp, &bp->tx_ring[i], close_path); bnxt_cancel_dim(bp); for (i = 0; i < bp->rx_nr_rings; i++) { @@ -7442,17 +7492,9 @@ static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path) struct bnxt_ring_struct *ring; int j; - for (j = 0; j < cpr->cp_ring_count && cpr->cp_ring_arr; j++) { - struct bnxt_cp_ring_info *cpr2 = &cpr->cp_ring_arr[j]; + for (j = 0; j < cpr->cp_ring_count && cpr->cp_ring_arr; j++) + bnxt_hwrm_cp_ring_free(bp, &cpr->cp_ring_arr[j]); - ring = &cpr2->cp_ring_struct; - if (ring->fw_ring_id == INVALID_HW_RING_ID) - continue; - hwrm_ring_free_send_msg(bp, ring, - RING_FREE_REQ_RING_TYPE_L2_CMPL, - INVALID_HW_RING_ID); - ring->fw_ring_id = INVALID_HW_RING_ID; - } ring = &cpr->cp_ring_struct; if (ring->fw_ring_id != INVALID_HW_RING_ID) { hwrm_ring_free_send_msg(bp, ring, type, @@ -8365,6 +8407,7 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp) switch (resp->port_partition_type) { case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_0: + case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_2: case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_5: case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR2_0: bp->port_partition_type = resp->port_partition_type; @@ -9529,6 +9572,8 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) bp->fw_cap |= BNXT_FW_CAP_HOT_RESET_IF; if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_FW_LIVEPATCH_SUPPORTED)) bp->fw_cap |= BNXT_FW_CAP_LIVEPATCH; + if (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_NPAR_1_2_SUPPORTED) + bp->fw_cap |= BNXT_FW_CAP_NPAR_1_2; if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_DFLT_VLAN_TPID_PCP_SUPPORTED)) bp->fw_cap |= BNXT_FW_CAP_DFLT_VLAN_TPID_PCP; if (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_BS_V2_SUPPORTED) @@ -11237,6 +11282,155 @@ int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init) return 0; } +static void bnxt_tx_queue_stop(struct bnxt *bp, int idx) +{ + struct bnxt_tx_ring_info *txr; + struct netdev_queue *txq; + struct bnxt_napi *bnapi; + int i; + + bnapi = bp->bnapi[idx]; + bnxt_for_each_napi_tx(i, bnapi, txr) { + WRITE_ONCE(txr->dev_state, BNXT_DEV_STATE_CLOSING); + synchronize_net(); + + if (!(bnapi->flags & BNXT_NAPI_FLAG_XDP)) { + txq = netdev_get_tx_queue(bp->dev, txr->txq_index); + if (txq) { + __netif_tx_lock_bh(txq); + netif_tx_stop_queue(txq); + __netif_tx_unlock_bh(txq); + } + } + + if (!bp->tph_mode) + continue; + + bnxt_hwrm_tx_ring_free(bp, txr, true); + bnxt_hwrm_cp_ring_free(bp, txr->tx_cpr); + bnxt_free_one_tx_ring_skbs(bp, txr, txr->txq_index); + bnxt_clear_one_cp_ring(bp, txr->tx_cpr); + } +} + +static int bnxt_tx_queue_start(struct bnxt *bp, int idx) +{ + struct bnxt_tx_ring_info *txr; + struct netdev_queue *txq; + struct bnxt_napi *bnapi; + int rc, i; + + bnapi = bp->bnapi[idx]; + /* All rings have been reserved and previously allocated. + * Reallocating with the same parameters should never fail. + */ + bnxt_for_each_napi_tx(i, bnapi, txr) { + if (!bp->tph_mode) + goto start_tx; + + rc = bnxt_hwrm_cp_ring_alloc_p5(bp, txr->tx_cpr); + if (rc) + return rc; + + rc = bnxt_hwrm_tx_ring_alloc(bp, txr, false); + if (rc) + return rc; + + txr->tx_prod = 0; + txr->tx_cons = 0; + txr->tx_hw_cons = 0; +start_tx: + WRITE_ONCE(txr->dev_state, 0); + synchronize_net(); + + if (bnapi->flags & BNXT_NAPI_FLAG_XDP) + continue; + + txq = netdev_get_tx_queue(bp->dev, txr->txq_index); + if (txq) + netif_tx_start_queue(txq); + } + + return 0; +} + +static void bnxt_irq_affinity_notify(struct irq_affinity_notify *notify, + const cpumask_t *mask) +{ + struct bnxt_irq *irq; + u16 tag; + int err; + + irq = container_of(notify, struct bnxt_irq, affinity_notify); + + if (!irq->bp->tph_mode) + return; + + cpumask_copy(irq->cpu_mask, mask); + + if (irq->ring_nr >= irq->bp->rx_nr_rings) + return; + + if (pcie_tph_get_cpu_st(irq->bp->pdev, TPH_MEM_TYPE_VM, + cpumask_first(irq->cpu_mask), &tag)) + return; + + if (pcie_tph_set_st_entry(irq->bp->pdev, irq->msix_nr, tag)) + return; + + rtnl_lock(); + if (netif_running(irq->bp->dev)) { + err = netdev_rx_queue_restart(irq->bp->dev, irq->ring_nr); + if (err) + netdev_err(irq->bp->dev, + "RX queue restart failed: err=%d\n", err); + } + rtnl_unlock(); +} + +static void bnxt_irq_affinity_release(struct kref *ref) +{ + struct irq_affinity_notify *notify = + container_of(ref, struct irq_affinity_notify, kref); + struct bnxt_irq *irq; + + irq = container_of(notify, struct bnxt_irq, affinity_notify); + + if (!irq->bp->tph_mode) + return; + + if (pcie_tph_set_st_entry(irq->bp->pdev, irq->msix_nr, 0)) { + netdev_err(irq->bp->dev, + "Setting ST=0 for MSIX entry %d failed\n", + irq->msix_nr); + return; + } +} + +static void bnxt_release_irq_notifier(struct bnxt_irq *irq) +{ + irq_set_affinity_notifier(irq->vector, NULL); +} + +static void bnxt_register_irq_notifier(struct bnxt *bp, struct bnxt_irq *irq) +{ + struct irq_affinity_notify *notify; + + irq->bp = bp; + + /* Nothing to do if TPH is not enabled */ + if (!bp->tph_mode) + return; + + /* Register IRQ affinity notifier */ + notify = &irq->affinity_notify; + notify->irq = irq->vector; + notify->notify = bnxt_irq_affinity_notify; + notify->release = bnxt_irq_affinity_release; + + irq_set_affinity_notifier(irq->vector, notify); +} + static void bnxt_free_irq(struct bnxt *bp) { struct bnxt_irq *irq; @@ -11259,11 +11453,18 @@ static void bnxt_free_irq(struct bnxt *bp) free_cpumask_var(irq->cpu_mask); irq->have_cpumask = 0; } + + bnxt_release_irq_notifier(irq); + free_irq(irq->vector, bp->bnapi[i]); } irq->requested = 0; } + + /* Disable TPH support */ + pcie_disable_tph(bp->pdev); + bp->tph_mode = 0; } static int bnxt_request_irq(struct bnxt *bp) @@ -11283,6 +11484,12 @@ static int bnxt_request_irq(struct bnxt *bp) #ifdef CONFIG_RFS_ACCEL rmap = bp->dev->rx_cpu_rmap; #endif + + /* Enable TPH support as part of IRQ request */ + rc = pcie_enable_tph(bp->pdev, PCI_TPH_ST_IV_MODE); + if (!rc) + bp->tph_mode = PCI_TPH_ST_IV_MODE; + for (i = 0, j = 0; i < bp->cp_nr_rings; i++) { int map_idx = bnxt_cp_num_to_irq_num(bp, i); struct bnxt_irq *irq = &bp->irq_tbl[map_idx]; @@ -11306,8 +11513,11 @@ static int bnxt_request_irq(struct bnxt *bp) if (zalloc_cpumask_var(&irq->cpu_mask, GFP_KERNEL)) { int numa_node = dev_to_node(&bp->pdev->dev); + u16 tag; irq->have_cpumask = 1; + irq->msix_nr = map_idx; + irq->ring_nr = i; cpumask_set_cpu(cpumask_local_spread(i, numa_node), irq->cpu_mask); rc = irq_update_affinity_hint(irq->vector, irq->cpu_mask); @@ -11317,6 +11527,16 @@ static int bnxt_request_irq(struct bnxt *bp) irq->vector); break; } + + bnxt_register_irq_notifier(bp, irq); + + /* Init ST table entry */ + if (pcie_tph_get_cpu_st(irq->bp->pdev, TPH_MEM_TYPE_VM, + cpumask_first(irq->cpu_mask), + &tag)) + continue; + + pcie_tph_set_st_entry(irq->bp->pdev, irq->msix_nr, tag); } } return rc; @@ -15601,6 +15821,7 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) struct bnxt_rx_ring_info *rxr, *clone; struct bnxt_cp_ring_info *cpr; struct bnxt_vnic_info *vnic; + struct bnxt_napi *bnapi; int i, rc; rxr = &bp->rx_ring[idx]; @@ -15618,19 +15839,38 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) bnxt_copy_rx_ring(bp, rxr, clone); + bnapi = rxr->bnapi; + cpr = &bnapi->cp_ring; + + /* All rings have been reserved and previously allocated. + * Reallocating with the same parameters should never fail. + */ rc = bnxt_hwrm_rx_ring_alloc(bp, rxr); if (rc) - return rc; + goto err_reset; + + if (bp->tph_mode) { + rc = bnxt_hwrm_cp_ring_alloc_p5(bp, rxr->rx_cpr); + if (rc) + goto err_reset; + } + rc = bnxt_hwrm_rx_agg_ring_alloc(bp, rxr); if (rc) - goto err_free_hwrm_rx_ring; + goto err_reset; bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); if (bp->flags & BNXT_FLAG_AGG_RINGS) bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod); - cpr = &rxr->bnapi->cp_ring; - cpr->sw_stats->rx.rx_resets++; + if (bp->flags & BNXT_FLAG_SHARED_RINGS) { + rc = bnxt_tx_queue_start(bp, idx); + if (rc) + goto err_reset; + } + + napi_enable(&bnapi->napi); + bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons); for (i = 0; i <= BNXT_VNIC_NTUPLE; i++) { vnic = &bp->vnic_info[i]; @@ -15648,8 +15888,12 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) return 0; -err_free_hwrm_rx_ring: - bnxt_hwrm_rx_ring_free(bp, rxr, false); +err_reset: + netdev_err(bp->dev, "Unexpected HWRM error during queue start rc: %d\n", + rc); + napi_enable(&bnapi->napi); + bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons); + bnxt_reset_task(bp, true); return rc; } @@ -15657,7 +15901,9 @@ static int bnxt_queue_stop(struct net_device *dev, void *qmem, int idx) { struct bnxt *bp = netdev_priv(dev); struct bnxt_rx_ring_info *rxr; + struct bnxt_cp_ring_info *cpr; struct bnxt_vnic_info *vnic; + struct bnxt_napi *bnapi; int i; for (i = 0; i <= BNXT_VNIC_NTUPLE; i++) { @@ -15669,14 +15915,30 @@ static int bnxt_queue_stop(struct net_device *dev, void *qmem, int idx) /* Make sure NAPI sees that the VNIC is disabled */ synchronize_net(); rxr = &bp->rx_ring[idx]; - cancel_work_sync(&rxr->bnapi->cp_ring.dim.work); + bnapi = rxr->bnapi; + cpr = &bnapi->cp_ring; + cancel_work_sync(&cpr->dim.work); bnxt_hwrm_rx_ring_free(bp, rxr, false); bnxt_hwrm_rx_agg_ring_free(bp, rxr, false); - rxr->rx_next_cons = 0; page_pool_disable_direct_recycling(rxr->page_pool); if (bnxt_separate_head_pool()) page_pool_disable_direct_recycling(rxr->head_pool); + if (bp->flags & BNXT_FLAG_SHARED_RINGS) + bnxt_tx_queue_stop(bp, idx); + + /* Disable NAPI now after freeing the rings because HWRM_RING_FREE + * completion is handled in NAPI to guarantee no more DMA on that ring + * after seeing the completion. + */ + napi_disable(&bnapi->napi); + + if (bp->tph_mode) { + bnxt_hwrm_cp_ring_free(bp, rxr->rx_cpr); + bnxt_clear_one_cp_ring(bp, rxr->rx_cpr); + } + bnxt_db_nq(bp, &cpr->cp_db, cpr->cp_raw_cons); + memcpy(qmem, rxr, sizeof(*rxr)); bnxt_init_rx_ring_struct(bp, qmem); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 2373f423a523..e85b5ce94f58 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1234,6 +1234,11 @@ struct bnxt_irq { u8 have_cpumask:1; char name[IFNAMSIZ + BNXT_IRQ_NAME_EXTRA]; cpumask_var_t cpu_mask; + + struct bnxt *bp; + int msix_nr; + int ring_nr; + struct irq_affinity_notify affinity_notify; }; #define HWRM_RING_ALLOC_TX 0x1 @@ -2410,6 +2415,8 @@ struct bnxt { u8 max_q; u8 num_tc; + u8 tph_mode; + unsigned int current_interval; #define BNXT_TIMER_INTERVAL HZ @@ -2492,6 +2499,7 @@ struct bnxt { #define BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX_V3 BIT_ULL(39) #define BNXT_FW_CAP_VNIC_RE_FLUSH BIT_ULL(40) #define BNXT_FW_CAP_SW_MAX_RESOURCE_LIMITS BIT_ULL(41) + #define BNXT_FW_CAP_NPAR_1_2 BIT_ULL(42) u32 fw_dbg_cap; diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 5740c98d8c9f..f69b2b7c8802 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -951,75 +951,73 @@ struct macb_tx_skb { * device stats by a periodic timer. */ struct macb_stats { - u32 rx_pause_frames; - u32 tx_ok; - u32 tx_single_cols; - u32 tx_multiple_cols; - u32 rx_ok; - u32 rx_fcs_errors; - u32 rx_align_errors; - u32 tx_deferred; - u32 tx_late_cols; - u32 tx_excessive_cols; - u32 tx_underruns; - u32 tx_carrier_errors; - u32 rx_resource_errors; - u32 rx_overruns; - u32 rx_symbol_errors; - u32 rx_oversize_pkts; - u32 rx_jabbers; - u32 rx_undersize_pkts; - u32 sqe_test_errors; - u32 rx_length_mismatch; - u32 tx_pause_frames; + u64 rx_pause_frames; + u64 tx_ok; + u64 tx_single_cols; + u64 tx_multiple_cols; + u64 rx_ok; + u64 rx_fcs_errors; + u64 rx_align_errors; + u64 tx_deferred; + u64 tx_late_cols; + u64 tx_excessive_cols; + u64 tx_underruns; + u64 tx_carrier_errors; + u64 rx_resource_errors; + u64 rx_overruns; + u64 rx_symbol_errors; + u64 rx_oversize_pkts; + u64 rx_jabbers; + u64 rx_undersize_pkts; + u64 sqe_test_errors; + u64 rx_length_mismatch; + u64 tx_pause_frames; }; struct gem_stats { - u32 tx_octets_31_0; - u32 tx_octets_47_32; - u32 tx_frames; - u32 tx_broadcast_frames; - u32 tx_multicast_frames; - u32 tx_pause_frames; - u32 tx_64_byte_frames; - u32 tx_65_127_byte_frames; - u32 tx_128_255_byte_frames; - u32 tx_256_511_byte_frames; - u32 tx_512_1023_byte_frames; - u32 tx_1024_1518_byte_frames; - u32 tx_greater_than_1518_byte_frames; - u32 tx_underrun; - u32 tx_single_collision_frames; - u32 tx_multiple_collision_frames; - u32 tx_excessive_collisions; - u32 tx_late_collisions; - u32 tx_deferred_frames; - u32 tx_carrier_sense_errors; - u32 rx_octets_31_0; - u32 rx_octets_47_32; - u32 rx_frames; - u32 rx_broadcast_frames; - u32 rx_multicast_frames; - u32 rx_pause_frames; - u32 rx_64_byte_frames; - u32 rx_65_127_byte_frames; - u32 rx_128_255_byte_frames; - u32 rx_256_511_byte_frames; - u32 rx_512_1023_byte_frames; - u32 rx_1024_1518_byte_frames; - u32 rx_greater_than_1518_byte_frames; - u32 rx_undersized_frames; - u32 rx_oversize_frames; - u32 rx_jabbers; - u32 rx_frame_check_sequence_errors; - u32 rx_length_field_frame_errors; - u32 rx_symbol_errors; - u32 rx_alignment_errors; - u32 rx_resource_errors; - u32 rx_overruns; - u32 rx_ip_header_checksum_errors; - u32 rx_tcp_checksum_errors; - u32 rx_udp_checksum_errors; + u64 tx_octets; + u64 tx_frames; + u64 tx_broadcast_frames; + u64 tx_multicast_frames; + u64 tx_pause_frames; + u64 tx_64_byte_frames; + u64 tx_65_127_byte_frames; + u64 tx_128_255_byte_frames; + u64 tx_256_511_byte_frames; + u64 tx_512_1023_byte_frames; + u64 tx_1024_1518_byte_frames; + u64 tx_greater_than_1518_byte_frames; + u64 tx_underrun; + u64 tx_single_collision_frames; + u64 tx_multiple_collision_frames; + u64 tx_excessive_collisions; + u64 tx_late_collisions; + u64 tx_deferred_frames; + u64 tx_carrier_sense_errors; + u64 rx_octets; + u64 rx_frames; + u64 rx_broadcast_frames; + u64 rx_multicast_frames; + u64 rx_pause_frames; + u64 rx_64_byte_frames; + u64 rx_65_127_byte_frames; + u64 rx_128_255_byte_frames; + u64 rx_256_511_byte_frames; + u64 rx_512_1023_byte_frames; + u64 rx_1024_1518_byte_frames; + u64 rx_greater_than_1518_byte_frames; + u64 rx_undersized_frames; + u64 rx_oversize_frames; + u64 rx_jabbers; + u64 rx_frame_check_sequence_errors; + u64 rx_length_field_frame_errors; + u64 rx_symbol_errors; + u64 rx_alignment_errors; + u64 rx_resource_errors; + u64 rx_overruns; + u64 rx_ip_header_checksum_errors; + u64 rx_tcp_checksum_errors; + u64 rx_udp_checksum_errors; }; /* Describes the name and offset of an individual statistic register, as diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 48496209fb16..2112a9701e05 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -853,9 +853,7 @@ static int macb_mii_probe(struct net_device *dev) struct macb *bp = netdev_priv(dev); bp->phylink_sgmii_pcs.ops = &macb_phylink_pcs_ops; - bp->phylink_sgmii_pcs.neg_mode = true; bp->phylink_usx_pcs.ops = &macb_phylink_usx_pcs_ops; - bp->phylink_usx_pcs.neg_mode = true; bp->phylink_config.dev = &dev->dev; bp->phylink_config.type = PHYLINK_NETDEV; @@ -990,8 +988,8 @@ err_out: static void macb_update_stats(struct macb *bp) { - u32 *p = &bp->hw_stats.macb.rx_pause_frames; - u32 *end = &bp->hw_stats.macb.tx_pause_frames + 1; + u64 *p = &bp->hw_stats.macb.rx_pause_frames; + u64 *end = &bp->hw_stats.macb.tx_pause_frames + 1; int offset = MACB_PFR; WARN_ON((unsigned long)(end - p - 1) != (MACB_TPF - MACB_PFR) / 4); @@ -3071,7 +3069,7 @@ static void gem_update_stats(struct macb *bp) unsigned int i, q, idx; unsigned long *stat; - u32 *p = &bp->hw_stats.gem.tx_octets_31_0; + u64 *p = &bp->hw_stats.gem.tx_octets; for (i = 0; i < GEM_STATS_LEN; ++i, ++p) { u32 offset = gem_statistics[i].offset; @@ -3084,7 +3082,7 @@ static void gem_update_stats(struct macb *bp) /* Add GEM_OCTTXH, GEM_OCTRXH */ val = bp->macb_reg_readl(bp, offset + 4); bp->ethtool_stats[i] += ((u64)val) << 32; - *(++p) += val; + *(p++) += ((u64)val) << 32; } } @@ -3094,15 +3092,12 @@ static void gem_update_stats(struct macb *bp) bp->ethtool_stats[idx++] = *stat; } -static struct net_device_stats *gem_get_stats(struct macb *bp) +static void gem_get_stats(struct macb *bp, struct rtnl_link_stats64 *nstat) { struct gem_stats *hwstat = &bp->hw_stats.gem; - struct net_device_stats *nstat = &bp->dev->stats; - if (!netif_running(bp->dev)) - return nstat; - - gem_update_stats(bp); + if (netif_running(bp->dev)) + gem_update_stats(bp); nstat->rx_errors = (hwstat->rx_frame_check_sequence_errors + hwstat->rx_alignment_errors + @@ -3131,8 +3126,6 @@ static struct net_device_stats *gem_get_stats(struct macb *bp) nstat->tx_aborted_errors = hwstat->tx_excessive_collisions; nstat->tx_carrier_errors = hwstat->tx_carrier_sense_errors; nstat->tx_fifo_errors = hwstat->tx_underrun; - - return nstat; } static void gem_get_ethtool_stats(struct net_device *dev, @@ -3183,14 +3176,17 @@ static void gem_get_ethtool_strings(struct net_device *dev, u32 sset, u8 *p) } } -static struct net_device_stats *macb_get_stats(struct net_device *dev) +static void macb_get_stats(struct net_device *dev, + struct rtnl_link_stats64 *nstat) { struct macb *bp = netdev_priv(dev); - struct net_device_stats *nstat = &bp->dev->stats; struct macb_stats *hwstat = &bp->hw_stats.macb; - if (macb_is_gem(bp)) - return gem_get_stats(bp); + netdev_stats_to_stats64(nstat, &bp->dev->stats); + if (macb_is_gem(bp)) { + gem_get_stats(bp, nstat); + return; + } /* read stats from hardware */ macb_update_stats(bp); @@ -3226,8 +3222,154 @@ static struct net_device_stats *macb_get_stats(struct net_device *dev) nstat->tx_carrier_errors = hwstat->tx_carrier_errors; nstat->tx_fifo_errors = hwstat->tx_underruns; /* Don't know about heartbeat or window errors... */ +} + +static void macb_get_pause_stats(struct net_device *dev, + struct ethtool_pause_stats *pause_stats) +{ + struct macb *bp = netdev_priv(dev); + struct macb_stats *hwstat = &bp->hw_stats.macb; + + macb_update_stats(bp); + pause_stats->tx_pause_frames = hwstat->tx_pause_frames; + pause_stats->rx_pause_frames = hwstat->rx_pause_frames; +} + +static void gem_get_pause_stats(struct net_device *dev, + struct ethtool_pause_stats *pause_stats) +{ + struct macb *bp = netdev_priv(dev); + struct gem_stats *hwstat = &bp->hw_stats.gem; + + gem_update_stats(bp); + pause_stats->tx_pause_frames = hwstat->tx_pause_frames; + pause_stats->rx_pause_frames = hwstat->rx_pause_frames; +} + +static void macb_get_eth_mac_stats(struct net_device *dev, + struct ethtool_eth_mac_stats *mac_stats) +{ + struct macb *bp = netdev_priv(dev); + struct macb_stats *hwstat = &bp->hw_stats.macb; + + macb_update_stats(bp); + mac_stats->FramesTransmittedOK = hwstat->tx_ok; + mac_stats->SingleCollisionFrames = hwstat->tx_single_cols; + mac_stats->MultipleCollisionFrames = hwstat->tx_multiple_cols; + mac_stats->FramesReceivedOK = hwstat->rx_ok; + mac_stats->FrameCheckSequenceErrors = hwstat->rx_fcs_errors; + mac_stats->AlignmentErrors = hwstat->rx_align_errors; + mac_stats->FramesWithDeferredXmissions = hwstat->tx_deferred; + mac_stats->LateCollisions = hwstat->tx_late_cols; + mac_stats->FramesAbortedDueToXSColls = hwstat->tx_excessive_cols; + mac_stats->FramesLostDueToIntMACXmitError = hwstat->tx_underruns; + mac_stats->CarrierSenseErrors = hwstat->tx_carrier_errors; + mac_stats->FramesLostDueToIntMACRcvError = hwstat->rx_overruns; + mac_stats->InRangeLengthErrors = hwstat->rx_length_mismatch; + mac_stats->FrameTooLongErrors = hwstat->rx_oversize_pkts; +} + +static void gem_get_eth_mac_stats(struct net_device *dev, + struct ethtool_eth_mac_stats *mac_stats) +{ + struct macb *bp = netdev_priv(dev); + struct gem_stats *hwstat = &bp->hw_stats.gem; + + gem_update_stats(bp); + mac_stats->FramesTransmittedOK = hwstat->tx_frames; + mac_stats->SingleCollisionFrames = hwstat->tx_single_collision_frames; + mac_stats->MultipleCollisionFrames = + hwstat->tx_multiple_collision_frames; + mac_stats->FramesReceivedOK = hwstat->rx_frames; + mac_stats->FrameCheckSequenceErrors = + hwstat->rx_frame_check_sequence_errors; + mac_stats->AlignmentErrors = hwstat->rx_alignment_errors; + mac_stats->OctetsTransmittedOK = hwstat->tx_octets; + mac_stats->FramesWithDeferredXmissions = hwstat->tx_deferred_frames; + mac_stats->LateCollisions = hwstat->tx_late_collisions; + mac_stats->FramesAbortedDueToXSColls = hwstat->tx_excessive_collisions; + mac_stats->FramesLostDueToIntMACXmitError = hwstat->tx_underrun; + mac_stats->CarrierSenseErrors = hwstat->tx_carrier_sense_errors; + mac_stats->OctetsReceivedOK = hwstat->rx_octets; + mac_stats->MulticastFramesXmittedOK = hwstat->tx_multicast_frames; + mac_stats->BroadcastFramesXmittedOK = hwstat->tx_broadcast_frames; + mac_stats->MulticastFramesReceivedOK = hwstat->rx_multicast_frames; + mac_stats->BroadcastFramesReceivedOK = hwstat->rx_broadcast_frames; + mac_stats->InRangeLengthErrors = hwstat->rx_length_field_frame_errors; + mac_stats->FrameTooLongErrors = hwstat->rx_oversize_frames; +} + +/* TODO: Report SQE test errors when added to phy_stats */ +static void macb_get_eth_phy_stats(struct net_device *dev, + struct ethtool_eth_phy_stats *phy_stats) +{ + struct macb *bp = netdev_priv(dev); + struct macb_stats *hwstat = &bp->hw_stats.macb; - return nstat; + macb_update_stats(bp); + phy_stats->SymbolErrorDuringCarrier = hwstat->rx_symbol_errors; +} + +static void gem_get_eth_phy_stats(struct net_device *dev, + struct ethtool_eth_phy_stats *phy_stats) +{ + struct macb *bp = netdev_priv(dev); + struct gem_stats *hwstat = &bp->hw_stats.gem; + + gem_update_stats(bp); + phy_stats->SymbolErrorDuringCarrier = hwstat->rx_symbol_errors; +} + +static void macb_get_rmon_stats(struct net_device *dev, + struct ethtool_rmon_stats *rmon_stats, + const struct ethtool_rmon_hist_range **ranges) +{ + struct macb *bp = netdev_priv(dev); + struct macb_stats *hwstat = &bp->hw_stats.macb; + + macb_update_stats(bp); + rmon_stats->undersize_pkts = hwstat->rx_undersize_pkts; + rmon_stats->oversize_pkts = hwstat->rx_oversize_pkts; + rmon_stats->jabbers = hwstat->rx_jabbers; +} + +static const struct ethtool_rmon_hist_range gem_rmon_ranges[] = { + { 64, 64 }, + { 65, 127 }, + { 128, 255 }, + { 256, 511 }, + { 512, 1023 }, + { 1024, 1518 }, + { 1519, 16384 }, + { }, +}; + +static void gem_get_rmon_stats(struct net_device *dev, + struct ethtool_rmon_stats *rmon_stats, + const struct ethtool_rmon_hist_range **ranges) +{ + struct macb *bp = netdev_priv(dev); + struct gem_stats *hwstat = &bp->hw_stats.gem; + + gem_update_stats(bp); + rmon_stats->undersize_pkts = hwstat->rx_undersized_frames; + rmon_stats->oversize_pkts = hwstat->rx_oversize_frames; + rmon_stats->jabbers = hwstat->rx_jabbers; + rmon_stats->hist[0] = hwstat->rx_64_byte_frames; + rmon_stats->hist[1] = hwstat->rx_65_127_byte_frames; + rmon_stats->hist[2] = hwstat->rx_128_255_byte_frames; + rmon_stats->hist[3] = hwstat->rx_256_511_byte_frames; + rmon_stats->hist[4] = hwstat->rx_512_1023_byte_frames; + rmon_stats->hist[5] = hwstat->rx_1024_1518_byte_frames; + rmon_stats->hist[6] = hwstat->rx_greater_than_1518_byte_frames; + rmon_stats->hist_tx[0] = hwstat->tx_64_byte_frames; + rmon_stats->hist_tx[1] = hwstat->tx_65_127_byte_frames; + rmon_stats->hist_tx[2] = hwstat->tx_128_255_byte_frames; + rmon_stats->hist_tx[3] = hwstat->tx_256_511_byte_frames; + rmon_stats->hist_tx[4] = hwstat->tx_512_1023_byte_frames; + rmon_stats->hist_tx[5] = hwstat->tx_1024_1518_byte_frames; + rmon_stats->hist_tx[6] = hwstat->tx_greater_than_1518_byte_frames; + *ranges = gem_rmon_ranges; } static int macb_get_regs_len(struct net_device *netdev) @@ -3756,6 +3898,10 @@ static const struct ethtool_ops macb_ethtool_ops = { .get_regs = macb_get_regs, .get_link = ethtool_op_get_link, .get_ts_info = ethtool_op_get_ts_info, + .get_pause_stats = macb_get_pause_stats, + .get_eth_mac_stats = macb_get_eth_mac_stats, + .get_eth_phy_stats = macb_get_eth_phy_stats, + .get_rmon_stats = macb_get_rmon_stats, .get_wol = macb_get_wol, .set_wol = macb_set_wol, .get_link_ksettings = macb_get_link_ksettings, @@ -3774,6 +3920,10 @@ static const struct ethtool_ops gem_ethtool_ops = { .get_ethtool_stats = gem_get_ethtool_stats, .get_strings = gem_get_ethtool_strings, .get_sset_count = gem_get_sset_count, + .get_pause_stats = gem_get_pause_stats, + .get_eth_mac_stats = gem_get_eth_mac_stats, + .get_eth_phy_stats = gem_get_eth_phy_stats, + .get_rmon_stats = gem_get_rmon_stats, .get_link_ksettings = macb_get_link_ksettings, .set_link_ksettings = macb_set_link_ksettings, .get_ringparam = macb_get_ringparam, @@ -3910,7 +4060,7 @@ static const struct net_device_ops macb_netdev_ops = { .ndo_stop = macb_close, .ndo_start_xmit = macb_start_xmit, .ndo_set_rx_mode = macb_set_rx_mode, - .ndo_get_stats = macb_get_stats, + .ndo_get_stats64 = macb_get_stats, .ndo_eth_ioctl = macb_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = macb_change_mtu, @@ -4571,7 +4721,7 @@ static const struct net_device_ops at91ether_netdev_ops = { .ndo_open = at91ether_open, .ndo_stop = at91ether_close, .ndo_start_xmit = at91ether_start_xmit, - .ndo_get_stats = macb_get_stats, + .ndo_get_stats64 = macb_get_stats, .ndo_set_rx_mode = macb_set_rx_mode, .ndo_set_mac_address = eth_mac_addr, .ndo_eth_ioctl = macb_ioctl, diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c index 6b6cb73482d7..1753bb87dfbd 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c @@ -1433,22 +1433,6 @@ int octeon_wait_for_ddr_init(struct octeon_device *oct, u32 *timeout) } EXPORT_SYMBOL_GPL(octeon_wait_for_ddr_init); -/* Get the octeon id assigned to the octeon device passed as argument. - * This function is exported to other modules. - * @param dev - octeon device pointer passed as a void *. - * @return octeon device id - */ -int lio_get_device_id(void *dev) -{ - struct octeon_device *octeon_dev = (struct octeon_device *)dev; - u32 i; - - for (i = 0; i < MAX_OCTEON_DEVICES; i++) - if (octeon_device[i] == octeon_dev) - return octeon_dev->octeon_id; - return -1; -} - void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq) { u64 instr_cnt; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h index d26364c2ac81..19344b21f8fb 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h @@ -705,13 +705,6 @@ octeon_get_dispatch(struct octeon_device *octeon_dev, u16 opcode, */ struct octeon_device *lio_get_device(u32 octeon_id); -/** Get the octeon id assigned to the octeon device passed as argument. - * This function is exported to other modules. - * @param dev - octeon device pointer passed as a void *. - * @return octeon device id - */ -int lio_get_device_id(void *dev); - /** Read windowed register. * @param oct - pointer to the Octeon device. * @param addr - Address of the register to read. diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index c7c2c15a1815..95e6f015a6af 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -1211,9 +1211,6 @@ struct adapter { struct timer_list flower_stats_timer; struct work_struct flower_stats_work; - /* Ethtool Dump */ - struct ethtool_dump eth_dump; - /* HMA */ struct hma_data hma; @@ -1233,6 +1230,10 @@ struct adapter { /* Ethtool n-tuple */ struct cxgb4_ethtool_filter *ethtool_filters; + + /* Ethtool Dump */ + /* Must be last - ends in a flex-array member. */ + struct ethtool_dump eth_dump; }; /* Support for "sched-class" command to allow a TX Scheduling Class to be diff --git a/drivers/net/ethernet/cisco/enic/Makefile b/drivers/net/ethernet/cisco/enic/Makefile index c3b6febfdbe4..b3b5196b2dfc 100644 --- a/drivers/net/ethernet/cisco/enic/Makefile +++ b/drivers/net/ethernet/cisco/enic/Makefile @@ -3,5 +3,5 @@ obj-$(CONFIG_ENIC) := enic.o enic-y := enic_main.o vnic_cq.o vnic_intr.o vnic_wq.o \ enic_res.o enic_dev.o enic_pp.o vnic_dev.o vnic_rq.o vnic_vic.o \ - enic_ethtool.o enic_api.o enic_clsf.o + enic_ethtool.o enic_api.o enic_clsf.o enic_rq.o diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h index 10b7e02ba4d0..305ed12aa031 100644 --- a/drivers/net/ethernet/cisco/enic/enic.h +++ b/drivers/net/ethernet/cisco/enic/enic.h @@ -17,6 +17,7 @@ #include "vnic_nic.h" #include "vnic_rss.h" #include <linux/irq.h> +#include <net/page_pool/helpers.h> #define DRV_NAME "enic" #define DRV_DESCRIPTION "Cisco VIC Ethernet NIC Driver" @@ -158,6 +159,7 @@ struct enic_rq_stats { u64 pkt_truncated; /* truncated pkts */ u64 no_skb; /* out of skbs */ u64 desc_skip; /* Rx pkt went into later buffer */ + u64 pp_alloc_fail; /* page pool alloc failure */ }; struct enic_wq { @@ -169,6 +171,7 @@ struct enic_wq { struct enic_rq { struct vnic_rq vrq; struct enic_rq_stats stats; + struct page_pool *pool; } ____cacheline_aligned; /* Per-instance private data structure */ @@ -223,7 +226,6 @@ struct enic { unsigned int cq_avail; unsigned int cq_count; struct enic_rfs_flw_tbl rfs_h; - u32 rx_copybreak; u8 rss_key[ENIC_RSS_LEN]; struct vnic_gen_stats gen_stats; }; diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c index d607b4f0542c..18b929fc2879 100644 --- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c +++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c @@ -608,43 +608,6 @@ static int enic_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, return ret; } -static int enic_get_tunable(struct net_device *dev, - const struct ethtool_tunable *tuna, void *data) -{ - struct enic *enic = netdev_priv(dev); - int ret = 0; - - switch (tuna->id) { - case ETHTOOL_RX_COPYBREAK: - *(u32 *)data = enic->rx_copybreak; - break; - default: - ret = -EINVAL; - break; - } - - return ret; -} - -static int enic_set_tunable(struct net_device *dev, - const struct ethtool_tunable *tuna, - const void *data) -{ - struct enic *enic = netdev_priv(dev); - int ret = 0; - - switch (tuna->id) { - case ETHTOOL_RX_COPYBREAK: - enic->rx_copybreak = *(u32 *)data; - break; - default: - ret = -EINVAL; - break; - } - - return ret; -} - static u32 enic_get_rxfh_key_size(struct net_device *netdev) { return ENIC_RSS_LEN; @@ -727,8 +690,6 @@ static const struct ethtool_ops enic_ethtool_ops = { .get_coalesce = enic_get_coalesce, .set_coalesce = enic_set_coalesce, .get_rxnfc = enic_get_rxnfc, - .get_tunable = enic_get_tunable, - .set_tunable = enic_set_tunable, .get_rxfh_key_size = enic_get_rxfh_key_size, .get_rxfh = enic_get_rxfh, .set_rxfh = enic_set_rxfh, diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 49f6cab01ed5..f24fd29ea207 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -58,6 +58,7 @@ #include "enic_dev.h" #include "enic_pp.h" #include "enic_clsf.h" +#include "enic_rq.h" #define ENIC_NOTIFY_TIMER_PERIOD (2 * HZ) #define WQ_ENET_MAX_DESC_LEN (1 << WQ_ENET_LEN_BITS) @@ -68,8 +69,6 @@ #define PCI_DEVICE_ID_CISCO_VIC_ENET_DYN 0x0044 /* enet dynamic vnic */ #define PCI_DEVICE_ID_CISCO_VIC_ENET_VF 0x0071 /* enet SRIOV VF */ -#define RX_COPYBREAK_DEFAULT 256 - /* Supported devices */ static const struct pci_device_id enic_id_table[] = { { PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET) }, @@ -1313,243 +1312,6 @@ nla_put_failure: return -EMSGSIZE; } -static void enic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf) -{ - struct enic *enic = vnic_dev_priv(rq->vdev); - - if (!buf->os_buf) - return; - - dma_unmap_single(&enic->pdev->dev, buf->dma_addr, buf->len, - DMA_FROM_DEVICE); - dev_kfree_skb_any(buf->os_buf); - buf->os_buf = NULL; -} - -static int enic_rq_alloc_buf(struct vnic_rq *rq) -{ - struct enic *enic = vnic_dev_priv(rq->vdev); - struct net_device *netdev = enic->netdev; - struct sk_buff *skb; - unsigned int len = netdev->mtu + VLAN_ETH_HLEN; - unsigned int os_buf_index = 0; - dma_addr_t dma_addr; - struct vnic_rq_buf *buf = rq->to_use; - - if (buf->os_buf) { - enic_queue_rq_desc(rq, buf->os_buf, os_buf_index, buf->dma_addr, - buf->len); - - return 0; - } - skb = netdev_alloc_skb_ip_align(netdev, len); - if (!skb) { - enic->rq[rq->index].stats.no_skb++; - return -ENOMEM; - } - - dma_addr = dma_map_single(&enic->pdev->dev, skb->data, len, - DMA_FROM_DEVICE); - if (unlikely(enic_dma_map_check(enic, dma_addr))) { - dev_kfree_skb(skb); - return -ENOMEM; - } - - enic_queue_rq_desc(rq, skb, os_buf_index, - dma_addr, len); - - return 0; -} - -static void enic_intr_update_pkt_size(struct vnic_rx_bytes_counter *pkt_size, - u32 pkt_len) -{ - if (ENIC_LARGE_PKT_THRESHOLD <= pkt_len) - pkt_size->large_pkt_bytes_cnt += pkt_len; - else - pkt_size->small_pkt_bytes_cnt += pkt_len; -} - -static bool enic_rxcopybreak(struct net_device *netdev, struct sk_buff **skb, - struct vnic_rq_buf *buf, u16 len) -{ - struct enic *enic = netdev_priv(netdev); - struct sk_buff *new_skb; - - if (len > enic->rx_copybreak) - return false; - new_skb = netdev_alloc_skb_ip_align(netdev, len); - if (!new_skb) - return false; - dma_sync_single_for_cpu(&enic->pdev->dev, buf->dma_addr, len, - DMA_FROM_DEVICE); - memcpy(new_skb->data, (*skb)->data, len); - *skb = new_skb; - - return true; -} - -static void enic_rq_indicate_buf(struct vnic_rq *rq, - struct cq_desc *cq_desc, struct vnic_rq_buf *buf, - int skipped, void *opaque) -{ - struct enic *enic = vnic_dev_priv(rq->vdev); - struct net_device *netdev = enic->netdev; - struct sk_buff *skb; - struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)]; - struct enic_rq_stats *rqstats = &enic->rq[rq->index].stats; - - u8 type, color, eop, sop, ingress_port, vlan_stripped; - u8 fcoe, fcoe_sof, fcoe_fc_crc_ok, fcoe_enc_error, fcoe_eof; - u8 tcp_udp_csum_ok, udp, tcp, ipv4_csum_ok; - u8 ipv6, ipv4, ipv4_fragment, fcs_ok, rss_type, csum_not_calc; - u8 packet_error; - u16 q_number, completed_index, bytes_written, vlan_tci, checksum; - u32 rss_hash; - bool outer_csum_ok = true, encap = false; - - rqstats->packets++; - if (skipped) { - rqstats->desc_skip++; - return; - } - - skb = buf->os_buf; - - cq_enet_rq_desc_dec((struct cq_enet_rq_desc *)cq_desc, - &type, &color, &q_number, &completed_index, - &ingress_port, &fcoe, &eop, &sop, &rss_type, - &csum_not_calc, &rss_hash, &bytes_written, - &packet_error, &vlan_stripped, &vlan_tci, &checksum, - &fcoe_sof, &fcoe_fc_crc_ok, &fcoe_enc_error, - &fcoe_eof, &tcp_udp_csum_ok, &udp, &tcp, - &ipv4_csum_ok, &ipv6, &ipv4, &ipv4_fragment, - &fcs_ok); - - if (packet_error) { - - if (!fcs_ok) { - if (bytes_written > 0) - rqstats->bad_fcs++; - else if (bytes_written == 0) - rqstats->pkt_truncated++; - } - - dma_unmap_single(&enic->pdev->dev, buf->dma_addr, buf->len, - DMA_FROM_DEVICE); - dev_kfree_skb_any(skb); - buf->os_buf = NULL; - - return; - } - - if (eop && bytes_written > 0) { - - /* Good receive - */ - rqstats->bytes += bytes_written; - if (!enic_rxcopybreak(netdev, &skb, buf, bytes_written)) { - buf->os_buf = NULL; - dma_unmap_single(&enic->pdev->dev, buf->dma_addr, - buf->len, DMA_FROM_DEVICE); - } - prefetch(skb->data - NET_IP_ALIGN); - - skb_put(skb, bytes_written); - skb->protocol = eth_type_trans(skb, netdev); - skb_record_rx_queue(skb, q_number); - if ((netdev->features & NETIF_F_RXHASH) && rss_hash && - (type == 3)) { - switch (rss_type) { - case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv4: - case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6: - case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6_EX: - skb_set_hash(skb, rss_hash, PKT_HASH_TYPE_L4); - rqstats->l4_rss_hash++; - break; - case CQ_ENET_RQ_DESC_RSS_TYPE_IPv4: - case CQ_ENET_RQ_DESC_RSS_TYPE_IPv6: - case CQ_ENET_RQ_DESC_RSS_TYPE_IPv6_EX: - skb_set_hash(skb, rss_hash, PKT_HASH_TYPE_L3); - rqstats->l3_rss_hash++; - break; - } - } - if (enic->vxlan.vxlan_udp_port_number) { - switch (enic->vxlan.patch_level) { - case 0: - if (fcoe) { - encap = true; - outer_csum_ok = fcoe_fc_crc_ok; - } - break; - case 2: - if ((type == 7) && - (rss_hash & BIT(0))) { - encap = true; - outer_csum_ok = (rss_hash & BIT(1)) && - (rss_hash & BIT(2)); - } - break; - } - } - - /* Hardware does not provide whole packet checksum. It only - * provides pseudo checksum. Since hw validates the packet - * checksum but not provide us the checksum value. use - * CHECSUM_UNNECESSARY. - * - * In case of encap pkt tcp_udp_csum_ok/tcp_udp_csum_ok is - * inner csum_ok. outer_csum_ok is set by hw when outer udp - * csum is correct or is zero. - */ - if ((netdev->features & NETIF_F_RXCSUM) && !csum_not_calc && - tcp_udp_csum_ok && outer_csum_ok && - (ipv4_csum_ok || ipv6)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->csum_level = encap; - if (encap) - rqstats->csum_unnecessary_encap++; - else - rqstats->csum_unnecessary++; - } - - if (vlan_stripped) { - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); - rqstats->vlan_stripped++; - } - skb_mark_napi_id(skb, &enic->napi[rq->index]); - if (!(netdev->features & NETIF_F_GRO)) - netif_receive_skb(skb); - else - napi_gro_receive(&enic->napi[q_number], skb); - if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce) - enic_intr_update_pkt_size(&cq->pkt_size_counter, - bytes_written); - } else { - - /* Buffer overflow - */ - rqstats->pkt_truncated++; - dma_unmap_single(&enic->pdev->dev, buf->dma_addr, buf->len, - DMA_FROM_DEVICE); - dev_kfree_skb_any(skb); - buf->os_buf = NULL; - } -} - -static int enic_rq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc, - u8 type, u16 q_number, u16 completed_index, void *opaque) -{ - struct enic *enic = vnic_dev_priv(vdev); - - vnic_rq_service(&enic->rq[q_number].vrq, cq_desc, - completed_index, VNIC_RQ_RETURN_DESC, - enic_rq_indicate_buf, opaque); - - return 0; -} - static void enic_set_int_moderation(struct enic *enic, struct vnic_rq *rq) { unsigned int intr = enic_msix_rq_intr(enic, rq->index); @@ -1972,6 +1734,17 @@ static int enic_open(struct net_device *netdev) struct enic *enic = netdev_priv(netdev); unsigned int i; int err, ret; + unsigned int max_pkt_len = netdev->mtu + VLAN_ETH_HLEN; + struct page_pool_params pp_params = { + .order = get_order(max_pkt_len), + .pool_size = enic->config.rq_desc_count, + .nid = dev_to_node(&enic->pdev->dev), + .dev = &enic->pdev->dev, + .dma_dir = DMA_FROM_DEVICE, + .max_len = (max_pkt_len > PAGE_SIZE) ? max_pkt_len : PAGE_SIZE, + .netdev = netdev, + .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, + }; err = enic_request_intr(enic); if (err) { @@ -1989,6 +1762,16 @@ static int enic_open(struct net_device *netdev) } for (i = 0; i < enic->rq_count; i++) { + /* create a page pool for each RQ */ + pp_params.napi = &enic->napi[i]; + pp_params.queue_idx = i; + enic->rq[i].pool = page_pool_create(&pp_params); + if (IS_ERR(enic->rq[i].pool)) { + err = PTR_ERR(enic->rq[i].pool); + enic->rq[i].pool = NULL; + goto err_out_free_rq; + } + /* enable rq before updating rq desc */ vnic_rq_enable(&enic->rq[i].vrq); vnic_rq_fill(&enic->rq[i].vrq, enic_rq_alloc_buf); @@ -2029,8 +1812,11 @@ static int enic_open(struct net_device *netdev) err_out_free_rq: for (i = 0; i < enic->rq_count; i++) { ret = vnic_rq_disable(&enic->rq[i].vrq); - if (!ret) + if (!ret) { vnic_rq_clean(&enic->rq[i].vrq, enic_free_rq_buf); + page_pool_destroy(enic->rq[i].pool); + enic->rq[i].pool = NULL; + } } enic_dev_notify_unset(enic); err_out_free_intr: @@ -2088,8 +1874,11 @@ static int enic_stop(struct net_device *netdev) for (i = 0; i < enic->wq_count; i++) vnic_wq_clean(&enic->wq[i].vwq, enic_free_wq_buf); - for (i = 0; i < enic->rq_count; i++) + for (i = 0; i < enic->rq_count; i++) { vnic_rq_clean(&enic->rq[i].vrq, enic_free_rq_buf); + page_pool_destroy(enic->rq[i].pool); + enic->rq[i].pool = NULL; + } for (i = 0; i < enic->cq_count; i++) vnic_cq_clean(&enic->cq[i]); for (i = 0; i < enic->intr_count; i++) @@ -2599,6 +2388,7 @@ static void enic_get_queue_stats_rx(struct net_device *dev, int idx, rxs->hw_drop_overruns = rqstats->pkt_truncated; rxs->csum_unnecessary = rqstats->csum_unnecessary + rqstats->csum_unnecessary_encap; + rxs->alloc_fail = rqstats->pp_alloc_fail; } static void enic_get_queue_stats_tx(struct net_device *dev, int idx, @@ -2626,6 +2416,7 @@ static void enic_get_base_stats(struct net_device *dev, rxs->hw_drops = 0; rxs->hw_drop_overruns = 0; rxs->csum_unnecessary = 0; + rxs->alloc_fail = 0; txs->bytes = 0; txs->packets = 0; txs->csum_none = 0; @@ -3179,7 +2970,6 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev_err(dev, "Cannot register net device, aborting\n"); goto err_out_dev_deinit; } - enic->rx_copybreak = RX_COPYBREAK_DEFAULT; return 0; diff --git a/drivers/net/ethernet/cisco/enic/enic_rq.c b/drivers/net/ethernet/cisco/enic/enic_rq.c new file mode 100644 index 000000000000..e3228ef7988a --- /dev/null +++ b/drivers/net/ethernet/cisco/enic/enic_rq.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright 2024 Cisco Systems, Inc. All rights reserved. + +#include <linux/skbuff.h> +#include <linux/if_vlan.h> +#include <net/busy_poll.h> +#include "enic.h" +#include "enic_res.h" +#include "enic_rq.h" +#include "vnic_rq.h" +#include "cq_enet_desc.h" + +#define ENIC_LARGE_PKT_THRESHOLD 1000 + +static void enic_intr_update_pkt_size(struct vnic_rx_bytes_counter *pkt_size, + u32 pkt_len) +{ + if (pkt_len > ENIC_LARGE_PKT_THRESHOLD) + pkt_size->large_pkt_bytes_cnt += pkt_len; + else + pkt_size->small_pkt_bytes_cnt += pkt_len; +} + +int enic_rq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc, u8 type, + u16 q_number, u16 completed_index, void *opaque) +{ + struct enic *enic = vnic_dev_priv(vdev); + + vnic_rq_service(&enic->rq[q_number].vrq, cq_desc, completed_index, + VNIC_RQ_RETURN_DESC, enic_rq_indicate_buf, opaque); + return 0; +} + +static void enic_rq_set_skb_flags(struct vnic_rq *vrq, u8 type, u32 rss_hash, + u8 rss_type, u8 fcoe, u8 fcoe_fc_crc_ok, + u8 vlan_stripped, u8 csum_not_calc, + u8 tcp_udp_csum_ok, u8 ipv6, u8 ipv4_csum_ok, + u16 vlan_tci, struct sk_buff *skb) +{ + struct enic *enic = vnic_dev_priv(vrq->vdev); + struct net_device *netdev = enic->netdev; + struct enic_rq_stats *rqstats = &enic->rq[vrq->index].stats; + bool outer_csum_ok = true, encap = false; + + if ((netdev->features & NETIF_F_RXHASH) && rss_hash && type == 3) { + switch (rss_type) { + case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv4: + case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6: + case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6_EX: + skb_set_hash(skb, rss_hash, PKT_HASH_TYPE_L4); + rqstats->l4_rss_hash++; + break; + case CQ_ENET_RQ_DESC_RSS_TYPE_IPv4: + case CQ_ENET_RQ_DESC_RSS_TYPE_IPv6: + case CQ_ENET_RQ_DESC_RSS_TYPE_IPv6_EX: + skb_set_hash(skb, rss_hash, PKT_HASH_TYPE_L3); + rqstats->l3_rss_hash++; + break; + } + } + if (enic->vxlan.vxlan_udp_port_number) { + switch (enic->vxlan.patch_level) { + case 0: + if (fcoe) { + encap = true; + outer_csum_ok = fcoe_fc_crc_ok; + } + break; + case 2: + if (type == 7 && (rss_hash & BIT(0))) { + encap = true; + outer_csum_ok = (rss_hash & BIT(1)) && + (rss_hash & BIT(2)); + } + break; + } + } + + /* Hardware does not provide whole packet checksum. It only + * provides pseudo checksum. Since hw validates the packet + * checksum but not provide us the checksum value. use + * CHECSUM_UNNECESSARY. + * + * In case of encap pkt tcp_udp_csum_ok/tcp_udp_csum_ok is + * inner csum_ok. outer_csum_ok is set by hw when outer udp + * csum is correct or is zero. + */ + if ((netdev->features & NETIF_F_RXCSUM) && !csum_not_calc && + tcp_udp_csum_ok && outer_csum_ok && (ipv4_csum_ok || ipv6)) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->csum_level = encap; + if (encap) + rqstats->csum_unnecessary_encap++; + else + rqstats->csum_unnecessary++; + } + + if (vlan_stripped) { + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); + rqstats->vlan_stripped++; + } +} + +static bool enic_rq_pkt_error(struct vnic_rq *vrq, u8 packet_error, u8 fcs_ok, + u16 bytes_written) +{ + struct enic *enic = vnic_dev_priv(vrq->vdev); + struct enic_rq_stats *rqstats = &enic->rq[vrq->index].stats; + + if (packet_error) { + if (!fcs_ok) { + if (bytes_written > 0) + rqstats->bad_fcs++; + else if (bytes_written == 0) + rqstats->pkt_truncated++; + } + return true; + } + return false; +} + +int enic_rq_alloc_buf(struct vnic_rq *rq) +{ + struct enic *enic = vnic_dev_priv(rq->vdev); + struct net_device *netdev = enic->netdev; + struct enic_rq *erq = &enic->rq[rq->index]; + struct enic_rq_stats *rqstats = &erq->stats; + unsigned int offset = 0; + unsigned int len = netdev->mtu + VLAN_ETH_HLEN; + unsigned int os_buf_index = 0; + dma_addr_t dma_addr; + struct vnic_rq_buf *buf = rq->to_use; + struct page *page; + unsigned int truesize = len; + + if (buf->os_buf) { + enic_queue_rq_desc(rq, buf->os_buf, os_buf_index, buf->dma_addr, + buf->len); + + return 0; + } + + page = page_pool_dev_alloc(erq->pool, &offset, &truesize); + if (unlikely(!page)) { + rqstats->pp_alloc_fail++; + return -ENOMEM; + } + buf->offset = offset; + buf->truesize = truesize; + dma_addr = page_pool_get_dma_addr(page) + offset; + enic_queue_rq_desc(rq, (void *)page, os_buf_index, dma_addr, len); + + return 0; +} + +void enic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf) +{ + struct enic *enic = vnic_dev_priv(rq->vdev); + struct enic_rq *erq = &enic->rq[rq->index]; + + if (!buf->os_buf) + return; + + page_pool_put_full_page(erq->pool, (struct page *)buf->os_buf, true); + buf->os_buf = NULL; +} + +void enic_rq_indicate_buf(struct vnic_rq *rq, struct cq_desc *cq_desc, + struct vnic_rq_buf *buf, int skipped, void *opaque) +{ + struct enic *enic = vnic_dev_priv(rq->vdev); + struct sk_buff *skb; + struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)]; + struct enic_rq_stats *rqstats = &enic->rq[rq->index].stats; + struct napi_struct *napi; + + u8 type, color, eop, sop, ingress_port, vlan_stripped; + u8 fcoe, fcoe_sof, fcoe_fc_crc_ok, fcoe_enc_error, fcoe_eof; + u8 tcp_udp_csum_ok, udp, tcp, ipv4_csum_ok; + u8 ipv6, ipv4, ipv4_fragment, fcs_ok, rss_type, csum_not_calc; + u8 packet_error; + u16 q_number, completed_index, bytes_written, vlan_tci, checksum; + u32 rss_hash; + + rqstats->packets++; + if (skipped) { + rqstats->desc_skip++; + return; + } + + cq_enet_rq_desc_dec((struct cq_enet_rq_desc *)cq_desc, &type, &color, + &q_number, &completed_index, &ingress_port, &fcoe, + &eop, &sop, &rss_type, &csum_not_calc, &rss_hash, + &bytes_written, &packet_error, &vlan_stripped, + &vlan_tci, &checksum, &fcoe_sof, &fcoe_fc_crc_ok, + &fcoe_enc_error, &fcoe_eof, &tcp_udp_csum_ok, &udp, + &tcp, &ipv4_csum_ok, &ipv6, &ipv4, &ipv4_fragment, + &fcs_ok); + + if (enic_rq_pkt_error(rq, packet_error, fcs_ok, bytes_written)) + return; + + if (eop && bytes_written > 0) { + /* Good receive + */ + rqstats->bytes += bytes_written; + napi = &enic->napi[rq->index]; + skb = napi_get_frags(napi); + if (unlikely(!skb)) { + net_warn_ratelimited("%s: skb alloc error rq[%d], desc[%d]\n", + enic->netdev->name, rq->index, + completed_index); + rqstats->no_skb++; + return; + } + + prefetch(skb->data - NET_IP_ALIGN); + + dma_sync_single_for_cpu(&enic->pdev->dev, buf->dma_addr, + bytes_written, DMA_FROM_DEVICE); + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + (struct page *)buf->os_buf, buf->offset, + bytes_written, buf->truesize); + skb_record_rx_queue(skb, q_number); + enic_rq_set_skb_flags(rq, type, rss_hash, rss_type, fcoe, + fcoe_fc_crc_ok, vlan_stripped, + csum_not_calc, tcp_udp_csum_ok, ipv6, + ipv4_csum_ok, vlan_tci, skb); + skb_mark_for_recycle(skb); + napi_gro_frags(napi); + if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce) + enic_intr_update_pkt_size(&cq->pkt_size_counter, + bytes_written); + buf->os_buf = NULL; + buf->dma_addr = 0; + buf = buf->next; + } else { + /* Buffer overflow + */ + rqstats->pkt_truncated++; + } +} diff --git a/drivers/net/ethernet/cisco/enic/enic_rq.h b/drivers/net/ethernet/cisco/enic/enic_rq.h new file mode 100644 index 000000000000..a75d07562686 --- /dev/null +++ b/drivers/net/ethernet/cisco/enic/enic_rq.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * Copyright 2024 Cisco Systems, Inc. All rights reserved. + */ + +int enic_rq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc, u8 type, + u16 q_number, u16 completed_index, void *opaque); +void enic_rq_indicate_buf(struct vnic_rq *rq, struct cq_desc *cq_desc, + struct vnic_rq_buf *buf, int skipped, void *opaque); +int enic_rq_alloc_buf(struct vnic_rq *rq); +void enic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf); diff --git a/drivers/net/ethernet/cisco/enic/vnic_rq.h b/drivers/net/ethernet/cisco/enic/vnic_rq.h index 0bc595abc03b..2ee4be2b9a34 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_rq.h +++ b/drivers/net/ethernet/cisco/enic/vnic_rq.h @@ -61,6 +61,8 @@ struct vnic_rq_buf { unsigned int index; void *desc; uint64_t wr_id; + unsigned int offset; + unsigned int truesize; }; enum enic_poll_state { diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index f7c4ce8e9a26..a86cfebedaa8 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1093,6 +1093,29 @@ static void fec_enet_enable_ring(struct net_device *ndev) } } +/* Whack a reset. We should wait for this. + * For i.MX6SX SOC, enet use AXI bus, we use disable MAC + * instead of reset MAC itself. + */ +static void fec_ctrl_reset(struct fec_enet_private *fep, bool allow_wol) +{ + u32 val; + + if (!allow_wol || !(fep->wol_flag & FEC_WOL_FLAG_SLEEP_ON)) { + if (fep->quirks & FEC_QUIRK_HAS_MULTI_QUEUES || + ((fep->quirks & FEC_QUIRK_NO_HARD_RESET) && fep->link)) { + writel(0, fep->hwp + FEC_ECNTRL); + } else { + writel(FEC_ECR_RESET, fep->hwp + FEC_ECNTRL); + udelay(10); + } + } else { + val = readl(fep->hwp + FEC_ECNTRL); + val |= (FEC_ECR_MAGICEN | FEC_ECR_SLEEP); + writel(val, fep->hwp + FEC_ECNTRL); + } +} + /* * This function is called to start or restart the FEC during a link * change, transmit timeout, or to reconfigure the FEC. The network @@ -1109,17 +1132,7 @@ fec_restart(struct net_device *ndev) if (fep->bufdesc_ex) fec_ptp_save_state(fep); - /* Whack a reset. We should wait for this. - * For i.MX6SX SOC, enet use AXI bus, we use disable MAC - * instead of reset MAC itself. - */ - if (fep->quirks & FEC_QUIRK_HAS_MULTI_QUEUES || - ((fep->quirks & FEC_QUIRK_NO_HARD_RESET) && fep->link)) { - writel(0, fep->hwp + FEC_ECNTRL); - } else { - writel(1, fep->hwp + FEC_ECNTRL); - udelay(10); - } + fec_ctrl_reset(fep, false); /* * enet-mac reset will reset mac address registers too, @@ -1373,22 +1386,7 @@ fec_stop(struct net_device *ndev) if (fep->bufdesc_ex) fec_ptp_save_state(fep); - /* Whack a reset. We should wait for this. - * For i.MX6SX SOC, enet use AXI bus, we use disable MAC - * instead of reset MAC itself. - */ - if (!(fep->wol_flag & FEC_WOL_FLAG_SLEEP_ON)) { - if (fep->quirks & FEC_QUIRK_HAS_MULTI_QUEUES) { - writel(0, fep->hwp + FEC_ECNTRL); - } else { - writel(FEC_ECR_RESET, fep->hwp + FEC_ECNTRL); - udelay(10); - } - } else { - val = readl(fep->hwp + FEC_ECNTRL); - val |= (FEC_ECR_MAGICEN | FEC_ECR_SLEEP); - writel(val, fep->hwp + FEC_ECNTRL); - } + fec_ctrl_reset(fep, true); writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED); writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK); diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.c b/drivers/net/ethernet/freescale/fman/fman_dtsec.c index b3e2a596ad2c..51402dff72c5 100644 --- a/drivers/net/ethernet/freescale/fman/fman_dtsec.c +++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.c @@ -1446,7 +1446,6 @@ int dtsec_initialization(struct mac_device *mac_dev, goto _return_fm_mac_free; } dtsec->pcs.ops = &dtsec_pcs_ops; - dtsec->pcs.neg_mode = true; dtsec->pcs.poll = true; supported = mac_dev->phylink_config.supported_interfaces; diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 435138f4699d..deb35b38c976 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -1647,20 +1647,11 @@ static void gfar_configure_serdes(struct net_device *dev) */ static int init_phy(struct net_device *dev) { - __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; struct gfar_private *priv = netdev_priv(dev); phy_interface_t interface = priv->interface; struct phy_device *phydev; struct ethtool_keee edata; - linkmode_set_bit_array(phy_10_100_features_array, - ARRAY_SIZE(phy_10_100_features_array), - mask); - linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, mask); - linkmode_set_bit(ETHTOOL_LINK_MODE_MII_BIT, mask); - if (priv->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT) - linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, mask); - priv->oldlink = 0; priv->oldspeed = 0; priv->oldduplex = -1; @@ -1675,9 +1666,8 @@ static int init_phy(struct net_device *dev) if (interface == PHY_INTERFACE_MODE_SGMII) gfar_configure_serdes(dev); - /* Remove any features not supported by the controller */ - linkmode_and(phydev->supported, phydev->supported, mask); - linkmode_copy(phydev->advertising, phydev->supported); + if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT)) + phy_set_max_speed(phydev, SPEED_100); /* Add support for flow control */ phy_support_asym_pause(phydev); diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 88510f822759..affd5a6c44e7 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3408,7 +3408,7 @@ static int ucc_geth_parse_clock(struct device_node *np, const char *which, return 0; } -struct phylink_mac_ops ugeth_mac_ops = { +static const struct phylink_mac_ops ugeth_mac_ops = { .mac_link_up = ugeth_mac_link_up, .mac_link_down = ugeth_mac_link_down, .mac_config = ugeth_mac_config, diff --git a/drivers/net/ethernet/freescale/ucc_geth.h b/drivers/net/ethernet/freescale/ucc_geth.h index 38789faae706..84f92f6384e7 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.h +++ b/drivers/net/ethernet/freescale/ucc_geth.h @@ -890,8 +890,6 @@ struct ucc_geth_hardware_statistics { addresses */ #define TX_TIMEOUT (1*HZ) -#define PHY_INIT_TIMEOUT 100000 -#define PHY_CHANGE_TIME 2 /* Fast Ethernet (10/100 Mbps) */ #define UCC_GETH_URFS_INIT 512 /* Rx virtual FIFO size diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 25b8a3556004..417dfa18daae 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -2554,17 +2554,12 @@ static int emac_dt_mdio_probe(struct emac_instance *dev) struct mii_bus *bus; int res; - mii_np = of_get_child_by_name(dev->ofdev->dev.of_node, "mdio"); + mii_np = of_get_available_child_by_name(dev->ofdev->dev.of_node, "mdio"); if (!mii_np) { dev_err(&dev->ofdev->dev, "no mdio definition found."); return -ENODEV; } - if (!of_device_is_available(mii_np)) { - res = -ENODEV; - goto put_node; - } - bus = devm_mdiobus_alloc(&dev->ofdev->dev); if (!bus) { res = -ENOMEM; diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index 24ec9a4f1ffa..1640d2f27833 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -264,6 +264,7 @@ config I40EVF tristate "Intel(R) Ethernet Adaptive Virtual Function support" select IAVF depends on PCI_MSI + depends on PTP_1588_CLOCK_OPTIONAL help This driver supports virtual functions for Intel XL710, X710, X722, XXV710, and all devices advertising support for @@ -336,7 +337,7 @@ config ICE_SWITCHDEV config ICE_HWTS bool "Support HW cross-timestamp on platforms with PTM support" default y - depends on ICE && X86 + depends on ICE && X86 && PCIE_PTM help Say Y to enable hardware supported cross-timestamping on platforms with PCIe PTM support. The cross-timestamp is available through diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c index d7df2a0ed629..44249dd91bd6 100644 --- a/drivers/net/ethernet/intel/e1000e/mac.c +++ b/drivers/net/ethernet/intel/e1000e/mac.c @@ -331,8 +331,21 @@ void e1000e_update_mc_addr_list_generic(struct e1000_hw *hw, } /* replace the entire MTA table */ - for (i = hw->mac.mta_reg_count - 1; i >= 0; i--) + for (i = hw->mac.mta_reg_count - 1; i >= 0; i--) { E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, hw->mac.mta_shadow[i]); + + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { + /* + * Do not queue up too many posted writes to prevent + * increased latency for other devices on the + * interconnect. Flush after each 8th posted write, + * to keep additional execution time low while still + * preventing increased latency. + */ + if (!(i % 8) && i) + e1e_flush(); + } + } e1e_flush(); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index e28f1905a4a0..9f47388eaba5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -2,6 +2,7 @@ /* Copyright(c) 2018 Intel Corporation. */ #include <linux/bpf_trace.h> +#include <linux/unroll.h> #include <net/xdp_sock_drv.h> #include "i40e_txrx_common.h" #include "i40e_xsk.h" @@ -529,7 +530,8 @@ static void i40e_xmit_pkt_batch(struct i40e_ring *xdp_ring, struct xdp_desc *des dma_addr_t dma; u32 i; - loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) { + unrolled_count(PKTS_PER_BATCH) + for (i = 0; i < PKTS_PER_BATCH; i++) { u32 cmd = I40E_TX_DESC_CMD_ICRC | xsk_is_eop_desc(&desc[i]); dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc[i].addr); diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h index ef156fad52f2..dd16351a7af8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h @@ -6,7 +6,7 @@ #include <linux/types.h> -/* This value should match the pragma in the loop_unrolled_for +/* This value should match the pragma in the unrolled_count() * macro. Why 4? It is strictly empirical. It seems to be a good * compromise between the advantage of having simultaneous outstanding * reads to the DMA array that can hide each others latency and the @@ -14,14 +14,6 @@ */ #define PKTS_PER_BATCH 4 -#ifdef __clang__ -#define loop_unrolled_for _Pragma("clang loop unroll_count(4)") for -#elif __GNUC__ >= 8 -#define loop_unrolled_for _Pragma("GCC unroll 4") for -#else -#define loop_unrolled_for for -#endif - struct i40e_ring; struct i40e_vsi; struct net_device; diff --git a/drivers/net/ethernet/intel/iavf/Makefile b/drivers/net/ethernet/intel/iavf/Makefile index 356ac9faa5bf..e13720a728ff 100644 --- a/drivers/net/ethernet/intel/iavf/Makefile +++ b/drivers/net/ethernet/intel/iavf/Makefile @@ -13,3 +13,5 @@ obj-$(CONFIG_IAVF) += iavf.o iavf-y := iavf_main.o iavf_ethtool.o iavf_virtchnl.o iavf_fdir.o \ iavf_adv_rss.o iavf_txrx.o iavf_common.o iavf_adminq.o + +iavf-$(CONFIG_PTP_1588_CLOCK) += iavf_ptp.o diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 532a0a595fe8..9de3e0ba3731 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -41,6 +41,7 @@ #include "iavf_txrx.h" #include "iavf_fdir.h" #include "iavf_adv_rss.h" +#include "iavf_types.h" #include <linux/bitmap.h> #define DEFAULT_DEBUG_LEVEL_SHIFT 3 @@ -82,7 +83,7 @@ struct iavf_vsi { #define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN) -#define IAVF_RX_DESC(R, i) (&(((union iavf_32byte_rx_desc *)((R)->desc))[i])) +#define IAVF_RX_DESC(R, i) (&(((struct iavf_rx_desc *)((R)->desc))[i])) #define IAVF_TX_DESC(R, i) (&(((struct iavf_tx_desc *)((R)->desc))[i])) #define IAVF_TX_CTXTDESC(R, i) \ (&(((struct iavf_tx_context_desc *)((R)->desc))[i])) @@ -271,6 +272,7 @@ struct iavf_adapter { /* Lock to protect accesses to MAC and VLAN lists */ spinlock_t mac_vlan_list_lock; char misc_vector_name[IFNAMSIZ + 9]; + u8 rxdid; int num_active_queues; int num_req_queues; @@ -343,6 +345,17 @@ struct iavf_adapter { #define IAVF_FLAG_AQ_CONFIGURE_QUEUES_BW BIT_ULL(39) #define IAVF_FLAG_AQ_CFG_QUEUES_QUANTA_SIZE BIT_ULL(40) #define IAVF_FLAG_AQ_GET_QOS_CAPS BIT_ULL(41) +#define IAVF_FLAG_AQ_GET_SUPPORTED_RXDIDS BIT_ULL(42) +#define IAVF_FLAG_AQ_GET_PTP_CAPS BIT_ULL(43) +#define IAVF_FLAG_AQ_SEND_PTP_CMD BIT_ULL(44) + + /* AQ messages that must be sent after IAVF_FLAG_AQ_GET_CONFIG, in + * order to negotiated extended capabilities. + */ +#define IAVF_FLAG_AQ_EXTENDED_CAPS \ + (IAVF_FLAG_AQ_GET_OFFLOAD_VLAN_V2_CAPS | \ + IAVF_FLAG_AQ_GET_SUPPORTED_RXDIDS | \ + IAVF_FLAG_AQ_GET_PTP_CAPS) /* flags for processing extended capability messages during * __IAVF_INIT_EXTENDED_CAPS. Each capability exchange requires @@ -354,10 +367,18 @@ struct iavf_adapter { u64 extended_caps; #define IAVF_EXTENDED_CAP_SEND_VLAN_V2 BIT_ULL(0) #define IAVF_EXTENDED_CAP_RECV_VLAN_V2 BIT_ULL(1) +#define IAVF_EXTENDED_CAP_SEND_RXDID BIT_ULL(2) +#define IAVF_EXTENDED_CAP_RECV_RXDID BIT_ULL(3) +#define IAVF_EXTENDED_CAP_SEND_PTP BIT_ULL(4) +#define IAVF_EXTENDED_CAP_RECV_PTP BIT_ULL(5) #define IAVF_EXTENDED_CAPS \ (IAVF_EXTENDED_CAP_SEND_VLAN_V2 | \ - IAVF_EXTENDED_CAP_RECV_VLAN_V2) + IAVF_EXTENDED_CAP_RECV_VLAN_V2 | \ + IAVF_EXTENDED_CAP_SEND_RXDID | \ + IAVF_EXTENDED_CAP_RECV_RXDID | \ + IAVF_EXTENDED_CAP_SEND_PTP | \ + IAVF_EXTENDED_CAP_RECV_PTP) /* Lock to prevent possible clobbering of * current_netdev_promisc_flags @@ -417,12 +438,18 @@ struct iavf_adapter { VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF) #define QOS_ALLOWED(_a) ((_a)->vf_res->vf_cap_flags & \ VIRTCHNL_VF_OFFLOAD_QOS) +#define IAVF_RXDID_ALLOWED(a) \ + ((a)->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) +#define IAVF_PTP_ALLOWED(a) \ + ((a)->vf_res->vf_cap_flags & VIRTCHNL_VF_CAP_PTP) struct virtchnl_vf_resource *vf_res; /* incl. all VSIs */ struct virtchnl_vsi_resource *vsi_res; /* our LAN VSI */ struct virtchnl_version_info pf_version; #define PF_IS_V11(_a) (((_a)->pf_version.major == 1) && \ ((_a)->pf_version.minor == 1)) struct virtchnl_vlan_caps vlan_v2_caps; + u64 supp_rxdids; + struct iavf_ptp ptp; u16 msg_enable; struct iavf_eth_stats current_stats; struct virtchnl_qos_cap_list *qos_caps; @@ -555,6 +582,10 @@ int iavf_send_vf_config_msg(struct iavf_adapter *adapter); int iavf_get_vf_config(struct iavf_adapter *adapter); int iavf_get_vf_vlan_v2_caps(struct iavf_adapter *adapter); int iavf_send_vf_offload_vlan_v2_msg(struct iavf_adapter *adapter); +int iavf_send_vf_supported_rxdids_msg(struct iavf_adapter *adapter); +int iavf_get_vf_supported_rxdids(struct iavf_adapter *adapter); +int iavf_send_vf_ptp_caps_msg(struct iavf_adapter *adapter); +int iavf_get_vf_ptp_caps(struct iavf_adapter *adapter); void iavf_set_queue_vlan_tag_loc(struct iavf_adapter *adapter); u16 iavf_get_num_vlans_added(struct iavf_adapter *adapter); void iavf_irq_enable(struct iavf_adapter *adapter, bool flush); diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 852e5b62f0a5..4c29739780bd 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -4,6 +4,7 @@ #include <linux/net/intel/libie/rx.h> #include "iavf.h" +#include "iavf_ptp.h" #include "iavf_prototype.h" /* All iavf tracepoints are defined by the include below, which must * be included exactly once across the whole kernel with @@ -710,6 +711,47 @@ static void iavf_configure_tx(struct iavf_adapter *adapter) } /** + * iavf_select_rx_desc_format - Select Rx descriptor format + * @adapter: adapter private structure + * + * Select what Rx descriptor format based on availability and enabled + * features. + * + * Return: the desired RXDID to select for a given Rx queue, as defined by + * enum virtchnl_rxdid_format. + */ +static u8 iavf_select_rx_desc_format(const struct iavf_adapter *adapter) +{ + u64 rxdids = adapter->supp_rxdids; + + /* If we did not negotiate VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC, we must + * stick with the default value of the legacy 32 byte format. + */ + if (!IAVF_RXDID_ALLOWED(adapter)) + return VIRTCHNL_RXDID_1_32B_BASE; + + /* Rx timestamping requires the use of flexible NIC descriptors */ + if (iavf_ptp_cap_supported(adapter, VIRTCHNL_1588_PTP_CAP_RX_TSTAMP)) { + if (rxdids & BIT(VIRTCHNL_RXDID_2_FLEX_SQ_NIC)) + return VIRTCHNL_RXDID_2_FLEX_SQ_NIC; + + pci_warn(adapter->pdev, + "Unable to negotiate flexible descriptor format\n"); + } + + /* Warn if the PF does not list support for the default legacy + * descriptor format. This shouldn't happen, as this is the format + * used if VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC is not supported. It is + * likely caused by a bug in the PF implementation failing to indicate + * support for the format. + */ + if (!(rxdids & VIRTCHNL_RXDID_1_32B_BASE_M)) + netdev_warn(adapter->netdev, "PF does not list support for default Rx descriptor format\n"); + + return VIRTCHNL_RXDID_1_32B_BASE; +} + +/** * iavf_configure_rx - Configure Receive Unit after Reset * @adapter: board private structure * @@ -719,8 +761,12 @@ static void iavf_configure_rx(struct iavf_adapter *adapter) { struct iavf_hw *hw = &adapter->hw; - for (u32 i = 0; i < adapter->num_active_queues; i++) + adapter->rxdid = iavf_select_rx_desc_format(adapter); + + for (u32 i = 0; i < adapter->num_active_queues; i++) { adapter->rx_rings[i].tail = hw->hw_addr + IAVF_QRX_TAIL1(i); + adapter->rx_rings[i].rxdid = adapter->rxdid; + } } /** @@ -2071,6 +2117,10 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter) return iavf_send_vf_config_msg(adapter); if (adapter->aq_required & IAVF_FLAG_AQ_GET_OFFLOAD_VLAN_V2_CAPS) return iavf_send_vf_offload_vlan_v2_msg(adapter); + if (adapter->aq_required & IAVF_FLAG_AQ_GET_SUPPORTED_RXDIDS) + return iavf_send_vf_supported_rxdids_msg(adapter); + if (adapter->aq_required & IAVF_FLAG_AQ_GET_PTP_CAPS) + return iavf_send_vf_ptp_caps_msg(adapter); if (adapter->aq_required & IAVF_FLAG_AQ_DISABLE_QUEUES) { iavf_disable_queues(adapter); return 0; @@ -2235,7 +2285,10 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter) iavf_enable_vlan_insertion_v2(adapter, ETH_P_8021AD); return 0; } - + if (adapter->aq_required & IAVF_FLAG_AQ_SEND_PTP_CMD) { + iavf_virtchnl_send_ptp_cmd(adapter); + return IAVF_SUCCESS; + } if (adapter->aq_required & IAVF_FLAG_AQ_REQUEST_STATS) { iavf_request_stats(adapter); return 0; @@ -2600,6 +2653,112 @@ err: } /** + * iavf_init_send_supported_rxdids - part of querying for supported RXDID + * formats + * @adapter: board private structure + * + * Function processes send of the request for supported RXDIDs to the PF. + * Must clear IAVF_EXTENDED_CAP_RECV_RXDID if the message is not sent, e.g. + * due to the PF not negotiating VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC. + */ +static void iavf_init_send_supported_rxdids(struct iavf_adapter *adapter) +{ + int ret; + + ret = iavf_send_vf_supported_rxdids_msg(adapter); + if (ret == -EOPNOTSUPP) { + /* PF does not support VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC. In this + * case, we did not send the capability exchange message and + * do not expect a response. + */ + adapter->extended_caps &= ~IAVF_EXTENDED_CAP_RECV_RXDID; + } + + /* We sent the message, so move on to the next step */ + adapter->extended_caps &= ~IAVF_EXTENDED_CAP_SEND_RXDID; +} + +/** + * iavf_init_recv_supported_rxdids - part of querying for supported RXDID + * formats + * @adapter: board private structure + * + * Function processes receipt of the supported RXDIDs message from the PF. + **/ +static void iavf_init_recv_supported_rxdids(struct iavf_adapter *adapter) +{ + int ret; + + memset(&adapter->supp_rxdids, 0, sizeof(adapter->supp_rxdids)); + + ret = iavf_get_vf_supported_rxdids(adapter); + if (ret) + goto err; + + /* We've processed the PF response to the + * VIRTCHNL_OP_GET_SUPPORTED_RXDIDS message we sent previously. + */ + adapter->extended_caps &= ~IAVF_EXTENDED_CAP_RECV_RXDID; + return; + +err: + /* We didn't receive a reply. Make sure we try sending again when + * __IAVF_INIT_FAILED attempts to recover. + */ + adapter->extended_caps |= IAVF_EXTENDED_CAP_SEND_RXDID; + iavf_change_state(adapter, __IAVF_INIT_FAILED); +} + +/** + * iavf_init_send_ptp_caps - part of querying for extended PTP capabilities + * @adapter: board private structure + * + * Function processes send of the request for 1588 PTP capabilities to the PF. + * Must clear IAVF_EXTENDED_CAP_SEND_PTP if the message is not sent, e.g. + * due to the PF not negotiating VIRTCHNL_VF_PTP_CAP + */ +static void iavf_init_send_ptp_caps(struct iavf_adapter *adapter) +{ + if (iavf_send_vf_ptp_caps_msg(adapter) == -EOPNOTSUPP) { + /* PF does not support VIRTCHNL_VF_PTP_CAP. In this case, we + * did not send the capability exchange message and do not + * expect a response. + */ + adapter->extended_caps &= ~IAVF_EXTENDED_CAP_RECV_PTP; + } + + /* We sent the message, so move on to the next step */ + adapter->extended_caps &= ~IAVF_EXTENDED_CAP_SEND_PTP; +} + +/** + * iavf_init_recv_ptp_caps - part of querying for supported PTP capabilities + * @adapter: board private structure + * + * Function processes receipt of the PTP capabilities supported on this VF. + **/ +static void iavf_init_recv_ptp_caps(struct iavf_adapter *adapter) +{ + memset(&adapter->ptp.hw_caps, 0, sizeof(adapter->ptp.hw_caps)); + + if (iavf_get_vf_ptp_caps(adapter)) + goto err; + + /* We've processed the PF response to the VIRTCHNL_OP_1588_PTP_GET_CAPS + * message we sent previously. + */ + adapter->extended_caps &= ~IAVF_EXTENDED_CAP_RECV_PTP; + return; + +err: + /* We didn't receive a reply. Make sure we try sending again when + * __IAVF_INIT_FAILED attempts to recover. + */ + adapter->extended_caps |= IAVF_EXTENDED_CAP_SEND_PTP; + iavf_change_state(adapter, __IAVF_INIT_FAILED); +} + +/** * iavf_init_process_extended_caps - Part of driver startup * @adapter: board private structure * @@ -2623,6 +2782,24 @@ static void iavf_init_process_extended_caps(struct iavf_adapter *adapter) return; } + /* Process capability exchange for RXDID formats */ + if (adapter->extended_caps & IAVF_EXTENDED_CAP_SEND_RXDID) { + iavf_init_send_supported_rxdids(adapter); + return; + } else if (adapter->extended_caps & IAVF_EXTENDED_CAP_RECV_RXDID) { + iavf_init_recv_supported_rxdids(adapter); + return; + } + + /* Process capability exchange for PTP features */ + if (adapter->extended_caps & IAVF_EXTENDED_CAP_SEND_PTP) { + iavf_init_send_ptp_caps(adapter); + return; + } else if (adapter->extended_caps & IAVF_EXTENDED_CAP_RECV_PTP) { + iavf_init_recv_ptp_caps(adapter); + return; + } + /* When we reach here, no further extended capabilities exchanges are * necessary, so we finally transition into __IAVF_INIT_CONFIG_ADAPTER */ @@ -2714,6 +2891,9 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) if (QOS_ALLOWED(adapter)) adapter->aq_required |= IAVF_FLAG_AQ_GET_QOS_CAPS; + /* Setup initial PTP configuration */ + iavf_ptp_init(adapter); + iavf_schedule_finish_config(adapter); return; @@ -3139,15 +3319,18 @@ continue_reset: } adapter->aq_required |= IAVF_FLAG_AQ_GET_CONFIG; - /* always set since VIRTCHNL_OP_GET_VF_RESOURCES has not been - * sent/received yet, so VLAN_V2_ALLOWED() cannot is not reliable here, - * however the VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS won't be sent until - * VIRTCHNL_OP_GET_VF_RESOURCES and VIRTCHNL_VF_OFFLOAD_VLAN_V2 have - * been successfully sent and negotiated - */ - adapter->aq_required |= IAVF_FLAG_AQ_GET_OFFLOAD_VLAN_V2_CAPS; adapter->aq_required |= IAVF_FLAG_AQ_MAP_VECTORS; + /* Certain capabilities require an extended negotiation process using + * extra messages that must be processed after getting the VF + * configuration. The related checks such as VLAN_V2_ALLOWED() are not + * reliable here, since the configuration has not yet been negotiated. + * + * Always set these flags, since them related VIRTCHNL messages won't + * be sent until after VIRTCHNL_OP_GET_VF_RESOURCES. + */ + adapter->aq_required |= IAVF_FLAG_AQ_EXTENDED_CAPS; + spin_lock_bh(&adapter->mac_vlan_list_lock); /* Delete filter for the current MAC address, it could have @@ -4996,6 +5179,25 @@ static netdev_features_t iavf_fix_features(struct net_device *netdev, return iavf_fix_strip_features(adapter, features); } +static int iavf_hwstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *config) +{ + struct iavf_adapter *adapter = netdev_priv(netdev); + + *config = adapter->ptp.hwtstamp_config; + + return 0; +} + +static int iavf_hwstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) +{ + struct iavf_adapter *adapter = netdev_priv(netdev); + + return iavf_ptp_set_ts_config(adapter, config, extack); +} + static int iavf_verify_shaper(struct net_shaper_binding *binding, const struct net_shaper *shaper, @@ -5104,6 +5306,8 @@ static const struct net_device_ops iavf_netdev_ops = { .ndo_set_features = iavf_set_features, .ndo_setup_tc = iavf_setup_tc, .net_shaper_ops = &iavf_shaper_ops, + .ndo_hwtstamp_get = iavf_hwstamp_get, + .ndo_hwtstamp_set = iavf_hwstamp_set, }; /** @@ -5358,6 +5562,10 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Setup the wait queue for indicating virtchannel events */ init_waitqueue_head(&adapter->vc_waitqueue); + INIT_LIST_HEAD(&adapter->ptp.aq_cmds); + init_waitqueue_head(&adapter->ptp.phc_time_waitqueue); + mutex_init(&adapter->ptp.aq_cmd_lock); + queue_delayed_work(adapter->wq, &adapter->watchdog_task, msecs_to_jiffies(5 * (pdev->devfn & 0x07))); /* Initialization goes on in the work. Do not add more of it below. */ @@ -5514,6 +5722,8 @@ static void iavf_remove(struct pci_dev *pdev) msleep(50); } + iavf_ptp_release(adapter); + iavf_misc_irq_disable(adapter); /* Shut down all the garbage mashers on the detention level */ cancel_work_sync(&adapter->reset_task); diff --git a/drivers/net/ethernet/intel/iavf/iavf_ptp.c b/drivers/net/ethernet/intel/iavf/iavf_ptp.c new file mode 100644 index 000000000000..b4d5eda2e84f --- /dev/null +++ b/drivers/net/ethernet/intel/iavf/iavf_ptp.c @@ -0,0 +1,485 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2024 Intel Corporation. */ + +#include "iavf.h" +#include "iavf_ptp.h" + +#define iavf_clock_to_adapter(info) \ + container_of_const(info, struct iavf_adapter, ptp.info) + +/** + * iavf_ptp_disable_rx_tstamp - Disable timestamping in Rx rings + * @adapter: private adapter structure + * + * Disable timestamp reporting for all Rx rings. + */ +static void iavf_ptp_disable_rx_tstamp(struct iavf_adapter *adapter) +{ + for (u32 i = 0; i < adapter->num_active_queues; i++) + adapter->rx_rings[i].flags &= ~IAVF_TXRX_FLAGS_HW_TSTAMP; +} + +/** + * iavf_ptp_enable_rx_tstamp - Enable timestamping in Rx rings + * @adapter: private adapter structure + * + * Enable timestamp reporting for all Rx rings. + */ +static void iavf_ptp_enable_rx_tstamp(struct iavf_adapter *adapter) +{ + for (u32 i = 0; i < adapter->num_active_queues; i++) + adapter->rx_rings[i].flags |= IAVF_TXRX_FLAGS_HW_TSTAMP; +} + +/** + * iavf_ptp_set_timestamp_mode - Set device timestamping mode + * @adapter: private adapter structure + * @config: pointer to kernel_hwtstamp_config + * + * Set the timestamping mode requested from the userspace. + * + * Note: this function always translates Rx timestamp requests for any packet + * category into HWTSTAMP_FILTER_ALL. + * + * Return: 0 on success, negative error code otherwise. + */ +static int iavf_ptp_set_timestamp_mode(struct iavf_adapter *adapter, + struct kernel_hwtstamp_config *config) +{ + /* Reserved for future extensions. */ + if (config->flags) + return -EINVAL; + + switch (config->tx_type) { + case HWTSTAMP_TX_OFF: + break; + case HWTSTAMP_TX_ON: + return -EOPNOTSUPP; + default: + return -ERANGE; + } + + if (config->rx_filter == HWTSTAMP_FILTER_NONE) { + iavf_ptp_disable_rx_tstamp(adapter); + return 0; + } else if (config->rx_filter > HWTSTAMP_FILTER_NTP_ALL) { + return -ERANGE; + } else if (!(iavf_ptp_cap_supported(adapter, + VIRTCHNL_1588_PTP_CAP_RX_TSTAMP))) { + return -EOPNOTSUPP; + } + + config->rx_filter = HWTSTAMP_FILTER_ALL; + iavf_ptp_enable_rx_tstamp(adapter); + + return 0; +} + +/** + * iavf_ptp_set_ts_config - Set timestamping configuration + * @adapter: private adapter structure + * @config: pointer to kernel_hwtstamp_config structure + * @extack: pointer to netlink_ext_ack structure + * + * Program the requested timestamping configuration to the device. + * + * Return: 0 on success, negative error code otherwise. + */ +int iavf_ptp_set_ts_config(struct iavf_adapter *adapter, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) +{ + int err; + + err = iavf_ptp_set_timestamp_mode(adapter, config); + if (err) + return err; + + /* Save successful settings for future reference */ + adapter->ptp.hwtstamp_config = *config; + + return 0; +} + +/** + * iavf_ptp_cap_supported - Check if a PTP capability is supported + * @adapter: private adapter structure + * @cap: the capability bitmask to check + * + * Return: true if every capability set in cap is also set in the enabled + * capabilities reported by the PF, false otherwise. + */ +bool iavf_ptp_cap_supported(const struct iavf_adapter *adapter, u32 cap) +{ + if (!IAVF_PTP_ALLOWED(adapter)) + return false; + + /* Only return true if every bit in cap is set in hw_caps.caps */ + return (adapter->ptp.hw_caps.caps & cap) == cap; +} + +/** + * iavf_allocate_ptp_cmd - Allocate a PTP command message structure + * @v_opcode: the virtchnl opcode + * @msglen: length in bytes of the associated virtchnl structure + * + * Allocates a PTP command message and pre-fills it with the provided message + * length and opcode. + * + * Return: allocated PTP command. + */ +static struct iavf_ptp_aq_cmd *iavf_allocate_ptp_cmd(enum virtchnl_ops v_opcode, + u16 msglen) +{ + struct iavf_ptp_aq_cmd *cmd; + + cmd = kzalloc(struct_size(cmd, msg, msglen), GFP_KERNEL); + if (!cmd) + return NULL; + + cmd->v_opcode = v_opcode; + cmd->msglen = msglen; + + return cmd; +} + +/** + * iavf_queue_ptp_cmd - Queue PTP command for sending over virtchnl + * @adapter: private adapter structure + * @cmd: the command structure to send + * + * Queue the given command structure into the PTP virtchnl command queue tos + * end to the PF. + */ +static void iavf_queue_ptp_cmd(struct iavf_adapter *adapter, + struct iavf_ptp_aq_cmd *cmd) +{ + mutex_lock(&adapter->ptp.aq_cmd_lock); + list_add_tail(&cmd->list, &adapter->ptp.aq_cmds); + mutex_unlock(&adapter->ptp.aq_cmd_lock); + + adapter->aq_required |= IAVF_FLAG_AQ_SEND_PTP_CMD; + mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0); +} + +/** + * iavf_send_phc_read - Send request to read PHC time + * @adapter: private adapter structure + * + * Send a request to obtain the PTP hardware clock time. This allocates the + * VIRTCHNL_OP_1588_PTP_GET_TIME message and queues it up to send to + * indirectly read the PHC time. + * + * This function does not wait for the reply from the PF. + * + * Return: 0 if success, error code otherwise. + */ +static int iavf_send_phc_read(struct iavf_adapter *adapter) +{ + struct iavf_ptp_aq_cmd *cmd; + + if (!adapter->ptp.clock) + return -EOPNOTSUPP; + + cmd = iavf_allocate_ptp_cmd(VIRTCHNL_OP_1588_PTP_GET_TIME, + sizeof(struct virtchnl_phc_time)); + if (!cmd) + return -ENOMEM; + + iavf_queue_ptp_cmd(adapter, cmd); + + return 0; +} + +/** + * iavf_read_phc_indirect - Indirectly read the PHC time via virtchnl + * @adapter: private adapter structure + * @ts: storage for the timestamp value + * @sts: system timestamp values before and after the read + * + * Used when the device does not have direct register access to the PHC time. + * Indirectly reads the time via the VIRTCHNL_OP_1588_PTP_GET_TIME, and waits + * for the reply from the PF. + * + * Based on some simple measurements using ftrace and phc2sys, this clock + * access method has about a ~110 usec latency even when the system is not + * under load. In order to achieve acceptable results when using phc2sys with + * the indirect clock access method, it is recommended to use more + * conservative proportional and integration constants with the P/I servo. + * + * Return: 0 if success, error code otherwise. + */ +static int iavf_read_phc_indirect(struct iavf_adapter *adapter, + struct timespec64 *ts, + struct ptp_system_timestamp *sts) +{ + long ret; + int err; + + adapter->ptp.phc_time_ready = false; + + ptp_read_system_prets(sts); + + err = iavf_send_phc_read(adapter); + if (err) + return err; + + ret = wait_event_interruptible_timeout(adapter->ptp.phc_time_waitqueue, + adapter->ptp.phc_time_ready, + HZ); + + ptp_read_system_postts(sts); + + if (ret < 0) + return ret; + else if (!ret) + return -EBUSY; + + *ts = ns_to_timespec64(adapter->ptp.cached_phc_time); + + return 0; +} + +static int iavf_ptp_gettimex64(struct ptp_clock_info *info, + struct timespec64 *ts, + struct ptp_system_timestamp *sts) +{ + struct iavf_adapter *adapter = iavf_clock_to_adapter(info); + + if (!adapter->ptp.clock) + return -EOPNOTSUPP; + + return iavf_read_phc_indirect(adapter, ts, sts); +} + +/** + * iavf_ptp_cache_phc_time - Cache PHC time for performing timestamp extension + * @adapter: private adapter structure + * + * Periodically cache the PHC time in order to allow for timestamp extension. + * This is required because the Tx and Rx timestamps only contain 32bits of + * nanoseconds. Timestamp extension allows calculating the corrected 64bit + * timestamp. This algorithm relies on the cached time being within ~1 second + * of the timestamp. + */ +static void iavf_ptp_cache_phc_time(struct iavf_adapter *adapter) +{ + if (!time_is_before_jiffies(adapter->ptp.cached_phc_updated + HZ)) + return; + + /* The response from virtchnl will store the time into + * cached_phc_time. + */ + iavf_send_phc_read(adapter); +} + +/** + * iavf_ptp_do_aux_work - Perform periodic work required for PTP support + * @info: PTP clock info structure + * + * Handler to take care of periodic work required for PTP operation. This + * includes the following tasks: + * + * 1) updating cached_phc_time + * + * cached_phc_time is used by the Tx and Rx timestamp flows in order to + * perform timestamp extension, by carefully comparing the timestamp + * 32bit nanosecond timestamps and determining the corrected 64bit + * timestamp value to report to userspace. This algorithm only works if + * the cached_phc_time is within ~1 second of the Tx or Rx timestamp + * event. This task periodically reads the PHC time and stores it, to + * ensure that timestamp extension operates correctly. + * + * Returns: time in jiffies until the periodic task should be re-scheduled. + */ +static long iavf_ptp_do_aux_work(struct ptp_clock_info *info) +{ + struct iavf_adapter *adapter = iavf_clock_to_adapter(info); + + iavf_ptp_cache_phc_time(adapter); + + /* Check work about twice a second */ + return msecs_to_jiffies(500); +} + +/** + * iavf_ptp_register_clock - Register a new PTP for userspace + * @adapter: private adapter structure + * + * Allocate and register a new PTP clock device if necessary. + * + * Return: 0 if success, error otherwise. + */ +static int iavf_ptp_register_clock(struct iavf_adapter *adapter) +{ + struct ptp_clock_info *ptp_info = &adapter->ptp.info; + struct device *dev = &adapter->pdev->dev; + struct ptp_clock *clock; + + snprintf(ptp_info->name, sizeof(ptp_info->name), "%s-%s-clk", + KBUILD_MODNAME, dev_name(dev)); + ptp_info->owner = THIS_MODULE; + ptp_info->gettimex64 = iavf_ptp_gettimex64; + ptp_info->do_aux_work = iavf_ptp_do_aux_work; + + clock = ptp_clock_register(ptp_info, dev); + if (IS_ERR(clock)) + return PTR_ERR(clock); + + adapter->ptp.clock = clock; + + dev_dbg(&adapter->pdev->dev, "PTP clock %s registered\n", + adapter->ptp.info.name); + + return 0; +} + +/** + * iavf_ptp_init - Initialize PTP support if capability was negotiated + * @adapter: private adapter structure + * + * Initialize PTP functionality, based on the capabilities that the PF has + * enabled for this VF. + */ +void iavf_ptp_init(struct iavf_adapter *adapter) +{ + int err; + + if (!iavf_ptp_cap_supported(adapter, VIRTCHNL_1588_PTP_CAP_READ_PHC)) { + pci_notice(adapter->pdev, + "Device does not have PTP clock support\n"); + return; + } + + err = iavf_ptp_register_clock(adapter); + if (err) { + pci_err(adapter->pdev, + "Failed to register PTP clock device (%p)\n", + ERR_PTR(err)); + return; + } + + for (int i = 0; i < adapter->num_active_queues; i++) { + struct iavf_ring *rx_ring = &adapter->rx_rings[i]; + + rx_ring->ptp = &adapter->ptp; + } + + ptp_schedule_worker(adapter->ptp.clock, 0); +} + +/** + * iavf_ptp_release - Disable PTP support + * @adapter: private adapter structure + * + * Release all PTP resources that were previously initialized. + */ +void iavf_ptp_release(struct iavf_adapter *adapter) +{ + struct iavf_ptp_aq_cmd *cmd, *tmp; + + if (!adapter->ptp.clock) + return; + + pci_dbg(adapter->pdev, "removing PTP clock %s\n", + adapter->ptp.info.name); + ptp_clock_unregister(adapter->ptp.clock); + adapter->ptp.clock = NULL; + + /* Cancel any remaining uncompleted PTP clock commands */ + mutex_lock(&adapter->ptp.aq_cmd_lock); + list_for_each_entry_safe(cmd, tmp, &adapter->ptp.aq_cmds, list) { + list_del(&cmd->list); + kfree(cmd); + } + adapter->aq_required &= ~IAVF_FLAG_AQ_SEND_PTP_CMD; + mutex_unlock(&adapter->ptp.aq_cmd_lock); + + adapter->ptp.hwtstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; + iavf_ptp_disable_rx_tstamp(adapter); +} + +/** + * iavf_ptp_process_caps - Handle change in PTP capabilities + * @adapter: private adapter structure + * + * Handle any state changes necessary due to change in PTP capabilities, such + * as after a device reset or change in configuration from the PF. + */ +void iavf_ptp_process_caps(struct iavf_adapter *adapter) +{ + bool phc = iavf_ptp_cap_supported(adapter, VIRTCHNL_1588_PTP_CAP_READ_PHC); + + /* Check if the device gained or lost necessary access to support the + * PTP hardware clock. If so, driver must respond appropriately by + * creating or destroying the PTP clock device. + */ + if (adapter->ptp.clock && !phc) + iavf_ptp_release(adapter); + else if (!adapter->ptp.clock && phc) + iavf_ptp_init(adapter); + + /* Check if the device lost access to Rx timestamp incoming packets */ + if (!iavf_ptp_cap_supported(adapter, VIRTCHNL_1588_PTP_CAP_RX_TSTAMP)) { + adapter->ptp.hwtstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; + iavf_ptp_disable_rx_tstamp(adapter); + } +} + +/** + * iavf_ptp_extend_32b_timestamp - Convert a 32b nanoseconds timestamp to 64b + * nanoseconds + * @cached_phc_time: recently cached copy of PHC time + * @in_tstamp: Ingress/egress 32b nanoseconds timestamp value + * + * Hardware captures timestamps which contain only 32 bits of nominal + * nanoseconds, as opposed to the 64bit timestamps that the stack expects. + * + * Extend the 32bit nanosecond timestamp using the following algorithm and + * assumptions: + * + * 1) have a recently cached copy of the PHC time + * 2) assume that the in_tstamp was captured 2^31 nanoseconds (~2.1 + * seconds) before or after the PHC time was captured. + * 3) calculate the delta between the cached time and the timestamp + * 4) if the delta is smaller than 2^31 nanoseconds, then the timestamp was + * captured after the PHC time. In this case, the full timestamp is just + * the cached PHC time plus the delta. + * 5) otherwise, if the delta is larger than 2^31 nanoseconds, then the + * timestamp was captured *before* the PHC time, i.e. because the PHC + * cache was updated after the timestamp was captured by hardware. In this + * case, the full timestamp is the cached time minus the inverse delta. + * + * This algorithm works even if the PHC time was updated after a Tx timestamp + * was requested, but before the Tx timestamp event was reported from + * hardware. + * + * This calculation primarily relies on keeping the cached PHC time up to + * date. If the timestamp was captured more than 2^31 nanoseconds after the + * PHC time, it is possible that the lower 32bits of PHC time have + * overflowed more than once, and we might generate an incorrect timestamp. + * + * This is prevented by (a) periodically updating the cached PHC time once + * a second, and (b) discarding any Tx timestamp packet if it has waited for + * a timestamp for more than one second. + * + * Return: extended timestamp (to 64b). + */ +u64 iavf_ptp_extend_32b_timestamp(u64 cached_phc_time, u32 in_tstamp) +{ + u32 low = lower_32_bits(cached_phc_time); + u32 delta = in_tstamp - low; + u64 ns; + + /* Do not assume that the in_tstamp is always more recent than the + * cached PHC time. If the delta is large, it indicates that the + * in_tstamp was taken in the past, and should be converted + * forward. + */ + if (delta > S32_MAX) + ns = cached_phc_time - (low - in_tstamp); + else + ns = cached_phc_time + delta; + + return ns; +} diff --git a/drivers/net/ethernet/intel/iavf/iavf_ptp.h b/drivers/net/ethernet/intel/iavf/iavf_ptp.h new file mode 100644 index 000000000000..783b8f287cd9 --- /dev/null +++ b/drivers/net/ethernet/intel/iavf/iavf_ptp.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2024 Intel Corporation. */ + +#ifndef _IAVF_PTP_H_ +#define _IAVF_PTP_H_ + +#include "iavf_types.h" + +/* bit indicating whether a 40bit timestamp is valid */ +#define IAVF_PTP_40B_TSTAMP_VALID BIT(24) + +#if IS_ENABLED(CONFIG_PTP_1588_CLOCK) +void iavf_ptp_init(struct iavf_adapter *adapter); +void iavf_ptp_release(struct iavf_adapter *adapter); +void iavf_ptp_process_caps(struct iavf_adapter *adapter); +bool iavf_ptp_cap_supported(const struct iavf_adapter *adapter, u32 cap); +void iavf_virtchnl_send_ptp_cmd(struct iavf_adapter *adapter); +int iavf_ptp_set_ts_config(struct iavf_adapter *adapter, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); +u64 iavf_ptp_extend_32b_timestamp(u64 cached_phc_time, u32 in_tstamp); +#else /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */ +static inline void iavf_ptp_init(struct iavf_adapter *adapter) { } +static inline void iavf_ptp_release(struct iavf_adapter *adapter) { } +static inline void iavf_ptp_process_caps(struct iavf_adapter *adapter) { } +static inline bool iavf_ptp_cap_supported(const struct iavf_adapter *adapter, + u32 cap) +{ + return false; +} + +static inline void iavf_virtchnl_send_ptp_cmd(struct iavf_adapter *adapter) { } +static inline int iavf_ptp_set_ts_config(struct iavf_adapter *adapter, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) +{ + return -1; +} + +static inline u64 iavf_ptp_extend_32b_timestamp(u64 cached_phc_time, + u32 in_tstamp) +{ + return 0; +} + +#endif /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */ +#endif /* _IAVF_PTP_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_trace.h b/drivers/net/ethernet/intel/iavf/iavf_trace.h index 62212011c807..c5e4d1823886 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_trace.h +++ b/drivers/net/ethernet/intel/iavf/iavf_trace.h @@ -112,7 +112,7 @@ DECLARE_EVENT_CLASS( iavf_rx_template, TP_PROTO(struct iavf_ring *ring, - union iavf_32byte_rx_desc *desc, + struct iavf_rx_desc *desc, struct sk_buff *skb), TP_ARGS(ring, desc, skb), @@ -140,7 +140,7 @@ DECLARE_EVENT_CLASS( DEFINE_EVENT( iavf_rx_template, iavf_clean_rx_irq, TP_PROTO(struct iavf_ring *ring, - union iavf_32byte_rx_desc *desc, + struct iavf_rx_desc *desc, struct sk_buff *skb), TP_ARGS(ring, desc, skb)); @@ -148,7 +148,7 @@ DEFINE_EVENT( DEFINE_EVENT( iavf_rx_template, iavf_clean_rx_irq_rx, TP_PROTO(struct iavf_ring *ring, - union iavf_32byte_rx_desc *desc, + struct iavf_rx_desc *desc, struct sk_buff *skb), TP_ARGS(ring, desc, skb)); diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index 26b424fd6718..422312b8b54a 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -8,6 +8,26 @@ #include "iavf.h" #include "iavf_trace.h" #include "iavf_prototype.h" +#include "iavf_ptp.h" + +/** + * iavf_is_descriptor_done - tests DD bit in Rx descriptor + * @qw1: quad word 1 from descriptor to get Descriptor Done field from + * @flex: is the descriptor flex or legacy + * + * This function tests the descriptor done bit in specified descriptor. Because + * there are two types of descriptors (legacy and flex) the parameter rx_ring + * is used to distinguish. + * + * Return: true or false based on the state of DD bit in Rx descriptor. + */ +static bool iavf_is_descriptor_done(u64 qw1, bool flex) +{ + if (flex) + return FIELD_GET(IAVF_RXD_FLEX_DD_M, qw1); + else + return FIELD_GET(IAVF_RXD_LEGACY_DD_M, qw1); +} static __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size, u32 td_tag) @@ -766,7 +786,7 @@ int iavf_setup_rx_descriptors(struct iavf_ring *rx_ring) u64_stats_init(&rx_ring->syncp); /* Round up to nearest 4K */ - rx_ring->size = rx_ring->count * sizeof(union iavf_32byte_rx_desc); + rx_ring->size = rx_ring->count * sizeof(struct iavf_rx_desc); rx_ring->size = ALIGN(rx_ring->size, 4096); rx_ring->desc = dma_alloc_coherent(fq.pp->p.dev, rx_ring->size, &rx_ring->dma, GFP_KERNEL); @@ -845,7 +865,7 @@ bool iavf_alloc_rx_buffers(struct iavf_ring *rx_ring, u16 cleaned_count) .count = rx_ring->count, }; u16 ntu = rx_ring->next_to_use; - union iavf_rx_desc *rx_desc; + struct iavf_rx_desc *rx_desc; /* do nothing if no valid netdev defined */ if (!rx_ring->netdev || !cleaned_count) @@ -863,7 +883,7 @@ bool iavf_alloc_rx_buffers(struct iavf_ring *rx_ring, u16 cleaned_count) /* Refresh the desc even if buffer_addrs didn't change * because each write-back erases this info. */ - rx_desc->read.pkt_addr = cpu_to_le64(addr); + rx_desc->qw0 = cpu_to_le64(addr); rx_desc++; ntu++; @@ -873,7 +893,7 @@ bool iavf_alloc_rx_buffers(struct iavf_ring *rx_ring, u16 cleaned_count) } /* clear the status bits for the next_to_use descriptor */ - rx_desc->wb.qword1.status_error_len = 0; + rx_desc->qw1 = 0; cleaned_count--; } while (cleaned_count); @@ -896,60 +916,43 @@ no_buffers: } /** - * iavf_rx_checksum - Indicate in skb if hw indicated a good cksum + * iavf_rx_csum - Indicate in skb if hw indicated a good checksum * @vsi: the VSI we care about * @skb: skb currently being received and modified - * @rx_desc: the receive descriptor + * @decoded_pt: decoded ptype information + * @csum_bits: decoded Rx descriptor information **/ -static void iavf_rx_checksum(struct iavf_vsi *vsi, - struct sk_buff *skb, - union iavf_rx_desc *rx_desc) +static void iavf_rx_csum(const struct iavf_vsi *vsi, struct sk_buff *skb, + struct libeth_rx_pt decoded_pt, + struct libeth_rx_csum csum_bits) { - struct libeth_rx_pt decoded; - u32 rx_error, rx_status; bool ipv4, ipv6; - u8 ptype; - u64 qword; skb->ip_summed = CHECKSUM_NONE; - qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - ptype = FIELD_GET(IAVF_RXD_QW1_PTYPE_MASK, qword); - - decoded = libie_rx_pt_parse(ptype); - if (!libeth_rx_pt_has_checksum(vsi->netdev, decoded)) - return; - - rx_error = FIELD_GET(IAVF_RXD_QW1_ERROR_MASK, qword); - rx_status = FIELD_GET(IAVF_RXD_QW1_STATUS_MASK, qword); - /* did the hardware decode the packet and checksum? */ - if (!(rx_status & BIT(IAVF_RX_DESC_STATUS_L3L4P_SHIFT))) + if (unlikely(!csum_bits.l3l4p)) return; - ipv4 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV4; - ipv6 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV6; + ipv4 = libeth_rx_pt_get_ip_ver(decoded_pt) == LIBETH_RX_PT_OUTER_IPV4; + ipv6 = libeth_rx_pt_get_ip_ver(decoded_pt) == LIBETH_RX_PT_OUTER_IPV6; - if (ipv4 && - (rx_error & (BIT(IAVF_RX_DESC_ERROR_IPE_SHIFT) | - BIT(IAVF_RX_DESC_ERROR_EIPE_SHIFT)))) + if (unlikely(ipv4 && (csum_bits.ipe || csum_bits.eipe))) goto checksum_fail; /* likely incorrect csum if alternate IP extension headers found */ - if (ipv6 && - rx_status & BIT(IAVF_RX_DESC_STATUS_IPV6EXADD_SHIFT)) - /* don't increment checksum err here, non-fatal err */ + if (unlikely(ipv6 && csum_bits.ipv6exadd)) return; /* there was some L4 error, count error and punt packet to the stack */ - if (rx_error & BIT(IAVF_RX_DESC_ERROR_L4E_SHIFT)) + if (unlikely(csum_bits.l4e)) goto checksum_fail; /* handle packets that were not able to be checksummed due * to arrival speed, in this case the stack can compute * the csum. */ - if (rx_error & BIT(IAVF_RX_DESC_ERROR_PPRS_SHIFT)) + if (unlikely(csum_bits.pprs)) return; skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -960,52 +963,196 @@ checksum_fail: } /** - * iavf_rx_hash - set the hash value in the skb + * iavf_legacy_rx_csum - Indicate in skb if hw indicated a good checksum + * @vsi: the VSI we care about + * @qw1: quad word 1 + * @decoded_pt: decoded packet type + * + * This function only operates on the VIRTCHNL_RXDID_1_32B_BASE legacy 32byte + * descriptor writeback format. + * + * Return: decoded checksum bits. + **/ +static struct libeth_rx_csum +iavf_legacy_rx_csum(const struct iavf_vsi *vsi, u64 qw1, + const struct libeth_rx_pt decoded_pt) +{ + struct libeth_rx_csum csum_bits = {}; + + if (!libeth_rx_pt_has_checksum(vsi->netdev, decoded_pt)) + return csum_bits; + + csum_bits.ipe = FIELD_GET(IAVF_RXD_LEGACY_IPE_M, qw1); + csum_bits.eipe = FIELD_GET(IAVF_RXD_LEGACY_EIPE_M, qw1); + csum_bits.l4e = FIELD_GET(IAVF_RXD_LEGACY_L4E_M, qw1); + csum_bits.pprs = FIELD_GET(IAVF_RXD_LEGACY_PPRS_M, qw1); + csum_bits.l3l4p = FIELD_GET(IAVF_RXD_LEGACY_L3L4P_M, qw1); + csum_bits.ipv6exadd = FIELD_GET(IAVF_RXD_LEGACY_IPV6EXADD_M, qw1); + + return csum_bits; +} + +/** + * iavf_flex_rx_csum - Indicate in skb if hw indicated a good checksum + * @vsi: the VSI we care about + * @qw1: quad word 1 + * @decoded_pt: decoded packet type + * + * This function only operates on the VIRTCHNL_RXDID_2_FLEX_SQ_NIC flexible + * descriptor writeback format. + * + * Return: decoded checksum bits. + **/ +static struct libeth_rx_csum +iavf_flex_rx_csum(const struct iavf_vsi *vsi, u64 qw1, + const struct libeth_rx_pt decoded_pt) +{ + struct libeth_rx_csum csum_bits = {}; + + if (!libeth_rx_pt_has_checksum(vsi->netdev, decoded_pt)) + return csum_bits; + + csum_bits.ipe = FIELD_GET(IAVF_RXD_FLEX_XSUM_IPE_M, qw1); + csum_bits.eipe = FIELD_GET(IAVF_RXD_FLEX_XSUM_EIPE_M, qw1); + csum_bits.l4e = FIELD_GET(IAVF_RXD_FLEX_XSUM_L4E_M, qw1); + csum_bits.eudpe = FIELD_GET(IAVF_RXD_FLEX_XSUM_EUDPE_M, qw1); + csum_bits.l3l4p = FIELD_GET(IAVF_RXD_FLEX_L3L4P_M, qw1); + csum_bits.ipv6exadd = FIELD_GET(IAVF_RXD_FLEX_IPV6EXADD_M, qw1); + csum_bits.nat = FIELD_GET(IAVF_RXD_FLEX_NAT_M, qw1); + + return csum_bits; +} + +/** + * iavf_legacy_rx_hash - set the hash value in the skb + * @ring: descriptor ring + * @qw0: quad word 0 + * @qw1: quad word 1 + * @skb: skb currently being received and modified + * @decoded_pt: decoded packet type + * + * This function only operates on the VIRTCHNL_RXDID_1_32B_BASE legacy 32byte + * descriptor writeback format. + **/ +static void iavf_legacy_rx_hash(const struct iavf_ring *ring, __le64 qw0, + __le64 qw1, struct sk_buff *skb, + const struct libeth_rx_pt decoded_pt) +{ + const __le64 rss_mask = cpu_to_le64(IAVF_RXD_LEGACY_FLTSTAT_M); + u32 hash; + + if (!libeth_rx_pt_has_hash(ring->netdev, decoded_pt)) + return; + + if ((qw1 & rss_mask) == rss_mask) { + hash = le64_get_bits(qw0, IAVF_RXD_LEGACY_RSS_M); + libeth_rx_pt_set_hash(skb, hash, decoded_pt); + } +} + +/** + * iavf_flex_rx_hash - set the hash value in the skb * @ring: descriptor ring - * @rx_desc: specific descriptor + * @qw1: quad word 1 * @skb: skb currently being received and modified - * @rx_ptype: Rx packet type + * @decoded_pt: decoded packet type + * + * This function only operates on the VIRTCHNL_RXDID_2_FLEX_SQ_NIC flexible + * descriptor writeback format. **/ -static void iavf_rx_hash(struct iavf_ring *ring, - union iavf_rx_desc *rx_desc, - struct sk_buff *skb, - u8 rx_ptype) +static void iavf_flex_rx_hash(const struct iavf_ring *ring, __le64 qw1, + struct sk_buff *skb, + const struct libeth_rx_pt decoded_pt) { - struct libeth_rx_pt decoded; + bool rss_valid; u32 hash; - const __le64 rss_mask = - cpu_to_le64((u64)IAVF_RX_DESC_FLTSTAT_RSS_HASH << - IAVF_RX_DESC_STATUS_FLTSTAT_SHIFT); - decoded = libie_rx_pt_parse(rx_ptype); - if (!libeth_rx_pt_has_hash(ring->netdev, decoded)) + if (!libeth_rx_pt_has_hash(ring->netdev, decoded_pt)) return; - if ((rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) { - hash = le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss); - libeth_rx_pt_set_hash(skb, hash, decoded); + rss_valid = le64_get_bits(qw1, IAVF_RXD_FLEX_RSS_VALID_M); + if (rss_valid) { + hash = le64_get_bits(qw1, IAVF_RXD_FLEX_RSS_HASH_M); + libeth_rx_pt_set_hash(skb, hash, decoded_pt); } } /** + * iavf_flex_rx_tstamp - Capture Rx timestamp from the descriptor + * @rx_ring: descriptor ring + * @qw2: quad word 2 of descriptor + * @qw3: quad word 3 of descriptor + * @skb: skb currently being received + * + * Read the Rx timestamp value from the descriptor and pass it to the stack. + * + * This function only operates on the VIRTCHNL_RXDID_2_FLEX_SQ_NIC flexible + * descriptor writeback format. + */ +static void iavf_flex_rx_tstamp(const struct iavf_ring *rx_ring, __le64 qw2, + __le64 qw3, struct sk_buff *skb) +{ + u32 tstamp; + u64 ns; + + /* Skip processing if timestamps aren't enabled */ + if (!(rx_ring->flags & IAVF_TXRX_FLAGS_HW_TSTAMP)) + return; + + /* Check if this Rx descriptor has a valid timestamp */ + if (!le64_get_bits(qw2, IAVF_PTP_40B_TSTAMP_VALID)) + return; + + /* the ts_low field only contains the valid bit and sub-nanosecond + * precision, so we don't need to extract it. + */ + tstamp = le64_get_bits(qw3, IAVF_RXD_FLEX_QW3_TSTAMP_HIGH_M); + + ns = iavf_ptp_extend_32b_timestamp(rx_ring->ptp->cached_phc_time, + tstamp); + + *skb_hwtstamps(skb) = (struct skb_shared_hwtstamps) { + .hwtstamp = ns_to_ktime(ns), + }; +} + +/** * iavf_process_skb_fields - Populate skb header fields from Rx descriptor * @rx_ring: rx descriptor ring packet is being transacted on * @rx_desc: pointer to the EOP Rx descriptor * @skb: pointer to current skb being populated - * @rx_ptype: the packet type decoded by hardware + * @ptype: the packet type decoded by hardware + * @flex: is the descriptor flex or legacy * * This function checks the ring, descriptor, and packet information in * order to populate the hash, checksum, VLAN, protocol, and * other fields within the skb. **/ -static void -iavf_process_skb_fields(struct iavf_ring *rx_ring, - union iavf_rx_desc *rx_desc, struct sk_buff *skb, - u8 rx_ptype) +static void iavf_process_skb_fields(const struct iavf_ring *rx_ring, + const struct iavf_rx_desc *rx_desc, + struct sk_buff *skb, u32 ptype, + bool flex) { - iavf_rx_hash(rx_ring, rx_desc, skb, rx_ptype); - - iavf_rx_checksum(rx_ring->vsi, skb, rx_desc); + struct libeth_rx_csum csum_bits; + struct libeth_rx_pt decoded_pt; + __le64 qw0 = rx_desc->qw0; + __le64 qw1 = rx_desc->qw1; + __le64 qw2 = rx_desc->qw2; + __le64 qw3 = rx_desc->qw3; + + decoded_pt = libie_rx_pt_parse(ptype); + + if (flex) { + iavf_flex_rx_hash(rx_ring, qw1, skb, decoded_pt); + iavf_flex_rx_tstamp(rx_ring, qw2, qw3, skb); + csum_bits = iavf_flex_rx_csum(rx_ring->vsi, le64_to_cpu(qw1), + decoded_pt); + } else { + iavf_legacy_rx_hash(rx_ring, qw0, qw1, skb, decoded_pt); + csum_bits = iavf_legacy_rx_csum(rx_ring->vsi, le64_to_cpu(qw1), + decoded_pt); + } + iavf_rx_csum(rx_ring->vsi, skb, decoded_pt, csum_bits); skb_record_rx_queue(skb, rx_ring->queue_index); @@ -1092,8 +1239,7 @@ static struct sk_buff *iavf_build_skb(const struct libeth_fqe *rx_buffer, /** * iavf_is_non_eop - process handling of non-EOP buffers * @rx_ring: Rx ring being processed - * @rx_desc: Rx descriptor for current buffer - * @skb: Current socket buffer containing buffer in progress + * @fields: Rx descriptor extracted fields * * This function updates next to clean. If the buffer is an EOP buffer * this function exits returning false, otherwise it will place the @@ -1101,8 +1247,7 @@ static struct sk_buff *iavf_build_skb(const struct libeth_fqe *rx_buffer, * that this is in fact a non-EOP buffer. **/ static bool iavf_is_non_eop(struct iavf_ring *rx_ring, - union iavf_rx_desc *rx_desc, - struct sk_buff *skb) + struct libeth_rqe_info fields) { u32 ntc = rx_ring->next_to_clean + 1; @@ -1113,8 +1258,7 @@ static bool iavf_is_non_eop(struct iavf_ring *rx_ring, prefetch(IAVF_RX_DESC(rx_ring, ntc)); /* if we are the last buffer then there is nothing else to do */ -#define IAVF_RXD_EOF BIT(IAVF_RX_DESC_STATUS_EOF_SHIFT) - if (likely(iavf_test_staterr(rx_desc, IAVF_RXD_EOF))) + if (likely(fields.eop)) return false; rx_ring->rx_stats.non_eop_descs++; @@ -1123,6 +1267,109 @@ static bool iavf_is_non_eop(struct iavf_ring *rx_ring, } /** + * iavf_extract_legacy_rx_fields - Extract fields from the Rx descriptor + * @rx_ring: rx descriptor ring + * @rx_desc: the descriptor to process + * + * Decode the Rx descriptor and extract relevant information including the + * size, VLAN tag, Rx packet type, end of packet field and RXE field value. + * + * This function only operates on the VIRTCHNL_RXDID_1_32B_BASE legacy 32byte + * descriptor writeback format. + * + * Return: fields extracted from the Rx descriptor. + */ +static struct libeth_rqe_info +iavf_extract_legacy_rx_fields(const struct iavf_ring *rx_ring, + const struct iavf_rx_desc *rx_desc) +{ + u64 qw0 = le64_to_cpu(rx_desc->qw0); + u64 qw1 = le64_to_cpu(rx_desc->qw1); + u64 qw2 = le64_to_cpu(rx_desc->qw2); + struct libeth_rqe_info fields; + bool l2tag1p, l2tag2p; + + fields.eop = FIELD_GET(IAVF_RXD_LEGACY_EOP_M, qw1); + fields.len = FIELD_GET(IAVF_RXD_LEGACY_LENGTH_M, qw1); + + if (!fields.eop) + return fields; + + fields.rxe = FIELD_GET(IAVF_RXD_LEGACY_RXE_M, qw1); + fields.ptype = FIELD_GET(IAVF_RXD_LEGACY_PTYPE_M, qw1); + fields.vlan = 0; + + if (rx_ring->flags & IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1) { + l2tag1p = FIELD_GET(IAVF_RXD_LEGACY_L2TAG1P_M, qw1); + if (l2tag1p) + fields.vlan = FIELD_GET(IAVF_RXD_LEGACY_L2TAG1_M, qw0); + } else if (rx_ring->flags & IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2) { + l2tag2p = FIELD_GET(IAVF_RXD_LEGACY_L2TAG2P_M, qw2); + if (l2tag2p) + fields.vlan = FIELD_GET(IAVF_RXD_LEGACY_L2TAG2_M, qw2); + } + + return fields; +} + +/** + * iavf_extract_flex_rx_fields - Extract fields from the Rx descriptor + * @rx_ring: rx descriptor ring + * @rx_desc: the descriptor to process + * + * Decode the Rx descriptor and extract relevant information including the + * size, VLAN tag, Rx packet type, end of packet field and RXE field value. + * + * This function only operates on the VIRTCHNL_RXDID_2_FLEX_SQ_NIC flexible + * descriptor writeback format. + * + * Return: fields extracted from the Rx descriptor. + */ +static struct libeth_rqe_info +iavf_extract_flex_rx_fields(const struct iavf_ring *rx_ring, + const struct iavf_rx_desc *rx_desc) +{ + struct libeth_rqe_info fields = {}; + u64 qw0 = le64_to_cpu(rx_desc->qw0); + u64 qw1 = le64_to_cpu(rx_desc->qw1); + u64 qw2 = le64_to_cpu(rx_desc->qw2); + bool l2tag1p, l2tag2p; + + fields.eop = FIELD_GET(IAVF_RXD_FLEX_EOP_M, qw1); + fields.len = FIELD_GET(IAVF_RXD_FLEX_PKT_LEN_M, qw0); + + if (!fields.eop) + return fields; + + fields.rxe = FIELD_GET(IAVF_RXD_FLEX_RXE_M, qw1); + fields.ptype = FIELD_GET(IAVF_RXD_FLEX_PTYPE_M, qw0); + fields.vlan = 0; + + if (rx_ring->flags & IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1) { + l2tag1p = FIELD_GET(IAVF_RXD_FLEX_L2TAG1P_M, qw1); + if (l2tag1p) + fields.vlan = FIELD_GET(IAVF_RXD_FLEX_L2TAG1_M, qw1); + } else if (rx_ring->flags & IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2) { + l2tag2p = FIELD_GET(IAVF_RXD_FLEX_L2TAG2P_M, qw2); + if (l2tag2p) + fields.vlan = FIELD_GET(IAVF_RXD_FLEX_L2TAG2_2_M, qw2); + } + + return fields; +} + +static struct libeth_rqe_info +iavf_extract_rx_fields(const struct iavf_ring *rx_ring, + const struct iavf_rx_desc *rx_desc, + bool flex) +{ + if (flex) + return iavf_extract_flex_rx_fields(rx_ring, rx_desc); + else + return iavf_extract_legacy_rx_fields(rx_ring, rx_desc); +} + +/** * iavf_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf * @rx_ring: rx descriptor ring to transact packets on * @budget: Total limit on number of packets to process @@ -1136,18 +1383,17 @@ static bool iavf_is_non_eop(struct iavf_ring *rx_ring, **/ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget) { + bool flex = rx_ring->rxdid == VIRTCHNL_RXDID_2_FLEX_SQ_NIC; unsigned int total_rx_bytes = 0, total_rx_packets = 0; struct sk_buff *skb = rx_ring->skb; u16 cleaned_count = IAVF_DESC_UNUSED(rx_ring); bool failure = false; while (likely(total_rx_packets < (unsigned int)budget)) { + struct libeth_rqe_info fields; struct libeth_fqe *rx_buffer; - union iavf_rx_desc *rx_desc; - unsigned int size; - u16 vlan_tag = 0; - u8 rx_ptype; - u64 qword; + struct iavf_rx_desc *rx_desc; + u64 qw1; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= IAVF_RX_BUFFER_WRITE) { @@ -1158,35 +1404,32 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget) rx_desc = IAVF_RX_DESC(rx_ring, rx_ring->next_to_clean); - /* status_error_len will always be zero for unused descriptors - * because it's cleared in cleanup, and overlaps with hdr_addr - * which is always zero because packet split isn't used, if the - * hardware wrote DD then the length will be non-zero - */ - qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - /* This memory barrier is needed to keep us from reading * any other fields out of the rx_desc until we have * verified the descriptor has been written back. */ dma_rmb(); -#define IAVF_RXD_DD BIT(IAVF_RX_DESC_STATUS_DD_SHIFT) - if (!iavf_test_staterr(rx_desc, IAVF_RXD_DD)) + + qw1 = le64_to_cpu(rx_desc->qw1); + /* If DD field (descriptor done) is unset then other fields are + * not valid + */ + if (!iavf_is_descriptor_done(qw1, flex)) break; - size = FIELD_GET(IAVF_RXD_QW1_LENGTH_PBUF_MASK, qword); + fields = iavf_extract_rx_fields(rx_ring, rx_desc, flex); iavf_trace(clean_rx_irq, rx_ring, rx_desc, skb); rx_buffer = &rx_ring->rx_fqes[rx_ring->next_to_clean]; - if (!libeth_rx_sync_for_cpu(rx_buffer, size)) + if (!libeth_rx_sync_for_cpu(rx_buffer, fields.len)) goto skip_data; /* retrieve a buffer from the ring */ if (skb) - iavf_add_rx_frag(skb, rx_buffer, size); + iavf_add_rx_frag(skb, rx_buffer, fields.len); else - skb = iavf_build_skb(rx_buffer, size); + skb = iavf_build_skb(rx_buffer, fields.len); /* exit if we failed to retrieve a buffer */ if (!skb) { @@ -1197,15 +1440,14 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget) skip_data: cleaned_count++; - if (iavf_is_non_eop(rx_ring, rx_desc, skb) || unlikely(!skb)) + if (iavf_is_non_eop(rx_ring, fields) || unlikely(!skb)) continue; - /* ERR_MASK will only have valid bits if EOP set, and - * what we are doing here is actually checking - * IAVF_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in - * the error field + /* RXE field in descriptor is an indication of the MAC errors + * (like CRC, alignment, oversize etc). If it is set then iavf + * should finish. */ - if (unlikely(iavf_test_staterr(rx_desc, BIT(IAVF_RXD_QW1_ERROR_SHIFT)))) { + if (unlikely(fields.rxe)) { dev_kfree_skb_any(skb); skb = NULL; continue; @@ -1219,22 +1461,11 @@ skip_data: /* probably a little skewed due to removing CRC */ total_rx_bytes += skb->len; - qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - rx_ptype = FIELD_GET(IAVF_RXD_QW1_PTYPE_MASK, qword); - /* populate checksum, VLAN, and protocol */ - iavf_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype); - - if (qword & BIT(IAVF_RX_DESC_STATUS_L2TAG1P_SHIFT) && - rx_ring->flags & IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1) - vlan_tag = le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1); - if (rx_desc->wb.qword2.ext_status & - cpu_to_le16(BIT(IAVF_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT)) && - rx_ring->flags & IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2) - vlan_tag = le16_to_cpu(rx_desc->wb.qword2.l2tag2_2); + iavf_process_skb_fields(rx_ring, rx_desc, skb, fields.ptype, flex); iavf_trace(clean_rx_irq_rx, rx_ring, rx_desc, skb); - iavf_receive_skb(rx_ring, skb, vlan_tag); + iavf_receive_skb(rx_ring, skb, fields.vlan); skb = NULL; /* update budget accounting */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.h b/drivers/net/ethernet/intel/iavf/iavf_txrx.h index f97c702c0802..79ad554f2d53 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.h +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.h @@ -80,25 +80,6 @@ enum iavf_dyn_idx_t { BIT_ULL(IAVF_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | \ BIT_ULL(IAVF_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP)) -#define iavf_rx_desc iavf_32byte_rx_desc - -/** - * iavf_test_staterr - tests bits in Rx descriptor status and error fields - * @rx_desc: pointer to receive descriptor (in le64 format) - * @stat_err_bits: value to mask - * - * This function does some fast chicanery in order to return the - * value of the mask which is really only used for boolean tests. - * The status_error_len doesn't need to be shifted because it begins - * at offset zero. - */ -static inline bool iavf_test_staterr(union iavf_rx_desc *rx_desc, - const u64 stat_err_bits) -{ - return !!(rx_desc->wb.qword1.status_error_len & - cpu_to_le64(stat_err_bits)); -} - /* How many Rx Buffers do we bundle into one write to the hardware ? */ #define IAVF_RX_INCREMENT(r, i) \ do { \ @@ -262,6 +243,8 @@ struct iavf_ring { u16 next_to_use; u16 next_to_clean; + u16 rxdid; /* Rx descriptor format */ + u16 flags; #define IAVF_TXR_FLAGS_WB_ON_ITR BIT(0) #define IAVF_TXR_FLAGS_ARM_WB BIT(1) @@ -269,6 +252,7 @@ struct iavf_ring { #define IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1 BIT(3) #define IAVF_TXR_FLAGS_VLAN_TAG_LOC_L2TAG2 BIT(4) #define IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2 BIT(5) +#define IAVF_TXRX_FLAGS_HW_TSTAMP BIT(6) /* stats structs */ struct iavf_queue_stats stats; @@ -295,6 +279,8 @@ struct iavf_ring { * for this ring. */ + struct iavf_ptp *ptp; + u32 rx_buf_len; struct net_shaper q_shaper; bool q_shaper_update; diff --git a/drivers/net/ethernet/intel/iavf/iavf_type.h b/drivers/net/ethernet/intel/iavf/iavf_type.h index f6b09e57abce..f9e1319620f4 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_type.h +++ b/drivers/net/ethernet/intel/iavf/iavf_type.h @@ -178,110 +178,116 @@ struct iavf_hw { char err_str[16]; }; -/* RX Descriptors */ -union iavf_16byte_rx_desc { - struct { - __le64 pkt_addr; /* Packet buffer address */ - __le64 hdr_addr; /* Header buffer address */ - } read; - struct { - struct { - struct { - union { - __le16 mirroring_status; - __le16 fcoe_ctx_id; - } mirr_fcoe; - __le16 l2tag1; - } lo_dword; - union { - __le32 rss; /* RSS Hash */ - __le32 fd_id; /* Flow director filter id */ - __le32 fcoe_param; /* FCoE DDP Context id */ - } hi_dword; - } qword0; - struct { - /* ext status/error/pktype/length */ - __le64 status_error_len; - } qword1; - } wb; /* writeback */ -}; - -union iavf_32byte_rx_desc { - struct { - __le64 pkt_addr; /* Packet buffer address */ - __le64 hdr_addr; /* Header buffer address */ - /* bit 0 of hdr_buffer_addr is DD bit */ - __le64 rsvd1; - __le64 rsvd2; - } read; - struct { - struct { - struct { - union { - __le16 mirroring_status; - __le16 fcoe_ctx_id; - } mirr_fcoe; - __le16 l2tag1; - } lo_dword; - union { - __le32 rss; /* RSS Hash */ - __le32 fcoe_param; /* FCoE DDP Context id */ - /* Flow director filter id in case of - * Programming status desc WB - */ - __le32 fd_id; - } hi_dword; - } qword0; - struct { - /* status/error/pktype/length */ - __le64 status_error_len; - } qword1; - struct { - __le16 ext_status; /* extended status */ - __le16 rsvd; - __le16 l2tag2_1; - __le16 l2tag2_2; - } qword2; - struct { - union { - __le32 flex_bytes_lo; - __le32 pe_status; - } lo_dword; - union { - __le32 flex_bytes_hi; - __le32 fd_id; - } hi_dword; - } qword3; - } wb; /* writeback */ -}; - -enum iavf_rx_desc_status_bits { - /* Note: These are predefined bit offsets */ - IAVF_RX_DESC_STATUS_DD_SHIFT = 0, - IAVF_RX_DESC_STATUS_EOF_SHIFT = 1, - IAVF_RX_DESC_STATUS_L2TAG1P_SHIFT = 2, - IAVF_RX_DESC_STATUS_L3L4P_SHIFT = 3, - IAVF_RX_DESC_STATUS_CRCP_SHIFT = 4, - IAVF_RX_DESC_STATUS_TSYNINDX_SHIFT = 5, /* 2 BITS */ - IAVF_RX_DESC_STATUS_TSYNVALID_SHIFT = 7, - /* Note: Bit 8 is reserved in X710 and XL710 */ - IAVF_RX_DESC_STATUS_EXT_UDP_0_SHIFT = 8, - IAVF_RX_DESC_STATUS_UMBCAST_SHIFT = 9, /* 2 BITS */ - IAVF_RX_DESC_STATUS_FLM_SHIFT = 11, - IAVF_RX_DESC_STATUS_FLTSTAT_SHIFT = 12, /* 2 BITS */ - IAVF_RX_DESC_STATUS_LPBK_SHIFT = 14, - IAVF_RX_DESC_STATUS_IPV6EXADD_SHIFT = 15, - IAVF_RX_DESC_STATUS_RESERVED_SHIFT = 16, /* 2 BITS */ - /* Note: For non-tunnel packets INT_UDP_0 is the right status for - * UDP header - */ - IAVF_RX_DESC_STATUS_INT_UDP_0_SHIFT = 18, - IAVF_RX_DESC_STATUS_LAST /* this entry must be last!!! */ -}; - -#define IAVF_RXD_QW1_STATUS_SHIFT 0 -#define IAVF_RXD_QW1_STATUS_MASK ((BIT(IAVF_RX_DESC_STATUS_LAST) - 1) \ - << IAVF_RXD_QW1_STATUS_SHIFT) +/** + * struct iavf_rx_desc - Receive descriptor (both legacy and flexible) + * @qw0: quad word 0 fields: + * Legacy: Descriptor Type; Mirror ID; L2TAG1P (S-TAG); Filter Status + * Flex: Descriptor Type; Mirror ID; UMBCAST; Packet Type; Flexible Flags + * Section 0; Packet Length; Header Length; Split Header Flag; + * Flexible Flags section 1 / Extended Status + * @qw1: quad word 1 fields: + * Legacy: Status Field; Error Field; Packet Type; Packet Length (packet, + * header, Split Header Flag) + * Flex: Status / Error 0 Field; L2TAG1P (S-TAG); Flexible Metadata + * Container #0; Flexible Metadata Container #1 + * @qw2: quad word 2 fields: + * Legacy: Extended Status; 1st L2TAG2P (C-TAG); 2nd L2TAG2P (C-TAG) + * Flex: Status / Error 1 Field; Flexible Flags section 2; Timestamp Low; + * 1st L2TAG2 (C-TAG); 2nd L2TAG2 (C-TAG) + * @qw3: quad word 3 fields: + * Legacy: FD Filter ID / Flexible Bytes + * Flex: Flexible Metadata Container #2; Flexible Metadata Container #3; + * Flexible Metadata Container #4 / Timestamp High 0; Flexible + * Metadata Container #5 / Timestamp High 1; + */ +struct iavf_rx_desc { + aligned_le64 qw0; +/* The hash signature (RSS) */ +#define IAVF_RXD_LEGACY_RSS_M GENMASK_ULL(63, 32) +/* Stripped C-TAG VLAN from the receive packet */ +#define IAVF_RXD_LEGACY_L2TAG1_M GENMASK_ULL(33, 16) +/* Packet type */ +#define IAVF_RXD_FLEX_PTYPE_M GENMASK_ULL(25, 16) +/* Packet length */ +#define IAVF_RXD_FLEX_PKT_LEN_M GENMASK_ULL(45, 32) + + aligned_le64 qw1; +/* Descriptor done indication flag. */ +#define IAVF_RXD_LEGACY_DD_M BIT(0) +/* End of packet. Set to 1 if this descriptor is the last one of the packet */ +#define IAVF_RXD_LEGACY_EOP_M BIT(1) +/* L2 TAG 1 presence indication */ +#define IAVF_RXD_LEGACY_L2TAG1P_M BIT(2) +/* Detectable L3 and L4 integrity check is processed by the HW */ +#define IAVF_RXD_LEGACY_L3L4P_M BIT(3) +/* Set when an IPv6 packet contains a Destination Options Header or a Routing + * Header. + */ +#define IAVF_RXD_LEGACY_IPV6EXADD_M BIT(15) +/* Receive MAC Errors: CRC; Alignment; Oversize; Undersizes; Length error */ +#define IAVF_RXD_LEGACY_RXE_M BIT(19) +/* Checksum reports: + * - IPE: IP checksum error + * - L4E: L4 integrity error + * - EIPE: External IP header (tunneled packets) + */ +#define IAVF_RXD_LEGACY_IPE_M BIT(22) +#define IAVF_RXD_LEGACY_L4E_M BIT(23) +#define IAVF_RXD_LEGACY_EIPE_M BIT(24) +/* Set for packets that skip checksum calculation in pre-parser */ +#define IAVF_RXD_LEGACY_PPRS_M BIT(26) +/* Indicates the content in the Filter Status field */ +#define IAVF_RXD_LEGACY_FLTSTAT_M GENMASK_ULL(13, 12) +/* Packet type */ +#define IAVF_RXD_LEGACY_PTYPE_M GENMASK_ULL(37, 30) +/* Packet length */ +#define IAVF_RXD_LEGACY_LENGTH_M GENMASK_ULL(51, 38) +/* Descriptor done indication flag */ +#define IAVF_RXD_FLEX_DD_M BIT(0) +/* End of packet. Set to 1 if this descriptor is the last one of the packet */ +#define IAVF_RXD_FLEX_EOP_M BIT(1) +/* Detectable L3 and L4 integrity check is processed by the HW */ +#define IAVF_RXD_FLEX_L3L4P_M BIT(3) +/* Checksum reports: + * - IPE: IP checksum error + * - L4E: L4 integrity error + * - EIPE: External IP header (tunneled packets) + * - EUDPE: External UDP checksum error (tunneled packets) + */ +#define IAVF_RXD_FLEX_XSUM_IPE_M BIT(4) +#define IAVF_RXD_FLEX_XSUM_L4E_M BIT(5) +#define IAVF_RXD_FLEX_XSUM_EIPE_M BIT(6) +#define IAVF_RXD_FLEX_XSUM_EUDPE_M BIT(7) +/* Set when an IPv6 packet contains a Destination Options Header or a Routing + * Header. + */ +#define IAVF_RXD_FLEX_IPV6EXADD_M BIT(9) +/* Receive MAC Errors: CRC; Alignment; Oversize; Undersizes; Length error */ +#define IAVF_RXD_FLEX_RXE_M BIT(10) +/* Indicates that the RSS/HASH result is valid */ +#define IAVF_RXD_FLEX_RSS_VALID_M BIT(12) +/* L2 TAG 1 presence indication */ +#define IAVF_RXD_FLEX_L2TAG1P_M BIT(13) +/* Stripped L2 Tag from the receive packet */ +#define IAVF_RXD_FLEX_L2TAG1_M GENMASK_ULL(31, 16) +/* The hash signature (RSS) */ +#define IAVF_RXD_FLEX_RSS_HASH_M GENMASK_ULL(63, 32) + + aligned_le64 qw2; +/* L2 Tag 2 Presence */ +#define IAVF_RXD_LEGACY_L2TAG2P_M BIT(0) +/* Stripped S-TAG VLAN from the receive packet */ +#define IAVF_RXD_LEGACY_L2TAG2_M GENMASK_ULL(63, 32) +/* Stripped S-TAG VLAN from the receive packet */ +#define IAVF_RXD_FLEX_L2TAG2_2_M GENMASK_ULL(63, 48) +/* The packet is a UDP tunneled packet */ +#define IAVF_RXD_FLEX_NAT_M BIT(4) +/* L2 Tag 2 Presence */ +#define IAVF_RXD_FLEX_L2TAG2P_M BIT(11) + aligned_le64 qw3; +#define IAVF_RXD_FLEX_QW3_TSTAMP_HIGH_M GENMASK_ULL(63, 32) +} __aligned(4 * sizeof(__le64)); +static_assert(sizeof(struct iavf_rx_desc) == 32); #define IAVF_RXD_QW1_STATUS_TSYNINDX_SHIFT IAVF_RX_DESC_STATUS_TSYNINDX_SHIFT #define IAVF_RXD_QW1_STATUS_TSYNINDX_MASK (0x3UL << \ @@ -298,22 +304,6 @@ enum iavf_rx_desc_fltstat_values { IAVF_RX_DESC_FLTSTAT_RSS_HASH = 3, }; -#define IAVF_RXD_QW1_ERROR_SHIFT 19 -#define IAVF_RXD_QW1_ERROR_MASK (0xFFUL << IAVF_RXD_QW1_ERROR_SHIFT) - -enum iavf_rx_desc_error_bits { - /* Note: These are predefined bit offsets */ - IAVF_RX_DESC_ERROR_RXE_SHIFT = 0, - IAVF_RX_DESC_ERROR_RECIPE_SHIFT = 1, - IAVF_RX_DESC_ERROR_HBO_SHIFT = 2, - IAVF_RX_DESC_ERROR_L3L4E_SHIFT = 3, /* 3 BITS */ - IAVF_RX_DESC_ERROR_IPE_SHIFT = 3, - IAVF_RX_DESC_ERROR_L4E_SHIFT = 4, - IAVF_RX_DESC_ERROR_EIPE_SHIFT = 5, - IAVF_RX_DESC_ERROR_OVERSIZE_SHIFT = 6, - IAVF_RX_DESC_ERROR_PPRS_SHIFT = 7 -}; - enum iavf_rx_desc_error_l3l4e_fcoe_masks { IAVF_RX_DESC_ERROR_L3L4E_NONE = 0, IAVF_RX_DESC_ERROR_L3L4E_PROT = 1, @@ -322,13 +312,6 @@ enum iavf_rx_desc_error_l3l4e_fcoe_masks { IAVF_RX_DESC_ERROR_L3L4E_DMAC_WARN = 4 }; -#define IAVF_RXD_QW1_PTYPE_SHIFT 30 -#define IAVF_RXD_QW1_PTYPE_MASK (0xFFULL << IAVF_RXD_QW1_PTYPE_SHIFT) - -#define IAVF_RXD_QW1_LENGTH_PBUF_SHIFT 38 -#define IAVF_RXD_QW1_LENGTH_PBUF_MASK (0x3FFFULL << \ - IAVF_RXD_QW1_LENGTH_PBUF_SHIFT) - #define IAVF_RXD_QW1_LENGTH_HBUF_SHIFT 52 #define IAVF_RXD_QW1_LENGTH_HBUF_MASK (0x7FFULL << \ IAVF_RXD_QW1_LENGTH_HBUF_SHIFT) @@ -347,6 +330,8 @@ enum iavf_rx_desc_ext_status_bits { IAVF_RX_DESC_EXT_STATUS_PELONGB_SHIFT = 11, }; +#define IAVF_RX_DESC_EXT_STATUS_L2TAG2P_M BIT(IAVF_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT) + enum iavf_rx_desc_pe_status_bits { /* Note: These are predefined bit offsets */ IAVF_RX_DESC_PE_STATUS_QPID_SHIFT = 0, /* 18 BITS */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_types.h b/drivers/net/ethernet/intel/iavf/iavf_types.h new file mode 100644 index 000000000000..a095855122bf --- /dev/null +++ b/drivers/net/ethernet/intel/iavf/iavf_types.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2024 Intel Corporation. */ + +#ifndef _IAVF_TYPES_H_ +#define _IAVF_TYPES_H_ + +#include "iavf_types.h" + +#include <linux/avf/virtchnl.h> +#include <linux/ptp_clock_kernel.h> + +/* structure used to queue PTP commands for processing */ +struct iavf_ptp_aq_cmd { + struct list_head list; + enum virtchnl_ops v_opcode:16; + u16 msglen; + u8 msg[] __counted_by(msglen); +}; + +struct iavf_ptp { + wait_queue_head_t phc_time_waitqueue; + struct virtchnl_ptp_caps hw_caps; + struct ptp_clock_info info; + struct ptp_clock *clock; + struct list_head aq_cmds; + u64 cached_phc_time; + unsigned long cached_phc_updated; + /* Lock protecting access to the AQ command list */ + struct mutex aq_cmd_lock; + struct kernel_hwtstamp_config hwtstamp_config; + bool phc_time_ready:1; +}; + +#endif /* _IAVF_TYPES_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 15d388b431c5..a6f0e5990be2 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -4,6 +4,7 @@ #include <linux/net/intel/libie/rx.h> #include "iavf.h" +#include "iavf_ptp.h" #include "iavf_prototype.h" /** @@ -144,9 +145,11 @@ int iavf_send_vf_config_msg(struct iavf_adapter *adapter) VIRTCHNL_VF_OFFLOAD_ENCAP | VIRTCHNL_VF_OFFLOAD_TC_U32 | VIRTCHNL_VF_OFFLOAD_VLAN_V2 | + VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC | VIRTCHNL_VF_OFFLOAD_CRC | VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM | VIRTCHNL_VF_OFFLOAD_REQ_QUEUES | + VIRTCHNL_VF_CAP_PTP | VIRTCHNL_VF_OFFLOAD_ADQ | VIRTCHNL_VF_OFFLOAD_USO | VIRTCHNL_VF_OFFLOAD_FDIR_PF | @@ -177,6 +180,54 @@ int iavf_send_vf_offload_vlan_v2_msg(struct iavf_adapter *adapter) NULL, 0); } +int iavf_send_vf_supported_rxdids_msg(struct iavf_adapter *adapter) +{ + adapter->aq_required &= ~IAVF_FLAG_AQ_GET_SUPPORTED_RXDIDS; + + if (!IAVF_RXDID_ALLOWED(adapter)) + return -EOPNOTSUPP; + + adapter->current_op = VIRTCHNL_OP_GET_SUPPORTED_RXDIDS; + + return iavf_send_pf_msg(adapter, VIRTCHNL_OP_GET_SUPPORTED_RXDIDS, + NULL, 0); +} + +/** + * iavf_send_vf_ptp_caps_msg - Send request for PTP capabilities + * @adapter: private adapter structure + * + * Send the VIRTCHNL_OP_1588_PTP_GET_CAPS command to the PF to request the PTP + * capabilities available to this device. This includes the following + * potential access: + * + * * READ_PHC - access to read the PTP hardware clock time + * * RX_TSTAMP - access to request Rx timestamps on all received packets + * + * The PF will reply with the same opcode a filled out copy of the + * virtchnl_ptp_caps structure which defines the specifics of which features + * are accessible to this device. + * + * Return: 0 if success, error code otherwise. + */ +int iavf_send_vf_ptp_caps_msg(struct iavf_adapter *adapter) +{ + struct virtchnl_ptp_caps hw_caps = { + .caps = VIRTCHNL_1588_PTP_CAP_READ_PHC | + VIRTCHNL_1588_PTP_CAP_RX_TSTAMP + }; + + adapter->aq_required &= ~IAVF_FLAG_AQ_GET_PTP_CAPS; + + if (!IAVF_PTP_ALLOWED(adapter)) + return -EOPNOTSUPP; + + adapter->current_op = VIRTCHNL_OP_1588_PTP_GET_CAPS; + + return iavf_send_pf_msg(adapter, VIRTCHNL_OP_1588_PTP_GET_CAPS, + (u8 *)&hw_caps, sizeof(hw_caps)); +} + /** * iavf_validate_num_queues * @adapter: adapter structure @@ -263,6 +314,40 @@ int iavf_get_vf_vlan_v2_caps(struct iavf_adapter *adapter) return err; } +int iavf_get_vf_supported_rxdids(struct iavf_adapter *adapter) +{ + struct iavf_arq_event_info event; + u64 rxdids; + int err; + + event.msg_buf = (u8 *)&rxdids; + event.buf_len = sizeof(rxdids); + + err = iavf_poll_virtchnl_msg(&adapter->hw, &event, + VIRTCHNL_OP_GET_SUPPORTED_RXDIDS); + if (!err) + adapter->supp_rxdids = rxdids; + + return err; +} + +int iavf_get_vf_ptp_caps(struct iavf_adapter *adapter) +{ + struct virtchnl_ptp_caps caps = {}; + struct iavf_arq_event_info event; + int err; + + event.msg_buf = (u8 *)∩︀ + event.buf_len = sizeof(caps); + + err = iavf_poll_virtchnl_msg(&adapter->hw, &event, + VIRTCHNL_OP_1588_PTP_GET_CAPS); + if (!err) + adapter->ptp.hw_caps = caps; + + return err; +} + /** * iavf_configure_queues * @adapter: adapter structure @@ -275,6 +360,7 @@ void iavf_configure_queues(struct iavf_adapter *adapter) int pairs = adapter->num_active_queues; struct virtchnl_queue_pair_info *vqpi; u32 i, max_frame; + u8 rx_flags = 0; size_t len; max_frame = LIBIE_MAX_RX_FRM_LEN(adapter->rx_rings->pp->p.offset); @@ -292,6 +378,9 @@ void iavf_configure_queues(struct iavf_adapter *adapter) if (!vqci) return; + if (iavf_ptp_cap_supported(adapter, VIRTCHNL_1588_PTP_CAP_RX_TSTAMP)) + rx_flags |= VIRTCHNL_PTP_RX_TSTAMP; + vqci->vsi_id = adapter->vsi_res->vsi_id; vqci->num_queue_pairs = pairs; vqpi = vqci->qpair; @@ -309,9 +398,12 @@ void iavf_configure_queues(struct iavf_adapter *adapter) vqpi->rxq.dma_ring_addr = adapter->rx_rings[i].dma; vqpi->rxq.max_pkt_size = max_frame; vqpi->rxq.databuffer_size = adapter->rx_rings[i].rx_buf_len; + if (IAVF_RXDID_ALLOWED(adapter)) + vqpi->rxq.rxdid = adapter->rxdid; if (CRC_OFFLOAD_ALLOWED(adapter)) vqpi->rxq.crc_disable = !!(adapter->netdev->features & NETIF_F_RXFCS); + vqpi->rxq.flags = rx_flags; vqpi++; } @@ -1402,6 +1494,67 @@ void iavf_disable_vlan_insertion_v2(struct iavf_adapter *adapter, u16 tpid) VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2); } +#if IS_ENABLED(CONFIG_PTP_1588_CLOCK) +/** + * iavf_virtchnl_send_ptp_cmd - Send one queued PTP command + * @adapter: adapter private structure + * + * De-queue one PTP command request and send the command message to the PF. + * Clear IAVF_FLAG_AQ_SEND_PTP_CMD if no more messages are left to send. + */ +void iavf_virtchnl_send_ptp_cmd(struct iavf_adapter *adapter) +{ + struct iavf_ptp_aq_cmd *cmd; + int err; + + if (!adapter->ptp.clock) { + /* This shouldn't be possible to hit, since no messages should + * be queued if PTP is not initialized. + */ + pci_err(adapter->pdev, "PTP is not initialized\n"); + adapter->aq_required &= ~IAVF_FLAG_AQ_SEND_PTP_CMD; + return; + } + + mutex_lock(&adapter->ptp.aq_cmd_lock); + cmd = list_first_entry_or_null(&adapter->ptp.aq_cmds, + struct iavf_ptp_aq_cmd, list); + if (!cmd) { + /* no further PTP messages to send */ + adapter->aq_required &= ~IAVF_FLAG_AQ_SEND_PTP_CMD; + goto out_unlock; + } + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + pci_err(adapter->pdev, + "Cannot send PTP command %d, command %d pending\n", + cmd->v_opcode, adapter->current_op); + goto out_unlock; + } + + err = iavf_send_pf_msg(adapter, cmd->v_opcode, cmd->msg, cmd->msglen); + if (!err) { + /* Command was sent without errors, so we can remove it from + * the list and discard it. + */ + list_del(&cmd->list); + kfree(cmd); + } else { + /* We failed to send the command, try again next cycle */ + pci_err(adapter->pdev, "Failed to send PTP command %d\n", + cmd->v_opcode); + } + + if (list_empty(&adapter->ptp.aq_cmds)) + /* no further PTP messages to send */ + adapter->aq_required &= ~IAVF_FLAG_AQ_SEND_PTP_CMD; + +out_unlock: + mutex_unlock(&adapter->ptp.aq_cmd_lock); +} +#endif /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */ + /** * iavf_print_link_message - print link up or down * @adapter: adapter structure @@ -2098,6 +2251,37 @@ static void iavf_activate_fdir_filters(struct iavf_adapter *adapter) } /** + * iavf_virtchnl_ptp_get_time - Respond to VIRTCHNL_OP_1588_PTP_GET_TIME + * @adapter: private adapter structure + * @data: the message from the PF + * @len: length of the message from the PF + * + * Handle the VIRTCHNL_OP_1588_PTP_GET_TIME message from the PF. This message + * is sent by the PF in response to the same op as a request from the VF. + * Extract the 64bit nanoseconds time from the message and store it in + * cached_phc_time. Then, notify any thread that is waiting for the update via + * the wait queue. + */ +static void iavf_virtchnl_ptp_get_time(struct iavf_adapter *adapter, + void *data, u16 len) +{ + struct virtchnl_phc_time *msg = data; + + if (len != sizeof(*msg)) { + dev_err_once(&adapter->pdev->dev, + "Invalid VIRTCHNL_OP_1588_PTP_GET_TIME from PF. Got size %u, expected %zu\n", + len, sizeof(*msg)); + return; + } + + adapter->ptp.cached_phc_time = msg->time; + adapter->ptp.cached_phc_updated = jiffies; + adapter->ptp.phc_time_ready = true; + + wake_up(&adapter->ptp.phc_time_waitqueue); +} + +/** * iavf_virtchnl_completion * @adapter: adapter structure * @v_opcode: opcode sent by PF @@ -2509,6 +2693,25 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, aq_required; } break; + case VIRTCHNL_OP_GET_SUPPORTED_RXDIDS: + if (msglen != sizeof(u64)) + return; + + adapter->supp_rxdids = *(u64 *)msg; + + break; + case VIRTCHNL_OP_1588_PTP_GET_CAPS: + if (msglen != sizeof(adapter->ptp.hw_caps)) + return; + + adapter->ptp.hw_caps = *(struct virtchnl_ptp_caps *)msg; + + /* process any state change needed due to new capabilities */ + iavf_ptp_process_caps(adapter); + break; + case VIRTCHNL_OP_1588_PTP_GET_TIME: + iavf_virtchnl_ptp_get_time(adapter, msg, msglen); + break; case VIRTCHNL_OP_ENABLE_QUEUES: /* enable transmits */ iavf_irq_enable(adapter, true); diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c index dbdb83567364..fcb199efbea5 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink.c +++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c @@ -1205,6 +1205,25 @@ static int ice_devlink_set_parent(struct devlink_rate *devlink_rate, return status; } +static void ice_set_min_max_msix(struct ice_pf *pf) +{ + struct devlink *devlink = priv_to_devlink(pf); + union devlink_param_value val; + int err; + + err = devl_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN, + &val); + if (!err) + pf->msix.min = val.vu32; + + err = devl_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX, + &val); + if (!err) + pf->msix.max = val.vu32; +} + /** * ice_devlink_reinit_up - do reinit of the given PF * @pf: pointer to the PF struct @@ -1220,6 +1239,9 @@ static int ice_devlink_reinit_up(struct ice_pf *pf) return err; } + /* load MSI-X values */ + ice_set_min_max_msix(pf); + err = ice_init_dev(pf); if (err) goto unroll_hw_init; @@ -1533,6 +1555,43 @@ static int ice_devlink_local_fwd_validate(struct devlink *devlink, u32 id, return 0; } +static int +ice_devlink_msix_max_pf_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct ice_pf *pf = devlink_priv(devlink); + + if (val.vu32 > pf->hw.func_caps.common_cap.num_msix_vectors) + return -EINVAL; + + return 0; +} + +static int +ice_devlink_msix_min_pf_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + if (val.vu32 < ICE_MIN_MSIX) + return -EINVAL; + + return 0; +} + +static int ice_devlink_enable_rdma_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct ice_pf *pf = devlink_priv(devlink); + bool new_state = val.vbool; + + if (new_state && !test_bit(ICE_FLAG_RDMA_ENA, pf->flags)) + return -EOPNOTSUPP; + + return 0; +} + enum ice_param_id { ICE_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, ICE_DEVLINK_PARAM_ID_TX_SCHED_LAYERS, @@ -1548,6 +1607,17 @@ static const struct devlink_param ice_dvl_rdma_params[] = { ice_devlink_enable_iw_get, ice_devlink_enable_iw_set, ice_devlink_enable_iw_validate), + DEVLINK_PARAM_GENERIC(ENABLE_RDMA, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, ice_devlink_enable_rdma_validate), +}; + +static const struct devlink_param ice_dvl_msix_params[] = { + DEVLINK_PARAM_GENERIC(MSIX_VEC_PER_PF_MAX, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, ice_devlink_msix_max_pf_validate), + DEVLINK_PARAM_GENERIC(MSIX_VEC_PER_PF_MIN, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, ice_devlink_msix_min_pf_validate), }; static const struct devlink_param ice_dvl_sched_params[] = { @@ -1651,6 +1721,7 @@ void ice_devlink_unregister(struct ice_pf *pf) int ice_devlink_register_params(struct ice_pf *pf) { struct devlink *devlink = priv_to_devlink(pf); + union devlink_param_value value; struct ice_hw *hw = &pf->hw; int status; @@ -1659,10 +1730,39 @@ int ice_devlink_register_params(struct ice_pf *pf) if (status) return status; + status = devl_params_register(devlink, ice_dvl_msix_params, + ARRAY_SIZE(ice_dvl_msix_params)); + if (status) + goto unregister_rdma_params; + if (hw->func_caps.common_cap.tx_sched_topo_comp_mode_en) status = devl_params_register(devlink, ice_dvl_sched_params, ARRAY_SIZE(ice_dvl_sched_params)); + if (status) + goto unregister_msix_params; + + value.vu32 = pf->msix.max; + devl_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX, + value); + value.vu32 = pf->msix.min; + devl_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN, + value); + + value.vbool = test_bit(ICE_FLAG_RDMA_ENA, pf->flags); + devl_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, + value); + + return 0; +unregister_msix_params: + devl_params_unregister(devlink, ice_dvl_msix_params, + ARRAY_SIZE(ice_dvl_msix_params)); +unregister_rdma_params: + devl_params_unregister(devlink, ice_dvl_rdma_params, + ARRAY_SIZE(ice_dvl_rdma_params)); return status; } @@ -1673,6 +1773,8 @@ void ice_devlink_unregister_params(struct ice_pf *pf) devl_params_unregister(devlink, ice_dvl_rdma_params, ARRAY_SIZE(ice_dvl_rdma_params)); + devl_params_unregister(devlink, ice_dvl_msix_params, + ARRAY_SIZE(ice_dvl_msix_params)); if (hw->func_caps.common_cap.tx_sched_topo_comp_mode_en) devl_params_unregister(devlink, ice_dvl_sched_params, diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 71e05d30f0fd..c9104b13e1d2 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -97,9 +97,6 @@ #define ICE_MIN_LAN_OICR_MSIX 1 #define ICE_MIN_MSIX (ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_LAN_OICR_MSIX) #define ICE_FDIR_MSIX 2 -#define ICE_RDMA_NUM_AEQ_MSIX 4 -#define ICE_MIN_RDMA_MSIX 2 -#define ICE_ESWITCH_MSIX 1 #define ICE_NO_VSI 0xffff #define ICE_VSI_MAP_CONTIG 0 #define ICE_VSI_MAP_SCATTER 1 @@ -542,6 +539,14 @@ struct ice_agg_node { u8 valid; }; +struct ice_pf_msix { + u32 cur; + u32 min; + u32 max; + u32 total; + u32 rest; +}; + struct ice_pf { struct pci_dev *pdev; struct ice_adapter *adapter; @@ -556,13 +561,7 @@ struct ice_pf { /* OS reserved IRQ details */ struct msix_entry *msix_entries; struct ice_irq_tracker irq_tracker; - /* First MSIX vector used by SR-IOV VFs. Calculated by subtracting the - * number of MSIX vectors needed for all SR-IOV VFs from the number of - * MSIX vectors allowed on this PF. - */ - u16 sriov_base_vector; - unsigned long *sriov_irq_bm; /* bitmap to track irq usage */ - u16 sriov_irq_size; /* size of the irq_bm bitmap */ + struct ice_virt_irq_tracker virt_irq_tracker; u16 ctrl_vsi_idx; /* control VSI index in pf->vsi array */ @@ -612,7 +611,7 @@ struct ice_pf { struct msi_map ll_ts_irq; /* LL_TS interrupt MSIX vector */ u16 max_pf_txqs; /* Total Tx queues PF wide */ u16 max_pf_rxqs; /* Total Rx queues PF wide */ - u16 num_lan_msix; /* Total MSIX vectors for base driver */ + struct ice_pf_msix msix; u16 num_lan_tx; /* num LAN Tx queues setup */ u16 num_lan_rx; /* num LAN Rx queues setup */ u16 next_vsi; /* Next free slot in pf->vsi[] - 0-based! */ @@ -1047,10 +1046,5 @@ static inline void ice_clear_rdma_cap(struct ice_pf *pf) clear_bit(ICE_FLAG_RDMA_ENA, pf->flags); } -static inline enum ice_phy_model ice_get_phy_model(const struct ice_hw *hw) -{ - return hw->ptp.phy_model; -} - extern const struct xdp_metadata_ops ice_xdp_md_ops; #endif /* _ICE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index b2af8e3586f7..b3234a55a253 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -473,9 +473,6 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) */ if (vsi->type != ICE_VSI_VF) ice_write_qrxflxp_cntxt(hw, pf_q, rxdid, 0x3, true); - else - ice_write_qrxflxp_cntxt(hw, pf_q, ICE_RXDID_LEGACY_1, 0x3, - false); /* Absolute queue number out of 2K needs to be passed */ err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q); @@ -801,13 +798,11 @@ int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi) return 0; err_out: - while (v_idx--) - ice_free_q_vector(vsi, v_idx); - dev_err(dev, "Failed to allocate %d q_vector for VSI %d, ret=%d\n", - vsi->num_q_vectors, vsi->vsi_num, err); - vsi->num_q_vectors = 0; - return err; + dev_info(dev, "Failed to allocate %d q_vectors for VSI %d, new value %d", + vsi->num_q_vectors, vsi->vsi_num, v_idx); + vsi->num_q_vectors = v_idx; + return v_idx ? 0 : err; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 7a2a2e8da8fa..aaa592ffd2d8 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -186,7 +186,7 @@ static int ice_set_mac_type(struct ice_hw *hw) * ice_is_generic_mac - check if device's mac_type is generic * @hw: pointer to the hardware structure * - * Return: true if mac_type is generic (with SBQ support), false if not + * Return: true if mac_type is ICE_MAC_GENERIC*, false otherwise. */ bool ice_is_generic_mac(struct ice_hw *hw) { @@ -195,120 +195,6 @@ bool ice_is_generic_mac(struct ice_hw *hw) } /** - * ice_is_e810 - * @hw: pointer to the hardware structure - * - * returns true if the device is E810 based, false if not. - */ -bool ice_is_e810(struct ice_hw *hw) -{ - return hw->mac_type == ICE_MAC_E810; -} - -/** - * ice_is_e810t - * @hw: pointer to the hardware structure - * - * returns true if the device is E810T based, false if not. - */ -bool ice_is_e810t(struct ice_hw *hw) -{ - switch (hw->device_id) { - case ICE_DEV_ID_E810C_SFP: - switch (hw->subsystem_device_id) { - case ICE_SUBDEV_ID_E810T: - case ICE_SUBDEV_ID_E810T2: - case ICE_SUBDEV_ID_E810T3: - case ICE_SUBDEV_ID_E810T4: - case ICE_SUBDEV_ID_E810T6: - case ICE_SUBDEV_ID_E810T7: - return true; - } - break; - case ICE_DEV_ID_E810C_QSFP: - switch (hw->subsystem_device_id) { - case ICE_SUBDEV_ID_E810T2: - case ICE_SUBDEV_ID_E810T3: - case ICE_SUBDEV_ID_E810T5: - return true; - } - break; - default: - break; - } - - return false; -} - -/** - * ice_is_e822 - Check if a device is E822 family device - * @hw: pointer to the hardware structure - * - * Return: true if the device is E822 based, false if not. - */ -bool ice_is_e822(struct ice_hw *hw) -{ - switch (hw->device_id) { - case ICE_DEV_ID_E822C_BACKPLANE: - case ICE_DEV_ID_E822C_QSFP: - case ICE_DEV_ID_E822C_SFP: - case ICE_DEV_ID_E822C_10G_BASE_T: - case ICE_DEV_ID_E822C_SGMII: - case ICE_DEV_ID_E822L_BACKPLANE: - case ICE_DEV_ID_E822L_SFP: - case ICE_DEV_ID_E822L_10G_BASE_T: - case ICE_DEV_ID_E822L_SGMII: - return true; - default: - return false; - } -} - -/** - * ice_is_e823 - * @hw: pointer to the hardware structure - * - * returns true if the device is E823-L or E823-C based, false if not. - */ -bool ice_is_e823(struct ice_hw *hw) -{ - switch (hw->device_id) { - case ICE_DEV_ID_E823L_BACKPLANE: - case ICE_DEV_ID_E823L_SFP: - case ICE_DEV_ID_E823L_10G_BASE_T: - case ICE_DEV_ID_E823L_1GBE: - case ICE_DEV_ID_E823L_QSFP: - case ICE_DEV_ID_E823C_BACKPLANE: - case ICE_DEV_ID_E823C_QSFP: - case ICE_DEV_ID_E823C_SFP: - case ICE_DEV_ID_E823C_10G_BASE_T: - case ICE_DEV_ID_E823C_SGMII: - return true; - default: - return false; - } -} - -/** - * ice_is_e825c - Check if a device is E825C family device - * @hw: pointer to the hardware structure - * - * Return: true if the device is E825-C based, false if not. - */ -bool ice_is_e825c(struct ice_hw *hw) -{ - switch (hw->device_id) { - case ICE_DEV_ID_E825C_BACKPLANE: - case ICE_DEV_ID_E825C_QSFP: - case ICE_DEV_ID_E825C_SFP: - case ICE_DEV_ID_E825C_SGMII: - return true; - default: - return false; - } -} - -/** * ice_is_pf_c827 - check if pf contains c827 phy * @hw: pointer to the hw struct * @@ -2408,7 +2294,7 @@ ice_parse_1588_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p, info->tmr_index_owned = ((number & ICE_TS_TMR_IDX_OWND_M) != 0); info->tmr_index_assoc = ((number & ICE_TS_TMR_IDX_ASSOC_M) != 0); - if (!ice_is_e825c(hw)) { + if (hw->mac_type != ICE_MAC_GENERIC_3K_E825) { info->clk_freq = FIELD_GET(ICE_TS_CLK_FREQ_M, number); info->clk_src = ((number & ICE_TS_CLK_SRC_M) != 0); } else { @@ -5765,6 +5651,96 @@ ice_aq_write_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr, } /** + * ice_get_pca9575_handle - find and return the PCA9575 controller + * @hw: pointer to the hw struct + * @pca9575_handle: GPIO controller's handle + * + * Find and return the GPIO controller's handle in the netlist. + * When found - the value will be cached in the hw structure and following calls + * will return cached value. + * + * Return: 0 on success, -ENXIO when there's no PCA9575 present. + */ +int ice_get_pca9575_handle(struct ice_hw *hw, u16 *pca9575_handle) +{ + struct ice_aqc_get_link_topo *cmd; + struct ice_aq_desc desc; + int err; + u8 idx; + + /* If handle was read previously return cached value */ + if (hw->io_expander_handle) { + *pca9575_handle = hw->io_expander_handle; + return 0; + } + +#define SW_PCA9575_SFP_TOPO_IDX 2 +#define SW_PCA9575_QSFP_TOPO_IDX 1 + + /* Check if the SW IO expander controlling SMA exists in the netlist. */ + if (hw->device_id == ICE_DEV_ID_E810C_SFP) + idx = SW_PCA9575_SFP_TOPO_IDX; + else if (hw->device_id == ICE_DEV_ID_E810C_QSFP) + idx = SW_PCA9575_QSFP_TOPO_IDX; + else + return -ENXIO; + + /* If handle was not detected read it from the netlist */ + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_topo); + cmd = &desc.params.get_link_topo; + cmd->addr.topo_params.node_type_ctx = + ICE_AQC_LINK_TOPO_NODE_TYPE_GPIO_CTRL; + cmd->addr.topo_params.index = idx; + + err = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); + if (err) + return -ENXIO; + + /* Verify if we found the right IO expander type */ + if (desc.params.get_link_topo.node_part_num != + ICE_AQC_GET_LINK_TOPO_NODE_NR_PCA9575) + return -ENXIO; + + /* If present save the handle and return it */ + hw->io_expander_handle = + le16_to_cpu(desc.params.get_link_topo.addr.handle); + *pca9575_handle = hw->io_expander_handle; + + return 0; +} + +/** + * ice_read_pca9575_reg - read the register from the PCA9575 controller + * @hw: pointer to the hw struct + * @offset: GPIO controller register offset + * @data: pointer to data to be read from the GPIO controller + * + * Return: 0 on success, negative error code otherwise. + */ +int ice_read_pca9575_reg(struct ice_hw *hw, u8 offset, u8 *data) +{ + struct ice_aqc_link_topo_addr link_topo; + __le16 addr; + u16 handle; + int err; + + memset(&link_topo, 0, sizeof(link_topo)); + + err = ice_get_pca9575_handle(hw, &handle); + if (err) + return err; + + link_topo.handle = cpu_to_le16(handle); + link_topo.topo_params.node_type_ctx = + FIELD_PREP(ICE_AQC_LINK_TOPO_NODE_CTX_M, + ICE_AQC_LINK_TOPO_NODE_CTX_PROVIDED); + + addr = cpu_to_le16((u16)offset); + + return ice_aq_read_i2c(hw, link_topo, 0, addr, 1, data, NULL); +} + +/** * ice_aq_set_gpio * @hw: pointer to the hw struct * @gpio_ctrl_handle: GPIO controller node handle diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 15ba38543738..9b00aa0ddf10 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -131,7 +131,6 @@ int ice_aq_manage_mac_write(struct ice_hw *hw, const u8 *mac_addr, u8 flags, struct ice_sq_cd *cd); bool ice_is_generic_mac(struct ice_hw *hw); -bool ice_is_e810(struct ice_hw *hw); int ice_clear_pf_cfg(struct ice_hw *hw); int ice_aq_set_phy_cfg(struct ice_hw *hw, struct ice_port_info *pi, @@ -276,10 +275,6 @@ ice_stat_update40(struct ice_hw *hw, u32 reg, bool prev_stat_loaded, void ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded, u64 *prev_stat, u64 *cur_stat); -bool ice_is_e810t(struct ice_hw *hw); -bool ice_is_e822(struct ice_hw *hw); -bool ice_is_e823(struct ice_hw *hw); -bool ice_is_e825c(struct ice_hw *hw); int ice_sched_query_elem(struct ice_hw *hw, u32 node_teid, struct ice_aqc_txsched_elem_data *buf); @@ -306,5 +301,7 @@ int ice_aq_write_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr, u16 bus_addr, __le16 addr, u8 params, const u8 *data, struct ice_sq_cd *cd); +int ice_get_pca9575_handle(struct ice_hw *hw, u16 *pca9575_handle); +int ice_read_pca9575_reg(struct ice_hw *hw, u8 offset, u8 *data); bool ice_fw_supports_report_dflt_cfg(struct ice_hw *hw); #endif /* _ICE_COMMON_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.c b/drivers/net/ethernet/intel/ice/ice_ddp.c index 03988be03729..69d5b1a28491 100644 --- a/drivers/net/ethernet/intel/ice/ice_ddp.c +++ b/drivers/net/ethernet/intel/ice/ice_ddp.c @@ -2345,14 +2345,14 @@ ice_get_set_tx_topo(struct ice_hw *hw, u8 *buf, u16 buf_size, cmd->set_flags |= ICE_AQC_TX_TOPO_FLAGS_SRC_RAM | ICE_AQC_TX_TOPO_FLAGS_LOAD_NEW; - if (ice_is_e825c(hw)) + if (hw->mac_type == ICE_MAC_GENERIC_3K_E825) desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); } else { ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_tx_topo); cmd->get_flags = ICE_AQC_TX_TOPO_GET_RAM; } - if (!ice_is_e825c(hw)) + if (hw->mac_type != ICE_MAC_GENERIC_3K_E825) desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index f241493a6ac8..b0805704834d 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3788,8 +3788,7 @@ ice_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info) */ static int ice_get_max_txq(struct ice_pf *pf) { - return min3(pf->num_lan_msix, (u16)num_online_cpus(), - (u16)pf->hw.func_caps.common_cap.num_txq); + return min(num_online_cpus(), pf->hw.func_caps.common_cap.num_txq); } /** @@ -3798,8 +3797,7 @@ static int ice_get_max_txq(struct ice_pf *pf) */ static int ice_get_max_rxq(struct ice_pf *pf) { - return min3(pf->num_lan_msix, (u16)num_online_cpus(), - (u16)pf->hw.func_caps.common_cap.num_rxq); + return min(num_online_cpus(), pf->hw.func_caps.common_cap.num_rxq); } /** @@ -3817,8 +3815,7 @@ static u32 ice_get_combined_cnt(struct ice_vsi *vsi) ice_for_each_q_vector(vsi, q_idx) { struct ice_q_vector *q_vector = vsi->q_vectors[q_idx]; - if (q_vector->rx.rx_ring && q_vector->tx.tx_ring) - combined++; + combined += min(q_vector->num_ring_tx, q_vector->num_ring_rx); } return combined; diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c index ee9862ddfe15..1d118171de37 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c @@ -1605,22 +1605,19 @@ void ice_fdir_replay_fltrs(struct ice_pf *pf) */ int ice_fdir_create_dflt_rules(struct ice_pf *pf) { + const enum ice_fltr_ptype dflt_rules[] = { + ICE_FLTR_PTYPE_NONF_IPV4_TCP, ICE_FLTR_PTYPE_NONF_IPV4_UDP, + ICE_FLTR_PTYPE_NONF_IPV6_TCP, ICE_FLTR_PTYPE_NONF_IPV6_UDP, + }; int err; /* Create perfect TCP and UDP rules in hardware. */ - err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV4_TCP); - if (err) - return err; - - err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV4_UDP); - if (err) - return err; + for (int i = 0; i < ARRAY_SIZE(dflt_rules); i++) { + err = ice_create_init_fdir_rule(pf, dflt_rules[i]); - err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV6_TCP); - if (err) - return err; - - err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV6_UDP); + if (err) + break; + } return err; } diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.c b/drivers/net/ethernet/intel/ice/ice_gnss.c index b2148dbe49b2..6b26290452d4 100644 --- a/drivers/net/ethernet/intel/ice/ice_gnss.c +++ b/drivers/net/ethernet/intel/ice/ice_gnss.c @@ -381,32 +381,23 @@ void ice_gnss_exit(struct ice_pf *pf) } /** - * ice_gnss_is_gps_present - Check if GPS HW is present + * ice_gnss_is_module_present - Check if GNSS HW is present * @hw: pointer to HW struct + * + * Return: true when GNSS is present, false otherwise. */ -bool ice_gnss_is_gps_present(struct ice_hw *hw) +bool ice_gnss_is_module_present(struct ice_hw *hw) { - if (!hw->func_caps.ts_func_info.src_tmr_owned) - return false; + int err; + u8 data; - if (!ice_is_gps_in_netlist(hw)) + if (!hw->func_caps.ts_func_info.src_tmr_owned || + !ice_is_gps_in_netlist(hw)) return false; -#if IS_ENABLED(CONFIG_PTP_1588_CLOCK) - if (ice_is_e810t(hw)) { - int err; - u8 data; - - err = ice_read_pca9575_reg(hw, ICE_PCA9575_P0_IN, &data); - if (err || !!(data & ICE_P0_GNSS_PRSNT_N)) - return false; - } else { - return false; - } -#else - if (!ice_is_e810t(hw)) + err = ice_read_pca9575_reg(hw, ICE_PCA9575_P0_IN, &data); + if (err || !!(data & ICE_P0_GNSS_PRSNT_N)) return false; -#endif /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */ return true; } diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.h b/drivers/net/ethernet/intel/ice/ice_gnss.h index 75e567ad7059..15daf603ed7b 100644 --- a/drivers/net/ethernet/intel/ice/ice_gnss.h +++ b/drivers/net/ethernet/intel/ice/ice_gnss.h @@ -37,11 +37,11 @@ struct gnss_serial { #if IS_ENABLED(CONFIG_GNSS) void ice_gnss_init(struct ice_pf *pf); void ice_gnss_exit(struct ice_pf *pf); -bool ice_gnss_is_gps_present(struct ice_hw *hw); +bool ice_gnss_is_module_present(struct ice_hw *hw); #else static inline void ice_gnss_init(struct ice_pf *pf) { } static inline void ice_gnss_exit(struct ice_pf *pf) { } -static inline bool ice_gnss_is_gps_present(struct ice_hw *hw) +static inline bool ice_gnss_is_module_present(struct ice_hw *hw) { return false; } diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index dc88aea9f473..aa4bfbcf85d2 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -541,10 +541,22 @@ #define PFPM_WUS_MAG_M BIT(1) #define PFPM_WUS_MNG_M BIT(3) #define PFPM_WUS_FW_RST_WK_M BIT(31) +#define E830_PRTMAC_TS_TX_MEM_VALID_H 0x001E2020 +#define E830_PRTMAC_TS_TX_MEM_VALID_L 0x001E2000 #define E830_PRTMAC_CL01_PS_QNT 0x001E32A0 #define E830_PRTMAC_CL01_PS_QNT_CL0_M GENMASK(15, 0) #define E830_PRTMAC_CL01_QNT_THR 0x001E3320 #define E830_PRTMAC_CL01_QNT_THR_CL0_M GENMASK(15, 0) +#define E830_PRTTSYN_TXTIME_H(_i) (0x001E5800 + ((_i) * 32)) +#define E830_PRTTSYN_TXTIME_L(_i) (0x001E5000 + ((_i) * 32)) +#define E830_GLPTM_ART_CTL 0x00088B50 +#define E830_GLPTM_ART_CTL_ACTIVE_M BIT(0) +#define E830_GLPTM_ART_TIME_H 0x00088B54 +#define E830_GLPTM_ART_TIME_L 0x00088B58 +#define E830_GLTSYN_PTMTIME_H(_i) (0x00088B48 + ((_i) * 4)) +#define E830_GLTSYN_PTMTIME_L(_i) (0x00088B40 + ((_i) * 4)) +#define E830_PFPTM_SEM 0x00088B00 +#define E830_PFPTM_SEM_BUSY_M BIT(0) #define VFINT_DYN_CTLN(_i) (0x00003800 + ((_i) * 4)) #define VFINT_DYN_CTLN_CLEARPBA_M BIT(1) #define E830_MBX_PF_IN_FLIGHT_VF_MSGS_THRESH 0x00234000 diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c index 145b27f2a4ce..bab3e81cad5d 100644 --- a/drivers/net/ethernet/intel/ice/ice_idc.c +++ b/drivers/net/ethernet/intel/ice/ice_idc.c @@ -228,61 +228,34 @@ void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos) } EXPORT_SYMBOL_GPL(ice_get_qos_params); -/** - * ice_alloc_rdma_qvectors - Allocate vector resources for RDMA driver - * @pf: board private structure to initialize - */ -static int ice_alloc_rdma_qvectors(struct ice_pf *pf) +int ice_alloc_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry) { - if (ice_is_rdma_ena(pf)) { - int i; - - pf->msix_entries = kcalloc(pf->num_rdma_msix, - sizeof(*pf->msix_entries), - GFP_KERNEL); - if (!pf->msix_entries) - return -ENOMEM; + struct msi_map map = ice_alloc_irq(pf, true); - /* RDMA is the only user of pf->msix_entries array */ - pf->rdma_base_vector = 0; - - for (i = 0; i < pf->num_rdma_msix; i++) { - struct msix_entry *entry = &pf->msix_entries[i]; - struct msi_map map; + if (map.index < 0) + return -ENOMEM; - map = ice_alloc_irq(pf, false); - if (map.index < 0) - break; + entry->entry = map.index; + entry->vector = map.virq; - entry->entry = map.index; - entry->vector = map.virq; - } - } return 0; } +EXPORT_SYMBOL_GPL(ice_alloc_rdma_qvector); /** * ice_free_rdma_qvector - free vector resources reserved for RDMA driver * @pf: board private structure to initialize + * @entry: MSI-X entry to be removed */ -static void ice_free_rdma_qvector(struct ice_pf *pf) +void ice_free_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry) { - int i; - - if (!pf->msix_entries) - return; - - for (i = 0; i < pf->num_rdma_msix; i++) { - struct msi_map map; + struct msi_map map; - map.index = pf->msix_entries[i].entry; - map.virq = pf->msix_entries[i].vector; - ice_free_irq(pf, map); - } - - kfree(pf->msix_entries); - pf->msix_entries = NULL; + map.index = entry->entry; + map.virq = entry->vector; + ice_free_irq(pf, map); } +EXPORT_SYMBOL_GPL(ice_free_rdma_qvector); /** * ice_adev_release - function to be mapped to AUX dev's release op @@ -382,12 +355,6 @@ int ice_init_rdma(struct ice_pf *pf) return -ENOMEM; } - /* Reserve vector resources */ - ret = ice_alloc_rdma_qvectors(pf); - if (ret < 0) { - dev_err(dev, "failed to reserve vectors for RDMA\n"); - goto err_reserve_rdma_qvector; - } pf->rdma_mode |= IIDC_RDMA_PROTOCOL_ROCEV2; ret = ice_plug_aux_dev(pf); if (ret) @@ -395,8 +362,6 @@ int ice_init_rdma(struct ice_pf *pf) return 0; err_plug_aux_dev: - ice_free_rdma_qvector(pf); -err_reserve_rdma_qvector: pf->adev = NULL; xa_erase(&ice_aux_id, pf->aux_idx); return ret; @@ -412,6 +377,5 @@ void ice_deinit_rdma(struct ice_pf *pf) return; ice_unplug_aux_dev(pf); - ice_free_rdma_qvector(pf); xa_erase(&ice_aux_id, pf->aux_idx); } diff --git a/drivers/net/ethernet/intel/ice/ice_irq.c b/drivers/net/ethernet/intel/ice/ice_irq.c index ad82ff7d1995..30801fd375f0 100644 --- a/drivers/net/ethernet/intel/ice/ice_irq.c +++ b/drivers/net/ethernet/intel/ice/ice_irq.c @@ -20,6 +20,19 @@ ice_init_irq_tracker(struct ice_pf *pf, unsigned int max_vectors, xa_init_flags(&pf->irq_tracker.entries, XA_FLAGS_ALLOC); } +static int +ice_init_virt_irq_tracker(struct ice_pf *pf, u32 base, u32 num_entries) +{ + pf->virt_irq_tracker.bm = bitmap_zalloc(num_entries, GFP_KERNEL); + if (!pf->virt_irq_tracker.bm) + return -ENOMEM; + + pf->virt_irq_tracker.num_entries = num_entries; + pf->virt_irq_tracker.base = base; + + return 0; +} + /** * ice_deinit_irq_tracker - free xarray tracker * @pf: board private structure @@ -29,6 +42,11 @@ static void ice_deinit_irq_tracker(struct ice_pf *pf) xa_destroy(&pf->irq_tracker.entries); } +static void ice_deinit_virt_irq_tracker(struct ice_pf *pf) +{ + bitmap_free(pf->virt_irq_tracker.bm); +} + /** * ice_free_irq_res - free a block of resources * @pf: board private structure @@ -45,7 +63,7 @@ static void ice_free_irq_res(struct ice_pf *pf, u16 index) /** * ice_get_irq_res - get an interrupt resource * @pf: board private structure - * @dyn_only: force entry to be dynamically allocated + * @dyn_allowed: allow entry to be dynamically allocated * * Allocate new irq entry in the free slot of the tracker. Since xarray * is used, always allocate new entry at the lowest possible index. Set @@ -53,11 +71,12 @@ static void ice_free_irq_res(struct ice_pf *pf, u16 index) * * Returns allocated irq entry or NULL on failure. */ -static struct ice_irq_entry *ice_get_irq_res(struct ice_pf *pf, bool dyn_only) +static struct ice_irq_entry *ice_get_irq_res(struct ice_pf *pf, + bool dyn_allowed) { - struct xa_limit limit = { .max = pf->irq_tracker.num_entries, + struct xa_limit limit = { .max = pf->irq_tracker.num_entries - 1, .min = 0 }; - unsigned int num_static = pf->irq_tracker.num_static; + unsigned int num_static = pf->irq_tracker.num_static - 1; struct ice_irq_entry *entry; unsigned int index; int ret; @@ -66,9 +85,9 @@ static struct ice_irq_entry *ice_get_irq_res(struct ice_pf *pf, bool dyn_only) if (!entry) return NULL; - /* skip preallocated entries if the caller says so */ - if (dyn_only) - limit.min = num_static; + /* only already allocated if the caller says so */ + if (!dyn_allowed) + limit.max = num_static; ret = xa_alloc(&pf->irq_tracker.entries, &index, entry, limit, GFP_KERNEL); @@ -78,161 +97,18 @@ static struct ice_irq_entry *ice_get_irq_res(struct ice_pf *pf, bool dyn_only) entry = NULL; } else { entry->index = index; - entry->dynamic = index >= num_static; + entry->dynamic = index > num_static; } return entry; } -/** - * ice_reduce_msix_usage - Reduce usage of MSI-X vectors - * @pf: board private structure - * @v_remain: number of remaining MSI-X vectors to be distributed - * - * Reduce the usage of MSI-X vectors when entire request cannot be fulfilled. - * pf->num_lan_msix and pf->num_rdma_msix values are set based on number of - * remaining vectors. - */ -static void ice_reduce_msix_usage(struct ice_pf *pf, int v_remain) -{ - int v_rdma; - - if (!ice_is_rdma_ena(pf)) { - pf->num_lan_msix = v_remain; - return; - } - - /* RDMA needs at least 1 interrupt in addition to AEQ MSIX */ - v_rdma = ICE_RDMA_NUM_AEQ_MSIX + 1; - - if (v_remain < ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_RDMA_MSIX) { - dev_warn(ice_pf_to_dev(pf), "Not enough MSI-X vectors to support RDMA.\n"); - clear_bit(ICE_FLAG_RDMA_ENA, pf->flags); - - pf->num_rdma_msix = 0; - pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX; - } else if ((v_remain < ICE_MIN_LAN_TXRX_MSIX + v_rdma) || - (v_remain - v_rdma < v_rdma)) { - /* Support minimum RDMA and give remaining vectors to LAN MSIX - */ - pf->num_rdma_msix = ICE_MIN_RDMA_MSIX; - pf->num_lan_msix = v_remain - ICE_MIN_RDMA_MSIX; - } else { - /* Split remaining MSIX with RDMA after accounting for AEQ MSIX - */ - pf->num_rdma_msix = (v_remain - ICE_RDMA_NUM_AEQ_MSIX) / 2 + - ICE_RDMA_NUM_AEQ_MSIX; - pf->num_lan_msix = v_remain - pf->num_rdma_msix; - } -} - -/** - * ice_ena_msix_range - Request a range of MSIX vectors from the OS - * @pf: board private structure - * - * Compute the number of MSIX vectors wanted and request from the OS. Adjust - * device usage if there are not enough vectors. Return the number of vectors - * reserved or negative on failure. - */ -static int ice_ena_msix_range(struct ice_pf *pf) +#define ICE_RDMA_AEQ_MSIX 1 +static int ice_get_default_msix_amount(struct ice_pf *pf) { - int num_cpus, hw_num_msix, v_other, v_wanted, v_actual; - struct device *dev = ice_pf_to_dev(pf); - int err; - - hw_num_msix = pf->hw.func_caps.common_cap.num_msix_vectors; - num_cpus = num_online_cpus(); - - /* LAN miscellaneous handler */ - v_other = ICE_MIN_LAN_OICR_MSIX; - - /* Flow Director */ - if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) - v_other += ICE_FDIR_MSIX; - - /* switchdev */ - v_other += ICE_ESWITCH_MSIX; - - v_wanted = v_other; - - /* LAN traffic */ - pf->num_lan_msix = num_cpus; - v_wanted += pf->num_lan_msix; - - /* RDMA auxiliary driver */ - if (ice_is_rdma_ena(pf)) { - pf->num_rdma_msix = num_cpus + ICE_RDMA_NUM_AEQ_MSIX; - v_wanted += pf->num_rdma_msix; - } - - if (v_wanted > hw_num_msix) { - int v_remain; - - dev_warn(dev, "not enough device MSI-X vectors. wanted = %d, available = %d\n", - v_wanted, hw_num_msix); - - if (hw_num_msix < ICE_MIN_MSIX) { - err = -ERANGE; - goto exit_err; - } - - v_remain = hw_num_msix - v_other; - if (v_remain < ICE_MIN_LAN_TXRX_MSIX) { - v_other = ICE_MIN_MSIX - ICE_MIN_LAN_TXRX_MSIX; - v_remain = ICE_MIN_LAN_TXRX_MSIX; - } - - ice_reduce_msix_usage(pf, v_remain); - v_wanted = pf->num_lan_msix + pf->num_rdma_msix + v_other; - - dev_notice(dev, "Reducing request to %d MSI-X vectors for LAN traffic.\n", - pf->num_lan_msix); - if (ice_is_rdma_ena(pf)) - dev_notice(dev, "Reducing request to %d MSI-X vectors for RDMA.\n", - pf->num_rdma_msix); - } - - /* actually reserve the vectors */ - v_actual = pci_alloc_irq_vectors(pf->pdev, ICE_MIN_MSIX, v_wanted, - PCI_IRQ_MSIX); - if (v_actual < 0) { - dev_err(dev, "unable to reserve MSI-X vectors\n"); - err = v_actual; - goto exit_err; - } - - if (v_actual < v_wanted) { - dev_warn(dev, "not enough OS MSI-X vectors. requested = %d, obtained = %d\n", - v_wanted, v_actual); - - if (v_actual < ICE_MIN_MSIX) { - /* error if we can't get minimum vectors */ - pci_free_irq_vectors(pf->pdev); - err = -ERANGE; - goto exit_err; - } else { - int v_remain = v_actual - v_other; - - if (v_remain < ICE_MIN_LAN_TXRX_MSIX) - v_remain = ICE_MIN_LAN_TXRX_MSIX; - - ice_reduce_msix_usage(pf, v_remain); - - dev_notice(dev, "Enabled %d MSI-X vectors for LAN traffic.\n", - pf->num_lan_msix); - - if (ice_is_rdma_ena(pf)) - dev_notice(dev, "Enabled %d MSI-X vectors for RDMA.\n", - pf->num_rdma_msix); - } - } - - return v_actual; - -exit_err: - pf->num_rdma_msix = 0; - pf->num_lan_msix = 0; - return err; + return ICE_MIN_LAN_OICR_MSIX + num_online_cpus() + + (test_bit(ICE_FLAG_FD_ENA, pf->flags) ? ICE_FDIR_MSIX : 0) + + (ice_is_rdma_ena(pf) ? num_online_cpus() + ICE_RDMA_AEQ_MSIX : 0); } /** @@ -243,6 +119,7 @@ void ice_clear_interrupt_scheme(struct ice_pf *pf) { pci_free_irq_vectors(pf->pdev); ice_deinit_irq_tracker(pf); + ice_deinit_virt_irq_tracker(pf); } /** @@ -252,27 +129,38 @@ void ice_clear_interrupt_scheme(struct ice_pf *pf) int ice_init_interrupt_scheme(struct ice_pf *pf) { int total_vectors = pf->hw.func_caps.common_cap.num_msix_vectors; - int vectors, max_vectors; + int vectors; - vectors = ice_ena_msix_range(pf); + /* load default PF MSI-X range */ + if (!pf->msix.min) + pf->msix.min = ICE_MIN_MSIX; - if (vectors < 0) - return -ENOMEM; + if (!pf->msix.max) + pf->msix.max = min(total_vectors, + ice_get_default_msix_amount(pf)); + + pf->msix.total = total_vectors; + pf->msix.rest = total_vectors - pf->msix.max; if (pci_msix_can_alloc_dyn(pf->pdev)) - max_vectors = total_vectors; + vectors = pf->msix.min; else - max_vectors = vectors; + vectors = pf->msix.max; + + vectors = pci_alloc_irq_vectors(pf->pdev, pf->msix.min, vectors, + PCI_IRQ_MSIX); + if (vectors < 0) + return vectors; - ice_init_irq_tracker(pf, max_vectors, vectors); + ice_init_irq_tracker(pf, pf->msix.max, vectors); - return 0; + return ice_init_virt_irq_tracker(pf, pf->msix.max, pf->msix.rest); } /** * ice_alloc_irq - Allocate new interrupt vector * @pf: board private structure - * @dyn_only: force dynamic allocation of the interrupt + * @dyn_allowed: allow dynamic allocation of the interrupt * * Allocate new interrupt vector for a given owner id. * return struct msi_map with interrupt details and track @@ -285,27 +173,22 @@ int ice_init_interrupt_scheme(struct ice_pf *pf) * interrupt will be allocated with pci_msix_alloc_irq_at. * * Some callers may only support dynamically allocated interrupts. - * This is indicated with dyn_only flag. + * This is indicated with dyn_allowed flag. * * On failure, return map with negative .index. The caller * is expected to check returned map index. * */ -struct msi_map ice_alloc_irq(struct ice_pf *pf, bool dyn_only) +struct msi_map ice_alloc_irq(struct ice_pf *pf, bool dyn_allowed) { - int sriov_base_vector = pf->sriov_base_vector; struct msi_map map = { .index = -ENOENT }; struct device *dev = ice_pf_to_dev(pf); struct ice_irq_entry *entry; - entry = ice_get_irq_res(pf, dyn_only); + entry = ice_get_irq_res(pf, dyn_allowed); if (!entry) return map; - /* fail if we're about to violate SRIOV vectors space */ - if (sriov_base_vector && entry->index >= sriov_base_vector) - goto exit_free_res; - if (pci_msix_can_alloc_dyn(pf->pdev) && entry->dynamic) { map = pci_msix_alloc_irq_at(pf->pdev, entry->index, NULL); if (map.index < 0) @@ -353,26 +236,40 @@ void ice_free_irq(struct ice_pf *pf, struct msi_map map) } /** - * ice_get_max_used_msix_vector - Get the max used interrupt vector - * @pf: board private structure + * ice_virt_get_irqs - get irqs for SR-IOV usacase + * @pf: pointer to PF structure + * @needed: number of irqs to get * - * Return index of maximum used interrupt vectors with respect to the - * beginning of the MSIX table. Take into account that some interrupts - * may have been dynamically allocated after MSIX was initially enabled. + * This returns the first MSI-X vector index in PF space that is used by this + * VF. This index is used when accessing PF relative registers such as + * GLINT_VECT2FUNC and GLINT_DYN_CTL. + * This will always be the OICR index in the AVF driver so any functionality + * using vf->first_vector_idx for queue configuration_id: id of VF which will + * use this irqs */ -int ice_get_max_used_msix_vector(struct ice_pf *pf) +int ice_virt_get_irqs(struct ice_pf *pf, u32 needed) { - unsigned long start, index, max_idx; - void *entry; + int res = bitmap_find_next_zero_area(pf->virt_irq_tracker.bm, + pf->virt_irq_tracker.num_entries, + 0, needed, 0); - /* Treat all preallocated interrupts as used */ - start = pf->irq_tracker.num_static; - max_idx = start - 1; + if (res >= pf->virt_irq_tracker.num_entries) + return -ENOENT; - xa_for_each_start(&pf->irq_tracker.entries, index, entry, start) { - if (index > max_idx) - max_idx = index; - } + bitmap_set(pf->virt_irq_tracker.bm, res, needed); + + /* conversion from number in bitmap to global irq index */ + return res + pf->virt_irq_tracker.base; +} - return max_idx; +/** + * ice_virt_free_irqs - free irqs used by the VF + * @pf: pointer to PF structure + * @index: first index to be free + * @irqs: number of irqs to free + */ +void ice_virt_free_irqs(struct ice_pf *pf, u32 index, u32 irqs) +{ + bitmap_clear(pf->virt_irq_tracker.bm, index - pf->virt_irq_tracker.base, + irqs); } diff --git a/drivers/net/ethernet/intel/ice/ice_irq.h b/drivers/net/ethernet/intel/ice/ice_irq.h index f35efc08575e..b2f9dbafd57e 100644 --- a/drivers/net/ethernet/intel/ice/ice_irq.h +++ b/drivers/net/ethernet/intel/ice/ice_irq.h @@ -15,11 +15,22 @@ struct ice_irq_tracker { u16 num_static; /* preallocated entries */ }; +struct ice_virt_irq_tracker { + unsigned long *bm; /* bitmap to track irq usage */ + u32 num_entries; + /* First MSIX vector used by SR-IOV VFs. Calculated by subtracting the + * number of MSIX vectors needed for all SR-IOV VFs from the number of + * MSIX vectors allowed on this PF. + */ + u32 base; +}; + int ice_init_interrupt_scheme(struct ice_pf *pf); void ice_clear_interrupt_scheme(struct ice_pf *pf); struct msi_map ice_alloc_irq(struct ice_pf *pf, bool dyn_only); void ice_free_irq(struct ice_pf *pf, struct msi_map map); -int ice_get_max_used_msix_vector(struct ice_pf *pf); +int ice_virt_get_irqs(struct ice_pf *pf, u32 needed); +void ice_virt_free_irqs(struct ice_pf *pf, u32 index, u32 irqs); #endif diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 38a1c8372180..7f5b229cab05 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -157,6 +157,16 @@ static void ice_vsi_set_num_desc(struct ice_vsi *vsi) } } +static u16 ice_get_rxq_count(struct ice_pf *pf) +{ + return min(ice_get_avail_rxq_count(pf), num_online_cpus()); +} + +static u16 ice_get_txq_count(struct ice_pf *pf) +{ + return min(ice_get_avail_txq_count(pf), num_online_cpus()); +} + /** * ice_vsi_set_num_qs - Set number of queues, descriptors and vectors for a VSI * @vsi: the VSI being configured @@ -178,9 +188,7 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi) vsi->alloc_txq = vsi->req_txq; vsi->num_txq = vsi->req_txq; } else { - vsi->alloc_txq = min3(pf->num_lan_msix, - ice_get_avail_txq_count(pf), - (u16)num_online_cpus()); + vsi->alloc_txq = ice_get_txq_count(pf); } pf->num_lan_tx = vsi->alloc_txq; @@ -193,17 +201,13 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi) vsi->alloc_rxq = vsi->req_rxq; vsi->num_rxq = vsi->req_rxq; } else { - vsi->alloc_rxq = min3(pf->num_lan_msix, - ice_get_avail_rxq_count(pf), - (u16)num_online_cpus()); + vsi->alloc_rxq = ice_get_rxq_count(pf); } } pf->num_lan_rx = vsi->alloc_rxq; - vsi->num_q_vectors = min_t(int, pf->num_lan_msix, - max_t(int, vsi->alloc_rxq, - vsi->alloc_txq)); + vsi->num_q_vectors = max(vsi->alloc_rxq, vsi->alloc_txq); break; case ICE_VSI_SF: vsi->alloc_txq = 1; @@ -567,6 +571,8 @@ ice_vsi_alloc_def(struct ice_vsi *vsi, struct ice_channel *ch) return -ENOMEM; } + vsi->irq_dyn_alloc = pci_msix_can_alloc_dyn(vsi->back->pdev); + switch (vsi->type) { case ICE_VSI_PF: case ICE_VSI_SF: @@ -827,7 +833,13 @@ bool ice_is_safe_mode(struct ice_pf *pf) */ bool ice_is_rdma_ena(struct ice_pf *pf) { - return test_bit(ICE_FLAG_RDMA_ENA, pf->flags); + union devlink_param_value value; + int err; + + err = devl_param_driverinit_value_get(priv_to_devlink(pf), + DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, + &value); + return err ? test_bit(ICE_FLAG_RDMA_ENA, pf->flags) : value.vbool; } /** @@ -1173,12 +1185,11 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi) static void ice_chnl_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) { - struct ice_pf *pf = vsi->back; u16 qcount, qmap; u8 offset = 0; int pow; - qcount = min_t(int, vsi->num_rxq, pf->num_lan_msix); + qcount = vsi->num_rxq; pow = order_base_2(qcount); qmap = FIELD_PREP(ICE_AQ_VSI_TC_Q_OFFSET_M, offset); @@ -1764,9 +1775,8 @@ void ice_update_eth_stats(struct ice_vsi *vsi) * @prio: priority for the RXDID for this queue * @ena_ts: true to enable timestamp and false to disable timestamp */ -void -ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio, - bool ena_ts) +void ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio, + bool ena_ts) { int regval = rd32(hw, QRXFLXP_CNTXT(pf_q)); @@ -3882,7 +3892,7 @@ void ice_init_feature_support(struct ice_pf *pf) ice_set_feature_support(pf, ICE_F_CGU); if (ice_is_clock_mux_in_netlist(&pf->hw)) ice_set_feature_support(pf, ICE_F_SMA_CTRL); - if (ice_gnss_is_gps_present(&pf->hw)) + if (ice_gnss_is_module_present(&pf->hw)) ice_set_feature_support(pf, ICE_F_GNSS); break; default: diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index c3a0fb97c5ee..b084839eb811 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3304,22 +3304,8 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) if (oicr & PFINT_OICR_TSYN_TX_M) { ena_mask &= ~PFINT_OICR_TSYN_TX_M; - if (ice_pf_state_is_nominal(pf) && - pf->hw.dev_caps.ts_dev_info.ts_ll_int_read) { - struct ice_ptp_tx *tx = &pf->ptp.port.tx; - unsigned long flags; - u8 idx; - - spin_lock_irqsave(&tx->lock, flags); - idx = find_next_bit_wrap(tx->in_use, tx->len, - tx->last_ll_ts_idx_read + 1); - if (idx != tx->len) - ice_ptp_req_tx_single_tstamp(tx, idx); - spin_unlock_irqrestore(&tx->lock, flags); - } else if (ice_ptp_pf_handles_tx_interrupt(pf)) { - set_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread); - ret = IRQ_WAKE_THREAD; - } + + ret = ice_ptp_ts_irq(pf); } if (oicr & PFINT_OICR_TSYN_EVNT_M) { @@ -4066,8 +4052,7 @@ static void ice_set_pf_caps(struct ice_pf *pf) } clear_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags); - if (func_caps->common_cap.ieee_1588 && - !(pf->hw.mac_type == ICE_MAC_E830)) + if (func_caps->common_cap.ieee_1588) set_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags); pf->max_pf_txqs = func_caps->common_cap.num_txq; @@ -5087,6 +5072,12 @@ static int ice_init(struct ice_pf *pf) if (err) return err; + if (pf->hw.mac_type == ICE_MAC_E830) { + err = pci_enable_ptm(pf->pdev, NULL); + if (err) + dev_dbg(ice_pf_to_dev(pf), "PCIe PTM not supported by PCIe bus/controller\n"); + } + err = ice_alloc_vsis(pf); if (err) goto err_alloc_vsis; @@ -5186,11 +5177,12 @@ int ice_load(struct ice_pf *pf) ice_napi_add(vsi); + ice_init_features(pf); + err = ice_init_rdma(pf); if (err) goto err_init_rdma; - ice_init_features(pf); ice_service_task_restart(pf); clear_bit(ICE_DOWN, pf->state); @@ -5198,6 +5190,7 @@ int ice_load(struct ice_pf *pf) return 0; err_init_rdma: + ice_deinit_features(pf); ice_tc_indir_block_unregister(vsi); err_tc_indir_block_register: ice_unregister_netdev(vsi); @@ -5221,8 +5214,8 @@ void ice_unload(struct ice_pf *pf) devl_assert_locked(priv_to_devlink(pf)); - ice_deinit_features(pf); ice_deinit_rdma(pf); + ice_deinit_features(pf); ice_tc_indir_block_unregister(vsi); ice_unregister_netdev(vsi); ice_devlink_destroy_pf_port(pf); diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index e26320ce52ca..1bb0033347c7 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -298,8 +298,8 @@ void ice_ptp_restore_timestamp_mode(struct ice_pf *pf) * @sts: Optional parameter for holding a pair of system timestamps from * the system clock. Will be ignored if NULL is given. */ -static u64 -ice_ptp_read_src_clk_reg(struct ice_pf *pf, struct ptp_system_timestamp *sts) +u64 ice_ptp_read_src_clk_reg(struct ice_pf *pf, + struct ptp_system_timestamp *sts) { struct ice_hw *hw = &pf->hw; u32 hi, lo, lo2; @@ -310,6 +310,15 @@ ice_ptp_read_src_clk_reg(struct ice_pf *pf, struct ptp_system_timestamp *sts) /* Read the system timestamp pre PHC read */ ptp_read_system_prets(sts); + if (hw->mac_type == ICE_MAC_E830) { + u64 clk_time = rd64(hw, E830_GLTSYN_TIME_L(tmr_idx)); + + /* Read the system timestamp post PHC read */ + ptp_read_system_postts(sts); + + return clk_time; + } + lo = rd32(hw, GLTSYN_TIME_L(tmr_idx)); /* Read the system timestamp post PHC read */ @@ -972,28 +981,6 @@ ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) } /** - * ice_ptp_init_tx_eth56g - Initialize tracking for Tx timestamps - * @pf: Board private structure - * @tx: the Tx tracking structure to initialize - * @port: the port this structure tracks - * - * Initialize the Tx timestamp tracker for this port. ETH56G PHYs - * have independent memory blocks for all ports. - * - * Return: 0 for success, -ENOMEM when failed to allocate Tx tracker - */ -static int ice_ptp_init_tx_eth56g(struct ice_pf *pf, struct ice_ptp_tx *tx, - u8 port) -{ - tx->block = port; - tx->offset = 0; - tx->len = INDEX_PER_PORT_ETH56G; - tx->has_ready_bitmap = 1; - - return ice_ptp_alloc_tx_tracker(tx); -} - -/** * ice_ptp_init_tx_e82x - Initialize tracking for Tx timestamps * @pf: Board private structure * @tx: the Tx tracking structure to initialize @@ -1003,9 +990,11 @@ static int ice_ptp_init_tx_eth56g(struct ice_pf *pf, struct ice_ptp_tx *tx, * the timestamp block is shared for all ports in the same quad. To avoid * ports using the same timestamp index, logically break the block of * registers into chunks based on the port number. + * + * Return: 0 on success, -ENOMEM when out of memory */ -static int -ice_ptp_init_tx_e82x(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port) +static int ice_ptp_init_tx_e82x(struct ice_pf *pf, struct ice_ptp_tx *tx, + u8 port) { tx->block = ICE_GET_QUAD_NUM(port); tx->offset = (port % ICE_PORTS_PER_QUAD) * INDEX_PER_PORT_E82X; @@ -1016,24 +1005,27 @@ ice_ptp_init_tx_e82x(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port) } /** - * ice_ptp_init_tx_e810 - Initialize tracking for Tx timestamps + * ice_ptp_init_tx - Initialize tracking for Tx timestamps * @pf: Board private structure * @tx: the Tx tracking structure to initialize + * @port: the port this structure tracks + * + * Initialize the Tx timestamp tracker for this PF. For all PHYs except E82X, + * each port has its own block of timestamps, independent of the other ports. * - * Initialize the Tx timestamp tracker for this PF. For E810 devices, each - * port has its own block of timestamps, independent of the other ports. + * Return: 0 on success, -ENOMEM when out of memory */ -static int -ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx) +static int ice_ptp_init_tx(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port) { - tx->block = pf->hw.port_info->lport; + tx->block = port; tx->offset = 0; - tx->len = INDEX_PER_PORT_E810; + tx->len = INDEX_PER_PORT; + /* The E810 PHY does not provide a timestamp ready bitmap. Instead, * verify new timestamps against cached copy of the last read * timestamp. */ - tx->has_ready_bitmap = 0; + tx->has_ready_bitmap = pf->hw.mac_type != ICE_MAC_E810; return ice_ptp_alloc_tx_tracker(tx); } @@ -1318,20 +1310,21 @@ ice_ptp_port_phy_stop(struct ice_ptp_port *ptp_port) struct ice_hw *hw = &pf->hw; int err; - if (ice_is_e810(hw)) - return 0; - mutex_lock(&ptp_port->ps_lock); - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - err = ice_stop_phy_timer_eth56g(hw, port, true); + switch (hw->mac_type) { + case ICE_MAC_E810: + case ICE_MAC_E830: + err = 0; break; - case ICE_PHY_E82X: + case ICE_MAC_GENERIC: kthread_cancel_delayed_work_sync(&ptp_port->ov_work); err = ice_stop_phy_timer_e82x(hw, port, true); break; + case ICE_MAC_GENERIC_3K_E825: + err = ice_stop_phy_timer_eth56g(hw, port, true); + break; default: err = -ENODEV; } @@ -1361,19 +1354,17 @@ ice_ptp_port_phy_restart(struct ice_ptp_port *ptp_port) unsigned long flags; int err; - if (ice_is_e810(hw)) - return 0; - if (!ptp_port->link_up) return ice_ptp_port_phy_stop(ptp_port); mutex_lock(&ptp_port->ps_lock); - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - err = ice_start_phy_timer_eth56g(hw, port); + switch (hw->mac_type) { + case ICE_MAC_E810: + case ICE_MAC_E830: + err = 0; break; - case ICE_PHY_E82X: + case ICE_MAC_GENERIC: /* Start the PHY timer in Vernier mode */ kthread_cancel_delayed_work_sync(&ptp_port->ov_work); @@ -1398,6 +1389,9 @@ ice_ptp_port_phy_restart(struct ice_ptp_port *ptp_port) kthread_queue_delayed_work(pf->ptp.kworker, &ptp_port->ov_work, 0); break; + case ICE_MAC_GENERIC_3K_E825: + err = ice_start_phy_timer_eth56g(hw, port); + break; default: err = -ENODEV; } @@ -1432,12 +1426,14 @@ void ice_ptp_link_change(struct ice_pf *pf, bool linkup) /* Skip HW writes if reset is in progress */ if (pf->hw.reset_ongoing) return; - switch (ice_get_phy_model(hw)) { - case ICE_PHY_E810: - /* Do not reconfigure E810 PHY */ + + switch (hw->mac_type) { + case ICE_MAC_E810: + case ICE_MAC_E830: + /* Do not reconfigure E810 or E830 PHY */ return; - case ICE_PHY_ETH56G: - case ICE_PHY_E82X: + case ICE_MAC_GENERIC: + case ICE_MAC_GENERIC_3K_E825: ice_ptp_port_phy_restart(ptp_port); return; default: @@ -1465,46 +1461,45 @@ static int ice_ptp_cfg_phy_interrupt(struct ice_pf *pf, bool ena, u32 threshold) ice_ptp_reset_ts_memory(hw); - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: { - int port; + switch (hw->mac_type) { + case ICE_MAC_E810: + case ICE_MAC_E830: + return 0; + case ICE_MAC_GENERIC: { + int quad; - for (port = 0; port < hw->ptp.num_lports; port++) { + for (quad = 0; quad < ICE_GET_QUAD_NUM(hw->ptp.num_lports); + quad++) { int err; - err = ice_phy_cfg_intr_eth56g(hw, port, ena, threshold); + err = ice_phy_cfg_intr_e82x(hw, quad, ena, threshold); if (err) { - dev_err(dev, "Failed to configure PHY interrupt for port %d, err %d\n", - port, err); + dev_err(dev, "Failed to configure PHY interrupt for quad %d, err %d\n", + quad, err); return err; } } return 0; } - case ICE_PHY_E82X: { - int quad; + case ICE_MAC_GENERIC_3K_E825: { + int port; - for (quad = 0; quad < ICE_GET_QUAD_NUM(hw->ptp.num_lports); - quad++) { + for (port = 0; port < hw->ptp.num_lports; port++) { int err; - err = ice_phy_cfg_intr_e82x(hw, quad, ena, threshold); + err = ice_phy_cfg_intr_eth56g(hw, port, ena, threshold); if (err) { - dev_err(dev, "Failed to configure PHY interrupt for quad %d, err %d\n", - quad, err); + dev_err(dev, "Failed to configure PHY interrupt for port %d, err %d\n", + port, err); return err; } } return 0; } - case ICE_PHY_E810: - return 0; - case ICE_PHY_UNSUP: + case ICE_MAC_UNKNOWN: default: - dev_warn(dev, "%s: Unexpected PHY model %d\n", __func__, - ice_get_phy_model(hw)); return -EOPNOTSUPP; } } @@ -1740,7 +1735,7 @@ static int ice_ptp_write_perout(struct ice_hw *hw, unsigned int chan, /* 0. Reset mode & out_en in AUX_OUT */ wr32(hw, GLTSYN_AUX_OUT(chan, tmr_idx), 0); - if (ice_is_e825c(hw)) { + if (hw->mac_type == ICE_MAC_GENERIC_3K_E825) { int err; /* Enable/disable CGU 1PPS output for E825C */ @@ -1824,7 +1819,7 @@ static int ice_ptp_cfg_perout(struct ice_pf *pf, struct ptp_perout_request *rq, return ice_ptp_write_perout(hw, rq->index, gpio_pin, 0, 0); if (strncmp(pf->ptp.pin_desc[pin_desc_idx].name, "1PPS", 64) == 0 && - period != NSEC_PER_SEC && hw->ptp.phy_model == ICE_PHY_E82X) { + period != NSEC_PER_SEC && hw->mac_type == ICE_MAC_GENERIC) { dev_err(ice_pf_to_dev(pf), "1PPS pin supports only 1 s period\n"); return -EOPNOTSUPP; } @@ -2078,7 +2073,7 @@ ice_ptp_settime64(struct ptp_clock_info *info, const struct timespec64 *ts) /* For Vernier mode on E82X, we need to recalibrate after new settime. * Start with marking timestamps as invalid. */ - if (ice_get_phy_model(hw) == ICE_PHY_E82X) { + if (hw->mac_type == ICE_MAC_GENERIC) { err = ice_ptp_clear_phy_offset_ready_e82x(hw); if (err) dev_warn(ice_pf_to_dev(pf), "Failed to mark timestamps as invalid before settime\n"); @@ -2102,7 +2097,7 @@ ice_ptp_settime64(struct ptp_clock_info *info, const struct timespec64 *ts) ice_ptp_enable_all_perout(pf); /* Recalibrate and re-enable timestamp blocks for E822/E823 */ - if (ice_get_phy_model(hw) == ICE_PHY_E82X) + if (hw->mac_type == ICE_MAC_GENERIC) ice_ptp_restart_all_phy(pf); exit: if (err) { @@ -2180,93 +2175,157 @@ static int ice_ptp_adjtime(struct ptp_clock_info *info, s64 delta) return 0; } +/** + * struct ice_crosststamp_cfg - Device cross timestamp configuration + * @lock_reg: The hardware semaphore lock to use + * @lock_busy: Bit in the semaphore lock indicating the lock is busy + * @ctl_reg: The hardware register to request cross timestamp + * @ctl_active: Bit in the control register to request cross timestamp + * @art_time_l: Lower 32-bits of ART system time + * @art_time_h: Upper 32-bits of ART system time + * @dev_time_l: Lower 32-bits of device time (per timer index) + * @dev_time_h: Upper 32-bits of device time (per timer index) + */ +struct ice_crosststamp_cfg { + /* HW semaphore lock register */ + u32 lock_reg; + u32 lock_busy; + + /* Capture control register */ + u32 ctl_reg; + u32 ctl_active; + + /* Time storage */ + u32 art_time_l; + u32 art_time_h; + u32 dev_time_l[2]; + u32 dev_time_h[2]; +}; + +static const struct ice_crosststamp_cfg ice_crosststamp_cfg_e82x = { + .lock_reg = PFHH_SEM, + .lock_busy = PFHH_SEM_BUSY_M, + .ctl_reg = GLHH_ART_CTL, + .ctl_active = GLHH_ART_CTL_ACTIVE_M, + .art_time_l = GLHH_ART_TIME_L, + .art_time_h = GLHH_ART_TIME_H, + .dev_time_l[0] = GLTSYN_HHTIME_L(0), + .dev_time_h[0] = GLTSYN_HHTIME_H(0), + .dev_time_l[1] = GLTSYN_HHTIME_L(1), + .dev_time_h[1] = GLTSYN_HHTIME_H(1), +}; + #ifdef CONFIG_ICE_HWTS +static const struct ice_crosststamp_cfg ice_crosststamp_cfg_e830 = { + .lock_reg = E830_PFPTM_SEM, + .lock_busy = E830_PFPTM_SEM_BUSY_M, + .ctl_reg = E830_GLPTM_ART_CTL, + .ctl_active = E830_GLPTM_ART_CTL_ACTIVE_M, + .art_time_l = E830_GLPTM_ART_TIME_L, + .art_time_h = E830_GLPTM_ART_TIME_H, + .dev_time_l[0] = E830_GLTSYN_PTMTIME_L(0), + .dev_time_h[0] = E830_GLTSYN_PTMTIME_H(0), + .dev_time_l[1] = E830_GLTSYN_PTMTIME_L(1), + .dev_time_h[1] = E830_GLTSYN_PTMTIME_H(1), +}; + +#endif /* CONFIG_ICE_HWTS */ +/** + * struct ice_crosststamp_ctx - Device cross timestamp context + * @snapshot: snapshot of system clocks for historic interpolation + * @pf: pointer to the PF private structure + * @cfg: pointer to hardware configuration for cross timestamp + */ +struct ice_crosststamp_ctx { + struct system_time_snapshot snapshot; + struct ice_pf *pf; + const struct ice_crosststamp_cfg *cfg; +}; + /** - * ice_ptp_get_syncdevicetime - Get the cross time stamp info + * ice_capture_crosststamp - Capture a device/system cross timestamp * @device: Current device time * @system: System counter value read synchronously with device time - * @ctx: Context provided by timekeeping code + * @__ctx: Context passed from ice_ptp_getcrosststamp * * Read device and system (ART) clock simultaneously and return the corrected * clock values in ns. + * + * Return: zero on success, or a negative error code on failure. */ -static int -ice_ptp_get_syncdevicetime(ktime_t *device, - struct system_counterval_t *system, - void *ctx) +static int ice_capture_crosststamp(ktime_t *device, + struct system_counterval_t *system, + void *__ctx) { - struct ice_pf *pf = (struct ice_pf *)ctx; - struct ice_hw *hw = &pf->hw; - u32 hh_lock, hh_art_ctl; - int i; + struct ice_crosststamp_ctx *ctx = __ctx; + const struct ice_crosststamp_cfg *cfg; + u32 lock, ctl, ts_lo, ts_hi, tmr_idx; + struct ice_pf *pf; + struct ice_hw *hw; + int err; + u64 ts; -#define MAX_HH_HW_LOCK_TRIES 5 -#define MAX_HH_CTL_LOCK_TRIES 100 + cfg = ctx->cfg; + pf = ctx->pf; + hw = &pf->hw; - for (i = 0; i < MAX_HH_HW_LOCK_TRIES; i++) { - /* Get the HW lock */ - hh_lock = rd32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id)); - if (hh_lock & PFHH_SEM_BUSY_M) { - usleep_range(10000, 15000); - continue; - } - break; - } - if (hh_lock & PFHH_SEM_BUSY_M) { - dev_err(ice_pf_to_dev(pf), "PTP failed to get hh lock\n"); + tmr_idx = hw->func_caps.ts_func_info.tmr_index_assoc; + if (tmr_idx > 1) + return -EINVAL; + + /* Poll until we obtain the cross-timestamp hardware semaphore */ + err = rd32_poll_timeout(hw, cfg->lock_reg, lock, + !(lock & cfg->lock_busy), + 10 * USEC_PER_MSEC, 50 * USEC_PER_MSEC); + if (err) { + dev_err(ice_pf_to_dev(pf), "PTP failed to get cross timestamp lock\n"); return -EBUSY; } + /* Snapshot system time for historic interpolation */ + ktime_get_snapshot(&ctx->snapshot); + /* Program cmd to master timer */ ice_ptp_src_cmd(hw, ICE_PTP_READ_TIME); /* Start the ART and device clock sync sequence */ - hh_art_ctl = rd32(hw, GLHH_ART_CTL); - hh_art_ctl = hh_art_ctl | GLHH_ART_CTL_ACTIVE_M; - wr32(hw, GLHH_ART_CTL, hh_art_ctl); - - for (i = 0; i < MAX_HH_CTL_LOCK_TRIES; i++) { - /* Wait for sync to complete */ - hh_art_ctl = rd32(hw, GLHH_ART_CTL); - if (hh_art_ctl & GLHH_ART_CTL_ACTIVE_M) { - udelay(1); - continue; - } else { - u32 hh_ts_lo, hh_ts_hi, tmr_idx; - u64 hh_ts; - - tmr_idx = hw->func_caps.ts_func_info.tmr_index_assoc; - /* Read ART time */ - hh_ts_lo = rd32(hw, GLHH_ART_TIME_L); - hh_ts_hi = rd32(hw, GLHH_ART_TIME_H); - hh_ts = ((u64)hh_ts_hi << 32) | hh_ts_lo; - system->cycles = hh_ts; - system->cs_id = CSID_X86_ART; - /* Read Device source clock time */ - hh_ts_lo = rd32(hw, GLTSYN_HHTIME_L(tmr_idx)); - hh_ts_hi = rd32(hw, GLTSYN_HHTIME_H(tmr_idx)); - hh_ts = ((u64)hh_ts_hi << 32) | hh_ts_lo; - *device = ns_to_ktime(hh_ts); - break; - } - } + ctl = rd32(hw, cfg->ctl_reg); + ctl |= cfg->ctl_active; + wr32(hw, cfg->ctl_reg, ctl); + /* Poll until hardware completes the capture */ + err = rd32_poll_timeout(hw, cfg->ctl_reg, ctl, !(ctl & cfg->ctl_active), + 5, 20 * USEC_PER_MSEC); + if (err) + goto err_timeout; + + /* Read ART system time */ + ts_lo = rd32(hw, cfg->art_time_l); + ts_hi = rd32(hw, cfg->art_time_h); + ts = ((u64)ts_hi << 32) | ts_lo; + system->cycles = ts; + system->cs_id = CSID_X86_ART; + + /* Read Device source clock time */ + ts_lo = rd32(hw, cfg->dev_time_l[tmr_idx]); + ts_hi = rd32(hw, cfg->dev_time_h[tmr_idx]); + ts = ((u64)ts_hi << 32) | ts_lo; + *device = ns_to_ktime(ts); + +err_timeout: /* Clear the master timer */ ice_ptp_src_cmd(hw, ICE_PTP_NOP); /* Release HW lock */ - hh_lock = rd32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id)); - hh_lock = hh_lock & ~PFHH_SEM_BUSY_M; - wr32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id), hh_lock); - - if (i == MAX_HH_CTL_LOCK_TRIES) - return -ETIMEDOUT; + lock = rd32(hw, cfg->lock_reg); + lock &= ~cfg->lock_busy; + wr32(hw, cfg->lock_reg, lock); - return 0; + return err; } /** - * ice_ptp_getcrosststamp_e82x - Capture a device cross timestamp + * ice_ptp_getcrosststamp - Capture a device cross timestamp * @info: the driver's PTP info structure * @cts: The memory to fill the cross timestamp info * @@ -2274,22 +2333,36 @@ ice_ptp_get_syncdevicetime(ktime_t *device, * clock. Fill the cross timestamp information and report it back to the * caller. * - * This is only valid for E822 and E823 devices which have support for - * generating the cross timestamp via PCIe PTM. - * * In order to correctly correlate the ART timestamp back to the TSC time, the * CPU must have X86_FEATURE_TSC_KNOWN_FREQ. + * + * Return: zero on success, or a negative error code on failure. */ -static int -ice_ptp_getcrosststamp_e82x(struct ptp_clock_info *info, - struct system_device_crosststamp *cts) +static int ice_ptp_getcrosststamp(struct ptp_clock_info *info, + struct system_device_crosststamp *cts) { struct ice_pf *pf = ptp_info_to_pf(info); + struct ice_crosststamp_ctx ctx = { + .pf = pf, + }; + + switch (pf->hw.mac_type) { + case ICE_MAC_GENERIC: + case ICE_MAC_GENERIC_3K_E825: + ctx.cfg = &ice_crosststamp_cfg_e82x; + break; +#ifdef CONFIG_ICE_HWTS + case ICE_MAC_E830: + ctx.cfg = &ice_crosststamp_cfg_e830; + break; +#endif /* CONFIG_ICE_HWTS */ + default: + return -EOPNOTSUPP; + } - return get_device_system_crosststamp(ice_ptp_get_syncdevicetime, - pf, NULL, cts); + return get_device_system_crosststamp(ice_capture_crosststamp, &ctx, + &ctx.snapshot, cts); } -#endif /* CONFIG_ICE_HWTS */ /** * ice_ptp_get_ts_config - ioctl interface to read the timestamping config @@ -2550,13 +2623,9 @@ static int ice_ptp_parse_sdp_entries(struct ice_pf *pf, __le16 *entries, */ static void ice_ptp_set_funcs_e82x(struct ice_pf *pf) { -#ifdef CONFIG_ICE_HWTS - if (boot_cpu_has(X86_FEATURE_ART) && - boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) - pf->ptp.info.getcrosststamp = ice_ptp_getcrosststamp_e82x; + pf->ptp.info.getcrosststamp = ice_ptp_getcrosststamp; -#endif /* CONFIG_ICE_HWTS */ - if (ice_is_e825c(&pf->hw)) { + if (pf->hw.mac_type == ICE_MAC_GENERIC_3K_E825) { pf->ptp.ice_pin_desc = ice_pin_desc_e825c; pf->ptp.info.n_pins = ICE_PIN_DESC_ARR_LEN(ice_pin_desc_e825c); } else { @@ -2623,6 +2692,28 @@ err: } /** + * ice_ptp_set_funcs_e830 - Set specialized functions for E830 support + * @pf: Board private structure + * + * Assign functions to the PTP capabiltiies structure for E830 devices. + * Functions which operate across all device families should be set directly + * in ice_ptp_set_caps. Only add functions here which are distinct for E830 + * devices. + */ +static void ice_ptp_set_funcs_e830(struct ice_pf *pf) +{ +#ifdef CONFIG_ICE_HWTS + if (pcie_ptm_enabled(pf->pdev) && boot_cpu_has(X86_FEATURE_ART)) + pf->ptp.info.getcrosststamp = ice_ptp_getcrosststamp; + +#endif /* CONFIG_ICE_HWTS */ + /* Rest of the config is the same as base E810 */ + pf->ptp.ice_pin_desc = ice_pin_desc_e810; + pf->ptp.info.n_pins = ICE_PIN_DESC_ARR_LEN(ice_pin_desc_e810); + ice_ptp_setup_pin_cfg(pf); +} + +/** * ice_ptp_set_caps - Set PTP capabilities * @pf: Board private structure */ @@ -2644,10 +2735,20 @@ static void ice_ptp_set_caps(struct ice_pf *pf) info->enable = ice_ptp_gpio_enable; info->verify = ice_verify_pin; - if (ice_is_e810(&pf->hw)) + switch (pf->hw.mac_type) { + case ICE_MAC_E810: ice_ptp_set_funcs_e810(pf); - else + return; + case ICE_MAC_E830: + ice_ptp_set_funcs_e830(pf); + return; + case ICE_MAC_GENERIC: + case ICE_MAC_GENERIC_3K_E825: ice_ptp_set_funcs_e82x(pf); + return; + default: + return; + } } /** @@ -2758,6 +2859,65 @@ enum ice_tx_tstamp_work ice_ptp_process_ts(struct ice_pf *pf) } /** + * ice_ptp_ts_irq - Process the PTP Tx timestamps in IRQ context + * @pf: Board private structure + * + * Return: IRQ_WAKE_THREAD if Tx timestamp read has to be handled in the bottom + * half of the interrupt and IRQ_HANDLED otherwise. + */ +irqreturn_t ice_ptp_ts_irq(struct ice_pf *pf) +{ + struct ice_hw *hw = &pf->hw; + + switch (hw->mac_type) { + case ICE_MAC_E810: + /* E810 capable of low latency timestamping with interrupt can + * request a single timestamp in the top half and wait for + * a second LL TS interrupt from the FW when it's ready. + */ + if (hw->dev_caps.ts_dev_info.ts_ll_int_read) { + struct ice_ptp_tx *tx = &pf->ptp.port.tx; + u8 idx; + + if (!ice_pf_state_is_nominal(pf)) + return IRQ_HANDLED; + + spin_lock(&tx->lock); + idx = find_next_bit_wrap(tx->in_use, tx->len, + tx->last_ll_ts_idx_read + 1); + if (idx != tx->len) + ice_ptp_req_tx_single_tstamp(tx, idx); + spin_unlock(&tx->lock); + + return IRQ_HANDLED; + } + fallthrough; /* non-LL_TS E810 */ + case ICE_MAC_GENERIC: + case ICE_MAC_GENERIC_3K_E825: + /* All other devices process timestamps in the bottom half due + * to sleeping or polling. + */ + if (!ice_ptp_pf_handles_tx_interrupt(pf)) + return IRQ_HANDLED; + + set_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread); + return IRQ_WAKE_THREAD; + case ICE_MAC_E830: + /* E830 can read timestamps in the top half using rd32() */ + if (ice_ptp_process_ts(pf) == ICE_TX_TSTAMP_WORK_PENDING) { + /* Process outstanding Tx timestamps. If there + * is more work, re-arm the interrupt to trigger again. + */ + wr32(hw, PFINT_OICR, PFINT_OICR_TSYN_TX_M); + ice_flush(hw); + } + return IRQ_HANDLED; + default: + return IRQ_HANDLED; + } +} + +/** * ice_ptp_maybe_trigger_tx_interrupt - Trigger Tx timstamp interrupt * @pf: Board private structure * @@ -2777,7 +2937,7 @@ static void ice_ptp_maybe_trigger_tx_interrupt(struct ice_pf *pf) bool trigger_oicr = false; unsigned int i; - if (ice_is_e810(hw)) + if (!pf->ptp.port.tx.has_ready_bitmap) return; if (!ice_pf_src_tmr_owned(pf)) @@ -2912,14 +3072,12 @@ static int ice_ptp_rebuild_owner(struct ice_pf *pf) */ ice_ptp_flush_all_tx_tracker(pf); - if (!ice_is_e810(hw)) { - /* Enable quad interrupts */ - err = ice_ptp_cfg_phy_interrupt(pf, true, 1); - if (err) - return err; + /* Enable quad interrupts */ + err = ice_ptp_cfg_phy_interrupt(pf, true, 1); + if (err) + return err; - ice_ptp_restart_all_phy(pf); - } + ice_ptp_restart_all_phy(pf); /* Re-enable all periodic outputs and external timestamp events */ ice_ptp_enable_all_perout(pf); @@ -2971,8 +3129,9 @@ err: static bool ice_is_primary(struct ice_hw *hw) { - return ice_is_e825c(hw) && ice_is_dual(hw) ? - !!(hw->dev_caps.nac_topo.mode & ICE_NAC_TOPO_PRIMARY_M) : true; + return hw->mac_type == ICE_MAC_GENERIC_3K_E825 && ice_is_dual(hw) ? + !!(hw->dev_caps.nac_topo.mode & ICE_NAC_TOPO_PRIMARY_M) : + true; } static int ice_ptp_setup_adapter(struct ice_pf *pf) @@ -2990,7 +3149,7 @@ static int ice_ptp_setup_pf(struct ice_pf *pf) struct ice_ptp *ctrl_ptp = ice_get_ctrl_ptp(pf); struct ice_ptp *ptp = &pf->ptp; - if (WARN_ON(!ctrl_ptp) || ice_get_phy_model(&pf->hw) == ICE_PHY_UNSUP) + if (WARN_ON(!ctrl_ptp) || pf->hw.mac_type == ICE_MAC_UNKNOWN) return -ENODEV; INIT_LIST_HEAD(&ptp->port.list_node); @@ -3007,7 +3166,7 @@ static void ice_ptp_cleanup_pf(struct ice_pf *pf) { struct ice_ptp *ptp = &pf->ptp; - if (ice_get_phy_model(&pf->hw) != ICE_PHY_UNSUP) { + if (pf->hw.mac_type != ICE_MAC_UNKNOWN) { mutex_lock(&pf->adapter->ports.lock); list_del(&ptp->port.list_node); mutex_unlock(&pf->adapter->ports.lock); @@ -3127,6 +3286,8 @@ static int ice_ptp_init_work(struct ice_pf *pf, struct ice_ptp *ptp) * ice_ptp_init_port - Initialize PTP port structure * @pf: Board private structure * @ptp_port: PTP port structure + * + * Return: 0 on success, -ENODEV on invalid MAC type, -ENOMEM on failed alloc. */ static int ice_ptp_init_port(struct ice_pf *pf, struct ice_ptp_port *ptp_port) { @@ -3134,16 +3295,14 @@ static int ice_ptp_init_port(struct ice_pf *pf, struct ice_ptp_port *ptp_port) mutex_init(&ptp_port->ps_lock); - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - return ice_ptp_init_tx_eth56g(pf, &ptp_port->tx, - ptp_port->port_num); - case ICE_PHY_E810: - return ice_ptp_init_tx_e810(pf, &ptp_port->tx); - case ICE_PHY_E82X: + switch (hw->mac_type) { + case ICE_MAC_E810: + case ICE_MAC_E830: + case ICE_MAC_GENERIC_3K_E825: + return ice_ptp_init_tx(pf, &ptp_port->tx, ptp_port->port_num); + case ICE_MAC_GENERIC: kthread_init_delayed_work(&ptp_port->ov_work, ice_ptp_wait_for_offsets); - return ice_ptp_init_tx_e82x(pf, &ptp_port->tx, ptp_port->port_num); default: @@ -3162,8 +3321,8 @@ static int ice_ptp_init_port(struct ice_pf *pf, struct ice_ptp_port *ptp_port) */ static void ice_ptp_init_tx_interrupt_mode(struct ice_pf *pf) { - switch (ice_get_phy_model(&pf->hw)) { - case ICE_PHY_E82X: + switch (pf->hw.mac_type) { + case ICE_MAC_GENERIC: /* E822 based PHY has the clock owner process the interrupt * for all ports. */ diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h index a1d0e988c084..3b769a0cad00 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp.h @@ -128,8 +128,7 @@ struct ice_ptp_tx { /* Quad and port information for initializing timestamp blocks */ #define INDEX_PER_QUAD 64 #define INDEX_PER_PORT_E82X 16 -#define INDEX_PER_PORT_E810 64 -#define INDEX_PER_PORT_ETH56G 64 +#define INDEX_PER_PORT 64 /** * struct ice_ptp_port - data used to initialize an external port for PTP @@ -304,6 +303,9 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb); void ice_ptp_req_tx_single_tstamp(struct ice_ptp_tx *tx, u8 idx); void ice_ptp_complete_tx_single_tstamp(struct ice_ptp_tx *tx); enum ice_tx_tstamp_work ice_ptp_process_ts(struct ice_pf *pf); +irqreturn_t ice_ptp_ts_irq(struct ice_pf *pf); +u64 ice_ptp_read_src_clk_reg(struct ice_pf *pf, + struct ptp_system_timestamp *sts); u64 ice_ptp_get_rx_hwts(const union ice_32b_rx_flex_desc *rx_desc, const struct ice_pkt_ctx *pkt_ctx); @@ -342,6 +344,17 @@ static inline bool ice_ptp_process_ts(struct ice_pf *pf) return true; } +static inline irqreturn_t ice_ptp_ts_irq(struct ice_pf *pf) +{ + return IRQ_HANDLED; +} + +static inline u64 ice_ptp_read_src_clk_reg(struct ice_pf *pf, + struct ptp_system_timestamp *sts) +{ + return 0; +} + static inline u64 ice_ptp_get_rx_hwts(const union ice_32b_rx_flex_desc *rx_desc, const struct ice_pkt_ctx *pkt_ctx) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c index ec91822e9280..3e824f7b30c0 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c @@ -746,7 +746,7 @@ static int ice_init_cgu_e82x(struct ice_hw *hw) int err; /* Disable sticky lock detection so lock err reported is accurate */ - if (ice_is_e825c(hw)) + if (hw->mac_type == ICE_MAC_GENERIC_3K_E825) err = ice_cfg_cgu_pll_dis_sticky_bits_e825c(hw); else err = ice_cfg_cgu_pll_dis_sticky_bits_e82x(hw); @@ -756,7 +756,7 @@ static int ice_init_cgu_e82x(struct ice_hw *hw) /* Configure the CGU PLL using the parameters from the function * capabilities. */ - if (ice_is_e825c(hw)) + if (hw->mac_type == ICE_MAC_GENERIC_3K_E825) err = ice_cfg_cgu_pll_e825c(hw, ts_info->time_ref, (enum ice_clk_src)ts_info->clk_src); else @@ -827,8 +827,9 @@ static u32 ice_ptp_tmr_cmd_to_port_reg(struct ice_hw *hw, /* Certain hardware families share the same register values for the * port register and source timer register. */ - switch (ice_get_phy_model(hw)) { - case ICE_PHY_E810: + switch (hw->mac_type) { + case ICE_MAC_E810: + case ICE_MAC_E830: return ice_ptp_tmr_cmd_to_src_reg(hw, cmd) & TS_CMD_MASK_E810; default: break; @@ -895,6 +896,17 @@ static void ice_ptp_exec_tmr_cmd(struct ice_hw *hw) ice_flush(hw); } +/** + * ice_ptp_cfg_sync_delay - Configure PHC to PHY synchronization delay + * @hw: pointer to HW struct + * @delay: delay between PHC and PHY SYNC command execution in nanoseconds + */ +static void ice_ptp_cfg_sync_delay(const struct ice_hw *hw, u32 delay) +{ + wr32(hw, GLTSYN_SYNC_DLAY, delay); + ice_flush(hw); +} + /* 56G PHY device functions * * The following functions operate on devices with the ETH 56G PHY. @@ -1576,9 +1588,8 @@ static int ice_read_ptp_tstamp_eth56g(struct ice_hw *hw, u8 port, u8 idx, * lower 8 bits in the low register, and the upper 32 bits in the high * register. */ - *tstamp = FIELD_PREP(TS_PHY_HIGH_M, hi) | - FIELD_PREP(TS_PHY_LOW_M, lo); - + *tstamp = FIELD_PREP(PHY_40B_HIGH_M, hi) | + FIELD_PREP(PHY_40B_LOW_M, lo); return 0; } @@ -2729,10 +2740,7 @@ static void ice_ptp_init_phy_e825(struct ice_hw *hw) { struct ice_ptp_hw *ptp = &hw->ptp; struct ice_eth56g_params *params; - u32 phy_rev; - int err; - ptp->phy_model = ICE_PHY_ETH56G; params = &ptp->phy.eth56g; params->onestep_ena = false; params->peer_delay = 0; @@ -2742,9 +2750,6 @@ static void ice_ptp_init_phy_e825(struct ice_hw *hw) ptp->num_lports = params->num_phys * ptp->ports_per_phy; ice_sb_access_ena_eth56g(hw, true); - err = ice_read_phy_eth56g(hw, hw->pf_id, PHY_REG_REVISION, &phy_rev); - if (err || phy_rev != PHY_REVISION_ETH56G) - ptp->phy_model = ICE_PHY_UNSUP; } /* E822 family functions @@ -3219,7 +3224,8 @@ ice_read_phy_tstamp_e82x(struct ice_hw *hw, u8 quad, u8 idx, u64 *tstamp) * lower 8 bits in the low register, and the upper 32 bits in the high * register. */ - *tstamp = FIELD_PREP(TS_PHY_HIGH_M, hi) | FIELD_PREP(TS_PHY_LOW_M, lo); + *tstamp = FIELD_PREP(PHY_40B_HIGH_M, hi) | + FIELD_PREP(PHY_40B_LOW_M, lo); return 0; } @@ -4792,7 +4798,6 @@ int ice_phy_cfg_intr_e82x(struct ice_hw *hw, u8 quad, bool ena, u8 threshold) */ static void ice_ptp_init_phy_e82x(struct ice_ptp_hw *ptp) { - ptp->phy_model = ICE_PHY_E82X; ptp->num_lports = 8; ptp->ports_per_phy = 8; } @@ -4986,7 +4991,8 @@ ice_read_phy_tstamp_e810(struct ice_hw *hw, u8 lport, u8 idx, u64 *tstamp) /* For E810 devices, the timestamp is reported with the lower 32 bits * in the low register, and the upper 8 bits in the high register. */ - *tstamp = ((u64)hi) << TS_HIGH_S | ((u64)lo & TS_LOW_M); + *tstamp = FIELD_PREP(PHY_EXT_40B_HIGH_M, hi) | + FIELD_PREP(PHY_EXT_40B_LOW_M, lo); return 0; } @@ -5049,8 +5055,7 @@ static int ice_ptp_init_phc_e810(struct ice_hw *hw) u8 tmr_idx; int err; - /* Ensure synchronization delay is zero */ - wr32(hw, GLTSYN_SYNC_DLAY, 0); + ice_ptp_cfg_sync_delay(hw, ICE_E810_E830_SYNC_DELAY); tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; err = ice_write_phy_reg_e810(hw, ETH_GLTSYN_ENA(tmr_idx), @@ -5316,68 +5321,6 @@ ice_get_phy_tx_tstamp_ready_e810(struct ice_hw *hw, u8 port, u64 *tstamp_ready) */ /** - * ice_get_pca9575_handle - * @hw: pointer to the hw struct - * @pca9575_handle: GPIO controller's handle - * - * Find and return the GPIO controller's handle in the netlist. - * When found - the value will be cached in the hw structure and following calls - * will return cached value - */ -static int -ice_get_pca9575_handle(struct ice_hw *hw, u16 *pca9575_handle) -{ - struct ice_aqc_get_link_topo *cmd; - struct ice_aq_desc desc; - int status; - u8 idx; - - /* If handle was read previously return cached value */ - if (hw->io_expander_handle) { - *pca9575_handle = hw->io_expander_handle; - return 0; - } - - /* If handle was not detected read it from the netlist */ - cmd = &desc.params.get_link_topo; - ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_topo); - - /* Set node type to GPIO controller */ - cmd->addr.topo_params.node_type_ctx = - (ICE_AQC_LINK_TOPO_NODE_TYPE_M & - ICE_AQC_LINK_TOPO_NODE_TYPE_GPIO_CTRL); - -#define SW_PCA9575_SFP_TOPO_IDX 2 -#define SW_PCA9575_QSFP_TOPO_IDX 1 - - /* Check if the SW IO expander controlling SMA exists in the netlist. */ - if (hw->device_id == ICE_DEV_ID_E810C_SFP) - idx = SW_PCA9575_SFP_TOPO_IDX; - else if (hw->device_id == ICE_DEV_ID_E810C_QSFP) - idx = SW_PCA9575_QSFP_TOPO_IDX; - else - return -EOPNOTSUPP; - - cmd->addr.topo_params.index = idx; - - status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); - if (status) - return -EOPNOTSUPP; - - /* Verify if we found the right IO expander type */ - if (desc.params.get_link_topo.node_part_num != - ICE_AQC_GET_LINK_TOPO_NODE_NR_PCA9575) - return -EOPNOTSUPP; - - /* If present save the handle and return it */ - hw->io_expander_handle = - le16_to_cpu(desc.params.get_link_topo.addr.handle); - *pca9575_handle = hw->io_expander_handle; - - return 0; -} - -/** * ice_read_sma_ctrl * @hw: pointer to the hw struct * @data: pointer to data to be read from the GPIO controller @@ -5442,37 +5385,6 @@ int ice_write_sma_ctrl(struct ice_hw *hw, u8 data) } /** - * ice_read_pca9575_reg - * @hw: pointer to the hw struct - * @offset: GPIO controller register offset - * @data: pointer to data to be read from the GPIO controller - * - * Read the register from the GPIO controller - */ -int ice_read_pca9575_reg(struct ice_hw *hw, u8 offset, u8 *data) -{ - struct ice_aqc_link_topo_addr link_topo; - __le16 addr; - u16 handle; - int err; - - memset(&link_topo, 0, sizeof(link_topo)); - - err = ice_get_pca9575_handle(hw, &handle); - if (err) - return err; - - link_topo.handle = cpu_to_le16(handle); - link_topo.topo_params.node_type_ctx = - FIELD_PREP(ICE_AQC_LINK_TOPO_NODE_CTX_M, - ICE_AQC_LINK_TOPO_NODE_CTX_PROVIDED); - - addr = cpu_to_le16((u16)offset); - - return ice_aq_read_i2c(hw, link_topo, 0, addr, 1, data, NULL); -} - -/** * ice_ptp_read_sdp_ac - read SDP available connections section from NVM * @hw: pointer to the HW struct * @entries: returns the SDP available connections section from NVM @@ -5538,18 +5450,138 @@ exit: */ static void ice_ptp_init_phy_e810(struct ice_ptp_hw *ptp) { - ptp->phy_model = ICE_PHY_E810; ptp->num_lports = 8; ptp->ports_per_phy = 4; init_waitqueue_head(&ptp->phy.e810.atqbal_wq); } +/* E830 functions + * + * The following functions operate on the E830 series devices. + * + */ + +/** + * ice_ptp_init_phc_e830 - Perform E830 specific PHC initialization + * @hw: pointer to HW struct + * + * Perform E830-specific PTP hardware clock initialization steps. + */ +static void ice_ptp_init_phc_e830(const struct ice_hw *hw) +{ + ice_ptp_cfg_sync_delay(hw, ICE_E810_E830_SYNC_DELAY); +} + +/** + * ice_ptp_write_direct_incval_e830 - Prep PHY port increment value change + * @hw: pointer to HW struct + * @incval: The new 40bit increment value to prepare + * + * Prepare the PHY port for a new increment value by programming the PHC + * GLTSYN_INCVAL_L and GLTSYN_INCVAL_H registers. The actual change is + * completed by FW automatically. + */ +static void ice_ptp_write_direct_incval_e830(const struct ice_hw *hw, + u64 incval) +{ + u8 tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; + + wr32(hw, GLTSYN_INCVAL_L(tmr_idx), lower_32_bits(incval)); + wr32(hw, GLTSYN_INCVAL_H(tmr_idx), upper_32_bits(incval)); +} + +/** + * ice_ptp_write_direct_phc_time_e830 - Prepare PHY port with initial time + * @hw: Board private structure + * @time: Time to initialize the PHY port clock to + * + * Program the PHY port ETH_GLTSYN_SHTIME registers in preparation setting the + * initial clock time. The time will not actually be programmed until the + * driver issues an ICE_PTP_INIT_TIME command. + * + * The time value is the upper 32 bits of the PHY timer, usually in units of + * nominal nanoseconds. + */ +static void ice_ptp_write_direct_phc_time_e830(const struct ice_hw *hw, + u64 time) +{ + u8 tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; + + wr32(hw, GLTSYN_TIME_0(tmr_idx), 0); + wr32(hw, GLTSYN_TIME_L(tmr_idx), lower_32_bits(time)); + wr32(hw, GLTSYN_TIME_H(tmr_idx), upper_32_bits(time)); +} + +/** + * ice_ptp_port_cmd_e830 - Prepare all external PHYs for a timer command + * @hw: pointer to HW struct + * @cmd: Command to be sent to the port + * + * Prepare the external PHYs connected to this device for a timer sync + * command. + * + * Return: 0 on success, negative error code when PHY write failed + */ +static int ice_ptp_port_cmd_e830(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) +{ + u32 val = ice_ptp_tmr_cmd_to_port_reg(hw, cmd); + + return ice_write_phy_reg_e810(hw, E830_ETH_GLTSYN_CMD, val); +} + +/** + * ice_read_phy_tstamp_e830 - Read a PHY timestamp out of the external PHY + * @hw: pointer to the HW struct + * @idx: the timestamp index to read + * @tstamp: on return, the 40bit timestamp value + * + * Read a 40bit timestamp value out of the timestamp block of the external PHY + * on the E830 device. + */ +static void ice_read_phy_tstamp_e830(const struct ice_hw *hw, u8 idx, + u64 *tstamp) +{ + u32 hi, lo; + + hi = rd32(hw, E830_PRTTSYN_TXTIME_H(idx)); + lo = rd32(hw, E830_PRTTSYN_TXTIME_L(idx)); + + /* For E830 devices, the timestamp is reported with the lower 32 bits + * in the low register, and the upper 8 bits in the high register. + */ + *tstamp = FIELD_PREP(PHY_EXT_40B_HIGH_M, hi) | + FIELD_PREP(PHY_EXT_40B_LOW_M, lo); +} + +/** + * ice_get_phy_tx_tstamp_ready_e830 - Read Tx memory status register + * @hw: pointer to the HW struct + * @port: the PHY port to read + * @tstamp_ready: contents of the Tx memory status register + */ +static void ice_get_phy_tx_tstamp_ready_e830(const struct ice_hw *hw, u8 port, + u64 *tstamp_ready) +{ + *tstamp_ready = rd32(hw, E830_PRTMAC_TS_TX_MEM_VALID_H); + *tstamp_ready <<= 32; + *tstamp_ready |= rd32(hw, E830_PRTMAC_TS_TX_MEM_VALID_L); +} + +/** + * ice_ptp_init_phy_e830 - initialize PHY parameters + * @ptp: pointer to the PTP HW struct + */ +static void ice_ptp_init_phy_e830(struct ice_ptp_hw *ptp) +{ + ptp->num_lports = 8; + ptp->ports_per_phy = 4; +} + /* Device agnostic functions * - * The following functions implement shared behavior common to both E822 and - * E810 devices, possibly calling a device specific implementation where - * necessary. + * The following functions implement shared behavior common to all devices, + * possibly calling a device specific implementation where necessary. */ /** @@ -5612,14 +5644,22 @@ void ice_ptp_init_hw(struct ice_hw *hw) { struct ice_ptp_hw *ptp = &hw->ptp; - if (ice_is_e822(hw) || ice_is_e823(hw)) - ice_ptp_init_phy_e82x(ptp); - else if (ice_is_e810(hw)) + switch (hw->mac_type) { + case ICE_MAC_E810: ice_ptp_init_phy_e810(ptp); - else if (ice_is_e825c(hw)) + break; + case ICE_MAC_E830: + ice_ptp_init_phy_e830(ptp); + break; + case ICE_MAC_GENERIC: + ice_ptp_init_phy_e82x(ptp); + break; + case ICE_MAC_GENERIC_3K_E825: ice_ptp_init_phy_e825(hw); - else - ptp->phy_model = ICE_PHY_UNSUP; + break; + default: + return; + } } /** @@ -5640,11 +5680,11 @@ void ice_ptp_init_hw(struct ice_hw *hw) static int ice_ptp_write_port_cmd(struct ice_hw *hw, u8 port, enum ice_ptp_tmr_cmd cmd) { - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - return ice_ptp_write_port_cmd_eth56g(hw, port, cmd); - case ICE_PHY_E82X: + switch (hw->mac_type) { + case ICE_MAC_GENERIC: return ice_ptp_write_port_cmd_e82x(hw, port, cmd); + case ICE_MAC_GENERIC_3K_E825: + return ice_ptp_write_port_cmd_eth56g(hw, port, cmd); default: return -EOPNOTSUPP; } @@ -5705,9 +5745,11 @@ static int ice_ptp_port_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) u32 port; /* PHY models which can program all ports simultaneously */ - switch (ice_get_phy_model(hw)) { - case ICE_PHY_E810: + switch (hw->mac_type) { + case ICE_MAC_E810: return ice_ptp_port_cmd_e810(hw, cmd); + case ICE_MAC_E830: + return ice_ptp_port_cmd_e830(hw, cmd); default: break; } @@ -5778,23 +5820,29 @@ int ice_ptp_init_time(struct ice_hw *hw, u64 time) tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; /* Source timers */ + /* For E830 we don't need to use shadow registers, its automatic */ + if (hw->mac_type == ICE_MAC_E830) { + ice_ptp_write_direct_phc_time_e830(hw, time); + return 0; + } + wr32(hw, GLTSYN_SHTIME_L(tmr_idx), lower_32_bits(time)); wr32(hw, GLTSYN_SHTIME_H(tmr_idx), upper_32_bits(time)); wr32(hw, GLTSYN_SHTIME_0(tmr_idx), 0); /* PHY timers */ /* Fill Rx and Tx ports and send msg to PHY */ - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - err = ice_ptp_prep_phy_time_eth56g(hw, - (u32)(time & 0xFFFFFFFF)); - break; - case ICE_PHY_E810: + switch (hw->mac_type) { + case ICE_MAC_E810: err = ice_ptp_prep_phy_time_e810(hw, time & 0xFFFFFFFF); break; - case ICE_PHY_E82X: + case ICE_MAC_GENERIC: err = ice_ptp_prep_phy_time_e82x(hw, time & 0xFFFFFFFF); break; + case ICE_MAC_GENERIC_3K_E825: + err = ice_ptp_prep_phy_time_eth56g(hw, + (u32)(time & 0xFFFFFFFF)); + break; default: err = -EOPNOTSUPP; } @@ -5826,20 +5874,26 @@ int ice_ptp_write_incval(struct ice_hw *hw, u64 incval) tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; + /* For E830 we don't need to use shadow registers, its automatic */ + if (hw->mac_type == ICE_MAC_E830) { + ice_ptp_write_direct_incval_e830(hw, incval); + return 0; + } + /* Shadow Adjust */ wr32(hw, GLTSYN_SHADJ_L(tmr_idx), lower_32_bits(incval)); wr32(hw, GLTSYN_SHADJ_H(tmr_idx), upper_32_bits(incval)); - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - err = ice_ptp_prep_phy_incval_eth56g(hw, incval); - break; - case ICE_PHY_E810: + switch (hw->mac_type) { + case ICE_MAC_E810: err = ice_ptp_prep_phy_incval_e810(hw, incval); break; - case ICE_PHY_E82X: + case ICE_MAC_GENERIC: err = ice_ptp_prep_phy_incval_e82x(hw, incval); break; + case ICE_MAC_GENERIC_3K_E825: + err = ice_ptp_prep_phy_incval_eth56g(hw, incval); + break; default: err = -EOPNOTSUPP; } @@ -5899,16 +5953,19 @@ int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj) wr32(hw, GLTSYN_SHADJ_L(tmr_idx), 0); wr32(hw, GLTSYN_SHADJ_H(tmr_idx), adj); - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - err = ice_ptp_prep_phy_adj_eth56g(hw, adj); - break; - case ICE_PHY_E810: + switch (hw->mac_type) { + case ICE_MAC_E810: err = ice_ptp_prep_phy_adj_e810(hw, adj); break; - case ICE_PHY_E82X: + case ICE_MAC_E830: + /* E830 sync PHYs automatically after setting GLTSYN_SHADJ */ + return 0; + case ICE_MAC_GENERIC: err = ice_ptp_prep_phy_adj_e82x(hw, adj); break; + case ICE_MAC_GENERIC_3K_E825: + err = ice_ptp_prep_phy_adj_eth56g(hw, adj); + break; default: err = -EOPNOTSUPP; } @@ -5932,13 +5989,16 @@ int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj) */ int ice_read_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx, u64 *tstamp) { - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - return ice_read_ptp_tstamp_eth56g(hw, block, idx, tstamp); - case ICE_PHY_E810: + switch (hw->mac_type) { + case ICE_MAC_E810: return ice_read_phy_tstamp_e810(hw, block, idx, tstamp); - case ICE_PHY_E82X: + case ICE_MAC_E830: + ice_read_phy_tstamp_e830(hw, idx, tstamp); + return 0; + case ICE_MAC_GENERIC: return ice_read_phy_tstamp_e82x(hw, block, idx, tstamp); + case ICE_MAC_GENERIC_3K_E825: + return ice_read_ptp_tstamp_eth56g(hw, block, idx, tstamp); default: return -EOPNOTSUPP; } @@ -5962,13 +6022,13 @@ int ice_read_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx, u64 *tstamp) */ int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx) { - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - return ice_clear_ptp_tstamp_eth56g(hw, block, idx); - case ICE_PHY_E810: + switch (hw->mac_type) { + case ICE_MAC_E810: return ice_clear_phy_tstamp_e810(hw, block, idx); - case ICE_PHY_E82X: + case ICE_MAC_GENERIC: return ice_clear_phy_tstamp_e82x(hw, block, idx); + case ICE_MAC_GENERIC_3K_E825: + return ice_clear_ptp_tstamp_eth56g(hw, block, idx); default: return -EOPNOTSUPP; } @@ -6025,14 +6085,14 @@ static int ice_get_pf_c827_idx(struct ice_hw *hw, u8 *idx) */ void ice_ptp_reset_ts_memory(struct ice_hw *hw) { - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - ice_ptp_reset_ts_memory_eth56g(hw); - break; - case ICE_PHY_E82X: + switch (hw->mac_type) { + case ICE_MAC_GENERIC: ice_ptp_reset_ts_memory_e82x(hw); break; - case ICE_PHY_E810: + case ICE_MAC_GENERIC_3K_E825: + ice_ptp_reset_ts_memory_eth56g(hw); + break; + case ICE_MAC_E810: default: return; } @@ -6054,13 +6114,16 @@ int ice_ptp_init_phc(struct ice_hw *hw) /* Clear event err indications for auxiliary pins */ (void)rd32(hw, GLTSYN_STAT(src_idx)); - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - return ice_ptp_init_phc_eth56g(hw); - case ICE_PHY_E810: + switch (hw->mac_type) { + case ICE_MAC_E810: return ice_ptp_init_phc_e810(hw); - case ICE_PHY_E82X: + case ICE_MAC_E830: + ice_ptp_init_phc_e830(hw); + return 0; + case ICE_MAC_GENERIC: return ice_ptp_init_phc_e82x(hw); + case ICE_MAC_GENERIC_3K_E825: + return ice_ptp_init_phc_eth56g(hw); default: return -EOPNOTSUPP; } @@ -6079,17 +6142,19 @@ int ice_ptp_init_phc(struct ice_hw *hw) */ int ice_get_phy_tx_tstamp_ready(struct ice_hw *hw, u8 block, u64 *tstamp_ready) { - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - return ice_get_phy_tx_tstamp_ready_eth56g(hw, block, - tstamp_ready); - case ICE_PHY_E810: + switch (hw->mac_type) { + case ICE_MAC_E810: return ice_get_phy_tx_tstamp_ready_e810(hw, block, tstamp_ready); - case ICE_PHY_E82X: + case ICE_MAC_E830: + ice_get_phy_tx_tstamp_ready_e830(hw, block, tstamp_ready); + return 0; + case ICE_MAC_GENERIC: return ice_get_phy_tx_tstamp_ready_e82x(hw, block, tstamp_ready); - break; + case ICE_MAC_GENERIC_3K_E825: + return ice_get_phy_tx_tstamp_ready_eth56g(hw, block, + tstamp_ready); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h index 6779ce120515..8442d1d60351 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h @@ -324,6 +324,7 @@ extern const struct ice_vernier_info_e82x e822_vernier[NUM_ICE_PTP_LNK_SPD]; */ #define ICE_E810_PLL_FREQ 812500000 #define ICE_PTP_NOMINAL_INCVAL_E810 0x13b13b13bULL +#define ICE_E810_E830_SYNC_DELAY 0 /* Device agnostic functions */ u8 ice_get_ptp_src_clock_index(struct ice_hw *hw); @@ -395,7 +396,6 @@ int ice_phy_cfg_intr_e82x(struct ice_hw *hw, u8 quad, bool ena, u8 threshold); /* E810 family functions */ int ice_read_sma_ctrl(struct ice_hw *hw, u8 *data); int ice_write_sma_ctrl(struct ice_hw *hw, u8 data); -int ice_read_pca9575_reg(struct ice_hw *hw, u8 offset, u8 *data); int ice_ptp_read_sdp_ac(struct ice_hw *hw, __le16 *entries, uint *num_entries); int ice_cgu_get_num_pins(struct ice_hw *hw, bool input); enum dpll_pin_type ice_cgu_get_pin_type(struct ice_hw *hw, u8 pin, bool input); @@ -431,13 +431,14 @@ int ice_phy_cfg_ptp_1step_eth56g(struct ice_hw *hw, u8 port); */ static inline u64 ice_get_base_incval(struct ice_hw *hw) { - switch (hw->ptp.phy_model) { - case ICE_PHY_ETH56G: - return ICE_ETH56G_NOMINAL_INCVAL; - case ICE_PHY_E810: + switch (hw->mac_type) { + case ICE_MAC_E810: + case ICE_MAC_E830: return ICE_PTP_NOMINAL_INCVAL_E810; - case ICE_PHY_E82X: + case ICE_MAC_GENERIC: return ice_e82x_nominal_incval(ice_e82x_time_ref(hw)); + case ICE_MAC_GENERIC_3K_E825: + return ICE_ETH56G_NOMINAL_INCVAL; default: return 0; } @@ -650,18 +651,25 @@ static inline bool ice_is_dual(struct ice_hw *hw) /* E810 timer command register */ #define E810_ETH_GLTSYN_CMD 0x03000344 +/* E830 timer command register */ +#define E830_ETH_GLTSYN_CMD 0x00088814 + +/* E810 PHC time register */ +#define E830_GLTSYN_TIME_L(_tmr_idx) (0x0008A000 + 0x1000 * (_tmr_idx)) + /* Source timer incval macros */ #define INCVAL_HIGH_M 0xFF -/* Timestamp block macros */ +/* PHY 40b registers macros */ +#define PHY_EXT_40B_LOW_M GENMASK(31, 0) +#define PHY_EXT_40B_HIGH_M GENMASK_ULL(39, 32) +#define PHY_40B_LOW_M GENMASK(7, 0) +#define PHY_40B_HIGH_M GENMASK_ULL(39, 8) #define TS_VALID BIT(0) #define TS_LOW_M 0xFFFFFFFF #define TS_HIGH_M 0xFF #define TS_HIGH_S 32 -#define TS_PHY_LOW_M GENMASK(7, 0) -#define TS_PHY_HIGH_M GENMASK_ULL(39, 8) - #define BYTES_PER_IDX_ADDR_L_U 8 #define BYTES_PER_IDX_ADDR_L 4 diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index b83f99c01d91..33eac29b6a50 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -123,27 +123,6 @@ static void ice_dis_vf_mappings(struct ice_vf *vf) } /** - * ice_sriov_free_msix_res - Reset/free any used MSIX resources - * @pf: pointer to the PF structure - * - * Since no MSIX entries are taken from the pf->irq_tracker then just clear - * the pf->sriov_base_vector. - * - * Returns 0 on success, and -EINVAL on error. - */ -static int ice_sriov_free_msix_res(struct ice_pf *pf) -{ - if (!pf) - return -EINVAL; - - bitmap_free(pf->sriov_irq_bm); - pf->sriov_irq_size = 0; - pf->sriov_base_vector = 0; - - return 0; -} - -/** * ice_free_vfs - Free all VFs * @pf: pointer to the PF structure */ @@ -177,6 +156,7 @@ void ice_free_vfs(struct ice_pf *pf) ice_eswitch_detach_vf(pf, vf); ice_dis_vf_qs(vf); + ice_virt_free_irqs(pf, vf->first_vector_idx, vf->num_msix); if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { /* disable VF qp mappings and set VF disable state */ @@ -200,9 +180,6 @@ void ice_free_vfs(struct ice_pf *pf) mutex_unlock(&vf->cfg_lock); } - if (ice_sriov_free_msix_res(pf)) - dev_err(dev, "Failed to free MSIX resources used by SR-IOV\n"); - vfs->num_qps_per = 0; ice_free_vf_entries(pf); @@ -372,40 +349,6 @@ void ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector) } /** - * ice_sriov_set_msix_res - Set any used MSIX resources - * @pf: pointer to PF structure - * @num_msix_needed: number of MSIX vectors needed for all SR-IOV VFs - * - * This function allows SR-IOV resources to be taken from the end of the PF's - * allowed HW MSIX vectors so that the irq_tracker will not be affected. We - * just set the pf->sriov_base_vector and return success. - * - * If there are not enough resources available, return an error. This should - * always be caught by ice_set_per_vf_res(). - * - * Return 0 on success, and -EINVAL when there are not enough MSIX vectors - * in the PF's space available for SR-IOV. - */ -static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed) -{ - u16 total_vectors = pf->hw.func_caps.common_cap.num_msix_vectors; - int vectors_used = ice_get_max_used_msix_vector(pf); - int sriov_base_vector; - - sriov_base_vector = total_vectors - num_msix_needed; - - /* make sure we only grab irq_tracker entries from the list end and - * that we have enough available MSIX vectors - */ - if (sriov_base_vector < vectors_used) - return -EINVAL; - - pf->sriov_base_vector = sriov_base_vector; - - return 0; -} - -/** * ice_set_per_vf_res - check if vectors and queues are available * @pf: pointer to the PF structure * @num_vfs: the number of SR-IOV VFs being configured @@ -429,11 +372,9 @@ static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed) */ static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs) { - int vectors_used = ice_get_max_used_msix_vector(pf); u16 num_msix_per_vf, num_txq, num_rxq, avail_qs; int msix_avail_per_vf, msix_avail_for_sriov; struct device *dev = ice_pf_to_dev(pf); - int err; lockdep_assert_held(&pf->vfs.table_lock); @@ -441,8 +382,7 @@ static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs) return -EINVAL; /* determine MSI-X resources per VF */ - msix_avail_for_sriov = pf->hw.func_caps.common_cap.num_msix_vectors - - vectors_used; + msix_avail_for_sriov = pf->virt_irq_tracker.num_entries; msix_avail_per_vf = msix_avail_for_sriov / num_vfs; if (msix_avail_per_vf >= ICE_NUM_VF_MSIX_MED) { num_msix_per_vf = ICE_NUM_VF_MSIX_MED; @@ -481,13 +421,6 @@ static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs) return -ENOSPC; } - err = ice_sriov_set_msix_res(pf, num_msix_per_vf * num_vfs); - if (err) { - dev_err(dev, "Unable to set MSI-X resources for %d VFs, err %d\n", - num_vfs, err); - return err; - } - /* only allow equal Tx/Rx queue count (i.e. queue pairs) */ pf->vfs.num_qps_per = min_t(int, num_txq, num_rxq); pf->vfs.num_msix_per = num_msix_per_vf; @@ -498,52 +431,6 @@ static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs) } /** - * ice_sriov_get_irqs - get irqs for SR-IOV usacase - * @pf: pointer to PF structure - * @needed: number of irqs to get - * - * This returns the first MSI-X vector index in PF space that is used by this - * VF. This index is used when accessing PF relative registers such as - * GLINT_VECT2FUNC and GLINT_DYN_CTL. - * This will always be the OICR index in the AVF driver so any functionality - * using vf->first_vector_idx for queue configuration_id: id of VF which will - * use this irqs - * - * Only SRIOV specific vectors are tracked in sriov_irq_bm. SRIOV vectors are - * allocated from the end of global irq index. First bit in sriov_irq_bm means - * last irq index etc. It simplifies extension of SRIOV vectors. - * They will be always located from sriov_base_vector to the last irq - * index. While increasing/decreasing sriov_base_vector can be moved. - */ -static int ice_sriov_get_irqs(struct ice_pf *pf, u16 needed) -{ - int res = bitmap_find_next_zero_area(pf->sriov_irq_bm, - pf->sriov_irq_size, 0, needed, 0); - /* conversion from number in bitmap to global irq index */ - int index = pf->sriov_irq_size - res - needed; - - if (res >= pf->sriov_irq_size || index < pf->sriov_base_vector) - return -ENOENT; - - bitmap_set(pf->sriov_irq_bm, res, needed); - return index; -} - -/** - * ice_sriov_free_irqs - free irqs used by the VF - * @pf: pointer to PF structure - * @vf: pointer to VF structure - */ -static void ice_sriov_free_irqs(struct ice_pf *pf, struct ice_vf *vf) -{ - /* Move back from first vector index to first index in bitmap */ - int bm_i = pf->sriov_irq_size - vf->first_vector_idx - vf->num_msix; - - bitmap_clear(pf->sriov_irq_bm, bm_i, vf->num_msix); - vf->first_vector_idx = 0; -} - -/** * ice_init_vf_vsi_res - initialize/setup VF VSI resources * @vf: VF to initialize/setup the VSI for * @@ -556,7 +443,7 @@ static int ice_init_vf_vsi_res(struct ice_vf *vf) struct ice_vsi *vsi; int err; - vf->first_vector_idx = ice_sriov_get_irqs(pf, vf->num_msix); + vf->first_vector_idx = ice_virt_get_irqs(pf, vf->num_msix); if (vf->first_vector_idx < 0) return -ENOMEM; @@ -856,16 +743,10 @@ err_free_entries: */ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs) { - int total_vectors = pf->hw.func_caps.common_cap.num_msix_vectors; struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; int ret; - pf->sriov_irq_bm = bitmap_zalloc(total_vectors, GFP_KERNEL); - if (!pf->sriov_irq_bm) - return -ENOMEM; - pf->sriov_irq_size = total_vectors; - /* Disable global interrupt 0 so we don't try to handle the VFLR. */ wr32(hw, GLINT_DYN_CTL(pf->oicr_irq.index), ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S); @@ -918,7 +799,6 @@ err_unroll_intr: /* rearm interrupts here */ ice_irq_dynamic_ena(hw, NULL, NULL); clear_bit(ICE_OICR_INTR_DIS, pf->state); - bitmap_free(pf->sriov_irq_bm); return ret; } @@ -992,16 +872,7 @@ u32 ice_sriov_get_vf_total_msix(struct pci_dev *pdev) { struct ice_pf *pf = pci_get_drvdata(pdev); - return pf->sriov_irq_size - ice_get_max_used_msix_vector(pf); -} - -static int ice_sriov_move_base_vector(struct ice_pf *pf, int move) -{ - if (pf->sriov_base_vector - move < ice_get_max_used_msix_vector(pf)) - return -ENOMEM; - - pf->sriov_base_vector -= move; - return 0; + return pf->virt_irq_tracker.num_entries; } static void ice_sriov_remap_vectors(struct ice_pf *pf, u16 restricted_id) @@ -1020,7 +891,8 @@ static void ice_sriov_remap_vectors(struct ice_pf *pf, u16 restricted_id) continue; ice_dis_vf_mappings(tmp_vf); - ice_sriov_free_irqs(pf, tmp_vf); + ice_virt_free_irqs(pf, tmp_vf->first_vector_idx, + tmp_vf->num_msix); vf_ids[to_remap] = tmp_vf->vf_id; to_remap += 1; @@ -1032,7 +904,7 @@ static void ice_sriov_remap_vectors(struct ice_pf *pf, u16 restricted_id) continue; tmp_vf->first_vector_idx = - ice_sriov_get_irqs(pf, tmp_vf->num_msix); + ice_virt_get_irqs(pf, tmp_vf->num_msix); /* there is no need to rebuild VSI as we are only changing the * vector indexes not amount of MSI-X or queues */ @@ -1105,20 +977,15 @@ int ice_sriov_set_msix_vec_count(struct pci_dev *vf_dev, int msix_vec_count) prev_msix = vf->num_msix; prev_queues = vf->num_vf_qs; - if (ice_sriov_move_base_vector(pf, msix_vec_count - prev_msix)) { - ice_put_vf(vf); - return -ENOSPC; - } - ice_dis_vf_mappings(vf); - ice_sriov_free_irqs(pf, vf); + ice_virt_free_irqs(pf, vf->first_vector_idx, vf->num_msix); /* Remap all VFs beside the one is now configured */ ice_sriov_remap_vectors(pf, vf->vf_id); vf->num_msix = msix_vec_count; vf->num_vf_qs = queues; - vf->first_vector_idx = ice_sriov_get_irqs(pf, vf->num_msix); + vf->first_vector_idx = ice_virt_get_irqs(pf, vf->num_msix); if (vf->first_vector_idx < 0) goto unroll; @@ -1147,7 +1014,8 @@ unroll: vf->num_msix = prev_msix; vf->num_vf_qs = prev_queues; - vf->first_vector_idx = ice_sriov_get_irqs(pf, vf->num_msix); + + vf->first_vector_idx = ice_virt_get_irqs(pf, vf->num_msix); if (vf->first_vector_idx < 0) { ice_put_vf(vf); return -EINVAL; diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 33a1a5934c0d..0aab21113cc4 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -871,14 +871,6 @@ union ice_phy_params { struct ice_eth56g_params eth56g; }; -/* PHY model */ -enum ice_phy_model { - ICE_PHY_UNSUP = -1, - ICE_PHY_E810 = 1, - ICE_PHY_E82X, - ICE_PHY_ETH56G, -}; - /* Global Link Topology */ enum ice_global_link_topo { ICE_LINK_TOPO_UP_TO_2_LINKS, @@ -888,7 +880,6 @@ enum ice_global_link_topo { }; struct ice_ptp_hw { - enum ice_phy_model phy_model; union ice_phy_params phy; u8 num_lports; u8 ports_per_phy; diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h index 4261fe1c2bcd..799b2c1f1184 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h @@ -124,6 +124,9 @@ struct ice_vf { u8 spoofchk:1; u8 link_forced:1; u8 link_up:1; /* only valid if VF link is forced */ + + u32 ptp_caps; + unsigned int min_tx_rate; /* Minimum Tx bandwidth limit in Mbps */ unsigned int max_tx_rate; /* Maximum Tx bandwidth limit in Mbps */ DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS); /* VF runtime states */ diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index ff4ad788d96a..674767781fe4 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -498,6 +498,9 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_QOS) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_QOS; + if (vf->driver_caps & VIRTCHNL_VF_CAP_PTP) + vfres->vf_cap_flags |= VIRTCHNL_VF_CAP_PTP; + vfres->num_vsis = 1; /* Tx and Rx queue are equal for VF */ vfres->num_queue_pairs = vsi->num_txq; @@ -1975,6 +1978,7 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) struct ice_vsi *vsi; u8 act_prt, pri_prt; int i = -1, q_idx; + bool ena_ts; lag = pf->lag; mutex_lock(&pf->lag_mutex); @@ -2104,9 +2108,14 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) rxdid = ICE_RXDID_LEGACY_1; } + ena_ts = ((vf->driver_caps & + VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) && + (vf->driver_caps & VIRTCHNL_VF_CAP_PTP) && + (qpi->rxq.flags & VIRTCHNL_PTP_RX_TSTAMP)); + ice_write_qrxflxp_cntxt(&vsi->back->hw, - vsi->rxq_map[q_idx], - rxdid, 0x03, false); + vsi->rxq_map[q_idx], rxdid, + ICE_RXDID_PRIO, ena_ts); } } @@ -3031,8 +3040,8 @@ err: static int ice_vc_query_rxdid(struct ice_vf *vf) { enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; - struct virtchnl_supported_rxdids rxdid = {}; struct ice_pf *pf = vf->pf; + u64 rxdid; if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; @@ -3044,7 +3053,7 @@ static int ice_vc_query_rxdid(struct ice_vf *vf) goto err; } - rxdid.supported_rxdids = pf->supported_rxdids; + rxdid = pf->supported_rxdids; err: return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_SUPPORTED_RXDIDS, @@ -4092,6 +4101,59 @@ out: v_ret, NULL, 0); } +static int ice_vc_get_ptp_cap(struct ice_vf *vf, + const struct virtchnl_ptp_caps *msg) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_ERR_PARAM; + u32 caps = VIRTCHNL_1588_PTP_CAP_RX_TSTAMP | + VIRTCHNL_1588_PTP_CAP_READ_PHC; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) + goto err; + + v_ret = VIRTCHNL_STATUS_SUCCESS; + + if (msg->caps & caps) + vf->ptp_caps = caps; + +err: + /* send the response back to the VF */ + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_1588_PTP_GET_CAPS, v_ret, + (u8 *)&vf->ptp_caps, + sizeof(struct virtchnl_ptp_caps)); +} + +static int ice_vc_get_phc_time(struct ice_vf *vf) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_ERR_PARAM; + struct virtchnl_phc_time *phc_time = NULL; + struct ice_pf *pf = vf->pf; + u32 len = 0; + int ret; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) + goto err; + + v_ret = VIRTCHNL_STATUS_SUCCESS; + + phc_time = kzalloc(sizeof(*phc_time), GFP_KERNEL); + if (!phc_time) { + v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; + goto err; + } + + len = sizeof(*phc_time); + + phc_time->time = ice_ptp_read_src_clk_reg(pf, NULL); + +err: + /* send the response back to the VF */ + ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_1588_PTP_GET_TIME, v_ret, + (u8 *)phc_time, len); + kfree(phc_time); + return ret; +} + static const struct ice_virtchnl_ops ice_virtchnl_dflt_ops = { .get_ver_msg = ice_vc_get_ver_msg, .get_vf_res_msg = ice_vc_get_vf_res_msg, @@ -4128,6 +4190,8 @@ static const struct ice_virtchnl_ops ice_virtchnl_dflt_ops = { .get_qos_caps = ice_vc_get_qos_caps, .cfg_q_bw = ice_vc_cfg_q_bw, .cfg_q_quanta = ice_vc_cfg_q_quanta, + .get_ptp_cap = ice_vc_get_ptp_cap, + .get_phc_time = ice_vc_get_phc_time, /* If you add a new op here please make sure to add it to * ice_virtchnl_repr_ops as well. */ @@ -4264,6 +4328,8 @@ static const struct ice_virtchnl_ops ice_virtchnl_repr_ops = { .get_qos_caps = ice_vc_get_qos_caps, .cfg_q_bw = ice_vc_cfg_q_bw, .cfg_q_quanta = ice_vc_cfg_q_quanta, + .get_ptp_cap = ice_vc_get_ptp_cap, + .get_phc_time = ice_vc_get_phc_time, }; /** @@ -4501,6 +4567,12 @@ error_handler: case VIRTCHNL_OP_CONFIG_QUANTA: err = ops->cfg_q_quanta(vf, msg); break; + case VIRTCHNL_OP_1588_PTP_GET_CAPS: + err = ops->get_ptp_cap(vf, (const void *)msg); + break; + case VIRTCHNL_OP_1588_PTP_GET_TIME: + err = ops->get_phc_time(vf); + break; case VIRTCHNL_OP_UNKNOWN: default: dev_err(dev, "Unsupported opcode %d from VF %d\n", v_opcode, diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.h b/drivers/net/ethernet/intel/ice/ice_virtchnl.h index 0c629aef9baf..222990f229d5 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.h @@ -26,6 +26,9 @@ #define ICE_MAX_MACADDR_PER_VF 18 #define ICE_FLEX_DESC_RXDID_MAX_NUM 64 +/* Priority to be compared against previous priority from the pipe */ +#define ICE_RXDID_PRIO 0x03 + /* VFs only get a single VSI. For ice hardware, the VF does not need to know * its VSI index. However, the virtchnl interface requires a VSI number, * mainly due to legacy hardware. @@ -72,6 +75,9 @@ struct ice_virtchnl_ops { int (*cfg_q_tc_map)(struct ice_vf *vf, u8 *msg); int (*cfg_q_bw)(struct ice_vf *vf, u8 *msg); int (*cfg_q_quanta)(struct ice_vf *vf, u8 *msg); + int (*get_ptp_cap)(struct ice_vf *vf, + const struct virtchnl_ptp_caps *msg); + int (*get_phc_time)(struct ice_vf *vf); }; #ifdef CONFIG_PCI_IOV diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c index c105a82ee136..a3d1579a619a 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c @@ -84,6 +84,12 @@ static const u32 fdir_pf_allowlist_opcodes[] = { VIRTCHNL_OP_ADD_FDIR_FILTER, VIRTCHNL_OP_DEL_FDIR_FILTER, }; +/* VIRTCHNL_VF_CAP_PTP */ +static const u32 ptp_allowlist_opcodes[] = { + VIRTCHNL_OP_1588_PTP_GET_CAPS, + VIRTCHNL_OP_1588_PTP_GET_TIME, +}; + static const u32 tc_allowlist_opcodes[] = { VIRTCHNL_OP_GET_QOS_CAPS, VIRTCHNL_OP_CONFIG_QUEUE_BW, VIRTCHNL_OP_CONFIG_QUANTA, @@ -110,6 +116,7 @@ static const struct allowlist_opcode_info allowlist_opcodes[] = { ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_FDIR_PF, fdir_pf_allowlist_opcodes), ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_VLAN_V2, vlan_v2_allowlist_opcodes), ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_QOS, tc_allowlist_opcodes), + ALLOW_ITEM(VIRTCHNL_VF_CAP_PTP, ptp_allowlist_opcodes), }; /** diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 8975d2971bc3..a3a4eaa17739 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019, Intel Corporation. */ #include <linux/bpf_trace.h> +#include <linux/unroll.h> #include <net/xdp_sock_drv.h> #include <net/xdp.h> #include "ice.h" @@ -989,7 +990,8 @@ static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct ice_tx_desc *tx_desc; u32 i; - loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) { + unrolled_count(PKTS_PER_BATCH) + for (i = 0; i < PKTS_PER_BATCH; i++) { dma_addr_t dma; dma = xsk_buff_raw_get_dma(xsk_pool, descs[i].addr); diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h index 45adeb513253..8dc5d55e26c5 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.h +++ b/drivers/net/ethernet/intel/ice/ice_xsk.h @@ -7,14 +7,6 @@ #define PKTS_PER_BATCH 8 -#ifdef __clang__ -#define loop_unrolled_for _Pragma("clang loop unroll_count(8)") for -#elif __GNUC__ >= 8 -#define loop_unrolled_for _Pragma("GCC unroll 8") for -#else -#define loop_unrolled_for for -#endif - struct ice_vsi; #ifdef CONFIG_XDP_SOCKETS diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c index dfd7cf1d9aa0..eae1b6f474e6 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c @@ -595,7 +595,7 @@ static bool idpf_rx_singleq_is_non_eop(const union virtchnl2_rx_desc *rx_desc) */ static void idpf_rx_singleq_csum(struct idpf_rx_queue *rxq, struct sk_buff *skb, - struct idpf_rx_csum_decoded csum_bits, + struct libeth_rx_csum csum_bits, struct libeth_rx_pt decoded) { bool ipv4, ipv6; @@ -661,10 +661,10 @@ checksum_fail: * * Return: parsed checksum status. **/ -static struct idpf_rx_csum_decoded +static struct libeth_rx_csum idpf_rx_singleq_base_csum(const union virtchnl2_rx_desc *rx_desc) { - struct idpf_rx_csum_decoded csum_bits = { }; + struct libeth_rx_csum csum_bits = { }; u32 rx_error, rx_status; u64 qword; @@ -696,10 +696,10 @@ idpf_rx_singleq_base_csum(const union virtchnl2_rx_desc *rx_desc) * * Return: parsed checksum status. **/ -static struct idpf_rx_csum_decoded +static struct libeth_rx_csum idpf_rx_singleq_flex_csum(const union virtchnl2_rx_desc *rx_desc) { - struct idpf_rx_csum_decoded csum_bits = { }; + struct libeth_rx_csum csum_bits = { }; u16 rx_status0, rx_status1; rx_status0 = le16_to_cpu(rx_desc->flex_nic_wb.status_error0); @@ -798,7 +798,7 @@ idpf_rx_singleq_process_skb_fields(struct idpf_rx_queue *rx_q, u16 ptype) { struct libeth_rx_pt decoded = rx_q->rx_ptype_lkup[ptype]; - struct idpf_rx_csum_decoded csum_bits; + struct libeth_rx_csum csum_bits; /* modifies the skb - consumes the enet header */ skb->protocol = eth_type_trans(skb, rx_q->netdev); @@ -891,6 +891,7 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rx_q, * idpf_rx_singleq_extract_base_fields - Extract fields from the Rx descriptor * @rx_desc: the descriptor to process * @fields: storage for extracted values + * @ptype: pointer that will store packet type * * Decode the Rx descriptor and extract relevant information including the * size and Rx packet type. @@ -900,20 +901,21 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rx_q, */ static void idpf_rx_singleq_extract_base_fields(const union virtchnl2_rx_desc *rx_desc, - struct idpf_rx_extracted *fields) + struct libeth_rqe_info *fields, u32 *ptype) { u64 qword; qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len); - fields->size = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_M, qword); - fields->rx_ptype = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_M, qword); + fields->len = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_M, qword); + *ptype = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_M, qword); } /** * idpf_rx_singleq_extract_flex_fields - Extract fields from the Rx descriptor * @rx_desc: the descriptor to process * @fields: storage for extracted values + * @ptype: pointer that will store packet type * * Decode the Rx descriptor and extract relevant information including the * size and Rx packet type. @@ -923,12 +925,12 @@ idpf_rx_singleq_extract_base_fields(const union virtchnl2_rx_desc *rx_desc, */ static void idpf_rx_singleq_extract_flex_fields(const union virtchnl2_rx_desc *rx_desc, - struct idpf_rx_extracted *fields) + struct libeth_rqe_info *fields, u32 *ptype) { - fields->size = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M, - le16_to_cpu(rx_desc->flex_nic_wb.pkt_len)); - fields->rx_ptype = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PTYPE_M, - le16_to_cpu(rx_desc->flex_nic_wb.ptype_flex_flags0)); + fields->len = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M, + le16_to_cpu(rx_desc->flex_nic_wb.pkt_len)); + *ptype = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PTYPE_M, + le16_to_cpu(rx_desc->flex_nic_wb.ptype_flex_flags0)); } /** @@ -936,17 +938,18 @@ idpf_rx_singleq_extract_flex_fields(const union virtchnl2_rx_desc *rx_desc, * @rx_q: Rx descriptor queue * @rx_desc: the descriptor to process * @fields: storage for extracted values + * @ptype: pointer that will store packet type * */ static void idpf_rx_singleq_extract_fields(const struct idpf_rx_queue *rx_q, const union virtchnl2_rx_desc *rx_desc, - struct idpf_rx_extracted *fields) + struct libeth_rqe_info *fields, u32 *ptype) { if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) - idpf_rx_singleq_extract_base_fields(rx_desc, fields); + idpf_rx_singleq_extract_base_fields(rx_desc, fields, ptype); else - idpf_rx_singleq_extract_flex_fields(rx_desc, fields); + idpf_rx_singleq_extract_flex_fields(rx_desc, fields, ptype); } /** @@ -966,9 +969,10 @@ static int idpf_rx_singleq_clean(struct idpf_rx_queue *rx_q, int budget) /* Process Rx packets bounded by budget */ while (likely(total_rx_pkts < (unsigned int)budget)) { - struct idpf_rx_extracted fields = { }; + struct libeth_rqe_info fields = { }; union virtchnl2_rx_desc *rx_desc; struct idpf_rx_buf *rx_buf; + u32 ptype; /* get the Rx desc from Rx queue based on 'next_to_clean' */ rx_desc = &rx_q->rx[ntc]; @@ -989,16 +993,16 @@ static int idpf_rx_singleq_clean(struct idpf_rx_queue *rx_q, int budget) */ dma_rmb(); - idpf_rx_singleq_extract_fields(rx_q, rx_desc, &fields); + idpf_rx_singleq_extract_fields(rx_q, rx_desc, &fields, &ptype); rx_buf = &rx_q->rx_buf[ntc]; - if (!libeth_rx_sync_for_cpu(rx_buf, fields.size)) + if (!libeth_rx_sync_for_cpu(rx_buf, fields.len)) goto skip_data; if (skb) - idpf_rx_add_frag(rx_buf, skb, fields.size); + idpf_rx_add_frag(rx_buf, skb, fields.len); else - skb = idpf_rx_build_skb(rx_buf, fields.size); + skb = idpf_rx_build_skb(rx_buf, fields.len); /* exit if we failed to retrieve a buffer */ if (!skb) @@ -1033,8 +1037,7 @@ skip_data: total_rx_bytes += skb->len; /* protocol */ - idpf_rx_singleq_process_skb_fields(rx_q, skb, - rx_desc, fields.rx_ptype); + idpf_rx_singleq_process_skb_fields(rx_q, skb, rx_desc, ptype); /* send completed skb up the stack */ napi_gro_receive(rx_q->pp->p.napi, skb); diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 9be6a6b59c4e..2747dc69999a 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -2895,7 +2895,7 @@ idpf_rx_hash(const struct idpf_rx_queue *rxq, struct sk_buff *skb, * skb->protocol must be set before this function is called */ static void idpf_rx_csum(struct idpf_rx_queue *rxq, struct sk_buff *skb, - struct idpf_rx_csum_decoded csum_bits, + struct libeth_rx_csum csum_bits, struct libeth_rx_pt decoded) { bool ipv4, ipv6; @@ -2923,7 +2923,7 @@ static void idpf_rx_csum(struct idpf_rx_queue *rxq, struct sk_buff *skb, if (unlikely(csum_bits.l4e)) goto checksum_fail; - if (csum_bits.raw_csum_inv || + if (!csum_bits.raw_csum_valid || decoded.inner_prot == LIBETH_RX_PT_INNER_SCTP) { skb->ip_summed = CHECKSUM_UNNECESSARY; return; @@ -2946,10 +2946,10 @@ checksum_fail: * * Return: parsed checksum status. **/ -static struct idpf_rx_csum_decoded +static struct libeth_rx_csum idpf_rx_splitq_extract_csum_bits(const struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc) { - struct idpf_rx_csum_decoded csum = { }; + struct libeth_rx_csum csum = { }; u8 qword0, qword1; qword0 = rx_desc->status_err0_qw0; @@ -2965,9 +2965,9 @@ idpf_rx_splitq_extract_csum_bits(const struct virtchnl2_rx_flex_desc_adv_nic_3 * qword1); csum.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_IPV6EXADD_M, qword0); - csum.raw_csum_inv = - le16_get_bits(rx_desc->ptype_err_fflags0, - VIRTCHNL2_RX_FLEX_DESC_ADV_RAW_CSUM_INV_M); + csum.raw_csum_valid = + !le16_get_bits(rx_desc->ptype_err_fflags0, + VIRTCHNL2_RX_FLEX_DESC_ADV_RAW_CSUM_INV_M); csum.raw_csum = le16_to_cpu(rx_desc->misc.raw_cs); return csum; @@ -3058,7 +3058,7 @@ static int idpf_rx_process_skb_fields(struct idpf_rx_queue *rxq, struct sk_buff *skb, const struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc) { - struct idpf_rx_csum_decoded csum_bits; + struct libeth_rx_csum csum_bits; struct libeth_rx_pt decoded; u16 rx_ptype; diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index 0f71a6f5557b..cd9a20c9cc7f 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -213,25 +213,6 @@ enum idpf_tx_ctx_desc_eipt_offload { IDPF_TX_CTX_EXT_IP_IPV4 = 0x3 }; -/* Checksum offload bits decoded from the receive descriptor. */ -struct idpf_rx_csum_decoded { - u32 l3l4p : 1; - u32 ipe : 1; - u32 eipe : 1; - u32 eudpe : 1; - u32 ipv6exadd : 1; - u32 l4e : 1; - u32 pprs : 1; - u32 nat : 1; - u32 raw_csum_inv : 1; - u32 raw_csum : 16; -}; - -struct idpf_rx_extracted { - unsigned int size; - u16 rx_ptype; -}; - #define IDPF_TX_COMPLQ_CLEAN_BUDGET 256 #define IDPF_TX_MIN_PKT_LEN 17 #define IDPF_TX_DESCS_FOR_SKB_DATA_PTR 1 diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.c b/drivers/net/ethernet/intel/igc/igc_xdp.c index 13bbd3346e01..c538e6b18aad 100644 --- a/drivers/net/ethernet/intel/igc/igc_xdp.c +++ b/drivers/net/ethernet/intel/igc/igc_xdp.c @@ -14,6 +14,7 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog, bool if_running = netif_running(dev); struct bpf_prog *old_prog; bool need_update; + unsigned int i; if (dev->mtu > ETH_DATA_LEN) { /* For now, the driver doesn't support XDP functionality with @@ -24,8 +25,13 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog, } need_update = !!adapter->xdp_prog != !!prog; - if (if_running && need_update) - igc_close(dev); + if (if_running && need_update) { + for (i = 0; i < adapter->num_rx_queues; i++) { + igc_disable_rx_ring(adapter->rx_ring[i]); + igc_disable_tx_ring(adapter->tx_ring[i]); + napi_disable(&adapter->rx_ring[i]->q_vector->napi); + } + } old_prog = xchg(&adapter->xdp_prog, prog); if (old_prog) @@ -36,8 +42,13 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog, else xdp_features_clear_redirect_target(dev); - if (if_running && need_update) - igc_open(dev); + if (if_running && need_update) { + for (i = 0; i < adapter->num_rx_queues; i++) { + napi_enable(&adapter->rx_ring[i]->q_vector->napi); + igc_enable_tx_ring(adapter->tx_ring[i]); + igc_enable_rx_ring(adapter->rx_ring[i]); + } + } return 0; } diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 4fe121b9f94b..44b18c573909 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -5557,7 +5557,6 @@ static int mvneta_probe(struct platform_device *pdev) clk_prepare_enable(pp->clk_bus); pp->phylink_pcs.ops = &mvneta_phylink_pcs_ops; - pp->phylink_pcs.neg_mode = true; pp->phylink_config.dev = &dev->dev; pp->phylink_config.type = PHYLINK_NETDEV; diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index dd76c1b7ed3a..f166dc4e6503 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -7024,9 +7024,7 @@ static int mvpp2_port_probe(struct platform_device *pdev, dev->dev_port = port->id; port->pcs_gmac.ops = &mvpp2_phylink_gmac_pcs_ops; - port->pcs_gmac.neg_mode = true; port->pcs_xlg.ops = &mvpp2_phylink_xlg_pcs_ops; - port->pcs_xlg.neg_mode = true; if (!mvpp2_use_acpi_compat_mode(port_fwnode)) { port->phylink_config.dev = &dev->dev; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile index cb6513ab35e7..69e0778f9ac1 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_RVU_ESWITCH) += rvu_rep.o rvu_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \ otx2_flows.o otx2_tc.o cn10k.o otx2_dmac_flt.o \ - otx2_devlink.o qos_sq.o qos.o + otx2_devlink.o qos_sq.o qos.o otx2_xsk.o rvu_nicvf-y := otx2_vf.o rvu_rep-y := rep.o diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c index a15cc86635d6..c3b6e0f60a79 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c @@ -112,9 +112,12 @@ int cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq) struct otx2_nic *pfvf = dev; int cnt = cq->pool_ptrs; u64 ptrs[NPA_MAX_BURST]; + struct otx2_pool *pool; dma_addr_t bufptr; int num_ptrs = 1; + pool = &pfvf->qset.pool[cq->cq_idx]; + /* Refill pool with new buffers */ while (cq->pool_ptrs) { if (otx2_alloc_buffer(pfvf, cq, &bufptr)) { @@ -124,7 +127,9 @@ int cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq) break; } cq->pool_ptrs--; - ptrs[num_ptrs] = (u64)bufptr + OTX2_HEAD_ROOM; + ptrs[num_ptrs] = pool->xsk_pool ? + (u64)bufptr : (u64)bufptr + OTX2_HEAD_ROOM; + num_ptrs++; if (num_ptrs == NPA_MAX_BURST || cq->pool_ptrs == 0) { __cn10k_aura_freeptr(pfvf, cq->cq_idx, ptrs, diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 2b49bfec7869..84cd029a85aa 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -17,6 +17,7 @@ #include "otx2_common.h" #include "otx2_struct.h" #include "cn10k.h" +#include "otx2_xsk.h" static bool otx2_is_pfc_enabled(struct otx2_nic *pfvf) { @@ -330,6 +331,10 @@ int otx2_set_rss_table(struct otx2_nic *pfvf, int ctx_id) rss_ctx = rss->rss_ctx[ctx_id]; /* Get memory to put this msg */ for (idx = 0; idx < rss->rss_size; idx++) { + /* Ignore the queue if AF_XDP zero copy is enabled */ + if (test_bit(rss_ctx->ind_tbl[idx], pfvf->af_xdp_zc_qidx)) + continue; + aq = otx2_mbox_alloc_msg_nix_aq_enq(mbox); if (!aq) { /* The shared memory buffer can be full. @@ -549,10 +554,13 @@ static int otx2_alloc_pool_buf(struct otx2_nic *pfvf, struct otx2_pool *pool, } static int __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, - dma_addr_t *dma) + dma_addr_t *dma, int qidx, int idx) { u8 *buf; + if (pool->xsk_pool) + return otx2_xsk_pool_alloc_buf(pfvf, pool, dma, idx); + if (pool->page_pool) return otx2_alloc_pool_buf(pfvf, pool, dma); @@ -571,12 +579,12 @@ static int __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, } int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, - dma_addr_t *dma) + dma_addr_t *dma, int qidx, int idx) { int ret; local_bh_disable(); - ret = __otx2_alloc_rbuf(pfvf, pool, dma); + ret = __otx2_alloc_rbuf(pfvf, pool, dma, qidx, idx); local_bh_enable(); return ret; } @@ -584,7 +592,8 @@ int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, int otx2_alloc_buffer(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, dma_addr_t *dma) { - if (unlikely(__otx2_alloc_rbuf(pfvf, cq->rbpool, dma))) + if (unlikely(__otx2_alloc_rbuf(pfvf, cq->rbpool, dma, + cq->cq_idx, cq->pool_ptrs - 1))) return -ENOMEM; return 0; } @@ -884,7 +893,7 @@ void otx2_sqb_flush(struct otx2_nic *pfvf) #define RQ_PASS_LVL_AURA (255 - ((95 * 256) / 100)) /* RED when 95% is full */ #define RQ_DROP_LVL_AURA (255 - ((99 * 256) / 100)) /* Drop when 99% is full */ -static int otx2_rq_init(struct otx2_nic *pfvf, u16 qidx, u16 lpb_aura) +int otx2_rq_init(struct otx2_nic *pfvf, u16 qidx, u16 lpb_aura) { struct otx2_qset *qset = &pfvf->qset; struct nix_aq_enq_req *aq; @@ -1028,6 +1037,10 @@ int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura) sq->stats.bytes = 0; sq->stats.pkts = 0; + /* Attach XSK_BUFF_POOL to XDP queue */ + if (qidx > pfvf->hw.xdp_queues) + otx2_attach_xsk_buff(pfvf, sq, (qidx - pfvf->hw.xdp_queues)); + chan_offset = qidx % pfvf->hw.tx_chan_cnt; err = pfvf->hw_ops->sq_aq_init(pfvf, qidx, chan_offset, sqb_aura); @@ -1041,12 +1054,13 @@ int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura) } -static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx) +int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx) { struct otx2_qset *qset = &pfvf->qset; int err, pool_id, non_xdp_queues; struct nix_aq_enq_req *aq; struct otx2_cq_queue *cq; + struct otx2_pool *pool; cq = &qset->cq[qidx]; cq->cq_idx = qidx; @@ -1055,8 +1069,20 @@ static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx) cq->cq_type = CQ_RX; cq->cint_idx = qidx; cq->cqe_cnt = qset->rqe_cnt; - if (pfvf->xdp_prog) + if (pfvf->xdp_prog) { xdp_rxq_info_reg(&cq->xdp_rxq, pfvf->netdev, qidx, 0); + pool = &qset->pool[qidx]; + if (pool->xsk_pool) { + xdp_rxq_info_reg_mem_model(&cq->xdp_rxq, + MEM_TYPE_XSK_BUFF_POOL, + NULL); + xsk_pool_set_rxq_info(pool->xsk_pool, &cq->xdp_rxq); + } else if (pool->page_pool) { + xdp_rxq_info_reg_mem_model(&cq->xdp_rxq, + MEM_TYPE_PAGE_POOL, + pool->page_pool); + } + } } else if (qidx < non_xdp_queues) { cq->cq_type = CQ_TX; cq->cint_idx = qidx - pfvf->hw.rx_queues; @@ -1275,9 +1301,10 @@ void otx2_free_bufs(struct otx2_nic *pfvf, struct otx2_pool *pool, pa = otx2_iova_to_phys(pfvf->iommu_domain, iova); page = virt_to_head_page(phys_to_virt(pa)); - if (pool->page_pool) { page_pool_put_full_page(pool->page_pool, page, true); + } else if (pool->xsk_pool) { + /* Note: No way of identifying xdp_buff */ } else { dma_unmap_page_attrs(pfvf->dev, iova, size, DMA_FROM_DEVICE, @@ -1292,6 +1319,7 @@ void otx2_free_aura_ptr(struct otx2_nic *pfvf, int type) int pool_id, pool_start = 0, pool_end = 0, size = 0; struct otx2_pool *pool; u64 iova; + int idx; if (type == AURA_NIX_SQ) { pool_start = otx2_get_pool_idx(pfvf, type, 0); @@ -1306,16 +1334,21 @@ void otx2_free_aura_ptr(struct otx2_nic *pfvf, int type) /* Free SQB and RQB pointers from the aura pool */ for (pool_id = pool_start; pool_id < pool_end; pool_id++) { - iova = otx2_aura_allocptr(pfvf, pool_id); pool = &pfvf->qset.pool[pool_id]; + iova = otx2_aura_allocptr(pfvf, pool_id); while (iova) { if (type == AURA_NIX_RQ) iova -= OTX2_HEAD_ROOM; - otx2_free_bufs(pfvf, pool, iova, size); - iova = otx2_aura_allocptr(pfvf, pool_id); } + + for (idx = 0 ; idx < pool->xdp_cnt; idx++) { + if (!pool->xdp[idx]) + continue; + + xsk_buff_free(pool->xdp[idx]); + } } } @@ -1332,7 +1365,8 @@ void otx2_aura_pool_free(struct otx2_nic *pfvf) qmem_free(pfvf->dev, pool->stack); qmem_free(pfvf->dev, pool->fc_addr); page_pool_destroy(pool->page_pool); - pool->page_pool = NULL; + devm_kfree(pfvf->dev, pool->xdp); + pool->xsk_pool = NULL; } devm_kfree(pfvf->dev, pfvf->qset.pool); pfvf->qset.pool = NULL; @@ -1419,6 +1453,7 @@ int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, int stack_pages, int numptrs, int buf_size, int type) { struct page_pool_params pp_params = { 0 }; + struct xsk_buff_pool *xsk_pool; struct npa_aq_enq_req *aq; struct otx2_pool *pool; int err; @@ -1462,21 +1497,35 @@ int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, aq->ctype = NPA_AQ_CTYPE_POOL; aq->op = NPA_AQ_INSTOP_INIT; - if (type != AURA_NIX_RQ) { - pool->page_pool = NULL; + if (type != AURA_NIX_RQ) + return 0; + + if (!test_bit(pool_id, pfvf->af_xdp_zc_qidx)) { + pp_params.order = get_order(buf_size); + pp_params.flags = PP_FLAG_DMA_MAP; + pp_params.pool_size = min(OTX2_PAGE_POOL_SZ, numptrs); + pp_params.nid = NUMA_NO_NODE; + pp_params.dev = pfvf->dev; + pp_params.dma_dir = DMA_FROM_DEVICE; + pool->page_pool = page_pool_create(&pp_params); + if (IS_ERR(pool->page_pool)) { + netdev_err(pfvf->netdev, "Creation of page pool failed\n"); + return PTR_ERR(pool->page_pool); + } return 0; } - pp_params.order = get_order(buf_size); - pp_params.flags = PP_FLAG_DMA_MAP; - pp_params.pool_size = min(OTX2_PAGE_POOL_SZ, numptrs); - pp_params.nid = NUMA_NO_NODE; - pp_params.dev = pfvf->dev; - pp_params.dma_dir = DMA_FROM_DEVICE; - pool->page_pool = page_pool_create(&pp_params); - if (IS_ERR(pool->page_pool)) { - netdev_err(pfvf->netdev, "Creation of page pool failed\n"); - return PTR_ERR(pool->page_pool); + /* Set XSK pool to support AF_XDP zero-copy */ + xsk_pool = xsk_get_pool_from_qid(pfvf->netdev, pool_id); + if (xsk_pool) { + pool->xsk_pool = xsk_pool; + pool->xdp_cnt = numptrs; + pool->xdp = devm_kcalloc(pfvf->dev, + numptrs, sizeof(struct xdp_buff *), GFP_KERNEL); + if (IS_ERR(pool->xdp)) { + netdev_err(pfvf->netdev, "Creation of xsk pool failed\n"); + return PTR_ERR(pool->xdp); + } } return 0; @@ -1537,9 +1586,18 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf) } for (ptr = 0; ptr < num_sqbs; ptr++) { - err = otx2_alloc_rbuf(pfvf, pool, &bufptr); - if (err) + err = otx2_alloc_rbuf(pfvf, pool, &bufptr, pool_id, ptr); + if (err) { + if (pool->xsk_pool) { + ptr--; + while (ptr >= 0) { + xsk_buff_free(pool->xdp[ptr]); + ptr--; + } + } goto err_mem; + } + pfvf->hw_ops->aura_freeptr(pfvf, pool_id, bufptr); sq->sqb_ptrs[sq->sqb_count++] = (u64)bufptr; } @@ -1589,11 +1647,19 @@ int otx2_rq_aura_pool_init(struct otx2_nic *pfvf) /* Allocate pointers and free them to aura/pool */ for (pool_id = 0; pool_id < hw->rqpool_cnt; pool_id++) { pool = &pfvf->qset.pool[pool_id]; + for (ptr = 0; ptr < num_ptrs; ptr++) { - err = otx2_alloc_rbuf(pfvf, pool, &bufptr); - if (err) + err = otx2_alloc_rbuf(pfvf, pool, &bufptr, pool_id, ptr); + if (err) { + if (pool->xsk_pool) { + while (ptr) + xsk_buff_free(pool->xdp[--ptr]); + } return -ENOMEM; + } + pfvf->hw_ops->aura_freeptr(pfvf, pool_id, + pool->xsk_pool ? bufptr : bufptr + OTX2_HEAD_ROOM); } } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 65814e3dc93f..1e88422825be 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -21,6 +21,7 @@ #include <linux/time64.h> #include <linux/dim.h> #include <uapi/linux/if_macsec.h> +#include <net/page_pool/helpers.h> #include <mbox.h> #include <npc.h> @@ -128,6 +129,12 @@ enum otx2_errcodes_re { ERRCODE_IL4_CSUM = 0x22, }; +enum otx2_xdp_action { + OTX2_XDP_TX = BIT(0), + OTX2_XDP_REDIRECT = BIT(1), + OTX2_AF_XDP_FRAME = BIT(2), +}; + struct otx2_dev_stats { u64 rx_bytes; u64 rx_frames; @@ -531,6 +538,8 @@ struct otx2_nic { /* Inline ipsec */ struct cn10k_ipsec ipsec; + /* af_xdp zero-copy */ + unsigned long *af_xdp_zc_qidx; }; static inline bool is_otx2_lbkvf(struct pci_dev *pdev) @@ -1002,7 +1011,7 @@ void otx2_txschq_free_one(struct otx2_nic *pfvf, u16 lvl, u16 schq); void otx2_free_pending_sqe(struct otx2_nic *pfvf); void otx2_sqb_flush(struct otx2_nic *pfvf); int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, - dma_addr_t *dma); + dma_addr_t *dma, int qidx, int idx); int otx2_rxtx_enable(struct otx2_nic *pfvf, bool enable); void otx2_ctx_disable(struct mbox *mbox, int type, bool npa); int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable); @@ -1032,6 +1041,8 @@ void otx2_pfaf_mbox_destroy(struct otx2_nic *pf); void otx2_disable_mbox_intr(struct otx2_nic *pf); void otx2_disable_napi(struct otx2_nic *pf); irqreturn_t otx2_cq_intr_handler(int irq, void *cq_irq); +int otx2_rq_init(struct otx2_nic *pfvf, u16 qidx, u16 lpb_aura); +int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx); /* RSS configuration APIs*/ int otx2_rss_init(struct otx2_nic *pfvf); @@ -1094,7 +1105,8 @@ int otx2_del_macfilter(struct net_device *netdev, const u8 *mac); int otx2_add_macfilter(struct net_device *netdev, const u8 *mac); int otx2_enable_rxvlan(struct otx2_nic *pf, bool enable); int otx2_install_rxvlan_offload_flow(struct otx2_nic *pfvf); -bool otx2_xdp_sq_append_pkt(struct otx2_nic *pfvf, u64 iova, int len, u16 qidx); +bool otx2_xdp_sq_append_pkt(struct otx2_nic *pfvf, struct xdp_frame *xdpf, + u64 iova, int len, u16 qidx, u16 flags); u16 otx2_get_max_mtu(struct otx2_nic *pfvf); int otx2_handle_ntuple_tc_features(struct net_device *netdev, netdev_features_t features); @@ -1175,4 +1187,5 @@ static inline int mcam_entry_cmp(const void *a, const void *b) dma_addr_t otx2_dma_map_skb_frag(struct otx2_nic *pfvf, struct sk_buff *skb, int seg, int *len); void otx2_dma_unmap_skb_frags(struct otx2_nic *pfvf, struct sg_list *sg); +int otx2_read_free_sqe(struct otx2_nic *pfvf, u16 qidx); #endif /* OTX2_COMMON_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index 2d53dc77ef1e..010385b29988 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -910,8 +910,12 @@ static int otx2_get_rxfh(struct net_device *dev, return -ENOENT; if (indir) { - for (idx = 0; idx < rss->rss_size; idx++) + for (idx = 0; idx < rss->rss_size; idx++) { + /* Ignore if the rx queue is AF_XDP zero copy enabled */ + if (test_bit(rss_ctx->ind_tbl[idx], pfvf->af_xdp_zc_qidx)) + continue; indir[idx] = rss_ctx->ind_tbl[idx]; + } } if (rxfh->key) memcpy(rxfh->key, rss->key, sizeof(rss->key)); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index e1dde93e8af8..c7c562f0f5e5 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -27,6 +27,7 @@ #include "qos.h" #include <rvu_trace.h> #include "cn10k_ipsec.h" +#include "otx2_xsk.h" #define DRV_NAME "rvu_nicpf" #define DRV_STRING "Marvell RVU NIC Physical Function Driver" @@ -1662,9 +1663,7 @@ void otx2_free_hw_resources(struct otx2_nic *pf) struct nix_lf_free_req *free_req; struct mbox *mbox = &pf->mbox; struct otx2_cq_queue *cq; - struct otx2_pool *pool; struct msg_req *req; - int pool_id; int qidx; /* Ensure all SQE are processed */ @@ -1705,13 +1704,6 @@ void otx2_free_hw_resources(struct otx2_nic *pf) /* Free RQ buffer pointers*/ otx2_free_aura_ptr(pf, AURA_NIX_RQ); - for (qidx = 0; qidx < pf->hw.rx_queues; qidx++) { - pool_id = otx2_get_pool_idx(pf, AURA_NIX_RQ, qidx); - pool = &pf->qset.pool[pool_id]; - page_pool_destroy(pool->page_pool); - pool->page_pool = NULL; - } - otx2_free_cq_res(pf); /* Free all ingress bandwidth profiles allocated */ @@ -2691,7 +2683,6 @@ static int otx2_get_vf_config(struct net_device *netdev, int vf, static int otx2_xdp_xmit_tx(struct otx2_nic *pf, struct xdp_frame *xdpf, int qidx) { - struct page *page; u64 dma_addr; int err = 0; @@ -2701,11 +2692,11 @@ static int otx2_xdp_xmit_tx(struct otx2_nic *pf, struct xdp_frame *xdpf, if (dma_mapping_error(pf->dev, dma_addr)) return -ENOMEM; - err = otx2_xdp_sq_append_pkt(pf, dma_addr, xdpf->len, qidx); + err = otx2_xdp_sq_append_pkt(pf, xdpf, dma_addr, xdpf->len, + qidx, OTX2_XDP_REDIRECT); if (!err) { otx2_dma_unmap_page(pf, dma_addr, xdpf->len, DMA_TO_DEVICE); - page = virt_to_page(xdpf->data); - put_page(page); + xdp_return_frame(xdpf); return -ENOMEM; } return 0; @@ -2789,6 +2780,8 @@ static int otx2_xdp(struct net_device *netdev, struct netdev_bpf *xdp) switch (xdp->command) { case XDP_SETUP_PROG: return otx2_xdp_setup(pf, xdp->prog); + case XDP_SETUP_XSK_POOL: + return otx2_xsk_pool_setup(pf, xdp->xsk.pool, xdp->xsk.queue_id); default: return -EINVAL; } @@ -2866,6 +2859,7 @@ static const struct net_device_ops otx2_netdev_ops = { .ndo_set_vf_vlan = otx2_set_vf_vlan, .ndo_get_vf_config = otx2_get_vf_config, .ndo_bpf = otx2_xdp, + .ndo_xsk_wakeup = otx2_xsk_wakeup, .ndo_xdp_xmit = otx2_xdp_xmit, .ndo_setup_tc = otx2_setup_tc, .ndo_set_vf_trust = otx2_ndo_set_vf_trust, @@ -3204,16 +3198,26 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) /* Enable link notifications */ otx2_cgx_config_linkevents(pf, true); + pf->af_xdp_zc_qidx = bitmap_zalloc(qcount, GFP_KERNEL); + if (!pf->af_xdp_zc_qidx) { + err = -ENOMEM; + goto err_sriov_cleannup; + } + #ifdef CONFIG_DCB err = otx2_dcbnl_set_ops(netdev); if (err) - goto err_pf_sriov_init; + goto err_free_zc_bmap; #endif otx2_qos_init(pf, qos_txqs); return 0; +err_free_zc_bmap: + bitmap_free(pf->af_xdp_zc_qidx); +err_sriov_cleannup: + otx2_sriov_vfcfg_cleanup(pf); err_pf_sriov_init: otx2_shutdown_tc(pf); err_mcam_flow_del: diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index 224cef938927..6bc5ce5a9f61 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -12,6 +12,7 @@ #include <linux/bpf_trace.h> #include <net/ip6_checksum.h> #include <net/xfrm.h> +#include <net/xdp.h> #include "otx2_reg.h" #include "otx2_common.h" @@ -19,6 +20,7 @@ #include "otx2_txrx.h" #include "otx2_ptp.h" #include "cn10k.h" +#include "otx2_xsk.h" #define CQE_ADDR(CQ, idx) ((CQ)->cqe_base + ((CQ)->cqe_size * (idx))) #define PTP_PORT 0x13F @@ -29,6 +31,12 @@ DEFINE_STATIC_KEY_FALSE(cn10k_ipsec_sa_enabled); +static int otx2_get_free_sqe(struct otx2_snd_queue *sq) +{ + return (sq->cons_head - sq->head - 1 + sq->sqe_cnt) + & (sq->sqe_cnt - 1); +} + static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf, struct bpf_prog *prog, struct nix_cqe_rx_s *cqe, @@ -96,20 +104,22 @@ static unsigned int frag_num(unsigned int i) static void otx2_xdp_snd_pkt_handler(struct otx2_nic *pfvf, struct otx2_snd_queue *sq, - struct nix_cqe_tx_s *cqe) + struct nix_cqe_tx_s *cqe, + int *xsk_frames) { struct nix_send_comp_s *snd_comp = &cqe->comp; struct sg_list *sg; - struct page *page; - u64 pa; sg = &sq->sg[snd_comp->sqe_id]; + if (sg->flags & OTX2_AF_XDP_FRAME) { + (*xsk_frames)++; + return; + } - pa = otx2_iova_to_phys(pfvf->iommu_domain, sg->dma_addr[0]); - otx2_dma_unmap_page(pfvf, sg->dma_addr[0], - sg->size[0], DMA_TO_DEVICE); - page = virt_to_page(phys_to_virt(pa)); - put_page(page); + if (sg->flags & OTX2_XDP_REDIRECT) + otx2_dma_unmap_page(pfvf, sg->dma_addr[0], sg->size[0], DMA_TO_DEVICE); + xdp_return_frame((struct xdp_frame *)sg->skb); + sg->skb = (u64)NULL; } static void otx2_snd_pkt_handler(struct otx2_nic *pfvf, @@ -431,6 +441,18 @@ int otx2_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq) return cnt - cq->pool_ptrs; } +static void otx2_zc_submit_pkts(struct otx2_nic *pfvf, struct xsk_buff_pool *xsk_pool, + int *xsk_frames, int qidx, int budget) +{ + if (*xsk_frames) + xsk_tx_completed(xsk_pool, *xsk_frames); + + if (xsk_uses_need_wakeup(xsk_pool)) + xsk_set_tx_need_wakeup(xsk_pool); + + otx2_zc_napi_handler(pfvf, xsk_pool, qidx, budget); +} + static int otx2_tx_napi_handler(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, int budget) { @@ -439,16 +461,22 @@ static int otx2_tx_napi_handler(struct otx2_nic *pfvf, struct nix_cqe_tx_s *cqe; struct net_device *ndev; int processed_cqe = 0; + int xsk_frames = 0; + + qidx = cq->cq_idx - pfvf->hw.rx_queues; + sq = &pfvf->qset.sq[qidx]; if (cq->pend_cqe >= budget) goto process_cqe; - if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe) + if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe) { + if (sq->xsk_pool) + otx2_zc_submit_pkts(pfvf, sq->xsk_pool, &xsk_frames, + qidx, budget); return 0; + } process_cqe: - qidx = cq->cq_idx - pfvf->hw.rx_queues; - sq = &pfvf->qset.sq[qidx]; while (likely(processed_cqe < budget) && cq->pend_cqe) { cqe = (struct nix_cqe_tx_s *)otx2_get_next_cqe(cq); @@ -458,10 +486,8 @@ process_cqe: break; } - qidx = cq->cq_idx - pfvf->hw.rx_queues; - if (cq->cq_type == CQ_XDP) - otx2_xdp_snd_pkt_handler(pfvf, sq, cqe); + otx2_xdp_snd_pkt_handler(pfvf, sq, cqe, &xsk_frames); else otx2_snd_pkt_handler(pfvf, cq, &pfvf->qset.sq[qidx], cqe, budget, &tx_pkts, &tx_bytes); @@ -502,6 +528,10 @@ process_cqe: netif_carrier_ok(ndev)) netif_tx_wake_queue(txq); } + + if (sq->xsk_pool) + otx2_zc_submit_pkts(pfvf, sq->xsk_pool, &xsk_frames, qidx, budget); + return 0; } @@ -527,9 +557,10 @@ static void otx2_adjust_adaptive_coalese(struct otx2_nic *pfvf, struct otx2_cq_p int otx2_napi_handler(struct napi_struct *napi, int budget) { struct otx2_cq_queue *rx_cq = NULL; + struct otx2_cq_queue *cq = NULL; + struct otx2_pool *pool = NULL; struct otx2_cq_poll *cq_poll; int workdone = 0, cq_idx, i; - struct otx2_cq_queue *cq; struct otx2_qset *qset; struct otx2_nic *pfvf; int filled_cnt = -1; @@ -554,6 +585,7 @@ int otx2_napi_handler(struct napi_struct *napi, int budget) if (rx_cq && rx_cq->pool_ptrs) filled_cnt = pfvf->hw_ops->refill_pool_ptrs(pfvf, rx_cq); + /* Clear the IRQ */ otx2_write64(pfvf, NIX_LF_CINTX_INT(cq_poll->cint_idx), BIT_ULL(0)); @@ -566,20 +598,31 @@ int otx2_napi_handler(struct napi_struct *napi, int budget) if (pfvf->flags & OTX2_FLAG_ADPTV_INT_COAL_ENABLED) otx2_adjust_adaptive_coalese(pfvf, cq_poll); + if (likely(cq)) + pool = &pfvf->qset.pool[cq->cq_idx]; + if (unlikely(!filled_cnt)) { struct refill_work *work; struct delayed_work *dwork; - work = &pfvf->refill_wrk[cq->cq_idx]; - dwork = &work->pool_refill_work; - /* Schedule a task if no other task is running */ - if (!cq->refill_task_sched) { - work->napi = napi; - cq->refill_task_sched = true; - schedule_delayed_work(dwork, - msecs_to_jiffies(100)); + if (likely(cq)) { + work = &pfvf->refill_wrk[cq->cq_idx]; + dwork = &work->pool_refill_work; + /* Schedule a task if no other task is running */ + if (!cq->refill_task_sched) { + work->napi = napi; + cq->refill_task_sched = true; + schedule_delayed_work(dwork, + msecs_to_jiffies(100)); + } + /* Call wake-up for not able to fill buffers */ + if (pool->xsk_pool) + xsk_set_rx_need_wakeup(pool->xsk_pool); } } else { + /* Clear wake-up, since buffers are filled successfully */ + if (pool && pool->xsk_pool) + xsk_clear_rx_need_wakeup(pool->xsk_pool); /* Re-enable interrupts */ otx2_write64(pfvf, NIX_LF_CINTX_ENA_W1S(cq_poll->cint_idx), @@ -1147,7 +1190,7 @@ bool otx2_sq_append_skb(void *dev, struct netdev_queue *txq, /* Check if there is enough room between producer * and consumer index. */ - free_desc = (sq->cons_head - sq->head - 1 + sq->sqe_cnt) & (sq->sqe_cnt - 1); + free_desc = otx2_get_free_sqe(sq); if (free_desc < sq->sqe_thresh) return false; @@ -1230,15 +1273,19 @@ void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, int q u16 pool_id; u64 iova; - if (pfvf->xdp_prog) + pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_RQ, qidx); + pool = &pfvf->qset.pool[pool_id]; + + if (pfvf->xdp_prog) { + if (pool->page_pool) + xdp_rxq_info_unreg_mem_model(&cq->xdp_rxq); + xdp_rxq_info_unreg(&cq->xdp_rxq); + } if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe) return; - pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_RQ, qidx); - pool = &pfvf->qset.pool[pool_id]; - while (cq->pend_cqe) { cqe = (struct nix_cqe_rx_s *)otx2_get_next_cqe(cq); processed_cqe++; @@ -1359,8 +1406,9 @@ void otx2_free_pending_sqe(struct otx2_nic *pfvf) } } -static void otx2_xdp_sqe_add_sg(struct otx2_snd_queue *sq, u64 dma_addr, - int len, int *offset) +static void otx2_xdp_sqe_add_sg(struct otx2_snd_queue *sq, + struct xdp_frame *xdpf, + u64 dma_addr, int len, int *offset, u16 flags) { struct nix_sqe_sg_s *sg = NULL; u64 *iova = NULL; @@ -1377,16 +1425,34 @@ static void otx2_xdp_sqe_add_sg(struct otx2_snd_queue *sq, u64 dma_addr, sq->sg[sq->head].dma_addr[0] = dma_addr; sq->sg[sq->head].size[0] = len; sq->sg[sq->head].num_segs = 1; + sq->sg[sq->head].flags = flags; + sq->sg[sq->head].skb = (u64)xdpf; } -bool otx2_xdp_sq_append_pkt(struct otx2_nic *pfvf, u64 iova, int len, u16 qidx) +int otx2_read_free_sqe(struct otx2_nic *pfvf, u16 qidx) +{ + struct otx2_snd_queue *sq; + int free_sqe; + + sq = &pfvf->qset.sq[qidx]; + free_sqe = otx2_get_free_sqe(sq); + if (free_sqe < sq->sqe_thresh) { + netdev_warn(pfvf->netdev, "No free sqe for Send queue%d\n", qidx); + return 0; + } + + return free_sqe - sq->sqe_thresh; +} + +bool otx2_xdp_sq_append_pkt(struct otx2_nic *pfvf, struct xdp_frame *xdpf, + u64 iova, int len, u16 qidx, u16 flags) { struct nix_sqe_hdr_s *sqe_hdr; struct otx2_snd_queue *sq; int offset, free_sqe; sq = &pfvf->qset.sq[qidx]; - free_sqe = (sq->num_sqbs - *sq->aura_fc_addr) * sq->sqe_per_sqb; + free_sqe = otx2_get_free_sqe(sq); if (free_sqe < sq->sqe_thresh) return false; @@ -1405,7 +1471,7 @@ bool otx2_xdp_sq_append_pkt(struct otx2_nic *pfvf, u64 iova, int len, u16 qidx) offset = sizeof(*sqe_hdr); - otx2_xdp_sqe_add_sg(sq, iova, len, &offset); + otx2_xdp_sqe_add_sg(sq, xdpf, iova, len, &offset, flags); sqe_hdr->sizem1 = (offset / 16) - 1; pfvf->hw_ops->sqe_flush(pfvf, sq, offset, qidx); @@ -1418,14 +1484,28 @@ static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, bool *need_xdp_flush) { + struct xdp_buff xdp, *xsk_buff = NULL; unsigned char *hard_start; + struct otx2_pool *pool; + struct xdp_frame *xdpf; int qidx = cq->cq_idx; - struct xdp_buff xdp; struct page *page; u64 iova, pa; u32 act; int err; + pool = &pfvf->qset.pool[qidx]; + + if (pool->xsk_pool) { + xsk_buff = pool->xdp[--cq->rbpool->xdp_top]; + if (!xsk_buff) + return false; + + xsk_buff->data_end = xsk_buff->data + cqe->sg.seg_size; + act = bpf_prog_run_xdp(prog, xsk_buff); + goto handle_xdp_verdict; + } + iova = cqe->sg.seg_addr - OTX2_HEAD_ROOM; pa = otx2_iova_to_phys(pfvf->iommu_domain, iova); page = virt_to_page(phys_to_virt(pa)); @@ -1438,37 +1518,59 @@ static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf, act = bpf_prog_run_xdp(prog, &xdp); +handle_xdp_verdict: switch (act) { case XDP_PASS: break; case XDP_TX: qidx += pfvf->hw.tx_queues; cq->pool_ptrs++; - return otx2_xdp_sq_append_pkt(pfvf, iova, - cqe->sg.seg_size, qidx); + xdpf = xdp_convert_buff_to_frame(&xdp); + return otx2_xdp_sq_append_pkt(pfvf, xdpf, + cqe->sg.seg_addr, + cqe->sg.seg_size, + qidx, OTX2_XDP_TX); case XDP_REDIRECT: cq->pool_ptrs++; - err = xdp_do_redirect(pfvf->netdev, &xdp, prog); + if (xsk_buff) { + err = xdp_do_redirect(pfvf->netdev, xsk_buff, prog); + if (!err) { + *need_xdp_flush = true; + return true; + } + return false; + } - otx2_dma_unmap_page(pfvf, iova, pfvf->rbsize, - DMA_FROM_DEVICE); + err = xdp_do_redirect(pfvf->netdev, &xdp, prog); if (!err) { *need_xdp_flush = true; return true; } - put_page(page); + + otx2_dma_unmap_page(pfvf, iova, pfvf->rbsize, + DMA_FROM_DEVICE); + xdpf = xdp_convert_buff_to_frame(&xdp); + xdp_return_frame(xdpf); break; default: bpf_warn_invalid_xdp_action(pfvf->netdev, prog, act); break; case XDP_ABORTED: + if (xsk_buff) + xsk_buff_free(xsk_buff); trace_xdp_exception(pfvf->netdev, prog, act); break; case XDP_DROP: - otx2_dma_unmap_page(pfvf, iova, pfvf->rbsize, - DMA_FROM_DEVICE); - put_page(page); cq->pool_ptrs++; + if (xsk_buff) { + xsk_buff_free(xsk_buff); + } else if (page->pp) { + page_pool_recycle_direct(pool->page_pool, page); + } else { + otx2_dma_unmap_page(pfvf, iova, pfvf->rbsize, + DMA_FROM_DEVICE); + put_page(page); + } return true; } return false; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h index d23810963fdb..acf259d72008 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h @@ -12,6 +12,7 @@ #include <linux/iommu.h> #include <linux/if_vlan.h> #include <net/xdp.h> +#include <net/xdp_sock_drv.h> #define LBK_CHAN_BASE 0x000 #define SDP_CHAN_BASE 0x700 @@ -76,6 +77,7 @@ struct otx2_rcv_queue { struct sg_list { u16 num_segs; + u16 flags; u64 skb; u64 size[OTX2_MAX_FRAGS_IN_SQE]; u64 dma_addr[OTX2_MAX_FRAGS_IN_SQE]; @@ -104,6 +106,8 @@ struct otx2_snd_queue { /* SQE ring and CPT response queue for Inline IPSEC */ struct qmem *sqe_ring; struct qmem *cpt_resp; + /* Buffer pool for af_xdp zero-copy */ + struct xsk_buff_pool *xsk_pool; } ____cacheline_aligned_in_smp; enum cq_type { @@ -127,7 +131,11 @@ struct otx2_pool { struct qmem *stack; struct qmem *fc_addr; struct page_pool *page_pool; + struct xsk_buff_pool *xsk_pool; + struct xdp_buff **xdp; + u16 xdp_cnt; u16 rbsize; + u16 xdp_top; }; struct otx2_cq_queue { @@ -144,6 +152,7 @@ struct otx2_cq_queue { void *cqe_base; struct qmem *cqe; struct otx2_pool *rbpool; + bool xsk_zc_en; struct xdp_rxq_info xdp_rxq; } ____cacheline_aligned_in_smp; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index e926c6ce96cf..63ddd262d122 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -722,15 +722,25 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (err) goto err_shutdown_tc; + vf->af_xdp_zc_qidx = bitmap_zalloc(qcount, GFP_KERNEL); + if (!vf->af_xdp_zc_qidx) { + err = -ENOMEM; + goto err_unreg_devlink; + } + #ifdef CONFIG_DCB err = otx2_dcbnl_set_ops(netdev); if (err) - goto err_shutdown_tc; + goto err_free_zc_bmap; #endif otx2_qos_init(vf, qos_txqs); return 0; +err_free_zc_bmap: + bitmap_free(vf->af_xdp_zc_qidx); +err_unreg_devlink: + otx2_unregister_dl(vf); err_shutdown_tc: otx2_shutdown_tc(vf); err_unreg_netdev: diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.c new file mode 100644 index 000000000000..ce10caea8511 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell RVU Ethernet driver + * + * Copyright (C) 2024 Marvell. + * + */ + +#include <linux/bpf_trace.h> +#include <linux/stringify.h> +#include <net/xdp_sock_drv.h> +#include <net/xdp.h> + +#include "otx2_common.h" +#include "otx2_xsk.h" + +int otx2_xsk_pool_alloc_buf(struct otx2_nic *pfvf, struct otx2_pool *pool, + dma_addr_t *dma, int idx) +{ + struct xdp_buff *xdp; + int delta; + + xdp = xsk_buff_alloc(pool->xsk_pool); + if (!xdp) + return -ENOMEM; + + pool->xdp[pool->xdp_top++] = xdp; + *dma = OTX2_DATA_ALIGN(xsk_buff_xdp_get_dma(xdp)); + /* Adjust xdp->data for unaligned addresses */ + delta = *dma - xsk_buff_xdp_get_dma(xdp); + xdp->data += delta; + + return 0; +} + +static int otx2_xsk_ctx_disable(struct otx2_nic *pfvf, u16 qidx, int aura_id) +{ + struct nix_cn10k_aq_enq_req *cn10k_rq_aq; + struct npa_aq_enq_req *aura_aq; + struct npa_aq_enq_req *pool_aq; + struct nix_aq_enq_req *rq_aq; + + if (test_bit(CN10K_LMTST, &pfvf->hw.cap_flag)) { + cn10k_rq_aq = otx2_mbox_alloc_msg_nix_cn10k_aq_enq(&pfvf->mbox); + if (!cn10k_rq_aq) + return -ENOMEM; + cn10k_rq_aq->qidx = qidx; + cn10k_rq_aq->rq.ena = 0; + cn10k_rq_aq->rq_mask.ena = 1; + cn10k_rq_aq->ctype = NIX_AQ_CTYPE_RQ; + cn10k_rq_aq->op = NIX_AQ_INSTOP_WRITE; + } else { + rq_aq = otx2_mbox_alloc_msg_nix_aq_enq(&pfvf->mbox); + if (!rq_aq) + return -ENOMEM; + rq_aq->qidx = qidx; + rq_aq->sq.ena = 0; + rq_aq->sq_mask.ena = 1; + rq_aq->ctype = NIX_AQ_CTYPE_RQ; + rq_aq->op = NIX_AQ_INSTOP_WRITE; + } + + aura_aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox); + if (!aura_aq) + goto fail; + + aura_aq->aura_id = aura_id; + aura_aq->aura.ena = 0; + aura_aq->aura_mask.ena = 1; + aura_aq->ctype = NPA_AQ_CTYPE_AURA; + aura_aq->op = NPA_AQ_INSTOP_WRITE; + + pool_aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox); + if (!pool_aq) + goto fail; + + pool_aq->aura_id = aura_id; + pool_aq->pool.ena = 0; + pool_aq->pool_mask.ena = 1; + + pool_aq->ctype = NPA_AQ_CTYPE_POOL; + pool_aq->op = NPA_AQ_INSTOP_WRITE; + + return otx2_sync_mbox_msg(&pfvf->mbox); + +fail: + otx2_mbox_reset(&pfvf->mbox.mbox, 0); + return -ENOMEM; +} + +static void otx2_clean_up_rq(struct otx2_nic *pfvf, int qidx) +{ + struct otx2_qset *qset = &pfvf->qset; + struct otx2_cq_queue *cq; + struct otx2_pool *pool; + u64 iova; + + /* If the DOWN flag is set SQs are already freed */ + if (pfvf->flags & OTX2_FLAG_INTF_DOWN) + return; + + cq = &qset->cq[qidx]; + if (cq) + otx2_cleanup_rx_cqes(pfvf, cq, qidx); + + pool = &pfvf->qset.pool[qidx]; + iova = otx2_aura_allocptr(pfvf, qidx); + while (iova) { + iova -= OTX2_HEAD_ROOM; + otx2_free_bufs(pfvf, pool, iova, pfvf->rbsize); + iova = otx2_aura_allocptr(pfvf, qidx); + } + + mutex_lock(&pfvf->mbox.lock); + otx2_xsk_ctx_disable(pfvf, qidx, qidx); + mutex_unlock(&pfvf->mbox.lock); +} + +int otx2_xsk_pool_enable(struct otx2_nic *pf, struct xsk_buff_pool *pool, u16 qidx) +{ + u16 rx_queues = pf->hw.rx_queues; + u16 tx_queues = pf->hw.tx_queues; + int err; + + if (qidx >= rx_queues || qidx >= tx_queues) + return -EINVAL; + + err = xsk_pool_dma_map(pool, pf->dev, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); + if (err) + return err; + + set_bit(qidx, pf->af_xdp_zc_qidx); + otx2_clean_up_rq(pf, qidx); + /* Reconfigure RSS table as 'qidx' cannot be part of RSS now */ + otx2_set_rss_table(pf, DEFAULT_RSS_CONTEXT_GROUP); + /* Kick start the NAPI context so that receiving will start */ + return otx2_xsk_wakeup(pf->netdev, qidx, XDP_WAKEUP_RX); +} + +int otx2_xsk_pool_disable(struct otx2_nic *pf, u16 qidx) +{ + struct net_device *netdev = pf->netdev; + struct xsk_buff_pool *pool; + struct otx2_snd_queue *sq; + + pool = xsk_get_pool_from_qid(netdev, qidx); + if (!pool) + return -EINVAL; + + sq = &pf->qset.sq[qidx + pf->hw.tx_queues]; + sq->xsk_pool = NULL; + otx2_clean_up_rq(pf, qidx); + clear_bit(qidx, pf->af_xdp_zc_qidx); + xsk_pool_dma_unmap(pool, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); + /* Reconfigure RSS table as 'qidx' now need to be part of RSS now */ + otx2_set_rss_table(pf, DEFAULT_RSS_CONTEXT_GROUP); + + return 0; +} + +int otx2_xsk_pool_setup(struct otx2_nic *pf, struct xsk_buff_pool *pool, u16 qidx) +{ + if (pool) + return otx2_xsk_pool_enable(pf, pool, qidx); + + return otx2_xsk_pool_disable(pf, qidx); +} + +int otx2_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) +{ + struct otx2_nic *pf = netdev_priv(dev); + struct otx2_cq_poll *cq_poll = NULL; + struct otx2_qset *qset = &pf->qset; + + if (pf->flags & OTX2_FLAG_INTF_DOWN) + return -ENETDOWN; + + if (queue_id >= pf->hw.rx_queues || queue_id >= pf->hw.tx_queues) + return -EINVAL; + + cq_poll = &qset->napi[queue_id]; + if (!cq_poll) + return -EINVAL; + + /* Trigger interrupt */ + if (!napi_if_scheduled_mark_missed(&cq_poll->napi)) { + otx2_write64(pf, NIX_LF_CINTX_ENA_W1S(cq_poll->cint_idx), BIT_ULL(0)); + otx2_write64(pf, NIX_LF_CINTX_INT_W1S(cq_poll->cint_idx), BIT_ULL(0)); + } + + return 0; +} + +void otx2_attach_xsk_buff(struct otx2_nic *pfvf, struct otx2_snd_queue *sq, int qidx) +{ + if (test_bit(qidx, pfvf->af_xdp_zc_qidx)) + sq->xsk_pool = xsk_get_pool_from_qid(pfvf->netdev, qidx); +} + +void otx2_zc_napi_handler(struct otx2_nic *pfvf, struct xsk_buff_pool *pool, + int queue, int budget) +{ + struct xdp_desc *xdp_desc = pool->tx_descs; + int err, i, work_done = 0, batch; + + budget = min(budget, otx2_read_free_sqe(pfvf, queue)); + batch = xsk_tx_peek_release_desc_batch(pool, budget); + if (!batch) + return; + + for (i = 0; i < batch; i++) { + dma_addr_t dma_addr; + + dma_addr = xsk_buff_raw_get_dma(pool, xdp_desc[i].addr); + err = otx2_xdp_sq_append_pkt(pfvf, NULL, dma_addr, xdp_desc[i].len, + queue, OTX2_AF_XDP_FRAME); + if (!err) { + netdev_err(pfvf->netdev, "AF_XDP: Unable to transfer packet err%d\n", err); + break; + } + work_done++; + } + + if (work_done) + xsk_tx_release(pool); +} diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.h new file mode 100644 index 000000000000..8047fafee8fe --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell RVU PF/VF Netdev Devlink + * + * Copyright (C) 2024 Marvell. + * + */ + +#ifndef OTX2_XSK_H +#define OTX2_XSK_H + +struct otx2_nic; +struct xsk_buff_pool; + +int otx2_xsk_pool_setup(struct otx2_nic *pf, struct xsk_buff_pool *pool, u16 qid); +int otx2_xsk_pool_enable(struct otx2_nic *pf, struct xsk_buff_pool *pool, u16 qid); +int otx2_xsk_pool_disable(struct otx2_nic *pf, u16 qid); +int otx2_xsk_pool_alloc_buf(struct otx2_nic *pfvf, struct otx2_pool *pool, + dma_addr_t *dma, int idx); +int otx2_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags); +void otx2_zc_napi_handler(struct otx2_nic *pfvf, struct xsk_buff_pool *pool, + int queue, int budget); +void otx2_attach_xsk_buff(struct otx2_nic *pfvf, struct otx2_snd_queue *sq, int qidx); + +#endif /* OTX2_XSK_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c index 9d887bfc3108..c5dbae0e513b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c @@ -82,7 +82,7 @@ static int otx2_qos_sq_aura_pool_init(struct otx2_nic *pfvf, int qidx) } for (ptr = 0; ptr < num_sqbs; ptr++) { - err = otx2_alloc_rbuf(pfvf, pool, &bufptr); + err = otx2_alloc_rbuf(pfvf, pool, &bufptr, pool_id, ptr); if (err) goto sqb_free; pfvf->hw_ops->aura_freeptr(pfvf, pool_id, bufptr); diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index 440a4c42b405..8cdecf61253c 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -396,7 +396,6 @@ static int prestera_port_sfp_bind(struct prestera_port *port) continue; port->phylink_pcs.ops = &prestera_pcs_ops; - port->phylink_pcs.neg_mode = true; port->phy_config.dev = &port->dev->dev; port->phy_config.type = PHYLINK_NETDEV; diff --git a/drivers/net/ethernet/mediatek/airoha_eth.c b/drivers/net/ethernet/mediatek/airoha_eth.c index 09f448f29124..aa5f220ddbcf 100644 --- a/drivers/net/ethernet/mediatek/airoha_eth.c +++ b/drivers/net/ethernet/mediatek/airoha_eth.c @@ -2556,11 +2556,10 @@ static u16 airoha_dev_select_queue(struct net_device *dev, struct sk_buff *skb, static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, struct net_device *dev) { - struct skb_shared_info *sinfo = skb_shinfo(skb); struct airoha_gdm_port *port = netdev_priv(dev); + u32 nr_frags = 1 + skb_shinfo(skb)->nr_frags; u32 msg0, msg1, len = skb_headlen(skb); struct airoha_qdma *qdma = port->qdma; - u32 nr_frags = 1 + sinfo->nr_frags; struct netdev_queue *txq; struct airoha_queue *q; void *data = skb->data; @@ -2583,8 +2582,9 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, if (skb_cow_head(skb, 0)) goto error; - if (sinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) { - __be16 csum = cpu_to_be16(sinfo->gso_size); + if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | + SKB_GSO_TCPV6)) { + __be16 csum = cpu_to_be16(skb_shinfo(skb)->gso_size); tcp_hdr(skb)->check = (__force __sum16)csum; msg0 |= FIELD_PREP(QDMA_ETH_TXMSG_TSO_MASK, 1); @@ -2613,7 +2613,7 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, for (i = 0; i < nr_frags; i++) { struct airoha_qdma_desc *desc = &q->desc[index]; struct airoha_queue_entry *e = &q->entry[index]; - skb_frag_t *frag = &sinfo->frags[i]; + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; dma_addr_t addr; u32 val; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 53485142938c..922330b3f4d7 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -815,12 +815,60 @@ static void mtk_mac_link_up(struct phylink_config *config, mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id)); } +static void mtk_mac_disable_tx_lpi(struct phylink_config *config) +{ + struct mtk_mac *mac = container_of(config, struct mtk_mac, + phylink_config); + struct mtk_eth *eth = mac->hw; + + mtk_m32(eth, MAC_MCR_EEE100M | MAC_MCR_EEE1G, 0, MTK_MAC_MCR(mac->id)); +} + +static int mtk_mac_enable_tx_lpi(struct phylink_config *config, u32 timer, + bool tx_clk_stop) +{ + struct mtk_mac *mac = container_of(config, struct mtk_mac, + phylink_config); + struct mtk_eth *eth = mac->hw; + u32 val; + + /* Tx idle timer in ms */ + timer = DIV_ROUND_UP(timer, 1000); + + /* If the timer is zero, then set LPI_MODE, which allows the + * system to enter LPI mode immediately rather than waiting for + * the LPI threshold. + */ + if (!timer) + val = MAC_EEE_LPI_MODE; + else if (FIELD_FIT(MAC_EEE_LPI_TXIDLE_THD, timer)) + val = FIELD_PREP(MAC_EEE_LPI_TXIDLE_THD, timer); + else + val = MAC_EEE_LPI_TXIDLE_THD; + + if (tx_clk_stop) + val |= MAC_EEE_CKG_TXIDLE; + + /* PHY Wake-up time, this field does not have a reset value, so use the + * reset value from MT7531 (36us for 100M and 17us for 1000M). + */ + val |= FIELD_PREP(MAC_EEE_WAKEUP_TIME_1000, 17) | + FIELD_PREP(MAC_EEE_WAKEUP_TIME_100, 36); + + mtk_w32(eth, val, MTK_MAC_EEECR(mac->id)); + mtk_m32(eth, 0, MAC_MCR_EEE100M | MAC_MCR_EEE1G, MTK_MAC_MCR(mac->id)); + + return 0; +} + static const struct phylink_mac_ops mtk_phylink_ops = { .mac_select_pcs = mtk_mac_select_pcs, .mac_config = mtk_mac_config, .mac_finish = mtk_mac_finish, .mac_link_down = mtk_mac_link_down, .mac_link_up = mtk_mac_link_up, + .mac_disable_tx_lpi = mtk_mac_disable_tx_lpi, + .mac_enable_tx_lpi = mtk_mac_enable_tx_lpi, }; static int mtk_mdio_init(struct mtk_eth *eth) @@ -830,17 +878,12 @@ static int mtk_mdio_init(struct mtk_eth *eth) int ret; u32 val; - mii_np = of_get_child_by_name(eth->dev->of_node, "mdio-bus"); + mii_np = of_get_available_child_by_name(eth->dev->of_node, "mdio-bus"); if (!mii_np) { dev_err(eth->dev, "no %s child node found", "mdio-bus"); return -ENODEV; } - if (!of_device_is_available(mii_np)) { - ret = -ENODEV; - goto err_put_node; - } - eth->mii_bus = devm_mdiobus_alloc(eth->dev); if (!eth->mii_bus) { ret = -ENOMEM; @@ -4474,6 +4517,20 @@ static int mtk_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam return phylink_ethtool_set_pauseparam(mac->phylink, pause); } +static int mtk_get_eee(struct net_device *dev, struct ethtool_keee *eee) +{ + struct mtk_mac *mac = netdev_priv(dev); + + return phylink_ethtool_get_eee(mac->phylink, eee); +} + +static int mtk_set_eee(struct net_device *dev, struct ethtool_keee *eee) +{ + struct mtk_mac *mac = netdev_priv(dev); + + return phylink_ethtool_set_eee(mac->phylink, eee); +} + static u16 mtk_select_queue(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev) { @@ -4506,6 +4563,8 @@ static const struct ethtool_ops mtk_ethtool_ops = { .set_pauseparam = mtk_set_pauseparam, .get_rxnfc = mtk_get_rxnfc, .set_rxnfc = mtk_set_rxnfc, + .get_eee = mtk_get_eee, + .set_eee = mtk_set_eee, }; static const struct net_device_ops mtk_netdev_ops = { @@ -4615,6 +4674,9 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) mac->phylink_config.type = PHYLINK_NETDEV; mac->phylink_config.mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | MAC_10 | MAC_100 | MAC_1000 | MAC_2500FD; + mac->phylink_config.lpi_capabilities = MAC_100FD | MAC_1000FD | + MAC_2500FD; + mac->phylink_config.lpi_timer_default = 1000; /* MT7623 gmac0 is now missing its speed-specific PLL configuration * in its .mac_config method (since state->speed is not valid there. diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 0d5225f1d3ee..90a377ab4359 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -453,6 +453,8 @@ #define MAC_MCR_RX_FIFO_CLR_DIS BIT(12) #define MAC_MCR_BACKOFF_EN BIT(9) #define MAC_MCR_BACKPR_EN BIT(8) +#define MAC_MCR_EEE1G BIT(7) +#define MAC_MCR_EEE100M BIT(6) #define MAC_MCR_FORCE_RX_FC BIT(5) #define MAC_MCR_FORCE_TX_FC BIT(4) #define MAC_MCR_SPEED_1000 BIT(3) @@ -461,6 +463,15 @@ #define MAC_MCR_FORCE_LINK BIT(0) #define MAC_MCR_FORCE_LINK_DOWN (MAC_MCR_FORCE_MODE) +/* Mac EEE control registers */ +#define MTK_MAC_EEECR(x) (0x10104 + (x * 0x100)) +#define MAC_EEE_WAKEUP_TIME_1000 GENMASK(31, 24) +#define MAC_EEE_WAKEUP_TIME_100 GENMASK(23, 16) +#define MAC_EEE_LPI_TXIDLE_THD GENMASK(15, 8) +#define MAC_EEE_CKG_TXIDLE BIT(3) +#define MAC_EEE_CKG_RXLPI BIT(2) +#define MAC_EEE_LPI_MODE BIT(0) + /* Mac status registers */ #define MTK_MAC_MSR(x) (0x10108 + (x * 0x100)) #define MAC_MSR_EEE1G BIT(7) diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c index 25989c79c92e..76f202d7f055 100644 --- a/drivers/net/ethernet/mediatek/mtk_star_emac.c +++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c @@ -1427,15 +1427,10 @@ static int mtk_star_mdio_init(struct net_device *ndev) of_node = dev->of_node; - mdio_node = of_get_child_by_name(of_node, "mdio"); + mdio_node = of_get_available_child_by_name(of_node, "mdio"); if (!mdio_node) return -ENODEV; - if (!of_device_is_available(mdio_node)) { - ret = -ENODEV; - goto out_put_node; - } - priv->mii = devm_mdiobus_alloc(dev); if (!priv->mii) { ret = -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c index b330020dc0d6..07b061a97a6e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c @@ -526,28 +526,6 @@ out: return res; } -u32 mlx4_zone_free_entries(struct mlx4_zone_allocator *zones, u32 uid, u32 obj, u32 count) -{ - struct mlx4_zone_entry *zone; - int res = 0; - - spin_lock(&zones->lock); - - zone = __mlx4_find_zone_by_uid(zones, uid); - - if (NULL == zone) { - res = -1; - goto out; - } - - __mlx4_free_from_zone(zone, obj, count); - -out: - spin_unlock(&zones->lock); - - return res; -} - u32 mlx4_zone_free_entries_unique(struct mlx4_zone_allocator *zones, u32 obj, u32 count) { struct mlx4_zone_entry *zone; @@ -682,9 +660,9 @@ static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device) } static int mlx4_alloc_db_from_pgdir(struct mlx4_db_pgdir *pgdir, - struct mlx4_db *db, int order) + struct mlx4_db *db, unsigned int order) { - int o; + unsigned int o; int i; for (o = order; o <= 1; ++o) { @@ -712,7 +690,7 @@ found: return 0; } -int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order) +int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, unsigned int order) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_db_pgdir *pgdir; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 15c57e9517e9..b33285d755b9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -48,60 +48,43 @@ #if IS_ENABLED(CONFIG_IPV6) #include <net/ip6_checksum.h> #endif +#include <net/page_pool/helpers.h> #include "mlx4_en.h" -static int mlx4_alloc_page(struct mlx4_en_priv *priv, - struct mlx4_en_rx_alloc *frag, - gfp_t gfp) -{ - struct page *page; - dma_addr_t dma; - - page = alloc_page(gfp); - if (unlikely(!page)) - return -ENOMEM; - dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE, priv->dma_dir); - if (unlikely(dma_mapping_error(priv->ddev, dma))) { - __free_page(page); - return -ENOMEM; - } - frag->page = page; - frag->dma = dma; - frag->page_offset = priv->rx_headroom; - return 0; -} - static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, struct mlx4_en_rx_desc *rx_desc, struct mlx4_en_rx_alloc *frags, gfp_t gfp) { + dma_addr_t dma; int i; for (i = 0; i < priv->num_frags; i++, frags++) { if (!frags->page) { - if (mlx4_alloc_page(priv, frags, gfp)) { + frags->page = page_pool_alloc_pages(ring->pp, gfp); + if (!frags->page) { ring->alloc_fail++; return -ENOMEM; } + page_pool_fragment_page(frags->page, 1); + frags->page_offset = priv->rx_headroom; + ring->rx_alloc_pages++; } - rx_desc->data[i].addr = cpu_to_be64(frags->dma + - frags->page_offset); + dma = page_pool_get_dma_addr(frags->page); + rx_desc->data[i].addr = cpu_to_be64(dma + frags->page_offset); } return 0; } static void mlx4_en_free_frag(const struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring, struct mlx4_en_rx_alloc *frag) { - if (frag->page) { - dma_unmap_page(priv->ddev, frag->dma, - PAGE_SIZE, priv->dma_dir); - __free_page(frag->page); - } + if (frag->page) + page_pool_put_full_page(ring->pp, frag->page, false); /* We need to clear all fields, otherwise a change of priv->log_rx_info * could lead to see garbage later in frag->page. */ @@ -141,18 +124,6 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, (index << ring->log_stride); struct mlx4_en_rx_alloc *frags = ring->rx_info + (index << priv->log_rx_info); - if (likely(ring->page_cache.index > 0)) { - /* XDP uses a single page per frame */ - if (!frags->page) { - ring->page_cache.index--; - frags->page = ring->page_cache.buf[ring->page_cache.index].page; - frags->dma = ring->page_cache.buf[ring->page_cache.index].dma; - } - frags->page_offset = XDP_PACKET_HEADROOM; - rx_desc->data[0].addr = cpu_to_be64(frags->dma + - XDP_PACKET_HEADROOM); - return 0; - } return mlx4_en_alloc_frags(priv, ring, rx_desc, frags, gfp); } @@ -178,7 +149,7 @@ static void mlx4_en_free_rx_desc(const struct mlx4_en_priv *priv, frags = ring->rx_info + (index << priv->log_rx_info); for (nr = 0; nr < priv->num_frags; nr++) { en_dbg(DRV, priv, "Freeing fragment:%d\n", nr); - mlx4_en_free_frag(priv, frags + nr); + mlx4_en_free_frag(priv, ring, frags + nr); } } @@ -268,6 +239,7 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, u32 size, u16 stride, int node, int queue_index) { struct mlx4_en_dev *mdev = priv->mdev; + struct page_pool_params pp = {}; struct mlx4_en_rx_ring *ring; int err = -ENOMEM; int tmp; @@ -286,9 +258,26 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, ring->log_stride = ffs(ring->stride) - 1; ring->buf_size = ring->size * ring->stride + TXBB_SIZE; - if (xdp_rxq_info_reg(&ring->xdp_rxq, priv->dev, queue_index, 0) < 0) + pp.flags = PP_FLAG_DMA_MAP; + pp.pool_size = size * DIV_ROUND_UP(priv->rx_skb_size, PAGE_SIZE); + pp.nid = node; + pp.napi = &priv->rx_cq[queue_index]->napi; + pp.netdev = priv->dev; + pp.dev = &mdev->dev->persist->pdev->dev; + pp.dma_dir = priv->dma_dir; + + ring->pp = page_pool_create(&pp); + if (!ring->pp) goto err_ring; + if (xdp_rxq_info_reg(&ring->xdp_rxq, priv->dev, queue_index, 0) < 0) + goto err_pp; + + err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, MEM_TYPE_PAGE_POOL, + ring->pp); + if (err) + goto err_xdp_info; + tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS * sizeof(struct mlx4_en_rx_alloc)); ring->rx_info = kvzalloc_node(tmp, GFP_KERNEL, node); @@ -319,6 +308,8 @@ err_info: ring->rx_info = NULL; err_xdp_info: xdp_rxq_info_unreg(&ring->xdp_rxq); +err_pp: + page_pool_destroy(ring->pp); err_ring: kfree(ring); *pring = NULL; @@ -409,26 +400,6 @@ void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv) } } -/* When the rx ring is running in page-per-packet mode, a released frame can go - * directly into a small cache, to avoid unmapping or touching the page - * allocator. In bpf prog performance scenarios, buffers are either forwarded - * or dropped, never converted to skbs, so every page can come directly from - * this cache when it is sized to be a multiple of the napi budget. - */ -bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring, - struct mlx4_en_rx_alloc *frame) -{ - struct mlx4_en_page_cache *cache = &ring->page_cache; - - if (cache->index >= MLX4_EN_CACHE_SIZE) - return false; - - cache->buf[cache->index].page = frame->page; - cache->buf[cache->index].dma = frame->dma; - cache->index++; - return true; -} - void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring **pring, u32 size, u16 stride) @@ -445,6 +416,7 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, xdp_rxq_info_unreg(&ring->xdp_rxq); mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE); kvfree(ring->rx_info); + page_pool_destroy(ring->pp); ring->rx_info = NULL; kfree(ring); *pring = NULL; @@ -453,14 +425,6 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring) { - int i; - - for (i = 0; i < ring->page_cache.index; i++) { - dma_unmap_page(priv->ddev, ring->page_cache.buf[i].dma, - PAGE_SIZE, priv->dma_dir); - put_page(ring->page_cache.buf[i].page); - } - ring->page_cache.index = 0; mlx4_en_free_rx_buf(priv, ring); if (ring->stride <= TXBB_SIZE) ring->buf -= TXBB_SIZE; @@ -487,7 +451,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, if (unlikely(!page)) goto fail; - dma = frags->dma; + dma = page_pool_get_dma_addr(page); dma_sync_single_range_for_cpu(priv->ddev, dma, frags->page_offset, frag_size, priv->dma_dir); @@ -498,6 +462,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, if (frag_info->frag_stride == PAGE_SIZE / 2) { frags->page_offset ^= PAGE_SIZE / 2; release = page_count(page) != 1 || + atomic_long_read(&page->pp_ref_count) != 1 || page_is_pfmemalloc(page) || page_to_nid(page) != numa_mem_id(); } else if (!priv->rx_headroom) { @@ -511,10 +476,9 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, release = frags->page_offset + frag_info->frag_size > PAGE_SIZE; } if (release) { - dma_unmap_page(priv->ddev, dma, PAGE_SIZE, priv->dma_dir); frags->page = NULL; } else { - page_ref_inc(page); + page_pool_ref_page(page); } nr++; @@ -784,7 +748,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud /* Get pointer to first fragment since we haven't * skb yet and cast it to ethhdr struct */ - dma = frags[0].dma + frags[0].page_offset; + dma = page_pool_get_dma_addr(frags[0].page); + dma += frags[0].page_offset; dma_sync_single_for_cpu(priv->ddev, dma, sizeof(*ethh), DMA_FROM_DEVICE); @@ -823,7 +788,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud void *orig_data; u32 act; - dma = frags[0].dma + frags[0].page_offset; + dma = page_pool_get_dma_addr(frags[0].page); + dma += frags[0].page_offset; dma_sync_single_for_cpu(priv->ddev, dma, priv->frag_info[0].frag_size, DMA_FROM_DEVICE); @@ -886,6 +852,7 @@ xdp_drop_no_cnt: skb = napi_get_frags(&cq->napi); if (unlikely(!skb)) goto next; + skb_mark_for_recycle(skb); if (unlikely(ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL)) { u64 timestamp = mlx4_en_get_cqe_ts(cqe); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 1ddb11cb25f9..87f35bcbeff8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -44,6 +44,7 @@ #include <linux/ipv6.h> #include <linux/indirect_call_wrapper.h> #include <net/ipv6.h> +#include <net/page_pool/helpers.h> #include "mlx4_en.h" @@ -350,16 +351,10 @@ u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv, int napi_mode) { struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; - struct mlx4_en_rx_alloc frame = { - .page = tx_info->page, - .dma = tx_info->map0_dma, - }; - - if (!napi_mode || !mlx4_en_rx_recycle(ring->recycle_ring, &frame)) { - dma_unmap_page(priv->ddev, tx_info->map0_dma, - PAGE_SIZE, priv->dma_dir); - put_page(tx_info->page); - } + struct page_pool *pool = ring->recycle_ring->pp; + + /* Note that napi_mode = 0 means ndo_close() path, not budget = 0 */ + page_pool_put_full_page(pool, tx_info->page, !!napi_mode); return tx_info->nr_txbb; } @@ -450,6 +445,8 @@ int mlx4_en_process_tx_cq(struct net_device *dev, if (unlikely(!priv->port_up)) return 0; + if (unlikely(!napi_budget) && cq->type == TX_XDP) + return 0; netdev_txq_bql_complete_prefetchw(ring->tx_queue); @@ -1194,7 +1191,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, tx_desc = ring->buf + (index << LOG_TXBB_SIZE); data = &tx_desc->data; - dma = frame->dma; + dma = page_pool_get_dma_addr(frame->page); tx_info->page = frame->page; frame->page = NULL; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index d7d856d1758a..b213094ea30f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1478,12 +1478,6 @@ void mlx4_zone_allocator_destroy(struct mlx4_zone_allocator *zone_alloc); u32 mlx4_zone_alloc_entries(struct mlx4_zone_allocator *zones, u32 uid, int count, int align, u32 skip_mask, u32 *puid); -/* Free <count> objects, start from <obj> of the uid <uid> from zone_allocator - * <zones>. - */ -u32 mlx4_zone_free_entries(struct mlx4_zone_allocator *zones, - u32 uid, u32 obj, u32 count); - /* If <zones> was allocated with MLX4_ZONE_ALLOC_FLAGS_NO_OVERLAP, instead of * specifying the uid when freeing an object, zone allocator could figure it by * itself. Other parameters are similar to mlx4_zone_free. diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 28b70dcc652e..ad0d91a75184 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -247,20 +247,11 @@ struct mlx4_en_tx_desc { struct mlx4_en_rx_alloc { struct page *page; - dma_addr_t dma; u32 page_offset; }; #define MLX4_EN_CACHE_SIZE (2 * NAPI_POLL_WEIGHT) -struct mlx4_en_page_cache { - u32 index; - struct { - struct page *page; - dma_addr_t dma; - } buf[MLX4_EN_CACHE_SIZE]; -}; - enum { MLX4_EN_TX_RING_STATE_RECOVERING, }; @@ -335,14 +326,14 @@ struct mlx4_en_rx_ring { u16 stride; u16 log_stride; u16 cqn; /* index of port CQ associated with this ring */ + u8 fcs_del; u32 prod; u32 cons; u32 buf_size; - u8 fcs_del; + struct page_pool *pp; void *buf; void *rx_info; struct bpf_prog __rcu *xdp_prog; - struct mlx4_en_page_cache page_cache; unsigned long bytes; unsigned long packets; unsigned long csum_ok; @@ -707,8 +698,6 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, struct mlx4_en_priv *priv, unsigned int length, int tx_ind, bool *doorbell_pending); void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring); -bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring, - struct mlx4_en_rx_alloc *frame); int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring **pring, diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 4e43f4a7d246..e3d0b13c1610 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -147,26 +147,6 @@ static int mlx4_set_port_mac_table(struct mlx4_dev *dev, u8 port, return err; } -int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx) -{ - struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; - struct mlx4_mac_table *table = &info->mac_table; - int i; - - for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { - if (!table->refs[i]) - continue; - - if (mac == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))) { - *idx = i; - return 0; - } - } - - return -ENOENT; -} -EXPORT_SYMBOL_GPL(mlx4_find_cached_mac); - static bool mlx4_need_mf_bond(struct mlx4_dev *dev) { int i, num_eth_ports = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c index c7216e84ef8c..86253a89c24c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c @@ -13,6 +13,50 @@ struct mlx5_vnic_diag_stats { __be64 query_vnic_env_out[MLX5_ST_SZ_QW(query_vnic_env_out)]; }; +static void mlx5_reporter_vnic_diagnose_counter_icm(struct mlx5_core_dev *dev, + struct devlink_fmsg *fmsg, + u16 vport_num, bool other_vport) +{ + u32 out_icm_reg[MLX5_ST_SZ_DW(vhca_icm_ctrl_reg)] = {}; + u32 in_icm_reg[MLX5_ST_SZ_DW(vhca_icm_ctrl_reg)] = {}; + u32 out_reg[MLX5_ST_SZ_DW(nic_cap_reg)] = {}; + u32 in_reg[MLX5_ST_SZ_DW(nic_cap_reg)] = {}; + u32 cur_alloc_icm; + int vhca_icm_ctrl; + u16 vhca_id; + int err; + + err = mlx5_core_access_reg(dev, in_reg, sizeof(in_reg), out_reg, + sizeof(out_reg), MLX5_REG_NIC_CAP, 0, 0); + if (err) { + mlx5_core_warn(dev, "Reading nic_cap_reg failed. err = %d\n", err); + return; + } + vhca_icm_ctrl = MLX5_GET(nic_cap_reg, out_reg, vhca_icm_ctrl); + if (!vhca_icm_ctrl) + return; + + MLX5_SET(vhca_icm_ctrl_reg, in_icm_reg, vhca_id_valid, other_vport); + if (other_vport) { + err = mlx5_vport_get_vhca_id(dev, vport_num, &vhca_id); + if (err) { + mlx5_core_warn(dev, "vport to vhca_id failed. vport_num = %d, err = %d\n", + vport_num, err); + return; + } + MLX5_SET(vhca_icm_ctrl_reg, in_icm_reg, vhca_id, vhca_id); + } + err = mlx5_core_access_reg(dev, in_icm_reg, sizeof(in_icm_reg), + out_icm_reg, sizeof(out_icm_reg), + MLX5_REG_VHCA_ICM_CTRL, 0, 0); + if (err) { + mlx5_core_warn(dev, "Reading vhca_icm_ctrl failed. err = %d\n", err); + return; + } + cur_alloc_icm = MLX5_GET(vhca_icm_ctrl_reg, out_icm_reg, cur_alloc_icm); + devlink_fmsg_u32_pair_put(fmsg, "icm_consumption", cur_alloc_icm); +} + void mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev, struct devlink_fmsg *fmsg, u16 vport_num, bool other_vport) @@ -59,6 +103,8 @@ void mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev, devlink_fmsg_u64_pair_put(fmsg, "handled_pkt_steering_fail", VNIC_ENV_GET64(&vnic, handled_pkt_steering_fail)); } + if (MLX5_CAP_GEN(dev, nic_cap_reg)) + mlx5_reporter_vnic_diagnose_counter_icm(dev, fmsg, vport_num, other_vport); devlink_fmsg_obj_nest_end(fmsg); devlink_fmsg_pair_nest_end(fmsg); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 979fc56205e1..769e683f2488 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -95,8 +95,6 @@ struct page_pool; #define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) \ MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, order_base_2(MLX5E_RX_MAX_HEAD)) -#define MLX5_MPWRQ_MAX_LOG_WQE_SZ 18 - /* Keep in sync with mlx5e_mpwrq_log_wqe_sz. * These are theoretical maximums, which can be further restricted by * capabilities. These values are used for static resource allocations and @@ -386,7 +384,6 @@ enum { MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, MLX5E_SQ_STATE_PENDING_XSK_TX, MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, - MLX5E_SQ_STATE_XDP_MULTIBUF, MLX5E_NUM_SQ_STATES, /* Must be kept last */ }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 64b62ed17b07..aa36670d9a36 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -10,6 +10,9 @@ #include <net/page_pool/types.h> #include <net/xdp_sock_drv.h> +#define MLX5_MPWRQ_MAX_LOG_WQE_SZ 18 +#define MLX5_REP_MPWRQ_MAX_LOG_WQE_SZ 17 + static u8 mlx5e_mpwrq_min_page_shift(struct mlx5_core_dev *mdev) { u8 min_page_shift = MLX5_CAP_GEN_2(mdev, log_min_mkey_entity_size); @@ -103,18 +106,22 @@ u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, enum mlx5e_mpwrq_umr_mode umr_mode) { u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode); - u8 max_pages_per_wqe, max_log_mpwqe_size; + u8 max_pages_per_wqe, max_log_wqe_size_calc; + u8 max_log_wqe_size_cap; u16 max_wqe_size; /* Keep in sync with MLX5_MPWRQ_MAX_PAGES_PER_WQE. */ max_wqe_size = mlx5e_get_max_sq_aligned_wqebbs(mdev) * MLX5_SEND_WQE_BB; max_pages_per_wqe = ALIGN_DOWN(max_wqe_size - sizeof(struct mlx5e_umr_wqe), MLX5_UMR_FLEX_ALIGNMENT) / umr_entry_size; - max_log_mpwqe_size = ilog2(max_pages_per_wqe) + page_shift; + max_log_wqe_size_calc = ilog2(max_pages_per_wqe) + page_shift; + + WARN_ON_ONCE(max_log_wqe_size_calc < MLX5E_ORDER2_MAX_PACKET_MTU); - WARN_ON_ONCE(max_log_mpwqe_size < MLX5E_ORDER2_MAX_PACKET_MTU); + max_log_wqe_size_cap = mlx5_core_is_ecpf(mdev) ? + MLX5_REP_MPWRQ_MAX_LOG_WQE_SZ : MLX5_MPWRQ_MAX_LOG_WQE_SZ; - return min_t(u8, max_log_mpwqe_size, MLX5_MPWRQ_MAX_LOG_WQE_SZ); + return min_t(u8, max_log_wqe_size_calc, max_log_wqe_size_cap); } u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, @@ -1240,7 +1247,6 @@ void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev, mlx5e_build_sq_param_common(mdev, param); MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE); - param->is_xdp_mb = !mlx5e_rx_is_linear_skb(mdev, params, xsk); mlx5e_build_tx_cq_param(mdev, params, ¶m->cqp); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 3f8986f9d862..bd5877acc5b1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -33,7 +33,6 @@ struct mlx5e_sq_param { struct mlx5_wq_param wq; bool is_mpw; bool is_tls; - bool is_xdp_mb; u16 stop_room; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 5f6a0605e4ae..f62fbfb67a1b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -296,11 +296,16 @@ enum mlx5e_fec_supported_link_mode { MLX5E_FEC_SUPPORTED_LINK_MODE_200G_2X, MLX5E_FEC_SUPPORTED_LINK_MODE_400G_4X, MLX5E_FEC_SUPPORTED_LINK_MODE_800G_8X, + MLX5E_FEC_SUPPORTED_LINK_MODE_200G_1X, + MLX5E_FEC_SUPPORTED_LINK_MODE_400G_2X, + MLX5E_FEC_SUPPORTED_LINK_MODE_800G_4X, + MLX5E_FEC_SUPPORTED_LINK_MODE_1600G_8X, MLX5E_MAX_FEC_SUPPORTED_LINK_MODE, }; #define MLX5E_FEC_FIRST_50G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X #define MLX5E_FEC_FIRST_100G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_100G_1X +#define MLX5E_FEC_FIRST_200G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_200G_1X #define MLX5E_FEC_OVERRIDE_ADMIN_POLICY(buf, policy, write, link) \ do { \ @@ -320,8 +325,10 @@ static bool mlx5e_is_fec_supported_link_mode(struct mlx5_core_dev *dev, return link_mode < MLX5E_FEC_FIRST_50G_PER_LANE_MODE || (link_mode < MLX5E_FEC_FIRST_100G_PER_LANE_MODE && MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm)) || - (link_mode >= MLX5E_FEC_FIRST_100G_PER_LANE_MODE && - MLX5_CAP_PCAM_FEATURE(dev, fec_100G_per_lane_in_pplm)); + (link_mode < MLX5E_FEC_FIRST_200G_PER_LANE_MODE && + MLX5_CAP_PCAM_FEATURE(dev, fec_100G_per_lane_in_pplm)) || + (link_mode >= MLX5E_FEC_FIRST_200G_PER_LANE_MODE && + MLX5_CAP_PCAM_FEATURE(dev, fec_200G_per_lane_in_pplm)); } /* get/set FEC admin field for a given speed */ @@ -368,6 +375,18 @@ static int mlx5e_fec_admin_field(u32 *pplm, u16 *fec_policy, bool write, case MLX5E_FEC_SUPPORTED_LINK_MODE_800G_8X: MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 800g_8x); break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_1X: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 200g_1x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_2X: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 400g_2x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_800G_4X: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 800g_4x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_1600G_8X: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 1600g_8x); + break; default: return -EINVAL; } @@ -421,6 +440,18 @@ static int mlx5e_get_fec_cap_field(u32 *pplm, u16 *fec_cap, case MLX5E_FEC_SUPPORTED_LINK_MODE_800G_8X: *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 800g_8x); break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_1X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 200g_1x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_2X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 400g_2x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_800G_4X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 800g_4x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_1600G_8X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 1600g_8x); + break; default: return -EINVAL; } @@ -494,6 +525,26 @@ out: return 0; } +static u16 mlx5e_remap_fec_conf_mode(enum mlx5e_fec_supported_link_mode link_mode, + u16 conf_fec) +{ + /* RS fec in ethtool is originally mapped to MLX5E_FEC_RS_528_514. + * For link modes up to 25G per lane, the value is kept. + * For 50G or 100G per lane, it's remapped to MLX5E_FEC_RS_544_514. + * For 200G per lane, remapped to MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD. + */ + if (conf_fec != BIT(MLX5E_FEC_RS_528_514)) + return conf_fec; + + if (link_mode >= MLX5E_FEC_FIRST_200G_PER_LANE_MODE) + return BIT(MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD); + + if (link_mode >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE) + return BIT(MLX5E_FEC_RS_544_514); + + return conf_fec; +} + int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy) { bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm); @@ -530,14 +581,7 @@ int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy) if (!mlx5e_is_fec_supported_link_mode(dev, i)) break; - /* RS fec in ethtool is mapped to MLX5E_FEC_RS_528_514 - * to link modes up to 25G per lane and to - * MLX5E_FEC_RS_544_514 in the new link modes based on - * 50G or 100G per lane - */ - if (conf_fec == (1 << MLX5E_FEC_RS_528_514) && - i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE) - conf_fec = (1 << MLX5E_FEC_RS_544_514); + conf_fec = mlx5e_remap_fec_conf_mode(i, conf_fec); mlx5e_get_fec_cap_field(out, &fec_caps, i); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h index d1da225f35da..fa2283dd383b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h @@ -61,6 +61,7 @@ enum { MLX5E_FEC_NOFEC, MLX5E_FEC_FIRECODE, MLX5E_FEC_RS_528_514, + MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD = 4, MLX5E_FEC_RS_544_514 = 7, MLX5E_FEC_LLRS_272_257_1 = 9, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index afd654583b6b..131ed97ca997 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -326,7 +326,7 @@ static int mlx5e_ptp_alloc_txqsq(struct mlx5e_ptp *c, int txq_ix, int node; sq->pdev = c->pdev; - sq->clock = &mdev->clock; + sq->clock = mdev->clock; sq->mkey_be = c->mkey_be; sq->netdev = c->netdev; sq->priv = c->priv; @@ -696,7 +696,7 @@ static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params, rq->pdev = c->pdev; rq->netdev = priv->netdev; rq->priv = priv; - rq->clock = &mdev->clock; + rq->clock = mdev->clock; rq->tstamp = &priv->tstamp; rq->mdev = mdev; rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index 25d751eba99b..e75759533ae0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -317,10 +317,8 @@ mlx5e_rx_reporter_diagnose_common_ptp_config(struct mlx5e_priv *priv, struct mlx } static void -mlx5e_rx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg) +mlx5e_rx_reporter_diagnose_common_config(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg) { - struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); struct mlx5e_rq *generic_rq = &priv->channels.c[0]->rq; struct mlx5e_ptp *ptp_ch = priv->channels.ptp; @@ -340,20 +338,100 @@ static void mlx5e_rx_reporter_build_diagnose_output_ptp_rq(struct mlx5e_rq *rq, devlink_fmsg_obj_nest_end(fmsg); } -static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg, - struct netlink_ext_ack *extack) +static void mlx5e_rx_reporter_diagnose_rx_res_dir_tirns(struct mlx5e_rx_res *rx_res, + struct devlink_fmsg *fmsg) { - struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); - struct mlx5e_ptp *ptp_ch = priv->channels.ptp; + unsigned int max_nch = mlx5e_rx_res_get_max_nch(rx_res); int i; - mutex_lock(&priv->state_lock); + devlink_fmsg_arr_pair_nest_start(fmsg, "Direct TIRs"); - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) - goto unlock; + for (i = 0; i < max_nch; i++) { + devlink_fmsg_obj_nest_start(fmsg); + + devlink_fmsg_u32_pair_put(fmsg, "ix", i); + devlink_fmsg_u32_pair_put(fmsg, "tirn", mlx5e_rx_res_get_tirn_direct(rx_res, i)); + devlink_fmsg_u32_pair_put(fmsg, "rqtn", mlx5e_rx_res_get_rqtn_direct(rx_res, i)); + + devlink_fmsg_obj_nest_end(fmsg); + } + + devlink_fmsg_arr_pair_nest_end(fmsg); +} + +static void mlx5e_rx_reporter_diagnose_rx_res_rss_tirn(struct mlx5e_rss *rss, bool inner, + struct devlink_fmsg *fmsg) +{ + bool found_valid_tir = false; + int tt; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + if (!mlx5e_rss_valid_tir(rss, tt, inner)) + continue; + + if (!found_valid_tir) { + char *tir_msg = inner ? "Inner TIRs Numbers" : "TIRs Numbers"; + + found_valid_tir = true; + devlink_fmsg_arr_pair_nest_start(fmsg, tir_msg); + } + + devlink_fmsg_obj_nest_start(fmsg); + devlink_fmsg_string_pair_put(fmsg, "tt", mlx5_ttc_get_name(tt)); + devlink_fmsg_u32_pair_put(fmsg, "tirn", mlx5e_rss_get_tirn(rss, tt, inner)); + devlink_fmsg_obj_nest_end(fmsg); + } + + if (found_valid_tir) + devlink_fmsg_arr_pair_nest_end(fmsg); +} + +static void mlx5e_rx_reporter_diagnose_rx_res_rss_ix(struct mlx5e_rx_res *rx_res, u32 rss_idx, + struct devlink_fmsg *fmsg) +{ + struct mlx5e_rss *rss = mlx5e_rx_res_rss_get(rx_res, rss_idx); + + if (!rss) + return; + + devlink_fmsg_obj_nest_start(fmsg); + + devlink_fmsg_u32_pair_put(fmsg, "Index", rss_idx); + devlink_fmsg_u32_pair_put(fmsg, "rqtn", mlx5e_rss_get_rqtn(rss)); + mlx5e_rx_reporter_diagnose_rx_res_rss_tirn(rss, false, fmsg); + if (mlx5e_rss_get_inner_ft_support(rss)) + mlx5e_rx_reporter_diagnose_rx_res_rss_tirn(rss, true, fmsg); + + devlink_fmsg_obj_nest_end(fmsg); +} + +static void mlx5e_rx_reporter_diagnose_rx_res_rss(struct mlx5e_rx_res *rx_res, + struct devlink_fmsg *fmsg) +{ + int rss_ix; + + devlink_fmsg_arr_pair_nest_start(fmsg, "RSS"); + for (rss_ix = 0; rss_ix < MLX5E_MAX_NUM_RSS; rss_ix++) + mlx5e_rx_reporter_diagnose_rx_res_rss_ix(rx_res, rss_ix, fmsg); + devlink_fmsg_arr_pair_nest_end(fmsg); +} + +static void mlx5e_rx_reporter_diagnose_rx_res(struct mlx5e_priv *priv, + struct devlink_fmsg *fmsg) +{ + struct mlx5e_rx_res *rx_res = priv->rx_res; + + mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RX resources"); + mlx5e_rx_reporter_diagnose_rx_res_dir_tirns(rx_res, fmsg); + mlx5e_rx_reporter_diagnose_rx_res_rss(rx_res, fmsg); + mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static void mlx5e_rx_reporter_diagnose_rqs(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg) +{ + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; + int i; - mlx5e_rx_reporter_diagnose_common_config(reporter, fmsg); devlink_fmsg_arr_pair_nest_start(fmsg, "RQs"); for (i = 0; i < priv->channels.num; i++) { @@ -367,7 +445,24 @@ static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, } if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) mlx5e_rx_reporter_build_diagnose_output_ptp_rq(&ptp_ch->rq, fmsg); + devlink_fmsg_arr_pair_nest_end(fmsg); +} + +static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + + mlx5e_rx_reporter_diagnose_common_config(priv, fmsg); + mlx5e_rx_reporter_diagnose_rqs(priv, fmsg); + mlx5e_rx_reporter_diagnose_rx_res(priv, fmsg); unlock: mutex_unlock(&priv->state_lock); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 09433b91be17..532c7fa94d17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -16,7 +16,6 @@ static const char * const sq_sw_state_type_name[] = { [MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE] = "vlan_need_l2_inline", [MLX5E_SQ_STATE_PENDING_XSK_TX] = "pending_xsk_tx", [MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC] = "pending_tls_rx_resync", - [MLX5E_SQ_STATE_XDP_MULTIBUF] = "xdp_multibuf", }; static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c index 5f742f896600..0d8ccc7b6c11 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c @@ -81,6 +81,11 @@ struct mlx5e_rss { refcount_t refcnt; }; +bool mlx5e_rss_get_inner_ft_support(struct mlx5e_rss *rss) +{ + return rss->inner_ft_support; +} + void mlx5e_rss_params_indir_modify_actual_size(struct mlx5e_rss *rss, u32 num_channels) { rss->indir.actual_table_size = mlx5e_rqt_size(rss->mdev, num_channels); @@ -449,6 +454,16 @@ u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, return mlx5e_tir_get_tirn(tir); } +u32 mlx5e_rss_get_rqtn(struct mlx5e_rss *rss) +{ + return mlx5e_rqt_get_rqtn(&rss->rqt); +} + +bool mlx5e_rss_valid_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, bool inner) +{ + return !!rss_get_tir(rss, tt, inner); +} + /* Fill the "tirn" output parameter. * Create the requested TIR if it's its first usage. */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h index d0df98963c8d..72089f5f473c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h @@ -32,8 +32,11 @@ void mlx5e_rss_refcnt_inc(struct mlx5e_rss *rss); void mlx5e_rss_refcnt_dec(struct mlx5e_rss *rss); unsigned int mlx5e_rss_refcnt_read(struct mlx5e_rss *rss); +bool mlx5e_rss_get_inner_ft_support(struct mlx5e_rss *rss); u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, bool inner); +bool mlx5e_rss_valid_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, bool inner); +u32 mlx5e_rss_get_rqtn(struct mlx5e_rss *rss); int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, const struct mlx5e_packet_merge_param *init_pkt_merge_param, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c index a86eade9a9e0..9d8b2f5f6c96 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c @@ -5,8 +5,6 @@ #include "channels.h" #include "params.h" -#define MLX5E_MAX_NUM_RSS 16 - struct mlx5e_rx_res { struct mlx5_core_dev *mdev; /* primary */ enum mlx5e_rx_res_features features; @@ -497,6 +495,11 @@ void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res) mlx5e_rx_res_free(res); } +unsigned int mlx5e_rx_res_get_max_nch(struct mlx5e_rx_res *res) +{ + return res->max_nch; +} + u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix) { return mlx5e_tir_get_tirn(&res->channels[ix].direct_tir); @@ -522,7 +525,7 @@ u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res) return mlx5e_tir_get_tirn(&res->ptp.tir); } -static u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix) +u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix) { return mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h index 7b1a9f0f1874..05b438043bcb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h @@ -10,6 +10,8 @@ #include "fs.h" #include "rss.h" +#define MLX5E_MAX_NUM_RSS 16 + struct mlx5e_rx_res; struct mlx5e_channels; @@ -34,6 +36,9 @@ u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix); u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res); +u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix); +unsigned int mlx5e_rx_res_get_max_nch(struct mlx5e_rx_res *res); +bool mlx5_rx_res_rss_inner_ft_support(struct mlx5e_rx_res *res); /* Activate/deactivate API */ void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h index d6c12d0ea55b..2e528b2c34d6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h @@ -73,11 +73,6 @@ struct mlx5e_tc_act { bool is_terminating_action; }; -struct mlx5e_tc_flow_action { - unsigned int num_entries; - struct flow_action_entry **entries; -}; - extern struct mlx5e_tc_act mlx5e_tc_act_drop; extern struct mlx5e_tc_act mlx5e_tc_act_trap; extern struct mlx5e_tc_act mlx5e_tc_act_accept; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c index 53ca16cb9c41..140606fcd23b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c @@ -46,7 +46,7 @@ static void mlx5e_init_trap_rq(struct mlx5e_trap *t, struct mlx5e_params *params rq->pdev = t->pdev; rq->netdev = priv->netdev; rq->priv = priv; - rq->clock = &mdev->clock; + rq->clock = mdev->clock; rq->tstamp = &priv->tstamp; rq->mdev = mdev; rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 94b291662087..6f3094a479e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -289,9 +289,9 @@ static u64 mlx5e_xsk_fill_timestamp(void *_priv) ts = get_cqe_ts(priv->cqe); if (mlx5_is_real_time_rq(priv->cq->mdev) || mlx5_is_real_time_sq(priv->cq->mdev)) - return mlx5_real_time_cyc2time(&priv->cq->mdev->clock, ts); + return mlx5_real_time_cyc2time(priv->cq->mdev->clock, ts); - return mlx5_timecounter_cyc2time(&priv->cq->mdev->clock, ts); + return mlx5_timecounter_cyc2time(priv->cq->mdev->clock, ts); } static void mlx5e_xsk_request_checksum(u16 csum_start, u16 csum_offset, void *priv) @@ -546,6 +546,7 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, bool inline_ok; bool linear; u16 pi; + int i; struct mlx5e_xdpsq_stats *stats = sq->stats; @@ -612,41 +613,33 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND); - if (test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state)) { - int i; - - memset(&cseg->trailer, 0, sizeof(cseg->trailer)); - memset(eseg, 0, sizeof(*eseg) - sizeof(eseg->trailer)); - - eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); + memset(&cseg->trailer, 0, sizeof(cseg->trailer)); + memset(eseg, 0, sizeof(*eseg) - sizeof(eseg->trailer)); - for (i = 0; i < num_frags; i++) { - skb_frag_t *frag = &xdptxdf->sinfo->frags[i]; - dma_addr_t addr; + eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); - addr = xdptxdf->dma_arr ? xdptxdf->dma_arr[i] : - page_pool_get_dma_addr(skb_frag_page(frag)) + - skb_frag_off(frag); + for (i = 0; i < num_frags; i++) { + skb_frag_t *frag = &xdptxdf->sinfo->frags[i]; + dma_addr_t addr; - dseg->addr = cpu_to_be64(addr); - dseg->byte_count = cpu_to_be32(skb_frag_size(frag)); - dseg->lkey = sq->mkey_be; - dseg++; - } + addr = xdptxdf->dma_arr ? xdptxdf->dma_arr[i] : + page_pool_get_dma_addr(skb_frag_page(frag)) + + skb_frag_off(frag); - cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + dseg->addr = cpu_to_be64(addr); + dseg->byte_count = cpu_to_be32(skb_frag_size(frag)); + dseg->lkey = sq->mkey_be; + dseg++; + } - sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) { - .num_wqebbs = num_wqebbs, - .num_pkts = 1, - }; + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - sq->pc += num_wqebbs; - } else { - cseg->fm_ce_se = 0; + sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) { + .num_wqebbs = num_wqebbs, + .num_pkts = 1, + }; - sq->pc++; - } + sq->pc += num_wqebbs; xsk_tx_metadata_request(meta, &mlx5e_xsk_tx_metadata_ops, eseg); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index 9240cfe25d10..d743e823362a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -72,7 +72,7 @@ static int mlx5e_init_xsk_rq(struct mlx5e_channel *c, rq->netdev = c->netdev; rq->priv = c->priv; rq->tstamp = c->tstamp; - rq->clock = &mdev->clock; + rq->clock = mdev->clock; rq->icosq = &c->icosq; rq->ix = c->ix; rq->channel = c; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index cae39198b4db..f9113cb13a0c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -237,6 +237,27 @@ void mlx5e_build_ptys2ethtool_map(void) ETHTOOL_LINK_MODE_800000baseDR8_2_Full_BIT, ETHTOOL_LINK_MODE_800000baseSR8_Full_BIT, ETHTOOL_LINK_MODE_800000baseVR8_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_200GAUI_1_200GBASE_CR1_KR1, ext, + ETHTOOL_LINK_MODE_200000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_200000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_200000baseDR_Full_BIT, + ETHTOOL_LINK_MODE_200000baseDR_2_Full_BIT, + ETHTOOL_LINK_MODE_200000baseSR_Full_BIT, + ETHTOOL_LINK_MODE_200000baseVR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_400GAUI_2_400GBASE_CR2_KR2, ext, + ETHTOOL_LINK_MODE_400000baseCR2_Full_BIT, + ETHTOOL_LINK_MODE_400000baseKR2_Full_BIT, + ETHTOOL_LINK_MODE_400000baseDR2_Full_BIT, + ETHTOOL_LINK_MODE_400000baseDR2_2_Full_BIT, + ETHTOOL_LINK_MODE_400000baseSR2_Full_BIT, + ETHTOOL_LINK_MODE_400000baseVR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_800GAUI_4_800GBASE_CR4_KR4, ext, + ETHTOOL_LINK_MODE_800000baseCR4_Full_BIT, + ETHTOOL_LINK_MODE_800000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_800000baseDR4_Full_BIT, + ETHTOOL_LINK_MODE_800000baseDR4_2_Full_BIT, + ETHTOOL_LINK_MODE_800000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_800000baseVR4_Full_BIT); } static void mlx5e_ethtool_get_speed_arr(struct mlx5_core_dev *mdev, @@ -931,6 +952,7 @@ static const u32 pplm_fec_2_ethtool[] = { [MLX5E_FEC_RS_528_514] = ETHTOOL_FEC_RS, [MLX5E_FEC_RS_544_514] = ETHTOOL_FEC_RS, [MLX5E_FEC_LLRS_272_257_1] = ETHTOOL_FEC_LLRS, + [MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD] = ETHTOOL_FEC_RS, }; static u32 pplm2ethtool_fec(u_long fec_mode, unsigned long size) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a814b63ed97e..5d5e7b19c396 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -737,7 +737,7 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param rq->netdev = c->netdev; rq->priv = c->priv; rq->tstamp = c->tstamp; - rq->clock = &mdev->clock; + rq->clock = mdev->clock; rq->icosq = &c->icosq; rq->ix = c->ix; rq->channel = c; @@ -1614,7 +1614,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, int err; sq->pdev = c->pdev; - sq->clock = &mdev->clock; + sq->clock = mdev->clock; sq->mkey_be = c->mkey_be; sq->netdev = c->netdev; sq->mdev = c->mdev; @@ -2023,41 +2023,12 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, csp.min_inline_mode = sq->min_inline_mode; set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - if (param->is_xdp_mb) - set_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state); - err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn); if (err) goto err_free_xdpsq; mlx5e_set_xmit_fp(sq, param->is_mpw); - if (!param->is_mpw && !test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state)) { - unsigned int ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1; - unsigned int inline_hdr_sz = 0; - int i; - - if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { - inline_hdr_sz = MLX5E_XDP_MIN_INLINE; - ds_cnt++; - } - - /* Pre initialize fixed WQE fields */ - for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) { - struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i); - struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; - struct mlx5_wqe_eth_seg *eseg = &wqe->eth; - - sq->db.wqe_info[i] = (struct mlx5e_xdp_wqe_info) { - .num_wqebbs = 1, - .num_pkts = 1, - }; - - cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); - } - } - return 0; err_free_xdpsq: @@ -3816,8 +3787,11 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, /* MQPRIO is another toplevel qdisc that can't be attached * simultaneously with the offloaded HTB. */ - if (WARN_ON(mlx5e_selq_is_htb_enabled(&priv->selq))) - return -EINVAL; + if (mlx5e_selq_is_htb_enabled(&priv->selq)) { + NL_SET_ERR_MSG_MOD(mqprio->extack, + "MQPRIO cannot be configured when HTB offload is enabled."); + return -EOPNOTSUPP; + } switch (mqprio->mode) { case TC_MQPRIO_MODE_DCB: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index fdff9fd8a89e..07f38f472a27 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -65,6 +65,7 @@ #define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \ max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE) #define MLX5E_REP_PARAMS_DEF_NUM_CHANNELS 1 +#define MLX5E_REP_PARAMS_DEF_LOG_RQ_SIZE 0x8 static const char mlx5e_rep_driver_name[] = "mlx5e_rep"; @@ -855,6 +856,8 @@ static void mlx5e_build_rep_params(struct net_device *netdev) /* RQ */ mlx5e_build_rq_params(mdev, params); + if (!mlx5e_is_uplink_rep(priv) && mlx5_core_is_ecpf(mdev)) + params->log_rq_mtu_frames = MLX5E_REP_PARAMS_DEF_LOG_RQ_SIZE; /* If netdev is already registered (e.g. move from nic profile to uplink, * RTNL lock must be held before triggering netdev notifiers. @@ -886,6 +889,8 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev, netdev->ethtool_ops = &mlx5e_rep_ethtool_ops; netdev->watchdog_timeo = 15 * HZ; + if (mlx5_core_is_ecpf(mdev)) + netdev->tx_queue_len = 1 << MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE; #if IS_ENABLED(CONFIG_MLX5_CLS_ACT) netdev->hw_features |= NETIF_F_HW_TC; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 1d60465cc2ca..2f7a543feca6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -166,6 +166,9 @@ mlx5e_test_loopback_validate(struct sk_buff *skb, struct udphdr *udph; struct iphdr *iph; + if (skb_linearize(skb)) + goto out; + /* We are only going to peek, no need to clone the SKB */ if (MLX5E_TEST_PKT_SIZE - ETH_HLEN > skb_headlen(skb)) goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 20cc01ceee8a..0fa0333106a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -4157,37 +4157,12 @@ u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, } EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match); -static int mlx5_esw_query_vport_vhca_id(struct mlx5_eswitch *esw, u16 vport_num, u16 *vhca_id) -{ - int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); - void *query_ctx; - void *hca_caps; - int err; - - *vhca_id = 0; - - query_ctx = kzalloc(query_out_sz, GFP_KERNEL); - if (!query_ctx) - return -ENOMEM; - - err = mlx5_vport_get_other_func_general_cap(esw->dev, vport_num, query_ctx); - if (err) - goto out_free; - - hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); - *vhca_id = MLX5_GET(cmd_hca_cap, hca_caps, vhca_id); - -out_free: - kfree(query_ctx); - return err; -} - int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num) { u16 *old_entry, *vhca_map_entry, vhca_id; int err; - err = mlx5_esw_query_vport_vhca_id(esw, vport_num, &vhca_id); + err = mlx5_vport_get_vhca_id(esw->dev, vport_num, &vhca_id); if (err) { esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%u,err=%d)\n", vport_num, err); @@ -4213,7 +4188,7 @@ void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num) u16 *vhca_map_entry, vhca_id; int err; - err = mlx5_esw_query_vport_vhca_id(esw, vport_num, &vhca_id); + err = mlx5_vport_get_vhca_id(esw->dev, vport_num, &vhca_id); if (err) esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%hu,err=%d)\n", vport_num, err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index d91ea53eb394..01c5f5990f9a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -6,6 +6,7 @@ #include "mlx5_core.h" #include "lib/eq.h" #include "lib/events.h" +#include "hwmon.h" struct mlx5_event_nb { struct mlx5_nb nb; @@ -153,21 +154,50 @@ static int any_notifier(struct notifier_block *nb, return NOTIFY_OK; } +#if IS_ENABLED(CONFIG_HWMON) +static void print_sensor_names_in_bit_set(struct mlx5_core_dev *dev, struct mlx5_hwmon *hwmon, + u64 bit_set, int bit_set_offset) +{ + unsigned long *bit_set_ptr = (unsigned long *)&bit_set; + int num_bits = sizeof(bit_set) * BITS_PER_BYTE; + int i; + + for_each_set_bit(i, bit_set_ptr, num_bits) { + const char *sensor_name = hwmon_get_sensor_name(hwmon, i + bit_set_offset); + + mlx5_core_warn(dev, "Sensor name[%d]: %s\n", i + bit_set_offset, sensor_name); + } +} +#endif /* CONFIG_HWMON */ + /* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */ static int temp_warn(struct notifier_block *nb, unsigned long type, void *data) { struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); struct mlx5_events *events = event_nb->ctx; + struct mlx5_core_dev *dev = events->dev; struct mlx5_eqe *eqe = data; u64 value_lsb; u64 value_msb; value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb); + /* bit 1-63 are not supported for NICs, + * hence read only bit 0 (asic) from lsb. + */ + value_lsb &= 0x1; value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb); - mlx5_core_warn(events->dev, - "High temperature on sensors with bit set %llx %llx", - value_msb, value_lsb); + if (net_ratelimit()) { + mlx5_core_warn(dev, "High temperature on sensors with bit set %#llx %#llx.\n", + value_msb, value_lsb); +#if IS_ENABLED(CONFIG_HWMON) + if (dev->hwmon) { + print_sensor_names_in_bit_set(dev, dev->hwmon, value_lsb, 0); + print_sensor_names_in_bit_set(dev, dev->hwmon, value_msb, + sizeof(value_lsb) * BITS_PER_BYTE); + } +#endif + } return NOTIFY_OK; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/hwmon.c b/drivers/net/ethernet/mellanox/mlx5/core/hwmon.c index 353f81dccd1c..4ba2636d7fb6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/hwmon.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/hwmon.c @@ -416,3 +416,8 @@ void mlx5_hwmon_dev_unregister(struct mlx5_core_dev *mdev) mlx5_hwmon_free(hwmon); mdev->hwmon = NULL; } + +const char *hwmon_get_sensor_name(struct mlx5_hwmon *hwmon, int channel) +{ + return hwmon->temp_channel_desc[channel].sensor_name; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/hwmon.h b/drivers/net/ethernet/mellanox/mlx5/core/hwmon.h index 999654a9b9da..f38271c22c10 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/hwmon.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/hwmon.h @@ -10,6 +10,7 @@ int mlx5_hwmon_dev_register(struct mlx5_core_dev *mdev); void mlx5_hwmon_dev_unregister(struct mlx5_core_dev *mdev); +const char *hwmon_get_sensor_name(struct mlx5_hwmon *hwmon, int channel); #else static inline int mlx5_hwmon_dev_register(struct mlx5_core_dev *mdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c index bde79cac33a9..d832a12ffec0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -97,7 +97,7 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, mlx5_del_flow_rules(lag_definer->rules[idx]); } j = ldev->buckets; - }; + } goto destroy_fg; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index d61a1a9297c9..65a94e46edcf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -43,6 +43,8 @@ #include <linux/cpufeature.h> #endif /* CONFIG_X86 */ +#define MLX5_RT_CLOCK_IDENTITY_SIZE MLX5_FLD_SZ_BYTES(mrtcq_reg, rt_clock_identity) + enum { MLX5_PIN_MODE_IN = 0x0, MLX5_PIN_MODE_OUT = 0x1, @@ -77,6 +79,56 @@ enum { MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MAX = 200000, }; +struct mlx5_clock_dev_state { + struct mlx5_core_dev *mdev; + struct mlx5_devcom_comp_dev *compdev; + struct mlx5_nb pps_nb; + struct work_struct out_work; +}; + +struct mlx5_clock_priv { + struct mlx5_clock clock; + struct mlx5_core_dev *mdev; + struct mutex lock; /* protect mdev and used in PTP callbacks */ + struct mlx5_core_dev *event_mdev; +}; + +static struct mlx5_clock_priv *clock_priv(struct mlx5_clock *clock) +{ + return container_of(clock, struct mlx5_clock_priv, clock); +} + +static void mlx5_clock_lockdep_assert(struct mlx5_clock *clock) +{ + if (!clock->shared) + return; + + lockdep_assert(lockdep_is_held(&clock_priv(clock)->lock)); +} + +static struct mlx5_core_dev *mlx5_clock_mdev_get(struct mlx5_clock *clock) +{ + mlx5_clock_lockdep_assert(clock); + + return clock_priv(clock)->mdev; +} + +static void mlx5_clock_lock(struct mlx5_clock *clock) +{ + if (!clock->shared) + return; + + mutex_lock(&clock_priv(clock)->lock); +} + +static void mlx5_clock_unlock(struct mlx5_clock *clock) +{ + if (!clock->shared) + return; + + mutex_unlock(&clock_priv(clock)->lock); +} + static bool mlx5_real_time_mode(struct mlx5_core_dev *mdev) { return (mlx5_is_real_time_rq(mdev) || mlx5_is_real_time_sq(mdev)); @@ -94,6 +146,22 @@ static bool mlx5_modify_mtutc_allowed(struct mlx5_core_dev *mdev) return MLX5_CAP_MCAM_FEATURE(mdev, ptpcyc2realtime_modify); } +static int mlx5_clock_identity_get(struct mlx5_core_dev *mdev, + u8 identify[MLX5_RT_CLOCK_IDENTITY_SIZE]) +{ + u32 out[MLX5_ST_SZ_DW(mrtcq_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(mrtcq_reg)] = {}; + int err; + + err = mlx5_core_access_reg(mdev, in, sizeof(in), + out, sizeof(out), MLX5_REG_MRTCQ, 0, 0); + if (!err) + memcpy(identify, MLX5_ADDR_OF(mrtcq_reg, out, rt_clock_identity), + MLX5_RT_CLOCK_IDENTITY_SIZE); + + return err; +} + static u32 mlx5_ptp_shift_constant(u32 dev_freq_khz) { /* Optimal shift constant leads to corrections above just 1 scaled ppm. @@ -119,21 +187,30 @@ static u32 mlx5_ptp_shift_constant(u32 dev_freq_khz) ilog2((U32_MAX / NSEC_PER_MSEC) * dev_freq_khz)); } +static s32 mlx5_clock_getmaxphase(struct mlx5_core_dev *mdev) +{ + return MLX5_CAP_MCAM_FEATURE(mdev, mtutc_time_adjustment_extended_range) ? + MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MAX : + MLX5_MTUTC_OPERATION_ADJUST_TIME_MAX; +} + static s32 mlx5_ptp_getmaxphase(struct ptp_clock_info *ptp) { struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); struct mlx5_core_dev *mdev; + s32 ret; - mdev = container_of(clock, struct mlx5_core_dev, clock); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); + ret = mlx5_clock_getmaxphase(mdev); + mlx5_clock_unlock(clock); - return MLX5_CAP_MCAM_FEATURE(mdev, mtutc_time_adjustment_extended_range) ? - MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MAX : - MLX5_MTUTC_OPERATION_ADJUST_TIME_MAX; + return ret; } static bool mlx5_is_mtutc_time_adj_cap(struct mlx5_core_dev *mdev, s64 delta) { - s64 max = mlx5_ptp_getmaxphase(&mdev->clock.ptp_info); + s64 max = mlx5_clock_getmaxphase(mdev); if (delta < -max || delta > max) return false; @@ -209,7 +286,7 @@ static int mlx5_mtctr_syncdevicetime(ktime_t *device_time, if (real_time_mode) *device_time = ns_to_ktime(REAL_TIME_TO_NS(device >> 32, device & U32_MAX)); else - *device_time = mlx5_timecounter_cyc2time(&mdev->clock, device); + *device_time = mlx5_timecounter_cyc2time(mdev->clock, device); return 0; } @@ -220,16 +297,23 @@ static int mlx5_ptp_getcrosststamp(struct ptp_clock_info *ptp, struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); struct system_time_snapshot history_begin = {0}; struct mlx5_core_dev *mdev; + int err; - mdev = container_of(clock, struct mlx5_core_dev, clock); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); - if (!mlx5_is_ptm_source_time_available(mdev)) - return -EBUSY; + if (!mlx5_is_ptm_source_time_available(mdev)) { + err = -EBUSY; + goto unlock; + } ktime_get_snapshot(&history_begin); - return get_device_system_crosststamp(mlx5_mtctr_syncdevicetime, mdev, - &history_begin, cts); + err = get_device_system_crosststamp(mlx5_mtctr_syncdevicetime, mdev, + &history_begin, cts); +unlock: + mlx5_clock_unlock(clock); + return err; } #endif /* CONFIG_X86 */ @@ -263,8 +347,7 @@ static u64 read_internal_timer(const struct cyclecounter *cc) { struct mlx5_timer *timer = container_of(cc, struct mlx5_timer, cycles); struct mlx5_clock *clock = container_of(timer, struct mlx5_clock, timer); - struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, - clock); + struct mlx5_core_dev *mdev = mlx5_clock_mdev_get(clock); return mlx5_read_time(mdev, NULL, false) & cc->mask; } @@ -272,7 +355,7 @@ static u64 read_internal_timer(const struct cyclecounter *cc) static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev) { struct mlx5_ib_clock_info *clock_info = mdev->clock_info; - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mdev->clock; struct mlx5_timer *timer; u32 sign; @@ -295,12 +378,10 @@ static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev) static void mlx5_pps_out(struct work_struct *work) { - struct mlx5_pps *pps_info = container_of(work, struct mlx5_pps, - out_work); - struct mlx5_clock *clock = container_of(pps_info, struct mlx5_clock, - pps_info); - struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, - clock); + struct mlx5_clock_dev_state *clock_state = container_of(work, struct mlx5_clock_dev_state, + out_work); + struct mlx5_core_dev *mdev = clock_state->mdev; + struct mlx5_clock *clock = mdev->clock; u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; unsigned long flags; int i; @@ -330,7 +411,8 @@ static long mlx5_timestamp_overflow(struct ptp_clock_info *ptp_info) unsigned long flags; clock = container_of(ptp_info, struct mlx5_clock, ptp_info); - mdev = container_of(clock, struct mlx5_core_dev, clock); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); timer = &clock->timer; if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) @@ -342,6 +424,7 @@ static long mlx5_timestamp_overflow(struct ptp_clock_info *ptp_info) write_sequnlock_irqrestore(&clock->lock, flags); out: + mlx5_clock_unlock(clock); return timer->overflow_period; } @@ -361,15 +444,12 @@ static int mlx5_ptp_settime_real_time(struct mlx5_core_dev *mdev, return mlx5_set_mtutc(mdev, in, sizeof(in)); } -static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts) +static int mlx5_clock_settime(struct mlx5_core_dev *mdev, struct mlx5_clock *clock, + const struct timespec64 *ts) { - struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); struct mlx5_timer *timer = &clock->timer; - struct mlx5_core_dev *mdev; unsigned long flags; - mdev = container_of(clock, struct mlx5_core_dev, clock); - if (mlx5_modify_mtutc_allowed(mdev)) { int err = mlx5_ptp_settime_real_time(mdev, ts); @@ -385,6 +465,20 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 return 0; } +static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts) +{ + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev; + int err; + + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); + err = mlx5_clock_settime(mdev, clock, ts); + mlx5_clock_unlock(clock); + + return err; +} + static struct timespec64 mlx5_ptp_gettimex_real_time(struct mlx5_core_dev *mdev, struct ptp_system_timestamp *sts) @@ -404,7 +498,8 @@ static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts, struct mlx5_core_dev *mdev; u64 cycles, ns; - mdev = container_of(clock, struct mlx5_core_dev, clock); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); if (mlx5_real_time_mode(mdev)) { *ts = mlx5_ptp_gettimex_real_time(mdev, sts); goto out; @@ -414,6 +509,7 @@ static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts, ns = mlx5_timecounter_cyc2time(clock, cycles); *ts = ns_to_timespec64(ns); out: + mlx5_clock_unlock(clock); return 0; } @@ -444,14 +540,16 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) struct mlx5_timer *timer = &clock->timer; struct mlx5_core_dev *mdev; unsigned long flags; + int err = 0; - mdev = container_of(clock, struct mlx5_core_dev, clock); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); if (mlx5_modify_mtutc_allowed(mdev)) { - int err = mlx5_ptp_adjtime_real_time(mdev, delta); + err = mlx5_ptp_adjtime_real_time(mdev, delta); if (err) - return err; + goto unlock; } write_seqlock_irqsave(&clock->lock, flags); @@ -459,17 +557,23 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) mlx5_update_clock_info_page(mdev); write_sequnlock_irqrestore(&clock->lock, flags); - return 0; +unlock: + mlx5_clock_unlock(clock); + return err; } static int mlx5_ptp_adjphase(struct ptp_clock_info *ptp, s32 delta) { struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); struct mlx5_core_dev *mdev; + int err; - mdev = container_of(clock, struct mlx5_core_dev, clock); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); + err = mlx5_ptp_adjtime_real_time(mdev, delta); + mlx5_clock_unlock(clock); - return mlx5_ptp_adjtime_real_time(mdev, delta); + return err; } static int mlx5_ptp_freq_adj_real_time(struct mlx5_core_dev *mdev, long scaled_ppm) @@ -498,15 +602,17 @@ static int mlx5_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) struct mlx5_timer *timer = &clock->timer; struct mlx5_core_dev *mdev; unsigned long flags; + int err = 0; u32 mult; - mdev = container_of(clock, struct mlx5_core_dev, clock); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); if (mlx5_modify_mtutc_allowed(mdev)) { - int err = mlx5_ptp_freq_adj_real_time(mdev, scaled_ppm); + err = mlx5_ptp_freq_adj_real_time(mdev, scaled_ppm); if (err) - return err; + goto unlock; } mult = (u32)adjust_by_scaled_ppm(timer->nominal_c_mult, scaled_ppm); @@ -518,7 +624,9 @@ static int mlx5_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) write_sequnlock_irqrestore(&clock->lock, flags); ptp_schedule_worker(clock->ptp, timer->overflow_period); - return 0; +unlock: + mlx5_clock_unlock(clock); + return err; } static int mlx5_extts_configure(struct ptp_clock_info *ptp, @@ -527,18 +635,14 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp, { struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); - struct mlx5_core_dev *mdev = - container_of(clock, struct mlx5_core_dev, clock); u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + struct mlx5_core_dev *mdev; u32 field_select = 0; u8 pin_mode = 0; u8 pattern = 0; int pin = -1; int err = 0; - if (!MLX5_PPS_CAP(mdev)) - return -EOPNOTSUPP; - /* Reject requests with unsupported flags */ if (rq->extts.flags & ~(PTP_ENABLE_FEATURE | PTP_RISING_EDGE | @@ -569,6 +673,14 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp, field_select = MLX5_MTPPS_FS_ENABLE; } + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); + + if (!MLX5_PPS_CAP(mdev)) { + err = -EOPNOTSUPP; + goto unlock; + } + MLX5_SET(mtpps_reg, in, pin, pin); MLX5_SET(mtpps_reg, in, pin_mode, pin_mode); MLX5_SET(mtpps_reg, in, pattern, pattern); @@ -577,15 +689,23 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp, err = mlx5_set_mtpps(mdev, in, sizeof(in)); if (err) - return err; + goto unlock; + + err = mlx5_set_mtppse(mdev, pin, 0, MLX5_EVENT_MODE_REPETETIVE & on); + if (err) + goto unlock; + + clock->pps_info.pin_armed[pin] = on; + clock_priv(clock)->event_mdev = mdev; - return mlx5_set_mtppse(mdev, pin, 0, - MLX5_EVENT_MODE_REPETETIVE & on); +unlock: + mlx5_clock_unlock(clock); + return err; } static u64 find_target_cycles(struct mlx5_core_dev *mdev, s64 target_ns) { - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mdev->clock; u64 cycles_now, cycles_delta; u64 nsec_now, nsec_delta; struct mlx5_timer *timer; @@ -644,7 +764,7 @@ static int mlx5_perout_conf_out_pulse_duration(struct mlx5_core_dev *mdev, struct ptp_clock_request *rq, u32 *out_pulse_duration_ns) { - struct mlx5_pps *pps_info = &mdev->clock.pps_info; + struct mlx5_pps *pps_info = &mdev->clock->pps_info; u32 out_pulse_duration; struct timespec64 ts; @@ -677,7 +797,7 @@ static int perout_conf_npps_real_time(struct mlx5_core_dev *mdev, struct ptp_clo u32 *field_select, u32 *out_pulse_duration_ns, u64 *period, u64 *time_stamp) { - struct mlx5_pps *pps_info = &mdev->clock.pps_info; + struct mlx5_pps *pps_info = &mdev->clock->pps_info; struct ptp_clock_time *time = &rq->perout.start; struct timespec64 ts; @@ -712,26 +832,18 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, { struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); - struct mlx5_core_dev *mdev = - container_of(clock, struct mlx5_core_dev, clock); - bool rt_mode = mlx5_real_time_mode(mdev); u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; u32 out_pulse_duration_ns = 0; + struct mlx5_core_dev *mdev; u32 field_select = 0; u64 npps_period = 0; u64 time_stamp = 0; u8 pin_mode = 0; u8 pattern = 0; + bool rt_mode; int pin = -1; int err = 0; - if (!MLX5_PPS_CAP(mdev)) - return -EOPNOTSUPP; - - /* Reject requests with unsupported flags */ - if (mlx5_perout_verify_flags(mdev, rq->perout.flags)) - return -EOPNOTSUPP; - if (rq->perout.index >= clock->ptp_info.n_pins) return -EINVAL; @@ -740,14 +852,29 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, if (pin < 0) return -EBUSY; - if (on) { - bool rt_mode = mlx5_real_time_mode(mdev); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); + rt_mode = mlx5_real_time_mode(mdev); + + if (!MLX5_PPS_CAP(mdev)) { + err = -EOPNOTSUPP; + goto unlock; + } + + /* Reject requests with unsupported flags */ + if (mlx5_perout_verify_flags(mdev, rq->perout.flags)) { + err = -EOPNOTSUPP; + goto unlock; + } + if (on) { pin_mode = MLX5_PIN_MODE_OUT; pattern = MLX5_OUT_PATTERN_PERIODIC; - if (rt_mode && rq->perout.start.sec > U32_MAX) - return -EINVAL; + if (rt_mode && rq->perout.start.sec > U32_MAX) { + err = -EINVAL; + goto unlock; + } field_select |= MLX5_MTPPS_FS_PIN_MODE | MLX5_MTPPS_FS_PATTERN | @@ -760,7 +887,7 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, else err = perout_conf_1pps(mdev, rq, &time_stamp, rt_mode); if (err) - return err; + goto unlock; } MLX5_SET(mtpps_reg, in, pin, pin); @@ -773,13 +900,16 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, MLX5_SET(mtpps_reg, in, out_pulse_duration_ns, out_pulse_duration_ns); err = mlx5_set_mtpps(mdev, in, sizeof(in)); if (err) - return err; + goto unlock; if (rt_mode) - return 0; + goto unlock; + + err = mlx5_set_mtppse(mdev, pin, 0, MLX5_EVENT_MODE_REPETETIVE & on); - return mlx5_set_mtppse(mdev, pin, 0, - MLX5_EVENT_MODE_REPETETIVE & on); +unlock: + mlx5_clock_unlock(clock); + return err; } static int mlx5_pps_configure(struct ptp_clock_info *ptp, @@ -866,10 +996,8 @@ static int mlx5_query_mtpps_pin_mode(struct mlx5_core_dev *mdev, u8 pin, mtpps_size, MLX5_REG_MTPPS, 0, 0); } -static int mlx5_get_pps_pin_mode(struct mlx5_clock *clock, u8 pin) +static int mlx5_get_pps_pin_mode(struct mlx5_core_dev *mdev, u8 pin) { - struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock); - u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {}; u8 mode; int err; @@ -888,8 +1016,9 @@ static int mlx5_get_pps_pin_mode(struct mlx5_clock *clock, u8 pin) return PTP_PF_NONE; } -static void mlx5_init_pin_config(struct mlx5_clock *clock) +static void mlx5_init_pin_config(struct mlx5_core_dev *mdev) { + struct mlx5_clock *clock = mdev->clock; int i; if (!clock->ptp_info.n_pins) @@ -910,15 +1039,15 @@ static void mlx5_init_pin_config(struct mlx5_clock *clock) sizeof(clock->ptp_info.pin_config[i].name), "mlx5_pps%d", i); clock->ptp_info.pin_config[i].index = i; - clock->ptp_info.pin_config[i].func = mlx5_get_pps_pin_mode(clock, i); + clock->ptp_info.pin_config[i].func = mlx5_get_pps_pin_mode(mdev, i); clock->ptp_info.pin_config[i].chan = 0; } } static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + struct mlx5_clock *clock = mdev->clock; mlx5_query_mtpps(mdev, out, sizeof(out)); @@ -968,16 +1097,16 @@ static u64 perout_conf_next_event_timer(struct mlx5_core_dev *mdev, static int mlx5_pps_event(struct notifier_block *nb, unsigned long type, void *data) { - struct mlx5_clock *clock = mlx5_nb_cof(nb, struct mlx5_clock, pps_nb); + struct mlx5_clock_dev_state *clock_state = mlx5_nb_cof(nb, struct mlx5_clock_dev_state, + pps_nb); + struct mlx5_core_dev *mdev = clock_state->mdev; + struct mlx5_clock *clock = mdev->clock; struct ptp_clock_event ptp_event; struct mlx5_eqe *eqe = data; int pin = eqe->data.pps.pin; - struct mlx5_core_dev *mdev; unsigned long flags; u64 ns; - mdev = container_of(clock, struct mlx5_core_dev, clock); - switch (clock->ptp_info.pin_config[pin].func) { case PTP_PF_EXTTS: ptp_event.index = pin; @@ -997,11 +1126,15 @@ static int mlx5_pps_event(struct notifier_block *nb, ptp_clock_event(clock->ptp, &ptp_event); break; case PTP_PF_PEROUT: + if (clock->shared) { + mlx5_core_warn(mdev, " Received unexpected PPS out event\n"); + break; + } ns = perout_conf_next_event_timer(mdev, clock); write_seqlock_irqsave(&clock->lock, flags); clock->pps_info.start[pin] = ns; write_sequnlock_irqrestore(&clock->lock, flags); - schedule_work(&clock->pps_info.out_work); + schedule_work(&clock_state->out_work); break; default: mlx5_core_err(mdev, " Unhandled clock PPS event, func %d\n", @@ -1013,7 +1146,7 @@ static int mlx5_pps_event(struct notifier_block *nb, static void mlx5_timecounter_init(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mdev->clock; struct mlx5_timer *timer = &clock->timer; u32 dev_freq; @@ -1029,10 +1162,10 @@ static void mlx5_timecounter_init(struct mlx5_core_dev *mdev) ktime_to_ns(ktime_get_real())); } -static void mlx5_init_overflow_period(struct mlx5_clock *clock) +static void mlx5_init_overflow_period(struct mlx5_core_dev *mdev) { - struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock); struct mlx5_ib_clock_info *clock_info = mdev->clock_info; + struct mlx5_clock *clock = mdev->clock; struct mlx5_timer *timer = &clock->timer; u64 overflow_cycles; u64 frac = 0; @@ -1065,7 +1198,7 @@ static void mlx5_init_overflow_period(struct mlx5_clock *clock) static void mlx5_init_clock_info(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mdev->clock; struct mlx5_ib_clock_info *info; struct mlx5_timer *timer; @@ -1088,7 +1221,7 @@ static void mlx5_init_clock_info(struct mlx5_core_dev *mdev) static void mlx5_init_timer_max_freq_adjustment(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mdev->clock; u32 out[MLX5_ST_SZ_DW(mtutc_reg)] = {}; u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {}; u8 log_max_freq_adjustment = 0; @@ -1107,7 +1240,7 @@ static void mlx5_init_timer_max_freq_adjustment(struct mlx5_core_dev *mdev) static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mdev->clock; /* Configure the PHC */ clock->ptp_info = mlx5_ptp_clock_info; @@ -1123,38 +1256,30 @@ static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev) mlx5_timecounter_init(mdev); mlx5_init_clock_info(mdev); - mlx5_init_overflow_period(clock); + mlx5_init_overflow_period(mdev); if (mlx5_real_time_mode(mdev)) { struct timespec64 ts; ktime_get_real_ts64(&ts); - mlx5_ptp_settime(&clock->ptp_info, &ts); + mlx5_clock_settime(mdev, clock, &ts); } } static void mlx5_init_pps(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; - if (!MLX5_PPS_CAP(mdev)) return; mlx5_get_pps_caps(mdev); - mlx5_init_pin_config(clock); + mlx5_init_pin_config(mdev); } -void mlx5_init_clock(struct mlx5_core_dev *mdev) +static void mlx5_init_clock_dev(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; - - if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) { - mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n"); - return; - } + struct mlx5_clock *clock = mdev->clock; seqlock_init(&clock->lock); - INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out); /* Initialize the device clock */ mlx5_init_timer_clock(mdev); @@ -1163,35 +1288,27 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev) mlx5_init_pps(mdev); clock->ptp = ptp_clock_register(&clock->ptp_info, - &mdev->pdev->dev); + clock->shared ? NULL : &mdev->pdev->dev); if (IS_ERR(clock->ptp)) { - mlx5_core_warn(mdev, "ptp_clock_register failed %ld\n", + mlx5_core_warn(mdev, "%sptp_clock_register failed %ld\n", + clock->shared ? "shared clock " : "", PTR_ERR(clock->ptp)); clock->ptp = NULL; } - MLX5_NB_INIT(&clock->pps_nb, mlx5_pps_event, PPS_EVENT); - mlx5_eq_notifier_register(mdev, &clock->pps_nb); - if (clock->ptp) ptp_schedule_worker(clock->ptp, 0); } -void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) +static void mlx5_destroy_clock_dev(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mdev->clock; - if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) - return; - - mlx5_eq_notifier_unregister(mdev, &clock->pps_nb); if (clock->ptp) { ptp_clock_unregister(clock->ptp); clock->ptp = NULL; } - cancel_work_sync(&clock->pps_info.out_work); - if (mdev->clock_info) { free_page((unsigned long)mdev->clock_info); mdev->clock_info = NULL; @@ -1199,3 +1316,248 @@ void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) kfree(clock->ptp_info.pin_config); } + +static void mlx5_clock_free(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock_priv *cpriv = clock_priv(mdev->clock); + + mlx5_destroy_clock_dev(mdev); + mutex_destroy(&cpriv->lock); + kfree(cpriv); + mdev->clock = NULL; +} + +static int mlx5_clock_alloc(struct mlx5_core_dev *mdev, bool shared) +{ + struct mlx5_clock_priv *cpriv; + struct mlx5_clock *clock; + + cpriv = kzalloc(sizeof(*cpriv), GFP_KERNEL); + if (!cpriv) + return -ENOMEM; + + mutex_init(&cpriv->lock); + cpriv->mdev = mdev; + clock = &cpriv->clock; + clock->shared = shared; + mdev->clock = clock; + mlx5_clock_lock(clock); + mlx5_init_clock_dev(mdev); + mlx5_clock_unlock(clock); + + if (!clock->shared) + return 0; + + if (!clock->ptp) { + mlx5_core_warn(mdev, "failed to create ptp dev shared by multiple functions"); + mlx5_clock_free(mdev); + return -EINVAL; + } + + return 0; +} + +static void mlx5_shared_clock_register(struct mlx5_core_dev *mdev, u64 key) +{ + struct mlx5_core_dev *peer_dev, *next = NULL; + struct mlx5_devcom_comp_dev *pos; + + mdev->clock_state->compdev = mlx5_devcom_register_component(mdev->priv.devc, + MLX5_DEVCOM_SHARED_CLOCK, + key, NULL, mdev); + if (IS_ERR(mdev->clock_state->compdev)) + return; + + mlx5_devcom_comp_lock(mdev->clock_state->compdev); + mlx5_devcom_for_each_peer_entry(mdev->clock_state->compdev, peer_dev, pos) { + if (peer_dev->clock) { + next = peer_dev; + break; + } + } + + if (next) { + mdev->clock = next->clock; + /* clock info is shared among all the functions using the same clock */ + mdev->clock_info = next->clock_info; + } else { + mlx5_clock_alloc(mdev, true); + } + mlx5_devcom_comp_unlock(mdev->clock_state->compdev); + + if (!mdev->clock) { + mlx5_devcom_unregister_component(mdev->clock_state->compdev); + mdev->clock_state->compdev = NULL; + } +} + +static void mlx5_shared_clock_unregister(struct mlx5_core_dev *mdev) +{ + struct mlx5_core_dev *peer_dev, *next = NULL; + struct mlx5_clock *clock = mdev->clock; + struct mlx5_devcom_comp_dev *pos; + + mlx5_devcom_comp_lock(mdev->clock_state->compdev); + mlx5_devcom_for_each_peer_entry(mdev->clock_state->compdev, peer_dev, pos) { + if (peer_dev->clock && peer_dev != mdev) { + next = peer_dev; + break; + } + } + + if (next) { + struct mlx5_clock_priv *cpriv = clock_priv(clock); + + mlx5_clock_lock(clock); + if (mdev == cpriv->mdev) + cpriv->mdev = next; + mlx5_clock_unlock(clock); + } else { + mlx5_clock_free(mdev); + } + + mdev->clock = NULL; + mdev->clock_info = NULL; + mlx5_devcom_comp_unlock(mdev->clock_state->compdev); + + mlx5_devcom_unregister_component(mdev->clock_state->compdev); +} + +static void mlx5_clock_arm_pps_in_event(struct mlx5_clock *clock, + struct mlx5_core_dev *new_mdev, + struct mlx5_core_dev *old_mdev) +{ + struct ptp_clock_info *ptp_info = &clock->ptp_info; + struct mlx5_clock_priv *cpriv = clock_priv(clock); + int i; + + for (i = 0; i < ptp_info->n_pins; i++) { + if (ptp_info->pin_config[i].func != PTP_PF_EXTTS || + !clock->pps_info.pin_armed[i]) + continue; + + if (new_mdev) { + mlx5_set_mtppse(new_mdev, i, 0, MLX5_EVENT_MODE_REPETETIVE); + cpriv->event_mdev = new_mdev; + } else { + cpriv->event_mdev = NULL; + } + + if (old_mdev) + mlx5_set_mtppse(old_mdev, i, 0, MLX5_EVENT_MODE_DISABLE); + } +} + +void mlx5_clock_load(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = mdev->clock; + struct mlx5_clock_priv *cpriv; + + if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) + return; + + INIT_WORK(&mdev->clock_state->out_work, mlx5_pps_out); + MLX5_NB_INIT(&mdev->clock_state->pps_nb, mlx5_pps_event, PPS_EVENT); + mlx5_eq_notifier_register(mdev, &mdev->clock_state->pps_nb); + + if (!clock->shared) { + mlx5_clock_arm_pps_in_event(clock, mdev, NULL); + return; + } + + cpriv = clock_priv(clock); + mlx5_devcom_comp_lock(mdev->clock_state->compdev); + mlx5_clock_lock(clock); + if (mdev == cpriv->mdev && mdev != cpriv->event_mdev) + mlx5_clock_arm_pps_in_event(clock, mdev, cpriv->event_mdev); + mlx5_clock_unlock(clock); + mlx5_devcom_comp_unlock(mdev->clock_state->compdev); +} + +void mlx5_clock_unload(struct mlx5_core_dev *mdev) +{ + struct mlx5_core_dev *peer_dev, *next = NULL; + struct mlx5_clock *clock = mdev->clock; + struct mlx5_devcom_comp_dev *pos; + + if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) + return; + + if (!clock->shared) { + mlx5_clock_arm_pps_in_event(clock, NULL, mdev); + goto out; + } + + mlx5_devcom_comp_lock(mdev->clock_state->compdev); + mlx5_devcom_for_each_peer_entry(mdev->clock_state->compdev, peer_dev, pos) { + if (peer_dev->clock && peer_dev != mdev) { + next = peer_dev; + break; + } + } + + mlx5_clock_lock(clock); + if (mdev == clock_priv(clock)->event_mdev) + mlx5_clock_arm_pps_in_event(clock, next, mdev); + mlx5_clock_unlock(clock); + mlx5_devcom_comp_unlock(mdev->clock_state->compdev); + +out: + mlx5_eq_notifier_unregister(mdev, &mdev->clock_state->pps_nb); + cancel_work_sync(&mdev->clock_state->out_work); +} + +static struct mlx5_clock null_clock; + +int mlx5_init_clock(struct mlx5_core_dev *mdev) +{ + u8 identity[MLX5_RT_CLOCK_IDENTITY_SIZE]; + struct mlx5_clock_dev_state *clock_state; + u64 key; + int err; + + if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) { + mdev->clock = &null_clock; + mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n"); + return 0; + } + + clock_state = kzalloc(sizeof(*clock_state), GFP_KERNEL); + if (!clock_state) + return -ENOMEM; + clock_state->mdev = mdev; + mdev->clock_state = clock_state; + + if (MLX5_CAP_MCAM_REG3(mdev, mrtcq) && mlx5_real_time_mode(mdev)) { + if (mlx5_clock_identity_get(mdev, identity)) { + mlx5_core_warn(mdev, "failed to get rt clock identity, create ptp dev per function\n"); + } else { + memcpy(&key, &identity, sizeof(key)); + mlx5_shared_clock_register(mdev, key); + } + } + + if (!mdev->clock) { + err = mlx5_clock_alloc(mdev, false); + if (err) { + kfree(clock_state); + mdev->clock_state = NULL; + return err; + } + } + + return 0; +} + +void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) +{ + if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) + return; + + if (mdev->clock->shared) + mlx5_shared_clock_unregister(mdev); + else + mlx5_clock_free(mdev); + kfree(mdev->clock_state); + mdev->clock_state = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h index bd95b9f8d143..c18a652c0faa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h @@ -33,6 +33,35 @@ #ifndef __LIB_CLOCK_H__ #define __LIB_CLOCK_H__ +#include <linux/ptp_clock_kernel.h> + +#define MAX_PIN_NUM 8 +struct mlx5_pps { + u8 pin_caps[MAX_PIN_NUM]; + u64 start[MAX_PIN_NUM]; + u8 enabled; + u64 min_npps_period; + u64 min_out_pulse_duration_ns; + bool pin_armed[MAX_PIN_NUM]; +}; + +struct mlx5_timer { + struct cyclecounter cycles; + struct timecounter tc; + u32 nominal_c_mult; + unsigned long overflow_period; +}; + +struct mlx5_clock { + seqlock_t lock; + struct hwtstamp_config hwtstamp_config; + struct ptp_clock *ptp; + struct ptp_clock_info ptp_info; + struct mlx5_pps pps_info; + struct mlx5_timer timer; + bool shared; +}; + static inline bool mlx5_is_real_time_rq(struct mlx5_core_dev *mdev) { u8 rq_ts_format_cap = MLX5_CAP_GEN(mdev, rq_ts_format); @@ -54,12 +83,14 @@ static inline bool mlx5_is_real_time_sq(struct mlx5_core_dev *mdev) typedef ktime_t (*cqe_ts_to_ns)(struct mlx5_clock *, u64); #if IS_ENABLED(CONFIG_PTP_1588_CLOCK) -void mlx5_init_clock(struct mlx5_core_dev *mdev); +int mlx5_init_clock(struct mlx5_core_dev *mdev); void mlx5_cleanup_clock(struct mlx5_core_dev *mdev); +void mlx5_clock_load(struct mlx5_core_dev *mdev); +void mlx5_clock_unload(struct mlx5_core_dev *mdev); static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev) { - return mdev->clock.ptp ? ptp_clock_index(mdev->clock.ptp) : -1; + return mdev->clock->ptp ? ptp_clock_index(mdev->clock->ptp) : -1; } static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock, @@ -87,8 +118,10 @@ static inline ktime_t mlx5_real_time_cyc2time(struct mlx5_clock *clock, return ns_to_ktime(time); } #else -static inline void mlx5_init_clock(struct mlx5_core_dev *mdev) {} +static inline int mlx5_init_clock(struct mlx5_core_dev *mdev) { return 0; } static inline void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) {} +static inline void mlx5_clock_load(struct mlx5_core_dev *mdev) {} +static inline void mlx5_clock_unload(struct mlx5_core_dev *mdev) {} static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev) { return -1; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h index d58032dd0df7..c79699b94a02 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h @@ -11,6 +11,7 @@ enum mlx5_devcom_component { MLX5_DEVCOM_MPV, MLX5_DEVCOM_HCA_PORTS, MLX5_DEVCOM_SD_GROUP, + MLX5_DEVCOM_SHARED_CLOCK, MLX5_DEVCOM_NUM_COMPONENTS, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c index 9f13cea16446..eb3bd9c7f66e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c @@ -61,6 +61,25 @@ static void mlx5_cleanup_ttc_rules(struct mlx5_ttc_table *ttc) } } +static const char *mlx5_traffic_types_names[MLX5_NUM_TT] = { + [MLX5_TT_IPV4_TCP] = "TT_IPV4_TCP", + [MLX5_TT_IPV6_TCP] = "TT_IPV6_TCP", + [MLX5_TT_IPV4_UDP] = "TT_IPV4_UDP", + [MLX5_TT_IPV6_UDP] = "TT_IPV6_UDP", + [MLX5_TT_IPV4_IPSEC_AH] = "TT_IPV4_IPSEC_AH", + [MLX5_TT_IPV6_IPSEC_AH] = "TT_IPV6_IPSEC_AH", + [MLX5_TT_IPV4_IPSEC_ESP] = "TT_IPV4_IPSEC_ESP", + [MLX5_TT_IPV6_IPSEC_ESP] = "TT_IPV6_IPSEC_ESP", + [MLX5_TT_IPV4] = "TT_IPV4", + [MLX5_TT_IPV6] = "TT_IPV6", + [MLX5_TT_ANY] = "TT_ANY" +}; + +const char *mlx5_ttc_get_name(enum mlx5_traffic_types tt) +{ + return mlx5_traffic_types_names[tt]; +} + struct mlx5_etype_proto { u16 etype; u8 proto; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h index 92eea6bea310..ab9434fe3ae6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h @@ -49,6 +49,7 @@ struct ttc_params { struct mlx5_flow_destination tunnel_dests[MLX5_NUM_TUNNEL_TT]; }; +const char *mlx5_ttc_get_name(enum mlx5_traffic_types tt); struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc); struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index ec956c4bcebd..710633d5fdbe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1038,7 +1038,11 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) mlx5_init_reserved_gids(dev); - mlx5_init_clock(dev); + err = mlx5_init_clock(dev); + if (err) { + mlx5_core_err(dev, "failed to initialize hardware clock\n"); + goto err_tables_cleanup; + } dev->vxlan = mlx5_vxlan_create(dev); dev->geneve = mlx5_geneve_create(dev); @@ -1046,7 +1050,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) err = mlx5_init_rl_table(dev); if (err) { mlx5_core_err(dev, "Failed to init rate limiting\n"); - goto err_tables_cleanup; + goto err_clock_cleanup; } err = mlx5_mpfs_init(dev); @@ -1123,10 +1127,11 @@ err_mpfs_cleanup: mlx5_mpfs_cleanup(dev); err_rl_cleanup: mlx5_cleanup_rl_table(dev); -err_tables_cleanup: +err_clock_cleanup: mlx5_geneve_destroy(dev->geneve); mlx5_vxlan_destroy(dev->vxlan); mlx5_cleanup_clock(dev); +err_tables_cleanup: mlx5_cleanup_reserved_gids(dev); mlx5_cq_debugfs_cleanup(dev); mlx5_fw_reset_cleanup(dev); @@ -1359,6 +1364,8 @@ static int mlx5_load(struct mlx5_core_dev *dev) goto err_eq_table; } + mlx5_clock_load(dev); + err = mlx5_fw_tracer_init(dev->tracer); if (err) { mlx5_core_err(dev, "Failed to init FW tracer %d\n", err); @@ -1442,6 +1449,7 @@ err_fpga_start: mlx5_hv_vhca_cleanup(dev->hv_vhca); mlx5_fw_reset_events_stop(dev); mlx5_fw_tracer_cleanup(dev->tracer); + mlx5_clock_unload(dev); mlx5_eq_table_destroy(dev); err_eq_table: mlx5_irq_table_destroy(dev); @@ -1468,6 +1476,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev) mlx5_hv_vhca_cleanup(dev->hv_vhca); mlx5_fw_reset_events_stop(dev); mlx5_fw_tracer_cleanup(dev->tracer); + mlx5_clock_unload(dev); mlx5_eq_table_destroy(dev); mlx5_irq_table_destroy(dev); mlx5_pagealloc_stop(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 99de67c3aa74..6fef1005c469 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -346,6 +346,8 @@ int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap #define mlx5_vport_get_other_func_general_cap(dev, vport, out) \ mlx5_vport_get_other_func_cap(dev, vport, out, MLX5_CAP_GENERAL) +int mlx5_vport_get_vhca_id(struct mlx5_core_dev *dev, u16 vport, u16 *vhca_id); + static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 50931584132b..3995df064101 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -1105,6 +1105,9 @@ static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = { [MLX5E_200GAUI_2_200GBASE_CR2_KR2] = 200000, [MLX5E_400GAUI_4_400GBASE_CR4_KR4] = 400000, [MLX5E_800GAUI_8_800GBASE_CR8_KR8] = 800000, + [MLX5E_200GAUI_1_200GBASE_CR1_KR1] = 200000, + [MLX5E_400GAUI_2_400GBASE_CR2_KR2] = 400000, + [MLX5E_800GAUI_4_800GBASE_CR4_KR4] = 800000, }; int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_domain.c index 60cb4527588a..65740bb68b09 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_domain.c @@ -516,30 +516,6 @@ def_xa_destroy: return NULL; } -/* Assure synchronization of the device steering tables with updates made by SW - * insertion. - */ -int mlx5dr_domain_sync(struct mlx5dr_domain *dmn, u32 flags) -{ - int ret = 0; - - if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_SW) { - mlx5dr_domain_lock(dmn); - ret = mlx5dr_send_ring_force_drain(dmn); - mlx5dr_domain_unlock(dmn); - if (ret) { - mlx5dr_err(dmn, "Force drain failed flags: %d, ret: %d\n", - flags, ret); - return ret; - } - } - - if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_HW) - ret = mlx5dr_cmd_sync_steering(dmn->mdev); - - return ret; -} - int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn) { if (WARN_ON_ONCE(refcount_read(&dmn->refcount) > 1)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c index f57c84e5128b..4fd4e8483382 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c @@ -1331,36 +1331,3 @@ void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn, kfree(send_ring->sync_buff); kfree(send_ring); } - -int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn) -{ - struct mlx5dr_send_ring *send_ring = dmn->send_ring; - struct postsend_info send_info = {}; - u8 data[DR_STE_SIZE]; - int num_of_sends_req; - int ret; - int i; - - /* Sending this amount of requests makes sure we will get drain */ - num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2; - - /* Send fake requests forcing the last to be signaled */ - send_info.write.addr = (uintptr_t)data; - send_info.write.length = DR_STE_SIZE; - send_info.write.lkey = 0; - /* Using the sync_mr in order to write/read */ - send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr; - send_info.rkey = send_ring->sync_mr->mkey; - - for (i = 0; i < num_of_sends_req; i++) { - ret = dr_postsend_icm_data(dmn, &send_info); - if (ret) - return ret; - } - - spin_lock(&send_ring->lock); - ret = dr_handle_pending_wc(dmn, send_ring); - spin_unlock(&send_ring->lock); - - return ret; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_types.h index 7618c6147f86..cc328292bf84 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_types.h @@ -1473,7 +1473,6 @@ struct mlx5dr_send_ring { int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn); void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn, struct mlx5dr_send_ring *send_ring); -int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn); int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste, u8 *data, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5dr.h index 0bb3724c10c2..fc8a2169d1a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5dr.h @@ -45,8 +45,6 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type); int mlx5dr_domain_destroy(struct mlx5dr_domain *domain); -int mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags); - void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, struct mlx5dr_domain *peer_dmn, u16 peer_vhca_id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 0d5f750faa45..d10d4c396040 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -1199,6 +1199,31 @@ int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 vport, void *ou } EXPORT_SYMBOL_GPL(mlx5_vport_get_other_func_cap); +int mlx5_vport_get_vhca_id(struct mlx5_core_dev *dev, u16 vport, u16 *vhca_id) +{ + int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + void *query_ctx; + void *hca_caps; + int err; + + *vhca_id = 0; + + query_ctx = kzalloc(query_out_sz, GFP_KERNEL); + if (!query_ctx) + return -ENOMEM; + + err = mlx5_vport_get_other_func_general_cap(dev, vport, query_ctx); + if (err) + goto out_free; + + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); + *vhca_id = MLX5_GET(cmd_hca_cap, hca_caps, vhca_id); + +out_free: + kfree(query_ctx); + return err; +} + int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, u16 vport, u16 opmod) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 5b44c931b660..058dcabfaa2e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -2214,6 +2214,8 @@ static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb, for (i++; i < MLXSW_PCI_WQE_SG_ENTRIES; i++) mlxsw_pci_wqe_byte_count_set(wqe, i, 0); + mlxsw_pci_wqe_ipcs_set(wqe, skb->ip_summed == CHECKSUM_PARTIAL); + /* Everything is set up, ring producer doorbell to get HW going */ q->producer_counter++; mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index 6bed495dcf0f..7fa94e5828de 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -90,6 +90,11 @@ MLXSW_ITEM32(pci, wqe, lp, 0x00, 30, 1); */ MLXSW_ITEM32(pci, wqe, type, 0x00, 23, 4); +/* pci_wqe_ipcs + * Calculate IPv4 and TCP / UDP checksums. + */ +MLXSW_ITEM32(pci, wqe, ipcs, 0x00, 14, 1); + /* pci_wqe_byte_count * Size of i-th scatter/gather entry, 0 if entry is unused. */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index d714311fd884..1f8362788c75 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1574,8 +1574,10 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u16 local_port, netif_carrier_off(dev); dev->features |= NETIF_F_SG | NETIF_F_HW_VLAN_CTAG_FILTER | - NETIF_F_HW_TC; - dev->hw_features |= NETIF_F_HW_TC | NETIF_F_LOOPBACK; + NETIF_F_HW_TC | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + dev->hw_features |= NETIF_F_HW_TC | NETIF_F_LOOPBACK | + NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + dev->vlan_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; dev->lltx = true; dev->netns_local = true; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index b10f80fc651b..fa7082ee5183 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -754,9 +754,6 @@ void mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp, struct net_device *dev); -bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp, - const struct net_device *dev); -u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev); u16 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp); int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, enum mlxsw_sp_l3proto ul_proto, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 7d6d859cef3f..464821dd492d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -8184,41 +8184,6 @@ mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, return NULL; } -bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp, - const struct net_device *dev) -{ - struct mlxsw_sp_rif *rif; - - mutex_lock(&mlxsw_sp->router->lock); - rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - mutex_unlock(&mlxsw_sp->router->lock); - - return rif; -} - -u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev) -{ - struct mlxsw_sp_rif *rif; - u16 vid = 0; - - mutex_lock(&mlxsw_sp->router->lock); - rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - if (!rif) - goto out; - - /* We only return the VID for VLAN RIFs. Otherwise we return an - * invalid value (0). - */ - if (rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN) - goto out; - - vid = mlxsw_sp_fid_8021q_vid(rif->fid); - -out: - mutex_unlock(&mlxsw_sp->router->lock); - return vid; -} - static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif) { char ritr_pl[MLXSW_REG_RITR_LEN]; @@ -8417,19 +8382,6 @@ u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif) return lb_rif->common.rif_index; } -u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif) -{ - struct net_device *dev = mlxsw_sp_rif_dev(&lb_rif->common); - u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(dev); - struct mlxsw_sp_vr *ul_vr; - - ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL); - if (WARN_ON(IS_ERR(ul_vr))) - return 0; - - return ul_vr->id; -} - u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif) { return lb_rif->ul_rif_id; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 0432c7cc6b07..313efab5c324 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -90,7 +90,6 @@ struct mlxsw_sp_ipip_entry; struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp, u16 rif_index); u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif); -u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif); u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif); u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev); int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif); diff --git a/drivers/net/ethernet/meta/fbnic/Makefile b/drivers/net/ethernet/meta/fbnic/Makefile index 239b2258ec65..0dbc634adb4b 100644 --- a/drivers/net/ethernet/meta/fbnic/Makefile +++ b/drivers/net/ethernet/meta/fbnic/Makefile @@ -20,6 +20,7 @@ fbnic-y := fbnic_csr.o \ fbnic_pci.o \ fbnic_phylink.o \ fbnic_rpc.o \ + fbnic_time.o \ fbnic_tlv.o \ fbnic_txrx.o \ - fbnic_time.o +# End of objects diff --git a/drivers/net/ethernet/meta/fbnic/fbnic.h b/drivers/net/ethernet/meta/fbnic/fbnic.h index 14751f16e125..37f81db1fc30 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic.h @@ -60,6 +60,12 @@ struct fbnic_dev { u8 mac_addr_boundary; u8 tce_tcam_last; + /* IP TCAM */ + struct fbnic_ip_addr ip_src[FBNIC_RPC_TCAM_IP_ADDR_NUM_ENTRIES]; + struct fbnic_ip_addr ip_dst[FBNIC_RPC_TCAM_IP_ADDR_NUM_ENTRIES]; + struct fbnic_ip_addr ipo_src[FBNIC_RPC_TCAM_IP_ADDR_NUM_ENTRIES]; + struct fbnic_ip_addr ipo_dst[FBNIC_RPC_TCAM_IP_ADDR_NUM_ENTRIES]; + /* Number of TCQs/RCQs available on hardware */ u16 max_num_queues; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h index 02bb81b3c506..6f24c5f2e175 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h @@ -605,8 +605,11 @@ enum { FBNIC_RPC_ACT_TBL0_DEST_EI = 4, }; +#define FBNIC_RPC_ACT_TBL0_Q_SEL CSR_BIT(4) +#define FBNIC_RPC_ACT_TBL0_Q_ID CSR_GENMASK(15, 8) #define FBNIC_RPC_ACT_TBL0_DMA_HINT CSR_GENMASK(24, 16) #define FBNIC_RPC_ACT_TBL0_TS_ENA CSR_BIT(28) +#define FBNIC_RPC_ACT_TBL0_ACT_TBL_IDX CSR_BIT(29) #define FBNIC_RPC_ACT_TBL0_RSS_CTXT_ID CSR_BIT(30) #define FBNIC_RPC_ACT_TBL1_DEFAULT 0x0840b /* 0x2102c */ @@ -677,6 +680,9 @@ enum { #define FBNIC_RPC_TCAM_OUTER_IPSRC(m, n)\ (0x08c00 + 0x08 * (n) + (m)) /* 0x023000 + 32*n + 4*m */ +#define FBNIC_RPC_TCAM_IP_ADDR_VALUE CSR_GENMASK(15, 0) +#define FBNIC_RPC_TCAM_IP_ADDR_MASK CSR_GENMASK(31, 16) + #define FBNIC_RPC_TCAM_OUTER_IPDST(m, n)\ (0x08c48 + 0x08 * (n) + (m)) /* 0x023120 + 32*n + 4*m */ #define FBNIC_RPC_TCAM_IPSRC(m, n)\ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c b/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c index 59951b5abdb7..e8f2d7f2d962 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c @@ -10,6 +10,166 @@ static struct dentry *fbnic_dbg_root; +static void fbnic_dbg_desc_break(struct seq_file *s, int i) +{ + while (i--) + seq_putc(s, '-'); + + seq_putc(s, '\n'); +} + +static int fbnic_dbg_mac_addr_show(struct seq_file *s, void *v) +{ + struct fbnic_dev *fbd = s->private; + char hdr[80]; + int i; + + /* Generate Header */ + snprintf(hdr, sizeof(hdr), "%3s %s %-17s %s\n", + "Idx", "S", "TCAM Bitmap", "Addr/Mask"); + seq_puts(s, hdr); + fbnic_dbg_desc_break(s, strnlen(hdr, sizeof(hdr))); + + for (i = 0; i < FBNIC_RPC_TCAM_MACDA_NUM_ENTRIES; i++) { + struct fbnic_mac_addr *mac_addr = &fbd->mac_addr[i]; + + seq_printf(s, "%02d %d %64pb %pm\n", + i, mac_addr->state, mac_addr->act_tcam, + mac_addr->value.addr8); + seq_printf(s, " %pm\n", + mac_addr->mask.addr8); + } + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(fbnic_dbg_mac_addr); + +static int fbnic_dbg_tce_tcam_show(struct seq_file *s, void *v) +{ + struct fbnic_dev *fbd = s->private; + int i, tcam_idx = 0; + char hdr[80]; + + /* Generate Header */ + snprintf(hdr, sizeof(hdr), "%3s %s %-17s %s\n", + "Idx", "S", "TCAM Bitmap", "Addr/Mask"); + seq_puts(s, hdr); + fbnic_dbg_desc_break(s, strnlen(hdr, sizeof(hdr))); + + for (i = 0; i < ARRAY_SIZE(fbd->mac_addr); i++) { + struct fbnic_mac_addr *mac_addr = &fbd->mac_addr[i]; + + /* Verify BMC bit is set */ + if (!test_bit(FBNIC_MAC_ADDR_T_BMC, mac_addr->act_tcam)) + continue; + + if (tcam_idx == FBNIC_TCE_TCAM_NUM_ENTRIES) + break; + + seq_printf(s, "%02d %d %64pb %pm\n", + tcam_idx, mac_addr->state, mac_addr->act_tcam, + mac_addr->value.addr8); + seq_printf(s, " %pm\n", + mac_addr->mask.addr8); + tcam_idx++; + } + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(fbnic_dbg_tce_tcam); + +static int fbnic_dbg_act_tcam_show(struct seq_file *s, void *v) +{ + struct fbnic_dev *fbd = s->private; + char hdr[80]; + int i; + + /* Generate Header */ + snprintf(hdr, sizeof(hdr), "%3s %s %-55s %-4s %s\n", + "Idx", "S", "Value/Mask", "RSS", "Dest"); + seq_puts(s, hdr); + fbnic_dbg_desc_break(s, strnlen(hdr, sizeof(hdr))); + + for (i = 0; i < FBNIC_RPC_TCAM_ACT_NUM_ENTRIES; i++) { + struct fbnic_act_tcam *act_tcam = &fbd->act_tcam[i]; + + seq_printf(s, "%02d %d %04x %04x %04x %04x %04x %04x %04x %04x %04x %04x %04x %04x %08x\n", + i, act_tcam->state, + act_tcam->value.tcam[10], act_tcam->value.tcam[9], + act_tcam->value.tcam[8], act_tcam->value.tcam[7], + act_tcam->value.tcam[6], act_tcam->value.tcam[5], + act_tcam->value.tcam[4], act_tcam->value.tcam[3], + act_tcam->value.tcam[2], act_tcam->value.tcam[1], + act_tcam->value.tcam[0], act_tcam->rss_en_mask, + act_tcam->dest); + seq_printf(s, " %04x %04x %04x %04x %04x %04x %04x %04x %04x %04x %04x\n", + act_tcam->mask.tcam[10], act_tcam->mask.tcam[9], + act_tcam->mask.tcam[8], act_tcam->mask.tcam[7], + act_tcam->mask.tcam[6], act_tcam->mask.tcam[5], + act_tcam->mask.tcam[4], act_tcam->mask.tcam[3], + act_tcam->mask.tcam[2], act_tcam->mask.tcam[1], + act_tcam->mask.tcam[0]); + } + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(fbnic_dbg_act_tcam); + +static int fbnic_dbg_ip_addr_show(struct seq_file *s, + struct fbnic_ip_addr *ip_addr) +{ + char hdr[80]; + int i; + + /* Generate Header */ + snprintf(hdr, sizeof(hdr), "%3s %s %-17s %s %s\n", + "Idx", "S", "TCAM Bitmap", "V", "Addr/Mask"); + seq_puts(s, hdr); + fbnic_dbg_desc_break(s, strnlen(hdr, sizeof(hdr))); + + for (i = 0; i < FBNIC_RPC_TCAM_IP_ADDR_NUM_ENTRIES; i++, ip_addr++) { + seq_printf(s, "%02d %d %64pb %d %pi6\n", + i, ip_addr->state, ip_addr->act_tcam, + ip_addr->version, &ip_addr->value); + seq_printf(s, " %pi6\n", + &ip_addr->mask); + } + + return 0; +} + +static int fbnic_dbg_ip_src_show(struct seq_file *s, void *v) +{ + struct fbnic_dev *fbd = s->private; + + return fbnic_dbg_ip_addr_show(s, fbd->ip_src); +} +DEFINE_SHOW_ATTRIBUTE(fbnic_dbg_ip_src); + +static int fbnic_dbg_ip_dst_show(struct seq_file *s, void *v) +{ + struct fbnic_dev *fbd = s->private; + + return fbnic_dbg_ip_addr_show(s, fbd->ip_dst); +} +DEFINE_SHOW_ATTRIBUTE(fbnic_dbg_ip_dst); + +static int fbnic_dbg_ipo_src_show(struct seq_file *s, void *v) +{ + struct fbnic_dev *fbd = s->private; + + return fbnic_dbg_ip_addr_show(s, fbd->ipo_src); +} +DEFINE_SHOW_ATTRIBUTE(fbnic_dbg_ipo_src); + +static int fbnic_dbg_ipo_dst_show(struct seq_file *s, void *v) +{ + struct fbnic_dev *fbd = s->private; + + return fbnic_dbg_ip_addr_show(s, fbd->ipo_dst); +} +DEFINE_SHOW_ATTRIBUTE(fbnic_dbg_ipo_dst); + static int fbnic_dbg_pcie_stats_show(struct seq_file *s, void *v) { struct fbnic_dev *fbd = s->private; @@ -48,6 +208,20 @@ void fbnic_dbg_fbd_init(struct fbnic_dev *fbd) fbd->dbg_fbd = debugfs_create_dir(name, fbnic_dbg_root); debugfs_create_file("pcie_stats", 0400, fbd->dbg_fbd, fbd, &fbnic_dbg_pcie_stats_fops); + debugfs_create_file("mac_addr", 0400, fbd->dbg_fbd, fbd, + &fbnic_dbg_mac_addr_fops); + debugfs_create_file("tce_tcam", 0400, fbd->dbg_fbd, fbd, + &fbnic_dbg_tce_tcam_fops); + debugfs_create_file("act_tcam", 0400, fbd->dbg_fbd, fbd, + &fbnic_dbg_act_tcam_fops); + debugfs_create_file("ip_src", 0400, fbd->dbg_fbd, fbd, + &fbnic_dbg_ip_src_fops); + debugfs_create_file("ip_dst", 0400, fbd->dbg_fbd, fbd, + &fbnic_dbg_ip_dst_fops); + debugfs_create_file("ipo_src", 0400, fbd->dbg_fbd, fbd, + &fbnic_dbg_ipo_src_fops); + debugfs_create_file("ipo_dst", 0400, fbd->dbg_fbd, fbd, + &fbnic_dbg_ipo_dst_fops); } void fbnic_dbg_fbd_exit(struct fbnic_dev *fbd) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c index 20cd9f5f89e2..fb7139a1da46 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c @@ -4,6 +4,7 @@ #include <linux/ethtool.h> #include <linux/netdevice.h> #include <linux/pci.h> +#include <net/ipv6.h> #include "fbnic.h" #include "fbnic_netdev.h" @@ -218,11 +219,234 @@ fbnic_get_rss_hash_opts(struct fbnic_net *fbn, struct ethtool_rxnfc *cmd) return 0; } +static int fbnic_get_cls_rule_all(struct fbnic_net *fbn, + struct ethtool_rxnfc *cmd, + u32 *rule_locs) +{ + struct fbnic_dev *fbd = fbn->fbd; + int i, cnt = 0; + + /* Report maximum rule count */ + cmd->data = FBNIC_RPC_ACT_TBL_NFC_ENTRIES; + + for (i = 0; i < FBNIC_RPC_ACT_TBL_NFC_ENTRIES; i++) { + int idx = i + FBNIC_RPC_ACT_TBL_NFC_OFFSET; + struct fbnic_act_tcam *act_tcam; + + act_tcam = &fbd->act_tcam[idx]; + if (act_tcam->state != FBNIC_TCAM_S_VALID) + continue; + + if (rule_locs) { + if (cnt == cmd->rule_cnt) + return -EMSGSIZE; + + rule_locs[cnt] = i; + } + + cnt++; + } + + return cnt; +} + +static int fbnic_get_cls_rule(struct fbnic_net *fbn, struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fsp; + struct fbnic_dev *fbd = fbn->fbd; + struct fbnic_act_tcam *act_tcam; + int idx; + + fsp = (struct ethtool_rx_flow_spec *)&cmd->fs; + + if (fsp->location >= FBNIC_RPC_ACT_TBL_NFC_ENTRIES) + return -EINVAL; + + idx = fsp->location + FBNIC_RPC_ACT_TBL_NFC_OFFSET; + act_tcam = &fbd->act_tcam[idx]; + + if (act_tcam->state != FBNIC_TCAM_S_VALID) + return -EINVAL; + + /* Report maximum rule count */ + cmd->data = FBNIC_RPC_ACT_TBL_NFC_ENTRIES; + + /* Set flow type field */ + if (!(act_tcam->value.tcam[1] & FBNIC_RPC_TCAM_ACT1_IP_VALID)) { + fsp->flow_type = ETHER_FLOW; + if (!FIELD_GET(FBNIC_RPC_TCAM_ACT1_L2_MACDA_IDX, + act_tcam->mask.tcam[1])) { + struct fbnic_mac_addr *mac_addr; + + idx = FIELD_GET(FBNIC_RPC_TCAM_ACT1_L2_MACDA_IDX, + act_tcam->value.tcam[1]); + mac_addr = &fbd->mac_addr[idx]; + + ether_addr_copy(fsp->h_u.ether_spec.h_dest, + mac_addr->value.addr8); + eth_broadcast_addr(fsp->m_u.ether_spec.h_dest); + } + } else if (act_tcam->value.tcam[1] & + FBNIC_RPC_TCAM_ACT1_OUTER_IP_VALID) { + fsp->flow_type = IPV6_USER_FLOW; + fsp->h_u.usr_ip6_spec.l4_proto = IPPROTO_IPV6; + fsp->m_u.usr_ip6_spec.l4_proto = 0xff; + + if (!FIELD_GET(FBNIC_RPC_TCAM_ACT0_OUTER_IPSRC_IDX, + act_tcam->mask.tcam[0])) { + struct fbnic_ip_addr *ip_addr; + int i; + + idx = FIELD_GET(FBNIC_RPC_TCAM_ACT0_OUTER_IPSRC_IDX, + act_tcam->value.tcam[0]); + ip_addr = &fbd->ipo_src[idx]; + + for (i = 0; i < 4; i++) { + fsp->h_u.usr_ip6_spec.ip6src[i] = + ip_addr->value.s6_addr32[i]; + fsp->m_u.usr_ip6_spec.ip6src[i] = + ~ip_addr->mask.s6_addr32[i]; + } + } + + if (!FIELD_GET(FBNIC_RPC_TCAM_ACT0_OUTER_IPDST_IDX, + act_tcam->mask.tcam[0])) { + struct fbnic_ip_addr *ip_addr; + int i; + + idx = FIELD_GET(FBNIC_RPC_TCAM_ACT0_OUTER_IPDST_IDX, + act_tcam->value.tcam[0]); + ip_addr = &fbd->ipo_dst[idx]; + + for (i = 0; i < 4; i++) { + fsp->h_u.usr_ip6_spec.ip6dst[i] = + ip_addr->value.s6_addr32[i]; + fsp->m_u.usr_ip6_spec.ip6dst[i] = + ~ip_addr->mask.s6_addr32[i]; + } + } + } else if ((act_tcam->value.tcam[1] & FBNIC_RPC_TCAM_ACT1_IP_IS_V6)) { + if (act_tcam->value.tcam[1] & FBNIC_RPC_TCAM_ACT1_L4_VALID) { + if (act_tcam->value.tcam[1] & + FBNIC_RPC_TCAM_ACT1_L4_IS_UDP) + fsp->flow_type = UDP_V6_FLOW; + else + fsp->flow_type = TCP_V6_FLOW; + fsp->h_u.tcp_ip6_spec.psrc = + cpu_to_be16(act_tcam->value.tcam[3]); + fsp->m_u.tcp_ip6_spec.psrc = + cpu_to_be16(~act_tcam->mask.tcam[3]); + fsp->h_u.tcp_ip6_spec.pdst = + cpu_to_be16(act_tcam->value.tcam[4]); + fsp->m_u.tcp_ip6_spec.pdst = + cpu_to_be16(~act_tcam->mask.tcam[4]); + } else { + fsp->flow_type = IPV6_USER_FLOW; + } + + if (!FIELD_GET(FBNIC_RPC_TCAM_ACT0_IPSRC_IDX, + act_tcam->mask.tcam[0])) { + struct fbnic_ip_addr *ip_addr; + int i; + + idx = FIELD_GET(FBNIC_RPC_TCAM_ACT0_IPSRC_IDX, + act_tcam->value.tcam[0]); + ip_addr = &fbd->ip_src[idx]; + + for (i = 0; i < 4; i++) { + fsp->h_u.usr_ip6_spec.ip6src[i] = + ip_addr->value.s6_addr32[i]; + fsp->m_u.usr_ip6_spec.ip6src[i] = + ~ip_addr->mask.s6_addr32[i]; + } + } + + if (!FIELD_GET(FBNIC_RPC_TCAM_ACT0_IPDST_IDX, + act_tcam->mask.tcam[0])) { + struct fbnic_ip_addr *ip_addr; + int i; + + idx = FIELD_GET(FBNIC_RPC_TCAM_ACT0_IPDST_IDX, + act_tcam->value.tcam[0]); + ip_addr = &fbd->ip_dst[idx]; + + for (i = 0; i < 4; i++) { + fsp->h_u.usr_ip6_spec.ip6dst[i] = + ip_addr->value.s6_addr32[i]; + fsp->m_u.usr_ip6_spec.ip6dst[i] = + ~ip_addr->mask.s6_addr32[i]; + } + } + } else { + if (act_tcam->value.tcam[1] & FBNIC_RPC_TCAM_ACT1_L4_VALID) { + if (act_tcam->value.tcam[1] & + FBNIC_RPC_TCAM_ACT1_L4_IS_UDP) + fsp->flow_type = UDP_V4_FLOW; + else + fsp->flow_type = TCP_V4_FLOW; + fsp->h_u.tcp_ip4_spec.psrc = + cpu_to_be16(act_tcam->value.tcam[3]); + fsp->m_u.tcp_ip4_spec.psrc = + cpu_to_be16(~act_tcam->mask.tcam[3]); + fsp->h_u.tcp_ip4_spec.pdst = + cpu_to_be16(act_tcam->value.tcam[4]); + fsp->m_u.tcp_ip4_spec.pdst = + cpu_to_be16(~act_tcam->mask.tcam[4]); + } else { + fsp->flow_type = IPV4_USER_FLOW; + fsp->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4; + } + + if (!FIELD_GET(FBNIC_RPC_TCAM_ACT0_IPSRC_IDX, + act_tcam->mask.tcam[0])) { + struct fbnic_ip_addr *ip_addr; + + idx = FIELD_GET(FBNIC_RPC_TCAM_ACT0_IPSRC_IDX, + act_tcam->value.tcam[0]); + ip_addr = &fbd->ip_src[idx]; + + fsp->h_u.usr_ip4_spec.ip4src = + ip_addr->value.s6_addr32[3]; + fsp->m_u.usr_ip4_spec.ip4src = + ~ip_addr->mask.s6_addr32[3]; + } + + if (!FIELD_GET(FBNIC_RPC_TCAM_ACT0_IPDST_IDX, + act_tcam->mask.tcam[0])) { + struct fbnic_ip_addr *ip_addr; + + idx = FIELD_GET(FBNIC_RPC_TCAM_ACT0_IPDST_IDX, + act_tcam->value.tcam[0]); + ip_addr = &fbd->ip_dst[idx]; + + fsp->h_u.usr_ip4_spec.ip4dst = + ip_addr->value.s6_addr32[3]; + fsp->m_u.usr_ip4_spec.ip4dst = + ~ip_addr->mask.s6_addr32[3]; + } + } + + /* Record action */ + if (act_tcam->dest & FBNIC_RPC_ACT_TBL0_DROP) + fsp->ring_cookie = RX_CLS_FLOW_DISC; + else if (act_tcam->dest & FBNIC_RPC_ACT_TBL0_Q_SEL) + fsp->ring_cookie = FIELD_GET(FBNIC_RPC_ACT_TBL0_Q_ID, + act_tcam->dest); + else + fsp->flow_type |= FLOW_RSS; + + cmd->rss_context = FIELD_GET(FBNIC_RPC_ACT_TBL0_RSS_CTXT_ID, + act_tcam->dest); + + return 0; +} + static int fbnic_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, u32 *rule_locs) { struct fbnic_net *fbn = netdev_priv(netdev); int ret = -EOPNOTSUPP; + u32 special = 0; switch (cmd->cmd) { case ETHTOOL_GRXRINGS: @@ -232,6 +456,22 @@ static int fbnic_get_rxnfc(struct net_device *netdev, case ETHTOOL_GRXFH: ret = fbnic_get_rss_hash_opts(fbn, cmd); break; + case ETHTOOL_GRXCLSRULE: + ret = fbnic_get_cls_rule(fbn, cmd); + break; + case ETHTOOL_GRXCLSRLCNT: + rule_locs = NULL; + special = RX_CLS_LOC_SPECIAL; + fallthrough; + case ETHTOOL_GRXCLSRLALL: + ret = fbnic_get_cls_rule_all(fbn, cmd, rule_locs); + if (ret < 0) + break; + + cmd->data |= special; + cmd->rule_cnt = ret; + ret = 0; + break; } return ret; @@ -272,6 +512,406 @@ fbnic_set_rss_hash_opts(struct fbnic_net *fbn, const struct ethtool_rxnfc *cmd) return 0; } +static int fbnic_cls_rule_any_loc(struct fbnic_dev *fbd) +{ + int i; + + for (i = FBNIC_RPC_ACT_TBL_NFC_ENTRIES; i--;) { + int idx = i + FBNIC_RPC_ACT_TBL_NFC_OFFSET; + + if (fbd->act_tcam[idx].state != FBNIC_TCAM_S_VALID) + return i; + } + + return -ENOSPC; +} + +static int fbnic_set_cls_rule_ins(struct fbnic_net *fbn, + const struct ethtool_rxnfc *cmd) +{ + u16 flow_value = 0, flow_mask = 0xffff, ip_value = 0, ip_mask = 0xffff; + u16 sport = 0, sport_mask = ~0, dport = 0, dport_mask = ~0; + u16 misc = 0, misc_mask = ~0; + u32 dest = FIELD_PREP(FBNIC_RPC_ACT_TBL0_DEST_MASK, + FBNIC_RPC_ACT_TBL0_DEST_HOST); + struct fbnic_ip_addr *ip_src = NULL, *ip_dst = NULL; + struct fbnic_mac_addr *mac_addr = NULL; + struct ethtool_rx_flow_spec *fsp; + struct fbnic_dev *fbd = fbn->fbd; + struct fbnic_act_tcam *act_tcam; + struct in6_addr *addr6, *mask6; + struct in_addr *addr4, *mask4; + int hash_idx, location; + u32 flow_type; + int idx, j; + + fsp = (struct ethtool_rx_flow_spec *)&cmd->fs; + + if (fsp->location != RX_CLS_LOC_ANY) + return -EINVAL; + location = fbnic_cls_rule_any_loc(fbd); + if (location < 0) + return location; + + if (fsp->ring_cookie == RX_CLS_FLOW_DISC) { + dest = FBNIC_RPC_ACT_TBL0_DROP; + } else if (fsp->flow_type & FLOW_RSS) { + if (cmd->rss_context == 1) + dest |= FBNIC_RPC_ACT_TBL0_RSS_CTXT_ID; + } else { + u32 ring_idx = ethtool_get_flow_spec_ring(fsp->ring_cookie); + + if (ring_idx >= fbn->num_rx_queues) + return -EINVAL; + + dest |= FBNIC_RPC_ACT_TBL0_Q_SEL | + FIELD_PREP(FBNIC_RPC_ACT_TBL0_Q_ID, ring_idx); + } + + idx = location + FBNIC_RPC_ACT_TBL_NFC_OFFSET; + act_tcam = &fbd->act_tcam[idx]; + + /* Do not allow overwriting for now. + * To support overwriting rules we will need to add logic to free + * any IP or MACDA TCAMs that may be associated with the old rule. + */ + if (act_tcam->state != FBNIC_TCAM_S_DISABLED) + return -EBUSY; + + flow_type = fsp->flow_type & ~(FLOW_EXT | FLOW_RSS); + hash_idx = fbnic_get_rss_hash_idx(flow_type); + + switch (flow_type) { + case UDP_V4_FLOW: +udp4_flow: + flow_value |= FBNIC_RPC_TCAM_ACT1_L4_IS_UDP; + fallthrough; + case TCP_V4_FLOW: +tcp4_flow: + flow_value |= FBNIC_RPC_TCAM_ACT1_L4_VALID; + flow_mask &= ~(FBNIC_RPC_TCAM_ACT1_L4_IS_UDP | + FBNIC_RPC_TCAM_ACT1_L4_VALID); + + sport = be16_to_cpu(fsp->h_u.tcp_ip4_spec.psrc); + sport_mask = ~be16_to_cpu(fsp->m_u.tcp_ip4_spec.psrc); + dport = be16_to_cpu(fsp->h_u.tcp_ip4_spec.pdst); + dport_mask = ~be16_to_cpu(fsp->m_u.tcp_ip4_spec.pdst); + goto ip4_flow; + case IP_USER_FLOW: + if (!fsp->m_u.usr_ip4_spec.proto) + goto ip4_flow; + if (fsp->m_u.usr_ip4_spec.proto != 0xff) + return -EINVAL; + if (fsp->h_u.usr_ip4_spec.proto == IPPROTO_UDP) + goto udp4_flow; + if (fsp->h_u.usr_ip4_spec.proto == IPPROTO_TCP) + goto tcp4_flow; + return -EINVAL; +ip4_flow: + addr4 = (struct in_addr *)&fsp->h_u.usr_ip4_spec.ip4src; + mask4 = (struct in_addr *)&fsp->m_u.usr_ip4_spec.ip4src; + if (mask4->s_addr) { + ip_src = __fbnic_ip4_sync(fbd, fbd->ip_src, + addr4, mask4); + if (!ip_src) + return -ENOSPC; + + set_bit(idx, ip_src->act_tcam); + ip_value |= FBNIC_RPC_TCAM_ACT0_IPSRC_VALID | + FIELD_PREP(FBNIC_RPC_TCAM_ACT0_IPSRC_IDX, + ip_src - fbd->ip_src); + ip_mask &= ~(FBNIC_RPC_TCAM_ACT0_IPSRC_VALID | + FBNIC_RPC_TCAM_ACT0_IPSRC_IDX); + } + + addr4 = (struct in_addr *)&fsp->h_u.usr_ip4_spec.ip4dst; + mask4 = (struct in_addr *)&fsp->m_u.usr_ip4_spec.ip4dst; + if (mask4->s_addr) { + ip_dst = __fbnic_ip4_sync(fbd, fbd->ip_dst, + addr4, mask4); + if (!ip_dst) { + if (ip_src && ip_src->state == FBNIC_TCAM_S_ADD) + memset(ip_src, 0, sizeof(*ip_src)); + return -ENOSPC; + } + + set_bit(idx, ip_dst->act_tcam); + ip_value |= FBNIC_RPC_TCAM_ACT0_IPDST_VALID | + FIELD_PREP(FBNIC_RPC_TCAM_ACT0_IPDST_IDX, + ip_dst - fbd->ip_dst); + ip_mask &= ~(FBNIC_RPC_TCAM_ACT0_IPDST_VALID | + FBNIC_RPC_TCAM_ACT0_IPDST_IDX); + } + flow_value |= FBNIC_RPC_TCAM_ACT1_IP_VALID | + FBNIC_RPC_TCAM_ACT1_L2_MACDA_VALID; + flow_mask &= ~(FBNIC_RPC_TCAM_ACT1_IP_IS_V6 | + FBNIC_RPC_TCAM_ACT1_IP_VALID | + FBNIC_RPC_TCAM_ACT1_L2_MACDA_VALID); + break; + case UDP_V6_FLOW: +udp6_flow: + flow_value |= FBNIC_RPC_TCAM_ACT1_L4_IS_UDP; + fallthrough; + case TCP_V6_FLOW: +tcp6_flow: + flow_value |= FBNIC_RPC_TCAM_ACT1_L4_VALID; + flow_mask &= ~(FBNIC_RPC_TCAM_ACT1_L4_IS_UDP | + FBNIC_RPC_TCAM_ACT1_L4_VALID); + + sport = be16_to_cpu(fsp->h_u.tcp_ip6_spec.psrc); + sport_mask = ~be16_to_cpu(fsp->m_u.tcp_ip6_spec.psrc); + dport = be16_to_cpu(fsp->h_u.tcp_ip6_spec.pdst); + dport_mask = ~be16_to_cpu(fsp->m_u.tcp_ip6_spec.pdst); + goto ipv6_flow; + case IPV6_USER_FLOW: + if (!fsp->m_u.usr_ip6_spec.l4_proto) + goto ipv6_flow; + + if (fsp->m_u.usr_ip6_spec.l4_proto != 0xff) + return -EINVAL; + if (fsp->h_u.usr_ip6_spec.l4_proto == IPPROTO_UDP) + goto udp6_flow; + if (fsp->h_u.usr_ip6_spec.l4_proto == IPPROTO_TCP) + goto tcp6_flow; + if (fsp->h_u.usr_ip6_spec.l4_proto != IPPROTO_IPV6) + return -EINVAL; + + addr6 = (struct in6_addr *)fsp->h_u.usr_ip6_spec.ip6src; + mask6 = (struct in6_addr *)fsp->m_u.usr_ip6_spec.ip6src; + if (!ipv6_addr_any(mask6)) { + ip_src = __fbnic_ip6_sync(fbd, fbd->ipo_src, + addr6, mask6); + if (!ip_src) + return -ENOSPC; + + set_bit(idx, ip_src->act_tcam); + ip_value |= + FBNIC_RPC_TCAM_ACT0_OUTER_IPSRC_VALID | + FIELD_PREP(FBNIC_RPC_TCAM_ACT0_OUTER_IPSRC_IDX, + ip_src - fbd->ipo_src); + ip_mask &= + ~(FBNIC_RPC_TCAM_ACT0_OUTER_IPSRC_VALID | + FBNIC_RPC_TCAM_ACT0_OUTER_IPSRC_IDX); + } + + addr6 = (struct in6_addr *)fsp->h_u.usr_ip6_spec.ip6dst; + mask6 = (struct in6_addr *)fsp->m_u.usr_ip6_spec.ip6dst; + if (!ipv6_addr_any(mask6)) { + ip_dst = __fbnic_ip6_sync(fbd, fbd->ipo_dst, + addr6, mask6); + if (!ip_dst) { + if (ip_src && ip_src->state == FBNIC_TCAM_S_ADD) + memset(ip_src, 0, sizeof(*ip_src)); + return -ENOSPC; + } + + set_bit(idx, ip_dst->act_tcam); + ip_value |= + FBNIC_RPC_TCAM_ACT0_OUTER_IPDST_VALID | + FIELD_PREP(FBNIC_RPC_TCAM_ACT0_OUTER_IPDST_IDX, + ip_dst - fbd->ipo_dst); + ip_mask &= ~(FBNIC_RPC_TCAM_ACT0_OUTER_IPDST_VALID | + FBNIC_RPC_TCAM_ACT0_OUTER_IPDST_IDX); + } + + flow_value |= FBNIC_RPC_TCAM_ACT1_OUTER_IP_VALID; + flow_mask &= FBNIC_RPC_TCAM_ACT1_OUTER_IP_VALID; +ipv6_flow: + addr6 = (struct in6_addr *)fsp->h_u.usr_ip6_spec.ip6src; + mask6 = (struct in6_addr *)fsp->m_u.usr_ip6_spec.ip6src; + if (!ip_src && !ipv6_addr_any(mask6)) { + ip_src = __fbnic_ip6_sync(fbd, fbd->ip_src, + addr6, mask6); + if (!ip_src) + return -ENOSPC; + + set_bit(idx, ip_src->act_tcam); + ip_value |= FBNIC_RPC_TCAM_ACT0_IPSRC_VALID | + FIELD_PREP(FBNIC_RPC_TCAM_ACT0_IPSRC_IDX, + ip_src - fbd->ip_src); + ip_mask &= ~(FBNIC_RPC_TCAM_ACT0_IPSRC_VALID | + FBNIC_RPC_TCAM_ACT0_IPSRC_IDX); + } + + addr6 = (struct in6_addr *)fsp->h_u.usr_ip6_spec.ip6dst; + mask6 = (struct in6_addr *)fsp->m_u.usr_ip6_spec.ip6dst; + if (!ip_dst && !ipv6_addr_any(mask6)) { + ip_dst = __fbnic_ip6_sync(fbd, fbd->ip_dst, + addr6, mask6); + if (!ip_dst) { + if (ip_src && ip_src->state == FBNIC_TCAM_S_ADD) + memset(ip_src, 0, sizeof(*ip_src)); + return -ENOSPC; + } + + set_bit(idx, ip_dst->act_tcam); + ip_value |= FBNIC_RPC_TCAM_ACT0_IPDST_VALID | + FIELD_PREP(FBNIC_RPC_TCAM_ACT0_IPDST_IDX, + ip_dst - fbd->ip_dst); + ip_mask &= ~(FBNIC_RPC_TCAM_ACT0_IPDST_VALID | + FBNIC_RPC_TCAM_ACT0_IPDST_IDX); + } + + flow_value |= FBNIC_RPC_TCAM_ACT1_IP_IS_V6 | + FBNIC_RPC_TCAM_ACT1_IP_VALID | + FBNIC_RPC_TCAM_ACT1_L2_MACDA_VALID; + flow_mask &= ~(FBNIC_RPC_TCAM_ACT1_IP_IS_V6 | + FBNIC_RPC_TCAM_ACT1_IP_VALID | + FBNIC_RPC_TCAM_ACT1_L2_MACDA_VALID); + break; + case ETHER_FLOW: + if (!is_zero_ether_addr(fsp->m_u.ether_spec.h_dest)) { + u8 *addr = fsp->h_u.ether_spec.h_dest; + u8 *mask = fsp->m_u.ether_spec.h_dest; + + /* Do not allow MAC addr of 0 */ + if (is_zero_ether_addr(addr)) + return -EINVAL; + + /* Only support full MAC address to avoid + * conflicts with other MAC addresses. + */ + if (!is_broadcast_ether_addr(mask)) + return -EINVAL; + + if (is_multicast_ether_addr(addr)) + mac_addr = __fbnic_mc_sync(fbd, addr); + else + mac_addr = __fbnic_uc_sync(fbd, addr); + + if (!mac_addr) + return -ENOSPC; + + set_bit(idx, mac_addr->act_tcam); + flow_value |= + FIELD_PREP(FBNIC_RPC_TCAM_ACT1_L2_MACDA_IDX, + mac_addr - fbd->mac_addr); + flow_mask &= ~FBNIC_RPC_TCAM_ACT1_L2_MACDA_IDX; + } + + flow_value |= FBNIC_RPC_TCAM_ACT1_L2_MACDA_VALID; + flow_mask &= ~FBNIC_RPC_TCAM_ACT1_L2_MACDA_VALID; + break; + default: + return -EINVAL; + } + + /* Write action table values */ + act_tcam->dest = dest; + act_tcam->rss_en_mask = fbnic_flow_hash_2_rss_en_mask(fbn, hash_idx); + + /* Write IP Match value/mask to action_tcam[0] */ + act_tcam->value.tcam[0] = ip_value; + act_tcam->mask.tcam[0] = ip_mask; + + /* Write flow type value/mask to action_tcam[1] */ + act_tcam->value.tcam[1] = flow_value; + act_tcam->mask.tcam[1] = flow_mask; + + /* Write error, DSCP, extra L4 matches to action_tcam[2] */ + act_tcam->value.tcam[2] = misc; + act_tcam->mask.tcam[2] = misc_mask; + + /* Write source/destination port values */ + act_tcam->value.tcam[3] = sport; + act_tcam->mask.tcam[3] = sport_mask; + act_tcam->value.tcam[4] = dport; + act_tcam->mask.tcam[4] = dport_mask; + + for (j = 5; j < FBNIC_RPC_TCAM_ACT_WORD_LEN; j++) + act_tcam->mask.tcam[j] = 0xffff; + + act_tcam->state = FBNIC_TCAM_S_UPDATE; + fsp->location = location; + + if (netif_running(fbn->netdev)) { + fbnic_write_rules(fbd); + if (ip_src || ip_dst) + fbnic_write_ip_addr(fbd); + if (mac_addr) + fbnic_write_macda(fbd); + } + + return 0; +} + +static void fbnic_clear_nfc_macda(struct fbnic_net *fbn, + unsigned int tcam_idx) +{ + struct fbnic_dev *fbd = fbn->fbd; + int idx; + + for (idx = ARRAY_SIZE(fbd->mac_addr); idx--;) + __fbnic_xc_unsync(&fbd->mac_addr[idx], tcam_idx); + + /* Write updates to hardware */ + if (netif_running(fbn->netdev)) + fbnic_write_macda(fbd); +} + +static void fbnic_clear_nfc_ip_addr(struct fbnic_net *fbn, + unsigned int tcam_idx) +{ + struct fbnic_dev *fbd = fbn->fbd; + int idx; + + for (idx = ARRAY_SIZE(fbd->ip_src); idx--;) + __fbnic_ip_unsync(&fbd->ip_src[idx], tcam_idx); + for (idx = ARRAY_SIZE(fbd->ip_dst); idx--;) + __fbnic_ip_unsync(&fbd->ip_dst[idx], tcam_idx); + for (idx = ARRAY_SIZE(fbd->ipo_src); idx--;) + __fbnic_ip_unsync(&fbd->ipo_src[idx], tcam_idx); + for (idx = ARRAY_SIZE(fbd->ipo_dst); idx--;) + __fbnic_ip_unsync(&fbd->ipo_dst[idx], tcam_idx); + + /* Write updates to hardware */ + if (netif_running(fbn->netdev)) + fbnic_write_ip_addr(fbd); +} + +static int fbnic_set_cls_rule_del(struct fbnic_net *fbn, + const struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fsp; + struct fbnic_dev *fbd = fbn->fbd; + struct fbnic_act_tcam *act_tcam; + int idx; + + fsp = (struct ethtool_rx_flow_spec *)&cmd->fs; + + if (fsp->location >= FBNIC_RPC_ACT_TBL_NFC_ENTRIES) + return -EINVAL; + + idx = fsp->location + FBNIC_RPC_ACT_TBL_NFC_OFFSET; + act_tcam = &fbd->act_tcam[idx]; + + if (act_tcam->state != FBNIC_TCAM_S_VALID) + return -EINVAL; + + act_tcam->state = FBNIC_TCAM_S_DELETE; + + if ((act_tcam->value.tcam[1] & FBNIC_RPC_TCAM_ACT1_L2_MACDA_VALID) && + (~act_tcam->mask.tcam[1] & FBNIC_RPC_TCAM_ACT1_L2_MACDA_IDX)) + fbnic_clear_nfc_macda(fbn, idx); + + if ((act_tcam->value.tcam[0] & + (FBNIC_RPC_TCAM_ACT0_IPSRC_VALID | + FBNIC_RPC_TCAM_ACT0_IPDST_VALID | + FBNIC_RPC_TCAM_ACT0_OUTER_IPSRC_VALID | + FBNIC_RPC_TCAM_ACT0_OUTER_IPDST_VALID)) && + (~act_tcam->mask.tcam[0] & + (FBNIC_RPC_TCAM_ACT0_IPSRC_IDX | + FBNIC_RPC_TCAM_ACT0_IPDST_IDX | + FBNIC_RPC_TCAM_ACT0_OUTER_IPSRC_IDX | + FBNIC_RPC_TCAM_ACT0_OUTER_IPDST_IDX))) + fbnic_clear_nfc_ip_addr(fbn, idx); + + if (netif_running(fbn->netdev)) + fbnic_write_rules(fbd); + + return 0; +} + static int fbnic_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd) { struct fbnic_net *fbn = netdev_priv(netdev); @@ -281,6 +921,12 @@ static int fbnic_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd) case ETHTOOL_SRXFH: ret = fbnic_set_rss_hash_opts(fbn, cmd); break; + case ETHTOOL_SRXCLSRLINS: + ret = fbnic_set_cls_rule_ins(fbn, cmd); + break; + case ETHTOOL_SRXCLSRLDEL: + ret = fbnic_set_cls_rule_del(fbn, cmd); + break; } return ret; @@ -374,6 +1020,61 @@ fbnic_set_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh, return 0; } +static int +fbnic_modify_rxfh_context(struct net_device *netdev, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + const u32 *indir = rxfh->indir; + unsigned int changes; + + if (!indir) + indir = ethtool_rxfh_context_indir(ctx); + + changes = fbnic_set_indir(fbn, rxfh->rss_context, indir); + if (changes && netif_running(netdev)) + fbnic_rss_reinit_hw(fbn->fbd, fbn); + + return 0; +} + +static int +fbnic_create_rxfh_context(struct net_device *netdev, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + + if (rxfh->hfunc && rxfh->hfunc != ETH_RSS_HASH_TOP) { + NL_SET_ERR_MSG_MOD(extack, "RSS hash function not supported"); + return -EOPNOTSUPP; + } + ctx->hfunc = ETH_RSS_HASH_TOP; + + if (!rxfh->indir) { + u32 *indir = ethtool_rxfh_context_indir(ctx); + unsigned int num_rx = fbn->num_rx_queues; + unsigned int i; + + for (i = 0; i < FBNIC_RPC_RSS_TBL_SIZE; i++) + indir[i] = ethtool_rxfh_indir_default(i, num_rx); + } + + return fbnic_modify_rxfh_context(netdev, ctx, rxfh, extack); +} + +static int +fbnic_remove_rxfh_context(struct net_device *netdev, + struct ethtool_rxfh_context *ctx, u32 rss_context, + struct netlink_ext_ack *extack) +{ + /* Nothing to do, contexts are allocated statically */ + return 0; +} + static void fbnic_get_channels(struct net_device *netdev, struct ethtool_channels *ch) { @@ -523,14 +1224,14 @@ static void fbnic_get_ts_stats(struct net_device *netdev, unsigned int start; int i; - ts_stats->pkts = fbn->tx_stats.ts_packets; - ts_stats->lost = fbn->tx_stats.ts_lost; + ts_stats->pkts = fbn->tx_stats.twq.ts_packets; + ts_stats->lost = fbn->tx_stats.twq.ts_lost; for (i = 0; i < fbn->num_tx_queues; i++) { ring = fbn->tx[i]; do { start = u64_stats_fetch_begin(&ring->stats.syncp); - ts_packets = ring->stats.ts_packets; - ts_lost = ring->stats.ts_lost; + ts_packets = ring->stats.twq.ts_packets; + ts_lost = ring->stats.twq.ts_lost; } while (u64_stats_fetch_retry(&ring->stats.syncp, start)); ts_stats->pkts += ts_packets; ts_stats->lost += ts_lost; @@ -586,6 +1287,7 @@ fbnic_get_eth_mac_stats(struct net_device *netdev, } static const struct ethtool_ops fbnic_ethtool_ops = { + .rxfh_max_num_contexts = FBNIC_RPC_RSS_TBL_COUNT, .get_drvinfo = fbnic_get_drvinfo, .get_regs_len = fbnic_get_regs_len, .get_regs = fbnic_get_regs, @@ -598,6 +1300,9 @@ static const struct ethtool_ops fbnic_ethtool_ops = { .get_rxfh_indir_size = fbnic_get_rxfh_indir_size, .get_rxfh = fbnic_get_rxfh, .set_rxfh = fbnic_set_rxfh, + .create_rxfh_context = fbnic_create_rxfh_context, + .modify_rxfh_context = fbnic_modify_rxfh_context, + .remove_rxfh_context = fbnic_remove_rxfh_context, .get_channels = fbnic_get_channels, .set_channels = fbnic_set_channels, .get_ts_info = fbnic_get_ts_info, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c index 7a96b6ee773f..c59f1ce8de32 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c @@ -487,8 +487,9 @@ static void fbnic_get_queue_stats_rx(struct net_device *dev, int idx, struct fbnic_net *fbn = netdev_priv(dev); struct fbnic_ring *rxr = fbn->rx[idx]; struct fbnic_queue_stats *stats; + u64 bytes, packets, alloc_fail; + u64 csum_complete, csum_none; unsigned int start; - u64 bytes, packets; if (!rxr) return; @@ -498,10 +499,16 @@ static void fbnic_get_queue_stats_rx(struct net_device *dev, int idx, start = u64_stats_fetch_begin(&stats->syncp); bytes = stats->bytes; packets = stats->packets; + alloc_fail = stats->rx.alloc_failed; + csum_complete = stats->rx.csum_complete; + csum_none = stats->rx.csum_none; } while (u64_stats_fetch_retry(&stats->syncp, start)); rx->bytes = bytes; rx->packets = packets; + rx->alloc_fail = alloc_fail; + rx->csum_complete = csum_complete; + rx->csum_none = csum_none; } static void fbnic_get_queue_stats_tx(struct net_device *dev, int idx, @@ -510,6 +517,7 @@ static void fbnic_get_queue_stats_tx(struct net_device *dev, int idx, struct fbnic_net *fbn = netdev_priv(dev); struct fbnic_ring *txr = fbn->tx[idx]; struct fbnic_queue_stats *stats; + u64 stop, wake, csum, lso; unsigned int start; u64 bytes, packets; @@ -521,10 +529,18 @@ static void fbnic_get_queue_stats_tx(struct net_device *dev, int idx, start = u64_stats_fetch_begin(&stats->syncp); bytes = stats->bytes; packets = stats->packets; + csum = stats->twq.csum_partial; + lso = stats->twq.lso; + stop = stats->twq.stop; + wake = stats->twq.wake; } while (u64_stats_fetch_retry(&stats->syncp, start)); tx->bytes = bytes; tx->packets = packets; + tx->needs_csum = csum + lso; + tx->hw_gso_wire_packets = lso; + tx->stop = stop; + tx->wake = wake; } static void fbnic_get_base_stats(struct net_device *dev, @@ -535,9 +551,16 @@ static void fbnic_get_base_stats(struct net_device *dev, tx->bytes = fbn->tx_stats.bytes; tx->packets = fbn->tx_stats.packets; + tx->needs_csum = fbn->tx_stats.twq.csum_partial + fbn->tx_stats.twq.lso; + tx->hw_gso_wire_packets = fbn->tx_stats.twq.lso; + tx->stop = fbn->tx_stats.twq.stop; + tx->wake = fbn->tx_stats.twq.wake; rx->bytes = fbn->rx_stats.bytes; rx->packets = fbn->rx_stats.packets; + rx->alloc_fail = fbn->rx_stats.rx.alloc_failed; + rx->csum_complete = fbn->rx_stats.rx.csum_complete; + rx->csum_none = fbn->rx_stats.rx.csum_none; } static const struct netdev_stat_ops fbnic_stat_ops = { @@ -628,15 +651,32 @@ struct net_device *fbnic_netdev_alloc(struct fbnic_dev *fbd) fbnic_rss_key_fill(fbn->rss_key); fbnic_rss_init_en_mask(fbn); + netdev->priv_flags |= IFF_UNICAST_FLT; + + netdev->gso_partial_features = + NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM | + NETIF_F_GSO_IPXIP4 | + NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->features |= + netdev->gso_partial_features | + FBNIC_TUN_GSO_FEATURES | NETIF_F_RXHASH | NETIF_F_SG | NETIF_F_HW_CSUM | - NETIF_F_RXCSUM; + NETIF_F_RXCSUM | + NETIF_F_TSO | + NETIF_F_TSO_ECN | + NETIF_F_TSO6 | + NETIF_F_GSO_PARTIAL | + NETIF_F_GSO_UDP_L4; netdev->hw_features |= netdev->features; netdev->vlan_features |= netdev->features; netdev->hw_enc_features |= netdev->features; + netdev->features |= NETIF_F_NTUPLE; netdev->min_mtu = IPV6_MIN_MTU; netdev->max_mtu = FBNIC_MAX_JUMBO_FRAME_SIZE - ETH_HLEN; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h index a392ac1cc4f2..b84b447a8d8a 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h @@ -13,6 +13,9 @@ #define FBNIC_MAX_NAPI_VECTORS 128u +/* Natively supported tunnel GSO features (not thru GSO_PARTIAL) */ +#define FBNIC_TUN_GSO_FEATURES NETIF_F_GSO_IPXIP6 + struct fbnic_net { struct fbnic_ring *tx[FBNIC_MAX_TXQS]; struct fbnic_ring *rx[FBNIC_MAX_RXQS]; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c b/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c index bb11fc83367d..860b02b22c15 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c @@ -133,7 +133,6 @@ int fbnic_phylink_init(struct net_device *netdev) struct fbnic_net *fbn = netdev_priv(netdev); struct phylink *phylink; - fbn->phylink_pcs.neg_mode = true; fbn->phylink_pcs.ops = &fbnic_phylink_pcs_ops; fbn->phylink_config.dev = &netdev->dev; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c index c25bd300b902..8ff07b5562e3 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c @@ -3,6 +3,7 @@ #include <linux/etherdevice.h> #include <linux/ethtool.h> +#include <net/ipv6.h> #include "fbnic.h" #include "fbnic_netdev.h" @@ -60,7 +61,7 @@ void fbnic_rss_disable_hw(struct fbnic_dev *fbd) #define FBNIC_FH_2_RSSEM_BIT(_fh, _rssem, _val) \ FIELD_PREP(FBNIC_RPC_ACT_TBL1_RSS_ENA_##_rssem, \ FIELD_GET(RXH_##_fh, _val)) -static u16 fbnic_flow_hash_2_rss_en_mask(struct fbnic_net *fbn, int flow_type) +u16 fbnic_flow_hash_2_rss_en_mask(struct fbnic_net *fbn, int flow_type) { u32 flow_hash = fbn->rss_flow_hash[flow_type]; u32 rss_en_mask = 0; @@ -698,6 +699,359 @@ void fbnic_write_tce_tcam(struct fbnic_dev *fbd) __fbnic_write_tce_tcam(fbd); } +struct fbnic_ip_addr *__fbnic_ip4_sync(struct fbnic_dev *fbd, + struct fbnic_ip_addr *ip_addr, + const struct in_addr *addr, + const struct in_addr *mask) +{ + struct fbnic_ip_addr *avail_addr = NULL; + unsigned int i; + + /* Scan from top of list to bottom, filling bottom up. */ + for (i = 0; i < FBNIC_RPC_TCAM_IP_ADDR_NUM_ENTRIES; i++, ip_addr++) { + struct in6_addr *m = &ip_addr->mask; + + if (ip_addr->state == FBNIC_TCAM_S_DISABLED) { + avail_addr = ip_addr; + continue; + } + + if (ip_addr->version != 4) + continue; + + /* Drop avail_addr if mask is a subset of our current mask, + * This prevents us from inserting a longer prefix behind a + * shorter one. + * + * The mask is stored inverted value so as an example: + * m ffff ffff ffff ffff ffff ffff ffff 0000 0000 + * mask 0000 0000 0000 0000 0000 0000 0000 ffff ffff + * + * "m" and "mask" represent typical IPv4 mask stored in + * the TCAM and those provided by the stack. The code below + * should return a non-zero result if there is a 0 stored + * anywhere in "m" where "mask" has a 0. + */ + if (~m->s6_addr32[3] & ~mask->s_addr) { + avail_addr = NULL; + continue; + } + + /* Check to see if the mask actually contains fewer bits than + * our new mask "m". The XOR below should only result in 0 if + * "m" is masking a bit that we are looking for in our new + * "mask", we eliminated the 0^0 case with the check above. + * + * If it contains fewer bits we need to stop here, otherwise + * we might be adding an unreachable rule. + */ + if (~(m->s6_addr32[3] ^ mask->s_addr)) + break; + + if (ip_addr->value.s6_addr32[3] == addr->s_addr) { + avail_addr = ip_addr; + break; + } + } + + if (avail_addr && avail_addr->state == FBNIC_TCAM_S_DISABLED) { + ipv6_addr_set(&avail_addr->value, 0, 0, 0, addr->s_addr); + ipv6_addr_set(&avail_addr->mask, htonl(~0), htonl(~0), + htonl(~0), ~mask->s_addr); + avail_addr->version = 4; + + avail_addr->state = FBNIC_TCAM_S_ADD; + } + + return avail_addr; +} + +struct fbnic_ip_addr *__fbnic_ip6_sync(struct fbnic_dev *fbd, + struct fbnic_ip_addr *ip_addr, + const struct in6_addr *addr, + const struct in6_addr *mask) +{ + struct fbnic_ip_addr *avail_addr = NULL; + unsigned int i; + + ip_addr = &ip_addr[FBNIC_RPC_TCAM_IP_ADDR_NUM_ENTRIES - 1]; + + /* Scan from bottom of list to top, filling top down. */ + for (i = FBNIC_RPC_TCAM_IP_ADDR_NUM_ENTRIES; i--; ip_addr--) { + struct in6_addr *m = &ip_addr->mask; + + if (ip_addr->state == FBNIC_TCAM_S_DISABLED) { + avail_addr = ip_addr; + continue; + } + + if (ip_addr->version != 6) + continue; + + /* Drop avail_addr if mask is a superset of our current mask. + * This prevents us from inserting a longer prefix behind a + * shorter one. + * + * The mask is stored inverted value so as an example: + * m 0000 0000 0000 0000 0000 0000 0000 0000 0000 + * mask ffff ffff ffff ffff ffff ffff ffff ffff ffff + * + * "m" and "mask" represent typical IPv6 mask stored in + * the TCAM and those provided by the stack. The code below + * should return a non-zero result which will cause us + * to drop the avail_addr value that might be cached + * to prevent us from dropping a v6 address behind it. + */ + if ((m->s6_addr32[0] & mask->s6_addr32[0]) | + (m->s6_addr32[1] & mask->s6_addr32[1]) | + (m->s6_addr32[2] & mask->s6_addr32[2]) | + (m->s6_addr32[3] & mask->s6_addr32[3])) { + avail_addr = NULL; + continue; + } + + /* The previous test eliminated any overlap between the + * two values so now we need to check for gaps. + * + * If the mask is equal to our current mask then it should + * result with m ^ mask = ffff ffff, if however the value + * stored in m is bigger then we should see a 0 appear + * somewhere in the mask. + */ + if (~(m->s6_addr32[0] ^ mask->s6_addr32[0]) | + ~(m->s6_addr32[1] ^ mask->s6_addr32[1]) | + ~(m->s6_addr32[2] ^ mask->s6_addr32[2]) | + ~(m->s6_addr32[3] ^ mask->s6_addr32[3])) + break; + + if (ipv6_addr_cmp(&ip_addr->value, addr)) + continue; + + avail_addr = ip_addr; + break; + } + + if (avail_addr && avail_addr->state == FBNIC_TCAM_S_DISABLED) { + memcpy(&avail_addr->value, addr, sizeof(*addr)); + ipv6_addr_set(&avail_addr->mask, + ~mask->s6_addr32[0], ~mask->s6_addr32[1], + ~mask->s6_addr32[2], ~mask->s6_addr32[3]); + avail_addr->version = 6; + + avail_addr->state = FBNIC_TCAM_S_ADD; + } + + return avail_addr; +} + +int __fbnic_ip_unsync(struct fbnic_ip_addr *ip_addr, unsigned int tcam_idx) +{ + if (!test_and_clear_bit(tcam_idx, ip_addr->act_tcam)) + return -ENOENT; + + if (bitmap_empty(ip_addr->act_tcam, FBNIC_RPC_TCAM_ACT_NUM_ENTRIES)) + ip_addr->state = FBNIC_TCAM_S_DELETE; + + return 0; +} + +static void fbnic_clear_ip_src_entry(struct fbnic_dev *fbd, unsigned int idx) +{ + int i; + + /* Invalidate entry and clear addr state info */ + for (i = 0; i <= FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN; i++) + wr32(fbd, FBNIC_RPC_TCAM_IPSRC(idx, i), 0); +} + +static void fbnic_clear_ip_dst_entry(struct fbnic_dev *fbd, unsigned int idx) +{ + int i; + + /* Invalidate entry and clear addr state info */ + for (i = 0; i <= FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN; i++) + wr32(fbd, FBNIC_RPC_TCAM_IPDST(idx, i), 0); +} + +static void fbnic_clear_ip_outer_src_entry(struct fbnic_dev *fbd, + unsigned int idx) +{ + int i; + + /* Invalidate entry and clear addr state info */ + for (i = 0; i <= FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN; i++) + wr32(fbd, FBNIC_RPC_TCAM_OUTER_IPSRC(idx, i), 0); +} + +static void fbnic_clear_ip_outer_dst_entry(struct fbnic_dev *fbd, + unsigned int idx) +{ + int i; + + /* Invalidate entry and clear addr state info */ + for (i = 0; i <= FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN; i++) + wr32(fbd, FBNIC_RPC_TCAM_OUTER_IPDST(idx, i), 0); +} + +static void fbnic_write_ip_src_entry(struct fbnic_dev *fbd, unsigned int idx, + struct fbnic_ip_addr *ip_addr) +{ + __be16 *mask, *value; + int i; + + mask = &ip_addr->mask.s6_addr16[FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN - 1]; + value = &ip_addr->value.s6_addr16[FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN - 1]; + + for (i = 0; i < FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN; i++) + wr32(fbd, FBNIC_RPC_TCAM_IPSRC(idx, i), + FIELD_PREP(FBNIC_RPC_TCAM_IP_ADDR_MASK, ntohs(*mask--)) | + FIELD_PREP(FBNIC_RPC_TCAM_IP_ADDR_VALUE, ntohs(*value--))); + wrfl(fbd); + + /* Bit 129 is used to flag for v4/v6 */ + wr32(fbd, FBNIC_RPC_TCAM_IPSRC(idx, i), + (ip_addr->version == 6) | FBNIC_RPC_TCAM_VALIDATE); +} + +static void fbnic_write_ip_dst_entry(struct fbnic_dev *fbd, unsigned int idx, + struct fbnic_ip_addr *ip_addr) +{ + __be16 *mask, *value; + int i; + + mask = &ip_addr->mask.s6_addr16[FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN - 1]; + value = &ip_addr->value.s6_addr16[FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN - 1]; + + for (i = 0; i < FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN; i++) + wr32(fbd, FBNIC_RPC_TCAM_IPDST(idx, i), + FIELD_PREP(FBNIC_RPC_TCAM_IP_ADDR_MASK, ntohs(*mask--)) | + FIELD_PREP(FBNIC_RPC_TCAM_IP_ADDR_VALUE, ntohs(*value--))); + wrfl(fbd); + + /* Bit 129 is used to flag for v4/v6 */ + wr32(fbd, FBNIC_RPC_TCAM_IPDST(idx, i), + (ip_addr->version == 6) | FBNIC_RPC_TCAM_VALIDATE); +} + +static void fbnic_write_ip_outer_src_entry(struct fbnic_dev *fbd, + unsigned int idx, + struct fbnic_ip_addr *ip_addr) +{ + __be16 *mask, *value; + int i; + + mask = &ip_addr->mask.s6_addr16[FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN - 1]; + value = &ip_addr->value.s6_addr16[FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN - 1]; + + for (i = 0; i < FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN; i++) + wr32(fbd, FBNIC_RPC_TCAM_OUTER_IPSRC(idx, i), + FIELD_PREP(FBNIC_RPC_TCAM_IP_ADDR_MASK, ntohs(*mask--)) | + FIELD_PREP(FBNIC_RPC_TCAM_IP_ADDR_VALUE, ntohs(*value--))); + wrfl(fbd); + + wr32(fbd, FBNIC_RPC_TCAM_OUTER_IPSRC(idx, i), FBNIC_RPC_TCAM_VALIDATE); +} + +static void fbnic_write_ip_outer_dst_entry(struct fbnic_dev *fbd, + unsigned int idx, + struct fbnic_ip_addr *ip_addr) +{ + __be16 *mask, *value; + int i; + + mask = &ip_addr->mask.s6_addr16[FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN - 1]; + value = &ip_addr->value.s6_addr16[FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN - 1]; + + for (i = 0; i < FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN; i++) + wr32(fbd, FBNIC_RPC_TCAM_OUTER_IPDST(idx, i), + FIELD_PREP(FBNIC_RPC_TCAM_IP_ADDR_MASK, ntohs(*mask--)) | + FIELD_PREP(FBNIC_RPC_TCAM_IP_ADDR_VALUE, ntohs(*value--))); + wrfl(fbd); + + wr32(fbd, FBNIC_RPC_TCAM_OUTER_IPDST(idx, i), FBNIC_RPC_TCAM_VALIDATE); +} + +void fbnic_write_ip_addr(struct fbnic_dev *fbd) +{ + int idx; + + for (idx = ARRAY_SIZE(fbd->ip_src); idx--;) { + struct fbnic_ip_addr *ip_addr = &fbd->ip_src[idx]; + + /* Check if update flag is set else skip. */ + if (!(ip_addr->state & FBNIC_TCAM_S_UPDATE)) + continue; + + /* Clear by writing 0s. */ + if (ip_addr->state == FBNIC_TCAM_S_DELETE) { + /* Invalidate entry and clear addr state info */ + fbnic_clear_ip_src_entry(fbd, idx); + memset(ip_addr, 0, sizeof(*ip_addr)); + + continue; + } + + fbnic_write_ip_src_entry(fbd, idx, ip_addr); + + ip_addr->state = FBNIC_TCAM_S_VALID; + } + + /* Repeat process for other IP TCAMs */ + for (idx = ARRAY_SIZE(fbd->ip_dst); idx--;) { + struct fbnic_ip_addr *ip_addr = &fbd->ip_dst[idx]; + + if (!(ip_addr->state & FBNIC_TCAM_S_UPDATE)) + continue; + + if (ip_addr->state == FBNIC_TCAM_S_DELETE) { + fbnic_clear_ip_dst_entry(fbd, idx); + memset(ip_addr, 0, sizeof(*ip_addr)); + + continue; + } + + fbnic_write_ip_dst_entry(fbd, idx, ip_addr); + + ip_addr->state = FBNIC_TCAM_S_VALID; + } + + for (idx = ARRAY_SIZE(fbd->ipo_src); idx--;) { + struct fbnic_ip_addr *ip_addr = &fbd->ipo_src[idx]; + + if (!(ip_addr->state & FBNIC_TCAM_S_UPDATE)) + continue; + + if (ip_addr->state == FBNIC_TCAM_S_DELETE) { + fbnic_clear_ip_outer_src_entry(fbd, idx); + memset(ip_addr, 0, sizeof(*ip_addr)); + + continue; + } + + fbnic_write_ip_outer_src_entry(fbd, idx, ip_addr); + + ip_addr->state = FBNIC_TCAM_S_VALID; + } + + for (idx = ARRAY_SIZE(fbd->ipo_dst); idx--;) { + struct fbnic_ip_addr *ip_addr = &fbd->ipo_dst[idx]; + + if (!(ip_addr->state & FBNIC_TCAM_S_UPDATE)) + continue; + + if (ip_addr->state == FBNIC_TCAM_S_DELETE) { + fbnic_clear_ip_outer_dst_entry(fbd, idx); + memset(ip_addr, 0, sizeof(*ip_addr)); + + continue; + } + + fbnic_write_ip_outer_dst_entry(fbd, idx, ip_addr); + + ip_addr->state = FBNIC_TCAM_S_VALID; + } +} + void fbnic_clear_rules(struct fbnic_dev *fbd) { u32 dest = FIELD_PREP(FBNIC_RPC_ACT_TBL0_DEST_MASK, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.h b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.h index 0d8285fa5b45..6892414195c3 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.h @@ -7,6 +7,8 @@ #include <uapi/linux/in6.h> #include <linux/bitfield.h> +struct in_addr; + /* The TCAM state definitions follow an expected ordering. * They start out disabled, then move through the following states: * Disabled 0 -> Add 2 @@ -32,6 +34,12 @@ enum { #define FBNIC_RPC_TCAM_MACDA_WORD_LEN 3 #define FBNIC_RPC_TCAM_MACDA_NUM_ENTRIES 32 +/* 8 IPSRC and IPDST TCAM Entries each + * 8 registers, Validate each + */ +#define FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN 8 +#define FBNIC_RPC_TCAM_IP_ADDR_NUM_ENTRIES 8 + #define FBNIC_RPC_TCAM_ACT_WORD_LEN 11 #define FBNIC_RPC_TCAM_ACT_NUM_ENTRIES 64 @@ -47,6 +55,13 @@ struct fbnic_mac_addr { DECLARE_BITMAP(act_tcam, FBNIC_RPC_TCAM_ACT_NUM_ENTRIES); }; +struct fbnic_ip_addr { + struct in6_addr mask, value; + unsigned char version; + unsigned char state; + DECLARE_BITMAP(act_tcam, FBNIC_RPC_TCAM_ACT_NUM_ENTRIES); +}; + struct fbnic_act_tcam { struct { u16 tcam[FBNIC_RPC_TCAM_ACT_WORD_LEN]; @@ -81,6 +96,11 @@ enum { #define FBNIC_RPC_ACT_TBL_BMC_OFFSET 0 #define FBNIC_RPC_ACT_TBL_BMC_ALL_MULTI_OFFSET 1 +/* This should leave us with 48 total entries in the TCAM that can be used + * for NFC after also deducting the 14 needed for RSS table programming. + */ +#define FBNIC_RPC_ACT_TBL_NFC_OFFSET 2 + /* We reserve the last 14 entries for RSS rules on the host. The BMC * unicast rule will need to be populated above these and is expected to * use MACDA TCAM entry 23 to store the BMC MAC address. @@ -88,6 +108,9 @@ enum { #define FBNIC_RPC_ACT_TBL_RSS_OFFSET \ (FBNIC_RPC_ACT_TBL_NUM_ENTRIES - FBNIC_RSS_EN_NUM_ENTRIES) +#define FBNIC_RPC_ACT_TBL_NFC_ENTRIES \ + (FBNIC_RPC_ACT_TBL_RSS_OFFSET - FBNIC_RPC_ACT_TBL_NFC_OFFSET) + /* Flags used to identify the owner for this MAC filter. Note that any * flags set for Broadcast thru Promisc indicate that the rule belongs * to the RSS filters for the host. @@ -168,6 +191,7 @@ void fbnic_rss_init_en_mask(struct fbnic_net *fbn); void fbnic_rss_disable_hw(struct fbnic_dev *fbd); void fbnic_rss_reinit_hw(struct fbnic_dev *fbd, struct fbnic_net *fbn); void fbnic_rss_reinit(struct fbnic_dev *fbd, struct fbnic_net *fbn); +u16 fbnic_flow_hash_2_rss_en_mask(struct fbnic_net *fbn, int flow_type); int __fbnic_xc_unsync(struct fbnic_mac_addr *mac_addr, unsigned int tcam_idx); struct fbnic_mac_addr *__fbnic_uc_sync(struct fbnic_dev *fbd, @@ -177,6 +201,17 @@ struct fbnic_mac_addr *__fbnic_mc_sync(struct fbnic_dev *fbd, void fbnic_sift_macda(struct fbnic_dev *fbd); void fbnic_write_macda(struct fbnic_dev *fbd); +struct fbnic_ip_addr *__fbnic_ip4_sync(struct fbnic_dev *fbd, + struct fbnic_ip_addr *ip_addr, + const struct in_addr *addr, + const struct in_addr *mask); +struct fbnic_ip_addr *__fbnic_ip6_sync(struct fbnic_dev *fbd, + struct fbnic_ip_addr *ip_addr, + const struct in6_addr *addr, + const struct in6_addr *mask); +int __fbnic_ip_unsync(struct fbnic_ip_addr *ip_addr, unsigned int tcam_idx); +void fbnic_write_ip_addr(struct fbnic_dev *fbd); + static inline int __fbnic_uc_unsync(struct fbnic_mac_addr *mac_addr) { return __fbnic_xc_unsync(mac_addr, FBNIC_MAC_ADDR_T_UNICAST); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c index d4d7027df9a0..b2e544a66de3 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c @@ -6,6 +6,7 @@ #include <linux/pci.h> #include <net/netdev_queues.h> #include <net/page_pool/helpers.h> +#include <net/tcp.h> #include "fbnic.h" #include "fbnic_csr.h" @@ -18,6 +19,7 @@ enum { struct fbnic_xmit_cb { u32 bytecount; + u16 gso_segs; u8 desc_count; u8 flags; int hw_head; @@ -113,6 +115,11 @@ static int fbnic_maybe_stop_tx(const struct net_device *dev, res = netif_txq_maybe_stop(txq, fbnic_desc_unused(ring), size, FBNIC_TX_DESC_WAKEUP); + if (!res) { + u64_stats_update_begin(&ring->stats.syncp); + ring->stats.twq.stop++; + u64_stats_update_end(&ring->stats.syncp); + } return !res; } @@ -174,8 +181,72 @@ static bool fbnic_tx_tstamp(struct sk_buff *skb) } static bool +fbnic_tx_lso(struct fbnic_ring *ring, struct sk_buff *skb, + struct skb_shared_info *shinfo, __le64 *meta, + unsigned int *l2len, unsigned int *i3len) +{ + unsigned int l3_type, l4_type, l4len, hdrlen; + unsigned char *l4hdr; + __be16 payload_len; + + if (unlikely(skb_cow_head(skb, 0))) + return true; + + if (shinfo->gso_type & SKB_GSO_PARTIAL) { + l3_type = FBNIC_TWD_L3_TYPE_OTHER; + } else if (!skb->encapsulation) { + if (ip_hdr(skb)->version == 4) + l3_type = FBNIC_TWD_L3_TYPE_IPV4; + else + l3_type = FBNIC_TWD_L3_TYPE_IPV6; + } else { + unsigned int o3len; + + o3len = skb_inner_network_header(skb) - skb_network_header(skb); + *i3len -= o3len; + *meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_L3_OHLEN_MASK, + o3len / 2)); + l3_type = FBNIC_TWD_L3_TYPE_V6V6; + } + + l4hdr = skb_checksum_start(skb); + payload_len = cpu_to_be16(skb->len - (l4hdr - skb->data)); + + if (shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) { + struct tcphdr *tcph = (struct tcphdr *)l4hdr; + + l4_type = FBNIC_TWD_L4_TYPE_TCP; + l4len = __tcp_hdrlen((struct tcphdr *)l4hdr); + csum_replace_by_diff(&tcph->check, (__force __wsum)payload_len); + } else { + struct udphdr *udph = (struct udphdr *)l4hdr; + + l4_type = FBNIC_TWD_L4_TYPE_UDP; + l4len = sizeof(struct udphdr); + csum_replace_by_diff(&udph->check, (__force __wsum)payload_len); + } + + hdrlen = (l4hdr - skb->data) + l4len; + *meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_L3_TYPE_MASK, l3_type) | + FIELD_PREP(FBNIC_TWD_L4_TYPE_MASK, l4_type) | + FIELD_PREP(FBNIC_TWD_L4_HLEN_MASK, l4len / 4) | + FIELD_PREP(FBNIC_TWD_MSS_MASK, shinfo->gso_size) | + FBNIC_TWD_FLAG_REQ_LSO); + + FBNIC_XMIT_CB(skb)->bytecount += (shinfo->gso_segs - 1) * hdrlen; + FBNIC_XMIT_CB(skb)->gso_segs = shinfo->gso_segs; + + u64_stats_update_begin(&ring->stats.syncp); + ring->stats.twq.lso += shinfo->gso_segs; + u64_stats_update_end(&ring->stats.syncp); + + return false; +} + +static bool fbnic_tx_offloads(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta) { + struct skb_shared_info *shinfo = skb_shinfo(skb); unsigned int l2len, i3len; if (fbnic_tx_tstamp(skb)) @@ -190,7 +261,15 @@ fbnic_tx_offloads(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta) *meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_CSUM_OFFSET_MASK, skb->csum_offset / 2)); - *meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_CSO); + if (shinfo->gso_size) { + if (fbnic_tx_lso(ring, skb, shinfo, meta, &l2len, &i3len)) + return true; + } else { + *meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_CSO); + u64_stats_update_begin(&ring->stats.syncp); + ring->stats.twq.csum_partial++; + u64_stats_update_end(&ring->stats.syncp); + } *meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_L2_HLEN_MASK, l2len / 2) | FIELD_PREP(FBNIC_TWD_L3_IHLEN_MASK, i3len / 2)); @@ -198,12 +277,15 @@ fbnic_tx_offloads(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta) } static void -fbnic_rx_csum(u64 rcd, struct sk_buff *skb, struct fbnic_ring *rcq) +fbnic_rx_csum(u64 rcd, struct sk_buff *skb, struct fbnic_ring *rcq, + u64 *csum_cmpl, u64 *csum_none) { skb_checksum_none_assert(skb); - if (unlikely(!(skb->dev->features & NETIF_F_RXCSUM))) + if (unlikely(!(skb->dev->features & NETIF_F_RXCSUM))) { + (*csum_none)++; return; + } if (FIELD_GET(FBNIC_RCD_META_L4_CSUM_UNNECESSARY, rcd)) { skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -212,6 +294,7 @@ fbnic_rx_csum(u64 rcd, struct sk_buff *skb, struct fbnic_ring *rcq) skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = (__force __wsum)csum; + (*csum_cmpl)++; } } @@ -329,7 +412,9 @@ fbnic_xmit_frame_ring(struct sk_buff *skb, struct fbnic_ring *ring) /* Write all members within DWORD to condense this into 2 4B writes */ FBNIC_XMIT_CB(skb)->bytecount = skb->len; + FBNIC_XMIT_CB(skb)->gso_segs = 1; FBNIC_XMIT_CB(skb)->desc_count = 0; + FBNIC_XMIT_CB(skb)->flags = 0; if (fbnic_tx_offloads(ring, skb, meta)) goto err_free; @@ -356,6 +441,59 @@ netdev_tx_t fbnic_xmit_frame(struct sk_buff *skb, struct net_device *dev) return fbnic_xmit_frame_ring(skb, fbn->tx[q_map]); } +static netdev_features_t +fbnic_features_check_encap_gso(struct sk_buff *skb, struct net_device *dev, + netdev_features_t features, unsigned int l3len) +{ + netdev_features_t skb_gso_features; + struct ipv6hdr *ip6_hdr; + unsigned char l4_hdr; + unsigned int start; + __be16 frag_off; + + /* Require MANGLEID for GSO_PARTIAL of IPv4. + * In theory we could support TSO with single, innermost v4 header + * by pretending everything before it is L2, but that needs to be + * parsed case by case.. so leaving it for when the need arises. + */ + if (!(features & NETIF_F_TSO_MANGLEID)) + features &= ~NETIF_F_TSO; + + skb_gso_features = skb_shinfo(skb)->gso_type; + skb_gso_features <<= NETIF_F_GSO_SHIFT; + + /* We'd only clear the native GSO features, so don't bother validating + * if the match can only be on those supported thru GSO_PARTIAL. + */ + if (!(skb_gso_features & FBNIC_TUN_GSO_FEATURES)) + return features; + + /* We can only do IPv6-in-IPv6, not v4-in-v6. It'd be nice + * to fall back to partial for this, or any failure below. + * This is just an optimization, UDPv4 will be caught later on. + */ + if (skb_gso_features & NETIF_F_TSO) + return features & ~FBNIC_TUN_GSO_FEATURES; + + /* Inner headers multiple of 2 */ + if ((skb_inner_network_header(skb) - skb_network_header(skb)) % 2) + return features & ~FBNIC_TUN_GSO_FEATURES; + + /* Encapsulated GSO packet, make 100% sure it's IPv6-in-IPv6. */ + ip6_hdr = ipv6_hdr(skb); + if (ip6_hdr->version != 6) + return features & ~FBNIC_TUN_GSO_FEATURES; + + l4_hdr = ip6_hdr->nexthdr; + start = (unsigned char *)ip6_hdr - skb->data + sizeof(struct ipv6hdr); + start = ipv6_skip_exthdr(skb, start, &l4_hdr, &frag_off); + if (frag_off || l4_hdr != IPPROTO_IPV6 || + skb->data + start != skb_inner_network_header(skb)) + return features & ~FBNIC_TUN_GSO_FEATURES; + + return features; +} + netdev_features_t fbnic_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) @@ -376,9 +514,12 @@ fbnic_features_check(struct sk_buff *skb, struct net_device *dev, !FIELD_FIT(FBNIC_TWD_L2_HLEN_MASK, l2len / 2) || !FIELD_FIT(FBNIC_TWD_L3_IHLEN_MASK, l3len / 2) || !FIELD_FIT(FBNIC_TWD_CSUM_OFFSET_MASK, skb->csum_offset / 2)) - return features & ~NETIF_F_CSUM_MASK; + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); - return features; + if (likely(!skb->encapsulation) || !skb_is_gso(skb)) + return features; + + return fbnic_features_check_encap_gso(skb, dev, features, l3len); } static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget, @@ -429,7 +570,7 @@ static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget, } total_bytes += FBNIC_XMIT_CB(skb)->bytecount; - total_packets += 1; + total_packets += FBNIC_XMIT_CB(skb)->gso_segs; napi_consume_skb(skb, napi_budget); } @@ -444,7 +585,7 @@ static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget, if (unlikely(discard)) { u64_stats_update_begin(&ring->stats.syncp); ring->stats.dropped += total_packets; - ring->stats.ts_lost += ts_lost; + ring->stats.twq.ts_lost += ts_lost; u64_stats_update_end(&ring->stats.syncp); netdev_tx_completed_queue(txq, total_packets, total_bytes); @@ -456,9 +597,13 @@ static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget, ring->stats.packets += total_packets; u64_stats_update_end(&ring->stats.syncp); - netif_txq_completed_wake(txq, total_packets, total_bytes, - fbnic_desc_unused(ring), - FBNIC_TX_DESC_WAKEUP); + if (!netif_txq_completed_wake(txq, total_packets, total_bytes, + fbnic_desc_unused(ring), + FBNIC_TX_DESC_WAKEUP)) { + u64_stats_update_begin(&ring->stats.syncp); + ring->stats.twq.wake++; + u64_stats_update_end(&ring->stats.syncp); + } } static void fbnic_clean_tsq(struct fbnic_napi_vector *nv, @@ -507,7 +652,7 @@ static void fbnic_clean_tsq(struct fbnic_napi_vector *nv, skb_tstamp_tx(skb, &hwtstamp); u64_stats_update_begin(&ring->stats.syncp); - ring->stats.ts_packets++; + ring->stats.twq.ts_packets++; u64_stats_update_end(&ring->stats.syncp); } @@ -661,8 +806,13 @@ static void fbnic_fill_bdq(struct fbnic_napi_vector *nv, struct fbnic_ring *bdq) struct page *page; page = page_pool_dev_alloc_pages(nv->page_pool); - if (!page) + if (!page) { + u64_stats_update_begin(&bdq->stats.syncp); + bdq->stats.rx.alloc_failed++; + u64_stats_update_end(&bdq->stats.syncp); + break; + } fbnic_page_pool_init(bdq, i, page); fbnic_bd_prep(bdq, i, page); @@ -875,12 +1025,13 @@ static void fbnic_rx_tstamp(struct fbnic_napi_vector *nv, u64 rcd, static void fbnic_populate_skb_fields(struct fbnic_napi_vector *nv, u64 rcd, struct sk_buff *skb, - struct fbnic_q_triad *qt) + struct fbnic_q_triad *qt, + u64 *csum_cmpl, u64 *csum_none) { struct net_device *netdev = nv->napi.dev; struct fbnic_ring *rcq = &qt->cmpl; - fbnic_rx_csum(rcd, skb, rcq); + fbnic_rx_csum(rcd, skb, rcq, csum_cmpl, csum_none); if (netdev->features & NETIF_F_RXHASH) skb_set_hash(skb, @@ -898,7 +1049,8 @@ static bool fbnic_rcd_metadata_err(u64 rcd) static int fbnic_clean_rcq(struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt, int budget) { - unsigned int packets = 0, bytes = 0, dropped = 0; + unsigned int packets = 0, bytes = 0, dropped = 0, alloc_failed = 0; + u64 csum_complete = 0, csum_none = 0; struct fbnic_ring *rcq = &qt->cmpl; struct fbnic_pkt_buff *pkt; s32 head0 = -1, head1 = -1; @@ -947,14 +1099,22 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv, /* Populate skb and invalidate XDP */ if (!IS_ERR_OR_NULL(skb)) { - fbnic_populate_skb_fields(nv, rcd, skb, qt); + fbnic_populate_skb_fields(nv, rcd, skb, qt, + &csum_complete, + &csum_none); packets++; bytes += skb->len; napi_gro_receive(&nv->napi, skb); } else { - dropped++; + if (!skb) { + alloc_failed++; + dropped++; + } else { + dropped++; + } + fbnic_put_pkt_buff(nv, pkt, 1); } @@ -977,6 +1137,9 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv, /* Re-add ethernet header length (removed in fbnic_build_skb) */ rcq->stats.bytes += ETH_HLEN * packets; rcq->stats.dropped += dropped; + rcq->stats.rx.alloc_failed += alloc_failed; + rcq->stats.rx.csum_complete += csum_complete; + rcq->stats.rx.csum_none += csum_none; u64_stats_update_end(&rcq->stats.syncp); /* Unmap and free processed buffers */ @@ -1054,6 +1217,11 @@ void fbnic_aggregate_ring_rx_counters(struct fbnic_net *fbn, fbn->rx_stats.bytes += stats->bytes; fbn->rx_stats.packets += stats->packets; fbn->rx_stats.dropped += stats->dropped; + fbn->rx_stats.rx.alloc_failed += stats->rx.alloc_failed; + fbn->rx_stats.rx.csum_complete += stats->rx.csum_complete; + fbn->rx_stats.rx.csum_none += stats->rx.csum_none; + /* Remember to add new stats here */ + BUILD_BUG_ON(sizeof(fbn->tx_stats.rx) / 8 != 3); } void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn, @@ -1065,8 +1233,14 @@ void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn, fbn->tx_stats.bytes += stats->bytes; fbn->tx_stats.packets += stats->packets; fbn->tx_stats.dropped += stats->dropped; - fbn->tx_stats.ts_lost += stats->ts_lost; - fbn->tx_stats.ts_packets += stats->ts_packets; + fbn->tx_stats.twq.csum_partial += stats->twq.csum_partial; + fbn->tx_stats.twq.lso += stats->twq.lso; + fbn->tx_stats.twq.ts_lost += stats->twq.ts_lost; + fbn->tx_stats.twq.ts_packets += stats->twq.ts_packets; + fbn->tx_stats.twq.stop += stats->twq.stop; + fbn->tx_stats.twq.wake += stats->twq.wake; + /* Remember to add new stats here */ + BUILD_BUG_ON(sizeof(fbn->tx_stats.twq) / 8 != 6); } static void fbnic_remove_tx_ring(struct fbnic_net *fbn, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h index c2a94f31f71b..89a5c394f846 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h @@ -56,9 +56,22 @@ struct fbnic_pkt_buff { struct fbnic_queue_stats { u64 packets; u64 bytes; + union { + struct { + u64 csum_partial; + u64 lso; + u64 ts_packets; + u64 ts_lost; + u64 stop; + u64 wake; + } twq; + struct { + u64 alloc_failed; + u64 csum_complete; + u64 csum_none; + } rx; + }; u64 dropped; - u64 ts_packets; - u64 ts_lost; struct u64_stats_sync syncp; }; diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index 3234a960fcc3..0af143ec0f86 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -828,7 +828,6 @@ static int lan966x_probe_port(struct lan966x *lan966x, u32 p, port->phylink_config.type = PHYLINK_NETDEV; port->phylink_pcs.poll = true; port->phylink_pcs.ops = &lan966x_phylink_pcs_ops; - port->phylink_pcs.neg_mode = true; port->phylink_config.mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | MAC_10 | MAC_100 | MAC_1000FD | MAC_2500FD; diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index 6a0e5b83ecd0..74ad1d73b465 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -338,7 +338,6 @@ static int sparx5_create_port(struct sparx5 *sparx5, spx5_port->custom_etype = 0x8880; /* Vitesse */ spx5_port->phylink_pcs.poll = true; spx5_port->phylink_pcs.ops = &sparx5_phylink_pcs_ops; - spx5_port->phylink_pcs.neg_mode = true; spx5_port->is_mrouter = false; INIT_LIST_HEAD(&spx5_port->tc_templates); sparx5->ports[config->portno] = spx5_port; diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index be95336ce089..c15a5ef4674e 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -666,8 +666,11 @@ int mana_gd_create_hwc_queue(struct gdma_dev *gd, gmi = &queue->mem_info; err = mana_gd_alloc_memory(gc, spec->queue_size, gmi); - if (err) + if (err) { + dev_err(gc->dev, "GDMA queue type: %d, size: %u, gdma memory allocation err: %d\n", + spec->type, spec->queue_size, err); goto free_q; + } queue->head = 0; queue->tail = 0; @@ -688,6 +691,8 @@ int mana_gd_create_hwc_queue(struct gdma_dev *gd, *queue_ptr = queue; return 0; out: + dev_err(gc->dev, "Failed to create queue type %d of size %u, err: %d\n", + spec->type, spec->queue_size, err); mana_gd_free_memory(gmi); free_q: kfree(queue); @@ -770,7 +775,13 @@ static int mana_gd_create_dma_region(struct gdma_dev *gd, } gmi->dma_region_handle = resp.dma_region_handle; + dev_dbg(gc->dev, "Created DMA region handle 0x%llx\n", + gmi->dma_region_handle); out: + if (err) + dev_dbg(gc->dev, + "Failed to create DMA region of length: %u, page_type: %d, status: 0x%x, err: %d\n", + length, req->gdma_page_type, resp.hdr.status, err); kfree(req); return err; } @@ -793,8 +804,11 @@ int mana_gd_create_mana_eq(struct gdma_dev *gd, gmi = &queue->mem_info; err = mana_gd_alloc_memory(gc, spec->queue_size, gmi); - if (err) + if (err) { + dev_err(gc->dev, "GDMA queue type: %d, size: %u, gdma memory allocation err: %d\n", + spec->type, spec->queue_size, err); goto free_q; + } err = mana_gd_create_dma_region(gd, gmi); if (err) @@ -815,6 +829,8 @@ int mana_gd_create_mana_eq(struct gdma_dev *gd, *queue_ptr = queue; return 0; out: + dev_err(gc->dev, "Failed to create queue type %d of size: %u, err: %d\n", + spec->type, spec->queue_size, err); mana_gd_free_memory(gmi); free_q: kfree(queue); @@ -841,8 +857,11 @@ int mana_gd_create_mana_wq_cq(struct gdma_dev *gd, gmi = &queue->mem_info; err = mana_gd_alloc_memory(gc, spec->queue_size, gmi); - if (err) + if (err) { + dev_err(gc->dev, "GDMA queue type: %d, size: %u, memory allocation err: %d\n", + spec->type, spec->queue_size, err); goto free_q; + } err = mana_gd_create_dma_region(gd, gmi); if (err) @@ -862,6 +881,8 @@ int mana_gd_create_mana_wq_cq(struct gdma_dev *gd, *queue_ptr = queue; return 0; out: + dev_err(gc->dev, "Failed to create queue type %d of size: %u, err: %d\n", + spec->type, spec->queue_size, err); mana_gd_free_memory(gmi); free_q: kfree(queue); @@ -1157,8 +1178,11 @@ int mana_gd_post_and_ring(struct gdma_queue *queue, int err; err = mana_gd_post_work_request(queue, wqe_req, wqe_info); - if (err) + if (err) { + dev_err(gc->dev, "Failed to post work req from queue type %d of size %u (err=%d)\n", + queue->type, queue->queue_size, err); return err; + } mana_gd_wq_ring_doorbell(gc, queue); @@ -1435,8 +1459,10 @@ static int mana_gd_setup(struct pci_dev *pdev) mana_smc_init(&gc->shm_channel, gc->dev, gc->shm_base); err = mana_gd_setup_irqs(pdev); - if (err) + if (err) { + dev_err(gc->dev, "Failed to setup IRQs: %d\n", err); return err; + } err = mana_hwc_create_channel(gc); if (err) @@ -1454,12 +1480,14 @@ static int mana_gd_setup(struct pci_dev *pdev) if (err) goto destroy_hwc; + dev_dbg(&pdev->dev, "mana gdma setup successful\n"); return 0; destroy_hwc: mana_hwc_destroy_channel(gc); remove_irq: mana_gd_remove_irqs(pdev); + dev_err(&pdev->dev, "%s failed (error %d)\n", __func__, err); return err; } @@ -1470,6 +1498,7 @@ static void mana_gd_cleanup(struct pci_dev *pdev) mana_hwc_destroy_channel(gc); mana_gd_remove_irqs(pdev); + dev_dbg(&pdev->dev, "mana gdma cleanup successful\n"); } static bool mana_is_pf(unsigned short dev_id) @@ -1488,8 +1517,10 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) BUILD_BUG_ON(2 * MAX_PORTS_IN_MANA_DEV * GDMA_EQE_SIZE > EQ_SIZE); err = pci_enable_device(pdev); - if (err) + if (err) { + dev_err(&pdev->dev, "Failed to enable pci device (err=%d)\n", err); return -ENXIO; + } pci_set_master(pdev); @@ -1498,9 +1529,10 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto disable_dev; err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (err) + if (err) { + dev_err(&pdev->dev, "DMA set mask failed: %d\n", err); goto release_region; - + } dma_set_max_seg_size(&pdev->dev, UINT_MAX); err = -ENOMEM; @@ -1575,6 +1607,8 @@ static void mana_gd_remove(struct pci_dev *pdev) pci_release_regions(pdev); pci_disable_device(pdev); + + dev_dbg(&pdev->dev, "mana gdma remove successful\n"); } /* The 'state' parameter is not used. */ diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c index a00f915c5188..1ba49602089b 100644 --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -440,7 +440,8 @@ static int mana_hwc_alloc_dma_buf(struct hw_channel_context *hwc, u16 q_depth, gmi = &dma_buf->mem_info; err = mana_gd_alloc_memory(gc, buf_size, gmi); if (err) { - dev_err(hwc->dev, "Failed to allocate DMA buffer: %d\n", err); + dev_err(hwc->dev, "Failed to allocate DMA buffer size: %u, err %d\n", + buf_size, err); goto out; } @@ -529,6 +530,9 @@ static int mana_hwc_create_wq(struct hw_channel_context *hwc, out: if (err) mana_hwc_destroy_wq(hwc, hwc_wq); + + dev_err(hwc->dev, "Failed to create HWC queue size= %u type= %d err= %d\n", + queue_size, q_type, err); return err; } diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index aa1e47233fe5..0411a1897f57 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -52,10 +52,12 @@ static int mana_open(struct net_device *ndev) { struct mana_port_context *apc = netdev_priv(ndev); int err; - err = mana_alloc_queues(ndev); - if (err) + + if (err) { + netdev_err(ndev, "%s failed to allocate queues: %d\n", __func__, err); return err; + } apc->port_is_up = true; @@ -64,7 +66,7 @@ static int mana_open(struct net_device *ndev) netif_carrier_on(ndev); netif_tx_wake_all_queues(ndev); - + netdev_dbg(ndev, "%s successful\n", __func__); return 0; } @@ -176,6 +178,9 @@ static int mana_map_skb(struct sk_buff *skb, struct mana_port_context *apc, return 0; frag_err: + if (net_ratelimit()) + netdev_err(apc->ndev, "Failed to map skb of size %u to DMA\n", + skb->len); for (i = sg_i - 1; i >= hsg; i--) dma_unmap_page(dev, ash->dma_handle[i], ash->size[i], DMA_TO_DEVICE); @@ -256,6 +261,9 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) if (skb_cow_head(skb, MANA_HEADROOM)) goto tx_drop_count; + if (unlikely(ipv6_hopopt_jumbo_remove(skb))) + goto tx_drop_count; + txq = &apc->tx_qp[txq_idx].txq; gdma_sq = txq->gdma_sq; cq = &apc->tx_qp[txq_idx].tx_cq; @@ -687,6 +695,7 @@ int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu, int num_qu return 0; error: + netdev_err(mpc->ndev, "Failed to pre-allocate RX buffers for %d queues\n", num_queues); mana_pre_dealloc_rxbufs(mpc); return -ENOMEM; } @@ -1304,8 +1313,10 @@ static int mana_create_eq(struct mana_context *ac) for (i = 0; i < gc->max_num_queues; i++) { spec.eq.msix_index = (i + 1) % gc->num_msix_usable; err = mana_gd_create_mana_eq(gd, &spec, &ac->eqs[i].eq); - if (err) + if (err) { + dev_err(gc->dev, "Failed to create EQ %d : %d\n", i, err); goto out; + } mana_create_eq_debugfs(ac, i); } @@ -2080,6 +2091,8 @@ static int mana_create_txq(struct mana_port_context *apc, return 0; out: + netdev_err(net, "Failed to create %d TX queues, %d\n", + apc->num_queues, err); mana_destroy_txq(apc); return err; } @@ -2415,6 +2428,7 @@ static int mana_add_rx_queues(struct mana_port_context *apc, rxq = mana_create_rxq(apc, i, &ac->eqs[i], ndev); if (!rxq) { err = -ENOMEM; + netdev_err(ndev, "Failed to create rxq %d : %d\n", i, err); goto out; } @@ -2661,12 +2675,18 @@ int mana_alloc_queues(struct net_device *ndev) int err; err = mana_create_vport(apc, ndev); - if (err) + if (err) { + netdev_err(ndev, "Failed to create vPort %u : %d\n", apc->port_idx, err); return err; + } err = netif_set_real_num_tx_queues(ndev, apc->num_queues); - if (err) + if (err) { + netdev_err(ndev, + "netif_set_real_num_tx_queues () failed for ndev with num_queues %u : %d\n", + apc->num_queues, err); goto destroy_vport; + } err = mana_add_rx_queues(apc, ndev); if (err) @@ -2675,14 +2695,20 @@ int mana_alloc_queues(struct net_device *ndev) apc->rss_state = apc->num_queues > 1 ? TRI_STATE_TRUE : TRI_STATE_FALSE; err = netif_set_real_num_rx_queues(ndev, apc->num_queues); - if (err) + if (err) { + netdev_err(ndev, + "netif_set_real_num_rx_queues () failed for ndev with num_queues %u : %d\n", + apc->num_queues, err); goto destroy_vport; + } mana_rss_table_init(apc); err = mana_config_rss(apc, TRI_STATE_TRUE, true, true); - if (err) + if (err) { + netdev_err(ndev, "Failed to configure RSS table: %d\n", err); goto destroy_vport; + } if (gd->gdma_context->is_pf) { err = mana_pf_register_filter(apc); @@ -2823,8 +2849,10 @@ int mana_detach(struct net_device *ndev, bool from_close) if (apc->port_st_save) { err = mana_dealloc_queues(ndev); - if (err) + if (err) { + netdev_err(ndev, "%s failed to deallocate queues: %d\n", __func__, err); return err; + } } if (!from_close) { @@ -2873,6 +2901,8 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, ndev->dev_port = port_idx; SET_NETDEV_DEV(ndev, gc->dev); + netif_set_tso_max_size(ndev, GSO_MAX_SIZE); + netif_carrier_off(ndev); netdev_rss_key_fill(apc->hashkey, MANA_HASH_KEY_SIZE); @@ -2968,6 +2998,8 @@ static int add_adev(struct gdma_dev *gd) goto add_fail; gd->adev = adev; + dev_dbg(gd->gdma_context->dev, + "Auxiliary device added successfully\n"); return 0; add_fail: @@ -3009,8 +3041,10 @@ int mana_probe(struct gdma_dev *gd, bool resuming) } err = mana_create_eq(ac); - if (err) + if (err) { + dev_err(dev, "Failed to create EQs: %d\n", err); goto out; + } err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION, MANA_MICRO_VERSION, &num_ports); @@ -3066,8 +3100,14 @@ int mana_probe(struct gdma_dev *gd, bool resuming) err = add_adev(gd); out: - if (err) + if (err) { mana_remove(gd, false); + } else { + dev_dbg(dev, "gd=%p, id=%u, num_ports=%d, type=%u, instance=%u\n", + gd, gd->dev_id.as_uint32, ac->num_ports, + gd->dev_id.type, gd->dev_id.instance); + dev_dbg(dev, "%s succeeded\n", __func__); + } return err; } @@ -3129,6 +3169,7 @@ out: gd->driver_data = NULL; gd->gdma_context = NULL; kfree(ac); + dev_dbg(dev, "%s succeeded\n", __func__); } struct net_device *mana_get_primary_netdev_rcu(struct mana_context *ac, u32 port_index) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_hwmon.c b/drivers/net/ethernet/netronome/nfp/nfp_hwmon.c index 0d6c59d6d4ae..ea6a288c0d5e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_hwmon.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_hwmon.c @@ -83,42 +83,12 @@ nfp_hwmon_is_visible(const void *data, enum hwmon_sensor_types type, u32 attr, return 0; } -static u32 nfp_chip_config[] = { - HWMON_C_REGISTER_TZ, - 0 -}; - -static const struct hwmon_channel_info nfp_chip = { - .type = hwmon_chip, - .config = nfp_chip_config, -}; - -static u32 nfp_temp_config[] = { - HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT, - 0 -}; - -static const struct hwmon_channel_info nfp_temp = { - .type = hwmon_temp, - .config = nfp_temp_config, -}; - -static u32 nfp_power_config[] = { - HWMON_P_INPUT | HWMON_P_MAX, - HWMON_P_INPUT, - HWMON_P_INPUT, - 0 -}; - -static const struct hwmon_channel_info nfp_power = { - .type = hwmon_power, - .config = nfp_power_config, -}; - static const struct hwmon_channel_info * const nfp_hwmon_info[] = { - &nfp_chip, - &nfp_temp, - &nfp_power, + HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ), + HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT), + HWMON_CHANNEL_INFO(power, HWMON_P_INPUT | HWMON_P_MAX, + HWMON_P_INPUT, + HWMON_P_INPUT), NULL }; diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index fa167b1aa019..5222a035fd19 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -3033,7 +3033,7 @@ static void qed_iov_vf_mbx_vport_update(struct qed_hwfn *p_hwfn, u16 length; int rc; - /* Valiate PF can send such a request */ + /* Validate PF can send such a request */ if (!vf->vport_instance) { DP_VERBOSE(p_hwfn, QED_MSG_IOV, @@ -3312,7 +3312,7 @@ static void qed_iov_vf_mbx_ucast_filter(struct qed_hwfn *p_hwfn, goto out; } - /* Determine if the unicast filtering is acceptible by PF */ + /* Determine if the unicast filtering is acceptable by PF */ if ((p_bulletin->valid_bitmap & BIT(VLAN_ADDR_FORCED)) && (params.type == QED_FILTER_VLAN || params.type == QED_FILTER_MAC_VLAN)) { @@ -3729,7 +3729,7 @@ qed_iov_execute_vf_flr_cleanup(struct qed_hwfn *p_hwfn, rc = qed_iov_enable_vf_access(p_hwfn, p_ptt, p_vf); if (rc) { - DP_ERR(p_hwfn, "Failed to re-enable VF[%d] acces\n", + DP_ERR(p_hwfn, "Failed to re-enable VF[%d] access\n", vfid); return rc; } @@ -4480,7 +4480,7 @@ int qed_sriov_disable(struct qed_dev *cdev, bool pci_enabled) struct qed_ptt *ptt = qed_ptt_acquire(hwfn); /* Failure to acquire the ptt in 100g creates an odd error - * where the first engine has already relased IOV. + * where the first engine has already released IOV. */ if (!ptt) { DP_ERR(hwfn, "Failed to acquire ptt\n"); diff --git a/drivers/net/ethernet/realtek/Kconfig b/drivers/net/ethernet/realtek/Kconfig index 8a8ea51c639e..fe136f61586f 100644 --- a/drivers/net/ethernet/realtek/Kconfig +++ b/drivers/net/ethernet/realtek/Kconfig @@ -114,7 +114,8 @@ config R8169 will be called r8169. This is recommended. config R8169_LEDS - def_bool R8169 && LEDS_TRIGGER_NETDEV + bool "Support for controlling the NIC LEDs" + depends on R8169 && LEDS_TRIGGER_NETDEV depends on !(R8169=y && LEDS_CLASS=m) help Optional support for controlling the NIC LED's with the netdev diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 5a5eba49c651..fa339bd8c775 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -169,6 +169,7 @@ static const struct pci_device_id rtl8169_pci_tbl[] = { { PCI_VDEVICE(REALTEK, 0x8125) }, { PCI_VDEVICE(REALTEK, 0x8126) }, { PCI_VDEVICE(REALTEK, 0x3000) }, + { PCI_VDEVICE(REALTEK, 0x5000) }, {} }; @@ -5199,6 +5200,33 @@ static int r8169_mdio_write_reg(struct mii_bus *mii_bus, int phyaddr, return 0; } +static int r8169_mdio_read_reg_c45(struct mii_bus *mii_bus, int addr, + int devnum, int regnum) +{ + struct rtl8169_private *tp = mii_bus->priv; + + if (addr > 0) + return -ENODEV; + + if (devnum == MDIO_MMD_VEND2 && regnum > MDIO_STAT2) + return r8168_phy_ocp_read(tp, regnum); + + return 0; +} + +static int r8169_mdio_write_reg_c45(struct mii_bus *mii_bus, int addr, + int devnum, int regnum, u16 val) +{ + struct rtl8169_private *tp = mii_bus->priv; + + if (addr > 0 || devnum != MDIO_MMD_VEND2 || regnum <= MDIO_STAT2) + return -ENODEV; + + r8168_phy_ocp_write(tp, regnum, val); + + return 0; +} + static int r8169_mdio_register(struct rtl8169_private *tp) { struct pci_dev *pdev = tp->pci_dev; @@ -5222,12 +5250,18 @@ static int r8169_mdio_register(struct rtl8169_private *tp) new_bus->priv = tp; new_bus->parent = &pdev->dev; new_bus->irq[0] = PHY_MAC_INTERRUPT; + new_bus->phy_mask = GENMASK(31, 1); snprintf(new_bus->id, MII_BUS_ID_SIZE, "r8169-%x-%x", pci_domain_nr(pdev->bus), pci_dev_id(pdev)); new_bus->read = r8169_mdio_read_reg; new_bus->write = r8169_mdio_write_reg; + if (tp->mac_version >= RTL_GIGA_MAC_VER_40) { + new_bus->read_c45 = r8169_mdio_read_reg_c45; + new_bus->write_c45 = r8169_mdio_write_reg_c45; + } + ret = devm_mdiobus_register(&pdev->dev, new_bus); if (ret) return ret; @@ -5251,9 +5285,9 @@ static int r8169_mdio_register(struct rtl8169_private *tp) /* mimic behavior of r8125/r8126 vendor drivers */ if (tp->mac_version == RTL_GIGA_MAC_VER_61) - phy_set_eee_broken(tp->phydev, - ETHTOOL_LINK_MODE_2500baseT_Full_BIT); - phy_set_eee_broken(tp->phydev, ETHTOOL_LINK_MODE_5000baseT_Full_BIT); + phy_disable_eee_mode(tp->phydev, + ETHTOOL_LINK_MODE_2500baseT_Full_BIT); + phy_disable_eee_mode(tp->phydev, ETHTOOL_LINK_MODE_5000baseT_Full_BIT); /* PHY will be woken up in rtl_open() */ phy_suspend(tp->phydev); diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index 84d09a8973b7..aba772e14555 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1287,17 +1287,14 @@ static struct device_node *rswitch_get_port_node(struct rswitch_device *rdev) if (!ports) return NULL; - for_each_child_of_node(ports, port) { + for_each_available_child_of_node(ports, port) { err = of_property_read_u32(port, "reg", &index); if (err < 0) { port = NULL; goto out; } - if (index == rdev->etha->index) { - if (!of_device_is_available(port)) - port = NULL; + if (index == rdev->etha->index) break; - } } out: diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig index 3eb55dcfa8a6..c4c43434f314 100644 --- a/drivers/net/ethernet/sfc/Kconfig +++ b/drivers/net/ethernet/sfc/Kconfig @@ -38,8 +38,9 @@ config SFC_MTD default y help This exposes the on-board flash and/or EEPROM as MTD devices - (e.g. /dev/mtd1). This is required to update the firmware or - the boot configuration under Linux. + (e.g. /dev/mtd1). This is required to update the boot + configuration under Linux, or use some older userland tools to + update the firmware. config SFC_MCDI_MON bool "Solarflare SFC9100-family hwmon support" depends on SFC && HWMON && !(SFC=y && HWMON=m) diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile index 8f446b9bd5ee..d99039ec468d 100644 --- a/drivers/net/ethernet/sfc/Makefile +++ b/drivers/net/ethernet/sfc/Makefile @@ -7,7 +7,7 @@ sfc-y += efx.o efx_common.o efx_channels.o nic.o \ mcdi_functions.o mcdi_filters.o mcdi_mon.o \ ef100.o ef100_nic.o ef100_netdev.o \ ef100_ethtool.o ef100_rx.o ef100_tx.o \ - efx_devlink.o + efx_devlink.o efx_reflash.o sfc-$(CONFIG_SFC_MTD) += mtd.o sfc-$(CONFIG_SFC_SRIOV) += sriov.o ef10_sriov.o ef100_sriov.o ef100_rep.o \ mae.o tc.o tc_bindings.o tc_counters.o \ diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 452009ed7a43..47d78abecf30 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -3501,7 +3501,7 @@ static int efx_ef10_mtd_probe_partition(struct efx_nic *efx, MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_METADATA_IN_LEN); MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_METADATA_OUT_LENMAX); const struct efx_ef10_nvram_type_info *info; - size_t size, erase_size, outlen; + size_t size, erase_size, write_size, outlen; int type_idx = 0; bool protected; int rc; @@ -3516,7 +3516,8 @@ static int efx_ef10_mtd_probe_partition(struct efx_nic *efx, if (info->port != efx_port_num(efx)) return -ENODEV; - rc = efx_mcdi_nvram_info(efx, type, &size, &erase_size, &protected); + rc = efx_mcdi_nvram_info(efx, type, &size, &erase_size, &write_size, + &protected); if (rc) return rc; if (protected && @@ -3561,6 +3562,8 @@ static int efx_ef10_mtd_probe_partition(struct efx_nic *efx, if (!erase_size) part->common.mtd.flags |= MTD_NO_ERASE; + part->common.mtd.writesize = write_size; + return 0; } diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c index c88ec3e24836..5a14d94163b1 100644 --- a/drivers/net/ethernet/sfc/efx_common.c +++ b/drivers/net/ethernet/sfc/efx_common.c @@ -1003,6 +1003,7 @@ int efx_init_struct(struct efx_nic *efx, struct pci_dev *pci_dev) INIT_LIST_HEAD(&efx->vf_reps); INIT_WORK(&efx->mac_work, efx_mac_work); init_waitqueue_head(&efx->flush_wq); + mutex_init(&efx->reflash_mutex); efx->tx_queues_per_channel = 1; efx->rxq_entries = EFX_DEFAULT_DMAQ_SIZE; diff --git a/drivers/net/ethernet/sfc/efx_devlink.c b/drivers/net/ethernet/sfc/efx_devlink.c index 3cd750820fdd..d842c60dfc10 100644 --- a/drivers/net/ethernet/sfc/efx_devlink.c +++ b/drivers/net/ethernet/sfc/efx_devlink.c @@ -19,6 +19,7 @@ #include "mae.h" #include "ef100_rep.h" #endif +#include "efx_reflash.h" struct efx_devlink { struct efx_nic *efx; @@ -615,7 +616,19 @@ static int efx_devlink_info_get(struct devlink *devlink, return 0; } +static int efx_devlink_flash_update(struct devlink *devlink, + struct devlink_flash_update_params *params, + struct netlink_ext_ack *extack) +{ + struct efx_devlink *devlink_private = devlink_priv(devlink); + struct efx_nic *efx = devlink_private->efx; + + return efx_reflash_flash_firmware(efx, params->fw, extack); +} + static const struct devlink_ops sfc_devlink_ops = { + .supported_flash_update_params = 0, + .flash_update = efx_devlink_flash_update, .info_get = efx_devlink_info_get, }; diff --git a/drivers/net/ethernet/sfc/efx_reflash.c b/drivers/net/ethernet/sfc/efx_reflash.c new file mode 100644 index 000000000000..ddc53740f098 --- /dev/null +++ b/drivers/net/ethernet/sfc/efx_reflash.c @@ -0,0 +1,514 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for AMD network controllers and boards + * Copyright (C) 2025, Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include <linux/crc32.h> +#include <net/devlink.h> +#include "efx_reflash.h" +#include "net_driver.h" +#include "fw_formats.h" +#include "mcdi_pcol.h" +#include "mcdi.h" + +/* Try to parse a Reflash header at the specified offset */ +static bool efx_reflash_parse_reflash_header(const struct firmware *fw, + size_t header_offset, u32 *type, + u32 *subtype, const u8 **data, + size_t *data_size) +{ + size_t header_end, trailer_offset, trailer_end; + u32 magic, version, payload_size, header_len; + const u8 *header, *trailer; + u32 expected_crc, crc; + + if (check_add_overflow(header_offset, EFX_REFLASH_HEADER_LENGTH_OFST + + EFX_REFLASH_HEADER_LENGTH_LEN, + &header_end)) + return false; + if (fw->size < header_end) + return false; + + header = fw->data + header_offset; + magic = get_unaligned_le32(header + EFX_REFLASH_HEADER_MAGIC_OFST); + if (magic != EFX_REFLASH_HEADER_MAGIC_VALUE) + return false; + + version = get_unaligned_le32(header + EFX_REFLASH_HEADER_VERSION_OFST); + if (version != EFX_REFLASH_HEADER_VERSION_VALUE) + return false; + + payload_size = get_unaligned_le32(header + EFX_REFLASH_HEADER_PAYLOAD_SIZE_OFST); + header_len = get_unaligned_le32(header + EFX_REFLASH_HEADER_LENGTH_OFST); + if (check_add_overflow(header_offset, header_len, &trailer_offset) || + check_add_overflow(trailer_offset, payload_size, &trailer_offset) || + check_add_overflow(trailer_offset, EFX_REFLASH_TRAILER_LEN, + &trailer_end)) + return false; + if (fw->size < trailer_end) + return false; + + trailer = fw->data + trailer_offset; + expected_crc = get_unaligned_le32(trailer + EFX_REFLASH_TRAILER_CRC_OFST); + /* Addition could overflow u32, but not size_t since we already + * checked trailer_offset didn't overflow. So cast to size_t first. + */ + crc = crc32_le(0, header, (size_t)header_len + payload_size); + if (crc != expected_crc) + return false; + + *type = get_unaligned_le32(header + EFX_REFLASH_HEADER_FIRMWARE_TYPE_OFST); + *subtype = get_unaligned_le32(header + EFX_REFLASH_HEADER_FIRMWARE_SUBTYPE_OFST); + if (*type == EFX_REFLASH_FIRMWARE_TYPE_BUNDLE) { + /* All the bundle data is written verbatim to NVRAM */ + *data = fw->data; + *data_size = fw->size; + } else { + /* Other payload types strip the reflash header and trailer + * from the data written to NVRAM + */ + *data = header + header_len; + *data_size = payload_size; + } + + return true; +} + +/* Map from FIRMWARE_TYPE to NVRAM_PARTITION_TYPE */ +static int efx_reflash_partition_type(u32 type, u32 subtype, + u32 *partition_type, + u32 *partition_subtype) +{ + int rc = 0; + + switch (type) { + case EFX_REFLASH_FIRMWARE_TYPE_BOOTROM: + *partition_type = NVRAM_PARTITION_TYPE_EXPANSION_ROM; + *partition_subtype = subtype; + break; + case EFX_REFLASH_FIRMWARE_TYPE_BUNDLE: + *partition_type = NVRAM_PARTITION_TYPE_BUNDLE; + *partition_subtype = subtype; + break; + default: + /* Not supported */ + rc = -EINVAL; + } + + return rc; +} + +/* Try to parse a SmartNIC image header at the specified offset */ +static bool efx_reflash_parse_snic_header(const struct firmware *fw, + size_t header_offset, + u32 *partition_type, + u32 *partition_subtype, + const u8 **data, size_t *data_size) +{ + u32 magic, version, payload_size, header_len, expected_crc, crc; + size_t header_end, payload_end; + const u8 *header; + + if (check_add_overflow(header_offset, EFX_SNICIMAGE_HEADER_MINLEN, + &header_end) || + fw->size < header_end) + return false; + + header = fw->data + header_offset; + magic = get_unaligned_le32(header + EFX_SNICIMAGE_HEADER_MAGIC_OFST); + if (magic != EFX_SNICIMAGE_HEADER_MAGIC_VALUE) + return false; + + version = get_unaligned_le32(header + EFX_SNICIMAGE_HEADER_VERSION_OFST); + if (version != EFX_SNICIMAGE_HEADER_VERSION_VALUE) + return false; + + header_len = get_unaligned_le32(header + EFX_SNICIMAGE_HEADER_LENGTH_OFST); + if (check_add_overflow(header_offset, header_len, &header_end)) + return false; + payload_size = get_unaligned_le32(header + EFX_SNICIMAGE_HEADER_PAYLOAD_SIZE_OFST); + if (check_add_overflow(header_end, payload_size, &payload_end) || + fw->size < payload_end) + return false; + + expected_crc = get_unaligned_le32(header + EFX_SNICIMAGE_HEADER_CRC_OFST); + + /* Calculate CRC omitting the expected CRC field itself */ + crc = crc32_le(~0, header, EFX_SNICIMAGE_HEADER_CRC_OFST); + crc = ~crc32_le(crc, + header + EFX_SNICIMAGE_HEADER_CRC_OFST + + EFX_SNICIMAGE_HEADER_CRC_LEN, + header_len + payload_size - EFX_SNICIMAGE_HEADER_CRC_OFST - + EFX_SNICIMAGE_HEADER_CRC_LEN); + if (crc != expected_crc) + return false; + + *partition_type = + get_unaligned_le32(header + EFX_SNICIMAGE_HEADER_PARTITION_TYPE_OFST); + *partition_subtype = + get_unaligned_le32(header + EFX_SNICIMAGE_HEADER_PARTITION_SUBTYPE_OFST); + *data = fw->data; + *data_size = fw->size; + return true; +} + +/* Try to parse a SmartNIC bundle header at the specified offset */ +static bool efx_reflash_parse_snic_bundle_header(const struct firmware *fw, + size_t header_offset, + u32 *partition_type, + u32 *partition_subtype, + const u8 **data, + size_t *data_size) +{ + u32 magic, version, bundle_type, header_len, expected_crc, crc; + size_t header_end; + const u8 *header; + + if (check_add_overflow(header_offset, EFX_SNICBUNDLE_HEADER_LEN, + &header_end)) + return false; + if (fw->size < header_end) + return false; + + header = fw->data + header_offset; + magic = get_unaligned_le32(header + EFX_SNICBUNDLE_HEADER_MAGIC_OFST); + if (magic != EFX_SNICBUNDLE_HEADER_MAGIC_VALUE) + return false; + + version = get_unaligned_le32(header + EFX_SNICBUNDLE_HEADER_VERSION_OFST); + if (version != EFX_SNICBUNDLE_HEADER_VERSION_VALUE) + return false; + + bundle_type = get_unaligned_le32(header + EFX_SNICBUNDLE_HEADER_BUNDLE_TYPE_OFST); + if (bundle_type != NVRAM_PARTITION_TYPE_BUNDLE) + return false; + + header_len = get_unaligned_le32(header + EFX_SNICBUNDLE_HEADER_LENGTH_OFST); + if (header_len != EFX_SNICBUNDLE_HEADER_LEN) + return false; + + expected_crc = get_unaligned_le32(header + EFX_SNICBUNDLE_HEADER_CRC_OFST); + crc = ~crc32_le(~0, header, EFX_SNICBUNDLE_HEADER_CRC_OFST); + if (crc != expected_crc) + return false; + + *partition_type = NVRAM_PARTITION_TYPE_BUNDLE; + *partition_subtype = get_unaligned_le32(header + EFX_SNICBUNDLE_HEADER_BUNDLE_SUBTYPE_OFST); + *data = fw->data; + *data_size = fw->size; + return true; +} + +/* Try to find a valid firmware payload in the firmware data. + * When we recognise a valid header, we parse it for the partition type + * (so we know where to ask the MC to write it to) and the location of + * the data blob to write. + */ +static int efx_reflash_parse_firmware_data(const struct firmware *fw, + u32 *partition_type, + u32 *partition_subtype, + const u8 **data, size_t *data_size) +{ + size_t header_offset; + u32 type, subtype; + + /* Some packaging formats (such as CMS/PKCS#7 signed images) + * prepend a header for which finding the size is a non-trivial + * task, so step through the firmware data until we find a valid + * header. + * + * The checks are intended to reject firmware data that is clearly not + * in the expected format. They do not need to be exhaustive as the + * running firmware will perform its own comprehensive validity and + * compatibility checks during the update procedure. + * + * Firmware packages may contain multiple reflash images, e.g. a + * bundle containing one or more other images. Only check the + * outermost container by stopping after the first candidate image + * found even it is for an unsupported partition type. + */ + for (header_offset = 0; header_offset < fw->size; header_offset++) { + if (efx_reflash_parse_snic_bundle_header(fw, header_offset, + partition_type, + partition_subtype, + data, data_size)) + return 0; + + if (efx_reflash_parse_snic_header(fw, header_offset, + partition_type, + partition_subtype, data, + data_size)) + return 0; + + if (efx_reflash_parse_reflash_header(fw, header_offset, &type, + &subtype, data, data_size)) + return efx_reflash_partition_type(type, subtype, + partition_type, + partition_subtype); + } + + return -EINVAL; +} + +/* Limit the number of status updates during the erase or write phases */ +#define EFX_DEVLINK_STATUS_UPDATE_COUNT 50 + +/* Expected timeout for the efx_mcdi_nvram_update_finish_polled() */ +#define EFX_DEVLINK_UPDATE_FINISH_TIMEOUT 900 + +/* Ideal erase chunk size. This is a balance between minimising the number of + * MCDI requests to erase an entire partition whilst avoiding tripping the MCDI + * RPC timeout. + */ +#define EFX_NVRAM_ERASE_IDEAL_CHUNK_SIZE (64 * 1024) + +static int efx_reflash_erase_partition(struct efx_nic *efx, + struct netlink_ext_ack *extack, + struct devlink *devlink, u32 type, + size_t partition_size, + size_t align) +{ + size_t chunk, offset, next_update; + int rc; + + /* Partitions that cannot be erased or do not require erase before + * write are advertised with a erase alignment/sector size of zero. + */ + if (align == 0) + /* Nothing to do */ + return 0; + + if (partition_size % align) + return -EINVAL; + + /* Erase the entire NVRAM partition a chunk at a time to avoid + * potentially tripping the MCDI RPC timeout. + */ + if (align >= EFX_NVRAM_ERASE_IDEAL_CHUNK_SIZE) + chunk = align; + else + chunk = rounddown(EFX_NVRAM_ERASE_IDEAL_CHUNK_SIZE, align); + + for (offset = 0, next_update = 0; offset < partition_size; offset += chunk) { + if (offset >= next_update) { + devlink_flash_update_status_notify(devlink, "Erasing", + NULL, offset, + partition_size); + next_update += partition_size / EFX_DEVLINK_STATUS_UPDATE_COUNT; + } + + chunk = min_t(size_t, partition_size - offset, chunk); + rc = efx_mcdi_nvram_erase(efx, type, offset, chunk); + if (rc) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Erase failed for NVRAM partition %#x at %#zx-%#zx", + type, offset, offset + chunk - 1); + return rc; + } + } + + devlink_flash_update_status_notify(devlink, "Erasing", NULL, + partition_size, partition_size); + + return 0; +} + +static int efx_reflash_write_partition(struct efx_nic *efx, + struct netlink_ext_ack *extack, + struct devlink *devlink, u32 type, + const u8 *data, size_t data_size, + size_t align) +{ + size_t write_max, chunk, offset, next_update; + int rc; + + if (align == 0) + return -EINVAL; + + /* Write the NVRAM partition in chunks that are the largest multiple + * of the partition's required write alignment that will fit into the + * MCDI NVRAM_WRITE RPC payload. + */ + if (efx->type->mcdi_max_ver < 2) + write_max = MC_CMD_NVRAM_WRITE_IN_WRITE_BUFFER_LEN * + MC_CMD_NVRAM_WRITE_IN_WRITE_BUFFER_MAXNUM; + else + write_max = MC_CMD_NVRAM_WRITE_IN_WRITE_BUFFER_LEN * + MC_CMD_NVRAM_WRITE_IN_WRITE_BUFFER_MAXNUM_MCDI2; + chunk = rounddown(write_max, align); + + for (offset = 0, next_update = 0; offset + chunk <= data_size; offset += chunk) { + if (offset >= next_update) { + devlink_flash_update_status_notify(devlink, "Writing", + NULL, offset, + data_size); + next_update += data_size / EFX_DEVLINK_STATUS_UPDATE_COUNT; + } + + rc = efx_mcdi_nvram_write(efx, type, offset, data + offset, chunk); + if (rc) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Write failed for NVRAM partition %#x at %#zx-%#zx", + type, offset, offset + chunk - 1); + return rc; + } + } + + /* Round up left over data to satisfy write alignment */ + if (offset < data_size) { + size_t remaining = data_size - offset; + u8 *buf; + + if (offset >= next_update) + devlink_flash_update_status_notify(devlink, "Writing", + NULL, offset, + data_size); + + chunk = roundup(remaining, align); + buf = kmalloc(chunk, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + memcpy(buf, data + offset, remaining); + memset(buf + remaining, 0xFF, chunk - remaining); + rc = efx_mcdi_nvram_write(efx, type, offset, buf, chunk); + kfree(buf); + if (rc) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Write failed for NVRAM partition %#x at %#zx-%#zx", + type, offset, offset + chunk - 1); + return rc; + } + } + + devlink_flash_update_status_notify(devlink, "Writing", NULL, data_size, + data_size); + + return 0; +} + +int efx_reflash_flash_firmware(struct efx_nic *efx, const struct firmware *fw, + struct netlink_ext_ack *extack) +{ + size_t data_size, size, erase_align, write_align; + struct devlink *devlink = efx->devlink; + u32 type, data_subtype, subtype; + const u8 *data; + bool protected; + int rc; + + if (!efx_has_cap(efx, BUNDLE_UPDATE)) { + NL_SET_ERR_MSG_MOD(extack, "NVRAM bundle updates are not supported by the firmware"); + return -EOPNOTSUPP; + } + + devlink_flash_update_status_notify(devlink, "Checking update", NULL, 0, 0); + + rc = efx_reflash_parse_firmware_data(fw, &type, &data_subtype, &data, + &data_size); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, + "Firmware image validation check failed"); + goto out; + } + + mutex_lock(&efx->reflash_mutex); + + rc = efx_mcdi_nvram_metadata(efx, type, &subtype, NULL, NULL, 0); + if (rc) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Metadata query for NVRAM partition %#x failed", + type); + goto out_unlock; + } + + if (subtype != data_subtype) { + NL_SET_ERR_MSG_MOD(extack, + "Firmware image is not appropriate for this adapter"); + rc = -EINVAL; + goto out_unlock; + } + + rc = efx_mcdi_nvram_info(efx, type, &size, &erase_align, &write_align, + &protected); + if (rc) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Info query for NVRAM partition %#x failed", + type); + goto out_unlock; + } + + if (protected) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "NVRAM partition %#x is protected", + type); + rc = -EPERM; + goto out_unlock; + } + + if (write_align == 0) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "NVRAM partition %#x is not writable", + type); + rc = -EACCES; + goto out_unlock; + } + + if (erase_align != 0 && size % erase_align) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "NVRAM partition %#x has a bad partition table entry, can't erase it", + type); + rc = -EACCES; + goto out_unlock; + } + + if (data_size > size) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Firmware image is too big for NVRAM partition %#x", + type); + rc = -EFBIG; + goto out_unlock; + } + + devlink_flash_update_status_notify(devlink, "Starting update", NULL, 0, 0); + + rc = efx_mcdi_nvram_update_start(efx, type); + if (rc) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Update start request for NVRAM partition %#x failed", + type); + goto out_unlock; + } + + rc = efx_reflash_erase_partition(efx, extack, devlink, type, size, + erase_align); + if (rc) + goto out_update_finish; + + rc = efx_reflash_write_partition(efx, extack, devlink, type, data, + data_size, write_align); + if (rc) + goto out_update_finish; + + devlink_flash_update_timeout_notify(devlink, "Finishing update", NULL, + EFX_DEVLINK_UPDATE_FINISH_TIMEOUT); + +out_update_finish: + if (rc) + /* Don't obscure the return code from an earlier failure */ + efx_mcdi_nvram_update_finish(efx, type, EFX_UPDATE_FINISH_ABORT); + else + rc = efx_mcdi_nvram_update_finish_polled(efx, type); +out_unlock: + mutex_unlock(&efx->reflash_mutex); +out: + devlink_flash_update_status_notify(devlink, rc ? "Update failed" : + "Update complete", + NULL, 0, 0); + return rc; +} diff --git a/drivers/net/ethernet/sfc/efx_reflash.h b/drivers/net/ethernet/sfc/efx_reflash.h new file mode 100644 index 000000000000..3dffac565161 --- /dev/null +++ b/drivers/net/ethernet/sfc/efx_reflash.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for AMD network controllers and boards + * Copyright (C) 2025, Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef _EFX_REFLASH_H +#define _EFX_REFLASH_H + +#include "net_driver.h" +#include <linux/firmware.h> + +int efx_reflash_flash_firmware(struct efx_nic *efx, const struct firmware *fw, + struct netlink_ext_ack *extack); + +#endif /* _EFX_REFLASH_H */ diff --git a/drivers/net/ethernet/sfc/fw_formats.h b/drivers/net/ethernet/sfc/fw_formats.h new file mode 100644 index 000000000000..cbc350c96013 --- /dev/null +++ b/drivers/net/ethernet/sfc/fw_formats.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for AMD network controllers and boards + * Copyright (C) 2025, Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef _EFX_FW_FORMATS_H +#define _EFX_FW_FORMATS_H + +/* Header layouts of firmware update images recognised by Efx NICs. + * The sources-of-truth for these layouts are AMD internal documents + * and sfregistry headers, neither of which are available externally + * nor usable directly by the driver. + * + * While each format includes a 'magic number', these are at different + * offsets in the various formats, and a legal header for one format + * could have the right value in whichever field occupies that offset + * to match another format's magic. + * Besides, some packaging formats (such as CMS/PKCS#7 signed images) + * prepend a header for which finding the size is a non-trivial task; + * rather than trying to parse those headers, we search byte-by-byte + * through the provided firmware image looking for a valid header. + * Thus, format recognition has to include validation of the checksum + * field, even though the firmware will validate that itself before + * applying the image. + */ + +/* EF10 (Medford2, X2) "reflash" header format. Defined in SF-121352-AN */ +#define EFX_REFLASH_HEADER_MAGIC_OFST 0 +#define EFX_REFLASH_HEADER_MAGIC_LEN 4 +#define EFX_REFLASH_HEADER_MAGIC_VALUE 0x106F1A5 + +#define EFX_REFLASH_HEADER_VERSION_OFST 4 +#define EFX_REFLASH_HEADER_VERSION_LEN 4 +#define EFX_REFLASH_HEADER_VERSION_VALUE 4 + +#define EFX_REFLASH_HEADER_FIRMWARE_TYPE_OFST 8 +#define EFX_REFLASH_HEADER_FIRMWARE_TYPE_LEN 4 +#define EFX_REFLASH_FIRMWARE_TYPE_BOOTROM 0x2 +#define EFX_REFLASH_FIRMWARE_TYPE_BUNDLE 0xd + +#define EFX_REFLASH_HEADER_FIRMWARE_SUBTYPE_OFST 12 +#define EFX_REFLASH_HEADER_FIRMWARE_SUBTYPE_LEN 4 + +#define EFX_REFLASH_HEADER_PAYLOAD_SIZE_OFST 16 +#define EFX_REFLASH_HEADER_PAYLOAD_SIZE_LEN 4 + +#define EFX_REFLASH_HEADER_LENGTH_OFST 20 +#define EFX_REFLASH_HEADER_LENGTH_LEN 4 + +/* Reflash trailer */ +#define EFX_REFLASH_TRAILER_CRC_OFST 0 +#define EFX_REFLASH_TRAILER_CRC_LEN 4 + +#define EFX_REFLASH_TRAILER_LEN \ + (EFX_REFLASH_TRAILER_CRC_OFST + EFX_REFLASH_TRAILER_CRC_LEN) + +/* EF100 "SmartNIC image" header format. + * Defined in sfregistry "src/layout/snic_image_hdr.h". + */ +#define EFX_SNICIMAGE_HEADER_MAGIC_OFST 16 +#define EFX_SNICIMAGE_HEADER_MAGIC_LEN 4 +#define EFX_SNICIMAGE_HEADER_MAGIC_VALUE 0x541C057A + +#define EFX_SNICIMAGE_HEADER_VERSION_OFST 20 +#define EFX_SNICIMAGE_HEADER_VERSION_LEN 4 +#define EFX_SNICIMAGE_HEADER_VERSION_VALUE 1 + +#define EFX_SNICIMAGE_HEADER_LENGTH_OFST 24 +#define EFX_SNICIMAGE_HEADER_LENGTH_LEN 4 + +#define EFX_SNICIMAGE_HEADER_PARTITION_TYPE_OFST 36 +#define EFX_SNICIMAGE_HEADER_PARTITION_TYPE_LEN 4 + +#define EFX_SNICIMAGE_HEADER_PARTITION_SUBTYPE_OFST 40 +#define EFX_SNICIMAGE_HEADER_PARTITION_SUBTYPE_LEN 4 + +#define EFX_SNICIMAGE_HEADER_PAYLOAD_SIZE_OFST 60 +#define EFX_SNICIMAGE_HEADER_PAYLOAD_SIZE_LEN 4 + +#define EFX_SNICIMAGE_HEADER_CRC_OFST 64 +#define EFX_SNICIMAGE_HEADER_CRC_LEN 4 + +#define EFX_SNICIMAGE_HEADER_MINLEN 256 + +/* EF100 "SmartNIC bundle" header format. Defined in SF-122606-TC */ +#define EFX_SNICBUNDLE_HEADER_MAGIC_OFST 0 +#define EFX_SNICBUNDLE_HEADER_MAGIC_LEN 4 +#define EFX_SNICBUNDLE_HEADER_MAGIC_VALUE 0xB1001001 + +#define EFX_SNICBUNDLE_HEADER_VERSION_OFST 4 +#define EFX_SNICBUNDLE_HEADER_VERSION_LEN 4 +#define EFX_SNICBUNDLE_HEADER_VERSION_VALUE 1 + +#define EFX_SNICBUNDLE_HEADER_BUNDLE_TYPE_OFST 8 +#define EFX_SNICBUNDLE_HEADER_BUNDLE_TYPE_LEN 4 + +#define EFX_SNICBUNDLE_HEADER_BUNDLE_SUBTYPE_OFST 12 +#define EFX_SNICBUNDLE_HEADER_BUNDLE_SUBTYPE_LEN 4 + +#define EFX_SNICBUNDLE_HEADER_LENGTH_OFST 20 +#define EFX_SNICBUNDLE_HEADER_LENGTH_LEN 4 + +#define EFX_SNICBUNDLE_HEADER_CRC_OFST 224 +#define EFX_SNICBUNDLE_HEADER_CRC_LEN 4 + +#define EFX_SNICBUNDLE_HEADER_LEN \ + (EFX_SNICBUNDLE_HEADER_CRC_OFST + EFX_SNICBUNDLE_HEADER_CRC_LEN) + +#endif /* _EFX_FW_FORMATS_H */ diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index d461b1a6ce81..dbd2ee915838 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -1625,12 +1625,15 @@ fail: return rc; } +#define EFX_MCDI_NVRAM_DEFAULT_WRITE_LEN 128 + int efx_mcdi_nvram_info(struct efx_nic *efx, unsigned int type, size_t *size_out, size_t *erase_size_out, - bool *protected_out) + size_t *write_size_out, bool *protected_out) { MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_INFO_IN_LEN); - MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_INFO_OUT_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_INFO_V2_OUT_LEN); + size_t write_size = 0; size_t outlen; int rc; @@ -1645,6 +1648,12 @@ int efx_mcdi_nvram_info(struct efx_nic *efx, unsigned int type, goto fail; } + if (outlen >= MC_CMD_NVRAM_INFO_V2_OUT_LEN) + write_size = MCDI_DWORD(outbuf, NVRAM_INFO_V2_OUT_WRITESIZE); + else + write_size = EFX_MCDI_NVRAM_DEFAULT_WRITE_LEN; + + *write_size_out = write_size; *size_out = MCDI_DWORD(outbuf, NVRAM_INFO_OUT_SIZE); *erase_size_out = MCDI_DWORD(outbuf, NVRAM_INFO_OUT_ERASESIZE); *protected_out = !!(MCDI_DWORD(outbuf, NVRAM_INFO_OUT_FLAGS) & @@ -2163,11 +2172,9 @@ out_free: return rc; } -#ifdef CONFIG_SFC_MTD - #define EFX_MCDI_NVRAM_LEN_MAX 128 -static int efx_mcdi_nvram_update_start(struct efx_nic *efx, unsigned int type) +int efx_mcdi_nvram_update_start(struct efx_nic *efx, unsigned int type) { MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_UPDATE_START_V2_IN_LEN); int rc; @@ -2185,6 +2192,8 @@ static int efx_mcdi_nvram_update_start(struct efx_nic *efx, unsigned int type) return rc; } +#ifdef CONFIG_SFC_MTD + static int efx_mcdi_nvram_read(struct efx_nic *efx, unsigned int type, loff_t offset, u8 *buffer, size_t length) { @@ -2209,13 +2218,20 @@ static int efx_mcdi_nvram_read(struct efx_nic *efx, unsigned int type, return 0; } -static int efx_mcdi_nvram_write(struct efx_nic *efx, unsigned int type, - loff_t offset, const u8 *buffer, size_t length) +#endif /* CONFIG_SFC_MTD */ + +int efx_mcdi_nvram_write(struct efx_nic *efx, unsigned int type, + loff_t offset, const u8 *buffer, size_t length) { - MCDI_DECLARE_BUF(inbuf, - MC_CMD_NVRAM_WRITE_IN_LEN(EFX_MCDI_NVRAM_LEN_MAX)); + efx_dword_t *inbuf; + size_t inlen; int rc; + inlen = ALIGN(MC_CMD_NVRAM_WRITE_IN_LEN(length), 4); + inbuf = kzalloc(inlen, GFP_KERNEL); + if (!inbuf) + return -ENOMEM; + MCDI_SET_DWORD(inbuf, NVRAM_WRITE_IN_TYPE, type); MCDI_SET_DWORD(inbuf, NVRAM_WRITE_IN_OFFSET, offset); MCDI_SET_DWORD(inbuf, NVRAM_WRITE_IN_LENGTH, length); @@ -2223,14 +2239,14 @@ static int efx_mcdi_nvram_write(struct efx_nic *efx, unsigned int type, BUILD_BUG_ON(MC_CMD_NVRAM_WRITE_OUT_LEN != 0); - rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_WRITE, inbuf, - ALIGN(MC_CMD_NVRAM_WRITE_IN_LEN(length), 4), - NULL, 0, NULL); + rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_WRITE, inbuf, inlen, NULL, 0, NULL); + kfree(inbuf); + return rc; } -static int efx_mcdi_nvram_erase(struct efx_nic *efx, unsigned int type, - loff_t offset, size_t length) +int efx_mcdi_nvram_erase(struct efx_nic *efx, unsigned int type, loff_t offset, + size_t length) { MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_ERASE_IN_LEN); int rc; @@ -2246,7 +2262,8 @@ static int efx_mcdi_nvram_erase(struct efx_nic *efx, unsigned int type, return rc; } -static int efx_mcdi_nvram_update_finish(struct efx_nic *efx, unsigned int type) +int efx_mcdi_nvram_update_finish(struct efx_nic *efx, unsigned int type, + enum efx_update_finish_mode mode) { MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_UPDATE_FINISH_V2_IN_LEN); MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT_LEN); @@ -2254,22 +2271,41 @@ static int efx_mcdi_nvram_update_finish(struct efx_nic *efx, unsigned int type) int rc, rc2; MCDI_SET_DWORD(inbuf, NVRAM_UPDATE_FINISH_IN_TYPE, type); - /* Always set this flag. Old firmware ignores it */ - MCDI_POPULATE_DWORD_1(inbuf, NVRAM_UPDATE_FINISH_V2_IN_FLAGS, + + /* Old firmware doesn't support background update finish and abort + * operations. Fallback to waiting if the requested mode is not + * supported. + */ + if (!efx_has_cap(efx, NVRAM_UPDATE_POLL_VERIFY_RESULT) || + (!efx_has_cap(efx, NVRAM_UPDATE_ABORT_SUPPORTED) && + mode == EFX_UPDATE_FINISH_ABORT)) + mode = EFX_UPDATE_FINISH_WAIT; + + MCDI_POPULATE_DWORD_4(inbuf, NVRAM_UPDATE_FINISH_V2_IN_FLAGS, NVRAM_UPDATE_FINISH_V2_IN_FLAG_REPORT_VERIFY_RESULT, - 1); + (mode != EFX_UPDATE_FINISH_ABORT), + NVRAM_UPDATE_FINISH_V2_IN_FLAG_RUN_IN_BACKGROUND, + (mode == EFX_UPDATE_FINISH_BACKGROUND), + NVRAM_UPDATE_FINISH_V2_IN_FLAG_POLL_VERIFY_RESULT, + (mode == EFX_UPDATE_FINISH_POLL), + NVRAM_UPDATE_FINISH_V2_IN_FLAG_ABORT, + (mode == EFX_UPDATE_FINISH_ABORT)); rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_UPDATE_FINISH, inbuf, sizeof(inbuf), outbuf, sizeof(outbuf), &outlen); if (!rc && outlen >= MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT_LEN) { rc2 = MCDI_DWORD(outbuf, NVRAM_UPDATE_FINISH_V2_OUT_RESULT_CODE); - if (rc2 != MC_CMD_NVRAM_VERIFY_RC_SUCCESS) + if (rc2 != MC_CMD_NVRAM_VERIFY_RC_SUCCESS && + rc2 != MC_CMD_NVRAM_VERIFY_RC_PENDING) netif_err(efx, drv, efx->net_dev, "NVRAM update failed verification with code 0x%x\n", rc2); switch (rc2) { case MC_CMD_NVRAM_VERIFY_RC_SUCCESS: break; + case MC_CMD_NVRAM_VERIFY_RC_PENDING: + rc = -EAGAIN; + break; case MC_CMD_NVRAM_VERIFY_RC_CMS_CHECK_FAILED: case MC_CMD_NVRAM_VERIFY_RC_MESSAGE_DIGEST_CHECK_FAILED: case MC_CMD_NVRAM_VERIFY_RC_SIGNATURE_CHECK_FAILED: @@ -2284,6 +2320,8 @@ static int efx_mcdi_nvram_update_finish(struct efx_nic *efx, unsigned int type) case MC_CMD_NVRAM_VERIFY_RC_NO_VALID_SIGNATURES: case MC_CMD_NVRAM_VERIFY_RC_NO_TRUSTED_APPROVERS: case MC_CMD_NVRAM_VERIFY_RC_NO_SIGNATURE_MATCH: + case MC_CMD_NVRAM_VERIFY_RC_REJECT_TEST_SIGNED: + case MC_CMD_NVRAM_VERIFY_RC_SECURITY_LEVEL_DOWNGRADE: rc = -EPERM; break; default: @@ -2296,6 +2334,42 @@ static int efx_mcdi_nvram_update_finish(struct efx_nic *efx, unsigned int type) return rc; } +#define EFX_MCDI_NVRAM_UPDATE_FINISH_INITIAL_POLL_DELAY_MS 5 +#define EFX_MCDI_NVRAM_UPDATE_FINISH_MAX_POLL_DELAY_MS 5000 +#define EFX_MCDI_NVRAM_UPDATE_FINISH_RETRIES 185 + +int efx_mcdi_nvram_update_finish_polled(struct efx_nic *efx, unsigned int type) +{ + unsigned int delay = EFX_MCDI_NVRAM_UPDATE_FINISH_INITIAL_POLL_DELAY_MS; + unsigned int retry = 0; + int rc; + + /* NVRAM updates can take a long time (e.g. up to 1 minute for bundle + * images). Polling for NVRAM update completion ensures that other MCDI + * commands can be issued before the background NVRAM update completes. + * + * The initial call either completes the update synchronously, or + * returns -EAGAIN to indicate processing is continuing. In the latter + * case, we poll for at least 900 seconds, at increasing intervals + * (5ms, 50ms, 500ms, 5s). + */ + rc = efx_mcdi_nvram_update_finish(efx, type, EFX_UPDATE_FINISH_BACKGROUND); + while (rc == -EAGAIN) { + if (retry > EFX_MCDI_NVRAM_UPDATE_FINISH_RETRIES) + return -ETIMEDOUT; + retry++; + + msleep(delay); + if (delay < EFX_MCDI_NVRAM_UPDATE_FINISH_MAX_POLL_DELAY_MS) + delay *= 10; + + rc = efx_mcdi_nvram_update_finish(efx, type, EFX_UPDATE_FINISH_POLL); + } + return rc; +} + +#ifdef CONFIG_SFC_MTD + int efx_mcdi_mtd_read(struct mtd_info *mtd, loff_t start, size_t len, size_t *retlen, u8 *buffer) { @@ -2389,7 +2463,8 @@ int efx_mcdi_mtd_sync(struct mtd_info *mtd) if (part->updating) { part->updating = false; - rc = efx_mcdi_nvram_update_finish(efx, part->nvram_type); + rc = efx_mcdi_nvram_update_finish(efx, part->nvram_type, + EFX_UPDATE_FINISH_WAIT); } return rc; diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h index cdb17d7c147f..3755cd3fe1e6 100644 --- a/drivers/net/ethernet/sfc/mcdi.h +++ b/drivers/net/ethernet/sfc/mcdi.h @@ -392,7 +392,7 @@ int efx_mcdi_log_ctrl(struct efx_nic *efx, bool evq, bool uart, u32 dest_evq); int efx_mcdi_nvram_types(struct efx_nic *efx, u32 *nvram_types_out); int efx_mcdi_nvram_info(struct efx_nic *efx, unsigned int type, size_t *size_out, size_t *erase_size_out, - bool *protected_out); + size_t *write_size_out, bool *protected_out); int efx_new_mcdi_nvram_test_all(struct efx_nic *efx); int efx_mcdi_nvram_metadata(struct efx_nic *efx, unsigned int type, u32 *subtype, u16 version[4], char *desc, @@ -424,6 +424,26 @@ static inline int efx_mcdi_mon_probe(struct efx_nic *efx) { return 0; } static inline void efx_mcdi_mon_remove(struct efx_nic *efx) {} #endif +int efx_mcdi_nvram_update_start(struct efx_nic *efx, unsigned int type); +int efx_mcdi_nvram_write(struct efx_nic *efx, unsigned int type, + loff_t offset, const u8 *buffer, size_t length); +int efx_mcdi_nvram_erase(struct efx_nic *efx, unsigned int type, + loff_t offset, size_t length); +int efx_mcdi_nvram_metadata(struct efx_nic *efx, unsigned int type, + u32 *subtype, u16 version[4], char *desc, + size_t descsize); + +enum efx_update_finish_mode { + EFX_UPDATE_FINISH_WAIT, + EFX_UPDATE_FINISH_BACKGROUND, + EFX_UPDATE_FINISH_POLL, + EFX_UPDATE_FINISH_ABORT, +}; + +int efx_mcdi_nvram_update_finish(struct efx_nic *efx, unsigned int type, + enum efx_update_finish_mode mode); +int efx_mcdi_nvram_update_finish_polled(struct efx_nic *efx, unsigned int type); + #ifdef CONFIG_SFC_MTD int efx_mcdi_mtd_read(struct mtd_info *mtd, loff_t start, size_t len, size_t *retlen, u8 *buffer); diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index f70a7b7d6345..8b0689f749b5 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -1006,6 +1006,7 @@ struct efx_mae; * @dl_port: devlink port associated with the PF * @mem_bar: The BAR that is mapped into membase. * @reg_base: Offset from the start of the bar to the function control window. + * @reflash_mutex: Mutex for serialising firmware reflash operations. * @monitor_work: Hardware monitor workitem * @biu_lock: BIU (bus interface unit) lock * @last_irq_cpu: Last CPU to handle a possible test interrupt. This @@ -1191,6 +1192,7 @@ struct efx_nic { struct devlink_port *dl_port; unsigned int mem_bar; u32 reg_base; + struct mutex reflash_mutex; /* The following fields may be written more often */ diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index e25db747a81a..55053528e498 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -530,6 +530,20 @@ struct dma_features { #define STMMAC_DEFAULT_TWT_LS 0x1E #define STMMAC_ET_MAX 0xFFFFF +/* Common LPI register bits */ +#define LPI_CTRL_STATUS_LPITCSE BIT(21) /* LPI Tx Clock Stop Enable, gmac4, xgmac2 only */ +#define LPI_CTRL_STATUS_LPIATE BIT(20) /* LPI Timer Enable, gmac4 only */ +#define LPI_CTRL_STATUS_LPITXA BIT(19) /* Enable LPI TX Automate */ +#define LPI_CTRL_STATUS_PLSEN BIT(18) /* Enable PHY Link Status */ +#define LPI_CTRL_STATUS_PLS BIT(17) /* PHY Link Status */ +#define LPI_CTRL_STATUS_LPIEN BIT(16) /* LPI Enable */ +#define LPI_CTRL_STATUS_RLPIST BIT(9) /* Receive LPI state, gmac1000 only? */ +#define LPI_CTRL_STATUS_TLPIST BIT(8) /* Transmit LPI state, gmac1000 only? */ +#define LPI_CTRL_STATUS_RLPIEX BIT(3) /* Receive LPI Exit */ +#define LPI_CTRL_STATUS_RLPIEN BIT(2) /* Receive LPI Entry */ +#define LPI_CTRL_STATUS_TLPIEX BIT(1) /* Transmit LPI Exit */ +#define LPI_CTRL_STATUS_TLPIEN BIT(0) /* Transmit LPI Entry */ + #define STMMAC_CHAIN_MODE 0x1 #define STMMAC_RING_MODE 0x2 diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index bd4eb187f8c6..392574bdd4a4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -29,10 +29,8 @@ struct tegra_eqos { void __iomem *regs; struct reset_control *rst; - struct clk *clk_master; struct clk *clk_slave; struct clk *clk_tx; - struct clk *clk_rx; struct gpio_desc *reset; }; @@ -123,49 +121,14 @@ static int dwc_qos_probe(struct platform_device *pdev, struct plat_stmmacenet_data *plat_dat, struct stmmac_resources *stmmac_res) { - int err; - - plat_dat->stmmac_clk = devm_clk_get(&pdev->dev, "apb_pclk"); - if (IS_ERR(plat_dat->stmmac_clk)) { - dev_err(&pdev->dev, "apb_pclk clock not found.\n"); - return PTR_ERR(plat_dat->stmmac_clk); - } - - err = clk_prepare_enable(plat_dat->stmmac_clk); - if (err < 0) { - dev_err(&pdev->dev, "failed to enable apb_pclk clock: %d\n", - err); - return err; - } - - plat_dat->pclk = devm_clk_get(&pdev->dev, "phy_ref_clk"); - if (IS_ERR(plat_dat->pclk)) { - dev_err(&pdev->dev, "phy_ref_clk clock not found.\n"); - err = PTR_ERR(plat_dat->pclk); - goto disable; - } - - err = clk_prepare_enable(plat_dat->pclk); - if (err < 0) { - dev_err(&pdev->dev, "failed to enable phy_ref clock: %d\n", - err); - goto disable; + for (int i = 0; i < plat_dat->num_clks; i++) { + if (strcmp(plat_dat->clks[i].id, "apb_pclk") == 0) + plat_dat->stmmac_clk = plat_dat->clks[i].clk; + else if (strcmp(plat_dat->clks[i].id, "phy_ref_clk") == 0) + plat_dat->pclk = plat_dat->clks[i].clk; } return 0; - -disable: - clk_disable_unprepare(plat_dat->stmmac_clk); - return err; -} - -static void dwc_qos_remove(struct platform_device *pdev) -{ - struct net_device *ndev = platform_get_drvdata(pdev); - struct stmmac_priv *priv = netdev_priv(ndev); - - clk_disable_unprepare(priv->plat->pclk); - clk_disable_unprepare(priv->plat->stmmac_clk); } #define SDMEMCOMPPADCTRL 0x8800 @@ -178,7 +141,7 @@ static void dwc_qos_remove(struct platform_device *pdev) #define AUTO_CAL_STATUS 0x880c #define AUTO_CAL_STATUS_ACTIVE BIT(31) -static void tegra_eqos_fix_speed(void *priv, unsigned int speed, unsigned int mode) +static void tegra_eqos_fix_speed(void *priv, int speed, unsigned int mode) { struct tegra_eqos *eqos = priv; bool needs_calibration = false; @@ -197,7 +160,7 @@ static void tegra_eqos_fix_speed(void *priv, unsigned int speed, unsigned int mo break; default: - dev_err(eqos->dev, "invalid speed %u\n", speed); + dev_err(eqos->dev, "invalid speed %d\n", speed); break; } @@ -278,52 +241,19 @@ static int tegra_eqos_probe(struct platform_device *pdev, if (!is_of_node(dev->fwnode)) goto bypass_clk_reset_gpio; - eqos->clk_master = devm_clk_get(&pdev->dev, "master_bus"); - if (IS_ERR(eqos->clk_master)) { - err = PTR_ERR(eqos->clk_master); - goto error; - } - - err = clk_prepare_enable(eqos->clk_master); - if (err < 0) - goto error; - - eqos->clk_slave = devm_clk_get(&pdev->dev, "slave_bus"); - if (IS_ERR(eqos->clk_slave)) { - err = PTR_ERR(eqos->clk_slave); - goto disable_master; - } - - data->stmmac_clk = eqos->clk_slave; - - err = clk_prepare_enable(eqos->clk_slave); - if (err < 0) - goto disable_master; - - eqos->clk_rx = devm_clk_get(&pdev->dev, "rx"); - if (IS_ERR(eqos->clk_rx)) { - err = PTR_ERR(eqos->clk_rx); - goto disable_slave; - } - - err = clk_prepare_enable(eqos->clk_rx); - if (err < 0) - goto disable_slave; - - eqos->clk_tx = devm_clk_get(&pdev->dev, "tx"); - if (IS_ERR(eqos->clk_tx)) { - err = PTR_ERR(eqos->clk_tx); - goto disable_rx; + for (int i = 0; i < data->num_clks; i++) { + if (strcmp(data->clks[i].id, "slave_bus") == 0) { + eqos->clk_slave = data->clks[i].clk; + data->stmmac_clk = eqos->clk_slave; + } else if (strcmp(data->clks[i].id, "tx") == 0) { + eqos->clk_tx = data->clks[i].clk; + } } - err = clk_prepare_enable(eqos->clk_tx); - if (err < 0) - goto disable_rx; - eqos->reset = devm_gpiod_get(&pdev->dev, "phy-reset", GPIOD_OUT_HIGH); if (IS_ERR(eqos->reset)) { err = PTR_ERR(eqos->reset); - goto disable_tx; + return err; } usleep_range(2000, 4000); @@ -365,15 +295,7 @@ reset: reset_control_assert(eqos->rst); reset_phy: gpiod_set_value(eqos->reset, 1); -disable_tx: - clk_disable_unprepare(eqos->clk_tx); -disable_rx: - clk_disable_unprepare(eqos->clk_rx); -disable_slave: - clk_disable_unprepare(eqos->clk_slave); -disable_master: - clk_disable_unprepare(eqos->clk_master); -error: + return err; } @@ -383,10 +305,6 @@ static void tegra_eqos_remove(struct platform_device *pdev) reset_control_assert(eqos->rst); gpiod_set_value(eqos->reset, 1); - clk_disable_unprepare(eqos->clk_tx); - clk_disable_unprepare(eqos->clk_rx); - clk_disable_unprepare(eqos->clk_slave); - clk_disable_unprepare(eqos->clk_master); } struct dwc_eth_dwmac_data { @@ -398,7 +316,6 @@ struct dwc_eth_dwmac_data { static const struct dwc_eth_dwmac_data dwc_qos_data = { .probe = dwc_qos_probe, - .remove = dwc_qos_remove, }; static const struct dwc_eth_dwmac_data tegra_eqos_data = { @@ -434,9 +351,19 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev) if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); + ret = devm_clk_bulk_get_all(&pdev->dev, &plat_dat->clks); + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, "Failed to retrieve all required clocks\n"); + plat_dat->num_clks = ret; + + ret = clk_bulk_prepare_enable(plat_dat->num_clks, plat_dat->clks); + if (ret) + return dev_err_probe(&pdev->dev, ret, "Failed to enable clocks\n"); + ret = data->probe(pdev, plat_dat, &stmmac_res); if (ret < 0) { dev_err_probe(&pdev->dev, ret, "failed to probe subdriver\n"); + clk_bulk_disable_unprepare(plat_dat->num_clks, plat_dat->clks); return ret; } @@ -451,7 +378,8 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev) return ret; remove: - data->remove(pdev); + if (data->remove) + data->remove(pdev); return ret; } @@ -459,10 +387,15 @@ remove: static void dwc_eth_dwmac_remove(struct platform_device *pdev) { const struct dwc_eth_dwmac_data *data = device_get_match_data(&pdev->dev); + struct plat_stmmacenet_data *plat_data = dev_get_platdata(&pdev->dev); stmmac_dvr_remove(&pdev->dev); - data->remove(pdev); + if (data->remove) + data->remove(pdev); + + if (plat_data) + clk_bulk_disable_unprepare(plat_data->num_clks, plat_data->clks); } static const struct of_device_id dwc_eth_dwmac_match[] = { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index 20d3a202bb8d..610204b51e3f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -51,7 +51,7 @@ struct imx_dwmac_ops { int (*fix_soc_reset)(void *priv, void __iomem *ioaddr); int (*set_intf_mode)(struct plat_stmmacenet_data *plat_dat); - void (*fix_mac_speed)(void *priv, unsigned int speed, unsigned int mode); + void (*fix_mac_speed)(void *priv, int speed, unsigned int mode); }; struct imx_priv_data { @@ -192,7 +192,7 @@ static void imx_dwmac_exit(struct platform_device *pdev, void *priv) /* nothing to do now */ } -static void imx_dwmac_fix_speed(void *priv, unsigned int speed, unsigned int mode) +static void imx_dwmac_fix_speed(void *priv, int speed, unsigned int mode) { struct plat_stmmacenet_data *plat_dat; struct imx_priv_data *dwmac = priv; @@ -208,7 +208,7 @@ static void imx_dwmac_fix_speed(void *priv, unsigned int speed, unsigned int mod rate = rgmii_clock(speed); if (rate < 0) { - dev_err(dwmac->dev, "invalid speed %u\n", speed); + dev_err(dwmac->dev, "invalid speed %d\n", speed); return; } @@ -217,7 +217,7 @@ static void imx_dwmac_fix_speed(void *priv, unsigned int speed, unsigned int mod dev_err(dwmac->dev, "failed to set tx rate %lu\n", rate); } -static void imx93_dwmac_fix_speed(void *priv, unsigned int speed, unsigned int mode) +static void imx93_dwmac_fix_speed(void *priv, int speed, unsigned int mode) { struct imx_priv_data *dwmac = priv; unsigned int iface; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c index ddee6154d40b..0591756a2100 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c @@ -22,13 +22,13 @@ struct intel_dwmac { }; struct intel_dwmac_data { - void (*fix_mac_speed)(void *priv, unsigned int speed, unsigned int mode); + void (*fix_mac_speed)(void *priv, int speed, unsigned int mode); unsigned long ptp_ref_clk_rate; unsigned long tx_clk_rate; bool tx_clk_en; }; -static void kmb_eth_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode) +static void kmb_eth_fix_mac_speed(void *priv, int speed, unsigned int mode) { struct intel_dwmac *dwmac = priv; long rate; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c index 61227dcf56dc..7f4b9c1cc32b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c @@ -112,7 +112,7 @@ struct ipq806x_gmac { phy_interface_t phy_mode; }; -static int get_clk_div_sgmii(struct ipq806x_gmac *gmac, unsigned int speed) +static int get_clk_div_sgmii(struct ipq806x_gmac *gmac, int speed) { struct device *dev = &gmac->pdev->dev; int div; @@ -138,7 +138,7 @@ static int get_clk_div_sgmii(struct ipq806x_gmac *gmac, unsigned int speed) return div; } -static int get_clk_div_rgmii(struct ipq806x_gmac *gmac, unsigned int speed) +static int get_clk_div_rgmii(struct ipq806x_gmac *gmac, int speed) { struct device *dev = &gmac->pdev->dev; int div; @@ -164,7 +164,7 @@ static int get_clk_div_rgmii(struct ipq806x_gmac *gmac, unsigned int speed) return div; } -static int ipq806x_gmac_set_speed(struct ipq806x_gmac *gmac, unsigned int speed) +static int ipq806x_gmac_set_speed(struct ipq806x_gmac *gmac, int speed) { uint32_t clk_bits, val; int div; @@ -260,7 +260,7 @@ static int ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac) return PTR_ERR_OR_ZERO(gmac->qsgmii_csr); } -static void ipq806x_gmac_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode) +static void ipq806x_gmac_fix_mac_speed(void *priv, int speed, unsigned int mode) { struct ipq806x_gmac *gmac = priv; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index bfe6e2d631bd..60a4e3330ccd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -149,8 +149,7 @@ static struct stmmac_pci_info loongson_gmac_pci_info = { .setup = loongson_gmac_data, }; -static void loongson_gnet_fix_speed(void *priv, unsigned int speed, - unsigned int mode) +static void loongson_gnet_fix_speed(void *priv, int speed, unsigned int mode) { struct loongson_data *ld = (struct loongson_data *)priv; struct net_device *ndev = dev_get_drvdata(ld->dev); @@ -574,6 +573,9 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id if (ret) goto err_disable_device; + plat->tx_fifo_size = SZ_16K * plat->tx_queues_to_use; + plat->rx_fifo_size = SZ_16K * plat->rx_queues_to_use; + if (dev_of_node(&pdev->dev)) ret = loongson_dwmac_dt_config(pdev, plat, &res); else diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c index 5469fa1b429e..b115b7873cef 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c @@ -22,7 +22,7 @@ struct meson_dwmac { void __iomem *reg; }; -static void meson6_dwmac_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode) +static void meson6_dwmac_fix_mac_speed(void *priv, int speed, unsigned int mode) { struct meson_dwmac *dwmac = priv; unsigned int val; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 2a5b38723635..192f270197c8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -111,7 +111,7 @@ struct qcom_ethqos { unsigned int link_clk_rate; struct clk *link_clk; struct phy *serdes_phy; - unsigned int speed; + int speed; int serdes_speed; phy_interface_t phy_mode; @@ -175,7 +175,7 @@ static void rgmii_dump(void *priv) #define RGMII_ID_MODE_10_LOW_SVS_CLK_FREQ (5 * 1000 * 1000UL) static void -ethqos_update_link_clk(struct qcom_ethqos *ethqos, unsigned int speed) +ethqos_update_link_clk(struct qcom_ethqos *ethqos, int speed) { if (!phy_interface_mode_is_rgmii(ethqos->phy_mode)) return; @@ -699,7 +699,7 @@ static int ethqos_configure(struct qcom_ethqos *ethqos) return ethqos->configure_func(ethqos); } -static void ethqos_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode) +static void ethqos_fix_mac_speed(void *priv, int speed, unsigned int mode) { struct qcom_ethqos *ethqos = priv; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index a4dc89e23a68..83d104a274c5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -1920,7 +1920,7 @@ static void rk_gmac_powerdown(struct rk_priv_data *gmac) gmac_clk_enable(gmac, false); } -static void rk_fix_speed(void *priv, unsigned int speed, unsigned int mode) +static void rk_fix_speed(void *priv, int speed, unsigned int mode) { struct rk_priv_data *bsp_priv = priv; struct device *dev = &bsp_priv->pdev->dev; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c index 9cc0e5817416..6a498833b8ed 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c @@ -100,7 +100,7 @@ static void s32_gmac_exit(struct platform_device *pdev, void *priv) clk_disable_unprepare(gmac->rx_clk); } -static void s32_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode) +static void s32_fix_mac_speed(void *priv, int speed, unsigned int mode) { struct s32_priv_data *gmac = priv; long tx_clk_rate; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 16020b72dec8..6b78ae730466 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -61,7 +61,7 @@ struct socfpga_dwmac { struct mdio_device *pcs_mdiodev; }; -static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode) +static void socfpga_dwmac_fix_mac_speed(void *priv, int speed, unsigned int mode) { struct socfpga_dwmac *dwmac = (struct socfpga_dwmac *)priv; void __iomem *splitter_base = dwmac->splitter_base; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c index 0a0a363d3730..282c846dad0b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c @@ -31,7 +31,7 @@ struct starfive_dwmac { const struct starfive_dwmac_data *data; }; -static void starfive_dwmac_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode) +static void starfive_dwmac_fix_mac_speed(void *priv, int speed, unsigned int mode) { struct starfive_dwmac *dwmac = priv; long rate; @@ -39,7 +39,7 @@ static void starfive_dwmac_fix_mac_speed(void *priv, unsigned int speed, unsigne rate = rgmii_clock(speed); if (rate < 0) { - dev_err(dwmac->dev, "invalid speed %u\n", speed); + dev_err(dwmac->dev, "invalid speed %d\n", speed); return; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c index f25461c292fe..13b9c2a51fce 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c @@ -99,12 +99,12 @@ struct sti_dwmac { int clk_sel_reg; /* GMAC ext clk selection register */ struct regmap *regmap; bool gmac_en; - u32 speed; - void (*fix_retime_src)(void *priv, unsigned int speed, unsigned int mode); + int speed; + void (*fix_retime_src)(void *priv, int speed, unsigned int mode); }; struct sti_dwmac_of_data { - void (*fix_retime_src)(void *priv, unsigned int speed, unsigned int mode); + void (*fix_retime_src)(void *priv, int speed, unsigned int mode); }; static u32 phy_intf_sels[] = { @@ -132,7 +132,7 @@ static u32 stih4xx_tx_retime_val[] = { | STIH4XX_ETH_SEL_INTERNAL_NOTEXT_PHYCLK, }; -static void stih4xx_fix_retime_src(void *priv, u32 spd, unsigned int mode) +static void stih4xx_fix_retime_src(void *priv, int spd, unsigned int mode) { struct sti_dwmac *dwmac = priv; u32 src = dwmac->tx_retime_src; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c index 9ae318436c4a..1b1ce2888b2e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c @@ -72,7 +72,7 @@ static void sun7i_gmac_exit(struct platform_device *pdev, void *priv) regulator_disable(gmac->regulator); } -static void sun7i_fix_speed(void *priv, unsigned int speed, unsigned int mode) +static void sun7i_fix_speed(void *priv, int speed, unsigned int mode) { struct sunxi_priv_data *gmac = priv; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c index dce84ed184e9..ddb1d8aba321 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c @@ -104,7 +104,7 @@ static int thead_dwmac_set_txclk_dir(struct plat_stmmacenet_data *plat) return 0; } -static void thead_dwmac_fix_speed(void *priv, unsigned int speed, unsigned int mode) +static void thead_dwmac_fix_speed(void *priv, int speed, unsigned int mode) { struct plat_stmmacenet_data *plat; struct thead_dwmac *dwmac = priv; @@ -142,7 +142,7 @@ static void thead_dwmac_fix_speed(void *priv, unsigned int speed, unsigned int m div = rate * 10 / GMAC_MII_RATE; break; default: - dev_err(dwmac->dev, "invalid speed %u\n", speed); + dev_err(dwmac->dev, "invalid speed %d\n", speed); return; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c index eccf7f537467..33cf99797df5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c @@ -54,7 +54,7 @@ struct visconti_eth { spinlock_t lock; /* lock to protect register update */ }; -static void visconti_eth_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode) +static void visconti_eth_fix_mac_speed(void *priv, int speed, unsigned int mode) { struct visconti_eth *dwmac = priv; struct net_device *netdev = dev_get_drvdata(dwmac->dev); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h index 600fea8f712f..967a16212faf 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h @@ -59,22 +59,11 @@ enum power_event { /* Energy Efficient Ethernet (EEE) * * LPI status, timer and control register offset + * For LPI control and status bit definitions, see common.h. */ #define LPI_CTRL_STATUS 0x0030 #define LPI_TIMER_CTRL 0x0034 -/* LPI control and status defines */ -#define LPI_CTRL_STATUS_LPITXA 0x00080000 /* Enable LPI TX Automate */ -#define LPI_CTRL_STATUS_PLSEN 0x00040000 /* Enable PHY Link Status */ -#define LPI_CTRL_STATUS_PLS 0x00020000 /* PHY Link Status */ -#define LPI_CTRL_STATUS_LPIEN 0x00010000 /* LPI Enable */ -#define LPI_CTRL_STATUS_RLPIST 0x00000200 /* Receive LPI state */ -#define LPI_CTRL_STATUS_TLPIST 0x00000100 /* Transmit LPI state */ -#define LPI_CTRL_STATUS_RLPIEX 0x00000008 /* Receive LPI Exit */ -#define LPI_CTRL_STATUS_RLPIEN 0x00000004 /* Receive LPI Entry */ -#define LPI_CTRL_STATUS_TLPIEX 0x00000002 /* Transmit LPI Exit */ -#define LPI_CTRL_STATUS_TLPIEN 0x00000001 /* Transmit LPI Entry */ - /* GMAC HW ADDR regs */ #define GMAC_ADDR_HIGH(reg) ((reg > 15) ? 0x00000800 + (reg - 16) * 8 : \ 0x00000040 + (reg * 8)) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index 96bcda0856ec..a8b901cdf5cb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -16,6 +16,7 @@ #include <linux/slab.h> #include <linux/ethtool.h> #include <linux/io.h> +#include <linux/string_choices.h> #include "stmmac.h" #include "stmmac_pcs.h" #include "stmmac_ptp.h" @@ -342,31 +343,24 @@ static int dwmac1000_irq_status(struct mac_device_info *hw, return ret; } -static void dwmac1000_set_eee_mode(struct mac_device_info *hw, - bool en_tx_lpi_clockgating) +static int dwmac1000_set_lpi_mode(struct mac_device_info *hw, + enum stmmac_lpi_mode mode, + bool en_tx_lpi_clockgating, u32 et) { void __iomem *ioaddr = hw->pcsr; u32 value; - /*TODO - en_tx_lpi_clockgating treatment */ + if (mode == STMMAC_LPI_TIMER) + return -EOPNOTSUPP; - /* Enable the link status receive on RGMII, SGMII ore SMII - * receive path and instruct the transmit to enter in LPI - * state. - */ value = readl(ioaddr + LPI_CTRL_STATUS); - value |= LPI_CTRL_STATUS_LPIEN | LPI_CTRL_STATUS_LPITXA; + if (mode == STMMAC_LPI_FORCED) + value |= LPI_CTRL_STATUS_LPIEN | LPI_CTRL_STATUS_LPITXA; + else + value &= ~(LPI_CTRL_STATUS_LPIEN | LPI_CTRL_STATUS_LPITXA); writel(value, ioaddr + LPI_CTRL_STATUS); -} - -static void dwmac1000_reset_eee_mode(struct mac_device_info *hw) -{ - void __iomem *ioaddr = hw->pcsr; - u32 value; - value = readl(ioaddr + LPI_CTRL_STATUS); - value &= ~(LPI_CTRL_STATUS_LPIEN | LPI_CTRL_STATUS_LPITXA); - writel(value, ioaddr + LPI_CTRL_STATUS); + return 0; } static void dwmac1000_set_eee_pls(struct mac_device_info *hw, int link) @@ -509,8 +503,7 @@ const struct stmmac_ops dwmac1000_ops = { .pmt = dwmac1000_pmt, .set_umac_addr = dwmac1000_set_umac_addr, .get_umac_addr = dwmac1000_get_umac_addr, - .set_eee_mode = dwmac1000_set_eee_mode, - .reset_eee_mode = dwmac1000_reset_eee_mode, + .set_lpi_mode = dwmac1000_set_lpi_mode, .set_eee_timer = dwmac1000_set_eee_timer, .set_eee_pls = dwmac1000_set_eee_pls, .debug = dwmac1000_debug, @@ -633,7 +626,7 @@ int dwmac1000_ptp_enable(struct ptp_clock_info *ptp, } netdev_dbg(priv->dev, "Auxiliary Snapshot %s.\n", - on ? "enabled" : "disabled"); + str_enabled_disabled(on)); writel(tcr_val, ptpaddr + PTP_TCR); /* wait for auxts fifo clear to finish */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index 184d41a306af..42fe29a4e300 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -177,23 +177,13 @@ enum power_event { /* Energy Efficient Ethernet (EEE) for GMAC4 * * LPI status, timer and control register offset + * For LPI control and status bit definitions, see common.h. */ #define GMAC4_LPI_CTRL_STATUS 0xd0 #define GMAC4_LPI_TIMER_CTRL 0xd4 #define GMAC4_LPI_ENTRY_TIMER 0xd8 #define GMAC4_MAC_ONEUS_TIC_COUNTER 0xdc -/* LPI control and status defines */ -#define GMAC4_LPI_CTRL_STATUS_LPITCSE BIT(21) /* LPI Tx Clock Stop Enable */ -#define GMAC4_LPI_CTRL_STATUS_LPIATE BIT(20) /* LPI Timer Enable */ -#define GMAC4_LPI_CTRL_STATUS_LPITXA BIT(19) /* Enable LPI TX Automate */ -#define GMAC4_LPI_CTRL_STATUS_PLS BIT(17) /* PHY Link Status */ -#define GMAC4_LPI_CTRL_STATUS_LPIEN BIT(16) /* LPI Enable */ -#define GMAC4_LPI_CTRL_STATUS_RLPIEX BIT(3) /* Receive LPI Exit */ -#define GMAC4_LPI_CTRL_STATUS_RLPIEN BIT(2) /* Receive LPI Entry */ -#define GMAC4_LPI_CTRL_STATUS_TLPIEX BIT(1) /* Transmit LPI Exit */ -#define GMAC4_LPI_CTRL_STATUS_TLPIEN BIT(0) /* Transmit LPI Entry */ - /* MAC Debug bitmap */ #define GMAC_DEBUG_TFCSTS_MASK GENMASK(18, 17) #define GMAC_DEBUG_TFCSTS_SHIFT 17 diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 9ed8620580a8..cc4ddf608652 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -376,33 +376,46 @@ static void dwmac4_get_umac_addr(struct mac_device_info *hw, GMAC_ADDR_LOW(reg_n)); } -static void dwmac4_set_eee_mode(struct mac_device_info *hw, - bool en_tx_lpi_clockgating) +static int dwmac4_set_lpi_mode(struct mac_device_info *hw, + enum stmmac_lpi_mode mode, + bool en_tx_lpi_clockgating, u32 et) { void __iomem *ioaddr = hw->pcsr; - u32 value; + u32 value, mask; - /* Enable the link status receive on RGMII, SGMII ore SMII - * receive path and instruct the transmit to enter in LPI - * state. - */ - value = readl(ioaddr + GMAC4_LPI_CTRL_STATUS); - value |= GMAC4_LPI_CTRL_STATUS_LPIEN | GMAC4_LPI_CTRL_STATUS_LPITXA; + if (mode == STMMAC_LPI_DISABLE) { + value = 0; + } else { + value = LPI_CTRL_STATUS_LPIEN | LPI_CTRL_STATUS_LPITXA; - if (en_tx_lpi_clockgating) - value |= GMAC4_LPI_CTRL_STATUS_LPITCSE; + if (mode == STMMAC_LPI_TIMER) { + /* Return ERANGE if the timer is larger than the + * register field. + */ + if (et > STMMAC_ET_MAX) + return -ERANGE; - writel(value, ioaddr + GMAC4_LPI_CTRL_STATUS); -} + /* Set the hardware LPI entry timer */ + writel(et, ioaddr + GMAC4_LPI_ENTRY_TIMER); -static void dwmac4_reset_eee_mode(struct mac_device_info *hw) -{ - void __iomem *ioaddr = hw->pcsr; - u32 value; + /* Interpret a zero LPI entry timer to mean + * immediate entry into LPI mode. + */ + if (et) + value |= LPI_CTRL_STATUS_LPIATE; + } - value = readl(ioaddr + GMAC4_LPI_CTRL_STATUS); - value &= ~(GMAC4_LPI_CTRL_STATUS_LPIEN | GMAC4_LPI_CTRL_STATUS_LPITXA); + if (en_tx_lpi_clockgating) + value |= LPI_CTRL_STATUS_LPITCSE; + } + + mask = LPI_CTRL_STATUS_LPIATE | LPI_CTRL_STATUS_LPIEN | + LPI_CTRL_STATUS_LPITXA | LPI_CTRL_STATUS_LPITCSE; + + value |= readl(ioaddr + GMAC4_LPI_CTRL_STATUS) & ~mask; writel(value, ioaddr + GMAC4_LPI_CTRL_STATUS); + + return 0; } static void dwmac4_set_eee_pls(struct mac_device_info *hw, int link) @@ -413,34 +426,13 @@ static void dwmac4_set_eee_pls(struct mac_device_info *hw, int link) value = readl(ioaddr + GMAC4_LPI_CTRL_STATUS); if (link) - value |= GMAC4_LPI_CTRL_STATUS_PLS; + value |= LPI_CTRL_STATUS_PLS; else - value &= ~GMAC4_LPI_CTRL_STATUS_PLS; + value &= ~LPI_CTRL_STATUS_PLS; writel(value, ioaddr + GMAC4_LPI_CTRL_STATUS); } -static void dwmac4_set_eee_lpi_entry_timer(struct mac_device_info *hw, u32 et) -{ - void __iomem *ioaddr = hw->pcsr; - u32 value = et & STMMAC_ET_MAX; - int regval; - - /* Program LPI entry timer value into register */ - writel(value, ioaddr + GMAC4_LPI_ENTRY_TIMER); - - /* Enable/disable LPI entry timer */ - regval = readl(ioaddr + GMAC4_LPI_CTRL_STATUS); - regval |= GMAC4_LPI_CTRL_STATUS_LPIEN | GMAC4_LPI_CTRL_STATUS_LPITXA; - - if (et) - regval |= GMAC4_LPI_CTRL_STATUS_LPIATE; - else - regval &= ~GMAC4_LPI_CTRL_STATUS_LPIATE; - - writel(regval, ioaddr + GMAC4_LPI_CTRL_STATUS); -} - static void dwmac4_set_eee_timer(struct mac_device_info *hw, int ls, int tw) { void __iomem *ioaddr = hw->pcsr; @@ -849,17 +841,17 @@ static int dwmac4_irq_status(struct mac_device_info *hw, /* Clear LPI interrupt by reading MAC_LPI_Control_Status */ u32 status = readl(ioaddr + GMAC4_LPI_CTRL_STATUS); - if (status & GMAC4_LPI_CTRL_STATUS_TLPIEN) { + if (status & LPI_CTRL_STATUS_TLPIEN) { ret |= CORE_IRQ_TX_PATH_IN_LPI_MODE; x->irq_tx_path_in_lpi_mode_n++; } - if (status & GMAC4_LPI_CTRL_STATUS_TLPIEX) { + if (status & LPI_CTRL_STATUS_TLPIEX) { ret |= CORE_IRQ_TX_PATH_EXIT_LPI_MODE; x->irq_tx_path_exit_lpi_mode_n++; } - if (status & GMAC4_LPI_CTRL_STATUS_RLPIEN) + if (status & LPI_CTRL_STATUS_RLPIEN) x->irq_rx_path_in_lpi_mode_n++; - if (status & GMAC4_LPI_CTRL_STATUS_RLPIEX) + if (status & LPI_CTRL_STATUS_RLPIEX) x->irq_rx_path_exit_lpi_mode_n++; } @@ -1201,9 +1193,7 @@ const struct stmmac_ops dwmac4_ops = { .pmt = dwmac4_pmt, .set_umac_addr = dwmac4_set_umac_addr, .get_umac_addr = dwmac4_get_umac_addr, - .set_eee_mode = dwmac4_set_eee_mode, - .reset_eee_mode = dwmac4_reset_eee_mode, - .set_eee_lpi_entry_timer = dwmac4_set_eee_lpi_entry_timer, + .set_lpi_mode = dwmac4_set_lpi_mode, .set_eee_timer = dwmac4_set_eee_timer, .set_eee_pls = dwmac4_set_eee_pls, .pcs_ctrl_ane = dwmac4_ctrl_ane, @@ -1245,9 +1235,7 @@ const struct stmmac_ops dwmac410_ops = { .pmt = dwmac4_pmt, .set_umac_addr = dwmac4_set_umac_addr, .get_umac_addr = dwmac4_get_umac_addr, - .set_eee_mode = dwmac4_set_eee_mode, - .reset_eee_mode = dwmac4_reset_eee_mode, - .set_eee_lpi_entry_timer = dwmac4_set_eee_lpi_entry_timer, + .set_lpi_mode = dwmac4_set_lpi_mode, .set_eee_timer = dwmac4_set_eee_timer, .set_eee_pls = dwmac4_set_eee_pls, .pcs_ctrl_ane = dwmac4_ctrl_ane, @@ -1291,9 +1279,7 @@ const struct stmmac_ops dwmac510_ops = { .pmt = dwmac4_pmt, .set_umac_addr = dwmac4_set_umac_addr, .get_umac_addr = dwmac4_get_umac_addr, - .set_eee_mode = dwmac4_set_eee_mode, - .reset_eee_mode = dwmac4_reset_eee_mode, - .set_eee_lpi_entry_timer = dwmac4_set_eee_lpi_entry_timer, + .set_lpi_mode = dwmac4_set_lpi_mode, .set_eee_timer = dwmac4_set_eee_timer, .set_eee_pls = dwmac4_set_eee_pls, .pcs_ctrl_ane = dwmac4_ctrl_ane, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h index 20027d3c25a7..a03f5d771566 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h @@ -112,14 +112,7 @@ #define XGMAC_MGKPKTEN BIT(1) #define XGMAC_PWRDWN BIT(0) #define XGMAC_LPI_CTRL 0x000000d0 -#define XGMAC_TXCGE BIT(21) -#define XGMAC_LPITXA BIT(19) -#define XGMAC_PLS BIT(17) -#define XGMAC_LPITXEN BIT(16) -#define XGMAC_RLPIEX BIT(3) -#define XGMAC_RLPIEN BIT(2) -#define XGMAC_TLPIEX BIT(1) -#define XGMAC_TLPIEN BIT(0) +/* For definitions, see LPI_CTRL_STATUS_xxx in common.h */ #define XGMAC_LPI_TIMER_CTRL 0x000000d4 #define XGMAC_HW_FEATURE0 0x0000011c #define XGMAC_HWFEAT_EDMA BIT(31) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 9a60a6e8f633..a6d395c6bacd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -316,17 +316,17 @@ static int dwxgmac2_host_irq_status(struct mac_device_info *hw, if (stat & XGMAC_LPIIS) { u32 lpi = readl(ioaddr + XGMAC_LPI_CTRL); - if (lpi & XGMAC_TLPIEN) { + if (lpi & LPI_CTRL_STATUS_TLPIEN) { ret |= CORE_IRQ_TX_PATH_IN_LPI_MODE; x->irq_tx_path_in_lpi_mode_n++; } - if (lpi & XGMAC_TLPIEX) { + if (lpi & LPI_CTRL_STATUS_TLPIEX) { ret |= CORE_IRQ_TX_PATH_EXIT_LPI_MODE; x->irq_tx_path_exit_lpi_mode_n++; } - if (lpi & XGMAC_RLPIEN) + if (lpi & LPI_CTRL_STATUS_RLPIEN) x->irq_rx_path_in_lpi_mode_n++; - if (lpi & XGMAC_RLPIEX) + if (lpi & LPI_CTRL_STATUS_RLPIEX) x->irq_rx_path_exit_lpi_mode_n++; } @@ -425,29 +425,28 @@ static void dwxgmac2_get_umac_addr(struct mac_device_info *hw, addr[5] = (hi_addr >> 8) & 0xff; } -static void dwxgmac2_set_eee_mode(struct mac_device_info *hw, - bool en_tx_lpi_clockgating) +static int dwxgmac2_set_lpi_mode(struct mac_device_info *hw, + enum stmmac_lpi_mode mode, + bool en_tx_lpi_clockgating, u32 et) { void __iomem *ioaddr = hw->pcsr; u32 value; - value = readl(ioaddr + XGMAC_LPI_CTRL); - - value |= XGMAC_LPITXEN | XGMAC_LPITXA; - if (en_tx_lpi_clockgating) - value |= XGMAC_TXCGE; - - writel(value, ioaddr + XGMAC_LPI_CTRL); -} - -static void dwxgmac2_reset_eee_mode(struct mac_device_info *hw) -{ - void __iomem *ioaddr = hw->pcsr; - u32 value; + if (mode == STMMAC_LPI_TIMER) + return -EOPNOTSUPP; value = readl(ioaddr + XGMAC_LPI_CTRL); - value &= ~(XGMAC_LPITXEN | XGMAC_LPITXA | XGMAC_TXCGE); + if (mode == STMMAC_LPI_FORCED) { + value |= LPI_CTRL_STATUS_LPIEN | LPI_CTRL_STATUS_LPITXA; + if (en_tx_lpi_clockgating) + value |= LPI_CTRL_STATUS_LPITCSE; + } else { + value &= ~(LPI_CTRL_STATUS_LPIEN | LPI_CTRL_STATUS_LPITXA | + LPI_CTRL_STATUS_LPITCSE); + } writel(value, ioaddr + XGMAC_LPI_CTRL); + + return 0; } static void dwxgmac2_set_eee_pls(struct mac_device_info *hw, int link) @@ -457,9 +456,9 @@ static void dwxgmac2_set_eee_pls(struct mac_device_info *hw, int link) value = readl(ioaddr + XGMAC_LPI_CTRL); if (link) - value |= XGMAC_PLS; + value |= LPI_CTRL_STATUS_PLS; else - value &= ~XGMAC_PLS; + value &= ~LPI_CTRL_STATUS_PLS; writel(value, ioaddr + XGMAC_LPI_CTRL); } @@ -1525,8 +1524,7 @@ const struct stmmac_ops dwxgmac210_ops = { .pmt = dwxgmac2_pmt, .set_umac_addr = dwxgmac2_set_umac_addr, .get_umac_addr = dwxgmac2_get_umac_addr, - .set_eee_mode = dwxgmac2_set_eee_mode, - .reset_eee_mode = dwxgmac2_reset_eee_mode, + .set_lpi_mode = dwxgmac2_set_lpi_mode, .set_eee_timer = dwxgmac2_set_eee_timer, .set_eee_pls = dwxgmac2_set_eee_pls, .debug = NULL, @@ -1582,8 +1580,7 @@ const struct stmmac_ops dwxlgmac2_ops = { .pmt = dwxgmac2_pmt, .set_umac_addr = dwxgmac2_set_umac_addr, .get_umac_addr = dwxgmac2_get_umac_addr, - .set_eee_mode = dwxgmac2_set_eee_mode, - .reset_eee_mode = dwxgmac2_reset_eee_mode, + .set_lpi_mode = dwxgmac2_set_lpi_mode, .set_eee_timer = dwxgmac2_set_eee_timer, .set_eee_pls = dwxgmac2_set_eee_pls, .debug = NULL, diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index 0f200b72c225..27c63a9fc163 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -306,6 +306,12 @@ struct stmmac_pps_cfg; struct stmmac_rss; struct stmmac_est; +enum stmmac_lpi_mode { + STMMAC_LPI_DISABLE, + STMMAC_LPI_FORCED, + STMMAC_LPI_TIMER, +}; + /* Helpers to program the MAC core */ struct stmmac_ops { /* MAC core initialization */ @@ -360,10 +366,9 @@ struct stmmac_ops { unsigned int reg_n); void (*get_umac_addr)(struct mac_device_info *hw, unsigned char *addr, unsigned int reg_n); - void (*set_eee_mode)(struct mac_device_info *hw, - bool en_tx_lpi_clockgating); - void (*reset_eee_mode)(struct mac_device_info *hw); - void (*set_eee_lpi_entry_timer)(struct mac_device_info *hw, u32 et); + int (*set_lpi_mode)(struct mac_device_info *hw, + enum stmmac_lpi_mode mode, + bool en_tx_lpi_clockgating, u32 et); void (*set_eee_timer)(struct mac_device_info *hw, int ls, int tw); void (*set_eee_pls)(struct mac_device_info *hw, int link); void (*debug)(struct stmmac_priv *priv, void __iomem *ioaddr, @@ -467,12 +472,8 @@ struct stmmac_ops { stmmac_do_void_callback(__priv, mac, set_umac_addr, __args) #define stmmac_get_umac_addr(__priv, __args...) \ stmmac_do_void_callback(__priv, mac, get_umac_addr, __args) -#define stmmac_set_eee_mode(__priv, __args...) \ - stmmac_do_void_callback(__priv, mac, set_eee_mode, __args) -#define stmmac_reset_eee_mode(__priv, __args...) \ - stmmac_do_void_callback(__priv, mac, reset_eee_mode, __args) -#define stmmac_set_eee_lpi_timer(__priv, __args...) \ - stmmac_do_void_callback(__priv, mac, set_eee_lpi_entry_timer, __args) +#define stmmac_set_lpi_mode(__priv, __args...) \ + stmmac_do_callback(__priv, mac, set_lpi_mode, __args) #define stmmac_set_eee_timer(__priv, __args...) \ stmmac_do_void_callback(__priv, mac, set_eee_timer, __args) #define stmmac_set_eee_pls(__priv, __args...) \ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index f05cae103d83..3395188c198a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -282,8 +282,7 @@ struct stmmac_priv { struct stmmac_channel channel[STMMAC_CH_MAX]; int speed; - unsigned int flow_ctrl; - unsigned int pause; + unsigned int pause_time; struct mii_bus *mii; struct phylink_config phylink_config; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index c0ae7db96f46..4d542f482ecb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -88,13 +88,13 @@ MODULE_PARM_DESC(phyaddr, "Physical device address"); #define STMMAC_XDP_TX BIT(1) #define STMMAC_XDP_REDIRECT BIT(2) -static int flow_ctrl = FLOW_AUTO; +static int flow_ctrl = 0xdead; module_param(flow_ctrl, int, 0644); -MODULE_PARM_DESC(flow_ctrl, "Flow control ability [on/off]"); +MODULE_PARM_DESC(flow_ctrl, "Flow control ability [on/off] (obsolete)"); static int pause = PAUSE_TIME; module_param(pause, int, 0644); -MODULE_PARM_DESC(pause, "Flow Control Pause Time"); +MODULE_PARM_DESC(pause, "Flow Control Pause Time (units of 512 bit times)"); #define TC_DEFAULT 64 static int tc = TC_DEFAULT; @@ -188,12 +188,11 @@ static void stmmac_verify_args(void) watchdog = TX_TIMEO; if (unlikely((buf_sz < DEFAULT_BUFSIZE) || (buf_sz > BUF_SIZE_16KiB))) buf_sz = DEFAULT_BUFSIZE; - if (unlikely(flow_ctrl > 1)) - flow_ctrl = FLOW_AUTO; - else if (likely(flow_ctrl < 0)) - flow_ctrl = FLOW_OFF; if (unlikely((pause < 0) || (pause > 0xffff))) pause = PAUSE_TIME; + + if (flow_ctrl != 0xdead) + pr_warn("stmmac: module parameter 'flow_ctrl' is obsolete - please remove from your module configuration\n"); } static void __stmmac_disable_all_queues(struct stmmac_priv *priv) @@ -390,16 +389,6 @@ static inline u32 stmmac_rx_dirty(struct stmmac_priv *priv, u32 queue) return dirty; } -static void stmmac_disable_hw_lpi_timer(struct stmmac_priv *priv) -{ - stmmac_set_eee_lpi_timer(priv, priv->hw, 0); -} - -static void stmmac_enable_hw_lpi_timer(struct stmmac_priv *priv) -{ - stmmac_set_eee_lpi_timer(priv, priv->hw, priv->tx_lpi_timer); -} - static bool stmmac_eee_tx_busy(struct stmmac_priv *priv) { u32 tx_cnt = priv->plat->tx_queues_to_use; @@ -436,8 +425,9 @@ static void stmmac_try_to_start_sw_lpi(struct stmmac_priv *priv) /* Check and enter in LPI mode */ if (!priv->tx_path_in_lpi_mode) - stmmac_set_eee_mode(priv, priv->hw, - priv->plat->flags & STMMAC_FLAG_EN_TX_LPI_CLOCKGATING); + stmmac_set_lpi_mode(priv, priv->hw, STMMAC_LPI_FORCED, + priv->plat->flags & STMMAC_FLAG_EN_TX_LPI_CLOCKGATING, + 0); } /** @@ -447,8 +437,8 @@ static void stmmac_try_to_start_sw_lpi(struct stmmac_priv *priv) */ static void stmmac_stop_sw_lpi(struct stmmac_priv *priv) { - stmmac_reset_eee_mode(priv, priv->hw); del_timer_sync(&priv->eee_ctrl_timer); + stmmac_set_lpi_mode(priv, priv->hw, STMMAC_LPI_DISABLE, false, 0); priv->tx_path_in_lpi_mode = false; } @@ -466,74 +456,6 @@ static void stmmac_eee_ctrl_timer(struct timer_list *t) stmmac_try_to_start_sw_lpi(priv); } -/** - * stmmac_eee_init - init EEE - * @priv: driver private structure - * @active: indicates whether EEE should be enabled. - * Description: - * if the GMAC supports the EEE (from the HW cap reg) and the phy device - * can also manage EEE, this function enable the LPI state and start related - * timer. - */ -static void stmmac_eee_init(struct stmmac_priv *priv, bool active) -{ - priv->eee_active = active; - - /* Check if MAC core supports the EEE feature. */ - if (!priv->dma_cap.eee) { - priv->eee_enabled = false; - return; - } - - mutex_lock(&priv->lock); - - /* Check if it needs to be deactivated */ - if (!priv->eee_active) { - if (priv->eee_enabled) { - netdev_dbg(priv->dev, "disable EEE\n"); - priv->eee_sw_timer_en = false; - stmmac_disable_hw_lpi_timer(priv); - del_timer_sync(&priv->eee_ctrl_timer); - stmmac_set_eee_timer(priv, priv->hw, 0, - STMMAC_DEFAULT_TWT_LS); - if (priv->hw->xpcs) - xpcs_config_eee(priv->hw->xpcs, - priv->plat->mult_fact_100ns, - false); - } - priv->eee_enabled = false; - mutex_unlock(&priv->lock); - return; - } - - if (priv->eee_active && !priv->eee_enabled) { - stmmac_set_eee_timer(priv, priv->hw, STMMAC_DEFAULT_LIT_LS, - STMMAC_DEFAULT_TWT_LS); - if (priv->hw->xpcs) - xpcs_config_eee(priv->hw->xpcs, - priv->plat->mult_fact_100ns, - true); - } - - if (priv->plat->has_gmac4 && priv->tx_lpi_timer <= STMMAC_ET_MAX) { - /* Use hardware LPI mode */ - del_timer_sync(&priv->eee_ctrl_timer); - priv->tx_path_in_lpi_mode = false; - priv->eee_sw_timer_en = false; - stmmac_enable_hw_lpi_timer(priv); - } else { - /* Use software LPI mode */ - priv->eee_sw_timer_en = true; - stmmac_disable_hw_lpi_timer(priv); - stmmac_restart_sw_lpi_timer(priv); - } - - priv->eee_enabled = true; - - mutex_unlock(&priv->lock); - netdev_dbg(priv->dev, "Energy-Efficient Ethernet initialized\n"); -} - /* stmmac_get_tx_hwtstamp - get HW TX timestamps * @priv: driver private structure * @p : descriptor pointer @@ -935,14 +857,16 @@ static void stmmac_release_ptp(struct stmmac_priv *priv) * stmmac_mac_flow_ctrl - Configure flow control in all queues * @priv: driver private structure * @duplex: duplex passed to the next function + * @flow_ctrl: desired flow control modes * Description: It is used for configuring the flow control in all queues */ -static void stmmac_mac_flow_ctrl(struct stmmac_priv *priv, u32 duplex) +static void stmmac_mac_flow_ctrl(struct stmmac_priv *priv, u32 duplex, + unsigned int flow_ctrl) { u32 tx_cnt = priv->plat->tx_queues_to_use; - stmmac_flow_ctrl(priv, priv->hw, duplex, priv->flow_ctrl, - priv->pause, tx_cnt); + stmmac_flow_ctrl(priv, priv->hw, duplex, flow_ctrl, priv->pause_time, + tx_cnt); } static unsigned long stmmac_mac_get_caps(struct phylink_config *config, @@ -1002,6 +926,7 @@ static void stmmac_mac_link_up(struct phylink_config *config, bool tx_pause, bool rx_pause) { struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev)); + unsigned int flow_ctrl; u32 old_ctrl, ctrl; if ((priv->plat->flags & STMMAC_FLAG_SERDES_UP_AFTER_PHY_LINKUP) && @@ -1082,15 +1007,15 @@ static void stmmac_mac_link_up(struct phylink_config *config, /* Flow Control operation */ if (rx_pause && tx_pause) - priv->flow_ctrl = FLOW_AUTO; + flow_ctrl = FLOW_AUTO; else if (rx_pause && !tx_pause) - priv->flow_ctrl = FLOW_RX; + flow_ctrl = FLOW_RX; else if (!rx_pause && tx_pause) - priv->flow_ctrl = FLOW_TX; + flow_ctrl = FLOW_TX; else - priv->flow_ctrl = FLOW_OFF; + flow_ctrl = FLOW_OFF; - stmmac_mac_flow_ctrl(priv, duplex); + stmmac_mac_flow_ctrl(priv, duplex, flow_ctrl); if (ctrl != old_ctrl) writel(ctrl, priv->ioaddr + MAC_CTRL_REG); @@ -1110,16 +1035,53 @@ static void stmmac_mac_disable_tx_lpi(struct phylink_config *config) { struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev)); - stmmac_eee_init(priv, false); + priv->eee_active = false; + + mutex_lock(&priv->lock); + + priv->eee_enabled = false; + + netdev_dbg(priv->dev, "disable EEE\n"); + priv->eee_sw_timer_en = false; + del_timer_sync(&priv->eee_ctrl_timer); + stmmac_set_lpi_mode(priv, priv->hw, STMMAC_LPI_DISABLE, false, 0); + priv->tx_path_in_lpi_mode = false; + + stmmac_set_eee_timer(priv, priv->hw, 0, STMMAC_DEFAULT_TWT_LS); + mutex_unlock(&priv->lock); } static int stmmac_mac_enable_tx_lpi(struct phylink_config *config, u32 timer, bool tx_clk_stop) { struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev)); + int ret; priv->tx_lpi_timer = timer; - stmmac_eee_init(priv, true); + priv->eee_active = true; + + mutex_lock(&priv->lock); + + priv->eee_enabled = true; + + stmmac_set_eee_timer(priv, priv->hw, STMMAC_DEFAULT_LIT_LS, + STMMAC_DEFAULT_TWT_LS); + + /* Try to cnfigure the hardware timer. */ + ret = stmmac_set_lpi_mode(priv, priv->hw, STMMAC_LPI_TIMER, + priv->plat->flags & STMMAC_FLAG_EN_TX_LPI_CLOCKGATING, + priv->tx_lpi_timer); + + if (ret) { + /* Hardware timer mode not supported, or value out of range. + * Fall back to using software LPI mode + */ + priv->eee_sw_timer_en = true; + stmmac_restart_sw_lpi_timer(priv); + } + + mutex_unlock(&priv->lock); + netdev_dbg(priv->dev, "Energy-Efficient Ethernet initialized\n"); return 0; } @@ -7444,7 +7406,7 @@ int stmmac_dvr_probe(struct device *device, return -ENOMEM; stmmac_set_ethtool_ops(ndev); - priv->pause = pause; + priv->pause_time = pause; priv->plat = plat_dat; priv->ioaddr = res->addr; priv->dev->base_addr = (unsigned long)res->addr; @@ -7640,9 +7602,6 @@ int stmmac_dvr_probe(struct device *device, "%s: warning: maxmtu having invalid value (%d)\n", __func__, priv->plat->maxmtu); - if (flow_ctrl) - priv->flow_ctrl = FLOW_AUTO; /* RX/TX pause on */ - ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; /* Setup channels NAPI */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 0c7d81ddd440..7c0a4046bbe3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -524,6 +524,8 @@ int stmmac_pcs_setup(struct net_device *ndev) if (ret) return dev_err_probe(priv->device, ret, "No xPCS found\n"); + xpcs_config_eee_mult_fact(xpcs, priv->plat->mult_fact_100ns); + priv->hw->xpcs = xpcs; return 0; diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 2806238629f8..3e671be95d6f 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -164,6 +164,7 @@ #define AM65_CPSW_CPPI_TX_PKT_TYPE 0x7 /* XDP */ +#define AM65_CPSW_XDP_TX BIT(2) #define AM65_CPSW_XDP_CONSUMED BIT(1) #define AM65_CPSW_XDP_REDIRECT BIT(0) #define AM65_CPSW_XDP_PASS 0 @@ -829,19 +830,19 @@ static void am65_cpsw_nuss_tx_cleanup(void *data, dma_addr_t desc_dma) { struct am65_cpsw_tx_chn *tx_chn = data; enum am65_cpsw_tx_buf_type buf_type; + struct am65_cpsw_tx_swdata *swdata; struct cppi5_host_desc_t *desc_tx; struct xdp_frame *xdpf; struct sk_buff *skb; - void **swdata; desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma); swdata = cppi5_hdesc_get_swdata(desc_tx); buf_type = am65_cpsw_nuss_buf_type(tx_chn, desc_dma); if (buf_type == AM65_CPSW_TX_BUF_TYPE_SKB) { - skb = *(swdata); + skb = swdata->skb; dev_kfree_skb_any(skb); } else { - xdpf = *(swdata); + xdpf = swdata->xdpf; xdp_return_frame(xdpf); } @@ -1098,10 +1099,10 @@ static int am65_cpsw_xdp_tx_frame(struct net_device *ndev, struct am65_cpsw_common *common = am65_ndev_to_common(ndev); struct am65_cpsw_port *port = am65_ndev_to_port(ndev); struct cppi5_host_desc_t *host_desc; + struct am65_cpsw_tx_swdata *swdata; struct netdev_queue *netif_txq; dma_addr_t dma_desc, dma_buf; u32 pkt_len = xdpf->len; - void **swdata; int ret; host_desc = k3_cppi_desc_pool_alloc(tx_chn->desc_pool); @@ -1131,7 +1132,8 @@ static int am65_cpsw_xdp_tx_frame(struct net_device *ndev, cppi5_hdesc_attach_buf(host_desc, dma_buf, pkt_len, dma_buf, pkt_len); swdata = cppi5_hdesc_get_swdata(host_desc); - *(swdata) = xdpf; + swdata->ndev = ndev; + swdata->xdpf = xdpf; /* Report BQL before sending the packet */ netif_txq = netdev_get_tx_queue(ndev, tx_chn->id); @@ -1167,17 +1169,16 @@ pool_free: static int am65_cpsw_run_xdp(struct am65_cpsw_rx_flow *flow, struct am65_cpsw_port *port, - struct xdp_buff *xdp, - int cpu, int *len) + struct xdp_buff *xdp, int *len) { struct am65_cpsw_common *common = flow->common; struct net_device *ndev = port->ndev; int ret = AM65_CPSW_XDP_CONSUMED; struct am65_cpsw_tx_chn *tx_chn; struct netdev_queue *netif_txq; + int cpu = smp_processor_id(); struct xdp_frame *xdpf; struct bpf_prog *prog; - struct page *page; int pkt_len; u32 act; int err; @@ -1193,8 +1194,7 @@ static int am65_cpsw_run_xdp(struct am65_cpsw_rx_flow *flow, switch (act) { case XDP_PASS: - ret = AM65_CPSW_XDP_PASS; - goto out; + return AM65_CPSW_XDP_PASS; case XDP_TX: tx_chn = &common->tx_chns[cpu % AM65_CPSW_MAX_QUEUES]; netif_txq = netdev_get_tx_queue(ndev, tx_chn->id); @@ -1213,15 +1213,13 @@ static int am65_cpsw_run_xdp(struct am65_cpsw_rx_flow *flow, goto drop; dev_sw_netstats_rx_add(ndev, pkt_len); - ret = AM65_CPSW_XDP_CONSUMED; - goto out; + return AM65_CPSW_XDP_TX; case XDP_REDIRECT: if (unlikely(xdp_do_redirect(ndev, xdp, prog))) goto drop; dev_sw_netstats_rx_add(ndev, pkt_len); - ret = AM65_CPSW_XDP_REDIRECT; - goto out; + return AM65_CPSW_XDP_REDIRECT; default: bpf_warn_invalid_xdp_action(ndev, prog, act); fallthrough; @@ -1233,10 +1231,6 @@ drop: ndev->stats.rx_dropped++; } - page = virt_to_head_page(xdp->data); - am65_cpsw_put_page(flow, page, true); - -out: return ret; } @@ -1274,7 +1268,7 @@ static void am65_cpsw_nuss_rx_csum(struct sk_buff *skb, u32 csum_info) } static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow, - int cpu, int *xdp_state) + int *xdp_state) { struct am65_cpsw_rx_chn *rx_chn = &flow->common->rx_chns; u32 buf_dma_len, pkt_len, port_id = 0, csum_info; @@ -1334,8 +1328,13 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow, xdp_init_buff(&xdp, PAGE_SIZE, &port->xdp_rxq[flow->id]); xdp_prepare_buff(&xdp, page_addr, AM65_CPSW_HEADROOM, pkt_len, false); - *xdp_state = am65_cpsw_run_xdp(flow, port, &xdp, - cpu, &pkt_len); + *xdp_state = am65_cpsw_run_xdp(flow, port, &xdp, &pkt_len); + if (*xdp_state == AM65_CPSW_XDP_CONSUMED) { + page = virt_to_head_page(xdp.data); + am65_cpsw_put_page(flow, page, true); + goto allocate; + } + if (*xdp_state != AM65_CPSW_XDP_PASS) goto allocate; @@ -1401,7 +1400,6 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget) { struct am65_cpsw_rx_flow *flow = am65_cpsw_napi_to_rx_flow(napi_rx); struct am65_cpsw_common *common = flow->common; - int cpu = smp_processor_id(); int xdp_state_or = 0; int cur_budget, ret; int xdp_state; @@ -1410,7 +1408,7 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget) /* process only this flow */ cur_budget = budget; while (cur_budget--) { - ret = am65_cpsw_nuss_rx_packets(flow, cpu, &xdp_state); + ret = am65_cpsw_nuss_rx_packets(flow, &xdp_state); xdp_state_or |= xdp_state; if (ret) break; @@ -1438,52 +1436,6 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget) return num_rx; } -static struct sk_buff * -am65_cpsw_nuss_tx_compl_packet_skb(struct am65_cpsw_tx_chn *tx_chn, - dma_addr_t desc_dma) -{ - struct cppi5_host_desc_t *desc_tx; - struct sk_buff *skb; - void **swdata; - - desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, - desc_dma); - swdata = cppi5_hdesc_get_swdata(desc_tx); - skb = *(swdata); - am65_cpsw_nuss_xmit_free(tx_chn, desc_tx); - - am65_cpts_tx_timestamp(tx_chn->common->cpts, skb); - - dev_sw_netstats_tx_add(skb->dev, 1, skb->len); - - return skb; -} - -static struct xdp_frame * -am65_cpsw_nuss_tx_compl_packet_xdp(struct am65_cpsw_common *common, - struct am65_cpsw_tx_chn *tx_chn, - dma_addr_t desc_dma, - struct net_device **ndev) -{ - struct cppi5_host_desc_t *desc_tx; - struct am65_cpsw_port *port; - struct xdp_frame *xdpf; - u32 port_id = 0; - void **swdata; - - desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma); - cppi5_desc_get_tags_ids(&desc_tx->hdr, NULL, &port_id); - swdata = cppi5_hdesc_get_swdata(desc_tx); - xdpf = *(swdata); - am65_cpsw_nuss_xmit_free(tx_chn, desc_tx); - - port = am65_common_get_port(common, port_id); - dev_sw_netstats_tx_add(port->ndev, 1, xdpf->len); - *ndev = port->ndev; - - return xdpf; -} - static void am65_cpsw_nuss_tx_wake(struct am65_cpsw_tx_chn *tx_chn, struct net_device *ndev, struct netdev_queue *netif_txq) { @@ -1504,13 +1456,17 @@ static void am65_cpsw_nuss_tx_wake(struct am65_cpsw_tx_chn *tx_chn, struct net_d static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common, int chn, unsigned int budget, bool *tdown) { + bool single_port = AM65_CPSW_IS_CPSW2G(common); enum am65_cpsw_tx_buf_type buf_type; + struct am65_cpsw_tx_swdata *swdata; + struct cppi5_host_desc_t *desc_tx; struct device *dev = common->dev; struct am65_cpsw_tx_chn *tx_chn; struct netdev_queue *netif_txq; unsigned int total_bytes = 0; struct net_device *ndev; struct xdp_frame *xdpf; + unsigned int pkt_len; struct sk_buff *skb; dma_addr_t desc_dma; int res, num_tx = 0; @@ -1518,9 +1474,12 @@ static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common, tx_chn = &common->tx_chns[chn]; while (true) { - spin_lock(&tx_chn->lock); + if (!single_port) + spin_lock(&tx_chn->lock); res = k3_udma_glue_pop_tx_chn(tx_chn->tx_chn, &desc_dma); - spin_unlock(&tx_chn->lock); + if (!single_port) + spin_unlock(&tx_chn->lock); + if (res == -ENODATA) break; @@ -1531,27 +1490,43 @@ static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common, break; } + desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, + desc_dma); + swdata = cppi5_hdesc_get_swdata(desc_tx); + ndev = swdata->ndev; buf_type = am65_cpsw_nuss_buf_type(tx_chn, desc_dma); if (buf_type == AM65_CPSW_TX_BUF_TYPE_SKB) { - skb = am65_cpsw_nuss_tx_compl_packet_skb(tx_chn, desc_dma); - ndev = skb->dev; - total_bytes = skb->len; + skb = swdata->skb; + am65_cpts_tx_timestamp(tx_chn->common->cpts, skb); + pkt_len = skb->len; napi_consume_skb(skb, budget); } else { - xdpf = am65_cpsw_nuss_tx_compl_packet_xdp(common, tx_chn, - desc_dma, &ndev); - total_bytes = xdpf->len; + xdpf = swdata->xdpf; + pkt_len = xdpf->len; if (buf_type == AM65_CPSW_TX_BUF_TYPE_XDP_TX) xdp_return_frame_rx_napi(xdpf); else xdp_return_frame(xdpf); } + + total_bytes += pkt_len; num_tx++; + am65_cpsw_nuss_xmit_free(tx_chn, desc_tx); + dev_sw_netstats_tx_add(ndev, 1, pkt_len); + if (!single_port) { + /* as packets from multi ports can be interleaved + * on the same channel, we have to figure out the + * port/queue at every packet and report it/wake queue. + */ + netif_txq = netdev_get_tx_queue(ndev, chn); + netdev_tx_completed_queue(netif_txq, 1, pkt_len); + am65_cpsw_nuss_tx_wake(tx_chn, ndev, netif_txq); + } + } + if (single_port) { netif_txq = netdev_get_tx_queue(ndev, chn); - netdev_tx_completed_queue(netif_txq, num_tx, total_bytes); - am65_cpsw_nuss_tx_wake(tx_chn, ndev, netif_txq); } @@ -1560,66 +1535,6 @@ static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common, return num_tx; } -static int am65_cpsw_nuss_tx_compl_packets_2g(struct am65_cpsw_common *common, - int chn, unsigned int budget, bool *tdown) -{ - enum am65_cpsw_tx_buf_type buf_type; - struct device *dev = common->dev; - struct am65_cpsw_tx_chn *tx_chn; - struct netdev_queue *netif_txq; - unsigned int total_bytes = 0; - struct net_device *ndev; - struct xdp_frame *xdpf; - struct sk_buff *skb; - dma_addr_t desc_dma; - int res, num_tx = 0; - - tx_chn = &common->tx_chns[chn]; - - while (true) { - res = k3_udma_glue_pop_tx_chn(tx_chn->tx_chn, &desc_dma); - if (res == -ENODATA) - break; - - if (cppi5_desc_is_tdcm(desc_dma)) { - if (atomic_dec_and_test(&common->tdown_cnt)) - complete(&common->tdown_complete); - *tdown = true; - break; - } - - buf_type = am65_cpsw_nuss_buf_type(tx_chn, desc_dma); - if (buf_type == AM65_CPSW_TX_BUF_TYPE_SKB) { - skb = am65_cpsw_nuss_tx_compl_packet_skb(tx_chn, desc_dma); - ndev = skb->dev; - total_bytes += skb->len; - napi_consume_skb(skb, budget); - } else { - xdpf = am65_cpsw_nuss_tx_compl_packet_xdp(common, tx_chn, - desc_dma, &ndev); - total_bytes += xdpf->len; - if (buf_type == AM65_CPSW_TX_BUF_TYPE_XDP_TX) - xdp_return_frame_rx_napi(xdpf); - else - xdp_return_frame(xdpf); - } - num_tx++; - } - - if (!num_tx) - return 0; - - netif_txq = netdev_get_tx_queue(ndev, chn); - - netdev_tx_completed_queue(netif_txq, num_tx, total_bytes); - - am65_cpsw_nuss_tx_wake(tx_chn, ndev, netif_txq); - - dev_dbg(dev, "%s:%u pkt:%d\n", __func__, chn, num_tx); - - return num_tx; -} - static enum hrtimer_restart am65_cpsw_nuss_tx_timer_callback(struct hrtimer *timer) { struct am65_cpsw_tx_chn *tx_chns = @@ -1635,13 +1550,8 @@ static int am65_cpsw_nuss_tx_poll(struct napi_struct *napi_tx, int budget) bool tdown = false; int num_tx; - if (AM65_CPSW_IS_CPSW2G(tx_chn->common)) - num_tx = am65_cpsw_nuss_tx_compl_packets_2g(tx_chn->common, tx_chn->id, - budget, &tdown); - else - num_tx = am65_cpsw_nuss_tx_compl_packets(tx_chn->common, - tx_chn->id, budget, &tdown); - + num_tx = am65_cpsw_nuss_tx_compl_packets(tx_chn->common, + tx_chn->id, budget, &tdown); if (num_tx >= budget) return budget; @@ -1685,12 +1595,12 @@ static netdev_tx_t am65_cpsw_nuss_ndo_slave_xmit(struct sk_buff *skb, struct am65_cpsw_common *common = am65_ndev_to_common(ndev); struct cppi5_host_desc_t *first_desc, *next_desc, *cur_desc; struct am65_cpsw_port *port = am65_ndev_to_port(ndev); + struct am65_cpsw_tx_swdata *swdata; struct device *dev = common->dev; struct am65_cpsw_tx_chn *tx_chn; struct netdev_queue *netif_txq; dma_addr_t desc_dma, buf_dma; int ret, q_idx, i; - void **swdata; u32 *psdata; u32 pkt_len; @@ -1736,7 +1646,8 @@ static netdev_tx_t am65_cpsw_nuss_ndo_slave_xmit(struct sk_buff *skb, k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &buf_dma); cppi5_hdesc_attach_buf(first_desc, buf_dma, pkt_len, buf_dma, pkt_len); swdata = cppi5_hdesc_get_swdata(first_desc); - *(swdata) = skb; + swdata->ndev = ndev; + swdata->skb = skb; psdata = cppi5_hdesc_get_psdata(first_desc); /* HW csum offload if enabled */ @@ -3578,6 +3489,10 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) __be64 id_temp; int ret, i; + BUILD_BUG_ON_MSG(sizeof(struct am65_cpsw_tx_swdata) > AM65_CPSW_NAV_SW_DATA_SIZE, + "TX SW_DATA size exceeds AM65_CPSW_NAV_SW_DATA_SIZE"); + BUILD_BUG_ON_MSG(sizeof(struct am65_cpsw_swdata) > AM65_CPSW_NAV_SW_DATA_SIZE, + "SW_DATA size exceeds AM65_CPSW_NAV_SW_DATA_SIZE"); common = devm_kzalloc(dev, sizeof(struct am65_cpsw_common), GFP_KERNEL); if (!common) return -ENOMEM; diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h index e7832a5cf3cc..917c37e4e89b 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h @@ -104,6 +104,14 @@ struct am65_cpsw_rx_flow { char name[32]; }; +struct am65_cpsw_tx_swdata { + struct net_device *ndev; + union { + struct sk_buff *skb; + struct xdp_frame *xdpf; + }; +}; + struct am65_cpsw_swdata { u32 flow_id; struct page *page; diff --git a/drivers/net/ethernet/xilinx/Kconfig b/drivers/net/ethernet/xilinx/Kconfig index 35d96c633a33..7502214cc7d5 100644 --- a/drivers/net/ethernet/xilinx/Kconfig +++ b/drivers/net/ethernet/xilinx/Kconfig @@ -28,6 +28,7 @@ config XILINX_AXI_EMAC depends on HAS_IOMEM depends on XILINX_DMA select PHYLINK + select DIMLIB help This driver supports the 10/100/1000 Ethernet from Xilinx for the AXI bus interface used in Xilinx Virtex FPGAs and Soc's. diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index a3f4f3e42587..5ff742103beb 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -9,6 +9,7 @@ #ifndef XILINX_AXIENET_H #define XILINX_AXIENET_H +#include <linux/dim.h> #include <linux/netdevice.h> #include <linux/spinlock.h> #include <linux/interrupt.h> @@ -112,9 +113,6 @@ #define XAXIDMA_DELAY_MASK 0xFF000000 /* Delay timeout counter */ #define XAXIDMA_COALESCE_MASK 0x00FF0000 /* Coalesce counter */ -#define XAXIDMA_DELAY_SHIFT 24 -#define XAXIDMA_COALESCE_SHIFT 16 - #define XAXIDMA_IRQ_IOC_MASK 0x00001000 /* Completion intr */ #define XAXIDMA_IRQ_DELAY_MASK 0x00002000 /* Delay interrupt */ #define XAXIDMA_IRQ_ERROR_MASK 0x00004000 /* Error interrupt */ @@ -126,8 +124,7 @@ /* Default TX/RX Threshold and delay timer values for SGDMA mode */ #define XAXIDMA_DFT_TX_THRESHOLD 24 #define XAXIDMA_DFT_TX_USEC 50 -#define XAXIDMA_DFT_RX_THRESHOLD 1 -#define XAXIDMA_DFT_RX_USEC 50 +#define XAXIDMA_DFT_RX_USEC 16 #define XAXIDMA_BD_CTRL_TXSOF_MASK 0x08000000 /* First tx packet */ #define XAXIDMA_BD_CTRL_TXEOF_MASK 0x04000000 /* Last tx packet */ @@ -487,7 +484,12 @@ struct skbuf_dma_descriptor { * @regs: Base address for the axienet_local device address space * @dma_regs: Base address for the axidma device address space * @napi_rx: NAPI RX control structure + * @rx_dim: DIM state for the receive queue + * @rx_dim_enabled: Whether DIM is enabled or not + * @rx_irqs: Number of interrupts + * @rx_cr_lock: Lock protecting @rx_dma_cr, its register, and @rx_dma_started * @rx_dma_cr: Nominal content of RX DMA control register + * @rx_dma_started: Set when RX DMA is started * @rx_bd_v: Virtual address of the RX buffer descriptor ring * @rx_bd_p: Physical address(start address) of the RX buffer descr. ring * @rx_bd_num: Size of RX buffer descriptor ring @@ -497,7 +499,9 @@ struct skbuf_dma_descriptor { * @rx_bytes: RX byte count for statistics * @rx_stat_sync: Synchronization object for RX stats * @napi_tx: NAPI TX control structure + * @tx_cr_lock: Lock protecting @tx_dma_cr, its register, and @tx_dma_started * @tx_dma_cr: Nominal content of TX DMA control register + * @tx_dma_started: Set when TX DMA is started * @tx_bd_v: Virtual address of the TX buffer descriptor ring * @tx_bd_p: Physical address(start address) of the TX buffer descr. ring * @tx_bd_num: Size of TX buffer descriptor ring @@ -532,10 +536,6 @@ struct skbuf_dma_descriptor { * supported, the maximum frame size would be 9k. Else it is * 1522 bytes (assuming support for basic VLAN) * @rxmem: Stores rx memory size for jumbo frame handling. - * @coalesce_count_rx: Store the irq coalesce on RX side. - * @coalesce_usec_rx: IRQ coalesce delay for RX - * @coalesce_count_tx: Store the irq coalesce on TX side. - * @coalesce_usec_tx: IRQ coalesce delay for TX * @use_dmaengine: flag to check dmaengine framework usage. * @tx_chan: TX DMA channel. * @rx_chan: RX DMA channel. @@ -569,7 +569,12 @@ struct axienet_local { void __iomem *dma_regs; struct napi_struct napi_rx; + struct dim rx_dim; + bool rx_dim_enabled; + u16 rx_irqs; + spinlock_t rx_cr_lock; u32 rx_dma_cr; + bool rx_dma_started; struct axidma_bd *rx_bd_v; dma_addr_t rx_bd_p; u32 rx_bd_num; @@ -579,7 +584,9 @@ struct axienet_local { struct u64_stats_sync rx_stat_sync; struct napi_struct napi_tx; + spinlock_t tx_cr_lock; u32 tx_dma_cr; + bool tx_dma_started; struct axidma_bd *tx_bd_v; dma_addr_t tx_bd_p; u32 tx_bd_num; @@ -610,10 +617,6 @@ struct axienet_local { u32 max_frm_size; u32 rxmem; - u32 coalesce_count_rx; - u32 coalesce_usec_rx; - u32 coalesce_count_tx; - u32 coalesce_usec_tx; u8 use_dmaengine; struct dma_chan *tx_chan; struct dma_chan *rx_chan; diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index f33178f90c42..054abf283ab3 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -223,23 +223,62 @@ static void axienet_dma_bd_release(struct net_device *ndev) lp->rx_bd_p); } +static u64 axienet_dma_rate(struct axienet_local *lp) +{ + if (lp->axi_clk) + return clk_get_rate(lp->axi_clk); + return 125000000; /* arbitrary guess if no clock rate set */ +} + /** - * axienet_usec_to_timer - Calculate IRQ delay timer value - * @lp: Pointer to the axienet_local structure - * @coalesce_usec: Microseconds to convert into timer value + * axienet_calc_cr() - Calculate control register value + * @lp: Device private data + * @count: Number of completions before an interrupt + * @usec: Microseconds after the last completion before an interrupt + * + * Calculate a control register value based on the coalescing settings. The + * run/stop bit is not set. */ -static u32 axienet_usec_to_timer(struct axienet_local *lp, u32 coalesce_usec) +static u32 axienet_calc_cr(struct axienet_local *lp, u32 count, u32 usec) { - u32 result; - u64 clk_rate = 125000000; /* arbitrary guess if no clock rate set */ + u32 cr; - if (lp->axi_clk) - clk_rate = clk_get_rate(lp->axi_clk); + cr = FIELD_PREP(XAXIDMA_COALESCE_MASK, count) | XAXIDMA_IRQ_IOC_MASK | + XAXIDMA_IRQ_ERROR_MASK; + /* Only set interrupt delay timer if not generating an interrupt on + * the first packet. Otherwise leave at 0 to disable delay interrupt. + */ + if (count > 1) { + u64 clk_rate = axienet_dma_rate(lp); + u32 timer; + + /* 1 Timeout Interval = 125 * (clock period of SG clock) */ + timer = DIV64_U64_ROUND_CLOSEST((u64)usec * clk_rate, + XAXIDMA_DELAY_SCALE); - /* 1 Timeout Interval = 125 * (clock period of SG clock) */ - result = DIV64_U64_ROUND_CLOSEST((u64)coalesce_usec * clk_rate, - XAXIDMA_DELAY_SCALE); - return min(result, FIELD_MAX(XAXIDMA_DELAY_MASK)); + timer = min(timer, FIELD_MAX(XAXIDMA_DELAY_MASK)); + cr |= FIELD_PREP(XAXIDMA_DELAY_MASK, timer) | + XAXIDMA_IRQ_DELAY_MASK; + } + + return cr; +} + +/** + * axienet_coalesce_params() - Extract coalesce parameters from the CR + * @lp: Device private data + * @cr: The control register to parse + * @count: Number of packets before an interrupt + * @usec: Idle time (in usec) before an interrupt + */ +static void axienet_coalesce_params(struct axienet_local *lp, u32 cr, + u32 *count, u32 *usec) +{ + u64 clk_rate = axienet_dma_rate(lp); + u64 timer = FIELD_GET(XAXIDMA_DELAY_MASK, cr); + + *count = FIELD_GET(XAXIDMA_COALESCE_MASK, cr); + *usec = DIV64_U64_ROUND_CLOSEST(timer * XAXIDMA_DELAY_SCALE, clk_rate); } /** @@ -248,30 +287,12 @@ static u32 axienet_usec_to_timer(struct axienet_local *lp, u32 coalesce_usec) */ static void axienet_dma_start(struct axienet_local *lp) { + spin_lock_irq(&lp->rx_cr_lock); + /* Start updating the Rx channel control register */ - lp->rx_dma_cr = (lp->coalesce_count_rx << XAXIDMA_COALESCE_SHIFT) | - XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_ERROR_MASK; - /* Only set interrupt delay timer if not generating an interrupt on - * the first RX packet. Otherwise leave at 0 to disable delay interrupt. - */ - if (lp->coalesce_count_rx > 1) - lp->rx_dma_cr |= (axienet_usec_to_timer(lp, lp->coalesce_usec_rx) - << XAXIDMA_DELAY_SHIFT) | - XAXIDMA_IRQ_DELAY_MASK; + lp->rx_dma_cr &= ~XAXIDMA_CR_RUNSTOP_MASK; axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, lp->rx_dma_cr); - /* Start updating the Tx channel control register */ - lp->tx_dma_cr = (lp->coalesce_count_tx << XAXIDMA_COALESCE_SHIFT) | - XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_ERROR_MASK; - /* Only set interrupt delay timer if not generating an interrupt on - * the first TX packet. Otherwise leave at 0 to disable delay interrupt. - */ - if (lp->coalesce_count_tx > 1) - lp->tx_dma_cr |= (axienet_usec_to_timer(lp, lp->coalesce_usec_tx) - << XAXIDMA_DELAY_SHIFT) | - XAXIDMA_IRQ_DELAY_MASK; - axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, lp->tx_dma_cr); - /* Populate the tail pointer and bring the Rx Axi DMA engine out of * halted state. This will make the Rx side ready for reception. */ @@ -280,6 +301,14 @@ static void axienet_dma_start(struct axienet_local *lp) axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, lp->rx_dma_cr); axienet_dma_out_addr(lp, XAXIDMA_RX_TDESC_OFFSET, lp->rx_bd_p + (sizeof(*lp->rx_bd_v) * (lp->rx_bd_num - 1))); + lp->rx_dma_started = true; + + spin_unlock_irq(&lp->rx_cr_lock); + spin_lock_irq(&lp->tx_cr_lock); + + /* Start updating the Tx channel control register */ + lp->tx_dma_cr &= ~XAXIDMA_CR_RUNSTOP_MASK; + axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, lp->tx_dma_cr); /* Write to the RS (Run-stop) bit in the Tx channel control register. * Tx channel is now ready to run. But only after we write to the @@ -288,6 +317,9 @@ static void axienet_dma_start(struct axienet_local *lp) axienet_dma_out_addr(lp, XAXIDMA_TX_CDESC_OFFSET, lp->tx_bd_p); lp->tx_dma_cr |= XAXIDMA_CR_RUNSTOP_MASK; axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, lp->tx_dma_cr); + lp->tx_dma_started = true; + + spin_unlock_irq(&lp->tx_cr_lock); } /** @@ -623,14 +655,22 @@ static void axienet_dma_stop(struct axienet_local *lp) int count; u32 cr, sr; - cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); - cr &= ~(XAXIDMA_CR_RUNSTOP_MASK | XAXIDMA_IRQ_ALL_MASK); + spin_lock_irq(&lp->rx_cr_lock); + + cr = lp->rx_dma_cr & ~(XAXIDMA_CR_RUNSTOP_MASK | XAXIDMA_IRQ_ALL_MASK); axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr); + lp->rx_dma_started = false; + + spin_unlock_irq(&lp->rx_cr_lock); synchronize_irq(lp->rx_irq); - cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); - cr &= ~(XAXIDMA_CR_RUNSTOP_MASK | XAXIDMA_IRQ_ALL_MASK); + spin_lock_irq(&lp->tx_cr_lock); + + cr = lp->tx_dma_cr & ~(XAXIDMA_CR_RUNSTOP_MASK | XAXIDMA_IRQ_ALL_MASK); axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr); + lp->tx_dma_started = false; + + spin_unlock_irq(&lp->tx_cr_lock); synchronize_irq(lp->tx_irq); /* Give DMAs a chance to halt gracefully */ @@ -962,6 +1002,7 @@ static int axienet_tx_poll(struct napi_struct *napi, int budget) &size, budget); if (packets) { + netdev_completed_queue(ndev, packets, size); u64_stats_update_begin(&lp->tx_stat_sync); u64_stats_add(&lp->tx_packets, packets); u64_stats_add(&lp->tx_bytes, size); @@ -979,7 +1020,9 @@ static int axienet_tx_poll(struct napi_struct *napi, int budget) * cause an immediate interrupt if any TX packets are * already pending. */ + spin_lock_irq(&lp->tx_cr_lock); axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, lp->tx_dma_cr); + spin_unlock_irq(&lp->tx_cr_lock); } return packets; } @@ -1083,6 +1126,7 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) if (++new_tail_ptr >= lp->tx_bd_num) new_tail_ptr = 0; WRITE_ONCE(lp->tx_bd_tail, new_tail_ptr); + netdev_sent_queue(ndev, skb->len); /* Start the transfer */ axienet_dma_out_addr(lp, XAXIDMA_TX_TDESC_OFFSET, tail_p); @@ -1241,11 +1285,25 @@ static int axienet_rx_poll(struct napi_struct *napi, int budget) axienet_dma_out_addr(lp, XAXIDMA_RX_TDESC_OFFSET, tail_p); if (packets < budget && napi_complete_done(napi, packets)) { + if (READ_ONCE(lp->rx_dim_enabled)) { + struct dim_sample sample = { + .time = ktime_get(), + /* Safe because we are the only writer */ + .pkt_ctr = u64_stats_read(&lp->rx_packets), + .byte_ctr = u64_stats_read(&lp->rx_bytes), + .event_ctr = READ_ONCE(lp->rx_irqs), + }; + + net_dim(&lp->rx_dim, &sample); + } + /* Re-enable RX completion interrupts. This should * cause an immediate interrupt if any RX packets are * already pending. */ + spin_lock_irq(&lp->rx_cr_lock); axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, lp->rx_dma_cr); + spin_unlock_irq(&lp->rx_cr_lock); } return packets; } @@ -1283,11 +1341,14 @@ static irqreturn_t axienet_tx_irq(int irq, void *_ndev) /* Disable further TX completion interrupts and schedule * NAPI to handle the completions. */ - u32 cr = lp->tx_dma_cr; - - cr &= ~(XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK); if (napi_schedule_prep(&lp->napi_tx)) { + u32 cr; + + spin_lock(&lp->tx_cr_lock); + cr = lp->tx_dma_cr; + cr &= ~(XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK); axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr); + spin_unlock(&lp->tx_cr_lock); __napi_schedule(&lp->napi_tx); } } @@ -1328,11 +1389,16 @@ static irqreturn_t axienet_rx_irq(int irq, void *_ndev) /* Disable further RX completion interrupts and schedule * NAPI receive. */ - u32 cr = lp->rx_dma_cr; - - cr &= ~(XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK); + WRITE_ONCE(lp->rx_irqs, READ_ONCE(lp->rx_irqs) + 1); if (napi_schedule_prep(&lp->napi_rx)) { + u32 cr; + + spin_lock(&lp->rx_cr_lock); + cr = lp->rx_dma_cr; + cr &= ~(XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK); axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr); + spin_unlock(&lp->rx_cr_lock); + __napi_schedule(&lp->napi_rx); } } @@ -1625,6 +1691,7 @@ err_free_eth_irq: if (lp->eth_irq > 0) free_irq(lp->eth_irq, ndev); err_phy: + cancel_work_sync(&lp->rx_dim.work); cancel_delayed_work_sync(&lp->stats_work); phylink_stop(lp->phylink); phylink_disconnect_phy(lp->phylink); @@ -1654,6 +1721,7 @@ static int axienet_stop(struct net_device *ndev) napi_disable(&lp->napi_rx); } + cancel_work_sync(&lp->rx_dim.work); cancel_delayed_work_sync(&lp->stats_work); phylink_stop(lp->phylink); @@ -1685,6 +1753,7 @@ static int axienet_stop(struct net_device *ndev) dma_release_channel(lp->tx_chan); } + netdev_reset_queue(ndev); axienet_iow(lp, XAE_IE_OFFSET, 0); if (lp->eth_irq > 0) @@ -1999,6 +2068,87 @@ axienet_ethtools_set_pauseparam(struct net_device *ndev, } /** + * axienet_update_coalesce_rx() - Set RX CR + * @lp: Device private data + * @cr: Value to write to the RX CR + * @mask: Bits to set from @cr + */ +static void axienet_update_coalesce_rx(struct axienet_local *lp, u32 cr, + u32 mask) +{ + spin_lock_irq(&lp->rx_cr_lock); + lp->rx_dma_cr &= ~mask; + lp->rx_dma_cr |= cr; + /* If DMA isn't started, then the settings will be applied the next + * time dma_start() is called. + */ + if (lp->rx_dma_started) { + u32 reg = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); + + /* Don't enable IRQs if they are disabled by NAPI */ + if (reg & XAXIDMA_IRQ_ALL_MASK) + cr = lp->rx_dma_cr; + else + cr = lp->rx_dma_cr & ~XAXIDMA_IRQ_ALL_MASK; + axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr); + } + spin_unlock_irq(&lp->rx_cr_lock); +} + +/** + * axienet_dim_coalesce_count_rx() - RX coalesce count for DIM + * @lp: Device private data + */ +static u32 axienet_dim_coalesce_count_rx(struct axienet_local *lp) +{ + return min(1 << (lp->rx_dim.profile_ix << 1), 255); +} + +/** + * axienet_rx_dim_work() - Adjust RX DIM settings + * @work: The work struct + */ +static void axienet_rx_dim_work(struct work_struct *work) +{ + struct axienet_local *lp = + container_of(work, struct axienet_local, rx_dim.work); + u32 cr = axienet_calc_cr(lp, axienet_dim_coalesce_count_rx(lp), 0); + u32 mask = XAXIDMA_COALESCE_MASK | XAXIDMA_IRQ_IOC_MASK | + XAXIDMA_IRQ_ERROR_MASK; + + axienet_update_coalesce_rx(lp, cr, mask); + lp->rx_dim.state = DIM_START_MEASURE; +} + +/** + * axienet_update_coalesce_tx() - Set TX CR + * @lp: Device private data + * @cr: Value to write to the TX CR + * @mask: Bits to set from @cr + */ +static void axienet_update_coalesce_tx(struct axienet_local *lp, u32 cr, + u32 mask) +{ + spin_lock_irq(&lp->tx_cr_lock); + lp->tx_dma_cr &= ~mask; + lp->tx_dma_cr |= cr; + /* If DMA isn't started, then the settings will be applied the next + * time dma_start() is called. + */ + if (lp->tx_dma_started) { + u32 reg = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); + + /* Don't enable IRQs if they are disabled by NAPI */ + if (reg & XAXIDMA_IRQ_ALL_MASK) + cr = lp->tx_dma_cr; + else + cr = lp->tx_dma_cr & ~XAXIDMA_IRQ_ALL_MASK; + axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr); + } + spin_unlock_irq(&lp->tx_cr_lock); +} + +/** * axienet_ethtools_get_coalesce - Get DMA interrupt coalescing count. * @ndev: Pointer to net_device structure * @ecoalesce: Pointer to ethtool_coalesce structure @@ -2018,11 +2168,23 @@ axienet_ethtools_get_coalesce(struct net_device *ndev, struct netlink_ext_ack *extack) { struct axienet_local *lp = netdev_priv(ndev); - - ecoalesce->rx_max_coalesced_frames = lp->coalesce_count_rx; - ecoalesce->rx_coalesce_usecs = lp->coalesce_usec_rx; - ecoalesce->tx_max_coalesced_frames = lp->coalesce_count_tx; - ecoalesce->tx_coalesce_usecs = lp->coalesce_usec_tx; + u32 cr; + + ecoalesce->use_adaptive_rx_coalesce = lp->rx_dim_enabled; + + spin_lock_irq(&lp->rx_cr_lock); + cr = lp->rx_dma_cr; + spin_unlock_irq(&lp->rx_cr_lock); + axienet_coalesce_params(lp, cr, + &ecoalesce->rx_max_coalesced_frames, + &ecoalesce->rx_coalesce_usecs); + + spin_lock_irq(&lp->tx_cr_lock); + cr = lp->tx_dma_cr; + spin_unlock_irq(&lp->tx_cr_lock); + axienet_coalesce_params(lp, cr, + &ecoalesce->tx_max_coalesced_frames, + &ecoalesce->tx_coalesce_usecs); return 0; } @@ -2046,12 +2208,9 @@ axienet_ethtools_set_coalesce(struct net_device *ndev, struct netlink_ext_ack *extack) { struct axienet_local *lp = netdev_priv(ndev); - - if (netif_running(ndev)) { - NL_SET_ERR_MSG(extack, - "Please stop netif before applying configuration"); - return -EBUSY; - } + bool new_dim = ecoalesce->use_adaptive_rx_coalesce; + bool old_dim = lp->rx_dim_enabled; + u32 cr, mask = ~XAXIDMA_CR_RUNSTOP_MASK; if (ecoalesce->rx_max_coalesced_frames > 255 || ecoalesce->tx_max_coalesced_frames > 255) { @@ -2065,7 +2224,7 @@ axienet_ethtools_set_coalesce(struct net_device *ndev, return -EINVAL; } - if ((ecoalesce->rx_max_coalesced_frames > 1 && + if (((ecoalesce->rx_max_coalesced_frames > 1 || new_dim) && !ecoalesce->rx_coalesce_usecs) || (ecoalesce->tx_max_coalesced_frames > 1 && !ecoalesce->tx_coalesce_usecs)) { @@ -2074,11 +2233,31 @@ axienet_ethtools_set_coalesce(struct net_device *ndev, return -EINVAL; } - lp->coalesce_count_rx = ecoalesce->rx_max_coalesced_frames; - lp->coalesce_usec_rx = ecoalesce->rx_coalesce_usecs; - lp->coalesce_count_tx = ecoalesce->tx_max_coalesced_frames; - lp->coalesce_usec_tx = ecoalesce->tx_coalesce_usecs; + if (new_dim && !old_dim) { + cr = axienet_calc_cr(lp, axienet_dim_coalesce_count_rx(lp), + ecoalesce->rx_coalesce_usecs); + } else if (!new_dim) { + if (old_dim) { + WRITE_ONCE(lp->rx_dim_enabled, false); + napi_synchronize(&lp->napi_rx); + flush_work(&lp->rx_dim.work); + } + + cr = axienet_calc_cr(lp, ecoalesce->rx_max_coalesced_frames, + ecoalesce->rx_coalesce_usecs); + } else { + /* Dummy value for count just to calculate timer */ + cr = axienet_calc_cr(lp, 2, ecoalesce->rx_coalesce_usecs); + mask = XAXIDMA_DELAY_MASK | XAXIDMA_IRQ_DELAY_MASK; + } + + axienet_update_coalesce_rx(lp, cr, mask); + if (new_dim && !old_dim) + WRITE_ONCE(lp->rx_dim_enabled, true); + cr = axienet_calc_cr(lp, ecoalesce->tx_max_coalesced_frames, + ecoalesce->tx_coalesce_usecs); + axienet_update_coalesce_tx(lp, cr, ~XAXIDMA_CR_RUNSTOP_MASK); return 0; } @@ -2316,7 +2495,8 @@ axienet_ethtool_get_rmon_stats(struct net_device *dev, static const struct ethtool_ops axienet_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | - ETHTOOL_COALESCE_USECS, + ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_USE_ADAPTIVE_RX, .get_drvinfo = axienet_ethtools_get_drvinfo, .get_regs_len = axienet_ethtools_get_regs_len, .get_regs = axienet_ethtools_get_regs, @@ -2499,6 +2679,7 @@ static void axienet_dma_err_handler(struct work_struct *work) ~(XAE_OPTION_TXEN | XAE_OPTION_RXEN)); axienet_dma_stop(lp); + netdev_reset_queue(ndev); for (i = 0; i < lp->tx_bd_num; i++) { cur_p = &lp->tx_bd_v[i]; @@ -2858,10 +3039,15 @@ static int axienet_probe(struct platform_device *pdev) axienet_set_mac_address(ndev, NULL); } - lp->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD; - lp->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD; - lp->coalesce_usec_rx = XAXIDMA_DFT_RX_USEC; - lp->coalesce_usec_tx = XAXIDMA_DFT_TX_USEC; + spin_lock_init(&lp->rx_cr_lock); + spin_lock_init(&lp->tx_cr_lock); + INIT_WORK(&lp->rx_dim.work, axienet_rx_dim_work); + lp->rx_dim_enabled = true; + lp->rx_dim.profile_ix = 1; + lp->rx_dma_cr = axienet_calc_cr(lp, axienet_dim_coalesce_count_rx(lp), + XAXIDMA_DFT_RX_USEC); + lp->tx_dma_cr = axienet_calc_cr(lp, XAXIDMA_DFT_TX_THRESHOLD, + XAXIDMA_DFT_TX_USEC); ret = axienet_mdio_setup(lp); if (ret) @@ -2891,7 +3077,6 @@ static int axienet_probe(struct platform_device *pdev) } of_node_put(np); lp->pcs.ops = &axienet_pcs_ops; - lp->pcs.neg_mode = true; lp->pcs.poll = true; } diff --git a/drivers/net/hamradio/baycom_par.c b/drivers/net/hamradio/baycom_par.c index 00ebc25d0b22..f03797103c6a 100644 --- a/drivers/net/hamradio/baycom_par.c +++ b/drivers/net/hamradio/baycom_par.c @@ -427,7 +427,7 @@ static int baycom_ioctl(struct net_device *dev, void __user *data, break; case HDLCDRVCTL_GETMODE: - strcpy(hi->data.modename, bc->options ? "par96" : "picpar"); + strscpy(hi->data.modename, bc->options ? "par96" : "picpar"); if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl))) return -EFAULT; return 0; @@ -439,7 +439,7 @@ static int baycom_ioctl(struct net_device *dev, void __user *data, return baycom_setmode(bc, hi->data.modename); case HDLCDRVCTL_MODELIST: - strcpy(hi->data.modename, "par96,picpar"); + strscpy(hi->data.modename, "par96,picpar"); if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl))) return -EFAULT; return 0; diff --git a/drivers/net/hamradio/baycom_ser_fdx.c b/drivers/net/hamradio/baycom_ser_fdx.c index 799f8ece7824..ee5bd3c12040 100644 --- a/drivers/net/hamradio/baycom_ser_fdx.c +++ b/drivers/net/hamradio/baycom_ser_fdx.c @@ -531,7 +531,7 @@ static int baycom_ioctl(struct net_device *dev, void __user *data, return baycom_setmode(bc, hi->data.modename); case HDLCDRVCTL_MODELIST: - strcpy(hi->data.modename, "ser12,ser3,ser24"); + strscpy(hi->data.modename, "ser12,ser3,ser24"); if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl))) return -EFAULT; return 0; diff --git a/drivers/net/hamradio/baycom_ser_hdx.c b/drivers/net/hamradio/baycom_ser_hdx.c index 5d1ab4840753..05bdad214799 100644 --- a/drivers/net/hamradio/baycom_ser_hdx.c +++ b/drivers/net/hamradio/baycom_ser_hdx.c @@ -570,7 +570,7 @@ static int baycom_ioctl(struct net_device *dev, void __user *data, break; case HDLCDRVCTL_GETMODE: - strcpy(hi->data.modename, "ser12"); + strscpy(hi->data.modename, "ser12"); if (bc->opt_dcd <= 0) strcat(hi->data.modename, (!bc->opt_dcd) ? "*" : (bc->opt_dcd == -2) ? "@" : "+"); if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl))) @@ -584,7 +584,7 @@ static int baycom_ioctl(struct net_device *dev, void __user *data, return baycom_setmode(bc, hi->data.modename); case HDLCDRVCTL_MODELIST: - strcpy(hi->data.modename, "ser12"); + strscpy(hi->data.modename, "ser12"); if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl))) return -EFAULT; return 0; diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 234db693cefa..70f7cb383228 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -1166,6 +1166,8 @@ struct netvsc_device { u32 max_chn; u32 num_chn; + u32 netvsc_gso_max_size; + atomic_t open_chn; struct work_struct subchan_work; wait_queue_head_t subchan_open; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index d6c4abfc3a28..9c6501bf27bd 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2461,6 +2461,21 @@ static int netvsc_vf_changed(struct net_device *vf_netdev, unsigned long event) } else { netdev_info(ndev, "Data path switched %s VF: %s\n", vf_is_up ? "to" : "from", vf_netdev->name); + + /* In Azure, when accelerated networking in enabled, other NICs + * like MANA, MLX, are configured as a bonded nic with + * Netvsc(failover) NIC. For bonded NICs, the min of the max + * pkt aggregate size of the members is propagated in the stack. + * In order to allow these NICs (MANA/MLX) to use up to + * GSO_MAX_SIZE gso packet size, we need to allow Netvsc NIC to + * also support this in the guest. + * This value is only increased for netvsc NIC when datapath is + * switched over to the VF + */ + if (vf_is_up) + netif_set_tso_max_size(ndev, vf_netdev->tso_max_size); + else + netif_set_tso_max_size(ndev, netvsc_dev->netvsc_gso_max_size); } return NOTIFY_OK; diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index c0ceeef4fcd8..82747dfacd70 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1356,9 +1356,10 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device, struct net_device_context *net_device_ctx = netdev_priv(net); struct ndis_offload hwcaps; struct ndis_offload_params offloads; - unsigned int gso_max_size = GSO_LEGACY_MAX_SIZE; int ret; + nvdev->netvsc_gso_max_size = GSO_LEGACY_MAX_SIZE; + /* Find HW offload capabilities */ ret = rndis_query_hwcaps(rndis_device, nvdev, &hwcaps); if (ret != 0) @@ -1390,8 +1391,8 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device, offloads.lso_v2_ipv4 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED; net->hw_features |= NETIF_F_TSO; - if (hwcaps.lsov2.ip4_maxsz < gso_max_size) - gso_max_size = hwcaps.lsov2.ip4_maxsz; + if (hwcaps.lsov2.ip4_maxsz < nvdev->netvsc_gso_max_size) + nvdev->netvsc_gso_max_size = hwcaps.lsov2.ip4_maxsz; } if (hwcaps.csum.ip4_txcsum & NDIS_TXCSUM_CAP_UDP4) { @@ -1411,8 +1412,8 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device, offloads.lso_v2_ipv6 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED; net->hw_features |= NETIF_F_TSO6; - if (hwcaps.lsov2.ip6_maxsz < gso_max_size) - gso_max_size = hwcaps.lsov2.ip6_maxsz; + if (hwcaps.lsov2.ip6_maxsz < nvdev->netvsc_gso_max_size) + nvdev->netvsc_gso_max_size = hwcaps.lsov2.ip6_maxsz; } if (hwcaps.csum.ip6_txcsum & NDIS_TXCSUM_CAP_UDP6) { @@ -1438,7 +1439,7 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device, */ net->features &= ~NETVSC_SUPPORTED_HW_FEATURES | net->hw_features; - netif_set_tso_max_size(net, gso_max_size); + netif_set_tso_max_size(net, nvdev->netvsc_gso_max_size); ret = rndis_filter_set_offload_params(net, nvdev, &offloads); diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 86ab4a42769a..f77eddf22185 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -45,12 +45,12 @@ MODULE_DESCRIPTION("Console driver for network interfaces"); MODULE_LICENSE("GPL"); #define MAX_PARAM_LENGTH 256 -#define MAX_USERDATA_ENTRY_LENGTH 256 -#define MAX_USERDATA_VALUE_LENGTH 200 +#define MAX_EXTRADATA_ENTRY_LEN 256 +#define MAX_EXTRADATA_VALUE_LEN 200 /* The number 3 comes from userdata entry format characters (' ', '=', '\n') */ -#define MAX_USERDATA_NAME_LENGTH (MAX_USERDATA_ENTRY_LENGTH - \ - MAX_USERDATA_VALUE_LENGTH - 3) -#define MAX_USERDATA_ITEMS 16 +#define MAX_EXTRADATA_NAME_LEN (MAX_EXTRADATA_ENTRY_LEN - \ + MAX_EXTRADATA_VALUE_LEN - 3) +#define MAX_EXTRADATA_ITEMS 16 #define MAX_PRINT_CHUNK 1000 static char config[MAX_PARAM_LENGTH]; @@ -97,13 +97,23 @@ struct netconsole_target_stats { struct u64_stats_sync syncp; }; +/* Features enabled in sysdata. Contrary to userdata, this data is populated by + * the kernel. The fields are designed as bitwise flags, allowing multiple + * features to be set in sysdata_fields. + */ +enum sysdata_feature { + /* Populate the CPU that sends the message */ + CPU_NR = BIT(0), +}; + /** * struct netconsole_target - Represents a configured netconsole target. * @list: Links this target into the target_list. * @group: Links us into the configfs subsystem hierarchy. * @userdata_group: Links to the userdata configfs hierarchy - * @userdata_complete: Cached, formatted string of append - * @userdata_length: String length of userdata_complete + * @extradata_complete: Cached, formatted string of append + * @userdata_length: String length of usedata in extradata_complete. + * @sysdata_fields: Sysdata features enabled. * @stats: Packet send stats for the target. Used for debugging. * @enabled: On / off knob to enable / disable target. * Visible from userspace (read-write). @@ -123,20 +133,25 @@ struct netconsole_target_stats { * remote_ip (read-write) * local_mac (read-only) * remote_mac (read-write) + * @buf: The buffer used to send the full msg to the network stack */ struct netconsole_target { struct list_head list; #ifdef CONFIG_NETCONSOLE_DYNAMIC struct config_group group; struct config_group userdata_group; - char userdata_complete[MAX_USERDATA_ENTRY_LENGTH * MAX_USERDATA_ITEMS]; + char extradata_complete[MAX_EXTRADATA_ENTRY_LEN * MAX_EXTRADATA_ITEMS]; size_t userdata_length; + /* bit-wise with sysdata_feature bits */ + u32 sysdata_fields; #endif struct netconsole_target_stats stats; bool enabled; bool extended; bool release; struct netpoll np; + /* protected by target_list_lock */ + char buf[MAX_PRINT_CHUNK]; }; #ifdef CONFIG_NETCONSOLE_DYNAMIC @@ -396,6 +411,19 @@ static ssize_t transmit_errors_show(struct config_item *item, char *buf) return sysfs_emit(buf, "%llu\n", xmit_drop_count + enomem_count); } +/* configfs helper to display if cpu_nr sysdata feature is enabled */ +static ssize_t sysdata_cpu_nr_enabled_show(struct config_item *item, char *buf) +{ + struct netconsole_target *nt = to_target(item->ci_parent); + bool cpu_nr_enabled; + + mutex_lock(&dynamic_netconsole_mutex); + cpu_nr_enabled = !!(nt->sysdata_fields & CPU_NR); + mutex_unlock(&dynamic_netconsole_mutex); + + return sysfs_emit(buf, "%d\n", cpu_nr_enabled); +} + /* * This one is special -- targets created through the configfs interface * are not enabled (and the corresponding netpoll activated) by default. @@ -659,6 +687,24 @@ out_unlock: return ret; } +/* Count number of entries we have in extradata. + * This is important because the extradata_complete only supports + * MAX_EXTRADATA_ITEMS entries. Before enabling any new {user,sys}data + * feature, number of entries needs to checked for available space. + */ +static size_t count_extradata_entries(struct netconsole_target *nt) +{ + size_t entries; + + /* Userdata entries */ + entries = list_count_nodes(&nt->userdata_group.cg_children); + /* Plus sysdata entries */ + if (nt->sysdata_fields & CPU_NR) + entries += 1; + + return entries; +} + static ssize_t remote_mac_store(struct config_item *item, const char *buf, size_t count) { @@ -687,7 +733,7 @@ out_unlock: struct userdatum { struct config_item item; - char value[MAX_USERDATA_VALUE_LENGTH]; + char value[MAX_EXTRADATA_VALUE_LEN]; }; static struct userdatum *to_userdatum(struct config_item *item) @@ -724,13 +770,13 @@ static void update_userdata(struct netconsole_target *nt) /* Clear the current string in case the last userdatum was deleted */ nt->userdata_length = 0; - nt->userdata_complete[0] = 0; + nt->extradata_complete[0] = 0; list_for_each(entry, &nt->userdata_group.cg_children) { struct userdatum *udm_item; struct config_item *item; - if (WARN_ON_ONCE(child_count >= MAX_USERDATA_ITEMS)) + if (WARN_ON_ONCE(child_count >= MAX_EXTRADATA_ITEMS)) break; child_count++; @@ -738,19 +784,19 @@ static void update_userdata(struct netconsole_target *nt) udm_item = to_userdatum(item); /* Skip userdata with no value set */ - if (strnlen(udm_item->value, MAX_USERDATA_VALUE_LENGTH) == 0) + if (strnlen(udm_item->value, MAX_EXTRADATA_VALUE_LEN) == 0) continue; - /* This doesn't overflow userdata_complete since it will write - * one entry length (1/MAX_USERDATA_ITEMS long), entry count is + /* This doesn't overflow extradata_complete since it will write + * one entry length (1/MAX_EXTRADATA_ITEMS long), entry count is * checked to not exceed MAX items with child_count above */ - complete_idx += scnprintf(&nt->userdata_complete[complete_idx], - MAX_USERDATA_ENTRY_LENGTH, " %s=%s\n", + complete_idx += scnprintf(&nt->extradata_complete[complete_idx], + MAX_EXTRADATA_ENTRY_LEN, " %s=%s\n", item->ci_name, udm_item->value); } - nt->userdata_length = strnlen(nt->userdata_complete, - sizeof(nt->userdata_complete)); + nt->userdata_length = strnlen(nt->extradata_complete, + sizeof(nt->extradata_complete)); } static ssize_t userdatum_value_store(struct config_item *item, const char *buf, @@ -761,7 +807,7 @@ static ssize_t userdatum_value_store(struct config_item *item, const char *buf, struct userdata *ud; ssize_t ret; - if (count > MAX_USERDATA_VALUE_LENGTH) + if (count > MAX_EXTRADATA_VALUE_LEN) return -EMSGSIZE; mutex_lock(&dynamic_netconsole_mutex); @@ -780,7 +826,62 @@ out_unlock: return ret; } +/* disable_sysdata_feature - Disable sysdata feature and clean sysdata + * @nt: target that is disabling the feature + * @feature: feature being disabled + */ +static void disable_sysdata_feature(struct netconsole_target *nt, + enum sysdata_feature feature) +{ + nt->sysdata_fields &= ~feature; + nt->extradata_complete[nt->userdata_length] = 0; +} + +/* configfs helper to sysdata cpu_nr feature */ +static ssize_t sysdata_cpu_nr_enabled_store(struct config_item *item, + const char *buf, size_t count) +{ + struct netconsole_target *nt = to_target(item->ci_parent); + bool cpu_nr_enabled, curr; + ssize_t ret; + + ret = kstrtobool(buf, &cpu_nr_enabled); + if (ret) + return ret; + + mutex_lock(&dynamic_netconsole_mutex); + curr = nt->sysdata_fields & CPU_NR; + if (cpu_nr_enabled == curr) + /* no change requested */ + goto unlock_ok; + + if (cpu_nr_enabled && + count_extradata_entries(nt) >= MAX_EXTRADATA_ITEMS) { + /* user wants the new feature, but there is no space in the + * buffer. + */ + ret = -ENOSPC; + goto unlock; + } + + if (cpu_nr_enabled) + nt->sysdata_fields |= CPU_NR; + else + /* This is special because extradata_complete might have + * remaining data from previous sysdata, and it needs to be + * cleaned. + */ + disable_sysdata_feature(nt, CPU_NR); + +unlock_ok: + ret = strnlen(buf, count); +unlock: + mutex_unlock(&dynamic_netconsole_mutex); + return ret; +} + CONFIGFS_ATTR(userdatum_, value); +CONFIGFS_ATTR(sysdata_, cpu_nr_enabled); static struct configfs_attribute *userdatum_attrs[] = { &userdatum_attr_value, @@ -808,15 +909,13 @@ static struct config_item *userdatum_make_item(struct config_group *group, struct netconsole_target *nt; struct userdatum *udm; struct userdata *ud; - size_t child_count; - if (strlen(name) > MAX_USERDATA_NAME_LENGTH) + if (strlen(name) > MAX_EXTRADATA_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); ud = to_userdata(&group->cg_item); nt = userdata_to_target(ud); - child_count = list_count_nodes(&nt->userdata_group.cg_children); - if (child_count >= MAX_USERDATA_ITEMS) + if (count_extradata_entries(nt) >= MAX_EXTRADATA_ITEMS) return ERR_PTR(-ENOSPC); udm = kzalloc(sizeof(*udm), GFP_KERNEL); @@ -842,6 +941,7 @@ static void userdatum_drop(struct config_group *group, struct config_item *item) } static struct configfs_attribute *userdata_attrs[] = { + &sysdata_attr_cpu_nr_enabled, NULL, }; @@ -1017,6 +1117,40 @@ static void populate_configfs_item(struct netconsole_target *nt, init_target_config_group(nt, target_name); } +/* + * prepare_extradata - append sysdata at extradata_complete in runtime + * @nt: target to send message to + */ +static int prepare_extradata(struct netconsole_target *nt) +{ + int sysdata_len, extradata_len; + + /* userdata was appended when configfs write helper was called + * by update_userdata(). + */ + extradata_len = nt->userdata_length; + + if (!(nt->sysdata_fields & CPU_NR)) + goto out; + + /* Append cpu=%d at extradata_complete after userdata str */ + sysdata_len = scnprintf(&nt->extradata_complete[nt->userdata_length], + MAX_EXTRADATA_ENTRY_LEN, " cpu=%u\n", + raw_smp_processor_id()); + + extradata_len += sysdata_len; + + WARN_ON_ONCE(extradata_len > + MAX_EXTRADATA_ENTRY_LEN * MAX_EXTRADATA_ITEMS); + +out: + return extradata_len; +} +#else /* CONFIG_NETCONSOLE_DYNAMIC not set */ +static int prepare_extradata(struct netconsole_target *nt) +{ + return 0; +} #endif /* CONFIG_NETCONSOLE_DYNAMIC */ /* Handle network interface device notifications */ @@ -1117,29 +1251,28 @@ static void send_msg_no_fragmentation(struct netconsole_target *nt, int msg_len, int release_len) { - static char buf[MAX_PRINT_CHUNK]; /* protected by target_list_lock */ - const char *userdata = NULL; + const char *extradata = NULL; const char *release; #ifdef CONFIG_NETCONSOLE_DYNAMIC - userdata = nt->userdata_complete; + extradata = nt->extradata_complete; #endif if (release_len) { release = init_utsname()->release; - scnprintf(buf, MAX_PRINT_CHUNK, "%s,%s", release, msg); + scnprintf(nt->buf, MAX_PRINT_CHUNK, "%s,%s", release, msg); msg_len += release_len; } else { - memcpy(buf, msg, msg_len); + memcpy(nt->buf, msg, msg_len); } - if (userdata) - msg_len += scnprintf(&buf[msg_len], + if (extradata) + msg_len += scnprintf(&nt->buf[msg_len], MAX_PRINT_CHUNK - msg_len, - "%s", userdata); + "%s", extradata); - send_udp(nt, buf, msg_len); + send_udp(nt, nt->buf, msg_len); } static void append_release(char *buf) @@ -1150,28 +1283,27 @@ static void append_release(char *buf) scnprintf(buf, MAX_PRINT_CHUNK, "%s,", release); } -static void send_fragmented_body(struct netconsole_target *nt, char *buf, +static void send_fragmented_body(struct netconsole_target *nt, const char *msgbody, int header_len, - int msgbody_len) + int msgbody_len, int extradata_len) { - const char *userdata = NULL; + int sent_extradata, preceding_bytes; + const char *extradata = NULL; int body_len, offset = 0; - int userdata_len = 0; #ifdef CONFIG_NETCONSOLE_DYNAMIC - userdata = nt->userdata_complete; - userdata_len = nt->userdata_length; + extradata = nt->extradata_complete; #endif /* body_len represents the number of bytes that will be sent. This is * bigger than MAX_PRINT_CHUNK, thus, it will be split in multiple * packets */ - body_len = msgbody_len + userdata_len; + body_len = msgbody_len + extradata_len; /* In each iteration of the while loop below, we send a packet * containing the header and a portion of the body. The body is - * composed of two parts: msgbody and userdata. We keep track of how + * composed of two parts: msgbody and extradata. We keep track of how * many bytes have been sent so far using the offset variable, which * ranges from 0 to the total length of the body. */ @@ -1181,7 +1313,7 @@ static void send_fragmented_body(struct netconsole_target *nt, char *buf, int this_offset = 0; int this_chunk = 0; - this_header += scnprintf(buf + this_header, + this_header += scnprintf(nt->buf + this_header, MAX_PRINT_CHUNK - this_header, ",ncfrag=%d/%d;", offset, body_len); @@ -1192,47 +1324,48 @@ static void send_fragmented_body(struct netconsole_target *nt, char *buf, MAX_PRINT_CHUNK - this_header); if (WARN_ON_ONCE(this_chunk <= 0)) return; - memcpy(buf + this_header, msgbody + offset, this_chunk); + memcpy(nt->buf + this_header, msgbody + offset, + this_chunk); this_offset += this_chunk; } /* msgbody was finally written, either in the previous * messages and/or in the current buf. Time to write - * the userdata. + * the extradata. */ msgbody_written |= offset + this_offset >= msgbody_len; - /* Msg body is fully written and there is pending userdata to - * write, append userdata in this chunk + /* Msg body is fully written and there is pending extradata to + * write, append extradata in this chunk */ if (msgbody_written && offset + this_offset < body_len) { /* Track how much user data was already sent. First * time here, sent_userdata is zero */ - int sent_userdata = (offset + this_offset) - msgbody_len; + sent_extradata = (offset + this_offset) - msgbody_len; /* offset of bytes used in current buf */ - int preceding_bytes = this_chunk + this_header; + preceding_bytes = this_chunk + this_header; - if (WARN_ON_ONCE(sent_userdata < 0)) + if (WARN_ON_ONCE(sent_extradata < 0)) return; - this_chunk = min(userdata_len - sent_userdata, + this_chunk = min(extradata_len - sent_extradata, MAX_PRINT_CHUNK - preceding_bytes); if (WARN_ON_ONCE(this_chunk < 0)) /* this_chunk could be zero if all the previous * message used all the buffer. This is not a - * problem, userdata will be sent in the next + * problem, extradata will be sent in the next * iteration */ return; - memcpy(buf + this_header + this_offset, - userdata + sent_userdata, + memcpy(nt->buf + this_header + this_offset, + extradata + sent_extradata, this_chunk); this_offset += this_chunk; } - send_udp(nt, buf, this_header + this_offset); + send_udp(nt, nt->buf, this_header + this_offset); offset += this_offset; } } @@ -1240,9 +1373,9 @@ static void send_fragmented_body(struct netconsole_target *nt, char *buf, static void send_msg_fragmented(struct netconsole_target *nt, const char *msg, int msg_len, - int release_len) + int release_len, + int extradata_len) { - static char buf[MAX_PRINT_CHUNK]; /* protected by target_list_lock */ int header_len, msgbody_len; const char *msgbody; @@ -1260,16 +1393,17 @@ static void send_msg_fragmented(struct netconsole_target *nt, * "ncfrag=<byte-offset>/<total-bytes>" */ if (release_len) - append_release(buf); + append_release(nt->buf); /* Copy the header into the buffer */ - memcpy(buf + release_len, msg, header_len); + memcpy(nt->buf + release_len, msg, header_len); header_len += release_len; /* for now on, the header will be persisted, and the msgbody * will be replaced */ - send_fragmented_body(nt, buf, msgbody, header_len, msgbody_len); + send_fragmented_body(nt, msgbody, header_len, msgbody_len, + extradata_len); } /** @@ -1285,20 +1419,19 @@ static void send_msg_fragmented(struct netconsole_target *nt, static void send_ext_msg_udp(struct netconsole_target *nt, const char *msg, int msg_len) { - int userdata_len = 0; int release_len = 0; + int extradata_len; -#ifdef CONFIG_NETCONSOLE_DYNAMIC - userdata_len = nt->userdata_length; -#endif + extradata_len = prepare_extradata(nt); if (nt->release) release_len = strlen(init_utsname()->release) + 1; - if (msg_len + release_len + userdata_len <= MAX_PRINT_CHUNK) + if (msg_len + release_len + extradata_len <= MAX_PRINT_CHUNK) return send_msg_no_fragmentation(nt, msg, msg_len, release_len); - return send_msg_fragmented(nt, msg, msg_len, release_len); + return send_msg_fragmented(nt, msg, msg_len, release_len, + extradata_len); } static void write_ext_msg(struct console *con, const char *msg, diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 42f247cbdcee..9b394ddc5206 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -645,8 +645,11 @@ nsim_queue_mem_alloc(struct net_device *dev, void *per_queue_mem, int idx) if (ns->rq_reset_mode > 3) return -EINVAL; - if (ns->rq_reset_mode == 1) + if (ns->rq_reset_mode == 1) { + if (!netif_running(ns->netdev)) + return -ENETDOWN; return nsim_create_page_pool(&qmem->pp, &ns->rq[idx]->napi); + } qmem->rq = nsim_queue_alloc(); if (!qmem->rq) @@ -754,11 +757,6 @@ nsim_qreset_write(struct file *file, const char __user *data, return -EINVAL; rtnl_lock(); - if (!netif_running(ns->netdev)) { - ret = -ENETDOWN; - goto exit_unlock; - } - if (queue >= ns->netdev->real_num_rx_queues) { ret = -EINVAL; goto exit_unlock; diff --git a/drivers/net/pcs/pcs-lynx.c b/drivers/net/pcs/pcs-lynx.c index e46f588cae7d..23b40e9eacbb 100644 --- a/drivers/net/pcs/pcs-lynx.c +++ b/drivers/net/pcs/pcs-lynx.c @@ -355,7 +355,6 @@ static struct phylink_pcs *lynx_pcs_create(struct mdio_device *mdio) mdio_device_get(mdio); lynx->mdio = mdio; lynx->pcs.ops = &lynx_pcs_phylink_ops; - lynx->pcs.neg_mode = true; lynx->pcs.poll = true; for (i = 0; i < ARRAY_SIZE(lynx_interfaces); i++) diff --git a/drivers/net/pcs/pcs-mtk-lynxi.c b/drivers/net/pcs/pcs-mtk-lynxi.c index 7d6261dee534..149ddf51d785 100644 --- a/drivers/net/pcs/pcs-mtk-lynxi.c +++ b/drivers/net/pcs/pcs-mtk-lynxi.c @@ -305,7 +305,6 @@ struct phylink_pcs *mtk_pcs_lynxi_create(struct device *dev, mpcs->regmap = regmap; mpcs->flags = flags; mpcs->pcs.ops = &mtk_pcs_lynxi_ops; - mpcs->pcs.neg_mode = true; mpcs->pcs.poll = true; mpcs->interface = PHY_INTERFACE_MODE_NA; diff --git a/drivers/net/pcs/pcs-rzn1-miic.c b/drivers/net/pcs/pcs-rzn1-miic.c index 61944574d087..d79bb9b06cd2 100644 --- a/drivers/net/pcs/pcs-rzn1-miic.c +++ b/drivers/net/pcs/pcs-rzn1-miic.c @@ -268,17 +268,6 @@ static void miic_link_up(struct phylink_pcs *pcs, unsigned int neg_mode, (MIIC_CONVCTRL_CONV_SPEED | MIIC_CONVCTRL_FULLD), val); } -static int miic_validate(struct phylink_pcs *pcs, unsigned long *supported, - const struct phylink_link_state *state) -{ - if (phy_interface_mode_is_rgmii(state->interface) || - state->interface == PHY_INTERFACE_MODE_RMII || - state->interface == PHY_INTERFACE_MODE_MII) - return 1; - - return -EINVAL; -} - static int miic_pre_init(struct phylink_pcs *pcs) { struct miic_port *miic_port = phylink_pcs_to_miic_port(pcs); @@ -307,7 +296,6 @@ static int miic_pre_init(struct phylink_pcs *pcs) } static const struct phylink_pcs_ops miic_phylink_ops = { - .pcs_validate = miic_validate, .pcs_config = miic_config, .pcs_link_up = miic_link_up, .pcs_pre_init = miic_pre_init, @@ -361,7 +349,10 @@ struct phylink_pcs *miic_create(struct device *dev, struct device_node *np) miic_port->miic = miic; miic_port->port = port - 1; miic_port->pcs.ops = &miic_phylink_ops; - miic_port->pcs.neg_mode = true; + + phy_interface_set_rgmii(miic_port->pcs.supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_RMII, miic_port->pcs.supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_MII, miic_port->pcs.supported_interfaces); return &miic_port->pcs; } @@ -472,13 +463,10 @@ static int miic_parse_dt(struct device *dev, u32 *mode_cfg) if (of_property_read_u32(np, "renesas,miic-switch-portin", &conf) == 0) dt_val[0] = conf; - for_each_child_of_node(np, conv) { + for_each_available_child_of_node(np, conv) { if (of_property_read_u32(conv, "reg", &port)) continue; - if (!of_device_is_available(conv)) - continue; - if (of_property_read_u32(conv, "renesas,miic-input", &conf) == 0) dt_val[port] = conf; } diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index 1faa37f0e7b9..e32dec4b812e 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -602,36 +602,6 @@ static void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces) __set_bit(compat->interface, interfaces); } -int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable) -{ - u16 mask, val; - int ret; - - mask = DW_VR_MII_EEE_LTX_EN | DW_VR_MII_EEE_LRX_EN | - DW_VR_MII_EEE_TX_QUIET_EN | DW_VR_MII_EEE_RX_QUIET_EN | - DW_VR_MII_EEE_TX_EN_CTRL | DW_VR_MII_EEE_RX_EN_CTRL | - DW_VR_MII_EEE_MULT_FACT_100NS; - - if (enable) - val = DW_VR_MII_EEE_LTX_EN | DW_VR_MII_EEE_LRX_EN | - DW_VR_MII_EEE_TX_QUIET_EN | DW_VR_MII_EEE_RX_QUIET_EN | - DW_VR_MII_EEE_TX_EN_CTRL | DW_VR_MII_EEE_RX_EN_CTRL | - FIELD_PREP(DW_VR_MII_EEE_MULT_FACT_100NS, - mult_fact_100ns); - else - val = 0; - - ret = xpcs_modify(xpcs, MDIO_MMD_VEND2, DW_VR_MII_EEE_MCTRL0, mask, - val); - if (ret < 0) - return ret; - - return xpcs_modify(xpcs, MDIO_MMD_VEND2, DW_VR_MII_EEE_MCTRL1, - DW_VR_MII_EEE_TRN_LPI, - enable ? DW_VR_MII_EEE_TRN_LPI : 0); -} -EXPORT_SYMBOL_GPL(xpcs_config_eee); - static void xpcs_pre_config(struct phylink_pcs *pcs, phy_interface_t interface) { struct dw_xpcs *xpcs = phylink_pcs_to_xpcs(pcs); @@ -1193,6 +1163,63 @@ static void xpcs_an_restart(struct phylink_pcs *pcs) BMCR_ANRESTART); } +static int xpcs_config_eee(struct dw_xpcs *xpcs, bool enable) +{ + u16 mask, val; + int ret; + + mask = DW_VR_MII_EEE_LTX_EN | DW_VR_MII_EEE_LRX_EN | + DW_VR_MII_EEE_TX_QUIET_EN | DW_VR_MII_EEE_RX_QUIET_EN | + DW_VR_MII_EEE_TX_EN_CTRL | DW_VR_MII_EEE_RX_EN_CTRL | + DW_VR_MII_EEE_MULT_FACT_100NS; + + if (enable) + val = DW_VR_MII_EEE_LTX_EN | DW_VR_MII_EEE_LRX_EN | + DW_VR_MII_EEE_TX_QUIET_EN | DW_VR_MII_EEE_RX_QUIET_EN | + DW_VR_MII_EEE_TX_EN_CTRL | DW_VR_MII_EEE_RX_EN_CTRL | + FIELD_PREP(DW_VR_MII_EEE_MULT_FACT_100NS, + xpcs->eee_mult_fact); + else + val = 0; + + ret = xpcs_modify(xpcs, MDIO_MMD_VEND2, DW_VR_MII_EEE_MCTRL0, mask, + val); + if (ret < 0) + return ret; + + return xpcs_modify(xpcs, MDIO_MMD_VEND2, DW_VR_MII_EEE_MCTRL1, + DW_VR_MII_EEE_TRN_LPI, + enable ? DW_VR_MII_EEE_TRN_LPI : 0); +} + +static void xpcs_disable_eee(struct phylink_pcs *pcs) +{ + struct dw_xpcs *xpcs = phylink_pcs_to_xpcs(pcs); + + xpcs_config_eee(xpcs, false); +} + +static void xpcs_enable_eee(struct phylink_pcs *pcs) +{ + struct dw_xpcs *xpcs = phylink_pcs_to_xpcs(pcs); + + xpcs_config_eee(xpcs, true); +} + +/** + * xpcs_config_eee_mult_fact() - set the EEE clock multiplying factor + * @xpcs: pointer to a &struct dw_xpcs instance + * @mult_fact: the multiplying factor + * + * Configure the EEE clock multiplying factor. This value should be such that + * clk_eee_time_period * (mult_fact + 1) is within the range 80 to 120ns. + */ +void xpcs_config_eee_mult_fact(struct dw_xpcs *xpcs, u8 mult_fact) +{ + xpcs->eee_mult_fact = mult_fact; +} +EXPORT_SYMBOL_GPL(xpcs_config_eee_mult_fact); + static int xpcs_read_ids(struct dw_xpcs *xpcs) { int ret; @@ -1341,6 +1368,8 @@ static const struct phylink_pcs_ops xpcs_phylink_ops = { .pcs_get_state = xpcs_get_state, .pcs_an_restart = xpcs_an_restart, .pcs_link_up = xpcs_link_up, + .pcs_disable_eee = xpcs_disable_eee, + .pcs_enable_eee = xpcs_enable_eee, }; static int xpcs_identify(struct dw_xpcs *xpcs) @@ -1374,7 +1403,6 @@ static struct dw_xpcs *xpcs_create_data(struct mdio_device *mdiodev) mdio_device_get(mdiodev); xpcs->mdiodev = mdiodev; xpcs->pcs.ops = &xpcs_phylink_ops; - xpcs->pcs.neg_mode = true; xpcs->pcs.poll = true; return xpcs; diff --git a/drivers/net/pcs/pcs-xpcs.h b/drivers/net/pcs/pcs-xpcs.h index adc5a0b3c883..929fa238445e 100644 --- a/drivers/net/pcs/pcs-xpcs.h +++ b/drivers/net/pcs/pcs-xpcs.h @@ -55,23 +55,11 @@ /* Clause 37 Defines */ /* VR MII MMD registers offsets */ #define DW_VR_MII_DIG_CTRL1 0x8000 -#define DW_VR_MII_AN_CTRL 0x8001 -#define DW_VR_MII_AN_INTR_STS 0x8002 -/* EEE Mode Control Register */ -#define DW_VR_MII_EEE_MCTRL0 0x8006 -#define DW_VR_MII_EEE_MCTRL1 0x800b -#define DW_VR_MII_DIG_CTRL2 0x80e1 - -/* VR_MII_DIG_CTRL1 */ #define DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW BIT(9) #define DW_VR_MII_DIG_CTRL1_2G5_EN BIT(2) #define DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL BIT(0) -/* VR_MII_DIG_CTRL2 */ -#define DW_VR_MII_DIG_CTRL2_TX_POL_INV BIT(4) -#define DW_VR_MII_DIG_CTRL2_RX_POL_INV BIT(0) - -/* VR_MII_AN_CTRL */ +#define DW_VR_MII_AN_CTRL 0x8001 #define DW_VR_MII_AN_CTRL_8BIT BIT(8) #define DW_VR_MII_TX_CONFIG_MASK BIT(3) #define DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII 0x1 @@ -81,7 +69,7 @@ #define DW_VR_MII_PCS_MODE_C37_SGMII 0x2 #define DW_VR_MII_AN_INTR_EN BIT(0) -/* VR_MII_AN_INTR_STS */ +#define DW_VR_MII_AN_INTR_STS 0x8002 #define DW_VR_MII_AN_STS_C37_ANCMPLT_INTR BIT(0) #define DW_VR_MII_AN_STS_C37_ANSGM_FD BIT(1) #define DW_VR_MII_AN_STS_C37_ANSGM_SP GENMASK(3, 2) @@ -90,19 +78,22 @@ #define DW_VR_MII_C37_ANSGM_SP_1000 0x2 #define DW_VR_MII_C37_ANSGM_SP_LNKSTS BIT(4) -/* VR MII EEE Control 0 defines */ +#define DW_VR_MII_EEE_MCTRL0 0x8006 #define DW_VR_MII_EEE_LTX_EN BIT(0) /* LPI Tx Enable */ #define DW_VR_MII_EEE_LRX_EN BIT(1) /* LPI Rx Enable */ #define DW_VR_MII_EEE_TX_QUIET_EN BIT(2) /* Tx Quiet Enable */ #define DW_VR_MII_EEE_RX_QUIET_EN BIT(3) /* Rx Quiet Enable */ #define DW_VR_MII_EEE_TX_EN_CTRL BIT(4) /* Tx Control Enable */ #define DW_VR_MII_EEE_RX_EN_CTRL BIT(7) /* Rx Control Enable */ - #define DW_VR_MII_EEE_MULT_FACT_100NS GENMASK(11, 8) -/* VR MII EEE Control 1 defines */ +#define DW_VR_MII_EEE_MCTRL1 0x800b #define DW_VR_MII_EEE_TRN_LPI BIT(0) /* Transparent Mode Enable */ +#define DW_VR_MII_DIG_CTRL2 0x80e1 +#define DW_VR_MII_DIG_CTRL2_TX_POL_INV BIT(4) +#define DW_VR_MII_DIG_CTRL2_RX_POL_INV BIT(0) + #define DW_XPCS_INFO_DECLARE(_name, _pcs, _pma) \ static const struct dw_xpcs_info _name = { .pcs = _pcs, .pma = _pma } @@ -122,6 +113,7 @@ struct dw_xpcs { struct phylink_pcs pcs; phy_interface_t interface; bool need_reset; + u8 eee_mult_fact; }; int xpcs_read(struct dw_xpcs *xpcs, int dev, u32 reg); diff --git a/drivers/net/phy/aquantia/aquantia_hwmon.c b/drivers/net/phy/aquantia/aquantia_hwmon.c index 7b3c49c3bf49..1a714b56b765 100644 --- a/drivers/net/phy/aquantia/aquantia_hwmon.c +++ b/drivers/net/phy/aquantia/aquantia_hwmon.c @@ -172,33 +172,13 @@ static const struct hwmon_ops aqr_hwmon_ops = { .write = aqr_hwmon_write, }; -static u32 aqr_hwmon_chip_config[] = { - HWMON_C_REGISTER_TZ, - 0, -}; - -static const struct hwmon_channel_info aqr_hwmon_chip = { - .type = hwmon_chip, - .config = aqr_hwmon_chip_config, -}; - -static u32 aqr_hwmon_temp_config[] = { - HWMON_T_INPUT | - HWMON_T_MAX | HWMON_T_MIN | - HWMON_T_MAX_ALARM | HWMON_T_MIN_ALARM | - HWMON_T_CRIT | HWMON_T_LCRIT | - HWMON_T_CRIT_ALARM | HWMON_T_LCRIT_ALARM, - 0, -}; - -static const struct hwmon_channel_info aqr_hwmon_temp = { - .type = hwmon_temp, - .config = aqr_hwmon_temp_config, -}; - static const struct hwmon_channel_info * const aqr_hwmon_info[] = { - &aqr_hwmon_chip, - &aqr_hwmon_temp, + HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ), + HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | + HWMON_T_MAX | HWMON_T_MIN | + HWMON_T_MAX_ALARM | HWMON_T_MIN_ALARM | + HWMON_T_CRIT | HWMON_T_LCRIT | + HWMON_T_CRIT_ALARM | HWMON_T_LCRIT_ALARM), NULL, }; diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 22edb7e4c1a1..13e43fee1906 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -16,7 +16,7 @@ #include <linux/delay.h> #include <linux/module.h> #include <linux/phy.h> -#include <linux/pm_wakeup.h> +#include <linux/device.h> #include <linux/brcmphy.h> #include <linux/of.h> #include <linux/interrupt.h> diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index 6599feca1967..3662f3905d5a 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -31,6 +31,7 @@ #define MII_DP83822_RCSR 0x17 #define MII_DP83822_RESET_CTRL 0x1f #define MII_DP83822_MLEDCR 0x25 +#define MII_DP83822_LDCTRL 0x403 #define MII_DP83822_LEDCFG1 0x460 #define MII_DP83822_IOCTRL1 0x462 #define MII_DP83822_IOCTRL2 0x463 @@ -123,6 +124,9 @@ #define DP83822_IOCTRL1_GPIO1_CTRL GENMASK(2, 0) #define DP83822_IOCTRL1_GPIO1_CTRL_LED_1 BIT(0) +/* LDCTRL bits */ +#define DP83822_100BASE_TX_LINE_DRIVER_SWING GENMASK(7, 4) + /* IOCTRL2 bits */ #define DP83822_IOCTRL2_GPIO2_CLK_SRC GENMASK(6, 4) #define DP83822_IOCTRL2_GPIO2_CTRL GENMASK(2, 0) @@ -197,6 +201,7 @@ struct dp83822_private { bool set_gpio2_clk_out; u32 gpio2_clk_out; bool led_pin_enable[DP83822_MAX_LED_PINS]; + int tx_amplitude_100base_tx_index; }; static int dp83822_config_wol(struct phy_device *phydev, @@ -522,6 +527,12 @@ static int dp83822_config_init(struct phy_device *phydev) FIELD_PREP(DP83822_IOCTRL2_GPIO2_CLK_SRC, dp83822->gpio2_clk_out)); + if (dp83822->tx_amplitude_100base_tx_index >= 0) + phy_modify_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_LDCTRL, + DP83822_100BASE_TX_LINE_DRIVER_SWING, + FIELD_PREP(DP83822_100BASE_TX_LINE_DRIVER_SWING, + dp83822->tx_amplitude_100base_tx_index)); + err = dp83822_config_init_leds(phydev); if (err) return err; @@ -720,6 +731,11 @@ static int dp83822_phy_reset(struct phy_device *phydev) } #ifdef CONFIG_OF_MDIO +static const u32 tx_amplitude_100base_tx_gain[] = { + 80, 82, 83, 85, 87, 88, 90, 92, + 93, 95, 97, 98, 100, 102, 103, 105, +}; + static int dp83822_of_init_leds(struct phy_device *phydev) { struct device_node *node = phydev->mdio.dev.of_node; @@ -780,6 +796,8 @@ static int dp83822_of_init(struct phy_device *phydev) struct dp83822_private *dp83822 = phydev->priv; struct device *dev = &phydev->mdio.dev; const char *of_val; + int i, ret; + u32 val; /* Signal detection for the PHY is only enabled if the FX_EN and the * SD_EN pins are strapped. Signal detection can only enabled if FX_EN @@ -815,6 +833,26 @@ static int dp83822_of_init(struct phy_device *phydev) dp83822->set_gpio2_clk_out = true; } + dp83822->tx_amplitude_100base_tx_index = -1; + ret = phy_get_tx_amplitude_gain(phydev, dev, + ETHTOOL_LINK_MODE_100baseT_Full_BIT, + &val); + if (!ret) { + for (i = 0; i < ARRAY_SIZE(tx_amplitude_100base_tx_gain); i++) { + if (tx_amplitude_100base_tx_gain[i] == val) { + dp83822->tx_amplitude_100base_tx_index = i; + break; + } + } + + if (dp83822->tx_amplitude_100base_tx_index < 0) { + phydev_err(phydev, + "Invalid value for tx-amplitude-100base-tx-percent property (%u)\n", + val); + return -EINVAL; + } + } + return dp83822_of_init_leds(phydev); } diff --git a/drivers/net/phy/dp83td510.c b/drivers/net/phy/dp83td510.c index a42af9c168ec..23af1ac194fa 100644 --- a/drivers/net/phy/dp83td510.c +++ b/drivers/net/phy/dp83td510.c @@ -204,10 +204,191 @@ struct dp83td510_priv { #define DP83TD510E_UNKN_030E 0x30e #define DP83TD510E_030E_VAL 0x2520 +#define DP83TD510E_LEDS_CFG_1 0x460 +#define DP83TD510E_LED_FN(idx, val) (((val) & 0xf) << ((idx) * 4)) +#define DP83TD510E_LED_FN_MASK(idx) (0xf << ((idx) * 4)) +/* link OK */ +#define DP83TD510E_LED_MODE_LINK_OK 0x0 +/* TX/RX activity */ +#define DP83TD510E_LED_MODE_TX_RX_ACTIVITY 0x1 +/* TX activity */ +#define DP83TD510E_LED_MODE_TX_ACTIVITY 0x2 +/* RX activity */ +#define DP83TD510E_LED_MODE_RX_ACTIVITY 0x3 +/* LR */ +#define DP83TD510E_LED_MODE_LR 0x4 +/* SR */ +#define DP83TD510E_LED_MODE_SR 0x5 +/* LED SPEED: High for 10Base-T */ +#define DP83TD510E_LED_MODE_LED_SPEED 0x6 +/* Duplex mode */ +#define DP83TD510E_LED_MODE_DUPLEX 0x7 +/* link + blink on activity with stretch option */ +#define DP83TD510E_LED_MODE_LINK_BLINK 0x8 +/* blink on activity with stretch option */ +#define DP83TD510E_LED_MODE_BLINK_ACTIVITY 0x9 +/* blink on tx activity with stretch option */ +#define DP83TD510E_LED_MODE_BLINK_TX 0xa +/* blink on rx activity with stretch option */ +#define DP83TD510E_LED_MODE_BLINK_RX 0xb +/* link_lost */ +#define DP83TD510E_LED_MODE_LINK_LOST 0xc +/* PRBS error: toggles on error */ +#define DP83TD510E_LED_MODE_PRBS_ERROR 0xd +/* XMII TX/RX Error with stretch option */ +#define DP83TD510E_LED_MODE_XMII_ERR 0xe + +#define DP83TD510E_LED_COUNT 4 + +#define DP83TD510E_LEDS_CFG_2 0x469 +#define DP83TD510E_LED_POLARITY(idx) BIT((idx) * 4 + 2) +#define DP83TD510E_LED_DRV_VAL(idx) BIT((idx) * 4 + 1) +#define DP83TD510E_LED_DRV_EN(idx) BIT((idx) * 4) + #define DP83TD510E_ALCD_STAT 0xa9f #define DP83TD510E_ALCD_COMPLETE BIT(15) #define DP83TD510E_ALCD_CABLE_LENGTH GENMASK(10, 0) +static int dp83td510_led_brightness_set(struct phy_device *phydev, u8 index, + enum led_brightness brightness) +{ + u32 val; + + if (index >= DP83TD510E_LED_COUNT) + return -EINVAL; + + val = DP83TD510E_LED_DRV_EN(index); + + if (brightness) + val |= DP83TD510E_LED_DRV_VAL(index); + + return phy_modify_mmd(phydev, MDIO_MMD_VEND2, DP83TD510E_LEDS_CFG_2, + DP83TD510E_LED_DRV_VAL(index) | + DP83TD510E_LED_DRV_EN(index), val); +} + +static int dp83td510_led_mode(u8 index, unsigned long rules) +{ + if (index >= DP83TD510E_LED_COUNT) + return -EINVAL; + + switch (rules) { + case BIT(TRIGGER_NETDEV_LINK): + return DP83TD510E_LED_MODE_LINK_OK; + case BIT(TRIGGER_NETDEV_LINK_10): + return DP83TD510E_LED_MODE_LED_SPEED; + case BIT(TRIGGER_NETDEV_FULL_DUPLEX): + return DP83TD510E_LED_MODE_DUPLEX; + case BIT(TRIGGER_NETDEV_TX): + return DP83TD510E_LED_MODE_TX_ACTIVITY; + case BIT(TRIGGER_NETDEV_RX): + return DP83TD510E_LED_MODE_RX_ACTIVITY; + case BIT(TRIGGER_NETDEV_TX) | BIT(TRIGGER_NETDEV_RX): + return DP83TD510E_LED_MODE_TX_RX_ACTIVITY; + case BIT(TRIGGER_NETDEV_LINK) | BIT(TRIGGER_NETDEV_TX) | + BIT(TRIGGER_NETDEV_RX): + return DP83TD510E_LED_MODE_LINK_BLINK; + default: + return -EOPNOTSUPP; + } +} + +static int dp83td510_led_hw_is_supported(struct phy_device *phydev, u8 index, + unsigned long rules) +{ + int ret; + + ret = dp83td510_led_mode(index, rules); + if (ret < 0) + return ret; + + return 0; +} + +static int dp83td510_led_hw_control_set(struct phy_device *phydev, u8 index, + unsigned long rules) +{ + int mode, ret; + + mode = dp83td510_led_mode(index, rules); + if (mode < 0) + return mode; + + ret = phy_modify_mmd(phydev, MDIO_MMD_VEND2, DP83TD510E_LEDS_CFG_1, + DP83TD510E_LED_FN_MASK(index), + DP83TD510E_LED_FN(index, mode)); + if (ret) + return ret; + + return phy_modify_mmd(phydev, MDIO_MMD_VEND2, DP83TD510E_LEDS_CFG_2, + DP83TD510E_LED_DRV_EN(index), 0); +} + +static int dp83td510_led_hw_control_get(struct phy_device *phydev, + u8 index, unsigned long *rules) +{ + int val; + + val = phy_read_mmd(phydev, MDIO_MMD_VEND2, DP83TD510E_LEDS_CFG_1); + if (val < 0) + return val; + + val &= DP83TD510E_LED_FN_MASK(index); + val >>= index * 4; + + switch (val) { + case DP83TD510E_LED_MODE_LINK_OK: + *rules = BIT(TRIGGER_NETDEV_LINK); + break; + /* LED mode: LED SPEED (10BaseT1L indicator) */ + case DP83TD510E_LED_MODE_LED_SPEED: + *rules = BIT(TRIGGER_NETDEV_LINK_10); + break; + case DP83TD510E_LED_MODE_DUPLEX: + *rules = BIT(TRIGGER_NETDEV_FULL_DUPLEX); + break; + case DP83TD510E_LED_MODE_TX_ACTIVITY: + *rules = BIT(TRIGGER_NETDEV_TX); + break; + case DP83TD510E_LED_MODE_RX_ACTIVITY: + *rules = BIT(TRIGGER_NETDEV_RX); + break; + case DP83TD510E_LED_MODE_TX_RX_ACTIVITY: + *rules = BIT(TRIGGER_NETDEV_TX) | BIT(TRIGGER_NETDEV_RX); + break; + case DP83TD510E_LED_MODE_LINK_BLINK: + *rules = BIT(TRIGGER_NETDEV_LINK) | + BIT(TRIGGER_NETDEV_TX) | + BIT(TRIGGER_NETDEV_RX); + break; + default: + *rules = 0; + break; + } + + return 0; +} + +static int dp83td510_led_polarity_set(struct phy_device *phydev, int index, + unsigned long modes) +{ + u16 polarity = DP83TD510E_LED_POLARITY(index); + u32 mode; + + for_each_set_bit(mode, &modes, __PHY_LED_MODES_NUM) { + switch (mode) { + case PHY_LED_ACTIVE_LOW: + polarity = 0; + break; + default: + return -EINVAL; + } + } + + return phy_modify_mmd(phydev, MDIO_MMD_VEND2, DP83TD510E_LEDS_CFG_2, + DP83TD510E_LED_POLARITY(index), polarity); +} + /** * dp83td510_update_stats - Update the PHY statistics for the DP83TD510 PHY. * @phydev: Pointer to the phy_device structure. @@ -712,6 +893,12 @@ static struct phy_driver dp83td510_driver[] = { .get_phy_stats = dp83td510_get_phy_stats, .update_stats = dp83td510_update_stats, + .led_brightness_set = dp83td510_led_brightness_set, + .led_hw_is_supported = dp83td510_led_hw_is_supported, + .led_hw_control_set = dp83td510_led_hw_control_set, + .led_hw_control_get = dp83td510_led_hw_control_get, + .led_polarity_set = dp83td510_led_polarity_set, + .suspend = genphy_suspend, .resume = genphy_resume, } }; diff --git a/drivers/net/phy/dp83tg720.c b/drivers/net/phy/dp83tg720.c index 050f4537d140..7e76323409c4 100644 --- a/drivers/net/phy/dp83tg720.c +++ b/drivers/net/phy/dp83tg720.c @@ -4,12 +4,31 @@ */ #include <linux/bitfield.h> #include <linux/ethtool_netlink.h> +#include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/phy.h> +#include <linux/random.h> #include "open_alliance_helpers.h" +/* + * DP83TG720S_POLL_ACTIVE_LINK - Polling interval in milliseconds when the link + * is active. + * DP83TG720S_POLL_NO_LINK_MIN - Minimum polling interval in milliseconds when + * the link is down. + * DP83TG720S_POLL_NO_LINK_MAX - Maximum polling interval in milliseconds when + * the link is down. + * + * These values are not documented or officially recommended by the vendor but + * were determined through empirical testing. They achieve a good balance in + * minimizing the number of reset retries while ensuring reliable link recovery + * within a reasonable timeframe. + */ +#define DP83TG720S_POLL_ACTIVE_LINK 1000 +#define DP83TG720S_POLL_NO_LINK_MIN 100 +#define DP83TG720S_POLL_NO_LINK_MAX 1000 + #define DP83TG720S_PHY_ID 0x2000a284 /* MDIO_MMD_VEND2 registers */ @@ -371,6 +390,13 @@ static int dp83tg720_read_status(struct phy_device *phydev) if (ret) return ret; + /* Sleep 600ms for PHY stabilization post-reset. + * Empirically chosen value (not documented). + * Helps reduce reset bounces with link partners having similar + * issues. + */ + msleep(600); + /* After HW reset we need to restore master/slave configuration. * genphy_c45_pma_baset1_read_master_slave() call will be done * by the dp83tg720_config_aneg() function. @@ -498,6 +524,57 @@ static int dp83tg720_probe(struct phy_device *phydev) return 0; } +/** + * dp83tg720_get_next_update_time - Determine the next update time for PHY + * state + * @phydev: Pointer to the phy_device structure + * + * This function addresses a limitation of the DP83TG720 PHY, which cannot + * reliably detect or report a stable link state. To recover from such + * scenarios, the PHY must be periodically reset when the link is down. However, + * if the link partner also runs Linux with the same driver, synchronized reset + * intervals can lead to a deadlock where the link never establishes due to + * simultaneous resets on both sides. + * + * To avoid this, the function implements randomized polling intervals when the + * link is down. It ensures that reset intervals are desynchronized by + * introducing a random delay between a configured minimum and maximum range. + * When the link is up, a fixed polling interval is used to minimize overhead. + * + * This mechanism guarantees that the link will reestablish within 10 seconds + * in the worst-case scenario. + * + * Return: Time (in jiffies) until the next update event for the PHY state + * machine. + */ +static unsigned int dp83tg720_get_next_update_time(struct phy_device *phydev) +{ + unsigned int next_time_jiffies; + + if (phydev->link) { + /* When the link is up, use a fixed 1000ms interval + * (in jiffies) + */ + next_time_jiffies = + msecs_to_jiffies(DP83TG720S_POLL_ACTIVE_LINK); + } else { + unsigned int min_jiffies, max_jiffies, rand_jiffies; + + /* When the link is down, randomize interval between min/max + * (in jiffies) + */ + min_jiffies = msecs_to_jiffies(DP83TG720S_POLL_NO_LINK_MIN); + max_jiffies = msecs_to_jiffies(DP83TG720S_POLL_NO_LINK_MAX); + + rand_jiffies = min_jiffies + + get_random_u32_below(max_jiffies - min_jiffies + 1); + next_time_jiffies = rand_jiffies; + } + + /* Ensure the polling time is at least one jiffy */ + return max(next_time_jiffies, 1U); +} + static struct phy_driver dp83tg720_driver[] = { { PHY_ID_MATCH_MODEL(DP83TG720S_PHY_ID), @@ -516,6 +593,7 @@ static struct phy_driver dp83tg720_driver[] = { .get_link_stats = dp83tg720_get_link_stats, .get_phy_stats = dp83tg720_get_phy_stats, .update_stats = dp83tg720_update_stats, + .get_next_update_time = dp83tg720_get_next_update_time, .suspend = genphy_suspend, .resume = genphy_resume, diff --git a/drivers/net/phy/marvell-88q2xxx.c b/drivers/net/phy/marvell-88q2xxx.c index a3996471a1c9..15c0f8adc2f5 100644 --- a/drivers/net/phy/marvell-88q2xxx.c +++ b/drivers/net/phy/marvell-88q2xxx.c @@ -7,30 +7,34 @@ * Copyright (C) 2024 Liebherr-Electronics and Drives GmbH */ #include <linux/ethtool_netlink.h> +#include <linux/hwmon.h> #include <linux/marvell_phy.h> +#include <linux/of.h> #include <linux/phy.h> -#include <linux/hwmon.h> -#define PHY_ID_88Q2220_REVB0 (MARVELL_PHY_ID_88Q2220 | 0x1) -#define PHY_ID_88Q2220_REVB1 (MARVELL_PHY_ID_88Q2220 | 0x2) -#define PHY_ID_88Q2220_REVB2 (MARVELL_PHY_ID_88Q2220 | 0x3) +#define PHY_ID_88Q2220_REVB0 (MARVELL_PHY_ID_88Q2220 | 0x1) +#define PHY_ID_88Q2220_REVB1 (MARVELL_PHY_ID_88Q2220 | 0x2) +#define PHY_ID_88Q2220_REVB2 (MARVELL_PHY_ID_88Q2220 | 0x3) -#define MDIO_MMD_AN_MV_STAT 32769 -#define MDIO_MMD_AN_MV_STAT_ANEG 0x0100 -#define MDIO_MMD_AN_MV_STAT_LOCAL_RX 0x1000 -#define MDIO_MMD_AN_MV_STAT_REMOTE_RX 0x2000 -#define MDIO_MMD_AN_MV_STAT_LOCAL_MASTER 0x4000 -#define MDIO_MMD_AN_MV_STAT_MS_CONF_FAULT 0x8000 +#define MDIO_MMD_AN_MV_STAT 32769 +#define MDIO_MMD_AN_MV_STAT_ANEG 0x0100 +#define MDIO_MMD_AN_MV_STAT_LOCAL_RX 0x1000 +#define MDIO_MMD_AN_MV_STAT_REMOTE_RX 0x2000 +#define MDIO_MMD_AN_MV_STAT_LOCAL_MASTER 0x4000 +#define MDIO_MMD_AN_MV_STAT_MS_CONF_FAULT 0x8000 -#define MDIO_MMD_AN_MV_STAT2 32794 -#define MDIO_MMD_AN_MV_STAT2_AN_RESOLVED 0x0800 -#define MDIO_MMD_AN_MV_STAT2_100BT1 0x2000 -#define MDIO_MMD_AN_MV_STAT2_1000BT1 0x4000 +#define MDIO_MMD_AN_MV_STAT2 32794 +#define MDIO_MMD_AN_MV_STAT2_AN_RESOLVED 0x0800 +#define MDIO_MMD_AN_MV_STAT2_100BT1 0x2000 +#define MDIO_MMD_AN_MV_STAT2_1000BT1 0x4000 -#define MDIO_MMD_PCS_MV_INT_EN 32784 -#define MDIO_MMD_PCS_MV_INT_EN_LINK_UP 0x0040 -#define MDIO_MMD_PCS_MV_INT_EN_LINK_DOWN 0x0080 -#define MDIO_MMD_PCS_MV_INT_EN_100BT1 0x1000 +#define MDIO_MMD_PCS_MV_RESET_CTRL 32768 +#define MDIO_MMD_PCS_MV_RESET_CTRL_TX_DISABLE 0x8 + +#define MDIO_MMD_PCS_MV_INT_EN 32784 +#define MDIO_MMD_PCS_MV_INT_EN_LINK_UP 0x0040 +#define MDIO_MMD_PCS_MV_INT_EN_LINK_DOWN 0x0080 +#define MDIO_MMD_PCS_MV_INT_EN_100BT1 0x1000 #define MDIO_MMD_PCS_MV_GPIO_INT_STAT 32785 #define MDIO_MMD_PCS_MV_GPIO_INT_STAT_LINK_UP 0x0040 @@ -40,6 +44,22 @@ #define MDIO_MMD_PCS_MV_GPIO_INT_CTRL 32787 #define MDIO_MMD_PCS_MV_GPIO_INT_CTRL_TRI_DIS 0x0800 +#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL 32790 +#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LED_1_MASK GENMASK(7, 4) +#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LED_0_MASK GENMASK(3, 0) +#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK 0x0 /* Link established */ +#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK_RX_TX 0x1 /* Link established, blink for rx or tx activity */ +#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK_1000BT1 0x2 /* Blink 3x for 1000BT1 link established */ +#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_RX_TX_ON 0x3 /* Receive or transmit activity */ +#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_RX_TX 0x4 /* Blink on receive or transmit activity */ +#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_TX 0x5 /* Transmit activity */ +#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK_COPPER 0x6 /* Copper Link established */ +#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK_1000BT1_ON 0x7 /* 1000BT1 link established */ +#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_FORCE_OFF 0x8 /* Force off */ +#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_FORCE_ON 0x9 /* Force on */ +#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_FORCE_HIGHZ 0xa /* Force Hi-Z */ +#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_FORCE_BLINK 0xb /* Force blink */ + #define MDIO_MMD_PCS_MV_TEMP_SENSOR1 32833 #define MDIO_MMD_PCS_MV_TEMP_SENSOR1_RAW_INT 0x0001 #define MDIO_MMD_PCS_MV_TEMP_SENSOR1_INT 0x0040 @@ -60,11 +80,11 @@ #define MDIO_MMD_PCS_MV_100BT1_STAT1_REMOTE_RX 0x2000 #define MDIO_MMD_PCS_MV_100BT1_STAT1_LOCAL_MASTER 0x4000 -#define MDIO_MMD_PCS_MV_100BT1_STAT2 33033 -#define MDIO_MMD_PCS_MV_100BT1_STAT2_JABBER 0x0001 -#define MDIO_MMD_PCS_MV_100BT1_STAT2_POL 0x0002 -#define MDIO_MMD_PCS_MV_100BT1_STAT2_LINK 0x0004 -#define MDIO_MMD_PCS_MV_100BT1_STAT2_ANGE 0x0008 +#define MDIO_MMD_PCS_MV_100BT1_STAT2 33033 +#define MDIO_MMD_PCS_MV_100BT1_STAT2_JABBER 0x0001 +#define MDIO_MMD_PCS_MV_100BT1_STAT2_POL 0x0002 +#define MDIO_MMD_PCS_MV_100BT1_STAT2_LINK 0x0004 +#define MDIO_MMD_PCS_MV_100BT1_STAT2_ANGE 0x0008 #define MDIO_MMD_PCS_MV_100BT1_INT_EN 33042 #define MDIO_MMD_PCS_MV_100BT1_INT_EN_LINKEVENT 0x0400 @@ -72,7 +92,7 @@ #define MDIO_MMD_PCS_MV_COPPER_INT_STAT 33043 #define MDIO_MMD_PCS_MV_COPPER_INT_STAT_LINKEVENT 0x0400 -#define MDIO_MMD_PCS_MV_RX_STAT 33328 +#define MDIO_MMD_PCS_MV_RX_STAT 33328 #define MDIO_MMD_PCS_MV_TDR_RESET 65226 #define MDIO_MMD_PCS_MV_TDR_RESET_TDR_RST 0x1000 @@ -95,8 +115,12 @@ #define MDIO_MMD_PCS_MV_TDR_OFF_CUTOFF 65246 +#define MV88Q2XXX_LED_INDEX_TX_ENABLE 0 +#define MV88Q2XXX_LED_INDEX_GPIO 1 + struct mv88q2xxx_priv { bool enable_temp; + bool enable_led0; }; struct mmd_val { @@ -460,6 +484,9 @@ static int mv88q2xxx_config_aneg(struct phy_device *phydev) static int mv88q2xxx_config_init(struct phy_device *phydev) { + struct mv88q2xxx_priv *priv = phydev->priv; + int ret; + /* The 88Q2XXX PHYs do have the extended ability register available, but * register MDIO_PMA_EXTABLE where they should signalize it does not * work according to specification. Therefore, we force it here. @@ -469,10 +496,31 @@ static int mv88q2xxx_config_init(struct phy_device *phydev) /* Configure interrupt with default settings, output is driven low for * active interrupt and high for inactive. */ - if (phy_interrupt_is_valid(phydev)) - return phy_set_bits_mmd(phydev, MDIO_MMD_PCS, - MDIO_MMD_PCS_MV_GPIO_INT_CTRL, - MDIO_MMD_PCS_MV_GPIO_INT_CTRL_TRI_DIS); + if (phy_interrupt_is_valid(phydev)) { + ret = phy_set_bits_mmd(phydev, MDIO_MMD_PCS, + MDIO_MMD_PCS_MV_GPIO_INT_CTRL, + MDIO_MMD_PCS_MV_GPIO_INT_CTRL_TRI_DIS); + if (ret < 0) + return ret; + } + + /* Enable LED function and disable TX disable feature on LED/TX_ENABLE */ + if (priv->enable_led0) { + ret = phy_clear_bits_mmd(phydev, MDIO_MMD_PCS, + MDIO_MMD_PCS_MV_RESET_CTRL, + MDIO_MMD_PCS_MV_RESET_CTRL_TX_DISABLE); + if (ret < 0) + return ret; + } + + /* Enable temperature sense */ + if (priv->enable_temp) { + ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, + MDIO_MMD_PCS_MV_TEMP_SENSOR2, + MDIO_MMD_PCS_MV_TEMP_SENSOR2_DIS_MASK, 0); + if (ret < 0) + return ret; + } return 0; } @@ -740,6 +788,49 @@ static int mv88q2xxx_hwmon_probe(struct phy_device *phydev) } #endif +#if IS_ENABLED(CONFIG_OF_MDIO) +static int mv88q2xxx_leds_probe(struct phy_device *phydev) +{ + struct device_node *node = phydev->mdio.dev.of_node; + struct mv88q2xxx_priv *priv = phydev->priv; + struct device_node *leds; + int ret = 0; + u32 index; + + if (!node) + return 0; + + leds = of_get_child_by_name(node, "leds"); + if (!leds) + return 0; + + for_each_available_child_of_node_scoped(leds, led) { + ret = of_property_read_u32(led, "reg", &index); + if (ret) + goto exit; + + if (index > MV88Q2XXX_LED_INDEX_GPIO) { + ret = -EINVAL; + goto exit; + } + + if (index == MV88Q2XXX_LED_INDEX_TX_ENABLE) + priv->enable_led0 = true; + } + +exit: + of_node_put(leds); + + return ret; +} + +#else +static int mv88q2xxx_leds_probe(struct phy_device *phydev) +{ + return 0; +} +#endif + static int mv88q2xxx_probe(struct phy_device *phydev) { struct mv88q2xxx_priv *priv; @@ -750,6 +841,21 @@ static int mv88q2xxx_probe(struct phy_device *phydev) phydev->priv = priv; + return 0; +} + +static int mv88q222x_probe(struct phy_device *phydev) +{ + int ret; + + ret = mv88q2xxx_probe(phydev); + if (ret) + return ret; + + ret = mv88q2xxx_leds_probe(phydev); + if (ret) + return ret; + return mv88q2xxx_hwmon_probe(phydev); } @@ -817,18 +923,6 @@ static int mv88q222x_revb1_revb2_config_init(struct phy_device *phydev) static int mv88q222x_config_init(struct phy_device *phydev) { - struct mv88q2xxx_priv *priv = phydev->priv; - int ret; - - /* Enable temperature sense */ - if (priv->enable_temp) { - ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, - MDIO_MMD_PCS_MV_TEMP_SENSOR2, - MDIO_MMD_PCS_MV_TEMP_SENSOR2_DIS_MASK, 0); - if (ret < 0) - return ret; - } - if (phydev->c45_ids.device_ids[MDIO_MMD_PMAPMD] == PHY_ID_88Q2220_REVB0) return mv88q222x_revb0_config_init(phydev); else @@ -918,11 +1012,104 @@ static int mv88q222x_cable_test_get_status(struct phy_device *phydev, return 0; } +static int mv88q2xxx_led_mode(u8 index, unsigned long rules) +{ + switch (rules) { + case BIT(TRIGGER_NETDEV_LINK): + return MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK; + case BIT(TRIGGER_NETDEV_LINK_1000): + return MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK_1000BT1_ON; + case BIT(TRIGGER_NETDEV_TX): + return MDIO_MMD_PCS_MV_LED_FUNC_CTRL_TX; + case BIT(TRIGGER_NETDEV_TX) | BIT(TRIGGER_NETDEV_RX): + return MDIO_MMD_PCS_MV_LED_FUNC_CTRL_RX_TX; + case BIT(TRIGGER_NETDEV_LINK) | BIT(TRIGGER_NETDEV_TX) | BIT(TRIGGER_NETDEV_RX): + return MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK_RX_TX; + default: + return -EOPNOTSUPP; + } +} + +static int mv88q2xxx_led_hw_is_supported(struct phy_device *phydev, u8 index, + unsigned long rules) +{ + int mode; + + mode = mv88q2xxx_led_mode(index, rules); + if (mode < 0) + return mode; + + return 0; +} + +static int mv88q2xxx_led_hw_control_set(struct phy_device *phydev, u8 index, + unsigned long rules) +{ + int mode; + + mode = mv88q2xxx_led_mode(index, rules); + if (mode < 0) + return mode; + + if (index == MV88Q2XXX_LED_INDEX_TX_ENABLE) + return phy_modify_mmd(phydev, MDIO_MMD_PCS, + MDIO_MMD_PCS_MV_LED_FUNC_CTRL, + MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LED_0_MASK, + FIELD_PREP(MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LED_0_MASK, + mode)); + else + return phy_modify_mmd(phydev, MDIO_MMD_PCS, + MDIO_MMD_PCS_MV_LED_FUNC_CTRL, + MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LED_1_MASK, + FIELD_PREP(MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LED_1_MASK, + mode)); +} + +static int mv88q2xxx_led_hw_control_get(struct phy_device *phydev, u8 index, + unsigned long *rules) +{ + int val; + + val = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_MMD_PCS_MV_LED_FUNC_CTRL); + if (val < 0) + return val; + + if (index == MV88Q2XXX_LED_INDEX_TX_ENABLE) + val = FIELD_GET(MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LED_0_MASK, val); + else + val = FIELD_GET(MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LED_1_MASK, val); + + switch (val) { + case MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK: + *rules = BIT(TRIGGER_NETDEV_LINK); + break; + case MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK_1000BT1_ON: + *rules = BIT(TRIGGER_NETDEV_LINK_1000); + break; + case MDIO_MMD_PCS_MV_LED_FUNC_CTRL_TX: + *rules = BIT(TRIGGER_NETDEV_TX); + break; + case MDIO_MMD_PCS_MV_LED_FUNC_CTRL_RX_TX: + *rules = BIT(TRIGGER_NETDEV_TX) | BIT(TRIGGER_NETDEV_RX); + break; + case MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK_RX_TX: + *rules = BIT(TRIGGER_NETDEV_LINK) | BIT(TRIGGER_NETDEV_TX) | + BIT(TRIGGER_NETDEV_RX); + break; + default: + *rules = 0; + break; + } + + return 0; +} + static struct phy_driver mv88q2xxx_driver[] = { { .phy_id = MARVELL_PHY_ID_88Q2110, .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "mv88q2110", + .probe = mv88q2xxx_probe, .get_features = mv88q2xxx_get_features, .config_aneg = mv88q2xxx_config_aneg, .config_init = mv88q2110_config_init, @@ -937,7 +1124,7 @@ static struct phy_driver mv88q2xxx_driver[] = { .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "mv88q2220", .flags = PHY_POLL_CABLE_TEST, - .probe = mv88q2xxx_probe, + .probe = mv88q222x_probe, .get_features = mv88q2xxx_get_features, .config_aneg = mv88q2xxx_config_aneg, .aneg_done = genphy_c45_aneg_done, @@ -953,6 +1140,9 @@ static struct phy_driver mv88q2xxx_driver[] = { .get_sqi_max = mv88q2xxx_get_sqi_max, .suspend = mv88q2xxx_suspend, .resume = mv88q2xxx_resume, + .led_hw_is_supported = mv88q2xxx_led_hw_is_supported, + .led_hw_control_set = mv88q2xxx_led_hw_control_set, + .led_hw_control_get = mv88q2xxx_led_hw_control_get, }, }; diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 44e1927de499..dd254e36ca8a 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -3124,33 +3124,13 @@ static umode_t marvell_hwmon_is_visible(const void *data, } } -static u32 marvell_hwmon_chip_config[] = { - HWMON_C_REGISTER_TZ, - 0 -}; - -static const struct hwmon_channel_info marvell_hwmon_chip = { - .type = hwmon_chip, - .config = marvell_hwmon_chip_config, -}; - /* we can define HWMON_T_CRIT and HWMON_T_MAX_ALARM even though these are not * defined for all PHYs, because the hwmon code checks whether the attributes * exists via the .is_visible method */ -static u32 marvell_hwmon_temp_config[] = { - HWMON_T_INPUT | HWMON_T_CRIT | HWMON_T_MAX_ALARM, - 0 -}; - -static const struct hwmon_channel_info marvell_hwmon_temp = { - .type = hwmon_temp, - .config = marvell_hwmon_temp_config, -}; - static const struct hwmon_channel_info * const marvell_hwmon_info[] = { - &marvell_hwmon_chip, - &marvell_hwmon_temp, + HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ), + HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_CRIT | HWMON_T_MAX_ALARM), NULL }; diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 623bdb8466b8..5354c8895163 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -230,29 +230,9 @@ static const struct hwmon_ops mv3310_hwmon_ops = { .read = mv3310_hwmon_read, }; -static u32 mv3310_hwmon_chip_config[] = { - HWMON_C_REGISTER_TZ | HWMON_C_UPDATE_INTERVAL, - 0, -}; - -static const struct hwmon_channel_info mv3310_hwmon_chip = { - .type = hwmon_chip, - .config = mv3310_hwmon_chip_config, -}; - -static u32 mv3310_hwmon_temp_config[] = { - HWMON_T_INPUT, - 0, -}; - -static const struct hwmon_channel_info mv3310_hwmon_temp = { - .type = hwmon_temp, - .config = mv3310_hwmon_temp_config, -}; - static const struct hwmon_channel_info * const mv3310_hwmon_info[] = { - &mv3310_hwmon_chip, - &mv3310_hwmon_temp, + HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ | HWMON_C_UPDATE_INTERVAL), + HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT), NULL, }; diff --git a/drivers/net/phy/mediatek/mtk-ge-soc.c b/drivers/net/phy/mediatek/mtk-ge-soc.c index bdf99b327029..9de6fbb45564 100644 --- a/drivers/net/phy/mediatek/mtk-ge-soc.c +++ b/drivers/net/phy/mediatek/mtk-ge-soc.c @@ -24,7 +24,107 @@ #define MTK_PHY_SMI_DET_ON_THRESH_MASK GENMASK(13, 8) #define MTK_PHY_PAGE_EXTENDED_2A30 0x2a30 -#define MTK_PHY_PAGE_EXTENDED_52B5 0x52b5 + +/* Registers on Token Ring debug nodes */ +/* ch_addr = 0x0, node_addr = 0x7, data_addr = 0x15 */ +/* NormMseLoThresh */ +#define NORMAL_MSE_LO_THRESH_MASK GENMASK(15, 8) + +/* ch_addr = 0x0, node_addr = 0xf, data_addr = 0x3c */ +/* RemAckCntLimitCtrl */ +#define REMOTE_ACK_COUNT_LIMIT_CTRL_MASK GENMASK(2, 1) + +/* ch_addr = 0x1, node_addr = 0xd, data_addr = 0x20 */ +/* VcoSlicerThreshBitsHigh */ +#define VCO_SLICER_THRESH_HIGH_MASK GENMASK(23, 0) + +/* ch_addr = 0x1, node_addr = 0xf, data_addr = 0x0 */ +/* DfeTailEnableVgaThresh1000 */ +#define DFE_TAIL_EANBLE_VGA_TRHESH_1000 GENMASK(5, 1) + +/* ch_addr = 0x1, node_addr = 0xf, data_addr = 0x1 */ +/* MrvlTrFix100Kp */ +#define MRVL_TR_FIX_100KP_MASK GENMASK(22, 20) +/* MrvlTrFix100Kf */ +#define MRVL_TR_FIX_100KF_MASK GENMASK(19, 17) +/* MrvlTrFix1000Kp */ +#define MRVL_TR_FIX_1000KP_MASK GENMASK(16, 14) +/* MrvlTrFix1000Kf */ +#define MRVL_TR_FIX_1000KF_MASK GENMASK(13, 11) + +/* ch_addr = 0x1, node_addr = 0xf, data_addr = 0x12 */ +/* VgaDecRate */ +#define VGA_DECIMATION_RATE_MASK GENMASK(8, 5) + +/* ch_addr = 0x1, node_addr = 0xf, data_addr = 0x17 */ +/* SlvDSPreadyTime */ +#define SLAVE_DSP_READY_TIME_MASK GENMASK(22, 15) +/* MasDSPreadyTime */ +#define MASTER_DSP_READY_TIME_MASK GENMASK(14, 7) + +/* ch_addr = 0x1, node_addr = 0xf, data_addr = 0x18 */ +/* EnabRandUpdTrig */ +#define ENABLE_RANDOM_UPDOWN_COUNTER_TRIGGER BIT(8) + +/* ch_addr = 0x1, node_addr = 0xf, data_addr = 0x20 */ +/* ResetSyncOffset */ +#define RESET_SYNC_OFFSET_MASK GENMASK(11, 8) + +/* ch_addr = 0x2, node_addr = 0xd, data_addr = 0x0 */ +/* FfeUpdGainForceVal */ +#define FFE_UPDATE_GAIN_FORCE_VAL_MASK GENMASK(9, 7) +/* FfeUpdGainForce */ +#define FFE_UPDATE_GAIN_FORCE BIT(6) + +/* ch_addr = 0x2, node_addr = 0xd, data_addr = 0x3 */ +/* TrFreeze */ +#define TR_FREEZE_MASK GENMASK(11, 0) + +/* ch_addr = 0x2, node_addr = 0xd, data_addr = 0x6 */ +/* SS: Steady-state, KP: Proportional Gain */ +/* SSTrKp100 */ +#define SS_TR_KP100_MASK GENMASK(21, 19) +/* SSTrKf100 */ +#define SS_TR_KF100_MASK GENMASK(18, 16) +/* SSTrKp1000Mas */ +#define SS_TR_KP1000_MASTER_MASK GENMASK(15, 13) +/* SSTrKf1000Mas */ +#define SS_TR_KF1000_MASTER_MASK GENMASK(12, 10) +/* SSTrKp1000Slv */ +#define SS_TR_KP1000_SLAVE_MASK GENMASK(9, 7) +/* SSTrKf1000Slv */ +#define SS_TR_KF1000_SLAVE_MASK GENMASK(6, 4) + +/* ch_addr = 0x2, node_addr = 0xd, data_addr = 0x8 */ +/* clear this bit if wanna select from AFE */ +/* Regsigdet_sel_1000 */ +#define EEE1000_SELECT_SIGNAL_DETECTION_FROM_DFE BIT(4) + +/* ch_addr = 0x2, node_addr = 0xd, data_addr = 0xd */ +/* RegEEE_st2TrKf1000 */ +#define EEE1000_STAGE2_TR_KF_MASK GENMASK(13, 11) + +/* ch_addr = 0x2, node_addr = 0xd, data_addr = 0xf */ +/* RegEEE_slv_waketr_timer_tar */ +#define SLAVE_WAKETR_TIMER_MASK GENMASK(20, 11) +/* RegEEE_slv_remtx_timer_tar */ +#define SLAVE_REMTX_TIMER_MASK GENMASK(10, 1) + +/* ch_addr = 0x2, node_addr = 0xd, data_addr = 0x10 */ +/* RegEEE_slv_wake_int_timer_tar */ +#define SLAVE_WAKEINT_TIMER_MASK GENMASK(10, 1) + +/* ch_addr = 0x2, node_addr = 0xd, data_addr = 0x14 */ +/* RegEEE_trfreeze_timer2 */ +#define TR_FREEZE_TIMER2_MASK GENMASK(9, 0) + +/* ch_addr = 0x2, node_addr = 0xd, data_addr = 0x1c */ +/* RegEEE100Stg1_tar */ +#define EEE100_LPSYNC_STAGE1_UPDATE_TIMER_MASK GENMASK(8, 0) + +/* ch_addr = 0x2, node_addr = 0xd, data_addr = 0x25 */ +/* REGEEE_wake_slv_tr_wait_dfesigdet_en */ +#define WAKE_SLAVE_TR_WAIT_DFE_DETECTION_EN BIT(11) #define ANALOG_INTERNAL_OPERATION_MAX_US 20 #define TXRESERVE_MIN 0 @@ -701,40 +801,36 @@ restore: static void mt798x_phy_common_finetune(struct phy_device *phydev) { phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5); - /* SlvDSPreadyTime = 24, MasDSPreadyTime = 24 */ - __phy_write(phydev, 0x11, 0xc71); - __phy_write(phydev, 0x12, 0xc); - __phy_write(phydev, 0x10, 0x8fae); - - /* EnabRandUpdTrig = 1 */ - __phy_write(phydev, 0x11, 0x2f00); - __phy_write(phydev, 0x12, 0xe); - __phy_write(phydev, 0x10, 0x8fb0); - - /* NormMseLoThresh = 85 */ - __phy_write(phydev, 0x11, 0x55a0); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x83aa); - - /* FfeUpdGainForce = 1(Enable), FfeUpdGainForceVal = 4 */ - __phy_write(phydev, 0x11, 0x240); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x9680); - - /* TrFreeze = 0 (mt7988 default) */ - __phy_write(phydev, 0x11, 0x0); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x9686); - - /* SSTrKp100 = 5 */ - /* SSTrKf100 = 6 */ - /* SSTrKp1000Mas = 5 */ - /* SSTrKf1000Mas = 6 */ - /* SSTrKp1000Slv = 5 */ - /* SSTrKf1000Slv = 6 */ - __phy_write(phydev, 0x11, 0xbaef); - __phy_write(phydev, 0x12, 0x2e); - __phy_write(phydev, 0x10, 0x968c); + __mtk_tr_modify(phydev, 0x1, 0xf, 0x17, + SLAVE_DSP_READY_TIME_MASK | MASTER_DSP_READY_TIME_MASK, + FIELD_PREP(SLAVE_DSP_READY_TIME_MASK, 0x18) | + FIELD_PREP(MASTER_DSP_READY_TIME_MASK, 0x18)); + + __mtk_tr_set_bits(phydev, 0x1, 0xf, 0x18, + ENABLE_RANDOM_UPDOWN_COUNTER_TRIGGER); + + __mtk_tr_modify(phydev, 0x0, 0x7, 0x15, + NORMAL_MSE_LO_THRESH_MASK, + FIELD_PREP(NORMAL_MSE_LO_THRESH_MASK, 0x55)); + + __mtk_tr_modify(phydev, 0x2, 0xd, 0x0, + FFE_UPDATE_GAIN_FORCE_VAL_MASK, + FIELD_PREP(FFE_UPDATE_GAIN_FORCE_VAL_MASK, 0x4) | + FFE_UPDATE_GAIN_FORCE); + + __mtk_tr_clr_bits(phydev, 0x2, 0xd, 0x3, TR_FREEZE_MASK); + + __mtk_tr_modify(phydev, 0x2, 0xd, 0x6, + SS_TR_KP100_MASK | SS_TR_KF100_MASK | + SS_TR_KP1000_MASTER_MASK | SS_TR_KF1000_MASTER_MASK | + SS_TR_KP1000_SLAVE_MASK | SS_TR_KF1000_SLAVE_MASK, + FIELD_PREP(SS_TR_KP100_MASK, 0x5) | + FIELD_PREP(SS_TR_KF100_MASK, 0x6) | + FIELD_PREP(SS_TR_KP1000_MASTER_MASK, 0x5) | + FIELD_PREP(SS_TR_KF1000_MASTER_MASK, 0x6) | + FIELD_PREP(SS_TR_KP1000_SLAVE_MASK, 0x5) | + FIELD_PREP(SS_TR_KF1000_SLAVE_MASK, 0x6)); + phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); } @@ -757,27 +853,29 @@ static void mt7981_phy_finetune(struct phy_device *phydev) } phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5); - /* ResetSyncOffset = 6 */ - __phy_write(phydev, 0x11, 0x600); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x8fc0); + __mtk_tr_modify(phydev, 0x1, 0xf, 0x20, + RESET_SYNC_OFFSET_MASK, + FIELD_PREP(RESET_SYNC_OFFSET_MASK, 0x6)); - /* VgaDecRate = 1 */ - __phy_write(phydev, 0x11, 0x4c2a); - __phy_write(phydev, 0x12, 0x3e); - __phy_write(phydev, 0x10, 0x8fa4); + __mtk_tr_modify(phydev, 0x1, 0xf, 0x12, + VGA_DECIMATION_RATE_MASK, + FIELD_PREP(VGA_DECIMATION_RATE_MASK, 0x1)); /* MrvlTrFix100Kp = 3, MrvlTrFix100Kf = 2, * MrvlTrFix1000Kp = 3, MrvlTrFix1000Kf = 2 */ - __phy_write(phydev, 0x11, 0xd10a); - __phy_write(phydev, 0x12, 0x34); - __phy_write(phydev, 0x10, 0x8f82); + __mtk_tr_modify(phydev, 0x1, 0xf, 0x1, + MRVL_TR_FIX_100KP_MASK | MRVL_TR_FIX_100KF_MASK | + MRVL_TR_FIX_1000KP_MASK | MRVL_TR_FIX_1000KF_MASK, + FIELD_PREP(MRVL_TR_FIX_100KP_MASK, 0x3) | + FIELD_PREP(MRVL_TR_FIX_100KF_MASK, 0x2) | + FIELD_PREP(MRVL_TR_FIX_1000KP_MASK, 0x3) | + FIELD_PREP(MRVL_TR_FIX_1000KF_MASK, 0x2)); /* VcoSlicerThreshBitsHigh */ - __phy_write(phydev, 0x11, 0x5555); - __phy_write(phydev, 0x12, 0x55); - __phy_write(phydev, 0x10, 0x8ec0); + __mtk_tr_modify(phydev, 0x1, 0xd, 0x20, + VCO_SLICER_THRESH_HIGH_MASK, + FIELD_PREP(VCO_SLICER_THRESH_HIGH_MASK, 0x555555)); phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); /* TR_OPEN_LOOP_EN = 1, lpf_x_average = 9 */ @@ -829,25 +927,23 @@ static void mt7988_phy_finetune(struct phy_device *phydev) phy_write_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_TX_FILTER, 0x5); phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5); - /* ResetSyncOffset = 5 */ - __phy_write(phydev, 0x11, 0x500); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x8fc0); + __mtk_tr_modify(phydev, 0x1, 0xf, 0x20, + RESET_SYNC_OFFSET_MASK, + FIELD_PREP(RESET_SYNC_OFFSET_MASK, 0x5)); /* VgaDecRate is 1 at default on mt7988 */ - /* MrvlTrFix100Kp = 6, MrvlTrFix100Kf = 7, - * MrvlTrFix1000Kp = 6, MrvlTrFix1000Kf = 7 - */ - __phy_write(phydev, 0x11, 0xb90a); - __phy_write(phydev, 0x12, 0x6f); - __phy_write(phydev, 0x10, 0x8f82); - - /* RemAckCntLimitCtrl = 1 */ - __phy_write(phydev, 0x11, 0xfbba); - __phy_write(phydev, 0x12, 0xc3); - __phy_write(phydev, 0x10, 0x87f8); - + __mtk_tr_modify(phydev, 0x1, 0xf, 0x1, + MRVL_TR_FIX_100KP_MASK | MRVL_TR_FIX_100KF_MASK | + MRVL_TR_FIX_1000KP_MASK | MRVL_TR_FIX_1000KF_MASK, + FIELD_PREP(MRVL_TR_FIX_100KP_MASK, 0x6) | + FIELD_PREP(MRVL_TR_FIX_100KF_MASK, 0x7) | + FIELD_PREP(MRVL_TR_FIX_1000KP_MASK, 0x6) | + FIELD_PREP(MRVL_TR_FIX_1000KF_MASK, 0x7)); + + __mtk_tr_modify(phydev, 0x0, 0xf, 0x3c, + REMOTE_ACK_COUNT_LIMIT_CTRL_MASK, + FIELD_PREP(REMOTE_ACK_COUNT_LIMIT_CTRL_MASK, 0x1)); phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); /* TR_OPEN_LOOP_EN = 1, lpf_x_average = 10 */ @@ -923,45 +1019,37 @@ static void mt798x_phy_eee(struct phy_device *phydev) MTK_PHY_TR_READY_SKIP_AFE_WAKEUP); phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5); - /* Regsigdet_sel_1000 = 0 */ - __phy_write(phydev, 0x11, 0xb); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x9690); - - /* REG_EEE_st2TrKf1000 = 2 */ - __phy_write(phydev, 0x11, 0x114f); - __phy_write(phydev, 0x12, 0x2); - __phy_write(phydev, 0x10, 0x969a); - - /* RegEEE_slv_wake_tr_timer_tar = 6, RegEEE_slv_remtx_timer_tar = 20 */ - __phy_write(phydev, 0x11, 0x3028); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x969e); - - /* RegEEE_slv_wake_int_timer_tar = 8 */ - __phy_write(phydev, 0x11, 0x5010); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x96a0); - - /* RegEEE_trfreeze_timer2 = 586 */ - __phy_write(phydev, 0x11, 0x24a); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x96a8); - - /* RegEEE100Stg1_tar = 16 */ - __phy_write(phydev, 0x11, 0x3210); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x96b8); - - /* REGEEE_wake_slv_tr_wait_dfesigdet_en = 0 */ - __phy_write(phydev, 0x11, 0x1463); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x96ca); - - /* DfeTailEnableVgaThresh1000 = 27 */ - __phy_write(phydev, 0x11, 0x36); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x8f80); + __mtk_tr_clr_bits(phydev, 0x2, 0xd, 0x8, + EEE1000_SELECT_SIGNAL_DETECTION_FROM_DFE); + + __mtk_tr_modify(phydev, 0x2, 0xd, 0xd, + EEE1000_STAGE2_TR_KF_MASK, + FIELD_PREP(EEE1000_STAGE2_TR_KF_MASK, 0x2)); + + __mtk_tr_modify(phydev, 0x2, 0xd, 0xf, + SLAVE_WAKETR_TIMER_MASK | SLAVE_REMTX_TIMER_MASK, + FIELD_PREP(SLAVE_WAKETR_TIMER_MASK, 0x6) | + FIELD_PREP(SLAVE_REMTX_TIMER_MASK, 0x14)); + + __mtk_tr_modify(phydev, 0x2, 0xd, 0x10, + SLAVE_WAKEINT_TIMER_MASK, + FIELD_PREP(SLAVE_WAKEINT_TIMER_MASK, 0x8)); + + __mtk_tr_modify(phydev, 0x2, 0xd, 0x14, + TR_FREEZE_TIMER2_MASK, + FIELD_PREP(TR_FREEZE_TIMER2_MASK, 0x24a)); + + __mtk_tr_modify(phydev, 0x2, 0xd, 0x1c, + EEE100_LPSYNC_STAGE1_UPDATE_TIMER_MASK, + FIELD_PREP(EEE100_LPSYNC_STAGE1_UPDATE_TIMER_MASK, + 0x10)); + + __mtk_tr_clr_bits(phydev, 0x2, 0xd, 0x25, + WAKE_SLAVE_TR_WAIT_DFE_DETECTION_EN); + + __mtk_tr_modify(phydev, 0x1, 0xf, 0x0, + DFE_TAIL_EANBLE_VGA_TRHESH_1000, + FIELD_PREP(DFE_TAIL_EANBLE_VGA_TRHESH_1000, 0x1b)); phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_3); diff --git a/drivers/net/phy/mediatek/mtk-ge.c b/drivers/net/phy/mediatek/mtk-ge.c index b517ca8573e7..73d9b72f9d9e 100644 --- a/drivers/net/phy/mediatek/mtk-ge.c +++ b/drivers/net/phy/mediatek/mtk-ge.c @@ -8,31 +8,58 @@ #define MTK_GPHY_ID_MT7530 0x03a29412 #define MTK_GPHY_ID_MT7531 0x03a29441 -#define MTK_EXT_PAGE_ACCESS 0x1f -#define MTK_PHY_PAGE_STANDARD 0x0000 -#define MTK_PHY_PAGE_EXTENDED 0x0001 -#define MTK_PHY_PAGE_EXTENDED_2 0x0002 -#define MTK_PHY_PAGE_EXTENDED_3 0x0003 -#define MTK_PHY_PAGE_EXTENDED_2A30 0x2a30 -#define MTK_PHY_PAGE_EXTENDED_52B5 0x52b5 +#define MTK_PHY_PAGE_EXTENDED_2 0x0002 +#define MTK_PHY_PAGE_EXTENDED_3 0x0003 +#define MTK_PHY_RG_LPI_PCS_DSP_CTRL_REG11 0x11 + +#define MTK_PHY_PAGE_EXTENDED_2A30 0x2a30 + +/* Registers on Token Ring debug nodes */ +/* ch_addr = 0x1, node_addr = 0xf, data_addr = 0x17 */ +#define SLAVE_DSP_READY_TIME_MASK GENMASK(22, 15) + +/* Registers on MDIO_MMD_VEND1 */ +#define MTK_PHY_GBE_MODE_TX_DELAY_SEL 0x13 +#define MTK_PHY_TEST_MODE_TX_DELAY_SEL 0x14 +#define MTK_TX_DELAY_PAIR_B_MASK GENMASK(10, 8) +#define MTK_TX_DELAY_PAIR_D_MASK GENMASK(2, 0) + +#define MTK_PHY_MCC_CTRL_AND_TX_POWER_CTRL 0xa6 +#define MTK_MCC_NEARECHO_OFFSET_MASK GENMASK(15, 8) + +#define MTK_PHY_RXADC_CTRL_RG7 0xc6 +#define MTK_PHY_DA_AD_BUF_BIAS_LP_MASK GENMASK(9, 8) + +#define MTK_PHY_RG_LPI_PCS_DSP_CTRL_REG123 0x123 +#define MTK_PHY_LPI_NORM_MSE_LO_THRESH100_MASK GENMASK(15, 8) +#define MTK_PHY_LPI_NORM_MSE_HI_THRESH100_MASK GENMASK(7, 0) static void mtk_gephy_config_init(struct phy_device *phydev) { /* Enable HW auto downshift */ - phy_modify_paged(phydev, MTK_PHY_PAGE_EXTENDED, 0x14, 0, BIT(4)); + phy_modify_paged(phydev, MTK_PHY_PAGE_EXTENDED_1, + MTK_PHY_AUX_CTRL_AND_STATUS, + 0, MTK_PHY_ENABLE_DOWNSHIFT); /* Increase SlvDPSready time */ - phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5); - __phy_write(phydev, 0x10, 0xafae); - __phy_write(phydev, 0x12, 0x2f); - __phy_write(phydev, 0x10, 0x8fae); - phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); + mtk_tr_modify(phydev, 0x1, 0xf, 0x17, SLAVE_DSP_READY_TIME_MASK, + FIELD_PREP(SLAVE_DSP_READY_TIME_MASK, 0x5e)); /* Adjust 100_mse_threshold */ - phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x123, 0xffff); - - /* Disable mcc */ - phy_write_mmd(phydev, MDIO_MMD_VEND1, 0xa6, 0x300); + phy_modify_mmd(phydev, MDIO_MMD_VEND1, + MTK_PHY_RG_LPI_PCS_DSP_CTRL_REG123, + MTK_PHY_LPI_NORM_MSE_LO_THRESH100_MASK | + MTK_PHY_LPI_NORM_MSE_HI_THRESH100_MASK, + FIELD_PREP(MTK_PHY_LPI_NORM_MSE_LO_THRESH100_MASK, + 0xff) | + FIELD_PREP(MTK_PHY_LPI_NORM_MSE_HI_THRESH100_MASK, + 0xff)); + + /* If echo time is narrower than 0x3, it will be regarded as noise */ + phy_modify_mmd(phydev, MDIO_MMD_VEND1, + MTK_PHY_MCC_CTRL_AND_TX_POWER_CTRL, + MTK_MCC_NEARECHO_OFFSET_MASK, + FIELD_PREP(MTK_MCC_NEARECHO_OFFSET_MASK, 0x3)); } static int mt7530_phy_config_init(struct phy_device *phydev) @@ -40,7 +67,8 @@ static int mt7530_phy_config_init(struct phy_device *phydev) mtk_gephy_config_init(phydev); /* Increase post_update_timer */ - phy_write_paged(phydev, MTK_PHY_PAGE_EXTENDED_3, 0x11, 0x4b); + phy_write_paged(phydev, MTK_PHY_PAGE_EXTENDED_3, + MTK_PHY_RG_LPI_PCS_DSP_CTRL_REG11, 0x4b); return 0; } @@ -51,11 +79,19 @@ static int mt7531_phy_config_init(struct phy_device *phydev) /* PHY link down power saving enable */ phy_set_bits(phydev, 0x17, BIT(4)); - phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1, 0xc6, 0x300); + phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RXADC_CTRL_RG7, + MTK_PHY_DA_AD_BUF_BIAS_LP_MASK, + FIELD_PREP(MTK_PHY_DA_AD_BUF_BIAS_LP_MASK, 0x3)); /* Set TX Pair delay selection */ - phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x13, 0x404); - phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x14, 0x404); + phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_GBE_MODE_TX_DELAY_SEL, + MTK_TX_DELAY_PAIR_B_MASK | MTK_TX_DELAY_PAIR_D_MASK, + FIELD_PREP(MTK_TX_DELAY_PAIR_B_MASK, 0x4) | + FIELD_PREP(MTK_TX_DELAY_PAIR_D_MASK, 0x4)); + phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_TEST_MODE_TX_DELAY_SEL, + MTK_TX_DELAY_PAIR_B_MASK | MTK_TX_DELAY_PAIR_D_MASK, + FIELD_PREP(MTK_TX_DELAY_PAIR_B_MASK, 0x4) | + FIELD_PREP(MTK_TX_DELAY_PAIR_D_MASK, 0x4)); return 0; } diff --git a/drivers/net/phy/mediatek/mtk-phy-lib.c b/drivers/net/phy/mediatek/mtk-phy-lib.c index 98a09d670e9c..dfd0f4e439a2 100644 --- a/drivers/net/phy/mediatek/mtk-phy-lib.c +++ b/drivers/net/phy/mediatek/mtk-phy-lib.c @@ -6,6 +6,83 @@ #include "mtk.h" +/* Difference between functions with mtk_tr* and __mtk_tr* prefixes is + * mtk_tr* functions: wrapped by page switching operations + * __mtk_tr* functions: no page switching operations + */ + +static void __mtk_tr_access(struct phy_device *phydev, bool read, u8 ch_addr, + u8 node_addr, u8 data_addr) +{ + u16 tr_cmd = BIT(15); /* bit 14 & 0 are reserved */ + + if (read) + tr_cmd |= BIT(13); + + tr_cmd |= (((ch_addr & 0x3) << 11) | + ((node_addr & 0xf) << 7) | + ((data_addr & 0x3f) << 1)); + dev_dbg(&phydev->mdio.dev, "tr_cmd: 0x%x\n", tr_cmd); + __phy_write(phydev, 0x10, tr_cmd); +} + +static void __mtk_tr_read(struct phy_device *phydev, u8 ch_addr, u8 node_addr, + u8 data_addr, u16 *tr_high, u16 *tr_low) +{ + __mtk_tr_access(phydev, true, ch_addr, node_addr, data_addr); + *tr_low = __phy_read(phydev, 0x11); + *tr_high = __phy_read(phydev, 0x12); + dev_dbg(&phydev->mdio.dev, "tr_high read: 0x%x, tr_low read: 0x%x\n", + *tr_high, *tr_low); +} + +static void __mtk_tr_write(struct phy_device *phydev, u8 ch_addr, u8 node_addr, + u8 data_addr, u32 tr_data) +{ + __phy_write(phydev, 0x11, tr_data & 0xffff); + __phy_write(phydev, 0x12, tr_data >> 16); + dev_dbg(&phydev->mdio.dev, "tr_high write: 0x%x, tr_low write: 0x%x\n", + tr_data >> 16, tr_data & 0xffff); + __mtk_tr_access(phydev, false, ch_addr, node_addr, data_addr); +} + +void __mtk_tr_modify(struct phy_device *phydev, u8 ch_addr, u8 node_addr, + u8 data_addr, u32 mask, u32 set) +{ + u32 tr_data; + u16 tr_high; + u16 tr_low; + + __mtk_tr_read(phydev, ch_addr, node_addr, data_addr, &tr_high, &tr_low); + tr_data = (tr_high << 16) | tr_low; + tr_data = (tr_data & ~mask) | set; + __mtk_tr_write(phydev, ch_addr, node_addr, data_addr, tr_data); +} +EXPORT_SYMBOL_GPL(__mtk_tr_modify); + +void mtk_tr_modify(struct phy_device *phydev, u8 ch_addr, u8 node_addr, + u8 data_addr, u32 mask, u32 set) +{ + phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5); + __mtk_tr_modify(phydev, ch_addr, node_addr, data_addr, mask, set); + phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); +} +EXPORT_SYMBOL_GPL(mtk_tr_modify); + +void __mtk_tr_set_bits(struct phy_device *phydev, u8 ch_addr, u8 node_addr, + u8 data_addr, u32 set) +{ + __mtk_tr_modify(phydev, ch_addr, node_addr, data_addr, 0, set); +} +EXPORT_SYMBOL_GPL(__mtk_tr_set_bits); + +void __mtk_tr_clr_bits(struct phy_device *phydev, u8 ch_addr, u8 node_addr, + u8 data_addr, u32 clr) +{ + __mtk_tr_modify(phydev, ch_addr, node_addr, data_addr, clr, 0); +} +EXPORT_SYMBOL_GPL(__mtk_tr_clr_bits); + int mtk_phy_read_page(struct phy_device *phydev) { return __phy_read(phydev, MTK_EXT_PAGE_ACCESS); diff --git a/drivers/net/phy/mediatek/mtk.h b/drivers/net/phy/mediatek/mtk.h index 63d9fe179b8f..320f76ffa81f 100644 --- a/drivers/net/phy/mediatek/mtk.h +++ b/drivers/net/phy/mediatek/mtk.h @@ -8,7 +8,13 @@ #ifndef _MTK_EPHY_H_ #define _MTK_EPHY_H_ +#define MTK_PHY_AUX_CTRL_AND_STATUS 0x14 +#define MTK_PHY_ENABLE_DOWNSHIFT BIT(4) + #define MTK_EXT_PAGE_ACCESS 0x1f +#define MTK_PHY_PAGE_EXTENDED_1 0x0001 +#define MTK_PHY_PAGE_STANDARD 0x0000 +#define MTK_PHY_PAGE_EXTENDED_52B5 0x52b5 /* Registers on MDIO_MMD_VEND2 */ #define MTK_PHY_LED0_ON_CTRL 0x24 @@ -66,6 +72,15 @@ struct mtk_socphy_priv { unsigned long led_state; }; +void __mtk_tr_modify(struct phy_device *phydev, u8 ch_addr, u8 node_addr, + u8 data_addr, u32 mask, u32 set); +void mtk_tr_modify(struct phy_device *phydev, u8 ch_addr, u8 node_addr, + u8 data_addr, u32 mask, u32 set); +void __mtk_tr_set_bits(struct phy_device *phydev, u8 ch_addr, u8 node_addr, + u8 data_addr, u32 set); +void __mtk_tr_clr_bits(struct phy_device *phydev, u8 ch_addr, u8 node_addr, + u8 data_addr, u32 clr); + int mtk_phy_read_page(struct phy_device *phydev); int mtk_phy_write_page(struct phy_device *phydev, int page); diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index 0dac08e85304..37c9a344bf4a 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -683,13 +683,10 @@ EXPORT_SYMBOL_GPL(genphy_c45_read_mdix); static int genphy_c45_write_eee_adv(struct phy_device *phydev, unsigned long *adv) { - __ETHTOOL_DECLARE_LINK_MODE_MASK(tmp); int val, changed = 0; - linkmode_andnot(tmp, adv, phydev->eee_broken_modes); - if (linkmode_intersects(phydev->supported_eee, PHY_EEE_CAP1_FEATURES)) { - val = linkmode_to_mii_eee_cap1_t(tmp); + val = linkmode_to_mii_eee_cap1_t(adv); /* IEEE 802.3-2018 45.2.7.13 EEE advertisement 1 * (Register 7.60) @@ -707,7 +704,7 @@ static int genphy_c45_write_eee_adv(struct phy_device *phydev, } if (linkmode_intersects(phydev->supported_eee, PHY_EEE_CAP2_FEATURES)) { - val = linkmode_to_mii_eee_cap2_t(tmp); + val = linkmode_to_mii_eee_cap2_t(adv); /* IEEE 802.3-2022 45.2.7.16 EEE advertisement 2 * (Register 7.62) @@ -1467,42 +1464,29 @@ EXPORT_SYMBOL_GPL(genphy_c45_plca_get_status); /** * genphy_c45_eee_is_active - get EEE status * @phydev: target phy_device struct - * @adv: variable to store advertised linkmodes * @lp: variable to store LP advertised linkmodes * - * Description: this function will read local and link partner PHY - * advertisements. Compare them return current EEE state. + * Description: this function will read link partner PHY advertisement + * and compare it to local advertisement to return current EEE state. */ -int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *adv, - unsigned long *lp) +int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *lp) { - __ETHTOOL_DECLARE_LINK_MODE_MASK(tmp_adv) = {}; __ETHTOOL_DECLARE_LINK_MODE_MASK(tmp_lp) = {}; __ETHTOOL_DECLARE_LINK_MODE_MASK(common); - bool eee_active; int ret; - ret = genphy_c45_read_eee_adv(phydev, tmp_adv); - if (ret) - return ret; - ret = genphy_c45_read_eee_lpa(phydev, tmp_lp); if (ret) return ret; - linkmode_and(common, tmp_adv, tmp_lp); - if (!linkmode_empty(tmp_adv) && !linkmode_empty(common)) - eee_active = phy_check_valid(phydev->speed, phydev->duplex, - common); - else - eee_active = false; - - if (adv) - linkmode_copy(adv, tmp_adv); if (lp) linkmode_copy(lp, tmp_lp); - return eee_active; + linkmode_and(common, phydev->advertising_eee, tmp_lp); + if (linkmode_empty(common)) + return 0; + + return phy_check_valid(phydev->speed, phydev->duplex, common); } EXPORT_SYMBOL(genphy_c45_eee_is_active); @@ -1519,14 +1503,14 @@ int genphy_c45_ethtool_get_eee(struct phy_device *phydev, { int ret; - ret = genphy_c45_eee_is_active(phydev, data->advertised, - data->lp_advertised); + ret = genphy_c45_eee_is_active(phydev, data->lp_advertised); if (ret < 0) return ret; data->eee_active = phydev->eee_active; - linkmode_copy(data->supported, phydev->supported_eee); - + linkmode_andnot(data->supported, phydev->supported_eee, + phydev->eee_disabled_modes); + linkmode_copy(data->advertised, phydev->advertising_eee); return 0; } EXPORT_SYMBOL(genphy_c45_ethtool_get_eee); @@ -1559,7 +1543,9 @@ int genphy_c45_ethtool_set_eee(struct phy_device *phydev, phydev_warn(phydev, "At least some EEE link modes are not supported.\n"); return -EINVAL; } - linkmode_copy(phydev->advertising_eee, adv); + + linkmode_andnot(phydev->advertising_eee, adv, + phydev->eee_disabled_modes); } else if (linkmode_empty(phydev->advertising_eee)) { phy_advertise_eee_all(phydev); } diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index 6bf3ec985f3d..2fd1d153abc9 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -13,7 +13,7 @@ */ const char *phy_speed_to_str(int speed) { - BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 103, + BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 121, "Enum ethtool_link_mode_bit_indices and phylib are out of sync. " "If a speed or mode has been added please update phy_speed_to_str " "and the PHY settings array.\n"); @@ -169,6 +169,12 @@ static const struct phy_setting settings[] = { PHY_SETTING( 800000, FULL, 800000baseDR8_2_Full ), PHY_SETTING( 800000, FULL, 800000baseSR8_Full ), PHY_SETTING( 800000, FULL, 800000baseVR8_Full ), + PHY_SETTING( 800000, FULL, 800000baseCR4_Full ), + PHY_SETTING( 800000, FULL, 800000baseKR4_Full ), + PHY_SETTING( 800000, FULL, 800000baseDR4_Full ), + PHY_SETTING( 800000, FULL, 800000baseDR4_2_Full ), + PHY_SETTING( 800000, FULL, 800000baseSR4_Full ), + PHY_SETTING( 800000, FULL, 800000baseVR4_Full ), /* 400G */ PHY_SETTING( 400000, FULL, 400000baseCR8_Full ), PHY_SETTING( 400000, FULL, 400000baseKR8_Full ), @@ -180,6 +186,12 @@ static const struct phy_setting settings[] = { PHY_SETTING( 400000, FULL, 400000baseLR4_ER4_FR4_Full ), PHY_SETTING( 400000, FULL, 400000baseDR4_Full ), PHY_SETTING( 400000, FULL, 400000baseSR4_Full ), + PHY_SETTING( 400000, FULL, 400000baseCR2_Full ), + PHY_SETTING( 400000, FULL, 400000baseKR2_Full ), + PHY_SETTING( 400000, FULL, 400000baseDR2_Full ), + PHY_SETTING( 400000, FULL, 400000baseDR2_2_Full ), + PHY_SETTING( 400000, FULL, 400000baseSR2_Full ), + PHY_SETTING( 400000, FULL, 400000baseVR2_Full ), /* 200G */ PHY_SETTING( 200000, FULL, 200000baseCR4_Full ), PHY_SETTING( 200000, FULL, 200000baseKR4_Full ), @@ -191,6 +203,12 @@ static const struct phy_setting settings[] = { PHY_SETTING( 200000, FULL, 200000baseLR2_ER2_FR2_Full ), PHY_SETTING( 200000, FULL, 200000baseDR2_Full ), PHY_SETTING( 200000, FULL, 200000baseSR2_Full ), + PHY_SETTING( 200000, FULL, 200000baseCR_Full ), + PHY_SETTING( 200000, FULL, 200000baseKR_Full ), + PHY_SETTING( 200000, FULL, 200000baseDR_Full ), + PHY_SETTING( 200000, FULL, 200000baseDR_2_Full ), + PHY_SETTING( 200000, FULL, 200000baseSR_Full ), + PHY_SETTING( 200000, FULL, 200000baseVR_Full ), /* 100G */ PHY_SETTING( 100000, FULL, 100000baseCR4_Full ), PHY_SETTING( 100000, FULL, 100000baseKR4_Full ), @@ -388,7 +406,7 @@ void of_set_phy_supported(struct phy_device *phydev) void of_set_phy_eee_broken(struct phy_device *phydev) { struct device_node *node = phydev->mdio.dev.of_node; - unsigned long *modes = phydev->eee_broken_modes; + unsigned long *modes = phydev->eee_disabled_modes; if (!IS_ENABLED(CONFIG_OF_MDIO) || !node) return; diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index d0c1718e2b16..831b36839627 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -302,7 +302,7 @@ void phy_ethtool_ksettings_get(struct phy_device *phydev, cmd->base.port = PORT_BNC; else cmd->base.port = phydev->port; - cmd->base.transceiver = phy_is_internal(phydev) ? + cmd->base.transceiver = phydev->is_internal ? XCVR_INTERNAL : XCVR_EXTERNAL; cmd->base.phy_address = phydev->mdio.addr; cmd->base.autoneg = phydev->autoneg; @@ -520,12 +520,12 @@ int __phy_hwtstamp_set(struct phy_device *phydev, * @phydev: the phy_device struct * @jiffies: Run the state machine after these jiffies */ -void phy_queue_state_machine(struct phy_device *phydev, unsigned long jiffies) +static void phy_queue_state_machine(struct phy_device *phydev, + unsigned long jiffies) { mod_delayed_work(system_power_efficient_wq, &phydev->state_queue, jiffies); } -EXPORT_SYMBOL(phy_queue_state_machine); /** * phy_trigger_machine - Trigger the state machine to run now @@ -1031,7 +1031,7 @@ static int phy_check_link_status(struct phy_device *phydev) if (phydev->link && phydev->state != PHY_RUNNING) { phy_check_downshift(phydev); phydev->state = PHY_RUNNING; - err = genphy_c45_eee_is_active(phydev, NULL, NULL); + err = genphy_c45_eee_is_active(phydev, NULL); phydev->eee_active = err > 0; phydev->enable_tx_lpi = phydev->eee_cfg.tx_lpi_enabled && phydev->eee_active; @@ -1501,6 +1501,24 @@ void phy_free_interrupt(struct phy_device *phydev) } EXPORT_SYMBOL(phy_free_interrupt); +/** + * phy_get_next_update_time - Determine the next PHY update time + * @phydev: Pointer to the phy_device structure + * + * This function queries the PHY driver to get the time for the next polling + * event. If the driver does not implement the callback, a default value is + * used. + * + * Return: The time for the next polling event in jiffies + */ +static unsigned int phy_get_next_update_time(struct phy_device *phydev) +{ + if (phydev->drv && phydev->drv->get_next_update_time) + return phydev->drv->get_next_update_time(phydev); + + return PHY_STATE_TIME; +} + enum phy_state_work { PHY_STATE_WORK_NONE, PHY_STATE_WORK_ANEG, @@ -1580,7 +1598,8 @@ static enum phy_state_work _phy_state_machine(struct phy_device *phydev) * called from phy_disconnect() synchronously. */ if (phy_polling_mode(phydev) && phy_is_started(phydev)) - phy_queue_state_machine(phydev, PHY_STATE_TIME); + phy_queue_state_machine(phydev, + phy_get_next_update_time(phydev)); return state_work; } @@ -1761,7 +1780,7 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable) if (!phydev->drv) return -EIO; - ret = genphy_c45_eee_is_active(phydev, NULL, NULL); + ret = genphy_c45_eee_is_active(phydev, NULL); if (ret < 0) return ret; if (!ret) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 46713d27412b..7c4e1ad1864c 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -45,6 +45,17 @@ MODULE_DESCRIPTION("PHY library"); MODULE_AUTHOR("Andy Fleming"); MODULE_LICENSE("GPL"); +#define PHY_ANY_ID "MATCH ANY PHY" +#define PHY_ANY_UID 0xffffffff + +struct phy_fixup { + struct list_head list; + char bus_id[MII_BUS_ID_SIZE + 3]; + u32 phy_uid; + u32 phy_uid_mask; + int (*run)(struct phy_device *phydev); +}; + __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_features) __ro_after_init; EXPORT_SYMBOL_GPL(phy_basic_features); @@ -80,37 +91,28 @@ static const int phy_all_ports_features_array[7] = { ETHTOOL_LINK_MODE_Backplane_BIT, }; -const int phy_10_100_features_array[4] = { +static const int phy_10_100_features_array[4] = { ETHTOOL_LINK_MODE_10baseT_Half_BIT, ETHTOOL_LINK_MODE_10baseT_Full_BIT, ETHTOOL_LINK_MODE_100baseT_Half_BIT, ETHTOOL_LINK_MODE_100baseT_Full_BIT, }; -EXPORT_SYMBOL_GPL(phy_10_100_features_array); -const int phy_basic_t1_features_array[3] = { +static const int phy_basic_t1_features_array[3] = { ETHTOOL_LINK_MODE_TP_BIT, ETHTOOL_LINK_MODE_10baseT1L_Full_BIT, ETHTOOL_LINK_MODE_100baseT1_Full_BIT, }; -EXPORT_SYMBOL_GPL(phy_basic_t1_features_array); -const int phy_basic_t1s_p2mp_features_array[2] = { +static const int phy_basic_t1s_p2mp_features_array[2] = { ETHTOOL_LINK_MODE_TP_BIT, ETHTOOL_LINK_MODE_10baseT1S_P2MP_Half_BIT, }; -EXPORT_SYMBOL_GPL(phy_basic_t1s_p2mp_features_array); -const int phy_gbit_features_array[2] = { +static const int phy_gbit_features_array[2] = { ETHTOOL_LINK_MODE_1000baseT_Half_BIT, ETHTOOL_LINK_MODE_1000baseT_Full_BIT, }; -EXPORT_SYMBOL_GPL(phy_gbit_features_array); - -const int phy_10gbit_features_array[1] = { - ETHTOOL_LINK_MODE_10000baseT_Full_BIT, -}; -EXPORT_SYMBOL_GPL(phy_10gbit_features_array); static const int phy_eee_cap1_features_array[] = { ETHTOOL_LINK_MODE_100baseT_Full_BIT, @@ -185,9 +187,8 @@ static void features_init(void) linkmode_set_bit_array(phy_gbit_features_array, ARRAY_SIZE(phy_gbit_features_array), phy_10gbit_features); - linkmode_set_bit_array(phy_10gbit_features_array, - ARRAY_SIZE(phy_10gbit_features_array), - phy_10gbit_features); + linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, + phy_10gbit_features); linkmode_set_bit_array(phy_eee_cap1_features_array, ARRAY_SIZE(phy_eee_cap1_features_array), @@ -378,8 +379,8 @@ static SIMPLE_DEV_PM_OPS(mdio_bus_phy_pm_ops, mdio_bus_phy_suspend, * comparison * @run: The actual code to be run when a matching PHY is found */ -int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask, - int (*run)(struct phy_device *)) +static int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask, + int (*run)(struct phy_device *)) { struct phy_fixup *fixup = kzalloc(sizeof(*fixup), GFP_KERNEL); @@ -397,7 +398,6 @@ int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask, return 0; } -EXPORT_SYMBOL(phy_register_fixup); /* Registers a fixup to be run on any PHY with the UID in phy_uid */ int phy_register_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask, @@ -544,7 +544,7 @@ phy_interface_show(struct device *dev, struct device_attribute *attr, char *buf) struct phy_device *phydev = to_phy_device(dev); const char *mode = NULL; - if (phy_is_internal(phydev)) + if (phydev->is_internal) mode = "internal"; else mode = phy_modes(phydev->interface); @@ -2966,7 +2966,7 @@ void phy_disable_eee(struct phy_device *phydev) phydev->eee_cfg.tx_lpi_enabled = false; phydev->eee_cfg.eee_enabled = false; /* don't let userspace re-enable EEE advertisement */ - linkmode_fill(phydev->eee_broken_modes); + linkmode_fill(phydev->eee_disabled_modes); } EXPORT_SYMBOL_GPL(phy_disable_eee); @@ -3096,19 +3096,12 @@ void phy_get_pause(struct phy_device *phydev, bool *tx_pause, bool *rx_pause) EXPORT_SYMBOL(phy_get_pause); #if IS_ENABLED(CONFIG_OF_MDIO) -static int phy_get_int_delay_property(struct device *dev, const char *name) +static int phy_get_u32_property(struct device *dev, const char *name, u32 *val) { - s32 int_delay; - int ret; - - ret = device_property_read_u32(dev, name, &int_delay); - if (ret) - return ret; - - return int_delay; + return device_property_read_u32(dev, name, val); } #else -static int phy_get_int_delay_property(struct device *dev, const char *name) +static int phy_get_u32_property(struct device *dev, const char *name, u32 *val) { return -EINVAL; } @@ -3133,12 +3126,12 @@ static int phy_get_int_delay_property(struct device *dev, const char *name) s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev, const int *delay_values, int size, bool is_rx) { - s32 delay; - int i; + int i, ret; + u32 delay; if (is_rx) { - delay = phy_get_int_delay_property(dev, "rx-internal-delay-ps"); - if (delay < 0 && size == 0) { + ret = phy_get_u32_property(dev, "rx-internal-delay-ps", &delay); + if (ret < 0 && size == 0) { if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) return 1; @@ -3147,8 +3140,8 @@ s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev, } } else { - delay = phy_get_int_delay_property(dev, "tx-internal-delay-ps"); - if (delay < 0 && size == 0) { + ret = phy_get_u32_property(dev, "tx-internal-delay-ps", &delay); + if (ret < 0 && size == 0) { if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) return 1; @@ -3157,8 +3150,8 @@ s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev, } } - if (delay < 0) - return delay; + if (ret < 0) + return ret; if (size == 0) return delay; @@ -3193,6 +3186,30 @@ s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev, } EXPORT_SYMBOL(phy_get_internal_delay); +/** + * phy_get_tx_amplitude_gain - stores tx amplitude gain in @val + * @phydev: phy_device struct + * @dev: pointer to the devices device struct + * @linkmode: linkmode for which the tx amplitude gain should be retrieved + * @val: tx amplitude gain + * + * Returns: 0 on success, < 0 on failure + */ +int phy_get_tx_amplitude_gain(struct phy_device *phydev, struct device *dev, + enum ethtool_link_mode_bit_indices linkmode, + u32 *val) +{ + switch (linkmode) { + case ETHTOOL_LINK_MODE_100baseT_Full_BIT: + return phy_get_u32_property(dev, + "tx-amplitude-100base-tx-percent", + val); + default: + return -EINVAL; + } +} +EXPORT_SYMBOL_GPL(phy_get_tx_amplitude_gain); + static int phy_led_set_brightness(struct led_classdev *led_cdev, enum led_brightness value) { @@ -3563,22 +3580,21 @@ static int phy_probe(struct device *dev) if (err) goto out; - /* There is no "enabled" flag. If PHY is advertising, assume it is - * kind of enabled. - */ - phydev->eee_cfg.eee_enabled = !linkmode_empty(phydev->advertising_eee); + /* Get the EEE modes we want to prohibit. */ + of_set_phy_eee_broken(phydev); /* Some PHYs may advertise, by default, not support EEE modes. So, - * we need to clean them. + * we need to clean them. In addition remove all disabled EEE modes. */ - if (phydev->eee_cfg.eee_enabled) - linkmode_and(phydev->advertising_eee, phydev->supported_eee, - phydev->advertising_eee); + linkmode_and(phydev->advertising_eee, phydev->supported_eee, + phydev->advertising_eee); + linkmode_andnot(phydev->advertising_eee, phydev->advertising_eee, + phydev->eee_disabled_modes); - /* Get the EEE modes we want to prohibit. We will ask - * the PHY stop advertising these mode later on + /* There is no "enabled" flag. If PHY is advertising, assume it is + * kind of enabled. */ - of_set_phy_eee_broken(phydev); + phydev->eee_cfg.eee_enabled = !linkmode_empty(phydev->advertising_eee); /* Get master/slave strap overrides */ of_set_phy_timing_role(phydev); diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index b00a315de060..a3b186ab3854 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1073,6 +1073,18 @@ static void phylink_pcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode, pcs->ops->pcs_link_up(pcs, neg_mode, interface, speed, duplex); } +static void phylink_pcs_disable_eee(struct phylink_pcs *pcs) +{ + if (pcs && pcs->ops->pcs_disable_eee) + pcs->ops->pcs_disable_eee(pcs); +} + +static void phylink_pcs_enable_eee(struct phylink_pcs *pcs) +{ + if (pcs && pcs->ops->pcs_enable_eee) + pcs->ops->pcs_enable_eee(pcs); +} + /* Query inband for a specific interface mode, asking the MAC for the * PCS which will be used to handle the interface mode. */ @@ -1353,7 +1365,6 @@ static void phylink_major_config(struct phylink *pl, bool restart, struct phylink_pcs *pcs = NULL; bool pcs_changed = false; unsigned int rate_kbd; - unsigned int neg_mode; int err; phylink_dbg(pl, "major config, requested %s/%s\n", @@ -1416,11 +1427,7 @@ static void phylink_major_config(struct phylink *pl, bool restart, if (pl->pcs_state == PCS_STATE_STARTING || pcs_changed) phylink_pcs_enable(pl->pcs); - neg_mode = pl->act_link_an_mode; - if (pl->pcs && pl->pcs->neg_mode) - neg_mode = pl->pcs_neg_mode; - - err = phylink_pcs_config(pl->pcs, neg_mode, state, + err = phylink_pcs_config(pl->pcs, pl->pcs_neg_mode, state, !!(pl->link_config.pause & MLO_PAUSE_AN)); if (err < 0) phylink_err(pl, "pcs_config failed: %pe\n", @@ -1463,7 +1470,6 @@ static void phylink_major_config(struct phylink *pl, bool restart, */ static int phylink_change_inband_advert(struct phylink *pl) { - unsigned int neg_mode; int ret; if (test_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state)) @@ -1479,15 +1485,11 @@ static int phylink_change_inband_advert(struct phylink *pl) phylink_pcs_neg_mode(pl, pl->pcs, pl->link_config.interface, pl->link_config.advertising); - neg_mode = pl->act_link_an_mode; - if (pl->pcs->neg_mode) - neg_mode = pl->pcs_neg_mode; - /* Modern PCS-based method; update the advert at the PCS, and * restart negotiation if the pcs_config() helper indicates that * the programmed advertisement has changed. */ - ret = phylink_pcs_config(pl->pcs, neg_mode, &pl->link_config, + ret = phylink_pcs_config(pl->pcs, pl->pcs_neg_mode, &pl->link_config, !!(pl->link_config.pause & MLO_PAUSE_AN)); if (ret < 0) return ret; @@ -1511,13 +1513,7 @@ static void phylink_mac_pcs_get_state(struct phylink *pl, state->an_complete = 0; state->link = 1; - pcs = pl->pcs; - if (!pcs || pcs->neg_mode) - autoneg = pl->pcs_neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED; - else - autoneg = linkmode_test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, - state->advertising); - + autoneg = pl->pcs_neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED; if (autoneg) { state->speed = SPEED_UNKNOWN; state->duplex = DUPLEX_UNKNOWN; @@ -1528,6 +1524,7 @@ static void phylink_mac_pcs_get_state(struct phylink *pl, state->pause = pl->link_config.pause; } + pcs = pl->pcs; if (pcs) pcs->ops->pcs_get_state(pcs, pl->pcs_neg_mode, state); else @@ -1601,6 +1598,8 @@ static void phylink_deactivate_lpi(struct phylink *pl) phylink_dbg(pl, "disabling LPI\n"); pl->mac_ops->mac_disable_tx_lpi(pl->config); + + phylink_pcs_disable_eee(pl->pcs); } } @@ -1617,20 +1616,24 @@ static void phylink_activate_lpi(struct phylink *pl) phylink_dbg(pl, "LPI timer %uus, tx clock stop %u\n", pl->mac_tx_lpi_timer, pl->mac_tx_clk_stop); + phylink_pcs_enable_eee(pl->pcs); + err = pl->mac_ops->mac_enable_tx_lpi(pl->config, pl->mac_tx_lpi_timer, pl->mac_tx_clk_stop); - if (!err) - pl->mac_enable_tx_lpi = true; - else + if (err) { + phylink_pcs_disable_eee(pl->pcs); phylink_err(pl, "%ps() failed: %pe\n", pl->mac_ops->mac_enable_tx_lpi, ERR_PTR(err)); + return; + } + + pl->mac_enable_tx_lpi = true; } static void phylink_link_up(struct phylink *pl, struct phylink_link_state link_state) { struct net_device *ndev = pl->netdev; - unsigned int neg_mode; int speed, duplex; bool rx_pause; @@ -1661,11 +1664,7 @@ static void phylink_link_up(struct phylink *pl, pl->cur_interface = link_state.interface; - neg_mode = pl->act_link_an_mode; - if (pl->pcs && pl->pcs->neg_mode) - neg_mode = pl->pcs_neg_mode; - - phylink_pcs_link_up(pl->pcs, neg_mode, pl->cur_interface, speed, + phylink_pcs_link_up(pl->pcs, pl->pcs_neg_mode, pl->cur_interface, speed, duplex); pl->mac_ops->mac_link_up(pl->config, pl->phydev, pl->act_link_an_mode, @@ -1957,8 +1956,7 @@ struct phylink *phylink_create(struct phylink_config *config, return ERR_PTR(-EINVAL); } - pl->mac_supports_eee_ops = mac_ops->mac_disable_tx_lpi && - mac_ops->mac_enable_tx_lpi; + pl->mac_supports_eee_ops = phylink_mac_implements_lpi(mac_ops); pl->mac_supports_eee = pl->mac_supports_eee_ops && pl->config->lpi_capabilities && !phy_interface_empty(pl->config->lpi_interfaces); diff --git a/drivers/net/phy/realtek/Kconfig b/drivers/net/phy/realtek/Kconfig index 31935f147d87..b05c2a1e9024 100644 --- a/drivers/net/phy/realtek/Kconfig +++ b/drivers/net/phy/realtek/Kconfig @@ -4,8 +4,12 @@ config REALTEK_PHY help Currently supports RTL821x/RTL822x and fast ethernet PHYs +if REALTEK_PHY + config REALTEK_PHY_HWMON - def_bool REALTEK_PHY && HWMON - depends on !(REALTEK_PHY=y && HWMON=m) + bool "HWMON support for Realtek PHYs" + depends on HWMON && !(REALTEK_PHY=y && HWMON=m) help Optional hwmon support for the temperature sensor + +endif # REALTEK_PHY diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c index 572a933636b0..7a0b19d66aca 100644 --- a/drivers/net/phy/realtek/realtek_main.c +++ b/drivers/net/phy/realtek/realtek_main.c @@ -13,6 +13,7 @@ #include <linux/module.h> #include <linux/delay.h> #include <linux/clk.h> +#include <linux/string_choices.h> #include "realtek.h" @@ -78,9 +79,7 @@ /* RTL822X_VND2_XXXXX registers are only accessible when phydev->is_c45 * is set, they cannot be accessed by C45-over-C22. */ -#define RTL822X_VND2_GBCR 0xa412 - -#define RTL822X_VND2_GANLPAR 0xa414 +#define RTL822X_VND2_C22_REG(reg) (0xa400 + 2 * (reg)) #define RTL8366RB_POWER_SAVE 0x15 #define RTL8366RB_POWER_SAVE_ON BIT(12) @@ -95,6 +94,16 @@ #define RTL_VND2_PHYSR_MASTER BIT(11) #define RTL_VND2_PHYSR_SPEED_MASK (RTL_VND2_PHYSR_SPEEDL | RTL_VND2_PHYSR_SPEEDH) +#define RTL_MDIO_PCS_EEE_ABLE 0xa5c4 +#define RTL_MDIO_AN_EEE_ADV 0xa5d0 +#define RTL_MDIO_AN_EEE_LPABLE 0xa5d2 +#define RTL_MDIO_AN_10GBT_CTRL 0xa5d4 +#define RTL_MDIO_AN_10GBT_STAT 0xa5d6 +#define RTL_MDIO_PMA_SPEED 0xa616 +#define RTL_MDIO_AN_EEE_LPABLE2 0xa6d0 +#define RTL_MDIO_AN_EEE_ADV2 0xa6d4 +#define RTL_MDIO_PCS_EEE_ABLE2 0xa6ec + #define RTL_GENERIC_PHYID 0x001cc800 #define RTL_8211FVD_PHYID 0x001cc878 #define RTL_8221B 0x001cc840 @@ -422,11 +431,11 @@ static int rtl8211f_config_init(struct phy_device *phydev) } else if (ret) { dev_dbg(dev, "%s 2ns TX delay (and changing the value from pin-strapping RXD1 or the bootloader)\n", - val_txdly ? "Enabling" : "Disabling"); + str_enable_disable(val_txdly)); } else { dev_dbg(dev, "2ns TX delay was already %s (by pin-strapping RXD1 or bootloader configuration)\n", - val_txdly ? "enabled" : "disabled"); + str_enabled_disabled(val_txdly)); } ret = phy_modify_paged_changed(phydev, 0xd08, 0x15, RTL8211F_RX_DELAY, @@ -437,11 +446,11 @@ static int rtl8211f_config_init(struct phy_device *phydev) } else if (ret) { dev_dbg(dev, "%s 2ns RX delay (and changing the value from pin-strapping RXD0 or the bootloader)\n", - val_rxdly ? "Enabling" : "Disabling"); + str_enable_disable(val_rxdly)); } else { dev_dbg(dev, "2ns RX delay was already %s (by pin-strapping RXD0 or bootloader configuration)\n", - val_rxdly ? "enabled" : "disabled"); + str_enabled_disabled(val_rxdly)); } if (priv->has_phycr2) { @@ -734,29 +743,31 @@ static int rtlgen_read_status(struct phy_device *phydev) return 0; } +static int rtlgen_read_vend2(struct phy_device *phydev, int regnum) +{ + return __mdiobus_c45_read(phydev->mdio.bus, 0, MDIO_MMD_VEND2, regnum); +} + +static int rtlgen_write_vend2(struct phy_device *phydev, int regnum, u16 val) +{ + return __mdiobus_c45_write(phydev->mdio.bus, 0, MDIO_MMD_VEND2, regnum, + val); +} + static int rtlgen_read_mmd(struct phy_device *phydev, int devnum, u16 regnum) { int ret; - if (devnum == MDIO_MMD_VEND2) { - rtl821x_write_page(phydev, regnum >> 4); - ret = __phy_read(phydev, 0x10 + ((regnum & 0xf) >> 1)); - rtl821x_write_page(phydev, 0); - } else if (devnum == MDIO_MMD_PCS && regnum == MDIO_PCS_EEE_ABLE) { - rtl821x_write_page(phydev, 0xa5c); - ret = __phy_read(phydev, 0x12); - rtl821x_write_page(phydev, 0); - } else if (devnum == MDIO_MMD_AN && regnum == MDIO_AN_EEE_ADV) { - rtl821x_write_page(phydev, 0xa5d); - ret = __phy_read(phydev, 0x10); - rtl821x_write_page(phydev, 0); - } else if (devnum == MDIO_MMD_AN && regnum == MDIO_AN_EEE_LPABLE) { - rtl821x_write_page(phydev, 0xa5d); - ret = __phy_read(phydev, 0x11); - rtl821x_write_page(phydev, 0); - } else { + if (devnum == MDIO_MMD_VEND2) + ret = rtlgen_read_vend2(phydev, regnum); + else if (devnum == MDIO_MMD_PCS && regnum == MDIO_PCS_EEE_ABLE) + ret = rtlgen_read_vend2(phydev, RTL_MDIO_PCS_EEE_ABLE); + else if (devnum == MDIO_MMD_AN && regnum == MDIO_AN_EEE_ADV) + ret = rtlgen_read_vend2(phydev, RTL_MDIO_AN_EEE_ADV); + else if (devnum == MDIO_MMD_AN && regnum == MDIO_AN_EEE_LPABLE) + ret = rtlgen_read_vend2(phydev, RTL_MDIO_AN_EEE_LPABLE); + else ret = -EOPNOTSUPP; - } return ret; } @@ -766,17 +777,12 @@ static int rtlgen_write_mmd(struct phy_device *phydev, int devnum, u16 regnum, { int ret; - if (devnum == MDIO_MMD_VEND2) { - rtl821x_write_page(phydev, regnum >> 4); - ret = __phy_write(phydev, 0x10 + ((regnum & 0xf) >> 1), val); - rtl821x_write_page(phydev, 0); - } else if (devnum == MDIO_MMD_AN && regnum == MDIO_AN_EEE_ADV) { - rtl821x_write_page(phydev, 0xa5d); - ret = __phy_write(phydev, 0x10, val); - rtl821x_write_page(phydev, 0); - } else { + if (devnum == MDIO_MMD_VEND2) + ret = rtlgen_write_vend2(phydev, regnum, val); + else if (devnum == MDIO_MMD_AN && regnum == MDIO_AN_EEE_ADV) + ret = rtlgen_write_vend2(phydev, regnum, RTL_MDIO_AN_EEE_ADV); + else ret = -EOPNOTSUPP; - } return ret; } @@ -788,19 +794,12 @@ static int rtl822x_read_mmd(struct phy_device *phydev, int devnum, u16 regnum) if (ret != -EOPNOTSUPP) return ret; - if (devnum == MDIO_MMD_PCS && regnum == MDIO_PCS_EEE_ABLE2) { - rtl821x_write_page(phydev, 0xa6e); - ret = __phy_read(phydev, 0x16); - rtl821x_write_page(phydev, 0); - } else if (devnum == MDIO_MMD_AN && regnum == MDIO_AN_EEE_ADV2) { - rtl821x_write_page(phydev, 0xa6d); - ret = __phy_read(phydev, 0x12); - rtl821x_write_page(phydev, 0); - } else if (devnum == MDIO_MMD_AN && regnum == MDIO_AN_EEE_LPABLE2) { - rtl821x_write_page(phydev, 0xa6d); - ret = __phy_read(phydev, 0x10); - rtl821x_write_page(phydev, 0); - } + if (devnum == MDIO_MMD_PCS && regnum == MDIO_PCS_EEE_ABLE2) + ret = rtlgen_read_vend2(phydev, RTL_MDIO_PCS_EEE_ABLE2); + else if (devnum == MDIO_MMD_AN && regnum == MDIO_AN_EEE_ADV2) + ret = rtlgen_read_vend2(phydev, RTL_MDIO_AN_EEE_ADV2); + else if (devnum == MDIO_MMD_AN && regnum == MDIO_AN_EEE_LPABLE2) + ret = rtlgen_read_vend2(phydev, RTL_MDIO_AN_EEE_LPABLE2); return ret; } @@ -813,11 +812,8 @@ static int rtl822x_write_mmd(struct phy_device *phydev, int devnum, u16 regnum, if (ret != -EOPNOTSUPP) return ret; - if (devnum == MDIO_MMD_AN && regnum == MDIO_AN_EEE_ADV2) { - rtl821x_write_page(phydev, 0xa6d); - ret = __phy_write(phydev, 0x12, val); - rtl821x_write_page(phydev, 0); - } + if (devnum == MDIO_MMD_AN && regnum == MDIO_AN_EEE_ADV2) + ret = rtlgen_write_vend2(phydev, RTL_MDIO_AN_EEE_ADV2, val); return ret; } @@ -913,7 +909,7 @@ static int rtl822x_get_features(struct phy_device *phydev) { int val; - val = phy_read_paged(phydev, 0xa61, 0x13); + val = phy_read_mmd(phydev, MDIO_MMD_VEND2, RTL_MDIO_PMA_SPEED); if (val < 0) return val; @@ -934,10 +930,10 @@ static int rtl822x_config_aneg(struct phy_device *phydev) if (phydev->autoneg == AUTONEG_ENABLE) { u16 adv = linkmode_adv_to_mii_10gbt_adv_t(phydev->advertising); - ret = phy_modify_paged_changed(phydev, 0xa5d, 0x12, - MDIO_AN_10GBT_CTRL_ADV2_5G | - MDIO_AN_10GBT_CTRL_ADV5G, - adv); + ret = phy_modify_mmd_changed(phydev, MDIO_MMD_VEND2, + RTL_MDIO_AN_10GBT_CTRL, + MDIO_AN_10GBT_CTRL_ADV2_5G | + MDIO_AN_10GBT_CTRL_ADV5G, adv); if (ret < 0) return ret; } @@ -981,7 +977,7 @@ static int rtl822x_read_status(struct phy_device *phydev) !phydev->autoneg_complete) return 0; - lpadv = phy_read_paged(phydev, 0xa5d, 0x13); + lpadv = phy_read_mmd(phydev, MDIO_MMD_VEND2, RTL_MDIO_AN_10GBT_STAT); if (lpadv < 0) return lpadv; @@ -1028,7 +1024,8 @@ static int rtl822x_c45_config_aneg(struct phy_device *phydev) val = linkmode_adv_to_mii_ctrl1000_t(phydev->advertising); /* Vendor register as C45 has no standardized support for 1000BaseT */ - ret = phy_modify_mmd_changed(phydev, MDIO_MMD_VEND2, RTL822X_VND2_GBCR, + ret = phy_modify_mmd_changed(phydev, MDIO_MMD_VEND2, + RTL822X_VND2_C22_REG(MII_CTRL1000), ADVERTISE_1000FULL, val); if (ret < 0) return ret; @@ -1045,7 +1042,7 @@ static int rtl822x_c45_read_status(struct phy_device *phydev) /* Vendor register as C45 has no standardized support for 1000BaseT */ if (phydev->autoneg == AUTONEG_ENABLE && genphy_c45_aneg_done(phydev)) { val = phy_read_mmd(phydev, MDIO_MMD_VEND2, - RTL822X_VND2_GANLPAR); + RTL822X_VND2_C22_REG(MII_STAT1000)); if (val < 0) return val; } else { diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 5ca6ecf0ce5f..d4ece538f1b2 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -26,74 +26,9 @@ #include <linux/virtio_net.h> #include <linux/skb_array.h> -#define TAP_IFFEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) - -#define TAP_VNET_LE 0x80000000 -#define TAP_VNET_BE 0x40000000 - -#ifdef CONFIG_TUN_VNET_CROSS_LE -static inline bool tap_legacy_is_little_endian(struct tap_queue *q) -{ - return q->flags & TAP_VNET_BE ? false : - virtio_legacy_is_little_endian(); -} - -static long tap_get_vnet_be(struct tap_queue *q, int __user *sp) -{ - int s = !!(q->flags & TAP_VNET_BE); - - if (put_user(s, sp)) - return -EFAULT; - - return 0; -} - -static long tap_set_vnet_be(struct tap_queue *q, int __user *sp) -{ - int s; - - if (get_user(s, sp)) - return -EFAULT; - - if (s) - q->flags |= TAP_VNET_BE; - else - q->flags &= ~TAP_VNET_BE; - - return 0; -} -#else -static inline bool tap_legacy_is_little_endian(struct tap_queue *q) -{ - return virtio_legacy_is_little_endian(); -} - -static long tap_get_vnet_be(struct tap_queue *q, int __user *argp) -{ - return -EINVAL; -} - -static long tap_set_vnet_be(struct tap_queue *q, int __user *argp) -{ - return -EINVAL; -} -#endif /* CONFIG_TUN_VNET_CROSS_LE */ - -static inline bool tap_is_little_endian(struct tap_queue *q) -{ - return q->flags & TAP_VNET_LE || - tap_legacy_is_little_endian(q); -} - -static inline u16 tap16_to_cpu(struct tap_queue *q, __virtio16 val) -{ - return __virtio16_to_cpu(tap_is_little_endian(q), val); -} +#include "tun_vnet.h" -static inline __virtio16 cpu_to_tap16(struct tap_queue *q, u16 val) -{ - return __cpu_to_virtio16(tap_is_little_endian(q), val); -} +#define TAP_IFFEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) static struct proto tap_proto = { .name = "tap", @@ -645,6 +580,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, int err; struct virtio_net_hdr vnet_hdr = { 0 }; int vnet_hdr_len = 0; + int hdr_len = 0; int copylen = 0; int depth; bool zerocopy = false; @@ -654,25 +590,13 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, if (q->flags & IFF_VNET_HDR) { vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz); - err = -EINVAL; - if (len < vnet_hdr_len) + hdr_len = tun_vnet_hdr_get(vnet_hdr_len, q->flags, from, &vnet_hdr); + if (hdr_len < 0) { + err = hdr_len; goto err; - len -= vnet_hdr_len; + } - err = -EFAULT; - if (!copy_from_iter_full(&vnet_hdr, sizeof(vnet_hdr), from)) - goto err; - iov_iter_advance(from, vnet_hdr_len - sizeof(vnet_hdr)); - if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && - tap16_to_cpu(q, vnet_hdr.csum_start) + - tap16_to_cpu(q, vnet_hdr.csum_offset) + 2 > - tap16_to_cpu(q, vnet_hdr.hdr_len)) - vnet_hdr.hdr_len = cpu_to_tap16(q, - tap16_to_cpu(q, vnet_hdr.csum_start) + - tap16_to_cpu(q, vnet_hdr.csum_offset) + 2); - err = -EINVAL; - if (tap16_to_cpu(q, vnet_hdr.hdr_len) > len) - goto err; + len -= vnet_hdr_len; } err = -EINVAL; @@ -682,12 +606,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, if (msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) { struct iov_iter i; - copylen = vnet_hdr.hdr_len ? - tap16_to_cpu(q, vnet_hdr.hdr_len) : GOODCOPY_LEN; - if (copylen > good_linear) - copylen = good_linear; - else if (copylen < ETH_HLEN) - copylen = ETH_HLEN; + copylen = clamp(hdr_len ?: GOODCOPY_LEN, ETH_HLEN, good_linear); linear = copylen; i = *from; iov_iter_advance(&i, copylen); @@ -697,11 +616,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, if (!zerocopy) { copylen = len; - linear = tap16_to_cpu(q, vnet_hdr.hdr_len); - if (linear > good_linear) - linear = good_linear; - else if (linear < ETH_HLEN) - linear = ETH_HLEN; + linear = clamp(hdr_len, ETH_HLEN, good_linear); } skb = tap_alloc_skb(&q->sk, TAP_RESERVE, copylen, @@ -733,8 +648,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, skb->dev = tap->dev; if (vnet_hdr_len) { - err = virtio_net_hdr_to_skb(skb, &vnet_hdr, - tap_is_little_endian(q)); + err = tun_vnet_hdr_to_skb(q->flags, skb, &vnet_hdr); if (err) { rcu_read_unlock(); drop_reason = SKB_DROP_REASON_DEV_HDR; @@ -797,23 +711,17 @@ static ssize_t tap_put_user(struct tap_queue *q, int total; if (q->flags & IFF_VNET_HDR) { - int vlan_hlen = skb_vlan_tag_present(skb) ? VLAN_HLEN : 0; struct virtio_net_hdr vnet_hdr; vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz); - if (iov_iter_count(iter) < vnet_hdr_len) - return -EINVAL; - if (virtio_net_hdr_from_skb(skb, &vnet_hdr, - tap_is_little_endian(q), true, - vlan_hlen)) - BUG(); - - if (copy_to_iter(&vnet_hdr, sizeof(vnet_hdr), iter) != - sizeof(vnet_hdr)) - return -EFAULT; + ret = tun_vnet_hdr_from_skb(q->flags, NULL, skb, &vnet_hdr); + if (ret) + return ret; - iov_iter_advance(iter, vnet_hdr_len - sizeof(vnet_hdr)); + ret = tun_vnet_hdr_put(vnet_hdr_len, iter, &vnet_hdr); + if (ret) + return ret; } total = vnet_hdr_len; total += skb->len; @@ -1072,42 +980,6 @@ static long tap_ioctl(struct file *file, unsigned int cmd, q->sk.sk_sndbuf = s; return 0; - case TUNGETVNETHDRSZ: - s = q->vnet_hdr_sz; - if (put_user(s, sp)) - return -EFAULT; - return 0; - - case TUNSETVNETHDRSZ: - if (get_user(s, sp)) - return -EFAULT; - if (s < (int)sizeof(struct virtio_net_hdr)) - return -EINVAL; - - q->vnet_hdr_sz = s; - return 0; - - case TUNGETVNETLE: - s = !!(q->flags & TAP_VNET_LE); - if (put_user(s, sp)) - return -EFAULT; - return 0; - - case TUNSETVNETLE: - if (get_user(s, sp)) - return -EFAULT; - if (s) - q->flags |= TAP_VNET_LE; - else - q->flags &= ~TAP_VNET_LE; - return 0; - - case TUNGETVNETBE: - return tap_get_vnet_be(q, sp); - - case TUNSETVNETBE: - return tap_set_vnet_be(q, sp); - case TUNSETOFFLOAD: /* let the user check for future flags */ if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | @@ -1151,7 +1023,7 @@ static long tap_ioctl(struct file *file, unsigned int cmd, return ret; default: - return -EINVAL; + return tun_vnet_ioctl(&q->vnet_hdr_sz, &q->flags, cmd, sp); } } @@ -1198,7 +1070,7 @@ static int tap_get_user_xdp(struct tap_queue *q, struct xdp_buff *xdp) skb->protocol = eth_hdr(skb)->h_proto; if (vnet_hdr_len) { - err = virtio_net_hdr_to_skb(skb, gso, tap_is_little_endian(q)); + err = tun_vnet_hdr_to_skb(q->flags, skb, gso); if (err) goto err_kfree; } diff --git a/drivers/net/tun.c b/drivers/net/tun.c index acf96f262488..d8f4d3e996a7 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -83,6 +83,8 @@ #include <linux/uaccess.h> #include <linux/proc_fs.h> +#include "tun_vnet.h" + static void tun_default_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd); @@ -94,9 +96,6 @@ static void tun_default_link_ksettings(struct net_device *dev, * overload it to mean fasync when stored there. */ #define TUN_FASYNC IFF_ATTACH_QUEUE -/* High bits in flags field are unused. */ -#define TUN_VNET_LE 0x80000000 -#define TUN_VNET_BE 0x40000000 #define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \ IFF_MULTI_QUEUE | IFF_NAPI | IFF_NAPI_FRAGS) @@ -298,70 +297,6 @@ static bool tun_napi_frags_enabled(const struct tun_file *tfile) return tfile->napi_frags_enabled; } -#ifdef CONFIG_TUN_VNET_CROSS_LE -static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) -{ - return tun->flags & TUN_VNET_BE ? false : - virtio_legacy_is_little_endian(); -} - -static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) -{ - int be = !!(tun->flags & TUN_VNET_BE); - - if (put_user(be, argp)) - return -EFAULT; - - return 0; -} - -static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) -{ - int be; - - if (get_user(be, argp)) - return -EFAULT; - - if (be) - tun->flags |= TUN_VNET_BE; - else - tun->flags &= ~TUN_VNET_BE; - - return 0; -} -#else -static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) -{ - return virtio_legacy_is_little_endian(); -} - -static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) -{ - return -EINVAL; -} - -static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) -{ - return -EINVAL; -} -#endif /* CONFIG_TUN_VNET_CROSS_LE */ - -static inline bool tun_is_little_endian(struct tun_struct *tun) -{ - return tun->flags & TUN_VNET_LE || - tun_legacy_is_little_endian(tun); -} - -static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val) -{ - return __virtio16_to_cpu(tun_is_little_endian(tun), val); -} - -static inline __virtio16 cpu_to_tun16(struct tun_struct *tun, u16 val) -{ - return __cpu_to_virtio16(tun_is_little_endian(tun), val); -} - static inline u32 tun_hashfn(u32 rxhash) { return rxhash & TUN_MASK_FLOW_ENTRIES; @@ -1756,6 +1691,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, struct virtio_net_hdr gso = { 0 }; int good_linear; int copylen; + int hdr_len = 0; bool zerocopy = false; int err; u32 rxhash = 0; @@ -1775,26 +1711,16 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (tun->flags & IFF_VNET_HDR) { int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); - if (len < vnet_hdr_sz) - return -EINVAL; - len -= vnet_hdr_sz; + hdr_len = tun_vnet_hdr_get(vnet_hdr_sz, tun->flags, from, &gso); + if (hdr_len < 0) + return hdr_len; - if (!copy_from_iter_full(&gso, sizeof(gso), from)) - return -EFAULT; - - if ((gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && - tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2 > tun16_to_cpu(tun, gso.hdr_len)) - gso.hdr_len = cpu_to_tun16(tun, tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2); - - if (tun16_to_cpu(tun, gso.hdr_len) > len) - return -EINVAL; - iov_iter_advance(from, vnet_hdr_sz - sizeof(gso)); + len -= vnet_hdr_sz; } if ((tun->flags & TUN_TYPE_MASK) == IFF_TAP) { align += NET_IP_ALIGN; - if (unlikely(len < ETH_HLEN || - (gso.hdr_len && tun16_to_cpu(tun, gso.hdr_len) < ETH_HLEN))) + if (unlikely(len < ETH_HLEN || (hdr_len && hdr_len < ETH_HLEN))) return -EINVAL; } @@ -1807,9 +1733,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, * enough room for skb expand head in case it is used. * The rest of the buffer is mapped from userspace. */ - copylen = gso.hdr_len ? tun16_to_cpu(tun, gso.hdr_len) : GOODCOPY_LEN; - if (copylen > good_linear) - copylen = good_linear; + copylen = min(hdr_len ? hdr_len : GOODCOPY_LEN, good_linear); linear = copylen; iov_iter_advance(&i, copylen); if (iov_iter_npages(&i, INT_MAX) <= MAX_SKB_FRAGS) @@ -1830,10 +1754,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } else { if (!zerocopy) { copylen = len; - if (tun16_to_cpu(tun, gso.hdr_len) > good_linear) - linear = good_linear; - else - linear = tun16_to_cpu(tun, gso.hdr_len); + linear = min(hdr_len, good_linear); } if (frags) { @@ -1868,7 +1789,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } } - if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun))) { + if (tun_vnet_hdr_to_skb(tun->flags, skb, &gso)) { atomic_long_inc(&tun->rx_frame_errors); err = -EINVAL; goto free_skb; @@ -2063,18 +1984,15 @@ static ssize_t tun_put_user_xdp(struct tun_struct *tun, { int vnet_hdr_sz = 0; size_t size = xdp_frame->len; - size_t ret; + ssize_t ret; if (tun->flags & IFF_VNET_HDR) { struct virtio_net_hdr gso = { 0 }; vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); - if (unlikely(iov_iter_count(iter) < vnet_hdr_sz)) - return -EINVAL; - if (unlikely(copy_to_iter(&gso, sizeof(gso), iter) != - sizeof(gso))) - return -EFAULT; - iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso)); + ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso); + if (ret) + return ret; } ret = copy_to_iter(xdp_frame->data, size, iter) + vnet_hdr_sz; @@ -2097,6 +2015,7 @@ static ssize_t tun_put_user(struct tun_struct *tun, int vlan_offset = 0; int vlan_hlen = 0; int vnet_hdr_sz = 0; + int ret; if (skb_vlan_tag_present(skb)) vlan_hlen = VLAN_HLEN; @@ -2123,31 +2042,13 @@ static ssize_t tun_put_user(struct tun_struct *tun, if (vnet_hdr_sz) { struct virtio_net_hdr gso; - if (iov_iter_count(iter) < vnet_hdr_sz) - return -EINVAL; - - if (virtio_net_hdr_from_skb(skb, &gso, - tun_is_little_endian(tun), true, - vlan_hlen)) { - struct skb_shared_info *sinfo = skb_shinfo(skb); - - if (net_ratelimit()) { - netdev_err(tun->dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n", - sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size), - tun16_to_cpu(tun, gso.hdr_len)); - print_hex_dump(KERN_ERR, "tun: ", - DUMP_PREFIX_NONE, - 16, 1, skb->head, - min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true); - } - WARN_ON_ONCE(1); - return -EINVAL; - } - - if (copy_to_iter(&gso, sizeof(gso), iter) != sizeof(gso)) - return -EFAULT; + ret = tun_vnet_hdr_from_skb(tun->flags, tun->dev, skb, &gso); + if (ret) + return ret; - iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso)); + ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso); + if (ret) + return ret; } if (vlan_hlen) { @@ -2507,7 +2408,7 @@ build: skb_reserve(skb, xdp->data - xdp->data_hard_start); skb_put(skb, xdp->data_end - xdp->data); - if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) { + if (tun_vnet_hdr_to_skb(tun->flags, skb, gso)) { atomic_long_inc(&tun->rx_frame_errors); kfree_skb(skb); ret = -EINVAL; @@ -3091,8 +2992,6 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, kgid_t group; int ifindex; int sndbuf; - int vnet_hdr_sz; - int le; int ret; bool do_notify = false; @@ -3299,50 +3198,6 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, tun_set_sndbuf(tun); break; - case TUNGETVNETHDRSZ: - vnet_hdr_sz = tun->vnet_hdr_sz; - if (copy_to_user(argp, &vnet_hdr_sz, sizeof(vnet_hdr_sz))) - ret = -EFAULT; - break; - - case TUNSETVNETHDRSZ: - if (copy_from_user(&vnet_hdr_sz, argp, sizeof(vnet_hdr_sz))) { - ret = -EFAULT; - break; - } - if (vnet_hdr_sz < (int)sizeof(struct virtio_net_hdr)) { - ret = -EINVAL; - break; - } - - tun->vnet_hdr_sz = vnet_hdr_sz; - break; - - case TUNGETVNETLE: - le = !!(tun->flags & TUN_VNET_LE); - if (put_user(le, (int __user *)argp)) - ret = -EFAULT; - break; - - case TUNSETVNETLE: - if (get_user(le, (int __user *)argp)) { - ret = -EFAULT; - break; - } - if (le) - tun->flags |= TUN_VNET_LE; - else - tun->flags &= ~TUN_VNET_LE; - break; - - case TUNGETVNETBE: - ret = tun_get_vnet_be(tun, argp); - break; - - case TUNSETVNETBE: - ret = tun_set_vnet_be(tun, argp); - break; - case TUNATTACHFILTER: /* Can be set only for TAPs */ ret = -EINVAL; @@ -3398,7 +3253,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, break; default: - ret = -EINVAL; + ret = tun_vnet_ioctl(&tun->vnet_hdr_sz, &tun->flags, cmd, argp); break; } diff --git a/drivers/net/tun_vnet.h b/drivers/net/tun_vnet.h new file mode 100644 index 000000000000..fd7411c4447f --- /dev/null +++ b/drivers/net/tun_vnet.h @@ -0,0 +1,185 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef TUN_VNET_H +#define TUN_VNET_H + +/* High bits in flags field are unused. */ +#define TUN_VNET_LE 0x80000000 +#define TUN_VNET_BE 0x40000000 + +static inline bool tun_vnet_legacy_is_little_endian(unsigned int flags) +{ + bool be = IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE) && + (flags & TUN_VNET_BE); + + return !be && virtio_legacy_is_little_endian(); +} + +static inline long tun_get_vnet_be(unsigned int flags, int __user *argp) +{ + int be = !!(flags & TUN_VNET_BE); + + if (!IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE)) + return -EINVAL; + + if (put_user(be, argp)) + return -EFAULT; + + return 0; +} + +static inline long tun_set_vnet_be(unsigned int *flags, int __user *argp) +{ + int be; + + if (!IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE)) + return -EINVAL; + + if (get_user(be, argp)) + return -EFAULT; + + if (be) + *flags |= TUN_VNET_BE; + else + *flags &= ~TUN_VNET_BE; + + return 0; +} + +static inline bool tun_vnet_is_little_endian(unsigned int flags) +{ + return flags & TUN_VNET_LE || tun_vnet_legacy_is_little_endian(flags); +} + +static inline u16 tun_vnet16_to_cpu(unsigned int flags, __virtio16 val) +{ + return __virtio16_to_cpu(tun_vnet_is_little_endian(flags), val); +} + +static inline __virtio16 cpu_to_tun_vnet16(unsigned int flags, u16 val) +{ + return __cpu_to_virtio16(tun_vnet_is_little_endian(flags), val); +} + +static inline long tun_vnet_ioctl(int *vnet_hdr_sz, unsigned int *flags, + unsigned int cmd, int __user *sp) +{ + int s; + + switch (cmd) { + case TUNGETVNETHDRSZ: + s = *vnet_hdr_sz; + if (put_user(s, sp)) + return -EFAULT; + return 0; + + case TUNSETVNETHDRSZ: + if (get_user(s, sp)) + return -EFAULT; + if (s < (int)sizeof(struct virtio_net_hdr)) + return -EINVAL; + + *vnet_hdr_sz = s; + return 0; + + case TUNGETVNETLE: + s = !!(*flags & TUN_VNET_LE); + if (put_user(s, sp)) + return -EFAULT; + return 0; + + case TUNSETVNETLE: + if (get_user(s, sp)) + return -EFAULT; + if (s) + *flags |= TUN_VNET_LE; + else + *flags &= ~TUN_VNET_LE; + return 0; + + case TUNGETVNETBE: + return tun_get_vnet_be(*flags, sp); + + case TUNSETVNETBE: + return tun_set_vnet_be(flags, sp); + + default: + return -EINVAL; + } +} + +static inline int tun_vnet_hdr_get(int sz, unsigned int flags, + struct iov_iter *from, + struct virtio_net_hdr *hdr) +{ + u16 hdr_len; + + if (iov_iter_count(from) < sz) + return -EINVAL; + + if (!copy_from_iter_full(hdr, sizeof(*hdr), from)) + return -EFAULT; + + hdr_len = tun_vnet16_to_cpu(flags, hdr->hdr_len); + + if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { + hdr_len = max(tun_vnet16_to_cpu(flags, hdr->csum_start) + tun_vnet16_to_cpu(flags, hdr->csum_offset) + 2, hdr_len); + hdr->hdr_len = cpu_to_tun_vnet16(flags, hdr_len); + } + + if (hdr_len > iov_iter_count(from)) + return -EINVAL; + + iov_iter_advance(from, sz - sizeof(*hdr)); + + return hdr_len; +} + +static inline int tun_vnet_hdr_put(int sz, struct iov_iter *iter, + const struct virtio_net_hdr *hdr) +{ + if (unlikely(iov_iter_count(iter) < sz)) + return -EINVAL; + + if (unlikely(copy_to_iter(hdr, sizeof(*hdr), iter) != sizeof(*hdr))) + return -EFAULT; + + iov_iter_advance(iter, sz - sizeof(*hdr)); + + return 0; +} + +static inline int tun_vnet_hdr_to_skb(unsigned int flags, struct sk_buff *skb, + const struct virtio_net_hdr *hdr) +{ + return virtio_net_hdr_to_skb(skb, hdr, tun_vnet_is_little_endian(flags)); +} + +static inline int tun_vnet_hdr_from_skb(unsigned int flags, + const struct net_device *dev, + const struct sk_buff *skb, + struct virtio_net_hdr *hdr) +{ + int vlan_hlen = skb_vlan_tag_present(skb) ? VLAN_HLEN : 0; + + if (virtio_net_hdr_from_skb(skb, hdr, + tun_vnet_is_little_endian(flags), true, + vlan_hlen)) { + struct skb_shared_info *sinfo = skb_shinfo(skb); + + if (net_ratelimit()) { + netdev_err(dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n", + sinfo->gso_type, tun_vnet16_to_cpu(flags, hdr->gso_size), + tun_vnet16_to_cpu(flags, hdr->hdr_len)); + print_hex_dump(KERN_ERR, "tun: ", + DUMP_PREFIX_NONE, + 16, 1, skb->head, + min(tun_vnet16_to_cpu(flags, hdr->hdr_len), 64), true); + } + WARN_ON_ONCE(1); + return -EINVAL; + } + + return 0; +} + +#endif /* TUN_VNET_H */ diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 57d6e5abc30e..da24941a6e44 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -1421,6 +1421,19 @@ static const struct driver_info hg20f9_info = { .data = FLAG_EEPROM_MAC, }; +static const struct driver_info lyconsys_fibergecko100_info = { + .description = "LyconSys FiberGecko 100 USB 2.0 to SFP Adapter", + .bind = ax88178_bind, + .status = asix_status, + .link_reset = ax88178_link_reset, + .reset = ax88178_link_reset, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR | + FLAG_MULTI_PACKET, + .rx_fixup = asix_rx_fixup_common, + .tx_fixup = asix_tx_fixup, + .data = 0x20061201, +}; + static const struct usb_device_id products [] = { { // Linksys USB200M @@ -1578,6 +1591,10 @@ static const struct usb_device_id products [] = { // Linux Automation GmbH USB 10Base-T1L USB_DEVICE(0x33f7, 0x0004), .driver_info = (unsigned long) &lxausb_t1l_info, +}, { + /* LyconSys FiberGecko 100 */ + USB_DEVICE(0x1d2a, 0x0801), + .driver_info = (unsigned long) &lyconsys_fibergecko100_info, }, { }, // END }; diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index e13e4920ee9b..88921c13b629 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -660,7 +660,7 @@ static const struct usb_device_id mbim_devs[] = { .driver_info = (unsigned long)&cdc_mbim_info_avoid_altsetting_toggle, }, - /* Telit FN990 */ + /* Telit FN990A */ { USB_DEVICE_AND_INTERFACE_INFO(0x1bc7, 0x1071, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&cdc_mbim_info_avoid_altsetting_toggle, }, diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index e9208a8d2bfa..14d1c85c8000 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1360,7 +1360,7 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x1bc7, 0x1050, 2)}, /* Telit FN980 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1057, 2)}, /* Telit FN980 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1060, 2)}, /* Telit LN920 */ - {QMI_QUIRK_SET_DTR(0x1bc7, 0x1070, 2)}, /* Telit FN990 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1070, 2)}, /* Telit FN990A */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1080, 2)}, /* Telit FE990 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a0, 0)}, /* Telit FN920C04 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a4, 0)}, /* Telit FN920C04 */ @@ -1368,6 +1368,7 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x1bc7, 0x10c0, 0)}, /* Telit FE910C04 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10c4, 0)}, /* Telit FE910C04 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10c8, 0)}, /* Telit FE910C04 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x10d0, 0)}, /* Telit FN990B */ {QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */ {QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 468c73974046..e1021148d3a6 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -10079,6 +10079,7 @@ static const struct usb_device_id rtl8152_table[] = { { USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff) }, { USB_DEVICE(VENDOR_ID_TPLINK, 0x0601) }, { USB_DEVICE(VENDOR_ID_DLINK, 0xb301) }, + { USB_DEVICE(VENDOR_ID_DELL, 0xb097) }, { USB_DEVICE(VENDOR_ID_ASUS, 0x1976) }, {} }; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index ca81b212a246..5f21ce1013c4 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1537,14 +1537,12 @@ static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it) nlmsg_end(skb, nlh); - /* fib_nl_{new,del}rule handling looks for net from skb->sk */ - skb->sk = dev_net(dev)->rtnl; if (add_it) { - err = fib_nl_newrule(skb, nlh, NULL); + err = fib_newrule(dev_net(dev), skb, nlh, NULL, true); if (err == -EEXIST) err = 0; } else { - err = fib_nl_delrule(skb, nlh, NULL); + err = fib_delrule(dev_net(dev), skb, nlh, NULL, true); if (err == -ENOENT) err = 0; } diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 92516189e792..e2354c02def0 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -227,9 +227,9 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, be32_to_cpu(fdb->vni))) goto nla_put_failure; - ci.ndm_used = jiffies_to_clock_t(now - fdb->used); + ci.ndm_used = jiffies_to_clock_t(now - READ_ONCE(fdb->used)); ci.ndm_confirmed = 0; - ci.ndm_updated = jiffies_to_clock_t(now - fdb->updated); + ci.ndm_updated = jiffies_to_clock_t(now - READ_ONCE(fdb->updated)); ci.ndm_refcnt = 0; if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) @@ -434,8 +434,12 @@ static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan, struct vxlan_fdb *f; f = __vxlan_find_mac(vxlan, mac, vni); - if (f && f->used != jiffies) - f->used = jiffies; + if (f) { + unsigned long now = jiffies; + + if (READ_ONCE(f->used) != now) + WRITE_ONCE(f->used, now); + } return f; } @@ -1009,12 +1013,10 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan, !(f->flags & NTF_VXLAN_ADDED_BY_USER)) { if (f->state != state) { f->state = state; - f->updated = jiffies; notify = 1; } if (f->flags != fdb_flags) { f->flags = fdb_flags; - f->updated = jiffies; notify = 1; } } @@ -1048,12 +1050,13 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan, } if (ndm_flags & NTF_USE) - f->used = jiffies; + WRITE_ONCE(f->updated, jiffies); if (notify) { if (rd == NULL) rd = first_remote_rtnl(f); + WRITE_ONCE(f->updated, jiffies); err = vxlan_fdb_notify(vxlan, f, rd, RTM_NEWNEIGH, swdev_notify, extack); if (err) @@ -1292,7 +1295,7 @@ int __vxlan_fdb_delete(struct vxlan_dev *vxlan, struct vxlan_fdb *f; int err = -ENOENT; - f = vxlan_find_mac(vxlan, addr, src_vni); + f = __vxlan_find_mac(vxlan, addr, src_vni); if (!f) return err; @@ -1459,9 +1462,13 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev, ifindex = src_ifindex; #endif - f = vxlan_find_mac(vxlan, src_mac, vni); + f = __vxlan_find_mac(vxlan, src_mac, vni); if (likely(f)) { struct vxlan_rdst *rdst = first_remote_rcu(f); + unsigned long now = jiffies; + + if (READ_ONCE(f->updated) != now) + WRITE_ONCE(f->updated, now); if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip) && rdst->remote_ifindex == ifindex)) @@ -1481,7 +1488,6 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev, src_mac, &rdst->remote_ip.sa, &src_ip->sa); rdst->remote_ip = *src_ip; - f->updated = jiffies; vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH, true, NULL); } else { u32 hash_index = fdb_head_index(vxlan, src_mac, vni); @@ -1664,7 +1670,6 @@ static bool vxlan_ecn_decapsulate(struct vxlan_sock *vs, void *oiph, return err <= 1; } -/* Callback from net/ipv4/udp.c to receive packets */ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) { struct vxlan_vni_node *vninode = NULL; @@ -1834,7 +1839,6 @@ drop: return 0; } -/* Callback from net/ipv{4,6}/udp.c to check that we have a VNI for errors */ static int vxlan_err_lookup(struct sock *sk, struct sk_buff *skb) { struct vxlan_dev *vxlan; @@ -2852,7 +2856,7 @@ static void vxlan_cleanup(struct timer_list *t) if (f->flags & NTF_EXT_LEARNED) continue; - timeout = f->used + vxlan->cfg.age_interval * HZ; + timeout = READ_ONCE(f->updated) + vxlan->cfg.age_interval * HZ; if (time_before_eq(timeout, jiffies)) { netdev_dbg(vxlan->dev, "garbage collect %pM\n", @@ -3932,7 +3936,7 @@ static void vxlan_config_apply(struct net_device *dev, } static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, - struct vxlan_config *conf, bool changelink, + struct vxlan_config *conf, struct netlink_ext_ack *extack) { struct vxlan_dev *vxlan = netdev_priv(dev); @@ -3943,7 +3947,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, if (ret) return ret; - vxlan_config_apply(dev, conf, lowerdev, src_net, changelink); + vxlan_config_apply(dev, conf, lowerdev, src_net, false); return 0; } @@ -3961,7 +3965,7 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev, int err; dst = &vxlan->default_dst; - err = vxlan_dev_configure(net, dev, conf, false, extack); + err = vxlan_dev_configure(net, dev, conf, extack); if (err) return err; @@ -4415,6 +4419,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack) { struct vxlan_dev *vxlan = netdev_priv(dev); + bool rem_ip_changed, change_igmp; struct net_device *lowerdev; struct vxlan_config conf; struct vxlan_rdst *dst; @@ -4438,8 +4443,13 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], if (err) return err; + rem_ip_changed = !vxlan_addr_equal(&conf.remote_ip, &dst->remote_ip); + change_igmp = vxlan->dev->flags & IFF_UP && + (rem_ip_changed || + dst->remote_ifindex != conf.remote_ifindex); + /* handle default dst entry */ - if (!vxlan_addr_equal(&conf.remote_ip, &dst->remote_ip)) { + if (rem_ip_changed) { u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, conf.vni); spin_lock_bh(&vxlan->hash_lock[hash_index]); @@ -4483,6 +4493,9 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], } } + if (change_igmp && vxlan_addr_multicast(&dst->remote_ip)) + err = vxlan_multicast_leave(vxlan); + if (conf.age_interval != vxlan->cfg.age_interval) mod_timer(&vxlan->age_timer, jiffies); @@ -4490,7 +4503,12 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], if (lowerdev && lowerdev != dst->remote_dev) dst->remote_dev = lowerdev; vxlan_config_apply(dev, &conf, lowerdev, vxlan->net, true); - return 0; + + if (!err && change_igmp && + vxlan_addr_multicast(&dst->remote_ip)) + err = vxlan_multicast_join(vxlan); + + return err; } static void vxlan_dellink(struct net_device *dev, struct list_head *head) @@ -4768,7 +4786,7 @@ vxlan_fdb_offloaded_set(struct net_device *dev, spin_lock_bh(&vxlan->hash_lock[hash_index]); - f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni); + f = __vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni); if (!f) goto out; @@ -4824,7 +4842,7 @@ vxlan_fdb_external_learn_del(struct net_device *dev, hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni); spin_lock_bh(&vxlan->hash_lock[hash_index]); - f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni); + f = __vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni); if (!f) err = -ENOENT; else if (f->flags & NTF_EXT_LEARNED) diff --git a/drivers/net/wwan/t7xx/t7xx_pci.c b/drivers/net/wwan/t7xx/t7xx_pci.c index 02f2ec7cf4ce..8bf63f2dcbbf 100644 --- a/drivers/net/wwan/t7xx/t7xx_pci.c +++ b/drivers/net/wwan/t7xx/t7xx_pci.c @@ -32,7 +32,6 @@ #include <linux/pci.h> #include <linux/pm.h> #include <linux/pm_runtime.h> -#include <linux/pm_wakeup.h> #include <linux/spinlock.h> #include "t7xx_mhccif.h" diff --git a/drivers/of/base.c b/drivers/of/base.c index af6c68bbb427..e37b088f1fad 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -824,6 +824,33 @@ struct device_node *of_get_child_by_name(const struct device_node *node, } EXPORT_SYMBOL(of_get_child_by_name); +/** + * of_get_available_child_by_name - Find the available child node by name for a given parent + * @node: parent node + * @name: child name to look for. + * + * This function looks for child node for given matching name and checks the + * device's availability for use. + * + * Return: A node pointer if found, with refcount incremented, use + * of_node_put() on it when done. + * Returns NULL if node is not found. + */ +struct device_node *of_get_available_child_by_name(const struct device_node *node, + const char *name) +{ + struct device_node *child; + + child = of_get_child_by_name(node, name); + if (child && !of_device_is_available(child)) { + of_node_put(child); + return NULL; + } + + return child; +} +EXPORT_SYMBOL(of_get_available_child_by_name); + struct device_node *__of_find_node_by_path(const struct device_node *parent, const char *path) { diff --git a/drivers/s390/net/Kconfig b/drivers/s390/net/Kconfig index c61e6427384c..9eb9e3c49f81 100644 --- a/drivers/s390/net/Kconfig +++ b/drivers/s390/net/Kconfig @@ -2,15 +2,6 @@ menu "S/390 network device drivers" depends on NETDEVICES && S390 -config LCS - def_tristate m - prompt "Lan Channel Station Interface" - depends on CCW && NETDEVICES && ETHERNET - help - Select this option if you want to use LCS networking on IBM System z. - To compile as a module, choose M. The module name is lcs. - If you do not use LCS, choose N. - config CTCM def_tristate m prompt "CTC and MPC SNA device support" @@ -98,7 +89,7 @@ config QETH_OSX config CCWGROUP tristate - default (LCS || CTCM || QETH || SMC) + default (CTCM || QETH || SMC) config ISM tristate "Support for ISM vPCI Adapter" diff --git a/drivers/s390/net/Makefile b/drivers/s390/net/Makefile index bc55ec316adb..b5aaba290127 100644 --- a/drivers/s390/net/Makefile +++ b/drivers/s390/net/Makefile @@ -8,7 +8,6 @@ obj-$(CONFIG_CTCM) += ctcm.o fsm.o obj-$(CONFIG_NETIUCV) += netiucv.o fsm.o obj-$(CONFIG_SMSGIUCV) += smsgiucv.o obj-$(CONFIG_SMSGIUCV_EVENT) += smsgiucv_app.o -obj-$(CONFIG_LCS) += lcs.o qeth-y += qeth_core_sys.o qeth_core_main.o qeth_core_mpc.o qeth_ethtool.o obj-$(CONFIG_QETH) += qeth.o qeth_l2-y += qeth_l2_main.o qeth_l2_sys.o diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c deleted file mode 100644 index 88db8378325a..000000000000 --- a/drivers/s390/net/lcs.c +++ /dev/null @@ -1,2385 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Linux for S/390 LAN channel station device driver - * - * Copyright IBM Corp. 1999, 2009 - * Author(s): Original Code written by - * DJ Barrow <djbarrow@de.ibm.com,barrow_dj@yahoo.com> - * Rewritten by - * Frank Pavlic <fpavlic@de.ibm.com> and - * Martin Schwidefsky <schwidefsky@de.ibm.com> - */ - -#define KMSG_COMPONENT "lcs" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - -#include <linux/module.h> -#include <linux/if.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/inetdevice.h> -#include <linux/in.h> -#include <linux/igmp.h> -#include <linux/delay.h> -#include <linux/kthread.h> -#include <linux/slab.h> -#include <net/arp.h> -#include <net/ip.h> - -#include <asm/debug.h> -#include <asm/idals.h> -#include <asm/timex.h> -#include <linux/device.h> -#include <asm/ccwgroup.h> - -#include "lcs.h" - - -/* - * initialization string for output - */ - -static char version[] __initdata = "LCS driver"; - -/* - * the root device for lcs group devices - */ -static struct device *lcs_root_dev; - -/* - * Some prototypes. - */ -static void lcs_tasklet(unsigned long); -static void lcs_start_kernel_thread(struct work_struct *); -static void lcs_get_frames_cb(struct lcs_channel *, struct lcs_buffer *); -#ifdef CONFIG_IP_MULTICAST -static int lcs_send_delipm(struct lcs_card *, struct lcs_ipm_list *); -#endif /* CONFIG_IP_MULTICAST */ -static int lcs_recovery(void *ptr); - -/* - * Debug Facility Stuff - */ -static char debug_buffer[255]; -static debug_info_t *lcs_dbf_setup; -static debug_info_t *lcs_dbf_trace; - -/* - * LCS Debug Facility functions - */ -static void -lcs_unregister_debug_facility(void) -{ - debug_unregister(lcs_dbf_setup); - debug_unregister(lcs_dbf_trace); -} - -static int -lcs_register_debug_facility(void) -{ - lcs_dbf_setup = debug_register("lcs_setup", 2, 1, 8); - lcs_dbf_trace = debug_register("lcs_trace", 4, 1, 8); - if (lcs_dbf_setup == NULL || lcs_dbf_trace == NULL) { - pr_err("Not enough memory for debug facility.\n"); - lcs_unregister_debug_facility(); - return -ENOMEM; - } - debug_register_view(lcs_dbf_setup, &debug_hex_ascii_view); - debug_set_level(lcs_dbf_setup, 2); - debug_register_view(lcs_dbf_trace, &debug_hex_ascii_view); - debug_set_level(lcs_dbf_trace, 2); - return 0; -} - -/* - * Allocate io buffers. - */ -static int -lcs_alloc_channel(struct lcs_channel *channel) -{ - int cnt; - - LCS_DBF_TEXT(2, setup, "ichalloc"); - for (cnt = 0; cnt < LCS_NUM_BUFFS; cnt++) { - /* alloc memory fo iobuffer */ - channel->iob[cnt].data = - kzalloc(LCS_IOBUFFERSIZE, GFP_DMA | GFP_KERNEL); - if (channel->iob[cnt].data == NULL) - break; - channel->iob[cnt].state = LCS_BUF_STATE_EMPTY; - } - if (cnt < LCS_NUM_BUFFS) { - /* Not all io buffers could be allocated. */ - LCS_DBF_TEXT(2, setup, "echalloc"); - while (cnt-- > 0) - kfree(channel->iob[cnt].data); - return -ENOMEM; - } - return 0; -} - -/* - * Free io buffers. - */ -static void -lcs_free_channel(struct lcs_channel *channel) -{ - int cnt; - - LCS_DBF_TEXT(2, setup, "ichfree"); - for (cnt = 0; cnt < LCS_NUM_BUFFS; cnt++) { - kfree(channel->iob[cnt].data); - channel->iob[cnt].data = NULL; - } -} - -/* - * Cleanup channel. - */ -static void -lcs_cleanup_channel(struct lcs_channel *channel) -{ - LCS_DBF_TEXT(3, setup, "cleanch"); - /* Kill write channel tasklets. */ - tasklet_kill(&channel->irq_tasklet); - /* Free channel buffers. */ - lcs_free_channel(channel); -} - -/* - * LCS free memory for card and channels. - */ -static void -lcs_free_card(struct lcs_card *card) -{ - LCS_DBF_TEXT(2, setup, "remcard"); - LCS_DBF_HEX(2, setup, &card, sizeof(void*)); - kfree(card); -} - -/* - * LCS alloc memory for card and channels - */ -static struct lcs_card * -lcs_alloc_card(void) -{ - struct lcs_card *card; - int rc; - - LCS_DBF_TEXT(2, setup, "alloclcs"); - - card = kzalloc(sizeof(struct lcs_card), GFP_KERNEL | GFP_DMA); - if (card == NULL) - return NULL; - card->lan_type = LCS_FRAME_TYPE_AUTO; - card->pkt_seq = 0; - card->lancmd_timeout = LCS_LANCMD_TIMEOUT_DEFAULT; - /* Allocate io buffers for the read channel. */ - rc = lcs_alloc_channel(&card->read); - if (rc){ - LCS_DBF_TEXT(2, setup, "iccwerr"); - lcs_free_card(card); - return NULL; - } - /* Allocate io buffers for the write channel. */ - rc = lcs_alloc_channel(&card->write); - if (rc) { - LCS_DBF_TEXT(2, setup, "iccwerr"); - lcs_cleanup_channel(&card->read); - lcs_free_card(card); - return NULL; - } - -#ifdef CONFIG_IP_MULTICAST - INIT_LIST_HEAD(&card->ipm_list); -#endif - LCS_DBF_HEX(2, setup, &card, sizeof(void*)); - return card; -} - -/* - * Setup read channel. - */ -static void -lcs_setup_read_ccws(struct lcs_card *card) -{ - int cnt; - - LCS_DBF_TEXT(2, setup, "ireadccw"); - /* Setup read ccws. */ - memset(card->read.ccws, 0, sizeof (struct ccw1) * (LCS_NUM_BUFFS + 1)); - for (cnt = 0; cnt < LCS_NUM_BUFFS; cnt++) { - card->read.ccws[cnt].cmd_code = LCS_CCW_READ; - card->read.ccws[cnt].count = LCS_IOBUFFERSIZE; - card->read.ccws[cnt].flags = - CCW_FLAG_CC | CCW_FLAG_SLI | CCW_FLAG_PCI; - /* - * Note: we have allocated the buffer with GFP_DMA, so - * we do not need to do set_normalized_cda. - */ - card->read.ccws[cnt].cda = - virt_to_dma32(card->read.iob[cnt].data); - ((struct lcs_header *) - card->read.iob[cnt].data)->offset = LCS_ILLEGAL_OFFSET; - card->read.iob[cnt].callback = lcs_get_frames_cb; - card->read.iob[cnt].state = LCS_BUF_STATE_READY; - card->read.iob[cnt].count = LCS_IOBUFFERSIZE; - } - card->read.ccws[0].flags &= ~CCW_FLAG_PCI; - card->read.ccws[LCS_NUM_BUFFS - 1].flags &= ~CCW_FLAG_PCI; - card->read.ccws[LCS_NUM_BUFFS - 1].flags |= CCW_FLAG_SUSPEND; - /* Last ccw is a tic (transfer in channel). */ - card->read.ccws[LCS_NUM_BUFFS].cmd_code = LCS_CCW_TRANSFER; - card->read.ccws[LCS_NUM_BUFFS].cda = virt_to_dma32(card->read.ccws); - /* Setg initial state of the read channel. */ - card->read.state = LCS_CH_STATE_INIT; - - card->read.io_idx = 0; - card->read.buf_idx = 0; -} - -static void -lcs_setup_read(struct lcs_card *card) -{ - LCS_DBF_TEXT(3, setup, "initread"); - - lcs_setup_read_ccws(card); - /* Initialize read channel tasklet. */ - card->read.irq_tasklet.data = (unsigned long) &card->read; - card->read.irq_tasklet.func = lcs_tasklet; - /* Initialize waitqueue. */ - init_waitqueue_head(&card->read.wait_q); -} - -/* - * Setup write channel. - */ -static void -lcs_setup_write_ccws(struct lcs_card *card) -{ - int cnt; - - LCS_DBF_TEXT(3, setup, "iwritccw"); - /* Setup write ccws. */ - memset(card->write.ccws, 0, sizeof(struct ccw1) * (LCS_NUM_BUFFS + 1)); - for (cnt = 0; cnt < LCS_NUM_BUFFS; cnt++) { - card->write.ccws[cnt].cmd_code = LCS_CCW_WRITE; - card->write.ccws[cnt].count = 0; - card->write.ccws[cnt].flags = - CCW_FLAG_SUSPEND | CCW_FLAG_CC | CCW_FLAG_SLI; - /* - * Note: we have allocated the buffer with GFP_DMA, so - * we do not need to do set_normalized_cda. - */ - card->write.ccws[cnt].cda = - virt_to_dma32(card->write.iob[cnt].data); - } - /* Last ccw is a tic (transfer in channel). */ - card->write.ccws[LCS_NUM_BUFFS].cmd_code = LCS_CCW_TRANSFER; - card->write.ccws[LCS_NUM_BUFFS].cda = virt_to_dma32(card->write.ccws); - /* Set initial state of the write channel. */ - card->read.state = LCS_CH_STATE_INIT; - - card->write.io_idx = 0; - card->write.buf_idx = 0; -} - -static void -lcs_setup_write(struct lcs_card *card) -{ - LCS_DBF_TEXT(3, setup, "initwrit"); - - lcs_setup_write_ccws(card); - /* Initialize write channel tasklet. */ - card->write.irq_tasklet.data = (unsigned long) &card->write; - card->write.irq_tasklet.func = lcs_tasklet; - /* Initialize waitqueue. */ - init_waitqueue_head(&card->write.wait_q); -} - -static void -lcs_set_allowed_threads(struct lcs_card *card, unsigned long threads) -{ - unsigned long flags; - - spin_lock_irqsave(&card->mask_lock, flags); - card->thread_allowed_mask = threads; - spin_unlock_irqrestore(&card->mask_lock, flags); - wake_up(&card->wait_q); -} -static int lcs_threads_running(struct lcs_card *card, unsigned long threads) -{ - unsigned long flags; - int rc = 0; - - spin_lock_irqsave(&card->mask_lock, flags); - rc = (card->thread_running_mask & threads); - spin_unlock_irqrestore(&card->mask_lock, flags); - return rc; -} - -static int -lcs_wait_for_threads(struct lcs_card *card, unsigned long threads) -{ - return wait_event_interruptible(card->wait_q, - lcs_threads_running(card, threads) == 0); -} - -static int lcs_set_thread_start_bit(struct lcs_card *card, unsigned long thread) -{ - unsigned long flags; - - spin_lock_irqsave(&card->mask_lock, flags); - if ( !(card->thread_allowed_mask & thread) || - (card->thread_start_mask & thread) ) { - spin_unlock_irqrestore(&card->mask_lock, flags); - return -EPERM; - } - card->thread_start_mask |= thread; - spin_unlock_irqrestore(&card->mask_lock, flags); - return 0; -} - -static void -lcs_clear_thread_running_bit(struct lcs_card *card, unsigned long thread) -{ - unsigned long flags; - - spin_lock_irqsave(&card->mask_lock, flags); - card->thread_running_mask &= ~thread; - spin_unlock_irqrestore(&card->mask_lock, flags); - wake_up(&card->wait_q); -} - -static int __lcs_do_run_thread(struct lcs_card *card, unsigned long thread) -{ - unsigned long flags; - int rc = 0; - - spin_lock_irqsave(&card->mask_lock, flags); - if (card->thread_start_mask & thread){ - if ((card->thread_allowed_mask & thread) && - !(card->thread_running_mask & thread)){ - rc = 1; - card->thread_start_mask &= ~thread; - card->thread_running_mask |= thread; - } else - rc = -EPERM; - } - spin_unlock_irqrestore(&card->mask_lock, flags); - return rc; -} - -static int -lcs_do_run_thread(struct lcs_card *card, unsigned long thread) -{ - int rc = 0; - wait_event(card->wait_q, - (rc = __lcs_do_run_thread(card, thread)) >= 0); - return rc; -} - -static int -lcs_do_start_thread(struct lcs_card *card, unsigned long thread) -{ - unsigned long flags; - int rc = 0; - - spin_lock_irqsave(&card->mask_lock, flags); - LCS_DBF_TEXT_(4, trace, " %02x%02x%02x", - (u8) card->thread_start_mask, - (u8) card->thread_allowed_mask, - (u8) card->thread_running_mask); - rc = (card->thread_start_mask & thread); - spin_unlock_irqrestore(&card->mask_lock, flags); - return rc; -} - -/* - * Initialize channels,card and state machines. - */ -static void -lcs_setup_card(struct lcs_card *card) -{ - LCS_DBF_TEXT(2, setup, "initcard"); - LCS_DBF_HEX(2, setup, &card, sizeof(void*)); - - lcs_setup_read(card); - lcs_setup_write(card); - /* Set cards initial state. */ - card->state = DEV_STATE_DOWN; - card->tx_buffer = NULL; - card->tx_emitted = 0; - - init_waitqueue_head(&card->wait_q); - spin_lock_init(&card->lock); - spin_lock_init(&card->ipm_lock); - spin_lock_init(&card->mask_lock); -#ifdef CONFIG_IP_MULTICAST - INIT_LIST_HEAD(&card->ipm_list); -#endif - INIT_LIST_HEAD(&card->lancmd_waiters); -} - -static void lcs_clear_multicast_list(struct lcs_card *card) -{ -#ifdef CONFIG_IP_MULTICAST - struct lcs_ipm_list *ipm; - unsigned long flags; - - /* Free multicast list. */ - LCS_DBF_TEXT(3, setup, "clmclist"); - spin_lock_irqsave(&card->ipm_lock, flags); - while (!list_empty(&card->ipm_list)){ - ipm = list_entry(card->ipm_list.next, - struct lcs_ipm_list, list); - list_del(&ipm->list); - if (ipm->ipm_state != LCS_IPM_STATE_SET_REQUIRED){ - spin_unlock_irqrestore(&card->ipm_lock, flags); - lcs_send_delipm(card, ipm); - spin_lock_irqsave(&card->ipm_lock, flags); - } - kfree(ipm); - } - spin_unlock_irqrestore(&card->ipm_lock, flags); -#endif -} - -/* - * Cleanup channels,card and state machines. - */ -static void -lcs_cleanup_card(struct lcs_card *card) -{ - - LCS_DBF_TEXT(3, setup, "cleancrd"); - LCS_DBF_HEX(2,setup,&card,sizeof(void*)); - - if (card->dev != NULL) - free_netdev(card->dev); - /* Cleanup channels. */ - lcs_cleanup_channel(&card->write); - lcs_cleanup_channel(&card->read); -} - -/* - * Start channel. - */ -static int -lcs_start_channel(struct lcs_channel *channel) -{ - unsigned long flags; - int rc; - - LCS_DBF_TEXT_(4, trace,"ssch%s", dev_name(&channel->ccwdev->dev)); - spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); - rc = ccw_device_start(channel->ccwdev, - channel->ccws + channel->io_idx, 0, 0, - DOIO_DENY_PREFETCH | DOIO_ALLOW_SUSPEND); - if (rc == 0) - channel->state = LCS_CH_STATE_RUNNING; - spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags); - if (rc) { - LCS_DBF_TEXT_(4,trace,"essh%s", - dev_name(&channel->ccwdev->dev)); - dev_err(&channel->ccwdev->dev, - "Starting an LCS device resulted in an error," - " rc=%d!\n", rc); - } - return rc; -} - -static int -lcs_clear_channel(struct lcs_channel *channel) -{ - unsigned long flags; - int rc; - - LCS_DBF_TEXT(4,trace,"clearch"); - LCS_DBF_TEXT_(4, trace, "%s", dev_name(&channel->ccwdev->dev)); - spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); - rc = ccw_device_clear(channel->ccwdev, 0); - spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags); - if (rc) { - LCS_DBF_TEXT_(4, trace, "ecsc%s", - dev_name(&channel->ccwdev->dev)); - return rc; - } - wait_event(channel->wait_q, (channel->state == LCS_CH_STATE_CLEARED)); - channel->state = LCS_CH_STATE_STOPPED; - return rc; -} - - -/* - * Stop channel. - */ -static int -lcs_stop_channel(struct lcs_channel *channel) -{ - unsigned long flags; - int rc; - - if (channel->state == LCS_CH_STATE_STOPPED) - return 0; - LCS_DBF_TEXT(4,trace,"haltsch"); - LCS_DBF_TEXT_(4, trace, "%s", dev_name(&channel->ccwdev->dev)); - channel->state = LCS_CH_STATE_INIT; - spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); - rc = ccw_device_halt(channel->ccwdev, 0); - spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags); - if (rc) { - LCS_DBF_TEXT_(4, trace, "ehsc%s", - dev_name(&channel->ccwdev->dev)); - return rc; - } - /* Asynchronous halt initialted. Wait for its completion. */ - wait_event(channel->wait_q, (channel->state == LCS_CH_STATE_HALTED)); - lcs_clear_channel(channel); - return 0; -} - -/* - * start read and write channel - */ -static int -lcs_start_channels(struct lcs_card *card) -{ - int rc; - - LCS_DBF_TEXT(2, trace, "chstart"); - /* start read channel */ - rc = lcs_start_channel(&card->read); - if (rc) - return rc; - /* start write channel */ - rc = lcs_start_channel(&card->write); - if (rc) - lcs_stop_channel(&card->read); - return rc; -} - -/* - * stop read and write channel - */ -static int -lcs_stop_channels(struct lcs_card *card) -{ - LCS_DBF_TEXT(2, trace, "chhalt"); - lcs_stop_channel(&card->read); - lcs_stop_channel(&card->write); - return 0; -} - -/* - * Get empty buffer. - */ -static struct lcs_buffer * -__lcs_get_buffer(struct lcs_channel *channel) -{ - int index; - - LCS_DBF_TEXT(5, trace, "_getbuff"); - index = channel->io_idx; - do { - if (channel->iob[index].state == LCS_BUF_STATE_EMPTY) { - channel->iob[index].state = LCS_BUF_STATE_LOCKED; - return channel->iob + index; - } - index = (index + 1) & (LCS_NUM_BUFFS - 1); - } while (index != channel->io_idx); - return NULL; -} - -static struct lcs_buffer * -lcs_get_buffer(struct lcs_channel *channel) -{ - struct lcs_buffer *buffer; - unsigned long flags; - - LCS_DBF_TEXT(5, trace, "getbuff"); - spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); - buffer = __lcs_get_buffer(channel); - spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags); - return buffer; -} - -/* - * Resume channel program if the channel is suspended. - */ -static int -__lcs_resume_channel(struct lcs_channel *channel) -{ - int rc; - - if (channel->state != LCS_CH_STATE_SUSPENDED) - return 0; - if (channel->ccws[channel->io_idx].flags & CCW_FLAG_SUSPEND) - return 0; - LCS_DBF_TEXT_(5, trace, "rsch%s", dev_name(&channel->ccwdev->dev)); - rc = ccw_device_resume(channel->ccwdev); - if (rc) { - LCS_DBF_TEXT_(4, trace, "ersc%s", - dev_name(&channel->ccwdev->dev)); - dev_err(&channel->ccwdev->dev, - "Sending data from the LCS device to the LAN failed" - " with rc=%d\n",rc); - } else - channel->state = LCS_CH_STATE_RUNNING; - return rc; - -} - -/* - * Make a buffer ready for processing. - */ -static void __lcs_ready_buffer_bits(struct lcs_channel *channel, int index) -{ - int prev, next; - - LCS_DBF_TEXT(5, trace, "rdybits"); - prev = (index - 1) & (LCS_NUM_BUFFS - 1); - next = (index + 1) & (LCS_NUM_BUFFS - 1); - /* Check if we may clear the suspend bit of this buffer. */ - if (channel->ccws[next].flags & CCW_FLAG_SUSPEND) { - /* Check if we have to set the PCI bit. */ - if (!(channel->ccws[prev].flags & CCW_FLAG_SUSPEND)) - /* Suspend bit of the previous buffer is not set. */ - channel->ccws[index].flags |= CCW_FLAG_PCI; - /* Suspend bit of the next buffer is set. */ - channel->ccws[index].flags &= ~CCW_FLAG_SUSPEND; - } -} - -static int -lcs_ready_buffer(struct lcs_channel *channel, struct lcs_buffer *buffer) -{ - unsigned long flags; - int index, rc; - - LCS_DBF_TEXT(5, trace, "rdybuff"); - BUG_ON(buffer->state != LCS_BUF_STATE_LOCKED && - buffer->state != LCS_BUF_STATE_PROCESSED); - spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); - buffer->state = LCS_BUF_STATE_READY; - index = buffer - channel->iob; - /* Set length. */ - channel->ccws[index].count = buffer->count; - /* Check relevant PCI/suspend bits. */ - __lcs_ready_buffer_bits(channel, index); - rc = __lcs_resume_channel(channel); - spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags); - return rc; -} - -/* - * Mark the buffer as processed. Take care of the suspend bit - * of the previous buffer. This function is called from - * interrupt context, so the lock must not be taken. - */ -static int -__lcs_processed_buffer(struct lcs_channel *channel, struct lcs_buffer *buffer) -{ - int index, prev, next; - - LCS_DBF_TEXT(5, trace, "prcsbuff"); - BUG_ON(buffer->state != LCS_BUF_STATE_READY); - buffer->state = LCS_BUF_STATE_PROCESSED; - index = buffer - channel->iob; - prev = (index - 1) & (LCS_NUM_BUFFS - 1); - next = (index + 1) & (LCS_NUM_BUFFS - 1); - /* Set the suspend bit and clear the PCI bit of this buffer. */ - channel->ccws[index].flags |= CCW_FLAG_SUSPEND; - channel->ccws[index].flags &= ~CCW_FLAG_PCI; - /* Check the suspend bit of the previous buffer. */ - if (channel->iob[prev].state == LCS_BUF_STATE_READY) { - /* - * Previous buffer is in state ready. It might have - * happened in lcs_ready_buffer that the suspend bit - * has not been cleared to avoid an endless loop. - * Do it now. - */ - __lcs_ready_buffer_bits(channel, prev); - } - /* Clear PCI bit of next buffer. */ - channel->ccws[next].flags &= ~CCW_FLAG_PCI; - return __lcs_resume_channel(channel); -} - -/* - * Put a processed buffer back to state empty. - */ -static void -lcs_release_buffer(struct lcs_channel *channel, struct lcs_buffer *buffer) -{ - unsigned long flags; - - LCS_DBF_TEXT(5, trace, "relbuff"); - BUG_ON(buffer->state != LCS_BUF_STATE_LOCKED && - buffer->state != LCS_BUF_STATE_PROCESSED); - spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); - buffer->state = LCS_BUF_STATE_EMPTY; - spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags); -} - -/* - * Get buffer for a lan command. - */ -static struct lcs_buffer * -lcs_get_lancmd(struct lcs_card *card, int count) -{ - struct lcs_buffer *buffer; - struct lcs_cmd *cmd; - - LCS_DBF_TEXT(4, trace, "getlncmd"); - /* Get buffer and wait if none is available. */ - wait_event(card->write.wait_q, - ((buffer = lcs_get_buffer(&card->write)) != NULL)); - count += sizeof(struct lcs_header); - *(__u16 *)(buffer->data + count) = 0; - buffer->count = count + sizeof(__u16); - buffer->callback = lcs_release_buffer; - cmd = (struct lcs_cmd *) buffer->data; - cmd->offset = count; - cmd->type = LCS_FRAME_TYPE_CONTROL; - cmd->slot = 0; - return buffer; -} - - -static void -lcs_get_reply(struct lcs_reply *reply) -{ - refcount_inc(&reply->refcnt); -} - -static void -lcs_put_reply(struct lcs_reply *reply) -{ - if (refcount_dec_and_test(&reply->refcnt)) - kfree(reply); -} - -static struct lcs_reply * -lcs_alloc_reply(struct lcs_cmd *cmd) -{ - struct lcs_reply *reply; - - LCS_DBF_TEXT(4, trace, "getreply"); - - reply = kzalloc(sizeof(struct lcs_reply), GFP_ATOMIC); - if (!reply) - return NULL; - refcount_set(&reply->refcnt, 1); - reply->sequence_no = cmd->sequence_no; - reply->received = 0; - reply->rc = 0; - init_waitqueue_head(&reply->wait_q); - - return reply; -} - -/* - * Notifier function for lancmd replies. Called from read irq. - */ -static void -lcs_notify_lancmd_waiters(struct lcs_card *card, struct lcs_cmd *cmd) -{ - struct list_head *l, *n; - struct lcs_reply *reply; - - LCS_DBF_TEXT(4, trace, "notiwait"); - spin_lock(&card->lock); - list_for_each_safe(l, n, &card->lancmd_waiters) { - reply = list_entry(l, struct lcs_reply, list); - if (reply->sequence_no == cmd->sequence_no) { - lcs_get_reply(reply); - list_del_init(&reply->list); - if (reply->callback != NULL) - reply->callback(card, cmd); - reply->received = 1; - reply->rc = cmd->return_code; - wake_up(&reply->wait_q); - lcs_put_reply(reply); - break; - } - } - spin_unlock(&card->lock); -} - -/* - * Emit buffer of a lan command. - */ -static void -lcs_lancmd_timeout(struct timer_list *t) -{ - struct lcs_reply *reply = from_timer(reply, t, timer); - struct lcs_reply *list_reply, *r; - unsigned long flags; - - LCS_DBF_TEXT(4, trace, "timeout"); - spin_lock_irqsave(&reply->card->lock, flags); - list_for_each_entry_safe(list_reply, r, - &reply->card->lancmd_waiters,list) { - if (reply == list_reply) { - lcs_get_reply(reply); - list_del_init(&reply->list); - spin_unlock_irqrestore(&reply->card->lock, flags); - reply->received = 1; - reply->rc = -ETIME; - wake_up(&reply->wait_q); - lcs_put_reply(reply); - return; - } - } - spin_unlock_irqrestore(&reply->card->lock, flags); -} - -static int -lcs_send_lancmd(struct lcs_card *card, struct lcs_buffer *buffer, - void (*reply_callback)(struct lcs_card *, struct lcs_cmd *)) -{ - struct lcs_reply *reply; - struct lcs_cmd *cmd; - unsigned long flags; - int rc; - - LCS_DBF_TEXT(4, trace, "sendcmd"); - cmd = (struct lcs_cmd *) buffer->data; - cmd->return_code = 0; - cmd->sequence_no = card->sequence_no++; - reply = lcs_alloc_reply(cmd); - if (!reply) - return -ENOMEM; - reply->callback = reply_callback; - reply->card = card; - spin_lock_irqsave(&card->lock, flags); - list_add_tail(&reply->list, &card->lancmd_waiters); - spin_unlock_irqrestore(&card->lock, flags); - - buffer->callback = lcs_release_buffer; - rc = lcs_ready_buffer(&card->write, buffer); - if (rc) - return rc; - timer_setup(&reply->timer, lcs_lancmd_timeout, 0); - mod_timer(&reply->timer, jiffies + HZ * card->lancmd_timeout); - wait_event(reply->wait_q, reply->received); - del_timer_sync(&reply->timer); - LCS_DBF_TEXT_(4, trace, "rc:%d",reply->rc); - rc = reply->rc; - lcs_put_reply(reply); - return rc ? -EIO : 0; -} - -/* - * LCS startup command - */ -static int -lcs_send_startup(struct lcs_card *card, __u8 initiator) -{ - struct lcs_buffer *buffer; - struct lcs_cmd *cmd; - - LCS_DBF_TEXT(2, trace, "startup"); - buffer = lcs_get_lancmd(card, LCS_STD_CMD_SIZE); - cmd = (struct lcs_cmd *) buffer->data; - cmd->cmd_code = LCS_CMD_STARTUP; - cmd->initiator = initiator; - cmd->cmd.lcs_startup.buff_size = LCS_IOBUFFERSIZE; - return lcs_send_lancmd(card, buffer, NULL); -} - -/* - * LCS shutdown command - */ -static int -lcs_send_shutdown(struct lcs_card *card) -{ - struct lcs_buffer *buffer; - struct lcs_cmd *cmd; - - LCS_DBF_TEXT(2, trace, "shutdown"); - buffer = lcs_get_lancmd(card, LCS_STD_CMD_SIZE); - cmd = (struct lcs_cmd *) buffer->data; - cmd->cmd_code = LCS_CMD_SHUTDOWN; - cmd->initiator = LCS_INITIATOR_TCPIP; - return lcs_send_lancmd(card, buffer, NULL); -} - -/* - * LCS lanstat command - */ -static void -__lcs_lanstat_cb(struct lcs_card *card, struct lcs_cmd *cmd) -{ - LCS_DBF_TEXT(2, trace, "statcb"); - memcpy(card->mac, cmd->cmd.lcs_lanstat_cmd.mac_addr, LCS_MAC_LENGTH); -} - -static int -lcs_send_lanstat(struct lcs_card *card) -{ - struct lcs_buffer *buffer; - struct lcs_cmd *cmd; - - LCS_DBF_TEXT(2,trace, "cmdstat"); - buffer = lcs_get_lancmd(card, LCS_STD_CMD_SIZE); - cmd = (struct lcs_cmd *) buffer->data; - /* Setup lanstat command. */ - cmd->cmd_code = LCS_CMD_LANSTAT; - cmd->initiator = LCS_INITIATOR_TCPIP; - cmd->cmd.lcs_std_cmd.lan_type = card->lan_type; - cmd->cmd.lcs_std_cmd.portno = card->portno; - return lcs_send_lancmd(card, buffer, __lcs_lanstat_cb); -} - -/* - * send stoplan command - */ -static int -lcs_send_stoplan(struct lcs_card *card, __u8 initiator) -{ - struct lcs_buffer *buffer; - struct lcs_cmd *cmd; - - LCS_DBF_TEXT(2, trace, "cmdstpln"); - buffer = lcs_get_lancmd(card, LCS_STD_CMD_SIZE); - cmd = (struct lcs_cmd *) buffer->data; - cmd->cmd_code = LCS_CMD_STOPLAN; - cmd->initiator = initiator; - cmd->cmd.lcs_std_cmd.lan_type = card->lan_type; - cmd->cmd.lcs_std_cmd.portno = card->portno; - return lcs_send_lancmd(card, buffer, NULL); -} - -/* - * send startlan command - */ -static void -__lcs_send_startlan_cb(struct lcs_card *card, struct lcs_cmd *cmd) -{ - LCS_DBF_TEXT(2, trace, "srtlancb"); - card->lan_type = cmd->cmd.lcs_std_cmd.lan_type; - card->portno = cmd->cmd.lcs_std_cmd.portno; -} - -static int -lcs_send_startlan(struct lcs_card *card, __u8 initiator) -{ - struct lcs_buffer *buffer; - struct lcs_cmd *cmd; - - LCS_DBF_TEXT(2, trace, "cmdstaln"); - buffer = lcs_get_lancmd(card, LCS_STD_CMD_SIZE); - cmd = (struct lcs_cmd *) buffer->data; - cmd->cmd_code = LCS_CMD_STARTLAN; - cmd->initiator = initiator; - cmd->cmd.lcs_std_cmd.lan_type = card->lan_type; - cmd->cmd.lcs_std_cmd.portno = card->portno; - return lcs_send_lancmd(card, buffer, __lcs_send_startlan_cb); -} - -#ifdef CONFIG_IP_MULTICAST -/* - * send setipm command (Multicast) - */ -static int -lcs_send_setipm(struct lcs_card *card,struct lcs_ipm_list *ipm_list) -{ - struct lcs_buffer *buffer; - struct lcs_cmd *cmd; - - LCS_DBF_TEXT(2, trace, "cmdsetim"); - buffer = lcs_get_lancmd(card, LCS_MULTICAST_CMD_SIZE); - cmd = (struct lcs_cmd *) buffer->data; - cmd->cmd_code = LCS_CMD_SETIPM; - cmd->initiator = LCS_INITIATOR_TCPIP; - cmd->cmd.lcs_qipassist.lan_type = card->lan_type; - cmd->cmd.lcs_qipassist.portno = card->portno; - cmd->cmd.lcs_qipassist.version = 4; - cmd->cmd.lcs_qipassist.num_ip_pairs = 1; - memcpy(cmd->cmd.lcs_qipassist.lcs_ipass_ctlmsg.ip_mac_pair, - &ipm_list->ipm, sizeof (struct lcs_ip_mac_pair)); - LCS_DBF_TEXT_(2, trace, "%x",ipm_list->ipm.ip_addr); - return lcs_send_lancmd(card, buffer, NULL); -} - -/* - * send delipm command (Multicast) - */ -static int -lcs_send_delipm(struct lcs_card *card,struct lcs_ipm_list *ipm_list) -{ - struct lcs_buffer *buffer; - struct lcs_cmd *cmd; - - LCS_DBF_TEXT(2, trace, "cmddelim"); - buffer = lcs_get_lancmd(card, LCS_MULTICAST_CMD_SIZE); - cmd = (struct lcs_cmd *) buffer->data; - cmd->cmd_code = LCS_CMD_DELIPM; - cmd->initiator = LCS_INITIATOR_TCPIP; - cmd->cmd.lcs_qipassist.lan_type = card->lan_type; - cmd->cmd.lcs_qipassist.portno = card->portno; - cmd->cmd.lcs_qipassist.version = 4; - cmd->cmd.lcs_qipassist.num_ip_pairs = 1; - memcpy(cmd->cmd.lcs_qipassist.lcs_ipass_ctlmsg.ip_mac_pair, - &ipm_list->ipm, sizeof (struct lcs_ip_mac_pair)); - LCS_DBF_TEXT_(2, trace, "%x",ipm_list->ipm.ip_addr); - return lcs_send_lancmd(card, buffer, NULL); -} - -/* - * check if multicast is supported by LCS - */ -static void -__lcs_check_multicast_cb(struct lcs_card *card, struct lcs_cmd *cmd) -{ - LCS_DBF_TEXT(2, trace, "chkmccb"); - card->ip_assists_supported = - cmd->cmd.lcs_qipassist.ip_assists_supported; - card->ip_assists_enabled = - cmd->cmd.lcs_qipassist.ip_assists_enabled; -} - -static int -lcs_check_multicast_support(struct lcs_card *card) -{ - struct lcs_buffer *buffer; - struct lcs_cmd *cmd; - int rc; - - LCS_DBF_TEXT(2, trace, "cmdqipa"); - /* Send query ipassist. */ - buffer = lcs_get_lancmd(card, LCS_STD_CMD_SIZE); - cmd = (struct lcs_cmd *) buffer->data; - cmd->cmd_code = LCS_CMD_QIPASSIST; - cmd->initiator = LCS_INITIATOR_TCPIP; - cmd->cmd.lcs_qipassist.lan_type = card->lan_type; - cmd->cmd.lcs_qipassist.portno = card->portno; - cmd->cmd.lcs_qipassist.version = 4; - cmd->cmd.lcs_qipassist.num_ip_pairs = 1; - rc = lcs_send_lancmd(card, buffer, __lcs_check_multicast_cb); - if (rc != 0) { - pr_err("Query IPAssist failed. Assuming unsupported!\n"); - return -EOPNOTSUPP; - } - if (card->ip_assists_supported & LCS_IPASS_MULTICAST_SUPPORT) - return 0; - return -EOPNOTSUPP; -} - -/* - * set or del multicast address on LCS card - */ -static void -lcs_fix_multicast_list(struct lcs_card *card) -{ - struct list_head failed_list; - struct lcs_ipm_list *ipm, *tmp; - unsigned long flags; - int rc; - - LCS_DBF_TEXT(4,trace, "fixipm"); - INIT_LIST_HEAD(&failed_list); - spin_lock_irqsave(&card->ipm_lock, flags); -list_modified: - list_for_each_entry_safe(ipm, tmp, &card->ipm_list, list){ - switch (ipm->ipm_state) { - case LCS_IPM_STATE_SET_REQUIRED: - /* del from ipm_list so no one else can tamper with - * this entry */ - list_del_init(&ipm->list); - spin_unlock_irqrestore(&card->ipm_lock, flags); - rc = lcs_send_setipm(card, ipm); - spin_lock_irqsave(&card->ipm_lock, flags); - if (rc) { - pr_info("Adding multicast address failed." - " Table possibly full!\n"); - /* store ipm in failed list -> will be added - * to ipm_list again, so a retry will be done - * during the next call of this function */ - list_add_tail(&ipm->list, &failed_list); - } else { - ipm->ipm_state = LCS_IPM_STATE_ON_CARD; - /* re-insert into ipm_list */ - list_add_tail(&ipm->list, &card->ipm_list); - } - goto list_modified; - case LCS_IPM_STATE_DEL_REQUIRED: - list_del(&ipm->list); - spin_unlock_irqrestore(&card->ipm_lock, flags); - lcs_send_delipm(card, ipm); - spin_lock_irqsave(&card->ipm_lock, flags); - kfree(ipm); - goto list_modified; - case LCS_IPM_STATE_ON_CARD: - break; - } - } - /* re-insert all entries from the failed_list into ipm_list */ - list_for_each_entry_safe(ipm, tmp, &failed_list, list) - list_move_tail(&ipm->list, &card->ipm_list); - - spin_unlock_irqrestore(&card->ipm_lock, flags); -} - -/* - * get mac address for the relevant Multicast address - */ -static void -lcs_get_mac_for_ipm(__be32 ipm, char *mac, struct net_device *dev) -{ - LCS_DBF_TEXT(4,trace, "getmac"); - ip_eth_mc_map(ipm, mac); -} - -/* - * function called by net device to handle multicast address relevant things - */ -static void lcs_remove_mc_addresses(struct lcs_card *card, - struct in_device *in4_dev) -{ - struct ip_mc_list *im4; - struct list_head *l; - struct lcs_ipm_list *ipm; - unsigned long flags; - char buf[MAX_ADDR_LEN]; - - LCS_DBF_TEXT(4, trace, "remmclst"); - spin_lock_irqsave(&card->ipm_lock, flags); - list_for_each(l, &card->ipm_list) { - ipm = list_entry(l, struct lcs_ipm_list, list); - for (im4 = rcu_dereference(in4_dev->mc_list); - im4 != NULL; im4 = rcu_dereference(im4->next_rcu)) { - lcs_get_mac_for_ipm(im4->multiaddr, buf, card->dev); - if ( (ipm->ipm.ip_addr == im4->multiaddr) && - (memcmp(buf, &ipm->ipm.mac_addr, - LCS_MAC_LENGTH) == 0) ) - break; - } - if (im4 == NULL) - ipm->ipm_state = LCS_IPM_STATE_DEL_REQUIRED; - } - spin_unlock_irqrestore(&card->ipm_lock, flags); -} - -static struct lcs_ipm_list *lcs_check_addr_entry(struct lcs_card *card, - struct ip_mc_list *im4, - char *buf) -{ - struct lcs_ipm_list *tmp, *ipm = NULL; - struct list_head *l; - unsigned long flags; - - LCS_DBF_TEXT(4, trace, "chkmcent"); - spin_lock_irqsave(&card->ipm_lock, flags); - list_for_each(l, &card->ipm_list) { - tmp = list_entry(l, struct lcs_ipm_list, list); - if ( (tmp->ipm.ip_addr == im4->multiaddr) && - (memcmp(buf, &tmp->ipm.mac_addr, - LCS_MAC_LENGTH) == 0) ) { - ipm = tmp; - break; - } - } - spin_unlock_irqrestore(&card->ipm_lock, flags); - return ipm; -} - -static void lcs_set_mc_addresses(struct lcs_card *card, - struct in_device *in4_dev) -{ - - struct ip_mc_list *im4; - struct lcs_ipm_list *ipm; - char buf[MAX_ADDR_LEN]; - unsigned long flags; - - LCS_DBF_TEXT(4, trace, "setmclst"); - for (im4 = rcu_dereference(in4_dev->mc_list); im4 != NULL; - im4 = rcu_dereference(im4->next_rcu)) { - lcs_get_mac_for_ipm(im4->multiaddr, buf, card->dev); - ipm = lcs_check_addr_entry(card, im4, buf); - if (ipm != NULL) - continue; /* Address already in list. */ - ipm = kzalloc(sizeof(struct lcs_ipm_list), GFP_ATOMIC); - if (ipm == NULL) { - pr_info("Not enough memory to add" - " new multicast entry!\n"); - break; - } - memcpy(&ipm->ipm.mac_addr, buf, LCS_MAC_LENGTH); - ipm->ipm.ip_addr = im4->multiaddr; - ipm->ipm_state = LCS_IPM_STATE_SET_REQUIRED; - spin_lock_irqsave(&card->ipm_lock, flags); - LCS_DBF_HEX(2,trace,&ipm->ipm.ip_addr,4); - list_add(&ipm->list, &card->ipm_list); - spin_unlock_irqrestore(&card->ipm_lock, flags); - } -} - -static int -lcs_register_mc_addresses(void *data) -{ - struct lcs_card *card; - struct in_device *in4_dev; - - card = (struct lcs_card *) data; - - if (!lcs_do_run_thread(card, LCS_SET_MC_THREAD)) - return 0; - LCS_DBF_TEXT(4, trace, "regmulti"); - - in4_dev = in_dev_get(card->dev); - if (in4_dev == NULL) - goto out; - rcu_read_lock(); - lcs_remove_mc_addresses(card,in4_dev); - lcs_set_mc_addresses(card, in4_dev); - rcu_read_unlock(); - in_dev_put(in4_dev); - - netif_carrier_off(card->dev); - netif_tx_disable(card->dev); - wait_event(card->write.wait_q, - (card->write.state != LCS_CH_STATE_RUNNING)); - lcs_fix_multicast_list(card); - if (card->state == DEV_STATE_UP) { - netif_carrier_on(card->dev); - netif_wake_queue(card->dev); - } -out: - lcs_clear_thread_running_bit(card, LCS_SET_MC_THREAD); - return 0; -} -#endif /* CONFIG_IP_MULTICAST */ - -/* - * function called by net device to - * handle multicast address relevant things - */ -static void -lcs_set_multicast_list(struct net_device *dev) -{ -#ifdef CONFIG_IP_MULTICAST - struct lcs_card *card; - - LCS_DBF_TEXT(4, trace, "setmulti"); - card = (struct lcs_card *) dev->ml_priv; - - if (!lcs_set_thread_start_bit(card, LCS_SET_MC_THREAD)) - schedule_work(&card->kernel_thread_starter); -#endif /* CONFIG_IP_MULTICAST */ -} - -static long -lcs_check_irb_error(struct ccw_device *cdev, struct irb *irb) -{ - if (!IS_ERR(irb)) - return 0; - - switch (PTR_ERR(irb)) { - case -EIO: - dev_warn(&cdev->dev, - "An I/O-error occurred on the LCS device\n"); - LCS_DBF_TEXT(2, trace, "ckirberr"); - LCS_DBF_TEXT_(2, trace, " rc%d", -EIO); - break; - case -ETIMEDOUT: - dev_warn(&cdev->dev, - "A command timed out on the LCS device\n"); - LCS_DBF_TEXT(2, trace, "ckirberr"); - LCS_DBF_TEXT_(2, trace, " rc%d", -ETIMEDOUT); - break; - default: - dev_warn(&cdev->dev, - "An error occurred on the LCS device, rc=%ld\n", - PTR_ERR(irb)); - LCS_DBF_TEXT(2, trace, "ckirberr"); - LCS_DBF_TEXT(2, trace, " rc???"); - } - return PTR_ERR(irb); -} - -static int -lcs_get_problem(struct ccw_device *cdev, struct irb *irb) -{ - int dstat, cstat; - char *sense; - - sense = (char *) irb->ecw; - cstat = irb->scsw.cmd.cstat; - dstat = irb->scsw.cmd.dstat; - - if (cstat & (SCHN_STAT_CHN_CTRL_CHK | SCHN_STAT_INTF_CTRL_CHK | - SCHN_STAT_CHN_DATA_CHK | SCHN_STAT_CHAIN_CHECK | - SCHN_STAT_PROT_CHECK | SCHN_STAT_PROG_CHECK)) { - LCS_DBF_TEXT(2, trace, "CGENCHK"); - return 1; - } - if (dstat & DEV_STAT_UNIT_CHECK) { - if (sense[LCS_SENSE_BYTE_1] & - LCS_SENSE_RESETTING_EVENT) { - LCS_DBF_TEXT(2, trace, "REVIND"); - return 1; - } - if (sense[LCS_SENSE_BYTE_0] & - LCS_SENSE_CMD_REJECT) { - LCS_DBF_TEXT(2, trace, "CMDREJ"); - return 0; - } - if ((!sense[LCS_SENSE_BYTE_0]) && - (!sense[LCS_SENSE_BYTE_1]) && - (!sense[LCS_SENSE_BYTE_2]) && - (!sense[LCS_SENSE_BYTE_3])) { - LCS_DBF_TEXT(2, trace, "ZEROSEN"); - return 0; - } - LCS_DBF_TEXT(2, trace, "DGENCHK"); - return 1; - } - return 0; -} - -static void -lcs_schedule_recovery(struct lcs_card *card) -{ - LCS_DBF_TEXT(2, trace, "startrec"); - if (!lcs_set_thread_start_bit(card, LCS_RECOVERY_THREAD)) - schedule_work(&card->kernel_thread_starter); -} - -/* - * IRQ Handler for LCS channels - */ -static void -lcs_irq(struct ccw_device *cdev, unsigned long intparm, struct irb *irb) -{ - struct lcs_card *card; - struct lcs_channel *channel; - int rc, index; - int cstat, dstat; - - if (lcs_check_irb_error(cdev, irb)) - return; - - card = CARD_FROM_DEV(cdev); - if (card->read.ccwdev == cdev) - channel = &card->read; - else - channel = &card->write; - - cstat = irb->scsw.cmd.cstat; - dstat = irb->scsw.cmd.dstat; - LCS_DBF_TEXT_(5, trace, "Rint%s", dev_name(&cdev->dev)); - LCS_DBF_TEXT_(5, trace, "%4x%4x", irb->scsw.cmd.cstat, - irb->scsw.cmd.dstat); - LCS_DBF_TEXT_(5, trace, "%4x%4x", irb->scsw.cmd.fctl, - irb->scsw.cmd.actl); - - /* Check for channel and device errors presented */ - rc = lcs_get_problem(cdev, irb); - if (rc || (dstat & DEV_STAT_UNIT_EXCEP)) { - dev_warn(&cdev->dev, - "The LCS device stopped because of an error," - " dstat=0x%X, cstat=0x%X \n", - dstat, cstat); - if (rc) { - channel->state = LCS_CH_STATE_ERROR; - } - } - if (channel->state == LCS_CH_STATE_ERROR) { - lcs_schedule_recovery(card); - wake_up(&card->wait_q); - return; - } - /* How far in the ccw chain have we processed? */ - if ((channel->state != LCS_CH_STATE_INIT) && - (irb->scsw.cmd.fctl & SCSW_FCTL_START_FUNC) && - (irb->scsw.cmd.cpa != 0)) { - index = (struct ccw1 *)dma32_to_virt(irb->scsw.cmd.cpa) - - channel->ccws; - if ((irb->scsw.cmd.actl & SCSW_ACTL_SUSPENDED) || - (irb->scsw.cmd.cstat & SCHN_STAT_PCI)) - /* Bloody io subsystem tells us lies about cpa... */ - index = (index - 1) & (LCS_NUM_BUFFS - 1); - while (channel->io_idx != index) { - __lcs_processed_buffer(channel, - channel->iob + channel->io_idx); - channel->io_idx = - (channel->io_idx + 1) & (LCS_NUM_BUFFS - 1); - } - } - - if ((irb->scsw.cmd.dstat & DEV_STAT_DEV_END) || - (irb->scsw.cmd.dstat & DEV_STAT_CHN_END) || - (irb->scsw.cmd.dstat & DEV_STAT_UNIT_CHECK)) - /* Mark channel as stopped. */ - channel->state = LCS_CH_STATE_STOPPED; - else if (irb->scsw.cmd.actl & SCSW_ACTL_SUSPENDED) - /* CCW execution stopped on a suspend bit. */ - channel->state = LCS_CH_STATE_SUSPENDED; - if (irb->scsw.cmd.fctl & SCSW_FCTL_HALT_FUNC) { - if (irb->scsw.cmd.cc != 0) { - ccw_device_halt(channel->ccwdev, 0); - return; - } - /* The channel has been stopped by halt_IO. */ - channel->state = LCS_CH_STATE_HALTED; - } - if (irb->scsw.cmd.fctl & SCSW_FCTL_CLEAR_FUNC) - channel->state = LCS_CH_STATE_CLEARED; - /* Do the rest in the tasklet. */ - tasklet_schedule(&channel->irq_tasklet); -} - -/* - * Tasklet for IRQ handler - */ -static void -lcs_tasklet(unsigned long data) -{ - unsigned long flags; - struct lcs_channel *channel; - struct lcs_buffer *iob; - int buf_idx; - - channel = (struct lcs_channel *) data; - LCS_DBF_TEXT_(5, trace, "tlet%s", dev_name(&channel->ccwdev->dev)); - - /* Check for processed buffers. */ - iob = channel->iob; - buf_idx = channel->buf_idx; - while (iob[buf_idx].state == LCS_BUF_STATE_PROCESSED) { - /* Do the callback thing. */ - if (iob[buf_idx].callback != NULL) - iob[buf_idx].callback(channel, iob + buf_idx); - buf_idx = (buf_idx + 1) & (LCS_NUM_BUFFS - 1); - } - channel->buf_idx = buf_idx; - - if (channel->state == LCS_CH_STATE_STOPPED) - lcs_start_channel(channel); - spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); - if (channel->state == LCS_CH_STATE_SUSPENDED && - channel->iob[channel->io_idx].state == LCS_BUF_STATE_READY) - __lcs_resume_channel(channel); - spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags); - - /* Something happened on the channel. Wake up waiters. */ - wake_up(&channel->wait_q); -} - -/* - * Finish current tx buffer and make it ready for transmit. - */ -static void -__lcs_emit_txbuffer(struct lcs_card *card) -{ - LCS_DBF_TEXT(5, trace, "emittx"); - *(__u16 *)(card->tx_buffer->data + card->tx_buffer->count) = 0; - card->tx_buffer->count += 2; - lcs_ready_buffer(&card->write, card->tx_buffer); - card->tx_buffer = NULL; - card->tx_emitted++; -} - -/* - * Callback for finished tx buffers. - */ -static void -lcs_txbuffer_cb(struct lcs_channel *channel, struct lcs_buffer *buffer) -{ - struct lcs_card *card; - - LCS_DBF_TEXT(5, trace, "txbuffcb"); - /* Put buffer back to pool. */ - lcs_release_buffer(channel, buffer); - card = container_of(channel, struct lcs_card, write); - if (netif_queue_stopped(card->dev) && netif_carrier_ok(card->dev)) - netif_wake_queue(card->dev); - spin_lock(&card->lock); - card->tx_emitted--; - if (card->tx_emitted <= 0 && card->tx_buffer != NULL) - /* - * Last running tx buffer has finished. Submit partially - * filled current buffer. - */ - __lcs_emit_txbuffer(card); - spin_unlock(&card->lock); -} - -/* - * Packet transmit function called by network stack - */ -static netdev_tx_t __lcs_start_xmit(struct lcs_card *card, struct sk_buff *skb, - struct net_device *dev) -{ - struct lcs_header *header; - int rc = NETDEV_TX_OK; - - LCS_DBF_TEXT(5, trace, "hardxmit"); - if (skb == NULL) { - card->stats.tx_dropped++; - card->stats.tx_errors++; - return NETDEV_TX_OK; - } - if (card->state != DEV_STATE_UP) { - dev_kfree_skb(skb); - card->stats.tx_dropped++; - card->stats.tx_errors++; - card->stats.tx_carrier_errors++; - return NETDEV_TX_OK; - } - if (skb->protocol == htons(ETH_P_IPV6)) { - dev_kfree_skb(skb); - return NETDEV_TX_OK; - } - netif_stop_queue(card->dev); - spin_lock(&card->lock); - if (card->tx_buffer != NULL && - card->tx_buffer->count + sizeof(struct lcs_header) + - skb->len + sizeof(u16) > LCS_IOBUFFERSIZE) - /* skb too big for current tx buffer. */ - __lcs_emit_txbuffer(card); - if (card->tx_buffer == NULL) { - /* Get new tx buffer */ - card->tx_buffer = lcs_get_buffer(&card->write); - if (card->tx_buffer == NULL) { - card->stats.tx_dropped++; - rc = NETDEV_TX_BUSY; - goto out; - } - card->tx_buffer->callback = lcs_txbuffer_cb; - card->tx_buffer->count = 0; - } - header = (struct lcs_header *) - (card->tx_buffer->data + card->tx_buffer->count); - card->tx_buffer->count += skb->len + sizeof(struct lcs_header); - header->offset = card->tx_buffer->count; - header->type = card->lan_type; - header->slot = card->portno; - skb_copy_from_linear_data(skb, header + 1, skb->len); - spin_unlock(&card->lock); - card->stats.tx_bytes += skb->len; - card->stats.tx_packets++; - dev_kfree_skb(skb); - netif_wake_queue(card->dev); - spin_lock(&card->lock); - if (card->tx_emitted <= 0 && card->tx_buffer != NULL) - /* If this is the first tx buffer emit it immediately. */ - __lcs_emit_txbuffer(card); -out: - spin_unlock(&card->lock); - return rc; -} - -static netdev_tx_t lcs_start_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct lcs_card *card; - int rc; - - LCS_DBF_TEXT(5, trace, "pktxmit"); - card = (struct lcs_card *) dev->ml_priv; - rc = __lcs_start_xmit(card, skb, dev); - return rc; -} - -/* - * send startlan and lanstat command to make LCS device ready - */ -static int -lcs_startlan_auto(struct lcs_card *card) -{ - int rc; - - LCS_DBF_TEXT(2, trace, "strtauto"); - card->lan_type = LCS_FRAME_TYPE_ENET; - rc = lcs_send_startlan(card, LCS_INITIATOR_TCPIP); - if (rc == 0) - return 0; - - return -EIO; -} - -static int -lcs_startlan(struct lcs_card *card) -{ - int rc, i; - - LCS_DBF_TEXT(2, trace, "startlan"); - rc = 0; - if (card->portno != LCS_INVALID_PORT_NO) { - if (card->lan_type == LCS_FRAME_TYPE_AUTO) - rc = lcs_startlan_auto(card); - else - rc = lcs_send_startlan(card, LCS_INITIATOR_TCPIP); - } else { - for (i = 0; i <= 16; i++) { - card->portno = i; - if (card->lan_type != LCS_FRAME_TYPE_AUTO) - rc = lcs_send_startlan(card, - LCS_INITIATOR_TCPIP); - else - /* autodetecting lan type */ - rc = lcs_startlan_auto(card); - if (rc == 0) - break; - } - } - if (rc == 0) - return lcs_send_lanstat(card); - return rc; -} - -/* - * LCS detect function - * setup channels and make them I/O ready - */ -static int -lcs_detect(struct lcs_card *card) -{ - int rc = 0; - - LCS_DBF_TEXT(2, setup, "lcsdetct"); - /* start/reset card */ - if (card->dev) - netif_stop_queue(card->dev); - rc = lcs_stop_channels(card); - if (rc == 0) { - rc = lcs_start_channels(card); - if (rc == 0) { - rc = lcs_send_startup(card, LCS_INITIATOR_TCPIP); - if (rc == 0) - rc = lcs_startlan(card); - } - } - if (rc == 0) { - card->state = DEV_STATE_UP; - } else { - card->state = DEV_STATE_DOWN; - card->write.state = LCS_CH_STATE_INIT; - card->read.state = LCS_CH_STATE_INIT; - } - return rc; -} - -/* - * LCS Stop card - */ -static int -lcs_stopcard(struct lcs_card *card) -{ - int rc; - - LCS_DBF_TEXT(3, setup, "stopcard"); - - if (card->read.state != LCS_CH_STATE_STOPPED && - card->write.state != LCS_CH_STATE_STOPPED && - card->read.state != LCS_CH_STATE_ERROR && - card->write.state != LCS_CH_STATE_ERROR && - card->state == DEV_STATE_UP) { - lcs_clear_multicast_list(card); - rc = lcs_send_stoplan(card,LCS_INITIATOR_TCPIP); - rc = lcs_send_shutdown(card); - } - rc = lcs_stop_channels(card); - card->state = DEV_STATE_DOWN; - - return rc; -} - -/* - * Kernel Thread helper functions for LGW initiated commands - */ -static void -lcs_start_kernel_thread(struct work_struct *work) -{ - struct lcs_card *card = container_of(work, struct lcs_card, kernel_thread_starter); - LCS_DBF_TEXT(5, trace, "krnthrd"); - if (lcs_do_start_thread(card, LCS_RECOVERY_THREAD)) - kthread_run(lcs_recovery, card, "lcs_recover"); -#ifdef CONFIG_IP_MULTICAST - if (lcs_do_start_thread(card, LCS_SET_MC_THREAD)) - kthread_run(lcs_register_mc_addresses, card, "regipm"); -#endif -} - -/* - * Process control frames. - */ -static void -lcs_get_control(struct lcs_card *card, struct lcs_cmd *cmd) -{ - LCS_DBF_TEXT(5, trace, "getctrl"); - if (cmd->initiator == LCS_INITIATOR_LGW) { - switch(cmd->cmd_code) { - case LCS_CMD_STARTUP: - case LCS_CMD_STARTLAN: - lcs_schedule_recovery(card); - break; - case LCS_CMD_STOPLAN: - if (card->dev) { - pr_warn("Stoplan for %s initiated by LGW\n", - card->dev->name); - netif_carrier_off(card->dev); - } - break; - default: - LCS_DBF_TEXT(5, trace, "noLGWcmd"); - break; - } - } else - lcs_notify_lancmd_waiters(card, cmd); -} - -/* - * Unpack network packet. - */ -static void -lcs_get_skb(struct lcs_card *card, char *skb_data, unsigned int skb_len) -{ - struct sk_buff *skb; - - LCS_DBF_TEXT(5, trace, "getskb"); - if (card->dev == NULL || - card->state != DEV_STATE_UP) - /* The card isn't up. Ignore the packet. */ - return; - - skb = dev_alloc_skb(skb_len); - if (skb == NULL) { - dev_err(&card->dev->dev, - " Allocating a socket buffer to interface %s failed\n", - card->dev->name); - card->stats.rx_dropped++; - return; - } - skb_put_data(skb, skb_data, skb_len); - skb->protocol = card->lan_type_trans(skb, card->dev); - card->stats.rx_bytes += skb_len; - card->stats.rx_packets++; - if (skb->protocol == htons(ETH_P_802_2)) - *((__u32 *)skb->cb) = ++card->pkt_seq; - netif_rx(skb); -} - -/* - * LCS main routine to get packets and lancmd replies from the buffers - */ -static void -lcs_get_frames_cb(struct lcs_channel *channel, struct lcs_buffer *buffer) -{ - struct lcs_card *card; - struct lcs_header *lcs_hdr; - __u16 offset; - - LCS_DBF_TEXT(5, trace, "lcsgtpkt"); - lcs_hdr = (struct lcs_header *) buffer->data; - if (lcs_hdr->offset == LCS_ILLEGAL_OFFSET) { - LCS_DBF_TEXT(4, trace, "-eiogpkt"); - return; - } - card = container_of(channel, struct lcs_card, read); - offset = 0; - while (lcs_hdr->offset != 0) { - if (lcs_hdr->offset <= 0 || - lcs_hdr->offset > LCS_IOBUFFERSIZE || - lcs_hdr->offset < offset) { - /* Offset invalid. */ - card->stats.rx_length_errors++; - card->stats.rx_errors++; - return; - } - if (lcs_hdr->type == LCS_FRAME_TYPE_CONTROL) - lcs_get_control(card, (struct lcs_cmd *) lcs_hdr); - else if (lcs_hdr->type == LCS_FRAME_TYPE_ENET) - lcs_get_skb(card, (char *)(lcs_hdr + 1), - lcs_hdr->offset - offset - - sizeof(struct lcs_header)); - else - dev_info_once(&card->dev->dev, - "Unknown frame type %d\n", - lcs_hdr->type); - offset = lcs_hdr->offset; - lcs_hdr->offset = LCS_ILLEGAL_OFFSET; - lcs_hdr = (struct lcs_header *) (buffer->data + offset); - } - /* The buffer is now empty. Make it ready again. */ - lcs_ready_buffer(&card->read, buffer); -} - -/* - * get network statistics for ifconfig and other user programs - */ -static struct net_device_stats * -lcs_getstats(struct net_device *dev) -{ - struct lcs_card *card; - - LCS_DBF_TEXT(4, trace, "netstats"); - card = (struct lcs_card *) dev->ml_priv; - return &card->stats; -} - -/* - * stop lcs device - * This function will be called by user doing ifconfig xxx down - */ -static int -lcs_stop_device(struct net_device *dev) -{ - struct lcs_card *card; - int rc; - - LCS_DBF_TEXT(2, trace, "stopdev"); - card = (struct lcs_card *) dev->ml_priv; - netif_carrier_off(dev); - netif_tx_disable(dev); - dev->flags &= ~IFF_UP; - wait_event(card->write.wait_q, - (card->write.state != LCS_CH_STATE_RUNNING)); - rc = lcs_stopcard(card); - if (rc) - dev_err(&card->dev->dev, - " Shutting down the LCS device failed\n"); - return rc; -} - -/* - * start lcs device and make it runnable - * This function will be called by user doing ifconfig xxx up - */ -static int -lcs_open_device(struct net_device *dev) -{ - struct lcs_card *card; - int rc; - - LCS_DBF_TEXT(2, trace, "opendev"); - card = (struct lcs_card *) dev->ml_priv; - /* initialize statistics */ - rc = lcs_detect(card); - if (rc) { - pr_err("Error in opening device!\n"); - - } else { - dev->flags |= IFF_UP; - netif_carrier_on(dev); - netif_wake_queue(dev); - card->state = DEV_STATE_UP; - } - return rc; -} - -/* - * show function for portno called by cat or similar things - */ -static ssize_t -lcs_portno_show (struct device *dev, struct device_attribute *attr, char *buf) -{ - struct lcs_card *card; - - card = dev_get_drvdata(dev); - - if (!card) - return 0; - - return sysfs_emit(buf, "%d\n", card->portno); -} - -/* - * store the value which is piped to file portno - */ -static ssize_t -lcs_portno_store (struct device *dev, struct device_attribute *attr, const char *buf, size_t count) -{ - struct lcs_card *card; - int rc; - s16 value; - - card = dev_get_drvdata(dev); - - if (!card) - return 0; - - rc = kstrtos16(buf, 0, &value); - if (rc) - return -EINVAL; - /* TODO: sanity checks */ - card->portno = value; - if (card->dev) - card->dev->dev_port = card->portno; - - return count; - -} - -static DEVICE_ATTR(portno, 0644, lcs_portno_show, lcs_portno_store); - -static const char *lcs_type[] = { - "not a channel", - "2216 parallel", - "2216 channel", - "OSA LCS card", - "unknown channel type", - "unsupported channel type", -}; - -static ssize_t -lcs_type_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct ccwgroup_device *cgdev; - - cgdev = to_ccwgroupdev(dev); - if (!cgdev) - return -ENODEV; - - return sysfs_emit(buf, "%s\n", - lcs_type[cgdev->cdev[0]->id.driver_info]); -} - -static DEVICE_ATTR(type, 0444, lcs_type_show, NULL); - -static ssize_t -lcs_timeout_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct lcs_card *card; - - card = dev_get_drvdata(dev); - - return card ? sysfs_emit(buf, "%u\n", card->lancmd_timeout) : 0; -} - -static ssize_t -lcs_timeout_store (struct device *dev, struct device_attribute *attr, const char *buf, size_t count) -{ - struct lcs_card *card; - unsigned int value; - int rc; - - card = dev_get_drvdata(dev); - - if (!card) - return 0; - - rc = kstrtouint(buf, 0, &value); - if (rc) - return -EINVAL; - /* TODO: sanity checks */ - card->lancmd_timeout = value; - - return count; - -} - -static DEVICE_ATTR(lancmd_timeout, 0644, lcs_timeout_show, lcs_timeout_store); - -static ssize_t -lcs_dev_recover_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct lcs_card *card = dev_get_drvdata(dev); - char *tmp; - int i; - - if (!card) - return -EINVAL; - if (card->state != DEV_STATE_UP) - return -EPERM; - i = simple_strtoul(buf, &tmp, 16); - if (i == 1) - lcs_schedule_recovery(card); - return count; -} - -static DEVICE_ATTR(recover, 0200, NULL, lcs_dev_recover_store); - -static struct attribute * lcs_attrs[] = { - &dev_attr_portno.attr, - &dev_attr_type.attr, - &dev_attr_lancmd_timeout.attr, - &dev_attr_recover.attr, - NULL, -}; -static struct attribute_group lcs_attr_group = { - .attrs = lcs_attrs, -}; -static const struct attribute_group *lcs_attr_groups[] = { - &lcs_attr_group, - NULL, -}; -static const struct device_type lcs_devtype = { - .name = "lcs", - .groups = lcs_attr_groups, -}; - -/* - * lcs_probe_device is called on establishing a new ccwgroup_device. - */ -static int -lcs_probe_device(struct ccwgroup_device *ccwgdev) -{ - struct lcs_card *card; - - if (!get_device(&ccwgdev->dev)) - return -ENODEV; - - LCS_DBF_TEXT(2, setup, "add_dev"); - card = lcs_alloc_card(); - if (!card) { - LCS_DBF_TEXT_(2, setup, " rc%d", -ENOMEM); - put_device(&ccwgdev->dev); - return -ENOMEM; - } - dev_set_drvdata(&ccwgdev->dev, card); - ccwgdev->cdev[0]->handler = lcs_irq; - ccwgdev->cdev[1]->handler = lcs_irq; - card->gdev = ccwgdev; - INIT_WORK(&card->kernel_thread_starter, lcs_start_kernel_thread); - card->thread_start_mask = 0; - card->thread_allowed_mask = 0; - card->thread_running_mask = 0; - ccwgdev->dev.type = &lcs_devtype; - - return 0; -} - -static int -lcs_register_netdev(struct ccwgroup_device *ccwgdev) -{ - struct lcs_card *card; - - LCS_DBF_TEXT(2, setup, "regnetdv"); - card = dev_get_drvdata(&ccwgdev->dev); - if (card->dev->reg_state != NETREG_UNINITIALIZED) - return 0; - SET_NETDEV_DEV(card->dev, &ccwgdev->dev); - return register_netdev(card->dev); -} - -/* - * lcs_new_device will be called by setting the group device online. - */ -static const struct net_device_ops lcs_netdev_ops = { - .ndo_open = lcs_open_device, - .ndo_stop = lcs_stop_device, - .ndo_get_stats = lcs_getstats, - .ndo_start_xmit = lcs_start_xmit, -}; - -static const struct net_device_ops lcs_mc_netdev_ops = { - .ndo_open = lcs_open_device, - .ndo_stop = lcs_stop_device, - .ndo_get_stats = lcs_getstats, - .ndo_start_xmit = lcs_start_xmit, - .ndo_set_rx_mode = lcs_set_multicast_list, -}; - -static int -lcs_new_device(struct ccwgroup_device *ccwgdev) -{ - struct lcs_card *card; - struct net_device *dev=NULL; - enum lcs_dev_states recover_state; - int rc; - - card = dev_get_drvdata(&ccwgdev->dev); - if (!card) - return -ENODEV; - - LCS_DBF_TEXT(2, setup, "newdev"); - LCS_DBF_HEX(3, setup, &card, sizeof(void*)); - card->read.ccwdev = ccwgdev->cdev[0]; - card->write.ccwdev = ccwgdev->cdev[1]; - - recover_state = card->state; - rc = ccw_device_set_online(card->read.ccwdev); - if (rc) - goto out_err; - rc = ccw_device_set_online(card->write.ccwdev); - if (rc) - goto out_werr; - - LCS_DBF_TEXT(3, setup, "lcsnewdv"); - - lcs_setup_card(card); - rc = lcs_detect(card); - if (rc) { - LCS_DBF_TEXT(2, setup, "dtctfail"); - dev_err(&ccwgdev->dev, - "Detecting a network adapter for LCS devices" - " failed with rc=%d (0x%x)\n", rc, rc); - lcs_stopcard(card); - goto out; - } - if (card->dev) { - LCS_DBF_TEXT(2, setup, "samedev"); - LCS_DBF_HEX(3, setup, &card, sizeof(void*)); - goto netdev_out; - } - switch (card->lan_type) { - case LCS_FRAME_TYPE_ENET: - card->lan_type_trans = eth_type_trans; - dev = alloc_etherdev(0); - break; - default: - LCS_DBF_TEXT(3, setup, "errinit"); - pr_err(" Initialization failed\n"); - goto out; - } - if (!dev) - goto out; - card->dev = dev; - card->dev->ml_priv = card; - card->dev->netdev_ops = &lcs_netdev_ops; - card->dev->dev_port = card->portno; - eth_hw_addr_set(card->dev, card->mac); -#ifdef CONFIG_IP_MULTICAST - if (!lcs_check_multicast_support(card)) - card->dev->netdev_ops = &lcs_mc_netdev_ops; -#endif -netdev_out: - lcs_set_allowed_threads(card,0xffffffff); - if (recover_state == DEV_STATE_RECOVER) { - lcs_set_multicast_list(card->dev); - card->dev->flags |= IFF_UP; - netif_carrier_on(card->dev); - netif_wake_queue(card->dev); - card->state = DEV_STATE_UP; - } else { - lcs_stopcard(card); - } - - if (lcs_register_netdev(ccwgdev) != 0) - goto out; - - /* Print out supported assists: IPv6 */ - pr_info("LCS device %s %s IPv6 support\n", card->dev->name, - (card->ip_assists_supported & LCS_IPASS_IPV6_SUPPORT) ? - "with" : "without"); - /* Print out supported assist: Multicast */ - pr_info("LCS device %s %s Multicast support\n", card->dev->name, - (card->ip_assists_supported & LCS_IPASS_MULTICAST_SUPPORT) ? - "with" : "without"); - return 0; -out: - - ccw_device_set_offline(card->write.ccwdev); -out_werr: - ccw_device_set_offline(card->read.ccwdev); -out_err: - return -ENODEV; -} - -/* - * lcs_shutdown_device, called when setting the group device offline. - */ -static int -__lcs_shutdown_device(struct ccwgroup_device *ccwgdev, int recovery_mode) -{ - struct lcs_card *card; - enum lcs_dev_states recover_state; - int ret = 0, ret2 = 0, ret3 = 0; - - LCS_DBF_TEXT(3, setup, "shtdndev"); - card = dev_get_drvdata(&ccwgdev->dev); - if (!card) - return -ENODEV; - if (recovery_mode == 0) { - lcs_set_allowed_threads(card, 0); - if (lcs_wait_for_threads(card, LCS_SET_MC_THREAD)) - return -ERESTARTSYS; - } - LCS_DBF_HEX(3, setup, &card, sizeof(void*)); - recover_state = card->state; - - ret = lcs_stop_device(card->dev); - ret2 = ccw_device_set_offline(card->read.ccwdev); - ret3 = ccw_device_set_offline(card->write.ccwdev); - if (!ret) - ret = (ret2) ? ret2 : ret3; - if (ret) - LCS_DBF_TEXT_(3, setup, "1err:%d", ret); - if (recover_state == DEV_STATE_UP) { - card->state = DEV_STATE_RECOVER; - } - return 0; -} - -static int -lcs_shutdown_device(struct ccwgroup_device *ccwgdev) -{ - return __lcs_shutdown_device(ccwgdev, 0); -} - -/* - * drive lcs recovery after startup and startlan initiated by Lan Gateway - */ -static int -lcs_recovery(void *ptr) -{ - struct lcs_card *card; - struct ccwgroup_device *gdev; - int rc; - - card = (struct lcs_card *) ptr; - - LCS_DBF_TEXT(4, trace, "recover1"); - if (!lcs_do_run_thread(card, LCS_RECOVERY_THREAD)) - return 0; - LCS_DBF_TEXT(4, trace, "recover2"); - gdev = card->gdev; - dev_warn(&gdev->dev, - "A recovery process has been started for the LCS device\n"); - rc = __lcs_shutdown_device(gdev, 1); - rc = lcs_new_device(gdev); - if (!rc) - pr_info("Device %s successfully recovered!\n", - card->dev->name); - else - pr_info("Device %s could not be recovered!\n", - card->dev->name); - lcs_clear_thread_running_bit(card, LCS_RECOVERY_THREAD); - return 0; -} - -/* - * lcs_remove_device, free buffers and card - */ -static void -lcs_remove_device(struct ccwgroup_device *ccwgdev) -{ - struct lcs_card *card; - - card = dev_get_drvdata(&ccwgdev->dev); - if (!card) - return; - - LCS_DBF_TEXT(3, setup, "remdev"); - LCS_DBF_HEX(3, setup, &card, sizeof(void*)); - if (ccwgdev->state == CCWGROUP_ONLINE) { - lcs_shutdown_device(ccwgdev); - } - if (card->dev) - unregister_netdev(card->dev); - lcs_cleanup_card(card); - lcs_free_card(card); - dev_set_drvdata(&ccwgdev->dev, NULL); - put_device(&ccwgdev->dev); -} - -static struct ccw_device_id lcs_ids[] = { - {CCW_DEVICE(0x3088, 0x08), .driver_info = lcs_channel_type_parallel}, - {CCW_DEVICE(0x3088, 0x1f), .driver_info = lcs_channel_type_2216}, - {CCW_DEVICE(0x3088, 0x60), .driver_info = lcs_channel_type_osa2}, - {}, -}; -MODULE_DEVICE_TABLE(ccw, lcs_ids); - -static struct ccw_driver lcs_ccw_driver = { - .driver = { - .owner = THIS_MODULE, - .name = "lcs", - }, - .ids = lcs_ids, - .probe = ccwgroup_probe_ccwdev, - .remove = ccwgroup_remove_ccwdev, - .int_class = IRQIO_LCS, -}; - -/* - * LCS ccwgroup driver registration - */ -static struct ccwgroup_driver lcs_group_driver = { - .driver = { - .owner = THIS_MODULE, - .name = "lcs", - }, - .ccw_driver = &lcs_ccw_driver, - .setup = lcs_probe_device, - .remove = lcs_remove_device, - .set_online = lcs_new_device, - .set_offline = lcs_shutdown_device, -}; - -static ssize_t group_store(struct device_driver *ddrv, const char *buf, - size_t count) -{ - int err; - err = ccwgroup_create_dev(lcs_root_dev, &lcs_group_driver, 2, buf); - return err ? err : count; -} -static DRIVER_ATTR_WO(group); - -static struct attribute *lcs_drv_attrs[] = { - &driver_attr_group.attr, - NULL, -}; -static struct attribute_group lcs_drv_attr_group = { - .attrs = lcs_drv_attrs, -}; -static const struct attribute_group *lcs_drv_attr_groups[] = { - &lcs_drv_attr_group, - NULL, -}; - -/* - * LCS Module/Kernel initialization function - */ -static int -__init lcs_init_module(void) -{ - int rc; - - pr_info("Loading %s\n", version); - rc = lcs_register_debug_facility(); - LCS_DBF_TEXT(0, setup, "lcsinit"); - if (rc) - goto out_err; - lcs_root_dev = root_device_register("lcs"); - rc = PTR_ERR_OR_ZERO(lcs_root_dev); - if (rc) - goto register_err; - rc = ccw_driver_register(&lcs_ccw_driver); - if (rc) - goto ccw_err; - lcs_group_driver.driver.groups = lcs_drv_attr_groups; - rc = ccwgroup_driver_register(&lcs_group_driver); - if (rc) - goto ccwgroup_err; - return 0; - -ccwgroup_err: - ccw_driver_unregister(&lcs_ccw_driver); -ccw_err: - root_device_unregister(lcs_root_dev); -register_err: - lcs_unregister_debug_facility(); -out_err: - pr_err("Initializing the lcs device driver failed\n"); - return rc; -} - - -/* - * LCS module cleanup function - */ -static void -__exit lcs_cleanup_module(void) -{ - pr_info("Terminating lcs module.\n"); - LCS_DBF_TEXT(0, trace, "cleanup"); - ccwgroup_driver_unregister(&lcs_group_driver); - ccw_driver_unregister(&lcs_ccw_driver); - root_device_unregister(lcs_root_dev); - lcs_unregister_debug_facility(); -} - -module_init(lcs_init_module); -module_exit(lcs_cleanup_module); - -MODULE_AUTHOR("Frank Pavlic <fpavlic@de.ibm.com>"); -MODULE_DESCRIPTION("S/390 LAN channel station device driver"); -MODULE_LICENSE("GPL"); - diff --git a/drivers/s390/net/lcs.h b/drivers/s390/net/lcs.h deleted file mode 100644 index a2699b70b050..000000000000 --- a/drivers/s390/net/lcs.h +++ /dev/null @@ -1,342 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/*lcs.h*/ - -#include <linux/interrupt.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <linux/workqueue.h> -#include <linux/refcount.h> -#include <asm/ccwdev.h> - -#define LCS_DBF_TEXT(level, name, text) \ - do { \ - debug_text_event(lcs_dbf_##name, level, text); \ - } while (0) - -#define LCS_DBF_HEX(level,name,addr,len) \ -do { \ - debug_event(lcs_dbf_##name,level,(void*)(addr),len); \ -} while (0) - -#define LCS_DBF_TEXT_(level,name,text...) \ - do { \ - if (debug_level_enabled(lcs_dbf_##name, level)) { \ - scnprintf(debug_buffer, sizeof(debug_buffer), text); \ - debug_text_event(lcs_dbf_##name, level, debug_buffer); \ - } \ - } while (0) - -/** - * sysfs related stuff - */ -#define CARD_FROM_DEV(cdev) \ - (struct lcs_card *) dev_get_drvdata( \ - &((struct ccwgroup_device *)dev_get_drvdata(&cdev->dev))->dev); - -/** - * Enum for classifying detected devices. - */ -enum lcs_channel_types { - /* Device is not a channel */ - lcs_channel_type_none, - - /* Device is a 2216 channel */ - lcs_channel_type_parallel, - - /* Device is a 2216 channel */ - lcs_channel_type_2216, - - /* Device is a OSA2 card */ - lcs_channel_type_osa2 -}; - -/** - * CCW commands used in this driver - */ -#define LCS_CCW_WRITE 0x01 -#define LCS_CCW_READ 0x02 -#define LCS_CCW_TRANSFER 0x08 - -/** - * LCS device status primitives - */ -#define LCS_CMD_STARTLAN 0x01 -#define LCS_CMD_STOPLAN 0x02 -#define LCS_CMD_LANSTAT 0x04 -#define LCS_CMD_STARTUP 0x07 -#define LCS_CMD_SHUTDOWN 0x08 -#define LCS_CMD_QIPASSIST 0xb2 -#define LCS_CMD_SETIPM 0xb4 -#define LCS_CMD_DELIPM 0xb5 - -#define LCS_INITIATOR_TCPIP 0x00 -#define LCS_INITIATOR_LGW 0x01 -#define LCS_STD_CMD_SIZE 16 -#define LCS_MULTICAST_CMD_SIZE 404 - -/** - * LCS IPASSIST MASKS,only used when multicast is switched on - */ -/* Not supported by LCS */ -#define LCS_IPASS_ARP_PROCESSING 0x0001 -#define LCS_IPASS_IN_CHECKSUM_SUPPORT 0x0002 -#define LCS_IPASS_OUT_CHECKSUM_SUPPORT 0x0004 -#define LCS_IPASS_IP_FRAG_REASSEMBLY 0x0008 -#define LCS_IPASS_IP_FILTERING 0x0010 -/* Supported by lcs 3172 */ -#define LCS_IPASS_IPV6_SUPPORT 0x0020 -#define LCS_IPASS_MULTICAST_SUPPORT 0x0040 - -/** - * LCS sense byte definitions - */ -#define LCS_SENSE_BYTE_0 0 -#define LCS_SENSE_BYTE_1 1 -#define LCS_SENSE_BYTE_2 2 -#define LCS_SENSE_BYTE_3 3 -#define LCS_SENSE_INTERFACE_DISCONNECT 0x01 -#define LCS_SENSE_EQUIPMENT_CHECK 0x10 -#define LCS_SENSE_BUS_OUT_CHECK 0x20 -#define LCS_SENSE_INTERVENTION_REQUIRED 0x40 -#define LCS_SENSE_CMD_REJECT 0x80 -#define LCS_SENSE_RESETTING_EVENT 0x80 -#define LCS_SENSE_DEVICE_ONLINE 0x20 - -/** - * LCS packet type definitions - */ -#define LCS_FRAME_TYPE_CONTROL 0 -#define LCS_FRAME_TYPE_ENET 1 -#define LCS_FRAME_TYPE_TR 2 -#define LCS_FRAME_TYPE_FDDI 7 -#define LCS_FRAME_TYPE_AUTO -1 - -/** - * some more definitions,we will sort them later - */ -#define LCS_ILLEGAL_OFFSET 0xffff -#define LCS_IOBUFFERSIZE 0x5000 -#define LCS_NUM_BUFFS 32 /* needs to be power of 2 */ -#define LCS_MAC_LENGTH 6 -#define LCS_INVALID_PORT_NO -1 -#define LCS_LANCMD_TIMEOUT_DEFAULT 5 - -/** - * Multicast state - */ -#define LCS_IPM_STATE_SET_REQUIRED 0 -#define LCS_IPM_STATE_DEL_REQUIRED 1 -#define LCS_IPM_STATE_ON_CARD 2 - -/** - * LCS IP Assist declarations - * seems to be only used for multicast - */ -#define LCS_IPASS_ARP_PROCESSING 0x0001 -#define LCS_IPASS_INBOUND_CSUM_SUPP 0x0002 -#define LCS_IPASS_OUTBOUND_CSUM_SUPP 0x0004 -#define LCS_IPASS_IP_FRAG_REASSEMBLY 0x0008 -#define LCS_IPASS_IP_FILTERING 0x0010 -#define LCS_IPASS_IPV6_SUPPORT 0x0020 -#define LCS_IPASS_MULTICAST_SUPPORT 0x0040 - -/** - * LCS Buffer states - */ -enum lcs_buffer_states { - LCS_BUF_STATE_EMPTY, /* buffer is empty */ - LCS_BUF_STATE_LOCKED, /* buffer is locked, don't touch */ - LCS_BUF_STATE_READY, /* buffer is ready for read/write */ - LCS_BUF_STATE_PROCESSED, -}; - -/** - * LCS Channel State Machine declarations - */ -enum lcs_channel_states { - LCS_CH_STATE_INIT, - LCS_CH_STATE_HALTED, - LCS_CH_STATE_STOPPED, - LCS_CH_STATE_RUNNING, - LCS_CH_STATE_SUSPENDED, - LCS_CH_STATE_CLEARED, - LCS_CH_STATE_ERROR, -}; - -/** - * LCS device state machine - */ -enum lcs_dev_states { - DEV_STATE_DOWN, - DEV_STATE_UP, - DEV_STATE_RECOVER, -}; - -enum lcs_threads { - LCS_SET_MC_THREAD = 1, - LCS_RECOVERY_THREAD = 2, -}; - -/** - * LCS struct declarations - */ -struct lcs_header { - __u16 offset; - __u8 type; - __u8 slot; -} __attribute__ ((packed)); - -struct lcs_ip_mac_pair { - __be32 ip_addr; - __u8 mac_addr[LCS_MAC_LENGTH]; - __u8 reserved[2]; -} __attribute__ ((packed)); - -struct lcs_ipm_list { - struct list_head list; - struct lcs_ip_mac_pair ipm; - __u8 ipm_state; -}; - -struct lcs_cmd { - __u16 offset; - __u8 type; - __u8 slot; - __u8 cmd_code; - __u8 initiator; - __u16 sequence_no; - __u16 return_code; - union { - struct { - __u8 lan_type; - __u8 portno; - __u16 parameter_count; - __u8 operator_flags[3]; - __u8 reserved[3]; - } lcs_std_cmd; - struct { - __u16 unused1; - __u16 buff_size; - __u8 unused2[6]; - } lcs_startup; - struct { - __u8 lan_type; - __u8 portno; - __u8 unused[10]; - __u8 mac_addr[LCS_MAC_LENGTH]; - __u32 num_packets_deblocked; - __u32 num_packets_blocked; - __u32 num_packets_tx_on_lan; - __u32 num_tx_errors_detected; - __u32 num_tx_packets_disgarded; - __u32 num_packets_rx_from_lan; - __u32 num_rx_errors_detected; - __u32 num_rx_discarded_nobuffs_avail; - __u32 num_rx_packets_too_large; - } lcs_lanstat_cmd; -#ifdef CONFIG_IP_MULTICAST - struct { - __u8 lan_type; - __u8 portno; - __u16 num_ip_pairs; - __u16 ip_assists_supported; - __u16 ip_assists_enabled; - __u16 version; - struct { - struct lcs_ip_mac_pair - ip_mac_pair[32]; - __u32 response_data; - } lcs_ipass_ctlmsg __attribute ((packed)); - } lcs_qipassist __attribute__ ((packed)); -#endif /*CONFIG_IP_MULTICAST */ - } cmd __attribute__ ((packed)); -} __attribute__ ((packed)); - -/** - * Forward declarations. - */ -struct lcs_card; -struct lcs_channel; - -/** - * Definition of an lcs buffer. - */ -struct lcs_buffer { - enum lcs_buffer_states state; - void *data; - int count; - /* Callback for completion notification. */ - void (*callback)(struct lcs_channel *, struct lcs_buffer *); -}; - -struct lcs_reply { - struct list_head list; - __u16 sequence_no; - refcount_t refcnt; - /* Callback for completion notification. */ - void (*callback)(struct lcs_card *, struct lcs_cmd *); - wait_queue_head_t wait_q; - struct lcs_card *card; - struct timer_list timer; - int received; - int rc; -}; - -/** - * Definition of an lcs channel - */ -struct lcs_channel { - enum lcs_channel_states state; - struct ccw_device *ccwdev; - struct ccw1 ccws[LCS_NUM_BUFFS + 1]; - wait_queue_head_t wait_q; - struct tasklet_struct irq_tasklet; - struct lcs_buffer iob[LCS_NUM_BUFFS]; - int io_idx; - int buf_idx; -}; - - -/** - * definition of the lcs card - */ -struct lcs_card { - spinlock_t lock; - spinlock_t ipm_lock; - enum lcs_dev_states state; - struct net_device *dev; - struct net_device_stats stats; - __be16 (*lan_type_trans)(struct sk_buff *skb, - struct net_device *dev); - struct ccwgroup_device *gdev; - struct lcs_channel read; - struct lcs_channel write; - struct lcs_buffer *tx_buffer; - int tx_emitted; - struct list_head lancmd_waiters; - int lancmd_timeout; - - struct work_struct kernel_thread_starter; - spinlock_t mask_lock; - unsigned long thread_start_mask; - unsigned long thread_running_mask; - unsigned long thread_allowed_mask; - wait_queue_head_t wait_q; - -#ifdef CONFIG_IP_MULTICAST - struct list_head ipm_list; -#endif - __u8 mac[LCS_MAC_LENGTH]; - __u16 ip_assists_supported; - __u16 ip_assists_enabled; - __s8 lan_type; - __u32 pkt_seq; - __u16 sequence_no; - __s16 portno; - /* Some info copied from probeinfo */ - u8 device_forced; - u8 max_port_no; - u8 hint_port_no; - s16 port_protocol_no; -} __attribute__ ((aligned(8))); - diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 7c0980db77b3..2fecf66661e9 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -447,7 +447,7 @@ static bool ep_busy_loop(struct eventpoll *ep, int nonblock) if (!budget) budget = BUSY_POLL_BUDGET; - if (napi_id >= MIN_NAPI_ID && ep_busy_loop_on(ep)) { + if (napi_id_valid(napi_id) && ep_busy_loop_on(ep)) { napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end, ep, prefer_busy_poll, budget); if (ep_events_available(ep)) @@ -492,7 +492,7 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi) * or * Nothing to do if we already have this ID */ - if (napi_id < MIN_NAPI_ID || napi_id == ep->napi_id) + if (!napi_id_valid(napi_id) || napi_id == ep->napi_id) return; /* record NAPI ID for use in next busy poll */ @@ -546,7 +546,7 @@ static void ep_suspend_napi_irqs(struct eventpoll *ep) { unsigned int napi_id = READ_ONCE(ep->napi_id); - if (napi_id >= MIN_NAPI_ID && READ_ONCE(ep->prefer_busy_poll)) + if (napi_id_valid(napi_id) && READ_ONCE(ep->prefer_busy_poll)) napi_suspend_irqs(napi_id); } @@ -554,7 +554,7 @@ static void ep_resume_napi_irqs(struct eventpoll *ep) { unsigned int napi_id = READ_ONCE(ep->napi_id); - if (napi_id >= MIN_NAPI_ID && READ_ONCE(ep->prefer_busy_poll)) + if (napi_id_valid(napi_id) && READ_ONCE(ep->prefer_busy_poll)) napi_resume_irqs(napi_id); } diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 13a11f3c09b8..4811b9a14604 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -154,7 +154,10 @@ enum virtchnl_ops { VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2 = 55, VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2 = 56, VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2 = 57, - /* opcode 57 - 65 are reserved */ + /* opcode 58 and 59 are reserved */ + VIRTCHNL_OP_1588_PTP_GET_CAPS = 60, + VIRTCHNL_OP_1588_PTP_GET_TIME = 61, + /* opcode 62 - 65 are reserved */ VIRTCHNL_OP_GET_QOS_CAPS = 66, /* opcode 68 through 111 are reserved */ VIRTCHNL_OP_CONFIG_QUEUE_BW = 112, @@ -270,6 +273,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); #define VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF BIT(27) #define VIRTCHNL_VF_OFFLOAD_FDIR_PF BIT(28) #define VIRTCHNL_VF_OFFLOAD_QOS BIT(29) +#define VIRTCHNL_VF_CAP_PTP BIT(31) #define VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \ VIRTCHNL_VF_OFFLOAD_VLAN | \ @@ -309,6 +313,60 @@ struct virtchnl_txq_info { VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_txq_info); +/* RX descriptor IDs (range from 0 to 63) */ +enum virtchnl_rx_desc_ids { + VIRTCHNL_RXDID_0_16B_BASE = 0, + VIRTCHNL_RXDID_1_32B_BASE = 1, + VIRTCHNL_RXDID_2_FLEX_SQ_NIC = 2, + VIRTCHNL_RXDID_3_FLEX_SQ_SW = 3, + VIRTCHNL_RXDID_4_FLEX_SQ_NIC_VEB = 4, + VIRTCHNL_RXDID_5_FLEX_SQ_NIC_ACL = 5, + VIRTCHNL_RXDID_6_FLEX_SQ_NIC_2 = 6, + VIRTCHNL_RXDID_7_HW_RSVD = 7, + /* 8 through 15 are reserved */ + VIRTCHNL_RXDID_16_COMMS_GENERIC = 16, + VIRTCHNL_RXDID_17_COMMS_AUX_VLAN = 17, + VIRTCHNL_RXDID_18_COMMS_AUX_IPV4 = 18, + VIRTCHNL_RXDID_19_COMMS_AUX_IPV6 = 19, + VIRTCHNL_RXDID_20_COMMS_AUX_FLOW = 20, + VIRTCHNL_RXDID_21_COMMS_AUX_TCP = 21, + /* 22 through 63 are reserved */ +}; + +#define VIRTCHNL_RXDID_BIT(x) BIT_ULL(VIRTCHNL_RXDID_##x) + +/* RX descriptor ID bitmasks */ +enum virtchnl_rx_desc_id_bitmasks { + VIRTCHNL_RXDID_0_16B_BASE_M = VIRTCHNL_RXDID_BIT(0_16B_BASE), + VIRTCHNL_RXDID_1_32B_BASE_M = VIRTCHNL_RXDID_BIT(1_32B_BASE), + VIRTCHNL_RXDID_2_FLEX_SQ_NIC_M = VIRTCHNL_RXDID_BIT(2_FLEX_SQ_NIC), + VIRTCHNL_RXDID_3_FLEX_SQ_SW_M = VIRTCHNL_RXDID_BIT(3_FLEX_SQ_SW), + VIRTCHNL_RXDID_4_FLEX_SQ_NIC_VEB_M = VIRTCHNL_RXDID_BIT(4_FLEX_SQ_NIC_VEB), + VIRTCHNL_RXDID_5_FLEX_SQ_NIC_ACL_M = VIRTCHNL_RXDID_BIT(5_FLEX_SQ_NIC_ACL), + VIRTCHNL_RXDID_6_FLEX_SQ_NIC_2_M = VIRTCHNL_RXDID_BIT(6_FLEX_SQ_NIC_2), + VIRTCHNL_RXDID_7_HW_RSVD_M = VIRTCHNL_RXDID_BIT(7_HW_RSVD), + /* 8 through 15 are reserved */ + VIRTCHNL_RXDID_16_COMMS_GENERIC_M = VIRTCHNL_RXDID_BIT(16_COMMS_GENERIC), + VIRTCHNL_RXDID_17_COMMS_AUX_VLAN_M = VIRTCHNL_RXDID_BIT(17_COMMS_AUX_VLAN), + VIRTCHNL_RXDID_18_COMMS_AUX_IPV4_M = VIRTCHNL_RXDID_BIT(18_COMMS_AUX_IPV4), + VIRTCHNL_RXDID_19_COMMS_AUX_IPV6_M = VIRTCHNL_RXDID_BIT(19_COMMS_AUX_IPV6), + VIRTCHNL_RXDID_20_COMMS_AUX_FLOW_M = VIRTCHNL_RXDID_BIT(20_COMMS_AUX_FLOW), + VIRTCHNL_RXDID_21_COMMS_AUX_TCP_M = VIRTCHNL_RXDID_BIT(21_COMMS_AUX_TCP), + /* 22 through 63 are reserved */ +}; + +/* virtchnl_rxq_info_flags - definition of bits in the flags field of the + * virtchnl_rxq_info structure. + * + * @VIRTCHNL_PTP_RX_TSTAMP: request to enable Rx timestamping + * + * Other flag bits are currently reserved and they may be extended in the + * future. + */ +enum virtchnl_rxq_info_flags { + VIRTCHNL_PTP_RX_TSTAMP = BIT(0), +}; + /* VIRTCHNL_OP_CONFIG_RX_QUEUE * VF sends this message to set up parameters for one RX queue. * External data buffer contains one instance of virtchnl_rxq_info. @@ -331,8 +389,14 @@ struct virtchnl_rxq_info { u32 databuffer_size; u32 max_pkt_size; u8 crc_disable; - u8 rxdid; - u8 pad1[2]; + /* see enum virtchnl_rx_desc_ids; + * only used when VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC is supported. Note + * that when the offload is not supported, the descriptor format aligns + * with VIRTCHNL_RXDID_1_32B_BASE. + */ + enum virtchnl_rx_desc_ids rxdid:8; + enum virtchnl_rxq_info_flags flags:8; /* see virtchnl_rxq_info_flags */ + u8 pad1; u64 dma_ring_addr; /* see enum virtchnl_rx_hsplit; deprecated with AVF 1.0 */ @@ -1032,10 +1096,6 @@ struct virtchnl_filter { VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter); -struct virtchnl_supported_rxdids { - u64 supported_rxdids; -}; - /* VIRTCHNL_OP_EVENT * PF sends this message to inform the VF driver of events that may affect it. * No direct response is expected from the VF, though it may generate other @@ -1425,6 +1485,61 @@ struct virtchnl_fdir_del { VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del); +#define VIRTCHNL_1588_PTP_CAP_RX_TSTAMP BIT(1) +#define VIRTCHNL_1588_PTP_CAP_READ_PHC BIT(2) + +/** + * struct virtchnl_ptp_caps - Defines the PTP caps available to the VF. + * @caps: On send, VF sets what capabilities it requests. On reply, PF + * indicates what has been enabled for this VF. The PF shall not set + * bits which were not requested by the VF. + * @rsvd: Reserved bits for future extension. + * + * Structure that defines the PTP capabilities available to the VF. The VF + * sends VIRTCHNL_OP_1588_PTP_GET_CAPS, and must fill in the ptp_caps field + * indicating what capabilities it is requesting. The PF will respond with the + * same message with the virtchnl_ptp_caps structure indicating what is + * enabled for the VF. + * + * VIRTCHNL_1588_PTP_CAP_RX_TSTAMP indicates that the VF receive queues have + * receive timestamps enabled in the flexible descriptors. Note that this + * requires a VF to also negotiate to enable advanced flexible descriptors in + * the receive path instead of the default legacy descriptor format. + * + * VIRTCHNL_1588_PTP_CAP_READ_PHC indicates that the VF may read the PHC time + * via the VIRTCHNL_OP_1588_PTP_GET_TIME command. + * + * Note that in the future, additional capability flags may be added which + * indicate additional extended support. All fields marked as reserved by this + * header will be set to zero. VF implementations should verify this to ensure + * that future extensions do not break compatibility. + */ +struct virtchnl_ptp_caps { + u32 caps; + u8 rsvd[44]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(48, virtchnl_ptp_caps); + +/** + * struct virtchnl_phc_time - Contains the 64bits of PHC clock time in ns. + * @time: PHC time in nanoseconds + * @rsvd: Reserved for future extension + * + * Structure received with VIRTCHNL_OP_1588_PTP_GET_TIME. Contains the 64bits + * of PHC clock time in nanoseconds. + * + * VIRTCHNL_OP_1588_PTP_GET_TIME may be sent to request the current time of + * the PHC. This op is available in case direct access via the PHC registers + * is not available. + */ +struct virtchnl_phc_time { + u64 time; + u8 rsvd[8]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_phc_time); + struct virtchnl_shaper_bw { /* Unit is Kbps */ u32 committed; @@ -1757,6 +1872,12 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, } } break; + case VIRTCHNL_OP_1588_PTP_GET_CAPS: + valid_len = sizeof(struct virtchnl_ptp_caps); + break; + case VIRTCHNL_OP_1588_PTP_GET_TIME: + valid_len = sizeof(struct virtchnl_phc_time); + break; /* These are always errors coming from the VF. */ case VIRTCHNL_OP_EVENT: case VIRTCHNL_OP_UNKNOWN: diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 27f42f713c89..f016263e1fcf 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1135,7 +1135,7 @@ int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, struct mlx4_buf *buf); -int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order); +int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, unsigned int order); void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db); int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, @@ -1415,7 +1415,6 @@ int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port, bool *vlan_offload_disabled); void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl, struct _rule_hw *eth_header); -int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index af86097641b0..46bd7550adf8 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -54,7 +54,6 @@ #include <linux/mlx5/doorbell.h> #include <linux/mlx5/eq.h> #include <linux/timecounter.h> -#include <linux/ptp_clock_kernel.h> #include <net/devlink.h> #define MLX5_ADEV_NAME "mlx5_core" @@ -679,33 +678,8 @@ struct mlx5_rsvd_gids { struct ida ida; }; -#define MAX_PIN_NUM 8 -struct mlx5_pps { - u8 pin_caps[MAX_PIN_NUM]; - struct work_struct out_work; - u64 start[MAX_PIN_NUM]; - u8 enabled; - u64 min_npps_period; - u64 min_out_pulse_duration_ns; -}; - -struct mlx5_timer { - struct cyclecounter cycles; - struct timecounter tc; - u32 nominal_c_mult; - unsigned long overflow_period; -}; - -struct mlx5_clock { - struct mlx5_nb pps_nb; - seqlock_t lock; - struct hwtstamp_config hwtstamp_config; - struct ptp_clock *ptp; - struct ptp_clock_info ptp_info; - struct mlx5_pps pps_info; - struct mlx5_timer timer; -}; - +struct mlx5_clock; +struct mlx5_clock_dev_state; struct mlx5_dm; struct mlx5_fw_tracer; struct mlx5_vxlan; @@ -789,7 +763,8 @@ struct mlx5_core_dev { #ifdef CONFIG_MLX5_FPGA struct mlx5_fpga_device *fpga; #endif - struct mlx5_clock clock; + struct mlx5_clock *clock; + struct mlx5_clock_dev_state *clock_state; struct mlx5_ib_clock_info *clock_info; struct mlx5_fw_tracer *tracer; struct mlx5_rsc_dump *rsc_dump; diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index e68d42b8ce65..fd625e0dd869 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -115,9 +115,12 @@ enum mlx5e_ext_link_mode { MLX5E_100GAUI_1_100GBASE_CR_KR = 11, MLX5E_200GAUI_4_200GBASE_CR4_KR4 = 12, MLX5E_200GAUI_2_200GBASE_CR2_KR2 = 13, + MLX5E_200GAUI_1_200GBASE_CR1_KR1 = 14, MLX5E_400GAUI_8_400GBASE_CR8 = 15, MLX5E_400GAUI_4_400GBASE_CR4_KR4 = 16, + MLX5E_400GAUI_2_400GBASE_CR2_KR2 = 17, MLX5E_800GAUI_8_800GBASE_CR8_KR8 = 19, + MLX5E_800GAUI_4_800GBASE_CR4_KR4 = 20, MLX5E_EXT_LINK_MODES_NUMBER, }; diff --git a/include/linux/net/intel/iidc.h b/include/linux/net/intel/iidc.h index 1c1332e4df26..13274c3def66 100644 --- a/include/linux/net/intel/iidc.h +++ b/include/linux/net/intel/iidc.h @@ -78,6 +78,8 @@ int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset); int ice_rdma_request_reset(struct ice_pf *pf, enum iidc_reset_type reset_type); int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable); void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos); +int ice_alloc_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry); +void ice_free_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry); /* Structure representing auxiliary driver tailored information about the core * PCI dev, each auxiliary driver using the IIDC interface will have an diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ab550a89b9bf..9a387d456592 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -658,6 +658,7 @@ struct netdev_queue { struct Qdisc __rcu *qdisc_sleeping; #ifdef CONFIG_SYSFS struct kobject kobj; + const struct attribute_group **groups; #endif unsigned long tx_maxrate; /* diff --git a/include/linux/of.h b/include/linux/of.h index eaf0e2a2b75c..9d6b8a61607f 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -301,6 +301,8 @@ extern struct device_node *of_get_compatible_child(const struct device_node *par const char *compatible); extern struct device_node *of_get_child_by_name(const struct device_node *node, const char *name); +extern struct device_node *of_get_available_child_by_name(const struct device_node *node, + const char *name); /* cache lookup */ extern struct device_node *of_find_next_cache_node(const struct device_node *); @@ -578,6 +580,13 @@ static inline struct device_node *of_get_child_by_name( return NULL; } +static inline struct device_node *of_get_available_child_by_name( + const struct device_node *node, + const char *name) +{ + return NULL; +} + static inline int of_device_is_compatible(const struct device_node *device, const char *name) { diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index 733f4ddd2ef1..e40f554ff717 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -50,8 +50,7 @@ struct dw_xpcs; struct phylink_pcs *xpcs_to_phylink_pcs(struct dw_xpcs *xpcs); int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface); -int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, - int enable); +void xpcs_config_eee_mult_fact(struct dw_xpcs *xpcs, u8 mult_fact); struct dw_xpcs *xpcs_create_mdiodev(struct mii_bus *bus, int addr); struct dw_xpcs *xpcs_create_fwnode(struct fwnode_handle *fwnode); void xpcs_destroy(struct dw_xpcs *xpcs); diff --git a/include/linux/phy.h b/include/linux/phy.h index 19f076a71f94..584710e084eb 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -54,11 +54,6 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap2_features) __ro_after_init; #define PHY_EEE_CAP2_FEATURES ((unsigned long *)&phy_eee_cap2_features) extern const int phy_basic_ports_array[3]; -extern const int phy_10_100_features_array[4]; -extern const int phy_basic_t1_features_array[3]; -extern const int phy_basic_t1s_p2mp_features_array[2]; -extern const int phy_gbit_features_array[2]; -extern const int phy_10gbit_features_array[1]; /* * Set phydev->irq to PHY_POLL if interrupts are not supported, @@ -303,9 +298,6 @@ static inline long rgmii_clock(int speed) } } -#define PHY_INIT_TIMEOUT 100000 -#define PHY_FORCE_TIMEOUT 10 - #define PHY_MAX_ADDR 32 /* Used when trying to connect to a specific phy (mii bus id:phy device id) */ @@ -611,7 +603,7 @@ struct macsec_ops; * @eee_cfg: User configuration of EEE * @lp_advertising: Current link partner advertised linkmodes * @host_interfaces: PHY interface modes supported by host - * @eee_broken_modes: Energy efficient ethernet modes which should be prohibited + * @eee_disabled_modes: Energy efficient ethernet modes not to be advertised * @autoneg: Flag autoneg being used * @rate_matching: Current rate matching mode * @link: Current link state @@ -727,7 +719,7 @@ struct phy_device { __ETHTOOL_DECLARE_LINK_MODE_MASK(supported_eee); __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising_eee); /* Energy efficient ethernet modes which should be prohibited */ - __ETHTOOL_DECLARE_LINK_MODE_MASK(eee_broken_modes); + __ETHTOOL_DECLARE_LINK_MODE_MASK(eee_disabled_modes); bool enable_tx_lpi; bool eee_active; struct eee_config eee_cfg; @@ -1273,13 +1265,23 @@ struct phy_driver { */ int (*led_polarity_set)(struct phy_device *dev, int index, unsigned long modes); + + /** + * @get_next_update_time: Get the time until the next update event + * @dev: PHY device + * + * Callback to determine the time (in jiffies) until the next + * update event for the PHY state machine. Allows PHY drivers to + * dynamically adjust polling intervals based on link state or other + * conditions. + * + * Returns the time in jiffies until the next update event. + */ + unsigned int (*get_next_update_time)(struct phy_device *dev); }; #define to_phy_driver(d) container_of_const(to_mdio_common_driver(d), \ struct phy_driver, mdiodrv) -#define PHY_ANY_ID "MATCH ANY PHY" -#define PHY_ANY_UID 0xffffffff - #define PHY_ID_MATCH_EXACT(id) .phy_id = (id), .phy_id_mask = GENMASK(31, 0) #define PHY_ID_MATCH_MODEL(id) .phy_id = (id), .phy_id_mask = GENMASK(31, 4) #define PHY_ID_MATCH_VENDOR(id) .phy_id = (id), .phy_id_mask = GENMASK(31, 10) @@ -1312,15 +1314,6 @@ static inline bool phydev_id_compare(struct phy_device *phydev, u32 id) return phy_id_compare(id, phydev->phy_id, phydev->drv->phy_id_mask); } -/* A Structure for boards to register fixups with the PHY Lib */ -struct phy_fixup { - struct list_head list; - char bus_id[MII_BUS_ID_SIZE + 3]; - u32 phy_uid; - u32 phy_uid_mask; - int (*run)(struct phy_device *phydev); -}; - const char *phy_speed_to_str(int speed); const char *phy_duplex_to_str(unsigned int duplex); const char *phy_rate_matching_to_str(int rate_matching); @@ -1347,22 +1340,25 @@ void of_set_phy_timing_role(struct phy_device *phydev); int phy_speed_down_core(struct phy_device *phydev); /** - * phy_set_eee_broken - Mark an EEE mode as broken so that it isn't advertised. + * phy_is_started - Convenience function to check whether PHY is started * @phydev: The phy_device struct - * @link_mode: The broken EEE mode */ -static inline void phy_set_eee_broken(struct phy_device *phydev, u32 link_mode) +static inline bool phy_is_started(struct phy_device *phydev) { - linkmode_set_bit(link_mode, phydev->eee_broken_modes); + return phydev->state >= PHY_UP; } /** - * phy_is_started - Convenience function to check whether PHY is started + * phy_disable_eee_mode - Don't advertise an EEE mode. * @phydev: The phy_device struct + * @link_mode: The EEE mode to be disabled */ -static inline bool phy_is_started(struct phy_device *phydev) +static inline void phy_disable_eee_mode(struct phy_device *phydev, u32 link_mode) { - return phydev->state >= PHY_UP; + WARN_ON(phy_is_started(phydev)); + + linkmode_set_bit(link_mode, phydev->eee_disabled_modes); + linkmode_clear_bit(link_mode, phydev->advertising_eee); } void phy_resolve_aneg_pause(struct phy_device *phydev); @@ -1747,15 +1743,6 @@ static inline bool phy_is_default_hwtstamp(struct phy_device *phydev) } /** - * phy_is_internal - Convenience function for testing if a PHY is internal - * @phydev: the phy_device struct - */ -static inline bool phy_is_internal(struct phy_device *phydev) -{ - return phydev->is_internal; -} - -/** * phy_on_sfp - Convenience function for testing if a PHY is on an SFP module * @phydev: the phy_device struct */ @@ -2045,8 +2032,7 @@ int genphy_c45_plca_set_cfg(struct phy_device *phydev, const struct phy_plca_cfg *plca_cfg); int genphy_c45_plca_get_status(struct phy_device *phydev, struct phy_plca_status *plca_st); -int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *adv, - unsigned long *lp); +int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *lp); int genphy_c45_ethtool_get_eee(struct phy_device *phydev, struct ethtool_keee *data); int genphy_c45_ethtool_set_eee(struct phy_device *phydev, @@ -2078,7 +2064,6 @@ int phy_drivers_register(struct phy_driver *new_driver, int n, struct module *owner); void phy_error(struct phy_device *phydev); void phy_state_machine(struct work_struct *work); -void phy_queue_state_machine(struct phy_device *phydev, unsigned long jiffies); void phy_trigger_machine(struct phy_device *phydev); void phy_mac_interrupt(struct phy_device *phydev); void phy_start_machine(struct phy_device *phydev); @@ -2114,11 +2099,13 @@ void phy_get_pause(struct phy_device *phydev, bool *tx_pause, bool *rx_pause); s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev, const int *delay_values, int size, bool is_rx); +int phy_get_tx_amplitude_gain(struct phy_device *phydev, struct device *dev, + enum ethtool_link_mode_bit_indices linkmode, + u32 *val); + void phy_resolve_pause(unsigned long *local_adv, unsigned long *partner_adv, bool *tx_pause, bool *rx_pause); -int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask, - int (*run)(struct phy_device *)); int phy_register_fixup_for_id(const char *bus_id, int (*run)(struct phy_device *)); int phy_register_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask, diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 898b00451bbf..08df65f6867a 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -442,7 +442,6 @@ struct phylink_pcs_ops; * are supported by this PCS. * @ops: a pointer to the &struct phylink_pcs_ops structure * @phylink: pointer to &struct phylink_config - * @neg_mode: provide PCS neg mode via "mode" argument * @poll: poll the PCS for link changes * @rxc_always_on: The MAC driver requires the reference clock * to always be on. Standalone PCS drivers which @@ -459,7 +458,6 @@ struct phylink_pcs { DECLARE_PHY_INTERFACE_MASK(supported_interfaces); const struct phylink_pcs_ops *ops; struct phylink *phylink; - bool neg_mode; bool poll; bool rxc_always_on; }; @@ -477,6 +475,10 @@ struct phylink_pcs { * @pcs_an_restart: restart 802.3z BaseX autonegotiation. * @pcs_link_up: program the PCS for the resolved link configuration * (where necessary). + * @pcs_disable_eee: optional notification to PCS that EEE has been disabled + * at the MAC. + * @pcs_enable_eee: optional notification to PCS that EEE will be enabled at + * the MAC. * @pcs_pre_init: configure PCS components necessary for MAC hardware * initialization e.g. RX clock for stmmac. */ @@ -500,6 +502,8 @@ struct phylink_pcs_ops { void (*pcs_an_restart)(struct phylink_pcs *pcs); void (*pcs_link_up)(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, int speed, int duplex); + void (*pcs_disable_eee)(struct phylink_pcs *pcs); + void (*pcs_enable_eee)(struct phylink_pcs *pcs); int (*pcs_pre_init)(struct phylink_pcs *pcs); }; @@ -626,6 +630,22 @@ void pcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, int speed, int duplex); /** + * pcs_disable_eee() - Disable EEE at the PCS + * @pcs: a pointer to a &struct phylink_pcs + * + * Optional method informing the PCS that EEE has been disabled at the MAC. + */ +void pcs_disable_eee(struct phylink_pcs *pcs); + +/** + * pcs_enable_eee() - Enable EEE at the PCS + * @pcs: a pointer to a &struct phylink_pcs + * + * Optional method informing the PCS that EEE is about to be enabled at the MAC. + */ +void pcs_enable_eee(struct phylink_pcs *pcs); + +/** * pcs_pre_init() - Configure PCS components necessary for MAC initialization * @pcs: a pointer to a &struct phylink_pcs. * @@ -737,6 +757,18 @@ static inline int phylink_get_link_timer_ns(phy_interface_t interface) } } +/** + * phylink_mac_implements_lpi() - determine if MAC implements LPI ops + * @ops: phylink_mac_ops structure + * + * Returns true if the phylink MAC operations structure indicates that the + * LPI operations have been implemented, false otherwise. + */ +static inline bool phylink_mac_implements_lpi(const struct phylink_mac_ops *ops) +{ + return ops && ops->mac_disable_tx_lpi && ops->mac_enable_tx_lpi; +} + void phylink_mii_c22_pcs_decode_state(struct phylink_link_state *state, unsigned int neg_mode, u16 bmsr, u16 lpa); void phylink_mii_c22_pcs_get_state(struct mdio_device *pcs, diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 4bc2ee0b10b0..ccaaf4c7d5f6 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -43,6 +43,7 @@ extern void rtnl_lock(void); extern void rtnl_unlock(void); extern int rtnl_trylock(void); extern int rtnl_is_locked(void); +extern int rtnl_lock_interruptible(void); extern int rtnl_lock_killable(void); extern bool refcount_dec_and_rtnl_lock(refcount_t *r); diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 836a7e200f39..812011d8b67e 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -222,7 +222,6 @@ struct sctp_datahdr { __be16 stream; __be16 ssn; __u32 ppid; - /* __u8 payload[]; */ }; struct sctp_data_chunk { diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index c9878a612e53..6d2aa77ea963 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -231,7 +231,7 @@ struct plat_stmmacenet_data { u8 tx_sched_algorithm; struct stmmac_rxq_cfg rx_queues_cfg[MTL_MAX_RX_QUEUES]; struct stmmac_txq_cfg tx_queues_cfg[MTL_MAX_TX_QUEUES]; - void (*fix_mac_speed)(void *priv, unsigned int speed, unsigned int mode); + void (*fix_mac_speed)(void *priv, int speed, unsigned int mode); int (*fix_soc_reset)(void *priv, void __iomem *ioaddr); int (*serdes_powerup)(struct net_device *ndev, void *priv); void (*serdes_powerdown)(struct net_device *ndev, void *priv); @@ -254,6 +254,8 @@ struct plat_stmmacenet_data { struct clk *clk_ptp_ref; unsigned long clk_ptp_rate; unsigned long clk_ref_rate; + struct clk_bulk_data *clks; + int num_clks; unsigned int mult_fact_100ns; s32 ptp_max_adj; u32 cdc_error_adj; diff --git a/include/linux/unroll.h b/include/linux/unroll.h index d42fd6366373..863fb69f6a7e 100644 --- a/include/linux/unroll.h +++ b/include/linux/unroll.h @@ -9,6 +9,50 @@ #include <linux/args.h> +#ifdef CONFIG_CC_IS_CLANG +#define __pick_unrolled(x, y) _Pragma(#x) +#elif CONFIG_GCC_VERSION >= 80000 +#define __pick_unrolled(x, y) _Pragma(#y) +#else +#define __pick_unrolled(x, y) /* not supported */ +#endif + +/** + * unrolled - loop attributes to ask the compiler to unroll it + * + * Usage: + * + * #define BATCH 8 + * + * unrolled_count(BATCH) + * for (u32 i = 0; i < BATCH; i++) + * // loop body without cross-iteration dependencies + * + * This is only a hint and the compiler is free to disable unrolling if it + * thinks the count is suboptimal and may hurt performance and/or hugely + * increase object code size. + * Not having any cross-iteration dependencies (i.e. when iter x + 1 depends + * on what iter x will do with variables) is not a strict requirement, but + * provides best performance and object code size. + * Available only on Clang and GCC 8.x onwards. + */ + +/* Ask the compiler to pick an optimal unroll count, Clang only */ +#define unrolled \ + __pick_unrolled(clang loop unroll(enable), /* nothing */) + +/* Unroll each @n iterations of the loop */ +#define unrolled_count(n) \ + __pick_unrolled(clang loop unroll_count(n), GCC unroll n) + +/* Unroll the whole loop */ +#define unrolled_full \ + __pick_unrolled(clang loop unroll(full), GCC unroll 65534) + +/* Never unroll the loop */ +#define unrolled_none \ + __pick_unrolled(clang loop unroll(disable), GCC unroll 1) + #define UNROLL(N, MACRO, args...) CONCATENATE(__UNROLL_, N)(MACRO, args) #define __UNROLL_0(MACRO, args...) diff --git a/include/linux/usb/r8152.h b/include/linux/usb/r8152.h index 33a4c146dc19..2ca60828f28b 100644 --- a/include/linux/usb/r8152.h +++ b/include/linux/usb/r8152.h @@ -30,6 +30,7 @@ #define VENDOR_ID_NVIDIA 0x0955 #define VENDOR_ID_TPLINK 0x2357 #define VENDOR_ID_DLINK 0x2001 +#define VENDOR_ID_DELL 0x413c #define VENDOR_ID_ASUS 0x0b05 #if IS_REACHABLE(CONFIG_USB_RTL8152) diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index c39a426ebf52..cab6146a510a 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -24,6 +24,11 @@ */ #define MIN_NAPI_ID ((unsigned int)(NR_CPUS + 1)) +static inline bool napi_id_valid(unsigned int napi_id) +{ + return napi_id >= MIN_NAPI_ID; +} + #define BUSY_POLL_BUDGET 8 #ifdef CONFIG_NET_RX_BUSY_POLL @@ -114,7 +119,7 @@ static inline void sk_busy_loop(struct sock *sk, int nonblock) #ifdef CONFIG_NET_RX_BUSY_POLL unsigned int napi_id = READ_ONCE(sk->sk_napi_id); - if (napi_id >= MIN_NAPI_ID) + if (napi_id_valid(napi_id)) napi_busy_loop(napi_id, nonblock ? NULL : sk_busy_loop_end, sk, READ_ONCE(sk->sk_prefer_busy_poll), READ_ONCE(sk->sk_busy_poll_budget) ?: BUSY_POLL_BUDGET); @@ -129,7 +134,7 @@ static inline void skb_mark_napi_id(struct sk_buff *skb, /* If the skb was already marked with a valid NAPI ID, avoid overwriting * it. */ - if (skb->napi_id < MIN_NAPI_ID) + if (!napi_id_valid(skb->napi_id)) skb->napi_id = napi->napi_id; #endif } diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 04383d90a1e3..5927910ec06e 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -43,6 +43,8 @@ struct fib_rule { struct fib_kuid_range uid_range; struct fib_rule_port_range sport_range; struct fib_rule_port_range dport_range; + u16 sport_mask; + u16 dport_mask; struct rcu_head rcu; }; @@ -146,6 +148,17 @@ static inline bool fib_rule_port_inrange(const struct fib_rule_port_range *a, ntohs(port) <= a->end; } +static inline bool fib_rule_port_match(const struct fib_rule_port_range *range, + u16 port_mask, __be16 port) +{ + if ((range->start ^ ntohs(port)) & port_mask) + return false; + if (!port_mask && fib_rule_port_range_set(range) && + !fib_rule_port_inrange(range, port)) + return false; + return true; +} + static inline bool fib_rule_port_range_valid(const struct fib_rule_port_range *a) { return a->start != 0 && a->end != 0 && a->end < 0xffff && @@ -159,6 +172,12 @@ static inline bool fib_rule_port_range_compare(struct fib_rule_port_range *a, a->end == b->end; } +static inline bool +fib_rule_port_is_range(const struct fib_rule_port_range *range) +{ + return range->start != range->end; +} + static inline bool fib_rule_requires_fldissect(struct fib_rule *rule) { return rule->iifindex != LOOPBACK_IFINDEX && (rule->ip_proto || @@ -178,10 +197,10 @@ int fib_rules_dump(struct net *net, struct notifier_block *nb, int family, struct netlink_ext_ack *extack); unsigned int fib_rules_seq_read(const struct net *net, int family); -int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack); -int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack); +int fib_newrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, bool rtnl_held); +int fib_delrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, bool rtnl_held); INDIRECT_CALLABLE_DECLARE(int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)); diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index c7f42844c79a..d9978ffacc97 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -90,6 +90,7 @@ struct inet_connection_sock { struct timer_list icsk_delack_timer; __u32 icsk_rto; __u32 icsk_rto_min; + u32 icsk_rto_max; __u32 icsk_delack_max; __u32 icsk_pmtu_cookie; const struct tcp_congestion_ops *icsk_ca_ops; @@ -189,9 +190,6 @@ static inline void inet_csk_delack_init(struct sock *sk) memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack)); } -void inet_csk_delete_keepalive_timer(struct sock *sk); -void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout); - static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what) { struct inet_connection_sock *icsk = inet_csk(sk); diff --git a/include/net/ip.h b/include/net/ip.h index ba7b43447775..ce5e59957dd5 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -92,11 +92,12 @@ static inline void ipcm_init(struct ipcm_cookie *ipcm) static inline void ipcm_init_sk(struct ipcm_cookie *ipcm, const struct inet_sock *inet) { - ipcm_init(ipcm); + *ipcm = (struct ipcm_cookie) { + .tos = READ_ONCE(inet->tos), + }; + + sockcm_init(&ipcm->sockc, &inet->sk); - ipcm->sockc.mark = READ_ONCE(inet->sk.sk_mark); - ipcm->sockc.priority = READ_ONCE(inet->sk.sk_priority); - ipcm->sockc.tsflags = READ_ONCE(inet->sk.sk_tsflags); ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if); ipcm->addr = inet->inet_saddr; ipcm->protocol = inet->inet_num; @@ -257,13 +258,6 @@ static inline u8 ip_sendmsg_scope(const struct inet_sock *inet, return RT_SCOPE_UNIVERSE; } -static inline __u8 get_rttos(struct ipcm_cookie* ipc, struct inet_sock *inet) -{ - u8 dsfield = ipc->tos != -1 ? ipc->tos : READ_ONCE(inet->tos); - - return dsfield & INET_DSCP_MASK; -} - /* datagram.c */ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); @@ -673,6 +667,14 @@ static inline void ip_ipgre_mc_map(__be32 naddr, const unsigned char *broadcast, memcpy(buf, &naddr, sizeof(naddr)); } +#if IS_MODULE(CONFIG_IPV6) +#define EXPORT_IPV6_MOD(X) EXPORT_SYMBOL(X) +#define EXPORT_IPV6_MOD_GPL(X) EXPORT_SYMBOL_GPL(X) +#else +#define EXPORT_IPV6_MOD(X) +#define EXPORT_IPV6_MOD_GPL(X) +#endif + #if IS_ENABLED(CONFIG_IPV6) #include <linux/ipv6.h> #endif diff --git a/include/net/ipv6.h b/include/net/ipv6.h index f5c43ad1565e..9614006f483c 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -363,15 +363,6 @@ struct ipcm6_cookie { struct ipv6_txoptions *opt; }; -static inline void ipcm6_init(struct ipcm6_cookie *ipc6) -{ - *ipc6 = (struct ipcm6_cookie) { - .hlimit = -1, - .tclass = -1, - .dontfrag = -1, - }; -} - static inline void ipcm6_init_sk(struct ipcm6_cookie *ipc6, const struct sock *sk) { @@ -380,6 +371,8 @@ static inline void ipcm6_init_sk(struct ipcm6_cookie *ipc6, .tclass = inet6_sk(sk)->tclass, .dontfrag = inet6_test_bit(DONTFRAG, sk), }; + + sockcm_init(&ipc6->sockc, sk); } static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np) diff --git a/include/net/libeth/rx.h b/include/net/libeth/rx.h index 43574bd6612f..ab05024be518 100644 --- a/include/net/libeth/rx.h +++ b/include/net/libeth/rx.h @@ -198,6 +198,53 @@ struct libeth_rx_pt { enum xdp_rss_hash_type hash_type:16; }; +/** + * struct libeth_rx_csum - checksum offload bits decoded from the Rx descriptor + * @l3l4p: detectable L3 and L4 integrity check is processed by the hardware + * @ipe: IP checksum error + * @eipe: external (outermost) IP header (only for tunels) + * @eudpe: external (outermost) UDP checksum error (only for tunels) + * @ipv6exadd: IPv6 header with extension headers + * @l4e: L4 integrity error + * @pprs: set for packets that skip checksum calculation in the HW pre parser + * @nat: the packet is a UDP tunneled packet + * @raw_csum_valid: set if raw checksum is valid + * @pad: padding to naturally align raw_csum field + * @raw_csum: raw checksum + */ +struct libeth_rx_csum { + u32 l3l4p:1; + u32 ipe:1; + u32 eipe:1; + u32 eudpe:1; + u32 ipv6exadd:1; + u32 l4e:1; + u32 pprs:1; + u32 nat:1; + + u32 raw_csum_valid:1; + u32 pad:7; + u32 raw_csum:16; +}; + +/** + * struct libeth_rqe_info - receive queue element info + * @len: packet length + * @ptype: packet type based on types programmed into the device + * @eop: whether it's the last fragment of the packet + * @rxe: MAC errors: CRC, Alignment, Oversize, Undersizes, Length error + * @vlan: C-VLAN or S-VLAN tag depending on the VLAN offload configuration + */ +struct libeth_rqe_info { + u32 len; + + u32 ptype:14; + u32 eop:1; + u32 rxe:1; + + u32 vlan:16; +}; + void libeth_rx_pt_gen_hash_type(struct libeth_rx_pt *pt); /** diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index b02bb9f109d5..825141d675e5 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -23,6 +23,7 @@ struct netdev_queue_stats_rx { u64 hw_drops; u64 hw_drop_overruns; + u64 csum_complete; u64 csum_unnecessary; u64 csum_none; u64 csum_bad; @@ -117,6 +118,10 @@ struct netdev_stat_ops { * * @ndo_queue_stop: Stop the RX queue at the specified index. The stopped * queue's memory is written at the specified address. + * + * Note that @ndo_queue_mem_alloc and @ndo_queue_mem_free may be called while + * the interface is closed. @ndo_queue_start and @ndo_queue_stop will only + * be called for an interface which is open. */ struct netdev_queue_mgmt_ops { size_t ndo_queue_mem_size; diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h index 596836abf7bf..af40842f229d 100644 --- a/include/net/netdev_rx_queue.h +++ b/include/net/netdev_rx_queue.h @@ -16,6 +16,7 @@ struct netdev_rx_queue { struct rps_dev_flow_table __rcu *rps_flow_table; #endif struct kobject kobj; + const struct attribute_group **groups; struct net_device *dev; netdevice_tracker dev_tracker; diff --git a/include/net/netlink.h b/include/net/netlink.h index e015ffbed819..29e0db940382 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -118,6 +118,7 @@ * nla_nest_start(skb, type) start a nested attribute * nla_nest_end(skb, nla) finalize a nested attribute * nla_nest_cancel(skb, nla) cancel nested attribute construction + * nla_put_empty_nest(skb, type) create an empty nest * * Attribute Length Calculations: * nla_attr_size(payload) length of attribute w/o padding @@ -2241,6 +2242,20 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) } /** + * nla_put_empty_nest - Create an empty nest + * @skb: socket buffer the message is stored in + * @attrtype: attribute type of the container + * + * This function is a helper for creating empty nests. + * + * Returns: 0 when successful or -EMSGSIZE on failure. + */ +static inline int nla_put_empty_nest(struct sk_buff *skb, int attrtype) +{ + return nla_nest_start(skb, attrtype) ? 0 : -EMSGSIZE; +} + +/** * __nla_validate_nested - Validate a stream of nested attributes * @start: container attribute * @maxtype: maximum attribute type to be expected diff --git a/include/net/netmem.h b/include/net/netmem.h index 1b58faa4f20f..c61d5b21e7b4 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -24,11 +24,20 @@ struct net_iov { unsigned long __unused_padding; unsigned long pp_magic; struct page_pool *pp; - struct dmabuf_genpool_chunk_owner *owner; + struct net_iov_area *owner; unsigned long dma_addr; atomic_long_t pp_ref_count; }; +struct net_iov_area { + /* Array of net_iovs for this area. */ + struct net_iov *niovs; + size_t num_niovs; + + /* Offset into the dma-buf where this chunk starts. */ + unsigned long base_virtual; +}; + /* These fields in struct page are used by the page_pool and net stack: * * struct { @@ -54,6 +63,16 @@ NET_IOV_ASSERT_OFFSET(dma_addr, dma_addr); NET_IOV_ASSERT_OFFSET(pp_ref_count, pp_ref_count); #undef NET_IOV_ASSERT_OFFSET +static inline struct net_iov_area *net_iov_owner(const struct net_iov *niov) +{ + return niov->owner; +} + +static inline unsigned int net_iov_idx(const struct net_iov *niov) +{ + return niov - net_iov_owner(niov)->niovs; +} + /* netmem */ /** diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 46452da35206..45ac125e8aeb 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -181,6 +181,7 @@ struct netns_ipv4 { u8 sysctl_tcp_window_scaling; u8 sysctl_tcp_timestamps; int sysctl_tcp_rto_min_us; + int sysctl_tcp_rto_max_ms; u8 sysctl_tcp_recovery; u8 sysctl_tcp_thin_linear_timeouts; u8 sysctl_tcp_slow_start_after_idle; diff --git a/include/net/page_pool/memory_provider.h b/include/net/page_pool/memory_provider.h new file mode 100644 index 000000000000..b3e665897767 --- /dev/null +++ b/include/net/page_pool/memory_provider.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NET_PAGE_POOL_MEMORY_PROVIDER_H +#define _NET_PAGE_POOL_MEMORY_PROVIDER_H + +#include <net/netmem.h> +#include <net/page_pool/types.h> + +struct netdev_rx_queue; +struct sk_buff; + +struct memory_provider_ops { + netmem_ref (*alloc_netmems)(struct page_pool *pool, gfp_t gfp); + bool (*release_netmem)(struct page_pool *pool, netmem_ref netmem); + int (*init)(struct page_pool *pool); + void (*destroy)(struct page_pool *pool); + int (*nl_fill)(void *mp_priv, struct sk_buff *rsp, + struct netdev_rx_queue *rxq); + void (*uninstall)(void *mp_priv, struct netdev_rx_queue *rxq); +}; + +bool net_mp_niov_set_dma_addr(struct net_iov *niov, dma_addr_t addr); +void net_mp_niov_set_page_pool(struct page_pool *pool, struct net_iov *niov); +void net_mp_niov_clear_page_pool(struct net_iov *niov); + +int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, + struct pp_memory_provider_params *p); +void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx, + struct pp_memory_provider_params *old_p); + +/** + * net_mp_netmem_place_in_cache() - give a netmem to a page pool + * @pool: the page pool to place the netmem into + * @netmem: netmem to give + * + * Push an accounted netmem into the page pool's allocation cache. The caller + * must ensure that there is space in the cache. It should only be called off + * the mp_ops->alloc_netmems() path. + */ +static inline void net_mp_netmem_place_in_cache(struct page_pool *pool, + netmem_ref netmem) +{ + pool->alloc.cache[pool->alloc.count++] = netmem; +} + +#endif diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 7f405672b089..36eb57d73abc 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -152,8 +152,11 @@ struct page_pool_stats { */ #define PAGE_POOL_FRAG_GROUP_ALIGN (4 * sizeof(long)) +struct memory_provider_ops; + struct pp_memory_provider_params { void *mp_priv; + const struct memory_provider_ops *mp_ops; }; struct page_pool { @@ -216,6 +219,7 @@ struct page_pool { struct ptr_ring ring; void *mp_priv; + const struct memory_provider_ops *mp_ops; #ifdef CONFIG_PAGE_POOL_STATS /* recycle stats are per-cpu to avoid locking */ diff --git a/include/net/sock.h b/include/net/sock.h index 8036b3b79cd8..edbb870e3f86 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -954,6 +954,7 @@ enum sock_flags { SOCK_TSTAMP_NEW, /* Indicates 64 bit timestamps always */ SOCK_RCVMARK, /* Receive SO_MARK ancillary data with packet */ SOCK_RCVPRIORITY, /* Receive SO_PRIORITY ancillary data with packet */ + SOCK_TIMESTAMPING_ANY, /* Copy of sk_tsflags & TSFLAGS_ANY */ }; #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) @@ -1284,10 +1285,6 @@ struct proto { unsigned int inuse_idx; #endif -#if IS_ENABLED(CONFIG_MPTCP) - int (*forward_alloc_get)(const struct sock *sk); -#endif - bool (*stream_memory_free)(const struct sock *sk, int wake); bool (*sock_is_readable)(struct sock *sk); /* Memory pressure */ @@ -1348,15 +1345,6 @@ int sock_load_diag_module(int family, int protocol); INDIRECT_CALLABLE_DECLARE(bool tcp_stream_memory_free(const struct sock *sk, int wake)); -static inline int sk_forward_alloc_get(const struct sock *sk) -{ -#if IS_ENABLED(CONFIG_MPTCP) - if (sk->sk_prot->forward_alloc_get) - return sk->sk_prot->forward_alloc_get(sk); -#endif - return READ_ONCE(sk->sk_forward_alloc); -} - static inline bool __sk_stream_memory_free(const struct sock *sk, int wake) { if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf)) @@ -1828,6 +1816,7 @@ static inline void sockcm_init(struct sockcm_cookie *sockc, const struct sock *sk) { *sockc = (struct sockcm_cookie) { + .mark = READ_ONCE(sk->sk_mark), .tsflags = READ_ONCE(sk->sk_tsflags), .priority = READ_ONCE(sk->sk_priority), }; @@ -2664,13 +2653,13 @@ static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk, { #define FLAGS_RECV_CMSGS ((1UL << SOCK_RXQ_OVFL) | \ (1UL << SOCK_RCVTSTAMP) | \ - (1UL << SOCK_RCVMARK) |\ - (1UL << SOCK_RCVPRIORITY)) + (1UL << SOCK_RCVMARK) | \ + (1UL << SOCK_RCVPRIORITY) | \ + (1UL << SOCK_TIMESTAMPING_ANY)) #define TSFLAGS_ANY (SOF_TIMESTAMPING_SOFTWARE | \ SOF_TIMESTAMPING_RAW_HARDWARE) - if (sk->sk_flags & FLAGS_RECV_CMSGS || - READ_ONCE(sk->sk_tsflags) & TSFLAGS_ANY) + if (READ_ONCE(sk->sk_flags) & FLAGS_RECV_CMSGS) __sock_recv_cmsgs(msg, sk, skb); else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP))) sock_write_timestamp(sk, skb->tstamp); diff --git a/include/net/tcp.h b/include/net/tcp.h index 930cda5b5eb9..ced32b924a9c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -144,8 +144,9 @@ static_assert((1 << ATO_BITS) > TCP_DELACK_MAX); #define TCP_DELACK_MIN 4U #define TCP_ATO_MIN 4U #endif -#define TCP_RTO_MAX ((unsigned)(120*HZ)) -#define TCP_RTO_MIN ((unsigned)(HZ/5)) +#define TCP_RTO_MAX_SEC 120 +#define TCP_RTO_MAX ((unsigned)(TCP_RTO_MAX_SEC * HZ)) +#define TCP_RTO_MIN ((unsigned)(HZ / 5)) #define TCP_TIMEOUT_MIN (2U) /* Min timeout for TCP timers in jiffies */ #define TCP_TIMEOUT_MIN_US (2*USEC_PER_MSEC) /* Min TCP timeout in microsecs */ @@ -416,6 +417,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); +void tcp_reset_keepalive_timer(struct sock *sk, unsigned long timeout); void tcp_set_keepalive(struct sock *sk, int val); void tcp_syn_ack_timeout(const struct request_sock *req); int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, @@ -753,10 +755,14 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu); int tcp_mss_to_mtu(struct sock *sk, int mss); void tcp_mtup_init(struct sock *sk); +static inline unsigned int tcp_rto_max(const struct sock *sk) +{ + return READ_ONCE(inet_csk(sk)->icsk_rto_max); +} + static inline void tcp_bound_rto(struct sock *sk) { - if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX) - inet_csk(sk)->icsk_rto = TCP_RTO_MAX; + inet_csk(sk)->icsk_rto = min(inet_csk(sk)->icsk_rto, tcp_rto_max(sk)); } static inline u32 __tcp_set_rto(const struct tcp_sock *tp) @@ -1437,10 +1443,12 @@ static inline unsigned long tcp_pacing_delay(const struct sock *sk) static inline void tcp_reset_xmit_timer(struct sock *sk, const int what, unsigned long when, - const unsigned long max_when) + bool pace_delay) { - inet_csk_reset_xmit_timer(sk, what, when + tcp_pacing_delay(sk), - max_when); + if (pace_delay) + when += tcp_pacing_delay(sk); + inet_csk_reset_xmit_timer(sk, what, when, + tcp_rto_max(sk)); } /* Something is really bad, we could not queue an additional packet, @@ -1469,7 +1477,7 @@ static inline void tcp_check_probe_timer(struct sock *sk) { if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending) tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, - tcp_probe0_base(sk), TCP_RTO_MAX); + tcp_probe0_base(sk), true); } static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq) diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 784cd34f5bba..15086dcf51d8 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -196,6 +196,23 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr) return xp_raw_get_data(pool, addr); } +/** + * xsk_buff_raw_get_ctx - get &xdp_desc context + * @pool: XSk buff pool desc address belongs to + * @addr: desc address (from userspace) + * + * Wrapper for xp_raw_get_ctx() to be used in drivers, see its kdoc for + * details. + * + * Return: new &xdp_desc_ctx struct containing desc's DMA address and metadata + * pointer, if it is present and valid (initialized to %NULL otherwise). + */ +static inline struct xdp_desc_ctx +xsk_buff_raw_get_ctx(const struct xsk_buff_pool *pool, u64 addr) +{ + return xp_raw_get_ctx(pool, addr); +} + #define XDP_TXMD_FLAGS_VALID ( \ XDP_TXMD_FLAGS_TIMESTAMP | \ XDP_TXMD_FLAGS_CHECKSUM | \ @@ -207,20 +224,27 @@ xsk_buff_valid_tx_metadata(const struct xsk_tx_metadata *meta) return !(meta->flags & ~XDP_TXMD_FLAGS_VALID); } -static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr) +static inline struct xsk_tx_metadata * +__xsk_buff_get_metadata(const struct xsk_buff_pool *pool, void *data) { struct xsk_tx_metadata *meta; if (!pool->tx_metadata_len) return NULL; - meta = xp_raw_get_data(pool, addr) - pool->tx_metadata_len; + meta = data - pool->tx_metadata_len; if (unlikely(!xsk_buff_valid_tx_metadata(meta))) return NULL; /* no way to signal the error to the user */ return meta; } +static inline struct xsk_tx_metadata * +xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr) +{ + return __xsk_buff_get_metadata(pool, xp_raw_get_data(pool, addr)); +} + static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp) { struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp); @@ -388,12 +412,25 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr) return NULL; } +static inline struct xdp_desc_ctx +xsk_buff_raw_get_ctx(const struct xsk_buff_pool *pool, u64 addr) +{ + return (struct xdp_desc_ctx){ }; +} + static inline bool xsk_buff_valid_tx_metadata(struct xsk_tx_metadata *meta) { return false; } -static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr) +static inline struct xsk_tx_metadata * +__xsk_buff_get_metadata(const struct xsk_buff_pool *pool, void *data) +{ + return NULL; +} + +static inline struct xsk_tx_metadata * +xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr) { return NULL; } diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 50779406bc2d..1dcd4d71468a 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -141,6 +141,14 @@ u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max); bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count); void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr); dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr); + +struct xdp_desc_ctx { + dma_addr_t dma; + struct xsk_tx_metadata *meta; +}; + +struct xdp_desc_ctx xp_raw_get_ctx(const struct xsk_buff_pool *pool, u64 addr); + static inline dma_addr_t xp_get_dma(struct xdp_buff_xsk *xskb) { return xskb->dma; diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index a27c4b619dff..1a40c41ff8c3 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -259,6 +259,12 @@ TRACE_EVENT(tcp_retransmit_synack, __entry->saddr_v6, __entry->daddr_v6) ); +DECLARE_TRACE(tcp_cwnd_reduction_tp, + TP_PROTO(const struct sock *sk, int newly_acked_sacked, + int newly_lost, int flag), + TP_ARGS(sk, newly_acked_sacked, newly_lost, flag) +); + #include <trace/events/net_probe_common.h> TRACE_EVENT(tcp_probe, diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h index e78cbd85ce7c..42abf0679fb4 100644 --- a/include/uapi/linux/can.h +++ b/include/uapi/linux/can.h @@ -182,7 +182,7 @@ struct canfd_frame { /* * defined bits for canxl_frame.flags * - * The canxl_frame.flags element contains two bits CANXL_XLF and CANXL_SEC + * The canxl_frame.flags element contains three bits CANXL_[XLF|SEC|RRS] * and shares the relative position of the struct can[fd]_frame.len element. * The CANXL_XLF bit ALWAYS needs to be set to indicate a valid CAN XL frame. * As a side effect setting this bit intentionally breaks the length checks @@ -192,6 +192,7 @@ struct canfd_frame { */ #define CANXL_XLF 0x80 /* mandatory CAN XL frame flag (must always be set!) */ #define CANXL_SEC 0x01 /* Simple Extended Content (security/segmentation) */ +#define CANXL_RRS 0x02 /* Remote Request Substitution */ /* the 8-bit VCID is optionally placed in the canxl_frame.prio element */ #define CANXL_VCID_OFFSET 16 /* bit offset of VCID in prio element */ diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 9b18c4cfe56f..2feba0929a8a 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2059,6 +2059,24 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_10baseT1S_Half_BIT = 100, ETHTOOL_LINK_MODE_10baseT1S_P2MP_Half_BIT = 101, ETHTOOL_LINK_MODE_10baseT1BRR_Full_BIT = 102, + ETHTOOL_LINK_MODE_200000baseCR_Full_BIT = 103, + ETHTOOL_LINK_MODE_200000baseKR_Full_BIT = 104, + ETHTOOL_LINK_MODE_200000baseDR_Full_BIT = 105, + ETHTOOL_LINK_MODE_200000baseDR_2_Full_BIT = 106, + ETHTOOL_LINK_MODE_200000baseSR_Full_BIT = 107, + ETHTOOL_LINK_MODE_200000baseVR_Full_BIT = 108, + ETHTOOL_LINK_MODE_400000baseCR2_Full_BIT = 109, + ETHTOOL_LINK_MODE_400000baseKR2_Full_BIT = 110, + ETHTOOL_LINK_MODE_400000baseDR2_Full_BIT = 111, + ETHTOOL_LINK_MODE_400000baseDR2_2_Full_BIT = 112, + ETHTOOL_LINK_MODE_400000baseSR2_Full_BIT = 113, + ETHTOOL_LINK_MODE_400000baseVR2_Full_BIT = 114, + ETHTOOL_LINK_MODE_800000baseCR4_Full_BIT = 115, + ETHTOOL_LINK_MODE_800000baseKR4_Full_BIT = 116, + ETHTOOL_LINK_MODE_800000baseDR4_Full_BIT = 117, + ETHTOOL_LINK_MODE_800000baseDR4_2_Full_BIT = 118, + ETHTOOL_LINK_MODE_800000baseSR4_Full_BIT = 119, + ETHTOOL_LINK_MODE_800000baseVR4_Full_BIT = 120, /* must be last entry */ __ETHTOOL_LINK_MODE_MASK_NBITS diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 00e9890ca3c0..95ec01b15c65 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -70,6 +70,8 @@ enum { FRA_DSCP, /* dscp */ FRA_FLOWLABEL, /* flowlabel */ FRA_FLOWLABEL_MASK, /* flowlabel mask */ + FRA_SPORT_MASK, /* sport mask */ + FRA_DPORT_MASK, /* dport mask */ __FRA_MAX }; diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index e4be227d3ad6..4e82f3871473 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -87,6 +87,11 @@ enum { }; enum { + __NETDEV_A_IO_URING_PROVIDER_INFO_MAX, + NETDEV_A_IO_URING_PROVIDER_INFO_MAX = (__NETDEV_A_IO_URING_PROVIDER_INFO_MAX - 1) +}; + +enum { NETDEV_A_PAGE_POOL_ID = 1, NETDEV_A_PAGE_POOL_IFINDEX, NETDEV_A_PAGE_POOL_NAPI_ID, @@ -94,6 +99,7 @@ enum { NETDEV_A_PAGE_POOL_INFLIGHT_MEM, NETDEV_A_PAGE_POOL_DETACH_TIME, NETDEV_A_PAGE_POOL_DMABUF, + NETDEV_A_PAGE_POOL_IO_URING, __NETDEV_A_PAGE_POOL_MAX, NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1) @@ -131,11 +137,18 @@ enum { }; enum { + __NETDEV_A_XSK_INFO_MAX, + NETDEV_A_XSK_INFO_MAX = (__NETDEV_A_XSK_INFO_MAX - 1) +}; + +enum { NETDEV_A_QUEUE_ID = 1, NETDEV_A_QUEUE_IFINDEX, NETDEV_A_QUEUE_TYPE, NETDEV_A_QUEUE_NAPI_ID, NETDEV_A_QUEUE_DMABUF, + NETDEV_A_QUEUE_IO_URING, + NETDEV_A_QUEUE_XSK, __NETDEV_A_QUEUE_MAX, NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index dbf896f3146c..32a27b4a5020 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -136,6 +136,7 @@ enum { #define TCP_AO_REPAIR 42 /* Get/Set SNEs and ISNs */ #define TCP_IS_MPTCP 43 /* Is MPTCP being used? */ +#define TCP_RTO_MAX_MS 44 /* max rto time in ms */ #define TCP_REPAIR_ON 1 #define TCP_REPAIR_OFF 0 diff --git a/io_uring/napi.c b/io_uring/napi.c index b1ade3fda30f..4a10de03e426 100644 --- a/io_uring/napi.c +++ b/io_uring/napi.c @@ -44,7 +44,7 @@ int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id) struct io_napi_entry *e; /* Non-NAPI IDs can be rejected. */ - if (napi_id < MIN_NAPI_ID) + if (!napi_id_valid(napi_id)) return -EINVAL; hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))]; @@ -87,7 +87,7 @@ static int __io_napi_del_id(struct io_ring_ctx *ctx, unsigned int napi_id) struct io_napi_entry *e; /* Non-NAPI IDs can be rejected. */ - if (napi_id < MIN_NAPI_ID) + if (!napi_id_valid(napi_id)) return -EINVAL; hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))]; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1af972a92d06..238321830993 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2557,15 +2557,6 @@ config TEST_BPF If unsure, say N. -config TEST_BLACKHOLE_DEV - tristate "Test blackhole netdev functionality" - depends on m && NET - help - This builds the "test_blackhole_dev" module that validates the - data path through this blackhole netdev. - - If unsure, say N. - config FIND_BIT_BENCHMARK tristate "Test find_bit functions" help @@ -2888,6 +2879,17 @@ config USERCOPY_KUNIT_TEST on the copy_to/from_user infrastructure, making sure basic user/kernel boundary testing is working. +config BLACKHOLE_DEV_KUNIT_TEST + tristate "Test blackhole netdev functionality" if !KUNIT_ALL_TESTS + depends on NET + depends on KUNIT + default KUNIT_ALL_TESTS + help + This builds the "blackhole_dev_kunit" module that validates the + data path through this blackhole netdev. + + If unsure, say N. + config TEST_UDELAY tristate "udelay test driver" help diff --git a/lib/Makefile b/lib/Makefile index d5cfc7afbbb8..19ff6993c2bc 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -102,7 +102,6 @@ obj-$(CONFIG_TEST_RUNTIME) += tests/ obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o obj-$(CONFIG_TEST_MEMCAT_P) += test_memcat_p.o obj-$(CONFIG_TEST_OBJAGG) += test_objagg.o -obj-$(CONFIG_TEST_BLACKHOLE_DEV) += test_blackhole_dev.o obj-$(CONFIG_TEST_MEMINIT) += test_meminit.o obj-$(CONFIG_TEST_LOCKUP) += test_lockup.o obj-$(CONFIG_TEST_HMM) += test_hmm.o @@ -393,6 +392,7 @@ obj-$(CONFIG_FORTIFY_KUNIT_TEST) += fortify_kunit.o obj-$(CONFIG_CRC_KUNIT_TEST) += crc_kunit.o obj-$(CONFIG_SIPHASH_KUNIT_TEST) += siphash_kunit.o obj-$(CONFIG_USERCOPY_KUNIT_TEST) += usercopy_kunit.o +obj-$(CONFIG_BLACKHOLE_DEV_KUNIT_TEST) += blackhole_dev_kunit.o obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o diff --git a/lib/test_blackhole_dev.c b/lib/blackhole_dev_kunit.c index ec290ac2a0d9..06834ab35f43 100644 --- a/lib/test_blackhole_dev.c +++ b/lib/blackhole_dev_kunit.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * This module tests the blackhole_dev that is created during the + * This tests the blackhole_dev that is created during the * net subsystem initialization. The test this module performs is * by injecting an skb into the stack with skb->dev as the * blackhole_dev and expects kernel to behave in a sane manner @@ -9,9 +9,8 @@ * Copyright (c) 2018, Mahesh Bandewar <maheshb@google.com> */ -#include <linux/init.h> +#include <kunit/test.h> #include <linux/module.h> -#include <linux/printk.h> #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/udp.h> @@ -25,17 +24,15 @@ #define UDP_PORT 1234 -static int __init test_blackholedev_init(void) +static void test_blackholedev(struct kunit *test) { struct ipv6hdr *ip6h; struct sk_buff *skb; struct udphdr *uh; int data_len; - int ret; skb = alloc_skb(SKB_SIZE, GFP_KERNEL); - if (!skb) - return -ENOMEM; + KUNIT_ASSERT_NOT_NULL(test, skb); /* Reserve head-room for the headers */ skb_reserve(skb, HEAD_SIZE); @@ -55,7 +52,7 @@ static int __init test_blackholedev_init(void) ip6h = (struct ipv6hdr *)skb_push(skb, sizeof(struct ipv6hdr)); skb_set_network_header(skb, 0); ip6h->hop_limit = 32; - ip6h->payload_len = data_len + sizeof(struct udphdr); + ip6h->payload_len = htons(data_len + sizeof(struct udphdr)); ip6h->nexthdr = IPPROTO_UDP; ip6h->saddr = in6addr_loopback; ip6h->daddr = in6addr_loopback; @@ -68,32 +65,20 @@ static int __init test_blackholedev_init(void) skb->dev = blackhole_netdev; /* Now attempt to send the packet */ - ret = dev_queue_xmit(skb); - - switch (ret) { - case NET_XMIT_SUCCESS: - pr_warn("dev_queue_xmit() returned NET_XMIT_SUCCESS\n"); - break; - case NET_XMIT_DROP: - pr_warn("dev_queue_xmit() returned NET_XMIT_DROP\n"); - break; - case NET_XMIT_CN: - pr_warn("dev_queue_xmit() returned NET_XMIT_CN\n"); - break; - default: - pr_err("dev_queue_xmit() returned UNKNOWN(%d)\n", ret); - } - - return 0; + KUNIT_EXPECT_EQ(test, dev_queue_xmit(skb), NET_XMIT_SUCCESS); } -static void __exit test_blackholedev_exit(void) -{ - pr_warn("test_blackholedev module terminating.\n"); -} +static struct kunit_case blackholedev_cases[] = { + KUNIT_CASE(test_blackholedev), + {}, +}; + +static struct kunit_suite blackholedev_suite = { + .name = "blackholedev", + .test_cases = blackholedev_cases, +}; -module_init(test_blackholedev_init); -module_exit(test_blackholedev_exit); +kunit_test_suite(blackholedev_suite); MODULE_AUTHOR("Mahesh Bandewar <maheshb@google.com>"); MODULE_DESCRIPTION("module test of the blackhole_dev"); diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 1a52a0bca086..7e1ad229e133 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -1040,7 +1040,7 @@ static int br_mdb_add_group(const struct br_mdb_config *cfg, /* host join */ if (!port) { - if (mp->host_joined) { + if (mp->host_joined && !(cfg->nlflags & NLM_F_REPLACE)) { NL_SET_ERR_MSG_MOD(extack, "Group is already joined by host"); return -EEXIST; } diff --git a/net/can/raw.c b/net/can/raw.c index 46e8ed9d64da..9b1d5f036f57 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -963,7 +963,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) skb->dev = dev; skb->priority = sockc.priority; - skb->mark = READ_ONCE(sk->sk_mark); + skb->mark = sockc.mark; skb->tstamp = sockc.transmit_time; skb_setup_tx_timestamp(skb, &sockc); diff --git a/net/core/dev.c b/net/core/dev.c index 1b252e9459fd..18064be6cf3e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -159,6 +159,7 @@ #include <net/netdev_rx_queue.h> #include <net/page_pool/types.h> #include <net/page_pool/helpers.h> +#include <net/page_pool/memory_provider.h> #include <net/rps.h> #include <linux/phy_link_topology.h> @@ -1007,7 +1008,7 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id) WARN_ON_ONCE(!rcu_read_lock_held()); - if (napi_id < MIN_NAPI_ID) + if (!napi_id_valid(napi_id)) return NULL; napi = napi_by_id(napi_id); @@ -6190,16 +6191,18 @@ EXPORT_SYMBOL(netif_receive_skb_list); static void flush_backlog(struct work_struct *work) { struct sk_buff *skb, *tmp; + struct sk_buff_head list; struct softnet_data *sd; + __skb_queue_head_init(&list); local_bh_disable(); sd = this_cpu_ptr(&softnet_data); backlog_lock_irq_disable(sd); skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { - if (skb->dev->reg_state == NETREG_UNREGISTERING) { + if (READ_ONCE(skb->dev->reg_state) == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->input_pkt_queue); - dev_kfree_skb_irq(skb); + __skb_queue_tail(&list, skb); rps_input_queue_head_incr(sd); } } @@ -6207,14 +6210,16 @@ static void flush_backlog(struct work_struct *work) local_lock_nested_bh(&softnet_data.process_queue_bh_lock); skb_queue_walk_safe(&sd->process_queue, skb, tmp) { - if (skb->dev->reg_state == NETREG_UNREGISTERING) { + if (READ_ONCE(skb->dev->reg_state) == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->process_queue); - kfree_skb(skb); + __skb_queue_tail(&list, skb); rps_input_queue_head_incr(sd); } } local_unlock_nested_bh(&softnet_data.process_queue_bh_lock); local_bh_enable(); + + __skb_queue_purge_reason(&list, SKB_DROP_REASON_DEV_READY); } static bool flush_required(int cpu) @@ -6806,7 +6811,7 @@ static void napi_hash_add(struct napi_struct *napi) /* 0..NR_CPUS range is reserved for sender_cpu use */ do { - if (unlikely(++napi_gen_id < MIN_NAPI_ID)) + if (unlikely(!napi_id_valid(++napi_gen_id))) napi_gen_id = MIN_NAPI_ID; } while (napi_by_id(napi_gen_id)); @@ -6977,7 +6982,7 @@ netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi) higher = &dev->napi_list; list_for_each_entry(pos, &dev->napi_list, dev_list) { - if (pos->napi_id >= MIN_NAPI_ID) + if (napi_id_valid(pos->napi_id)) pos_id = pos->napi_id; else if (pos->config) pos_id = pos->config->napi_id; @@ -7159,6 +7164,9 @@ void __netif_napi_del_locked(struct napi_struct *napi) if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state)) return; + /* Make sure NAPI is disabled (or was never enabled). */ + WARN_ON(!test_bit(NAPI_STATE_SCHED, &napi->state)); + if (napi->config) { napi->index = -1; napi->config = NULL; @@ -9262,7 +9270,7 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags, if ((flags ^ dev->gflags) & IFF_PROMISC) { int inc = (flags & IFF_PROMISC) ? 1 : -1; - unsigned int old_flags = dev->flags; + old_flags = dev->flags; dev->gflags ^= IFF_PROMISC; @@ -11826,6 +11834,19 @@ void unregister_netdevice_queue(struct net_device *dev, struct list_head *head) } EXPORT_SYMBOL(unregister_netdevice_queue); +static void dev_memory_provider_uninstall(struct net_device *dev) +{ + unsigned int i; + + for (i = 0; i < dev->real_num_rx_queues; i++) { + struct netdev_rx_queue *rxq = &dev->_rx[i]; + struct pp_memory_provider_params *p = &rxq->mp_params; + + if (p->mp_ops && p->mp_ops->uninstall) + p->mp_ops->uninstall(rxq->mp_params.mp_priv, rxq); + } +} + void unregister_netdevice_many_notify(struct list_head *head, u32 portid, const struct nlmsghdr *nlh) { @@ -11880,7 +11901,7 @@ void unregister_netdevice_many_notify(struct list_head *head, dev_tcx_uninstall(dev); dev_xdp_uninstall(dev); bpf_dev_bound_netdev_unregister(dev); - dev_dmabuf_uninstall(dev); + dev_memory_provider_uninstall(dev); netdev_offload_xstats_disable_all(dev); diff --git a/net/core/dev.h b/net/core/dev.h index a5b166bbd169..caa13e431a6b 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -299,6 +299,18 @@ void xdp_do_check_flushed(struct napi_struct *napi); static inline void xdp_do_check_flushed(struct napi_struct *napi) { } #endif +/* Best effort check that NAPI is not idle (can't be scheduled to run) */ +static inline void napi_assert_will_not_race(const struct napi_struct *napi) +{ + /* uninitialized instance, can't race */ + if (!napi->poll_list.next) + return; + + /* SCHED bit is set on disabled instances */ + WARN_ON(!test_bit(NAPI_STATE_SCHED, &napi->state)); + WARN_ON(READ_ONCE(napi->list_owner) != -1); +} + void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu); #define XMIT_RECURSION_LIMIT 8 diff --git a/net/core/devmem.c b/net/core/devmem.c index 3bba3f018df0..7c6e0b5b6acb 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -16,6 +16,7 @@ #include <net/netdev_queues.h> #include <net/netdev_rx_queue.h> #include <net/page_pool/helpers.h> +#include <net/page_pool/memory_provider.h> #include <trace/events/page_pool.h> #include "devmem.h" @@ -27,20 +28,28 @@ /* Protected by rtnl_lock() */ static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1); +static const struct memory_provider_ops dmabuf_devmem_ops; + +bool net_is_devmem_iov(struct net_iov *niov) +{ + return niov->pp->mp_ops == &dmabuf_devmem_ops; +} + static void net_devmem_dmabuf_free_chunk_owner(struct gen_pool *genpool, struct gen_pool_chunk *chunk, void *not_used) { struct dmabuf_genpool_chunk_owner *owner = chunk->owner; - kvfree(owner->niovs); + kvfree(owner->area.niovs); kfree(owner); } static dma_addr_t net_devmem_get_dma_addr(const struct net_iov *niov) { - struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov); + struct dmabuf_genpool_chunk_owner *owner; + owner = net_devmem_iov_to_chunk_owner(niov); return owner->base_dma_addr + ((dma_addr_t)net_iov_idx(niov) << PAGE_SHIFT); } @@ -83,7 +92,7 @@ net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding) offset = dma_addr - owner->base_dma_addr; index = offset / PAGE_SIZE; - niov = &owner->niovs[index]; + niov = &owner->area.niovs[index]; niov->pp_magic = 0; niov->pp = NULL; @@ -94,7 +103,7 @@ net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding) void net_devmem_free_dmabuf(struct net_iov *niov) { - struct net_devmem_dmabuf_binding *binding = net_iov_binding(niov); + struct net_devmem_dmabuf_binding *binding = net_devmem_iov_binding(niov); unsigned long dma_addr = net_devmem_get_dma_addr(niov); if (WARN_ON(!gen_pool_has_addr(binding->chunk_pool, dma_addr, @@ -117,6 +126,7 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding) WARN_ON(rxq->mp_params.mp_priv != binding); rxq->mp_params.mp_priv = NULL; + rxq->mp_params.mp_ops = NULL; rxq_idx = get_netdev_rx_queue_index(rxq); @@ -152,7 +162,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, } rxq = __netif_get_rx_queue(dev, rxq_idx); - if (rxq->mp_params.mp_priv) { + if (rxq->mp_params.mp_ops) { NL_SET_ERR_MSG(extack, "designated queue already memory provider bound"); return -EEXIST; } @@ -170,6 +180,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, return err; rxq->mp_params.mp_priv = binding; + rxq->mp_params.mp_ops = &dmabuf_devmem_ops; err = netdev_rx_queue_restart(dev, rxq_idx); if (err) @@ -179,6 +190,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, err_xa_erase: rxq->mp_params.mp_priv = NULL; + rxq->mp_params.mp_ops = NULL; xa_erase(&binding->bound_rxqs, xa_idx); return err; @@ -261,9 +273,9 @@ net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd, goto err_free_chunks; } - owner->base_virtual = virtual; + owner->area.base_virtual = virtual; owner->base_dma_addr = dma_addr; - owner->num_niovs = len / PAGE_SIZE; + owner->area.num_niovs = len / PAGE_SIZE; owner->binding = binding; err = gen_pool_add_owner(binding->chunk_pool, dma_addr, @@ -275,17 +287,17 @@ net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd, goto err_free_chunks; } - owner->niovs = kvmalloc_array(owner->num_niovs, - sizeof(*owner->niovs), - GFP_KERNEL); - if (!owner->niovs) { + owner->area.niovs = kvmalloc_array(owner->area.num_niovs, + sizeof(*owner->area.niovs), + GFP_KERNEL); + if (!owner->area.niovs) { err = -ENOMEM; goto err_free_chunks; } - for (i = 0; i < owner->num_niovs; i++) { - niov = &owner->niovs[i]; - niov->owner = owner; + for (i = 0; i < owner->area.num_niovs; i++) { + niov = &owner->area.niovs[i]; + niov->owner = &owner->area; page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov), net_devmem_get_dma_addr(niov)); } @@ -313,26 +325,6 @@ err_put_dmabuf: return ERR_PTR(err); } -void dev_dmabuf_uninstall(struct net_device *dev) -{ - struct net_devmem_dmabuf_binding *binding; - struct netdev_rx_queue *rxq; - unsigned long xa_idx; - unsigned int i; - - for (i = 0; i < dev->real_num_rx_queues; i++) { - binding = dev->_rx[i].mp_params.mp_priv; - if (!binding) - continue; - - xa_for_each(&binding->bound_rxqs, xa_idx, rxq) - if (rxq == &dev->_rx[i]) { - xa_erase(&binding->bound_rxqs, xa_idx); - break; - } - } -} - /*** "Dmabuf devmem memory provider" ***/ int mp_dmabuf_devmem_init(struct page_pool *pool) @@ -398,3 +390,36 @@ bool mp_dmabuf_devmem_release_page(struct page_pool *pool, netmem_ref netmem) /* We don't want the page pool put_page()ing our net_iovs. */ return false; } + +static int mp_dmabuf_devmem_nl_fill(void *mp_priv, struct sk_buff *rsp, + struct netdev_rx_queue *rxq) +{ + const struct net_devmem_dmabuf_binding *binding = mp_priv; + int type = rxq ? NETDEV_A_QUEUE_DMABUF : NETDEV_A_PAGE_POOL_DMABUF; + + return nla_put_u32(rsp, type, binding->id); +} + +static void mp_dmabuf_devmem_uninstall(void *mp_priv, + struct netdev_rx_queue *rxq) +{ + struct net_devmem_dmabuf_binding *binding = mp_priv; + struct netdev_rx_queue *bound_rxq; + unsigned long xa_idx; + + xa_for_each(&binding->bound_rxqs, xa_idx, bound_rxq) { + if (bound_rxq == rxq) { + xa_erase(&binding->bound_rxqs, xa_idx); + break; + } + } +} + +static const struct memory_provider_ops dmabuf_devmem_ops = { + .init = mp_dmabuf_devmem_init, + .destroy = mp_dmabuf_devmem_destroy, + .alloc_netmems = mp_dmabuf_devmem_alloc_netmems, + .release_netmem = mp_dmabuf_devmem_release_page, + .nl_fill = mp_dmabuf_devmem_nl_fill, + .uninstall = mp_dmabuf_devmem_uninstall, +}; diff --git a/net/core/devmem.h b/net/core/devmem.h index 76099ef9c482..7fc158d52729 100644 --- a/net/core/devmem.h +++ b/net/core/devmem.h @@ -10,6 +10,8 @@ #ifndef _NET_DEVMEM_H #define _NET_DEVMEM_H +#include <net/netmem.h> + struct netlink_ext_ack; struct net_devmem_dmabuf_binding { @@ -51,17 +53,11 @@ struct net_devmem_dmabuf_binding { * allocations from this chunk. */ struct dmabuf_genpool_chunk_owner { - /* Offset into the dma-buf where this chunk starts. */ - unsigned long base_virtual; + struct net_iov_area area; + struct net_devmem_dmabuf_binding *binding; /* dma_addr of the start of the chunk. */ dma_addr_t base_dma_addr; - - /* Array of net_iovs for this chunk. */ - struct net_iov *niovs; - size_t num_niovs; - - struct net_devmem_dmabuf_binding *binding; }; void __net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding); @@ -72,38 +68,34 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding); int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, struct net_devmem_dmabuf_binding *binding, struct netlink_ext_ack *extack); -void dev_dmabuf_uninstall(struct net_device *dev); static inline struct dmabuf_genpool_chunk_owner * -net_iov_owner(const struct net_iov *niov) +net_devmem_iov_to_chunk_owner(const struct net_iov *niov) { - return niov->owner; + struct net_iov_area *owner = net_iov_owner(niov); + + return container_of(owner, struct dmabuf_genpool_chunk_owner, area); } -static inline unsigned int net_iov_idx(const struct net_iov *niov) +static inline struct net_devmem_dmabuf_binding * +net_devmem_iov_binding(const struct net_iov *niov) { - return niov - net_iov_owner(niov)->niovs; + return net_devmem_iov_to_chunk_owner(niov)->binding; } -static inline struct net_devmem_dmabuf_binding * -net_iov_binding(const struct net_iov *niov) +static inline u32 net_devmem_iov_binding_id(const struct net_iov *niov) { - return net_iov_owner(niov)->binding; + return net_devmem_iov_binding(niov)->id; } static inline unsigned long net_iov_virtual_addr(const struct net_iov *niov) { - struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov); + struct net_iov_area *owner = net_iov_owner(niov); return owner->base_virtual + ((unsigned long)net_iov_idx(niov) << PAGE_SHIFT); } -static inline u32 net_iov_binding_id(const struct net_iov *niov) -{ - return net_iov_owner(niov)->binding->id; -} - static inline void net_devmem_dmabuf_binding_get(struct net_devmem_dmabuf_binding *binding) { @@ -123,6 +115,8 @@ struct net_iov * net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding); void net_devmem_free_dmabuf(struct net_iov *ppiov); +bool net_is_devmem_iov(struct net_iov *niov); + #else struct net_devmem_dmabuf_binding; @@ -152,10 +146,6 @@ net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, return -EOPNOTSUPP; } -static inline void dev_dmabuf_uninstall(struct net_device *dev) -{ -} - static inline struct net_iov * net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding) { @@ -171,10 +161,15 @@ static inline unsigned long net_iov_virtual_addr(const struct net_iov *niov) return 0; } -static inline u32 net_iov_binding_id(const struct net_iov *niov) +static inline u32 net_devmem_iov_binding_id(const struct net_iov *niov) { return 0; } + +static inline bool net_is_devmem_iov(struct net_iov *niov) +{ + return false; +} #endif #endif /* _NET_DEVMEM_H */ diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 94a7872ab231..5ddd34cbe7f6 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -373,7 +373,8 @@ static int call_fib_rule_notifiers(struct net *net, .rule = rule, }; - ASSERT_RTNL(); + ASSERT_RTNL_NET(net); + /* Paired with READ_ONCE() in fib_rules_seq() */ WRITE_ONCE(ops->fib_rules_seq, ops->fib_rules_seq + 1); return call_fib_notifiers(net, event_type, &info.info); @@ -461,9 +462,6 @@ static struct fib_rule *rule_find(struct fib_rules_ops *ops, if (rule->tun_id && r->tun_id != rule->tun_id) continue; - if (r->fr_net != rule->fr_net) - continue; - if (rule->l3mdev && r->l3mdev != rule->l3mdev) continue; @@ -483,11 +481,17 @@ static struct fib_rule *rule_find(struct fib_rules_ops *ops, &rule->sport_range)) continue; + if (rule->sport_mask && r->sport_mask != rule->sport_mask) + continue; + if (fib_rule_port_range_set(&rule->dport_range) && !fib_rule_port_range_compare(&r->dport_range, &rule->dport_range)) continue; + if (rule->dport_mask && r->dport_mask != rule->dport_mask) + continue; + if (!ops->compare(r, frh, tb)) continue; return r; @@ -517,14 +521,40 @@ static int fib_nl2rule_l3mdev(struct nlattr *nla, struct fib_rule *nlrule, } #endif -static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh, +static int fib_nl2rule_port_mask(const struct nlattr *mask_attr, + const struct fib_rule_port_range *range, + u16 *port_mask, + struct netlink_ext_ack *extack) +{ + if (!fib_rule_port_range_valid(range)) { + NL_SET_ERR_MSG_ATTR(extack, mask_attr, + "Cannot specify port mask without port value"); + return -EINVAL; + } + + if (fib_rule_port_is_range(range)) { + NL_SET_ERR_MSG_ATTR(extack, mask_attr, + "Cannot specify port mask for port range"); + return -EINVAL; + } + + if (range->start & ~nla_get_u16(mask_attr)) { + NL_SET_ERR_MSG_ATTR(extack, mask_attr, "Invalid port mask"); + return -EINVAL; + } + + *port_mask = nla_get_u16(mask_attr); + + return 0; +} + +static int fib_nl2rule(struct net *net, struct nlmsghdr *nlh, struct netlink_ext_ack *extack, struct fib_rules_ops *ops, struct nlattr *tb[], struct fib_rule **rule, bool *user_priority) { - struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); struct fib_rule *nlrule = NULL; int err = -EINVAL; @@ -556,30 +586,18 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[FRA_PRIORITY]) { nlrule->pref = nla_get_u32(tb[FRA_PRIORITY]); *user_priority = true; - } else { - nlrule->pref = fib_default_rule_pref(ops); } nlrule->proto = nla_get_u8_default(tb[FRA_PROTOCOL], RTPROT_UNSPEC); if (tb[FRA_IIFNAME]) { - struct net_device *dev; - nlrule->iifindex = -1; nla_strscpy(nlrule->iifname, tb[FRA_IIFNAME], IFNAMSIZ); - dev = __dev_get_by_name(net, nlrule->iifname); - if (dev) - nlrule->iifindex = dev->ifindex; } if (tb[FRA_OIFNAME]) { - struct net_device *dev; - nlrule->oifindex = -1; nla_strscpy(nlrule->oifname, tb[FRA_OIFNAME], IFNAMSIZ); - dev = __dev_get_by_name(net, nlrule->oifname); - if (dev) - nlrule->oifindex = dev->ifindex; } if (tb[FRA_FWMARK]) { @@ -621,11 +639,6 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh, } nlrule->target = nla_get_u32(tb[FRA_GOTO]); - /* Backward jumps are prohibited to avoid endless loops */ - if (nlrule->target <= nlrule->pref) { - NL_SET_ERR_MSG(extack, "Backward goto not supported"); - goto errout_free; - } } else if (nlrule->action == FR_ACT_GOTO) { NL_SET_ERR_MSG(extack, "Missing goto target for action goto"); goto errout_free; @@ -664,6 +677,16 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh, NL_SET_ERR_MSG(extack, "Invalid sport range"); goto errout_free; } + if (!fib_rule_port_is_range(&nlrule->sport_range)) + nlrule->sport_mask = U16_MAX; + } + + if (tb[FRA_SPORT_MASK]) { + err = fib_nl2rule_port_mask(tb[FRA_SPORT_MASK], + &nlrule->sport_range, + &nlrule->sport_mask, extack); + if (err) + goto errout_free; } if (tb[FRA_DPORT_RANGE]) { @@ -673,6 +696,16 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh, NL_SET_ERR_MSG(extack, "Invalid dport range"); goto errout_free; } + if (!fib_rule_port_is_range(&nlrule->dport_range)) + nlrule->dport_mask = U16_MAX; + } + + if (tb[FRA_DPORT_MASK]) { + err = fib_nl2rule_port_mask(tb[FRA_DPORT_MASK], + &nlrule->dport_range, + &nlrule->dport_mask, extack); + if (err) + goto errout_free; } *rule = nlrule; @@ -685,6 +718,39 @@ errout: return err; } +static int fib_nl2rule_rtnl(struct fib_rule *nlrule, + struct fib_rules_ops *ops, + struct nlattr *tb[], + struct netlink_ext_ack *extack) +{ + if (!tb[FRA_PRIORITY]) + nlrule->pref = fib_default_rule_pref(ops); + + /* Backward jumps are prohibited to avoid endless loops */ + if (tb[FRA_GOTO] && nlrule->target <= nlrule->pref) { + NL_SET_ERR_MSG(extack, "Backward goto not supported"); + return -EINVAL; + } + + if (tb[FRA_IIFNAME]) { + struct net_device *dev; + + dev = __dev_get_by_name(nlrule->fr_net, nlrule->iifname); + if (dev) + nlrule->iifindex = dev->ifindex; + } + + if (tb[FRA_OIFNAME]) { + struct net_device *dev; + + dev = __dev_get_by_name(nlrule->fr_net, nlrule->oifname); + if (dev) + nlrule->oifindex = dev->ifindex; + } + + return 0; +} + static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh, struct nlattr **tb, struct fib_rule *rule) { @@ -721,9 +787,6 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh, if (r->tun_id != rule->tun_id) continue; - if (r->fr_net != rule->fr_net) - continue; - if (r->l3mdev != rule->l3mdev) continue; @@ -741,10 +804,16 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh, &rule->sport_range)) continue; + if (r->sport_mask != rule->sport_mask) + continue; + if (!fib_rule_port_range_compare(&r->dport_range, &rule->dport_range)) continue; + if (r->dport_mask != rule->dport_mask) + continue; + if (!ops->compare(r, frh, tb)) continue; return 1; @@ -774,17 +843,18 @@ static const struct nla_policy fib_rule_policy[FRA_MAX + 1] = { [FRA_DSCP] = NLA_POLICY_MAX(NLA_U8, INET_DSCP_MASK >> 2), [FRA_FLOWLABEL] = { .type = NLA_BE32 }, [FRA_FLOWLABEL_MASK] = { .type = NLA_BE32 }, + [FRA_SPORT_MASK] = { .type = NLA_U16 }, + [FRA_DPORT_MASK] = { .type = NLA_U16 }, }; -int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) +int fib_newrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, bool rtnl_held) { - struct net *net = sock_net(skb->sk); + struct fib_rule *rule = NULL, *r, *last = NULL; struct fib_rule_hdr *frh = nlmsg_data(nlh); + int err = -EINVAL, unresolved = 0; struct fib_rules_ops *ops = NULL; - struct fib_rule *rule = NULL, *r, *last = NULL; struct nlattr *tb[FRA_MAX + 1]; - int err = -EINVAL, unresolved = 0; bool user_priority = false; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) { @@ -806,10 +876,17 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, goto errout; } - err = fib_nl2rule(skb, nlh, extack, ops, tb, &rule, &user_priority); + err = fib_nl2rule(net, nlh, extack, ops, tb, &rule, &user_priority); if (err) goto errout; + if (!rtnl_held) + rtnl_net_lock(net); + + err = fib_nl2rule_rtnl(rule, ops, tb, extack); + if (err) + goto errout_free; + if ((nlh->nlmsg_flags & NLM_F_EXCL) && rule_exists(ops, frh, tb, rule)) { err = -EEXIST; @@ -871,29 +948,42 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, if (rule->tun_id) ip_tunnel_need_metadata(); + fib_rule_get(rule); + + if (!rtnl_held) + rtnl_net_unlock(net); + notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid); + fib_rule_put(rule); flush_route_cache(ops); rules_ops_put(ops); return 0; errout_free: + if (!rtnl_held) + rtnl_net_unlock(net); kfree(rule); errout: rules_ops_put(ops); return err; } -EXPORT_SYMBOL_GPL(fib_nl_newrule); +EXPORT_SYMBOL_GPL(fib_newrule); -int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) +static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { - struct net *net = sock_net(skb->sk); + return fib_newrule(sock_net(skb->sk), skb, nlh, extack, false); +} + +int fib_delrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, bool rtnl_held) +{ + struct fib_rule *rule = NULL, *nlrule = NULL; struct fib_rule_hdr *frh = nlmsg_data(nlh); struct fib_rules_ops *ops = NULL; - struct fib_rule *rule = NULL, *r, *nlrule = NULL; struct nlattr *tb[FRA_MAX+1]; - int err = -EINVAL; bool user_priority = false; + int err = -EINVAL; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) { NL_SET_ERR_MSG(extack, "Invalid msg length"); @@ -914,25 +1004,32 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, goto errout; } - err = fib_nl2rule(skb, nlh, extack, ops, tb, &nlrule, &user_priority); + err = fib_nl2rule(net, nlh, extack, ops, tb, &nlrule, &user_priority); if (err) goto errout; + if (!rtnl_held) + rtnl_net_lock(net); + + err = fib_nl2rule_rtnl(nlrule, ops, tb, extack); + if (err) + goto errout_free; + rule = rule_find(ops, frh, tb, nlrule, user_priority); if (!rule) { err = -ENOENT; - goto errout; + goto errout_free; } if (rule->flags & FIB_RULE_PERMANENT) { err = -EPERM; - goto errout; + goto errout_free; } if (ops->delete) { err = ops->delete(rule); if (err) - goto errout; + goto errout_free; } if (rule->tun_id) @@ -954,7 +1051,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, * current if it is goto rule, have actually been added. */ if (ops->nr_goto_rules > 0) { - struct fib_rule *n; + struct fib_rule *n, *r; n = list_next_entry(rule, list); if (&n->list == &ops->rules_list || n->pref != rule->pref) @@ -968,22 +1065,33 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, } } - call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops, - NULL); - notify_rule_change(RTM_DELRULE, rule, ops, nlh, - NETLINK_CB(skb).portid); + call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops, NULL); + + if (!rtnl_held) + rtnl_net_unlock(net); + + notify_rule_change(RTM_DELRULE, rule, ops, nlh, NETLINK_CB(skb).portid); fib_rule_put(rule); flush_route_cache(ops); rules_ops_put(ops); kfree(nlrule); return 0; -errout: +errout_free: + if (!rtnl_held) + rtnl_net_unlock(net); kfree(nlrule); +errout: rules_ops_put(ops); return err; } -EXPORT_SYMBOL_GPL(fib_nl_delrule); +EXPORT_SYMBOL_GPL(fib_delrule); + +static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + return fib_delrule(sock_net(skb->sk), skb, nlh, extack, false); +} static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, struct fib_rule *rule) @@ -1002,7 +1110,9 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, + nla_total_size(1) /* FRA_PROTOCOL */ + nla_total_size(1) /* FRA_IP_PROTO */ + nla_total_size(sizeof(struct fib_rule_port_range)) /* FRA_SPORT_RANGE */ - + nla_total_size(sizeof(struct fib_rule_port_range)); /* FRA_DPORT_RANGE */ + + nla_total_size(sizeof(struct fib_rule_port_range)) /* FRA_DPORT_RANGE */ + + nla_total_size(2) /* FRA_SPORT_MASK */ + + nla_total_size(2); /* FRA_DPORT_MASK */ if (ops->nlmsg_payload) payload += ops->nlmsg_payload(rule); @@ -1070,8 +1180,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, nla_put_uid_range(skb, &rule->uid_range)) || (fib_rule_port_range_set(&rule->sport_range) && nla_put_port_range(skb, FRA_SPORT_RANGE, &rule->sport_range)) || + (rule->sport_mask && nla_put_u16(skb, FRA_SPORT_MASK, + rule->sport_mask)) || (fib_rule_port_range_set(&rule->dport_range) && nla_put_port_range(skb, FRA_DPORT_RANGE, &rule->dport_range)) || + (rule->dport_mask && nla_put_u16(skb, FRA_DPORT_MASK, + rule->dport_mask)) || (rule->ip_proto && nla_put_u8(skb, FRA_IP_PROTO, rule->ip_proto))) goto nla_put_failure; @@ -1295,8 +1409,10 @@ static struct pernet_operations fib_rules_net_ops = { }; static const struct rtnl_msg_handler fib_rules_rtnl_msg_handlers[] __initconst = { - {.msgtype = RTM_NEWRULE, .doit = fib_nl_newrule}, - {.msgtype = RTM_DELRULE, .doit = fib_nl_delrule}, + {.msgtype = RTM_NEWRULE, .doit = fib_nl_newrule, + .flags = RTNL_FLAG_DOIT_PERNET}, + {.msgtype = RTM_DELRULE, .doit = fib_nl_delrule, + .flags = RTNL_FLAG_DOIT_PERNET}, {.msgtype = RTM_GETRULE, .dumpit = fib_nl_dumprule, .flags = RTNL_FLAG_DUMP_UNLOCKED}, }; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index bd0251bd74a1..d8dd686b5287 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -832,12 +832,10 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, return -ENOENT; } -static void neigh_parms_destroy(struct neigh_parms *parms); - static inline void neigh_parms_put(struct neigh_parms *parms) { if (refcount_dec_and_test(&parms->refcnt)) - neigh_parms_destroy(parms); + kfree(parms); } /* @@ -1713,11 +1711,6 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) } EXPORT_SYMBOL(neigh_parms_release); -static void neigh_parms_destroy(struct neigh_parms *parms) -{ - kfree(parms); -} - static struct lock_class_key neigh_table_proxy_queue_class; static struct neigh_table __rcu *neigh_tables[NEIGH_NR_TABLES] __read_mostly; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 07cb99b114bd..3fe2c521e574 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -42,6 +42,87 @@ static inline int dev_isalive(const struct net_device *dev) return READ_ONCE(dev->reg_state) <= NETREG_REGISTERED; } +/* There is a possible ABBA deadlock between rtnl_lock and kernfs_node->active, + * when unregistering a net device and accessing associated sysfs files. The + * potential deadlock is as follow: + * + * CPU 0 CPU 1 + * + * rtnl_lock vfs_read + * unregister_netdevice_many kernfs_seq_start + * device_del / kobject_put kernfs_get_active (kn->active++) + * kernfs_drain sysfs_kf_seq_show + * wait_event( rtnl_lock + * kn->active == KN_DEACTIVATED_BIAS) -> waits on CPU 0 to release + * -> waits on CPU 1 to decrease kn->active the rtnl lock. + * + * The historical fix was to use rtnl_trylock with restart_syscall to bail out + * of sysfs operations when the lock couldn't be taken. This fixed the above + * issue as it allowed CPU 1 to bail out of the ABBA situation. + * + * But it came with performances issues, as syscalls are being restarted in + * loops when there was contention on the rtnl lock, with huge slow downs in + * specific scenarios (e.g. lots of virtual interfaces created and userspace + * daemons querying their attributes). + * + * The idea below is to bail out of the active kernfs_node protection + * (kn->active) while trying to take the rtnl lock. + * + * This replaces rtnl_lock() and still has to be used with rtnl_unlock(). The + * net device is guaranteed to be alive if this returns successfully. + */ +static int sysfs_rtnl_lock(struct kobject *kobj, struct attribute *attr, + struct net_device *ndev) +{ + struct kernfs_node *kn; + int ret = 0; + + /* First, we hold a reference to the net device as the unregistration + * path might run in parallel. This will ensure the net device and the + * associated sysfs objects won't be freed while we try to take the rtnl + * lock. + */ + dev_hold(ndev); + /* sysfs_break_active_protection was introduced to allow self-removal of + * devices and their associated sysfs files by bailing out of the + * sysfs/kernfs protection. We do this here to allow the unregistration + * path to complete in parallel. The following takes a reference on the + * kobject and the kernfs_node being accessed. + * + * This works because we hold a reference onto the net device and the + * unregistration path will wait for us eventually in netdev_run_todo + * (outside an rtnl lock section). + */ + kn = sysfs_break_active_protection(kobj, attr); + /* We can now try to take the rtnl lock. This can't deadlock us as the + * unregistration path is able to drain sysfs files (kernfs_node) thanks + * to the above dance. + */ + if (rtnl_lock_interruptible()) { + ret = -ERESTARTSYS; + goto unbreak; + } + /* Check dismantle on the device hasn't started, otherwise deny the + * operation. + */ + if (!dev_isalive(ndev)) { + rtnl_unlock(); + ret = -ENODEV; + goto unbreak; + } + /* We are now sure the device dismantle hasn't started nor that it can + * start before we exit the locking section as we hold the rtnl lock. + * There's no need to keep unbreaking the sysfs protection nor to hold + * a net device reference from that point; that was only needed to take + * the rtnl lock. + */ +unbreak: + sysfs_unbreak_active_protection(kn); + dev_put(ndev); + + return ret; +} + /* use same locking rules as GIF* ioctl's */ static ssize_t netdev_show(const struct device *dev, struct device_attribute *attr, char *buf, @@ -95,14 +176,14 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, if (ret) goto err; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + goto err; + + ret = (*set)(netdev, new); + if (ret == 0) + ret = len; - if (dev_isalive(netdev)) { - ret = (*set)(netdev, new); - if (ret == 0) - ret = len; - } rtnl_unlock(); err: return ret; @@ -220,7 +301,7 @@ static ssize_t carrier_store(struct device *dev, struct device_attribute *attr, struct net_device *netdev = to_net_dev(dev); /* The check is also done in change_carrier; this helps returning early - * without hitting the trylock/restart in netdev_store. + * without hitting the locking section in netdev_store. */ if (!netdev->netdev_ops->ndo_change_carrier) return -EOPNOTSUPP; @@ -234,8 +315,9 @@ static ssize_t carrier_show(struct device *dev, struct net_device *netdev = to_net_dev(dev); int ret = -EINVAL; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + return ret; if (netif_running(netdev)) { /* Synchronize carrier state with link watch, @@ -245,8 +327,8 @@ static ssize_t carrier_show(struct device *dev, ret = sysfs_emit(buf, fmt_dec, !!netif_carrier_ok(netdev)); } - rtnl_unlock(); + rtnl_unlock(); return ret; } static DEVICE_ATTR_RW(carrier); @@ -258,13 +340,14 @@ static ssize_t speed_show(struct device *dev, int ret = -EINVAL; /* The check is also done in __ethtool_get_link_ksettings; this helps - * returning early without hitting the trylock/restart below. + * returning early without hitting the locking section below. */ if (!netdev->ethtool_ops->get_link_ksettings) return ret; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + return ret; if (netif_running(netdev)) { struct ethtool_link_ksettings cmd; @@ -284,13 +367,14 @@ static ssize_t duplex_show(struct device *dev, int ret = -EINVAL; /* The check is also done in __ethtool_get_link_ksettings; this helps - * returning early without hitting the trylock/restart below. + * returning early without hitting the locking section below. */ if (!netdev->ethtool_ops->get_link_ksettings) return ret; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + return ret; if (netif_running(netdev)) { struct ethtool_link_ksettings cmd; @@ -490,16 +574,15 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr, if (len > 0 && buf[len - 1] == '\n') --count; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + return ret; - if (dev_isalive(netdev)) { - ret = dev_set_alias(netdev, buf, count); - if (ret < 0) - goto err; - ret = len; - netdev_state_change(netdev); - } + ret = dev_set_alias(netdev, buf, count); + if (ret < 0) + goto err; + ret = len; + netdev_state_change(netdev); err: rtnl_unlock(); @@ -551,24 +634,23 @@ static ssize_t phys_port_id_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *netdev = to_net_dev(dev); + struct netdev_phys_item_id ppid; ssize_t ret = -EINVAL; /* The check is also done in dev_get_phys_port_id; this helps returning - * early without hitting the trylock/restart below. + * early without hitting the locking section below. */ if (!netdev->netdev_ops->ndo_get_phys_port_id) return -EOPNOTSUPP; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + return ret; - if (dev_isalive(netdev)) { - struct netdev_phys_item_id ppid; + ret = dev_get_phys_port_id(netdev, &ppid); + if (!ret) + ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id); - ret = dev_get_phys_port_id(netdev, &ppid); - if (!ret) - ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id); - } rtnl_unlock(); return ret; @@ -580,24 +662,23 @@ static ssize_t phys_port_name_show(struct device *dev, { struct net_device *netdev = to_net_dev(dev); ssize_t ret = -EINVAL; + char name[IFNAMSIZ]; /* The checks are also done in dev_get_phys_port_name; this helps - * returning early without hitting the trylock/restart below. + * returning early without hitting the locking section below. */ if (!netdev->netdev_ops->ndo_get_phys_port_name && !netdev->devlink_port) return -EOPNOTSUPP; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + return ret; - if (dev_isalive(netdev)) { - char name[IFNAMSIZ]; + ret = dev_get_phys_port_name(netdev, name, sizeof(name)); + if (!ret) + ret = sysfs_emit(buf, "%s\n", name); - ret = dev_get_phys_port_name(netdev, name, sizeof(name)); - if (!ret) - ret = sysfs_emit(buf, "%s\n", name); - } rtnl_unlock(); return ret; @@ -608,26 +689,25 @@ static ssize_t phys_switch_id_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *netdev = to_net_dev(dev); + struct netdev_phys_item_id ppid = { }; ssize_t ret = -EINVAL; /* The checks are also done in dev_get_phys_port_name; this helps - * returning early without hitting the trylock/restart below. This works + * returning early without hitting the locking section below. This works * because recurse is false when calling dev_get_port_parent_id. */ if (!netdev->netdev_ops->ndo_get_port_parent_id && !netdev->devlink_port) return -EOPNOTSUPP; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + return ret; - if (dev_isalive(netdev)) { - struct netdev_phys_item_id ppid = { }; + ret = dev_get_port_parent_id(netdev, &ppid, false); + if (!ret) + ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id); - ret = dev_get_port_parent_id(netdev, &ppid, false); - if (!ret) - ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id); - } rtnl_unlock(); return ret; @@ -1108,7 +1188,6 @@ static void rx_queue_get_ownership(const struct kobject *kobj, static const struct kobj_type rx_queue_ktype = { .sysfs_ops = &rx_queue_sysfs_ops, .release = rx_queue_release, - .default_groups = rx_queue_default_groups, .namespace = rx_queue_namespace, .get_ownership = rx_queue_get_ownership, }; @@ -1131,6 +1210,22 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) struct kobject *kobj = &queue->kobj; int error = 0; + /* Rx queues are cleared in rx_queue_release to allow later + * re-registration. This is triggered when their kobj refcount is + * dropped. + * + * If a queue is removed while both a read (or write) operation and a + * the re-addition of the same queue are pending (waiting on rntl_lock) + * it might happen that the re-addition will execute before the read, + * making the initial removal to never happen (queue's kobj refcount + * won't drop enough because of the pending read). In such rare case, + * return to allow the removal operation to complete. + */ + if (unlikely(kobj->state_initialized)) { + netdev_warn_once(dev, "Cannot re-add rx queues before their removal completed"); + return -EAGAIN; + } + /* Kobject_put later will trigger rx_queue_release call which * decreases dev refcount: Take that reference here */ @@ -1142,20 +1237,27 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) if (error) goto err; + queue->groups = rx_queue_default_groups; + error = sysfs_create_groups(kobj, queue->groups); + if (error) + goto err; + if (dev->sysfs_rx_queue_group) { error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group); if (error) - goto err; + goto err_default_groups; } error = rx_queue_default_mask(dev, queue); if (error) - goto err; + goto err_default_groups; kobject_uevent(kobj, KOBJ_ADD); return error; +err_default_groups: + sysfs_remove_groups(kobj, queue->groups); err: kobject_put(kobj); return error; @@ -1200,12 +1302,14 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) } while (--i >= new_num) { - struct kobject *kobj = &dev->_rx[i].kobj; + struct netdev_rx_queue *queue = &dev->_rx[i]; + struct kobject *kobj = &queue->kobj; if (!refcount_read(&dev_net(dev)->ns.count)) kobj->uevent_suppress = 1; if (dev->sysfs_rx_queue_group) sysfs_remove_group(kobj, dev->sysfs_rx_queue_group); + sysfs_remove_groups(kobj, queue->groups); kobject_put(kobj); } @@ -1244,9 +1348,11 @@ static int net_rx_queue_change_owner(struct net_device *dev, int num, */ struct netdev_queue_attribute { struct attribute attr; - ssize_t (*show)(struct netdev_queue *queue, char *buf); - ssize_t (*store)(struct netdev_queue *queue, - const char *buf, size_t len); + ssize_t (*show)(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf); + ssize_t (*store)(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, const char *buf, + size_t len); }; #define to_netdev_queue_attr(_attr) \ container_of(_attr, struct netdev_queue_attribute, attr) @@ -1263,7 +1369,7 @@ static ssize_t netdev_queue_attr_show(struct kobject *kobj, if (!attribute->show) return -EIO; - return attribute->show(queue, buf); + return attribute->show(kobj, attr, queue, buf); } static ssize_t netdev_queue_attr_store(struct kobject *kobj, @@ -1277,7 +1383,7 @@ static ssize_t netdev_queue_attr_store(struct kobject *kobj, if (!attribute->store) return -EIO; - return attribute->store(queue, buf, count); + return attribute->store(kobj, attr, queue, buf, count); } static const struct sysfs_ops netdev_queue_sysfs_ops = { @@ -1285,7 +1391,8 @@ static const struct sysfs_ops netdev_queue_sysfs_ops = { .store = netdev_queue_attr_store, }; -static ssize_t tx_timeout_show(struct netdev_queue *queue, char *buf) +static ssize_t tx_timeout_show(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { unsigned long trans_timeout = atomic_long_read(&queue->trans_timeout); @@ -1303,18 +1410,18 @@ static unsigned int get_netdev_queue_index(struct netdev_queue *queue) return i; } -static ssize_t traffic_class_show(struct netdev_queue *queue, - char *buf) +static ssize_t traffic_class_show(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { struct net_device *dev = queue->dev; - int num_tc, tc; - int index; + int num_tc, tc, index, ret; if (!netif_is_multiqueue(dev)) return -ENOENT; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(kobj, attr, queue->dev); + if (ret) + return ret; index = get_netdev_queue_index(queue); @@ -1341,24 +1448,25 @@ static ssize_t traffic_class_show(struct netdev_queue *queue, } #ifdef CONFIG_XPS -static ssize_t tx_maxrate_show(struct netdev_queue *queue, - char *buf) +static ssize_t tx_maxrate_show(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { return sysfs_emit(buf, "%lu\n", queue->tx_maxrate); } -static ssize_t tx_maxrate_store(struct netdev_queue *queue, - const char *buf, size_t len) +static ssize_t tx_maxrate_store(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, const char *buf, + size_t len) { - struct net_device *dev = queue->dev; int err, index = get_netdev_queue_index(queue); + struct net_device *dev = queue->dev; u32 rate = 0; if (!capable(CAP_NET_ADMIN)) return -EPERM; /* The check is also done later; this helps returning early without - * hitting the trylock/restart below. + * hitting the locking section below. */ if (!dev->netdev_ops->ndo_set_tx_maxrate) return -EOPNOTSUPP; @@ -1367,18 +1475,21 @@ static ssize_t tx_maxrate_store(struct netdev_queue *queue, if (err < 0) return err; - if (!rtnl_trylock()) - return restart_syscall(); + err = sysfs_rtnl_lock(kobj, attr, dev); + if (err) + return err; err = -EOPNOTSUPP; if (dev->netdev_ops->ndo_set_tx_maxrate) err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate); - rtnl_unlock(); if (!err) { queue->tx_maxrate = rate; + rtnl_unlock(); return len; } + + rtnl_unlock(); return err; } @@ -1422,16 +1533,17 @@ static ssize_t bql_set(const char *buf, const size_t count, return count; } -static ssize_t bql_show_hold_time(struct netdev_queue *queue, - char *buf) +static ssize_t bql_show_hold_time(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { struct dql *dql = &queue->dql; return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time)); } -static ssize_t bql_set_hold_time(struct netdev_queue *queue, - const char *buf, size_t len) +static ssize_t bql_set_hold_time(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, const char *buf, + size_t len) { struct dql *dql = &queue->dql; unsigned int value; @@ -1450,15 +1562,17 @@ static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init = __ATTR(hold_time, 0644, bql_show_hold_time, bql_set_hold_time); -static ssize_t bql_show_stall_thrs(struct netdev_queue *queue, char *buf) +static ssize_t bql_show_stall_thrs(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { struct dql *dql = &queue->dql; return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->stall_thrs)); } -static ssize_t bql_set_stall_thrs(struct netdev_queue *queue, - const char *buf, size_t len) +static ssize_t bql_set_stall_thrs(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, const char *buf, + size_t len) { struct dql *dql = &queue->dql; unsigned int value; @@ -1484,13 +1598,15 @@ static ssize_t bql_set_stall_thrs(struct netdev_queue *queue, static struct netdev_queue_attribute bql_stall_thrs_attribute __ro_after_init = __ATTR(stall_thrs, 0644, bql_show_stall_thrs, bql_set_stall_thrs); -static ssize_t bql_show_stall_max(struct netdev_queue *queue, char *buf) +static ssize_t bql_show_stall_max(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { return sysfs_emit(buf, "%u\n", READ_ONCE(queue->dql.stall_max)); } -static ssize_t bql_set_stall_max(struct netdev_queue *queue, - const char *buf, size_t len) +static ssize_t bql_set_stall_max(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, const char *buf, + size_t len) { WRITE_ONCE(queue->dql.stall_max, 0); return len; @@ -1499,7 +1615,8 @@ static ssize_t bql_set_stall_max(struct netdev_queue *queue, static struct netdev_queue_attribute bql_stall_max_attribute __ro_after_init = __ATTR(stall_max, 0644, bql_show_stall_max, bql_set_stall_max); -static ssize_t bql_show_stall_cnt(struct netdev_queue *queue, char *buf) +static ssize_t bql_show_stall_cnt(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { struct dql *dql = &queue->dql; @@ -1509,8 +1626,8 @@ static ssize_t bql_show_stall_cnt(struct netdev_queue *queue, char *buf) static struct netdev_queue_attribute bql_stall_cnt_attribute __ro_after_init = __ATTR(stall_cnt, 0444, bql_show_stall_cnt, NULL); -static ssize_t bql_show_inflight(struct netdev_queue *queue, - char *buf) +static ssize_t bql_show_inflight(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { struct dql *dql = &queue->dql; @@ -1521,13 +1638,16 @@ static struct netdev_queue_attribute bql_inflight_attribute __ro_after_init = __ATTR(inflight, 0444, bql_show_inflight, NULL); #define BQL_ATTR(NAME, FIELD) \ -static ssize_t bql_show_ ## NAME(struct netdev_queue *queue, \ - char *buf) \ +static ssize_t bql_show_ ## NAME(struct kobject *kobj, \ + struct attribute *attr, \ + struct netdev_queue *queue, char *buf) \ { \ return bql_show(buf, queue->dql.FIELD); \ } \ \ -static ssize_t bql_set_ ## NAME(struct netdev_queue *queue, \ +static ssize_t bql_set_ ## NAME(struct kobject *kobj, \ + struct attribute *attr, \ + struct netdev_queue *queue, \ const char *buf, size_t len) \ { \ return bql_set(buf, len, &queue->dql.FIELD); \ @@ -1613,19 +1733,21 @@ out_no_maps: return len < PAGE_SIZE ? len : -EINVAL; } -static ssize_t xps_cpus_show(struct netdev_queue *queue, char *buf) +static ssize_t xps_cpus_show(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { struct net_device *dev = queue->dev; unsigned int index; - int len, tc; + int len, tc, ret; if (!netif_is_multiqueue(dev)) return -ENOENT; index = get_netdev_queue_index(queue); - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(kobj, attr, queue->dev); + if (ret) + return ret; /* If queue belongs to subordinate dev use its map */ dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev; @@ -1636,18 +1758,21 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue, char *buf) return -EINVAL; } - /* Make sure the subordinate device can't be freed */ - get_device(&dev->dev); + /* Increase the net device refcnt to make sure it won't be freed while + * xps_queue_show is running. + */ + dev_hold(dev); rtnl_unlock(); len = xps_queue_show(dev, index, tc, buf, XPS_CPUS); - put_device(&dev->dev); + dev_put(dev); return len; } -static ssize_t xps_cpus_store(struct netdev_queue *queue, - const char *buf, size_t len) +static ssize_t xps_cpus_store(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, const char *buf, + size_t len) { struct net_device *dev = queue->dev; unsigned int index; @@ -1671,9 +1796,10 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue, return err; } - if (!rtnl_trylock()) { + err = sysfs_rtnl_lock(kobj, attr, dev); + if (err) { free_cpumask_var(mask); - return restart_syscall(); + return err; } err = netif_set_xps_queue(dev, mask, index); @@ -1687,26 +1813,34 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue, static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init = __ATTR_RW(xps_cpus); -static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf) +static ssize_t xps_rxqs_show(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { struct net_device *dev = queue->dev; unsigned int index; - int tc; + int tc, ret; index = get_netdev_queue_index(queue); - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(kobj, attr, dev); + if (ret) + return ret; tc = netdev_txq_to_tc(dev, index); + + /* Increase the net device refcnt to make sure it won't be freed while + * xps_queue_show is running. + */ + dev_hold(dev); rtnl_unlock(); - if (tc < 0) - return -EINVAL; - return xps_queue_show(dev, index, tc, buf, XPS_RXQS); + ret = tc >= 0 ? xps_queue_show(dev, index, tc, buf, XPS_RXQS) : -EINVAL; + dev_put(dev); + return ret; } -static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf, +static ssize_t xps_rxqs_store(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, const char *buf, size_t len) { struct net_device *dev = queue->dev; @@ -1730,9 +1864,10 @@ static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf, return err; } - if (!rtnl_trylock()) { + err = sysfs_rtnl_lock(kobj, attr, dev); + if (err) { bitmap_free(mask); - return restart_syscall(); + return err; } cpus_read_lock(); @@ -1792,7 +1927,6 @@ static void netdev_queue_get_ownership(const struct kobject *kobj, static const struct kobj_type netdev_queue_ktype = { .sysfs_ops = &netdev_queue_sysfs_ops, .release = netdev_queue_release, - .default_groups = netdev_queue_default_groups, .namespace = netdev_queue_namespace, .get_ownership = netdev_queue_get_ownership, }; @@ -1811,6 +1945,22 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) struct kobject *kobj = &queue->kobj; int error = 0; + /* Tx queues are cleared in netdev_queue_release to allow later + * re-registration. This is triggered when their kobj refcount is + * dropped. + * + * If a queue is removed while both a read (or write) operation and a + * the re-addition of the same queue are pending (waiting on rntl_lock) + * it might happen that the re-addition will execute before the read, + * making the initial removal to never happen (queue's kobj refcount + * won't drop enough because of the pending read). In such rare case, + * return to allow the removal operation to complete. + */ + if (unlikely(kobj->state_initialized)) { + netdev_warn_once(dev, "Cannot re-add tx queues before their removal completed"); + return -EAGAIN; + } + /* Kobject_put later will trigger netdev_queue_release call * which decreases dev refcount: Take that reference here */ @@ -1822,15 +1972,22 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) if (error) goto err; + queue->groups = netdev_queue_default_groups; + error = sysfs_create_groups(kobj, queue->groups); + if (error) + goto err; + if (netdev_uses_bql(dev)) { error = sysfs_create_group(kobj, &dql_group); if (error) - goto err; + goto err_default_groups; } kobject_uevent(kobj, KOBJ_ADD); return 0; +err_default_groups: + sysfs_remove_groups(kobj, queue->groups); err: kobject_put(kobj); return error; @@ -1885,6 +2042,7 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) if (netdev_uses_bql(dev)) sysfs_remove_group(&queue->kobj, &dql_group); + sysfs_remove_groups(&queue->kobj, queue->groups); kobject_put(&queue->kobj); } diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 715f85c6b62e..c92fba65b20d 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -10,6 +10,7 @@ #include <net/sock.h> #include <net/xdp.h> #include <net/xdp_sock.h> +#include <net/page_pool/memory_provider.h> #include "dev.h" #include "devmem.h" @@ -266,7 +267,7 @@ netdev_nl_napi_dump_one(struct net_device *netdev, struct sk_buff *rsp, prev_id = UINT_MAX; list_for_each_entry(napi, &netdev->napi_list, dev_list) { - if (napi->napi_id < MIN_NAPI_ID) + if (!napi_id_valid(napi->napi_id)) continue; /* Dump continuation below depends on the list being sorted */ @@ -364,11 +365,18 @@ int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info) return err; } +static int nla_put_napi_id(struct sk_buff *skb, const struct napi_struct *napi) +{ + if (napi && napi_id_valid(napi->napi_id)) + return nla_put_u32(skb, NETDEV_A_QUEUE_NAPI_ID, napi->napi_id); + return 0; +} + static int netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx, u32 q_type, const struct genl_info *info) { - struct net_devmem_dmabuf_binding *binding; + struct pp_memory_provider_params *params; struct netdev_rx_queue *rxq; struct netdev_queue *txq; void *hdr; @@ -385,21 +393,30 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, switch (q_type) { case NETDEV_QUEUE_TYPE_RX: rxq = __netif_get_rx_queue(netdev, q_idx); - if (rxq->napi && nla_put_u32(rsp, NETDEV_A_QUEUE_NAPI_ID, - rxq->napi->napi_id)) + if (nla_put_napi_id(rsp, rxq->napi)) goto nla_put_failure; - binding = rxq->mp_params.mp_priv; - if (binding && - nla_put_u32(rsp, NETDEV_A_QUEUE_DMABUF, binding->id)) + params = &rxq->mp_params; + if (params->mp_ops && + params->mp_ops->nl_fill(params->mp_priv, rsp, rxq)) goto nla_put_failure; +#ifdef CONFIG_XDP_SOCKETS + if (rxq->pool) + if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK)) + goto nla_put_failure; +#endif break; case NETDEV_QUEUE_TYPE_TX: txq = netdev_get_tx_queue(netdev, q_idx); - if (txq->napi && nla_put_u32(rsp, NETDEV_A_QUEUE_NAPI_ID, - txq->napi->napi_id)) + if (nla_put_napi_id(rsp, txq->napi)) goto nla_put_failure; +#ifdef CONFIG_XDP_SOCKETS + if (txq->pool) + if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK)) + goto nla_put_failure; +#endif + break; } genlmsg_end(rsp, hdr); @@ -576,6 +593,7 @@ netdev_nl_stats_write_rx(struct sk_buff *rsp, struct netdev_queue_stats_rx *rx) netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_ALLOC_FAIL, rx->alloc_fail) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROPS, rx->hw_drops) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS, rx->hw_drop_overruns) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_COMPLETE, rx->csum_complete) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY, rx->csum_unnecessary) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_NONE, rx->csum_none) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_BAD, rx->csum_bad) || diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c index db82786fa0c4..ddd54e1e7289 100644 --- a/net/core/netdev_rx_queue.c +++ b/net/core/netdev_rx_queue.c @@ -3,34 +3,34 @@ #include <linux/netdevice.h> #include <net/netdev_queues.h> #include <net/netdev_rx_queue.h> +#include <net/page_pool/memory_provider.h> #include "page_pool_priv.h" int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx) { struct netdev_rx_queue *rxq = __netif_get_rx_queue(dev, rxq_idx); + const struct netdev_queue_mgmt_ops *qops = dev->queue_mgmt_ops; void *new_mem, *old_mem; int err; - if (!dev->queue_mgmt_ops || !dev->queue_mgmt_ops->ndo_queue_stop || - !dev->queue_mgmt_ops->ndo_queue_mem_free || - !dev->queue_mgmt_ops->ndo_queue_mem_alloc || - !dev->queue_mgmt_ops->ndo_queue_start) + if (!qops || !qops->ndo_queue_stop || !qops->ndo_queue_mem_free || + !qops->ndo_queue_mem_alloc || !qops->ndo_queue_start) return -EOPNOTSUPP; ASSERT_RTNL(); - new_mem = kvzalloc(dev->queue_mgmt_ops->ndo_queue_mem_size, GFP_KERNEL); + new_mem = kvzalloc(qops->ndo_queue_mem_size, GFP_KERNEL); if (!new_mem) return -ENOMEM; - old_mem = kvzalloc(dev->queue_mgmt_ops->ndo_queue_mem_size, GFP_KERNEL); + old_mem = kvzalloc(qops->ndo_queue_mem_size, GFP_KERNEL); if (!old_mem) { err = -ENOMEM; goto err_free_new_mem; } - err = dev->queue_mgmt_ops->ndo_queue_mem_alloc(dev, new_mem, rxq_idx); + err = qops->ndo_queue_mem_alloc(dev, new_mem, rxq_idx); if (err) goto err_free_old_mem; @@ -38,15 +38,19 @@ int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx) if (err) goto err_free_new_queue_mem; - err = dev->queue_mgmt_ops->ndo_queue_stop(dev, old_mem, rxq_idx); - if (err) - goto err_free_new_queue_mem; + if (netif_running(dev)) { + err = qops->ndo_queue_stop(dev, old_mem, rxq_idx); + if (err) + goto err_free_new_queue_mem; - err = dev->queue_mgmt_ops->ndo_queue_start(dev, new_mem, rxq_idx); - if (err) - goto err_start_queue; + err = qops->ndo_queue_start(dev, new_mem, rxq_idx); + if (err) + goto err_start_queue; + } else { + swap(new_mem, old_mem); + } - dev->queue_mgmt_ops->ndo_queue_mem_free(dev, old_mem); + qops->ndo_queue_mem_free(dev, old_mem); kvfree(old_mem); kvfree(new_mem); @@ -61,15 +65,15 @@ err_start_queue: * WARN if we fail to recover the old rx queue, and at least free * old_mem so we don't also leak that. */ - if (dev->queue_mgmt_ops->ndo_queue_start(dev, old_mem, rxq_idx)) { + if (qops->ndo_queue_start(dev, old_mem, rxq_idx)) { WARN(1, "Failed to restart old queue in error path. RX queue %d may be unhealthy.", rxq_idx); - dev->queue_mgmt_ops->ndo_queue_mem_free(dev, old_mem); + qops->ndo_queue_mem_free(dev, old_mem); } err_free_new_queue_mem: - dev->queue_mgmt_ops->ndo_queue_mem_free(dev, new_mem); + qops->ndo_queue_mem_free(dev, new_mem); err_free_old_mem: kvfree(old_mem); @@ -80,3 +84,71 @@ err_free_new_mem: return err; } EXPORT_SYMBOL_NS_GPL(netdev_rx_queue_restart, "NETDEV_INTERNAL"); + +static int __net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, + struct pp_memory_provider_params *p) +{ + struct netdev_rx_queue *rxq; + int ret; + + if (ifq_idx >= dev->real_num_rx_queues) + return -EINVAL; + ifq_idx = array_index_nospec(ifq_idx, dev->real_num_rx_queues); + + rxq = __netif_get_rx_queue(dev, ifq_idx); + if (rxq->mp_params.mp_ops) + return -EEXIST; + + rxq->mp_params = *p; + ret = netdev_rx_queue_restart(dev, ifq_idx); + if (ret) { + rxq->mp_params.mp_ops = NULL; + rxq->mp_params.mp_priv = NULL; + } + return ret; +} + +int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, + struct pp_memory_provider_params *p) +{ + int ret; + + rtnl_lock(); + ret = __net_mp_open_rxq(dev, ifq_idx, p); + rtnl_unlock(); + return ret; +} + +static void __net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx, + struct pp_memory_provider_params *old_p) +{ + struct netdev_rx_queue *rxq; + + if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues)) + return; + + rxq = __netif_get_rx_queue(dev, ifq_idx); + + /* Callers holding a netdev ref may get here after we already + * went thru shutdown via dev_memory_provider_uninstall(). + */ + if (dev->reg_state > NETREG_REGISTERED && + !rxq->mp_params.mp_ops) + return; + + if (WARN_ON_ONCE(rxq->mp_params.mp_ops != old_p->mp_ops || + rxq->mp_params.mp_priv != old_p->mp_priv)) + return; + + rxq->mp_params.mp_ops = NULL; + rxq->mp_params.mp_priv = NULL; + WARN_ON(netdev_rx_queue_restart(dev, ifq_idx)); +} + +void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx, + struct pp_memory_provider_params *old_p) +{ + rtnl_lock(); + __net_mp_close_rxq(dev, ifq_idx, old_p); + rtnl_unlock(); +} diff --git a/net/core/page_pool.c b/net/core/page_pool.c index f5e908c9e7ad..acef1fcd8ddc 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -13,6 +13,7 @@ #include <net/netdev_rx_queue.h> #include <net/page_pool/helpers.h> +#include <net/page_pool/memory_provider.h> #include <net/xdp.h> #include <linux/dma-direction.h> @@ -25,6 +26,7 @@ #include <trace/events/page_pool.h> +#include "dev.h" #include "mp_dmabuf_devmem.h" #include "netmem_priv.h" #include "page_pool_priv.h" @@ -285,13 +287,19 @@ static int page_pool_init(struct page_pool *pool, rxq = __netif_get_rx_queue(pool->slow.netdev, pool->slow.queue_idx); pool->mp_priv = rxq->mp_params.mp_priv; + pool->mp_ops = rxq->mp_params.mp_ops; } - if (pool->mp_priv) { + if (pool->mp_ops) { if (!pool->dma_map || !pool->dma_sync) return -EOPNOTSUPP; - err = mp_dmabuf_devmem_init(pool); + if (WARN_ON(!is_kernel_rodata((unsigned long)pool->mp_ops))) { + err = -EFAULT; + goto free_ptr_ring; + } + + err = pool->mp_ops->init(pool); if (err) { pr_warn("%s() mem-provider init failed %d\n", __func__, err); @@ -587,8 +595,8 @@ netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp) return netmem; /* Slow-path: cache empty, do real allocation */ - if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_priv) - netmem = mp_dmabuf_devmem_alloc_netmems(pool, gfp); + if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops) + netmem = pool->mp_ops->alloc_netmems(pool, gfp); else netmem = __page_pool_alloc_pages_slow(pool, gfp); return netmem; @@ -679,8 +687,8 @@ void page_pool_return_page(struct page_pool *pool, netmem_ref netmem) bool put; put = true; - if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_priv) - put = mp_dmabuf_devmem_release_page(pool, netmem); + if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops) + put = pool->mp_ops->release_netmem(pool, netmem); else __page_pool_release_page_dma(pool, netmem); @@ -1048,8 +1056,8 @@ static void __page_pool_destroy(struct page_pool *pool) page_pool_unlist(pool); page_pool_uninit(pool); - if (pool->mp_priv) { - mp_dmabuf_devmem_destroy(pool); + if (pool->mp_ops) { + pool->mp_ops->destroy(pool); static_branch_dec(&page_pool_mem_providers); } @@ -1104,7 +1112,13 @@ static void page_pool_release_retry(struct work_struct *wq) int inflight; inflight = page_pool_release(pool); - if (!inflight) + /* In rare cases, a driver bug may cause inflight to go negative. + * Don't reschedule release if inflight is 0 or negative. + * - If 0, the page_pool has been destroyed + * - if negative, we will never recover + * in both cases no reschedule is necessary. + */ + if (inflight <= 0) return; /* Periodic warning for page pools the user can't see */ @@ -1140,11 +1154,7 @@ void page_pool_disable_direct_recycling(struct page_pool *pool) if (!pool->p.napi) return; - /* To avoid races with recycling and additional barriers make sure - * pool and NAPI are unlinked when NAPI is disabled. - */ - WARN_ON(!test_bit(NAPI_STATE_SCHED, &pool->p.napi->state)); - WARN_ON(READ_ONCE(pool->p.napi->list_owner) != -1); + napi_assert_will_not_race(pool->p.napi); mutex_lock(&page_pools_lock); WRITE_ONCE(pool->p.napi, NULL); @@ -1190,3 +1200,31 @@ void page_pool_update_nid(struct page_pool *pool, int new_nid) } } EXPORT_SYMBOL(page_pool_update_nid); + +bool net_mp_niov_set_dma_addr(struct net_iov *niov, dma_addr_t addr) +{ + return page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov), addr); +} + +/* Associate a niov with a page pool. Should follow with a matching + * net_mp_niov_clear_page_pool() + */ +void net_mp_niov_set_page_pool(struct page_pool *pool, struct net_iov *niov) +{ + netmem_ref netmem = net_iov_to_netmem(niov); + + page_pool_set_pp_info(pool, netmem); + + pool->pages_state_hold_cnt++; + trace_page_pool_state_hold(pool, netmem, pool->pages_state_hold_cnt); +} + +/* Disassociate a niov from a page pool. Should only be used in the + * ->release_netmem() path. + */ +void net_mp_niov_clear_page_pool(struct net_iov *niov) +{ + netmem_ref netmem = net_iov_to_netmem(niov); + + page_pool_clear_pp_info(netmem); +} diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c index 6677e0c2e256..c82a95beceff 100644 --- a/net/core/page_pool_user.c +++ b/net/core/page_pool_user.c @@ -8,9 +8,9 @@ #include <net/netdev_rx_queue.h> #include <net/page_pool/helpers.h> #include <net/page_pool/types.h> +#include <net/page_pool/memory_provider.h> #include <net/sock.h> -#include "devmem.h" #include "page_pool_priv.h" #include "netdev-genl-gen.h" @@ -216,7 +216,6 @@ static int page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool, const struct genl_info *info) { - struct net_devmem_dmabuf_binding *binding = pool->mp_priv; size_t inflight, refsz; unsigned int napi_id; void *hdr; @@ -234,7 +233,7 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool, goto err_cancel; napi_id = pool->p.napi ? READ_ONCE(pool->p.napi->napi_id) : 0; - if (napi_id >= MIN_NAPI_ID && + if (napi_id_valid(napi_id) && nla_put_uint(rsp, NETDEV_A_PAGE_POOL_NAPI_ID, napi_id)) goto err_cancel; @@ -249,7 +248,7 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool, pool->user.detach_time)) goto err_cancel; - if (binding && nla_put_u32(rsp, NETDEV_A_PAGE_POOL_DMABUF, binding->id)) + if (pool->mp_ops && pool->mp_ops->nl_fill(pool->mp_priv, rsp, NULL)) goto err_cancel; genlmsg_end(rsp, hdr); @@ -356,7 +355,7 @@ void page_pool_unlist(struct page_pool *pool) int page_pool_check_memory_provider(struct net_device *dev, struct netdev_rx_queue *rxq) { - struct net_devmem_dmabuf_binding *binding = rxq->mp_params.mp_priv; + void *binding = rxq->mp_params.mp_priv; struct page_pool *pool; struct hlist_node *n; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d1e559fce918..abe1a461ea67 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -80,6 +80,11 @@ void rtnl_lock(void) } EXPORT_SYMBOL(rtnl_lock); +int rtnl_lock_interruptible(void) +{ + return mutex_lock_interruptible(&rtnl_mutex); +} + int rtnl_lock_killable(void) { return mutex_lock_killable(&rtnl_mutex); diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index b0ff6153be62..568779d5a0ef 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -71,7 +71,7 @@ u32 secure_tcpv6_ts_off(const struct net *net, return siphash(&combined, offsetofend(typeof(combined), daddr), &ts_secret); } -EXPORT_SYMBOL(secure_tcpv6_ts_off); +EXPORT_IPV6_MOD(secure_tcpv6_ts_off); u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, __be16 sport, __be16 dport) diff --git a/net/core/sock.c b/net/core/sock.c index eae2ae70a2e0..0d385bf27b38 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -938,6 +938,7 @@ int sock_set_timestamping(struct sock *sk, int optname, WRITE_ONCE(sk->sk_tsflags, val); sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW); + sock_valbool_flag(sk, SOCK_TIMESTAMPING_ANY, !!(val & TSFLAGS_ANY)); if (val & SOF_TIMESTAMPING_RX_SOFTWARE) sock_enable_timestamp(sk, @@ -2041,7 +2042,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname, v.val = READ_ONCE(sk->sk_napi_id); /* aggregate non-NAPI IDs down to 0 */ - if (v.val < MIN_NAPI_ID) + if (!napi_id_valid(v.val)) v.val = 0; break; @@ -3881,7 +3882,7 @@ void sk_get_meminfo(const struct sock *sk, u32 *mem) mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf); mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk); mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf); - mem[SK_MEMINFO_FWD_ALLOC] = sk_forward_alloc_get(sk); + mem[SK_MEMINFO_FWD_ALLOC] = READ_ONCE(sk->sk_forward_alloc); mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued); mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index be515ba821e2..bfa529a54aca 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -426,9 +426,6 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk, newinet = inet_sk(newsk); ireq = inet_rsk(req); - sk_daddr_set(newsk, ireq->ir_rmt_addr); - sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); - newinet->inet_saddr = ireq->ir_loc_addr; RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt)); newinet->mc_index = inet_iif(skb); newinet->mc_ttl = ip_hdr(skb)->ttl; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index d6649246188d..39ae9d89d7d4 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -365,6 +365,9 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; + ireq->ir_rmt_addr = LOOPBACK4_IPV6; + ireq->ir_loc_addr = LOOPBACK4_IPV6; + ireq->ireq_family = AF_INET6; ireq->ir_mark = inet_request_mark(sk, skb); @@ -504,10 +507,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; newnp->saddr = ireq->ir_v6_loc_addr; - newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; - newsk->sk_bound_dev_if = ireq->ir_iif; /* Now IPv6 options... @@ -546,9 +546,6 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, dccp_sync_mss(newsk, dst_mtu(dst)); - newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; - newinet->inet_rcv_saddr = LOOPBACK4_IPV6; - if (__inet_inherit_port(sk, newsk) < 0) { inet_csk_prepare_forced_close(newsk); dccp_done(newsk); diff --git a/net/dsa/user.c b/net/dsa/user.c index 291ab1b4acc4..2296a4ead020 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -1243,16 +1243,25 @@ static int dsa_user_set_eee(struct net_device *dev, struct ethtool_keee *e) if (!ds->ops->support_eee || !ds->ops->support_eee(ds, dp->index)) return -EOPNOTSUPP; - /* Port's PHY and MAC both need to be EEE capable */ - if (!dev->phydev) - return -ENODEV; + /* If the port is using phylink managed EEE, then an unimplemented + * set_mac_eee() is permissible. + */ + if (!phylink_mac_implements_lpi(ds->phylink_mac_ops)) { + /* Port's PHY and MAC both need to be EEE capable */ + if (!dev->phydev) + return -ENODEV; - if (!ds->ops->set_mac_eee) - return -EOPNOTSUPP; + if (!ds->ops->set_mac_eee) + return -EOPNOTSUPP; - ret = ds->ops->set_mac_eee(ds, dp->index, e); - if (ret) - return ret; + ret = ds->ops->set_mac_eee(ds, dp->index, e); + if (ret) + return ret; + } else if (ds->ops->set_mac_eee) { + ret = ds->ops->set_mac_eee(ds, dp->index, e); + if (ret) + return ret; + } return phylink_ethtool_set_eee(dp->pl, e); } diff --git a/net/ethtool/common.c b/net/ethtool/common.c index d88e9080643b..7149d07e90c6 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -213,6 +213,24 @@ const char link_mode_names[][ETH_GSTRING_LEN] = { __DEFINE_LINK_MODE_NAME(10, T1S, Half), __DEFINE_LINK_MODE_NAME(10, T1S_P2MP, Half), __DEFINE_LINK_MODE_NAME(10, T1BRR, Full), + __DEFINE_LINK_MODE_NAME(200000, CR, Full), + __DEFINE_LINK_MODE_NAME(200000, KR, Full), + __DEFINE_LINK_MODE_NAME(200000, DR, Full), + __DEFINE_LINK_MODE_NAME(200000, DR_2, Full), + __DEFINE_LINK_MODE_NAME(200000, SR, Full), + __DEFINE_LINK_MODE_NAME(200000, VR, Full), + __DEFINE_LINK_MODE_NAME(400000, CR2, Full), + __DEFINE_LINK_MODE_NAME(400000, KR2, Full), + __DEFINE_LINK_MODE_NAME(400000, DR2, Full), + __DEFINE_LINK_MODE_NAME(400000, DR2_2, Full), + __DEFINE_LINK_MODE_NAME(400000, SR2, Full), + __DEFINE_LINK_MODE_NAME(400000, VR2, Full), + __DEFINE_LINK_MODE_NAME(800000, CR4, Full), + __DEFINE_LINK_MODE_NAME(800000, KR4, Full), + __DEFINE_LINK_MODE_NAME(800000, DR4, Full), + __DEFINE_LINK_MODE_NAME(800000, DR4_2, Full), + __DEFINE_LINK_MODE_NAME(800000, SR4, Full), + __DEFINE_LINK_MODE_NAME(800000, VR4, Full), }; static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); @@ -221,8 +239,11 @@ static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); #define __LINK_MODE_LANES_CR4 4 #define __LINK_MODE_LANES_CR8 8 #define __LINK_MODE_LANES_DR 1 +#define __LINK_MODE_LANES_DR_2 1 #define __LINK_MODE_LANES_DR2 2 +#define __LINK_MODE_LANES_DR2_2 2 #define __LINK_MODE_LANES_DR4 4 +#define __LINK_MODE_LANES_DR4_2 4 #define __LINK_MODE_LANES_DR8 8 #define __LINK_MODE_LANES_KR 1 #define __LINK_MODE_LANES_KR2 2 @@ -251,6 +272,9 @@ static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); #define __LINK_MODE_LANES_T1L 1 #define __LINK_MODE_LANES_T1S 1 #define __LINK_MODE_LANES_T1S_P2MP 1 +#define __LINK_MODE_LANES_VR 1 +#define __LINK_MODE_LANES_VR2 2 +#define __LINK_MODE_LANES_VR4 4 #define __LINK_MODE_LANES_VR8 8 #define __LINK_MODE_LANES_DR8_2 8 #define __LINK_MODE_LANES_T1BRR 1 @@ -378,6 +402,24 @@ const struct link_mode_info link_mode_params[] = { __DEFINE_LINK_MODE_PARAMS(10, T1S, Half), __DEFINE_LINK_MODE_PARAMS(10, T1S_P2MP, Half), __DEFINE_LINK_MODE_PARAMS(10, T1BRR, Full), + __DEFINE_LINK_MODE_PARAMS(200000, CR, Full), + __DEFINE_LINK_MODE_PARAMS(200000, KR, Full), + __DEFINE_LINK_MODE_PARAMS(200000, DR, Full), + __DEFINE_LINK_MODE_PARAMS(200000, DR_2, Full), + __DEFINE_LINK_MODE_PARAMS(200000, SR, Full), + __DEFINE_LINK_MODE_PARAMS(200000, VR, Full), + __DEFINE_LINK_MODE_PARAMS(400000, CR2, Full), + __DEFINE_LINK_MODE_PARAMS(400000, KR2, Full), + __DEFINE_LINK_MODE_PARAMS(400000, DR2, Full), + __DEFINE_LINK_MODE_PARAMS(400000, DR2_2, Full), + __DEFINE_LINK_MODE_PARAMS(400000, SR2, Full), + __DEFINE_LINK_MODE_PARAMS(400000, VR2, Full), + __DEFINE_LINK_MODE_PARAMS(800000, CR4, Full), + __DEFINE_LINK_MODE_PARAMS(800000, KR4, Full), + __DEFINE_LINK_MODE_PARAMS(800000, DR4, Full), + __DEFINE_LINK_MODE_PARAMS(800000, DR4_2, Full), + __DEFINE_LINK_MODE_PARAMS(800000, SR4, Full), + __DEFINE_LINK_MODE_PARAMS(800000, VR4, Full), }; static_assert(ARRAY_SIZE(link_mode_params) == __ETHTOOL_LINK_MODE_MASK_NBITS); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 7609ce2b2c5e..271c7cef9ef3 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -992,11 +992,17 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, if (rc) return rc; - /* Nonzero ring with RSS only makes sense if NIC adds them together */ - if (cmd == ETHTOOL_SRXCLSRLINS && info.fs.flow_type & FLOW_RSS && - !ops->cap_rss_rxnfc_adds && - ethtool_get_flow_spec_ring(info.fs.ring_cookie)) - return -EINVAL; + if (cmd == ETHTOOL_SRXCLSRLINS && info.fs.flow_type & FLOW_RSS) { + /* Nonzero ring with RSS only makes sense + * if NIC adds them together + */ + if (!ops->cap_rss_rxnfc_adds && + ethtool_get_flow_spec_ring(info.fs.ring_cookie)) + return -EINVAL; + + if (!xa_load(&dev->ethtool->rss_ctx, info.rss_context)) + return -EINVAL; + } if (cmd == ETHTOOL_SRXFH && ops->get_rxfh) { struct ethtool_rxfh_param rxfh = {}; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 21f46ee7b6e9..5df1f1325259 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -153,7 +153,7 @@ void inet_sock_destruct(struct sock *sk) WARN_ON_ONCE(atomic_read(&sk->sk_rmem_alloc)); WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc)); WARN_ON_ONCE(sk->sk_wmem_queued); - WARN_ON_ONCE(sk_forward_alloc_get(sk)); + WARN_ON_ONCE(sk->sk_forward_alloc); kfree(rcu_dereference_protected(inet->inet_opt, 1)); dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1)); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 814300eee39d..a648fff71ea7 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1064,8 +1064,8 @@ static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on) IPV4_DEVCONF_ALL(net, PROXY_ARP) = on; return 0; } - if (__in_dev_get_rtnl(dev)) { - IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, on); + if (__in_dev_get_rtnl_net(dev)) { + IN_DEV_CONF_SET(__in_dev_get_rtnl_net(dev), PROXY_ARP, on); return 0; } return -ENXIO; @@ -1295,14 +1295,14 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) switch (cmd) { case SIOCDARP: - rtnl_lock(); + rtnl_net_lock(net); err = arp_req_delete(net, &r); - rtnl_unlock(); + rtnl_net_unlock(net); break; case SIOCSARP: - rtnl_lock(); + rtnl_net_lock(net); err = arp_req_set(net, &r); - rtnl_unlock(); + rtnl_net_unlock(net); break; case SIOCGARP: rcu_read_lock(); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 55b8151759bc..754f60fb6e25 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -46,6 +46,7 @@ #include <linux/notifier.h> #include <linux/inetdevice.h> #include <linux/igmp.h> +#include "igmp_internal.h" #include <linux/slab.h> #include <linux/hash.h> #ifdef CONFIG_SYSCTL @@ -107,15 +108,6 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { [IFA_PROTO] = { .type = NLA_U8 }, }; -struct inet_fill_args { - u32 portid; - u32 seq; - int event; - unsigned int flags; - int netnsid; - int ifindex; -}; - #define IN4_ADDR_HSIZE_SHIFT 8 #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT) @@ -1847,9 +1839,38 @@ static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh, return 0; } -static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb, - struct netlink_callback *cb, int *s_ip_idx, - struct inet_fill_args *fillargs) +static int in_dev_dump_ifmcaddr(struct in_device *in_dev, struct sk_buff *skb, + struct netlink_callback *cb, int *s_ip_idx, + struct inet_fill_args *fillargs) +{ + struct ip_mc_list *im; + int ip_idx = 0; + int err; + + for (im = rcu_dereference(in_dev->mc_list); + im; + im = rcu_dereference(im->next_rcu)) { + if (ip_idx < *s_ip_idx) { + ip_idx++; + continue; + } + err = inet_fill_ifmcaddr(skb, in_dev->dev, im, fillargs); + if (err < 0) + goto done; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + ip_idx++; + } + err = 0; + ip_idx = 0; +done: + *s_ip_idx = ip_idx; + return err; +} + +static int in_dev_dump_ifaddr(struct in_device *in_dev, struct sk_buff *skb, + struct netlink_callback *cb, int *s_ip_idx, + struct inet_fill_args *fillargs) { struct in_ifaddr *ifa; int ip_idx = 0; @@ -1875,6 +1896,21 @@ done: return err; } +static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb, + struct netlink_callback *cb, int *s_ip_idx, + struct inet_fill_args *fillargs) +{ + switch (fillargs->event) { + case RTM_NEWADDR: + return in_dev_dump_ifaddr(in_dev, skb, cb, s_ip_idx, fillargs); + case RTM_GETMULTICAST: + return in_dev_dump_ifmcaddr(in_dev, skb, cb, s_ip_idx, + fillargs); + default: + return -EINVAL; + } +} + /* Combine dev_addr_genid and dev_base_seq to detect changes. */ static u32 inet_base_seq(const struct net *net) @@ -1890,13 +1926,14 @@ static u32 inet_base_seq(const struct net *net) return res; } -static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) +static int inet_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, + int event) { const struct nlmsghdr *nlh = cb->nlh; struct inet_fill_args fillargs = { .portid = NETLINK_CB(cb->skb).portid, .seq = nlh->nlmsg_seq, - .event = RTM_NEWADDR, + .event = event, .flags = NLM_F_MULTI, .netnsid = -1, }; @@ -1950,6 +1987,16 @@ done: return err; } +static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) +{ + return inet_dump_addr(skb, cb, RTM_NEWADDR); +} + +static int inet_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb) +{ + return inet_dump_addr(skb, cb, RTM_GETMULTICAST); +} + static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, u32 portid) { @@ -2846,6 +2893,8 @@ static const struct rtnl_msg_handler devinet_rtnl_msg_handlers[] __initconst = { {.protocol = PF_INET, .msgtype = RTM_GETNETCONF, .doit = inet_netconf_get_devconf, .dumpit = inet_netconf_dump_devconf, .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED}, + {.owner = THIS_MODULE, .protocol = PF_INET, .msgtype = RTM_GETMULTICAST, + .dumpit = inet_dump_ifmcaddr, .flags = RTNL_FLAG_DUMP_UNLOCKED}, }; void __init devinet_init(void) diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 9517b8667e00..6b3d6a957822 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -201,12 +201,12 @@ INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule, if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto)) return 0; - if (fib_rule_port_range_set(&rule->sport_range) && - !fib_rule_port_inrange(&rule->sport_range, fl4->fl4_sport)) + if (!fib_rule_port_match(&rule->sport_range, rule->sport_mask, + fl4->fl4_sport)) return 0; - if (fib_rule_port_range_set(&rule->dport_range) && - !fib_rule_port_inrange(&rule->dport_range, fl4->fl4_dport)) + if (!fib_rule_port_match(&rule->dport_range, rule->dport_mask, + fl4->fl4_dport)) return 0; return 1; @@ -245,9 +245,9 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct nlattr **tb, struct netlink_ext_ack *extack) { - struct net *net = sock_net(skb->sk); + struct fib4_rule *rule4 = (struct fib4_rule *)rule; + struct net *net = rule->fr_net; int err = -EINVAL; - struct fib4_rule *rule4 = (struct fib4_rule *) rule; if (tb[FRA_FLOWLABEL] || tb[FRA_FLOWLABEL_MASK]) { NL_SET_ERR_MSG(extack, diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 5482edb5aade..799775ba97d4 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -405,7 +405,6 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) struct ipcm_cookie ipc; struct flowi4 fl4; struct sock *sk; - struct inet_sock *inet; __be32 daddr, saddr; u32 mark = IP4_REPLY_MARK(net, skb->mark); int type = icmp_param->data.icmph.type; @@ -424,12 +423,11 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) sk = icmp_xmit_lock(net); if (!sk) goto out_bh_enable; - inet = inet_sk(sk); icmp_param->data.icmph.checksum = 0; ipcm_init(&ipc); - inet->tos = ip_hdr(skb)->tos; + ipc.tos = ip_hdr(skb)->tos; ipc.sockc.mark = mark; daddr = ipc.addr = ip_hdr(skb)->saddr; saddr = fib_compute_spec_dst(skb); @@ -737,8 +735,8 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, icmp_param.data.icmph.checksum = 0; icmp_param.skb = skb_in; icmp_param.offset = skb_network_offset(skb_in); - inet_sk(sk)->tos = tos; ipcm_init(&ipc); + ipc.tos = tos; ipc.addr = iph->saddr; ipc.opt = &icmp_param.replyopts.opt; ipc.sockc.mark = mark; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 3da126cea884..2c394c364cb9 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -81,6 +81,7 @@ #include <linux/skbuff.h> #include <linux/inetdevice.h> #include <linux/igmp.h> +#include "igmp_internal.h" #include <linux/if_arp.h> #include <linux/rtnetlink.h> #include <linux/times.h> @@ -1432,14 +1433,16 @@ static void ip_mc_hash_remove(struct in_device *in_dev, *mc_hash = im->next_hash; } -static int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev, - const struct ip_mc_list *im, int event) +int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev, + const struct ip_mc_list *im, + struct inet_fill_args *args) { struct ifa_cacheinfo ci; struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct ifaddrmsg), 0); + nlh = nlmsg_put(skb, args->portid, args->seq, args->event, + sizeof(struct ifaddrmsg), args->flags); if (!nlh) return -EMSGSIZE; @@ -1468,6 +1471,9 @@ static int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev, static void inet_ifmcaddr_notify(struct net_device *dev, const struct ip_mc_list *im, int event) { + struct inet_fill_args fillargs = { + .event = event, + }; struct net *net = dev_net(dev); struct sk_buff *skb; int err = -ENOMEM; @@ -1479,7 +1485,7 @@ static void inet_ifmcaddr_notify(struct net_device *dev, if (!skb) goto error; - err = inet_fill_ifmcaddr(skb, dev, im, event); + err = inet_fill_ifmcaddr(skb, dev, im, &fillargs); if (err < 0) { WARN_ON_ONCE(err == -EMSGSIZE); nlmsg_free(skb); diff --git a/net/ipv4/igmp_internal.h b/net/ipv4/igmp_internal.h new file mode 100644 index 000000000000..0a1bcc8ec8e1 --- /dev/null +++ b/net/ipv4/igmp_internal.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _LINUX_IGMP_INTERNAL_H +#define _LINUX_IGMP_INTERNAL_H + +struct inet_fill_args { + u32 portid; + u32 seq; + int event; + unsigned int flags; + int netnsid; + int ifindex; +}; + +int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev, + const struct ip_mc_list *im, + struct inet_fill_args *args); +#endif diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index e4decfb270fa..bf9ce0c19657 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -799,18 +799,6 @@ void inet_csk_clear_xmit_timers_sync(struct sock *sk) sk_stop_timer_sync(sk, &sk->sk_timer); } -void inet_csk_delete_keepalive_timer(struct sock *sk) -{ - sk_stop_timer(sk, &sk->sk_timer); -} -EXPORT_SYMBOL(inet_csk_delete_keepalive_timer); - -void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) -{ - sk_reset_timer(sk, &sk->sk_timer, jiffies + len); -} -EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); - struct dst_entry *inet_csk_route_req(const struct sock *sk, struct flowi4 *fl4, const struct request_sock *req) @@ -1249,39 +1237,59 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, const gfp_t priority) { struct sock *newsk = sk_clone_lock(sk, priority); + struct inet_connection_sock *newicsk; + struct inet_request_sock *ireq; + struct inet_sock *newinet; - if (newsk) { - struct inet_connection_sock *newicsk = inet_csk(newsk); + if (!newsk) + return NULL; - inet_sk_set_state(newsk, TCP_SYN_RECV); - newicsk->icsk_bind_hash = NULL; - newicsk->icsk_bind2_hash = NULL; + newicsk = inet_csk(newsk); + newinet = inet_sk(newsk); + ireq = inet_rsk(req); - inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port; - inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num; - inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num); + newicsk->icsk_bind_hash = NULL; + newicsk->icsk_bind2_hash = NULL; - /* listeners have SOCK_RCU_FREE, not the children */ - sock_reset_flag(newsk, SOCK_RCU_FREE); + newinet->inet_dport = ireq->ir_rmt_port; + newinet->inet_num = ireq->ir_num; + newinet->inet_sport = htons(ireq->ir_num); - inet_sk(newsk)->mc_list = NULL; + newsk->sk_bound_dev_if = ireq->ir_iif; - newsk->sk_mark = inet_rsk(req)->ir_mark; - atomic64_set(&newsk->sk_cookie, - atomic64_read(&inet_rsk(req)->ir_cookie)); + newsk->sk_daddr = ireq->ir_rmt_addr; + newsk->sk_rcv_saddr = ireq->ir_loc_addr; + newinet->inet_saddr = ireq->ir_loc_addr; - newicsk->icsk_retransmits = 0; - newicsk->icsk_backoff = 0; - newicsk->icsk_probes_out = 0; - newicsk->icsk_probes_tstamp = 0; +#if IS_ENABLED(CONFIG_IPV6) + newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; + newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; +#endif - /* Deinitialize accept_queue to trap illegal accesses. */ - memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); + /* listeners have SOCK_RCU_FREE, not the children */ + sock_reset_flag(newsk, SOCK_RCU_FREE); - inet_clone_ulp(req, newsk, priority); + inet_sk(newsk)->mc_list = NULL; + + newsk->sk_mark = inet_rsk(req)->ir_mark; + atomic64_set(&newsk->sk_cookie, + atomic64_read(&inet_rsk(req)->ir_cookie)); + + newicsk->icsk_retransmits = 0; + newicsk->icsk_backoff = 0; + newicsk->icsk_probes_out = 0; + newicsk->icsk_probes_tstamp = 0; + + /* Deinitialize accept_queue to trap illegal accesses. */ + memset(&newicsk->icsk_accept_queue, 0, + sizeof(newicsk->icsk_accept_queue)); + + inet_sk_set_state(newsk, TCP_SYN_RECV); + + inet_clone_ulp(req, newsk, priority); + + security_inet_csk_clone(newsk, req); - security_inet_csk_clone(newsk, req); - } return newsk; } EXPORT_SYMBOL_GPL(inet_csk_clone_lock); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 321acc8abf17..efe2a085cf68 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -282,7 +282,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct inet_diag_meminfo minfo = { .idiag_rmem = sk_rmem_alloc_get(sk), .idiag_wmem = READ_ONCE(sk->sk_wmem_queued), - .idiag_fmem = sk_forward_alloc_get(sk), + .idiag_fmem = READ_ONCE(sk->sk_forward_alloc), .idiag_tmem = sk_wmem_alloc_get(sk), }; diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index b8b23a77ceb4..7b1e0a2d6906 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -60,7 +60,7 @@ void inet_peer_base_init(struct inet_peer_base *bp) seqlock_init(&bp->lock); bp->total = 0; } -EXPORT_SYMBOL_GPL(inet_peer_base_init); +EXPORT_IPV6_MOD_GPL(inet_peer_base_init); #define PEER_MAX_GC 32 @@ -218,7 +218,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base, return p; } -EXPORT_SYMBOL_GPL(inet_getpeer); +EXPORT_IPV6_MOD_GPL(inet_getpeer); void inet_putpeer(struct inet_peer *p) { @@ -269,7 +269,7 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout) WRITE_ONCE(peer->rate_tokens, token); return rc; } -EXPORT_SYMBOL(inet_peer_xrlim_allow); +EXPORT_IPV6_MOD(inet_peer_xrlim_allow); void inetpeer_invalidate_tree(struct inet_peer_base *base) { @@ -286,4 +286,4 @@ void inetpeer_invalidate_tree(struct inet_peer_base *base) base->total = 0; } -EXPORT_SYMBOL(inetpeer_invalidate_tree); +EXPORT_IPV6_MOD(inetpeer_invalidate_tree); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index ed1b6b44faf8..c9f11a046c26 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -141,7 +141,6 @@ static int ipgre_err(struct sk_buff *skb, u32 info, const struct iphdr *iph; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; - unsigned int data_len = 0; struct ip_tunnel *t; if (tpi->proto == htons(ETH_P_TEB)) @@ -182,7 +181,6 @@ static int ipgre_err(struct sk_buff *skb, u32 info, case ICMP_TIME_EXCEEDED: if (code != ICMP_EXC_TTL) return 0; - data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */ break; case ICMP_REDIRECT: @@ -190,10 +188,16 @@ static int ipgre_err(struct sk_buff *skb, u32 info, } #if IS_ENABLED(CONFIG_IPV6) - if (tpi->proto == htons(ETH_P_IPV6) && - !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len, - type, data_len)) - return 0; + if (tpi->proto == htons(ETH_P_IPV6)) { + unsigned int data_len = 0; + + if (type == ICMP_TIME_EXCEEDED) + data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */ + + if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len, + type, data_len)) + return 0; + } #endif if (t->parms.iph.daddr == 0 || diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 619ddc087957..85d09f2ecadc 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -705,7 +705,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct ip_options_data opt_copy; int free = 0; __be32 saddr, daddr, faddr; - u8 tos, scope; + u8 scope; int err; pr_debug("ping_v4_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); @@ -768,7 +768,6 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } faddr = ipc.opt->opt.faddr; } - tos = get_rttos(&ipc, inet); scope = ip_sendmsg_scope(inet, &ipc, msg); if (ipv4_is_multicast(daddr)) { @@ -779,7 +778,8 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } else if (!ipc.oif) ipc.oif = READ_ONCE(inet->uc_index); - flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos, scope, + flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, + ipc.tos & INET_DSCP_MASK, scope, sk->sk_protocol, inet_sk_flowi_flags(sk), faddr, saddr, 0, 0, sk->sk_uid); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 4304a68d1db0..6aace4d55733 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -486,7 +486,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct ipcm_cookie ipc; struct rtable *rt = NULL; struct flowi4 fl4; - u8 tos, scope; + u8 scope; int free = 0; __be32 daddr; __be32 saddr; @@ -581,7 +581,6 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) daddr = ipc.opt->opt.faddr; } } - tos = get_rttos(&ipc, inet); scope = ip_sendmsg_scope(inet, &ipc, msg); uc_index = READ_ONCE(inet->uc_index); @@ -606,7 +605,8 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } } - flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos, scope, + flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, + ipc.tos & INET_DSCP_MASK, scope, hdrincl ? ipc.protocol : sk->sk_protocol, inet_sk_flowi_flags(sk) | (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 1948d15f1f28..26816b876dd8 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -222,7 +222,7 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, return NULL; } -EXPORT_SYMBOL(tcp_get_cookie_sock); +EXPORT_IPV6_MOD(tcp_get_cookie_sock); /* * when syncookies are in effect and tcp timestamps are enabled we stored @@ -259,7 +259,7 @@ bool cookie_timestamp_decode(const struct net *net, return READ_ONCE(net->ipv4.sysctl_tcp_window_scaling) != 0; } -EXPORT_SYMBOL(cookie_timestamp_decode); +EXPORT_IPV6_MOD(cookie_timestamp_decode); static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb, struct request_sock *req) @@ -310,7 +310,7 @@ struct request_sock *cookie_bpf_check(struct sock *sk, struct sk_buff *skb) return req; } -EXPORT_SYMBOL_GPL(cookie_bpf_check); +EXPORT_IPV6_MOD_GPL(cookie_bpf_check); #endif struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, @@ -351,7 +351,7 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, return req; } -EXPORT_SYMBOL_GPL(cookie_tcp_reqsk_alloc); +EXPORT_IPV6_MOD_GPL(cookie_tcp_reqsk_alloc); static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 42cb5dc9cb24..3a43010d726f 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -28,6 +28,7 @@ static int tcp_adv_win_scale_max = 31; static int tcp_app_win_max = 31; static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS; static int tcp_min_snd_mss_max = 65535; +static int tcp_rto_max_max = TCP_RTO_MAX_SEC * MSEC_PER_SEC; static int ip_privileged_port_min; static int ip_privileged_port_max = 65535; static int ip_ttl_min = 1; @@ -1583,6 +1584,15 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE, }, + { + .procname = "tcp_rto_max_ms", + .data = &init_net.ipv4.sysctl_tcp_rto_max_ms, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ONE_THOUSAND, + .extra2 = &tcp_rto_max_max, + }, }; static __net_init int ipv4_sysctl_init_net(struct net *net) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0d704bda6c41..6a8f19a10911 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -300,10 +300,10 @@ DEFINE_PER_CPU(u32, tcp_tw_isn); EXPORT_PER_CPU_SYMBOL_GPL(tcp_tw_isn); long sysctl_tcp_mem[3] __read_mostly; -EXPORT_SYMBOL(sysctl_tcp_mem); +EXPORT_IPV6_MOD(sysctl_tcp_mem); atomic_long_t tcp_memory_allocated ____cacheline_aligned_in_smp; /* Current allocated memory. */ -EXPORT_SYMBOL(tcp_memory_allocated); +EXPORT_IPV6_MOD(tcp_memory_allocated); DEFINE_PER_CPU(int, tcp_memory_per_cpu_fw_alloc); EXPORT_PER_CPU_SYMBOL_GPL(tcp_memory_per_cpu_fw_alloc); @@ -316,7 +316,7 @@ EXPORT_SYMBOL(tcp_have_smc); * Current number of TCP sockets. */ struct percpu_counter tcp_sockets_allocated ____cacheline_aligned_in_smp; -EXPORT_SYMBOL(tcp_sockets_allocated); +EXPORT_IPV6_MOD(tcp_sockets_allocated); /* * TCP splice context @@ -349,7 +349,7 @@ void tcp_enter_memory_pressure(struct sock *sk) if (!cmpxchg(&tcp_memory_pressure, 0, val)) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURES); } -EXPORT_SYMBOL_GPL(tcp_enter_memory_pressure); +EXPORT_IPV6_MOD_GPL(tcp_enter_memory_pressure); void tcp_leave_memory_pressure(struct sock *sk) { @@ -362,7 +362,7 @@ void tcp_leave_memory_pressure(struct sock *sk) NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURESCHRONO, jiffies_to_msecs(jiffies - val)); } -EXPORT_SYMBOL_GPL(tcp_leave_memory_pressure); +EXPORT_IPV6_MOD_GPL(tcp_leave_memory_pressure); /* Convert seconds to retransmits based on initial and max timeout */ static u8 secs_to_retrans(int seconds, int timeout, int rto_max) @@ -423,7 +423,7 @@ void tcp_init_sock(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - int rto_min_us; + int rto_min_us, rto_max_ms; tp->out_of_order_queue = RB_ROOT; sk->tcp_rtx_queue = RB_ROOT; @@ -432,6 +432,10 @@ void tcp_init_sock(struct sock *sk) INIT_LIST_HEAD(&tp->tsorted_sent_queue); icsk->icsk_rto = TCP_TIMEOUT_INIT; + + rto_max_ms = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rto_max_ms); + icsk->icsk_rto_max = msecs_to_jiffies(rto_max_ms); + rto_min_us = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rto_min_us); icsk->icsk_rto_min = usecs_to_jiffies(rto_min_us); icsk->icsk_delack_max = TCP_DELACK_MAX; @@ -475,7 +479,7 @@ void tcp_init_sock(struct sock *sk) sk_sockets_allocated_inc(sk); xa_init_flags(&sk->sk_user_frags, XA_FLAGS_ALLOC1); } -EXPORT_SYMBOL(tcp_init_sock); +EXPORT_IPV6_MOD(tcp_init_sock); static void tcp_tx_timestamp(struct sock *sk, struct sockcm_cookie *sockc) { @@ -660,7 +664,7 @@ int tcp_ioctl(struct sock *sk, int cmd, int *karg) *karg = answ; return 0; } -EXPORT_SYMBOL(tcp_ioctl); +EXPORT_IPV6_MOD(tcp_ioctl); void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) { @@ -876,7 +880,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, return ret; } -EXPORT_SYMBOL(tcp_splice_read); +EXPORT_IPV6_MOD(tcp_splice_read); struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, gfp_t gfp, bool force_schedule) @@ -1123,7 +1127,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) /* 'common' sending to sendq */ } - sockcm_init(&sockc, sk); + sockc = (struct sockcm_cookie) { .tsflags = READ_ONCE(sk->sk_tsflags)}; if (msg->msg_controllen) { err = sock_cmsg_send(sk, msg, &sockc); if (unlikely(err)) { @@ -1376,7 +1380,7 @@ void tcp_splice_eof(struct socket *sock) tcp_push(sk, 0, mss_now, tp->nonagle, size_goal); release_sock(sk); } -EXPORT_SYMBOL_GPL(tcp_splice_eof); +EXPORT_IPV6_MOD_GPL(tcp_splice_eof); /* * Handle reading urgent data. BSD has very simple semantics for @@ -1667,7 +1671,7 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor) } return copied; } -EXPORT_SYMBOL(tcp_read_skb); +EXPORT_IPV6_MOD(tcp_read_skb); void tcp_read_done(struct sock *sk, size_t len) { @@ -1712,7 +1716,7 @@ int tcp_peek_len(struct socket *sock) { return tcp_inq(sock->sk); } -EXPORT_SYMBOL(tcp_peek_len); +EXPORT_IPV6_MOD(tcp_peek_len); /* Make sure sk_rcvbuf is big enough to satisfy SO_RCVLOWAT hint */ int tcp_set_rcvlowat(struct sock *sk, int val) @@ -1739,7 +1743,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val) } return 0; } -EXPORT_SYMBOL(tcp_set_rcvlowat); +EXPORT_IPV6_MOD(tcp_set_rcvlowat); void tcp_update_recv_tstamps(struct sk_buff *skb, struct scm_timestamping_internal *tss) @@ -1772,7 +1776,7 @@ int tcp_mmap(struct file *file, struct socket *sock, vma->vm_ops = &tcp_vm_ops; return 0; } -EXPORT_SYMBOL(tcp_mmap); +EXPORT_IPV6_MOD(tcp_mmap); static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb, u32 *offset_frag) @@ -2476,6 +2480,11 @@ static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb, } niov = skb_frag_net_iov(frag); + if (!net_is_devmem_iov(niov)) { + err = -ENODEV; + goto out; + } + end = start + skb_frag_size(frag); copy = end - offset; @@ -2494,7 +2503,7 @@ static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb, /* Will perform the exchange later */ dmabuf_cmsg.frag_token = tcp_xa_pool.tokens[tcp_xa_pool.idx]; - dmabuf_cmsg.dmabuf_id = net_iov_binding_id(niov); + dmabuf_cmsg.dmabuf_id = net_devmem_iov_binding_id(niov); offset += copy; remaining_len -= copy; @@ -2864,7 +2873,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, } return ret; } -EXPORT_SYMBOL(tcp_recvmsg); +EXPORT_IPV6_MOD(tcp_recvmsg); void tcp_set_state(struct sock *sk, int state) { @@ -2994,7 +3003,7 @@ void tcp_shutdown(struct sock *sk, int how) tcp_send_fin(sk); } } -EXPORT_SYMBOL(tcp_shutdown); +EXPORT_IPV6_MOD(tcp_shutdown); int tcp_orphan_count_sum(void) { @@ -3174,7 +3183,7 @@ adjudge_to_death: const int tmo = tcp_fin_time(sk); if (tmo > TCP_TIMEWAIT_LEN) { - inet_csk_reset_keepalive_timer(sk, + tcp_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); } else { tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); @@ -3493,7 +3502,7 @@ static int tcp_repair_options_est(struct sock *sk, sockptr_t optbuf, } DEFINE_STATIC_KEY_FALSE(tcp_tx_delay_enabled); -EXPORT_SYMBOL(tcp_tx_delay_enabled); +EXPORT_IPV6_MOD(tcp_tx_delay_enabled); static void tcp_enable_tx_delay(void) { @@ -3627,7 +3636,7 @@ int tcp_sock_set_keepidle_locked(struct sock *sk, int val) elapsed = tp->keepalive_time - elapsed; else elapsed = 0; - inet_csk_reset_keepalive_timer(sk, elapsed); + tcp_reset_keepalive_timer(sk, elapsed); } return 0; @@ -3802,6 +3811,11 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ)); return 0; + case TCP_RTO_MAX_MS: + if (val < MSEC_PER_SEC || val > TCP_RTO_MAX_SEC * MSEC_PER_SEC) + return -EINVAL; + WRITE_ONCE(inet_csk(sk)->icsk_rto_max, msecs_to_jiffies(val)); + return 0; } sockopt_lock_sock(sk); @@ -4031,7 +4045,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, optval, optlen); return do_tcp_setsockopt(sk, level, optname, optval, optlen); } -EXPORT_SYMBOL(tcp_setsockopt); +EXPORT_IPV6_MOD(tcp_setsockopt); static void tcp_get_info_chrono_stats(const struct tcp_sock *tp, struct tcp_info *info) @@ -4638,6 +4652,9 @@ zerocopy_rcv_out: case TCP_IS_MPTCP: val = 0; break; + case TCP_RTO_MAX_MS: + val = jiffies_to_msecs(tcp_rto_max(sk)); + break; default: return -ENOPROTOOPT; } @@ -4659,7 +4676,7 @@ bool tcp_bpf_bypass_getsockopt(int level, int optname) return false; } -EXPORT_SYMBOL(tcp_bpf_bypass_getsockopt); +EXPORT_IPV6_MOD(tcp_bpf_bypass_getsockopt); int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) @@ -4673,11 +4690,11 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, return do_tcp_getsockopt(sk, level, optname, USER_SOCKPTR(optval), USER_SOCKPTR(optlen)); } -EXPORT_SYMBOL(tcp_getsockopt); +EXPORT_IPV6_MOD(tcp_getsockopt); #ifdef CONFIG_TCP_MD5SIG int tcp_md5_sigpool_id = -1; -EXPORT_SYMBOL_GPL(tcp_md5_sigpool_id); +EXPORT_IPV6_MOD_GPL(tcp_md5_sigpool_id); int tcp_md5_alloc_sigpool(void) { @@ -4723,7 +4740,7 @@ int tcp_md5_hash_key(struct tcp_sigpool *hp, */ return data_race(crypto_ahash_update(hp->req)); } -EXPORT_SYMBOL(tcp_md5_hash_key); +EXPORT_IPV6_MOD(tcp_md5_hash_key); /* Called with rcu_read_lock() */ static enum skb_drop_reason @@ -4843,7 +4860,7 @@ tcp_inbound_hash(struct sock *sk, const struct request_sock *req, return tcp_inbound_md5_hash(sk, skb, saddr, daddr, family, l3index, md5_location); } -EXPORT_SYMBOL_GPL(tcp_inbound_hash); +EXPORT_IPV6_MOD_GPL(tcp_inbound_hash); void tcp_done(struct sock *sk) { diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 32b28fc21b63..1a6b1bc54245 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -274,8 +274,8 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, * because it's been added to the accept queue directly. */ req->timeout = tcp_timeout_init(child); - inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS, - req->timeout, TCP_RTO_MAX); + tcp_reset_xmit_timer(child, ICSK_TIME_RETRANS, + req->timeout, false); refcount_set(&req->rsk_refcnt, 2); @@ -468,7 +468,7 @@ bool tcp_fastopen_defer_connect(struct sock *sk, int *err) } return false; } -EXPORT_SYMBOL(tcp_fastopen_defer_connect); +EXPORT_IPV6_MOD(tcp_fastopen_defer_connect); /* * The following code block is to deal with middle box issues with TFO: diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0cbf81bf3d45..5fddcd0bbe91 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -636,7 +636,7 @@ void tcp_initialize_rcv_mss(struct sock *sk) inet_csk(sk)->icsk_ack.rcv_mss = hint; } -EXPORT_SYMBOL(tcp_initialize_rcv_mss); +EXPORT_IPV6_MOD(tcp_initialize_rcv_mss); /* Receiver "autotuning" code. * @@ -2258,8 +2258,7 @@ static bool tcp_check_sack_reneging(struct sock *sk, int *ack_flag) unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4), msecs_to_jiffies(10)); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - delay, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, delay, false); *ack_flag &= ~FLAG_SET_XMIT_TIMER; return true; } @@ -2716,6 +2715,8 @@ void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost, if (newly_acked_sacked <= 0 || WARN_ON_ONCE(!tp->prior_cwnd)) return; + trace_tcp_cwnd_reduction_tp(sk, newly_acked_sacked, newly_lost, flag); + tp->prr_delivered += newly_acked_sacked; if (delta < 0) { u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered + @@ -2898,7 +2899,7 @@ void tcp_simple_retransmit(struct sock *sk) */ tcp_non_congestion_loss_retransmit(sk); } -EXPORT_SYMBOL(tcp_simple_retransmit); +EXPORT_IPV6_MOD(tcp_simple_retransmit); void tcp_enter_recovery(struct sock *sk, bool ece_ack) { @@ -3288,8 +3289,7 @@ void tcp_rearm_rto(struct sock *sk) */ rto = usecs_to_jiffies(max_t(int, delta_us, 1)); } - tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, - TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, true); } } @@ -3566,10 +3566,10 @@ static void tcp_ack_probe(struct sock *sk) * This function is not for random using! */ } else { - unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX); + unsigned long when = tcp_probe0_when(sk, tcp_rto_max(sk)); when = tcp_clamp_probe0_to_user_timeout(sk, when); - tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, when, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, when, true); } } @@ -4180,7 +4180,6 @@ u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss) } return mss; } -EXPORT_SYMBOL_GPL(tcp_parse_mss_option); /* Look for tcp options. Normally only called on SYN and SYNACK packets. * But, this can also be called on packets in the established flow when @@ -4530,7 +4529,7 @@ void tcp_done_with_error(struct sock *sk, int err) if (!sock_flag(sk, SOCK_DEAD)) sk_error_report(sk); } -EXPORT_SYMBOL(tcp_done_with_error); +EXPORT_IPV6_MOD(tcp_done_with_error); /* When we get a reset we do this. */ void tcp_reset(struct sock *sk, struct sk_buff *skb) @@ -6300,7 +6299,7 @@ csum_error: discard: tcp_drop_reason(sk, skb, reason); } -EXPORT_SYMBOL(tcp_rcv_established); +EXPORT_IPV6_MOD(tcp_rcv_established); void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb) { @@ -6353,7 +6352,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) tp->lsndtime = tcp_jiffies32; if (sock_flag(sk, SOCK_KEEPOPEN)) - inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp)); + tcp_reset_keepalive_timer(sk, keepalive_time_when(tp)); if (!tp->rx_opt.snd_wscale) __tcp_fast_path_on(tp, tp->snd_wnd); @@ -6476,9 +6475,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) { /* Previous FIN/ACK or RST/ACK might be ignored. */ if (icsk->icsk_retransmits == 0) - inet_csk_reset_xmit_timer(sk, - ICSK_TIME_RETRANS, - TCP_TIMEOUT_MIN, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + TCP_TIMEOUT_MIN, false); SKB_DR_SET(reason, TCP_INVALID_ACK_SEQUENCE); goto reset_and_undo; } @@ -6593,8 +6591,8 @@ consume: */ inet_csk_schedule_ack(sk); tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, - TCP_DELACK_MAX, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_DACK, + TCP_DELACK_MAX, false); goto consume; } tcp_send_ack(sk); @@ -6928,7 +6926,7 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tmo = tcp_fin_time(sk); if (tmo > TCP_TIMEWAIT_LEN) { - inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); + tcp_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); } else if (th->fin || sock_owned_by_user(sk)) { /* Bad case. We could lose such FIN otherwise. * It is not a big problem, but it looks confusing @@ -6936,7 +6934,7 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) * if it spins in bh_lock_sock(), but it is really * marginal case. */ - inet_csk_reset_keepalive_timer(sk, tmo); + tcp_reset_keepalive_timer(sk, tmo); } else { tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); goto consume; @@ -7014,7 +7012,7 @@ consume: __kfree_skb(skb); return 0; } -EXPORT_SYMBOL(tcp_rcv_state_process); +EXPORT_IPV6_MOD(tcp_rcv_state_process); static inline void pr_drop_req(struct request_sock *req, __u16 port, int family) { @@ -7196,7 +7194,7 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, return mss; } -EXPORT_SYMBOL_GPL(tcp_get_syncookie_mss); +EXPORT_IPV6_MOD_GPL(tcp_get_syncookie_mss); int tcp_conn_request(struct request_sock_ops *rsk_ops, const struct tcp_request_sock_ops *af_ops, @@ -7377,4 +7375,4 @@ drop: tcp_listendrop(sk); return 0; } -EXPORT_SYMBOL(tcp_conn_request); +EXPORT_IPV6_MOD(tcp_conn_request); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2632844d2c35..7900855237d9 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -92,7 +92,6 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, #endif struct inet_hashinfo tcp_hashinfo; -EXPORT_SYMBOL(tcp_hashinfo); static DEFINE_PER_CPU(struct sock_bh_locked, ipv4_tcp_sk) = { .bh_lock = INIT_LOCAL_LOCK(bh_lock), @@ -199,7 +198,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) return 0; } -EXPORT_SYMBOL_GPL(tcp_twsk_unique); +EXPORT_IPV6_MOD_GPL(tcp_twsk_unique); static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) @@ -359,7 +358,7 @@ failure: inet->inet_dport = 0; return err; } -EXPORT_SYMBOL(tcp_v4_connect); +EXPORT_IPV6_MOD(tcp_v4_connect); /* * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191. @@ -400,7 +399,7 @@ void tcp_v4_mtu_reduced(struct sock *sk) tcp_simple_retransmit(sk); } /* else let the usual retransmit timer handle it */ } -EXPORT_SYMBOL(tcp_v4_mtu_reduced); +EXPORT_IPV6_MOD(tcp_v4_mtu_reduced); static void do_redirect(struct sk_buff *skb, struct sock *sk) { @@ -434,7 +433,7 @@ void tcp_req_err(struct sock *sk, u32 seq, bool abort) } reqsk_put(req); } -EXPORT_SYMBOL(tcp_req_err); +EXPORT_IPV6_MOD(tcp_req_err); /* TCP-LD (RFC 6069) logic */ void tcp_ld_RTO_revert(struct sock *sk, u32 seq) @@ -458,15 +457,14 @@ void tcp_ld_RTO_revert(struct sock *sk, u32 seq) icsk->icsk_backoff--; icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) : TCP_TIMEOUT_INIT; - icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX); + icsk->icsk_rto = inet_csk_rto_backoff(icsk, tcp_rto_max(sk)); tcp_mstamp_refresh(tp); delta_us = (u32)(tp->tcp_mstamp - tcp_skb_timestamp_us(skb)); remaining = icsk->icsk_rto - usecs_to_jiffies(delta_us); if (remaining > 0) { - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - remaining, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, remaining, false); } else { /* RTO revert clocked out retransmission. * Will retransmit now. @@ -474,7 +472,7 @@ void tcp_ld_RTO_revert(struct sock *sk, u32 seq) tcp_retransmit_timer(sk); } } -EXPORT_SYMBOL(tcp_ld_RTO_revert); +EXPORT_IPV6_MOD(tcp_ld_RTO_revert); /* * This routine is called by the ICMP module when it gets some @@ -676,7 +674,7 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr); } -EXPORT_SYMBOL(tcp_v4_send_check); +EXPORT_IPV6_MOD(tcp_v4_send_check); #define REPLY_OPTIONS_LEN (MAX_TCP_OPTION_SPACE / sizeof(__be32)) @@ -1231,7 +1229,7 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) */ DEFINE_STATIC_KEY_DEFERRED_FALSE(tcp_md5_needed, HZ); -EXPORT_SYMBOL(tcp_md5_needed); +EXPORT_IPV6_MOD(tcp_md5_needed); static bool better_md5_match(struct tcp_md5sig_key *old, struct tcp_md5sig_key *new) { @@ -1290,7 +1288,7 @@ struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index, } return best_match; } -EXPORT_SYMBOL(__tcp_md5_do_lookup); +EXPORT_IPV6_MOD(__tcp_md5_do_lookup); static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk, const union tcp_md5_addr *addr, @@ -1337,7 +1335,7 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr; return tcp_md5_do_lookup(sk, l3index, addr, AF_INET); } -EXPORT_SYMBOL(tcp_v4_md5_lookup); +EXPORT_IPV6_MOD(tcp_v4_md5_lookup); static int tcp_md5sig_info_add(struct sock *sk, gfp_t gfp) { @@ -1433,7 +1431,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, return __tcp_md5_do_add(sk, addr, family, prefixlen, l3index, flags, newkey, newkeylen, GFP_KERNEL); } -EXPORT_SYMBOL(tcp_md5_do_add); +EXPORT_IPV6_MOD(tcp_md5_do_add); int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr, int family, u8 prefixlen, int l3index, @@ -1465,7 +1463,7 @@ int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr, key->flags, key->key, key->keylen, sk_gfp_mask(sk, GFP_ATOMIC)); } -EXPORT_SYMBOL(tcp_md5_key_copy); +EXPORT_IPV6_MOD(tcp_md5_key_copy); int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family, u8 prefixlen, int l3index, u8 flags) @@ -1480,7 +1478,7 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family, kfree_rcu(key, rcu); return 0; } -EXPORT_SYMBOL(tcp_md5_do_del); +EXPORT_IPV6_MOD(tcp_md5_do_del); void tcp_clear_md5_list(struct sock *sk) { @@ -1659,7 +1657,7 @@ clear_hash_nostart: memset(md5_hash, 0, 16); return 1; } -EXPORT_SYMBOL(tcp_v4_md5_hash_skb); +EXPORT_IPV6_MOD(tcp_v4_md5_hash_skb); #endif @@ -1732,7 +1730,7 @@ drop: tcp_listendrop(sk); return 0; } -EXPORT_SYMBOL(tcp_v4_conn_request); +EXPORT_IPV6_MOD(tcp_v4_conn_request); /* @@ -1770,10 +1768,6 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, newtp = tcp_sk(newsk); newinet = inet_sk(newsk); ireq = inet_rsk(req); - sk_daddr_set(newsk, ireq->ir_rmt_addr); - sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); - newsk->sk_bound_dev_if = ireq->ir_iif; - newinet->inet_saddr = ireq->ir_loc_addr; inet_opt = rcu_dereference(ireq->ireq_opt); RCU_INIT_POINTER(newinet->inet_opt, inet_opt); newinet->mc_index = inet_iif(skb); @@ -1856,7 +1850,7 @@ put_and_exit: tcp_done(newsk); goto exit; } -EXPORT_SYMBOL(tcp_v4_syn_recv_sock); +EXPORT_IPV6_MOD(tcp_v4_syn_recv_sock); static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb) { @@ -2135,7 +2129,7 @@ no_coalesce: } return false; } -EXPORT_SYMBOL(tcp_add_backlog); +EXPORT_IPV6_MOD(tcp_add_backlog); int tcp_filter(struct sock *sk, struct sk_buff *skb) { @@ -2143,7 +2137,7 @@ int tcp_filter(struct sock *sk, struct sk_buff *skb) return sk_filter_trim_cap(sk, skb, th->doff * 4); } -EXPORT_SYMBOL(tcp_filter); +EXPORT_IPV6_MOD(tcp_filter); static void tcp_v4_restore_cb(struct sk_buff *skb) { @@ -2452,7 +2446,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) sk->sk_rx_dst_ifindex = skb->skb_iif; } } -EXPORT_SYMBOL(inet_sk_rx_dst_set); +EXPORT_IPV6_MOD(inet_sk_rx_dst_set); const struct inet_connection_sock_af_ops ipv4_specific = { .queue_xmit = ip_queue_xmit, @@ -2468,7 +2462,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = { .sockaddr_len = sizeof(struct sockaddr_in), .mtu_reduced = tcp_v4_mtu_reduced, }; -EXPORT_SYMBOL(ipv4_specific); +EXPORT_IPV6_MOD(ipv4_specific); #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { @@ -2578,7 +2572,7 @@ void tcp_v4_destroy_sock(struct sock *sk) sk_sockets_allocated_dec(sk); } -EXPORT_SYMBOL(tcp_v4_destroy_sock); +EXPORT_IPV6_MOD(tcp_v4_destroy_sock); #ifdef CONFIG_PROC_FS /* Proc filesystem TCP sock list dumping. */ @@ -2814,7 +2808,7 @@ out: st->last_pos = *pos; return rc; } -EXPORT_SYMBOL(tcp_seq_start); +EXPORT_IPV6_MOD(tcp_seq_start); void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) { @@ -2845,7 +2839,7 @@ out: st->last_pos = *pos; return rc; } -EXPORT_SYMBOL(tcp_seq_next); +EXPORT_IPV6_MOD(tcp_seq_next); void tcp_seq_stop(struct seq_file *seq, void *v) { @@ -2863,7 +2857,7 @@ void tcp_seq_stop(struct seq_file *seq, void *v) break; } } -EXPORT_SYMBOL(tcp_seq_stop); +EXPORT_IPV6_MOD(tcp_seq_stop); static void get_openreq4(const struct request_sock *req, struct seq_file *f, int i) @@ -3533,6 +3527,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_pingpong_thresh = 1; net->ipv4.sysctl_tcp_rto_min_us = jiffies_to_usecs(TCP_RTO_MIN); + net->ipv4.sysctl_tcp_rto_max_ms = TCP_RTO_MAX_SEC * MSEC_PER_SEC; return 0; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b089b08e9617..1eccc518b957 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -264,7 +264,7 @@ kill: inet_twsk_put(tw); return TCP_TW_SUCCESS; } -EXPORT_SYMBOL(tcp_timewait_state_process); +EXPORT_IPV6_MOD(tcp_timewait_state_process); static void tcp_time_wait_init(struct sock *sk, struct tcp_timewait_sock *tcptw) { @@ -398,7 +398,7 @@ void tcp_twsk_destructor(struct sock *sk) #endif tcp_ao_destroy_sock(sk, true); } -EXPORT_SYMBOL_GPL(tcp_twsk_destructor); +EXPORT_IPV6_MOD_GPL(tcp_twsk_destructor); void tcp_twsk_purge(struct list_head *net_exit_list) { @@ -457,7 +457,6 @@ void tcp_openreq_init_rwin(struct request_sock *req, rcv_wnd); ireq->rcv_wscale = rcv_wscale; } -EXPORT_SYMBOL(tcp_openreq_init_rwin); static void tcp_ecn_openreq_child(struct tcp_sock *tp, const struct request_sock *req) @@ -492,7 +491,7 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) tcp_set_ca_state(sk, TCP_CA_Open); } -EXPORT_SYMBOL_GPL(tcp_ca_openreq_child); +EXPORT_IPV6_MOD_GPL(tcp_ca_openreq_child); static void smc_check_reset_syn_req(const struct tcp_sock *oldtp, struct request_sock *req, @@ -566,8 +565,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, WRITE_ONCE(newtp->write_seq, newtp->pushed_seq = treq->snt_isn + 1); if (sock_flag(newsk, SOCK_KEEPOPEN)) - inet_csk_reset_keepalive_timer(newsk, - keepalive_time_when(newtp)); + tcp_reset_keepalive_timer(newsk, keepalive_time_when(newtp)); newtp->rx_opt.tstamp_ok = ireq->tstamp_ok; newtp->rx_opt.sack_ok = ireq->sack_ok; @@ -910,7 +908,7 @@ embryonic_reset: } return NULL; } -EXPORT_SYMBOL(tcp_check_req); +EXPORT_IPV6_MOD(tcp_check_req); /* * Queue segment on the new socket if the new socket is active, @@ -952,4 +950,4 @@ enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child, sock_put(child); return reason; } -EXPORT_SYMBOL(tcp_child_process); +EXPORT_IPV6_MOD(tcp_child_process); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bc95d2a5924f..b4b40f135432 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -250,7 +250,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss, WRITE_ONCE(*__window_clamp, min_t(__u32, U16_MAX << (*rcv_wscale), window_clamp)); } -EXPORT_SYMBOL(tcp_select_initial_window); +EXPORT_IPV6_MOD(tcp_select_initial_window); /* Chose a new window to advertise, update state in tcp_sock for the * socket, and return result with RFC1323 scaling applied. The return @@ -1171,7 +1171,7 @@ void tcp_release_cb(struct sock *sk) if ((flags & TCPF_ACK_DEFERRED) && inet_csk_ack_scheduled(sk)) tcp_send_ack(sk); } -EXPORT_SYMBOL(tcp_release_cb); +EXPORT_IPV6_MOD(tcp_release_cb); void __init tcp_tasklet_init(void) { @@ -1783,7 +1783,7 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu) return __tcp_mtu_to_mss(sk, pmtu) - (tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr)); } -EXPORT_SYMBOL(tcp_mtu_to_mss); +EXPORT_IPV6_MOD(tcp_mtu_to_mss); /* Inverse of above */ int tcp_mss_to_mtu(struct sock *sk, int mss) @@ -1813,7 +1813,6 @@ void tcp_mtup_init(struct sock *sk) if (icsk->icsk_mtup.enabled) icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; } -EXPORT_SYMBOL(tcp_mtup_init); /* This function synchronize snd mss to current pmtu/exthdr set. @@ -1857,7 +1856,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) return mss_now; } -EXPORT_SYMBOL(tcp_sync_mss); +EXPORT_IPV6_MOD(tcp_sync_mss); /* Compute the current effective MSS, taking SACKs and IP options, * and even PMTU discovery events into account. @@ -2911,7 +2910,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto) if (rto_delta_us > 0) timeout = min_t(u32, timeout, usecs_to_jiffies(rto_delta_us)); - tcp_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout, true); return true; } @@ -3545,8 +3544,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) } if (rearm_timer) tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - inet_csk(sk)->icsk_rto, - TCP_RTO_MAX); + inet_csk(sk)->icsk_rto, true); } /* We allow to exceed memory limits for FIN packets to expedite @@ -3853,7 +3851,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, return skb; } -EXPORT_SYMBOL(tcp_make_synack); +EXPORT_IPV6_MOD(tcp_make_synack); static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst) { @@ -4163,8 +4161,8 @@ int tcp_connect(struct sock *sk) TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS); /* Timer for repeating the SYN until an answer. */ - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - inet_csk(sk)->icsk_rto, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto, false); return 0; } EXPORT_SYMBOL(tcp_connect); @@ -4253,11 +4251,11 @@ void __tcp_send_ack(struct sock *sk, u32 rcv_nxt) unsigned long delay; delay = TCP_DELACK_MAX << icsk->icsk_ack.retry; - if (delay < TCP_RTO_MAX) + if (delay < tcp_rto_max(sk)) icsk->icsk_ack.retry++; inet_csk_schedule_ack(sk); icsk->icsk_ack.ato = TCP_ATO_MIN; - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, delay, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_DACK, delay, false); return; } @@ -4393,7 +4391,7 @@ void tcp_send_probe0(struct sock *sk) if (err <= 0) { if (icsk->icsk_backoff < READ_ONCE(net->ipv4.sysctl_tcp_retries2)) icsk->icsk_backoff++; - timeout = tcp_probe0_when(sk, TCP_RTO_MAX); + timeout = tcp_probe0_when(sk, tcp_rto_max(sk)); } else { /* If packet was not sent due to local congestion, * Let senders fight for local resources conservatively. @@ -4402,7 +4400,7 @@ void tcp_send_probe0(struct sock *sk) } timeout = tcp_clamp_probe0_to_user_timeout(sk, timeout); - tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, timeout, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, timeout, true); } int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) @@ -4430,4 +4428,4 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) } return res; } -EXPORT_SYMBOL(tcp_rtx_synack); +EXPORT_IPV6_MOD(tcp_rtx_synack); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index b412ed88ccd9..728bce01ccd3 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -109,7 +109,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) /* If peer does not open window for long time, or did not transmit * anything for long time, penalize it. */ - if ((s32)(tcp_jiffies32 - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset) + if ((s32)(tcp_jiffies32 - tp->lsndtime) > 2*tcp_rto_max(sk) || !do_reset) shift++; /* If some dubious ICMP arrived, penalize even more. */ @@ -189,12 +189,12 @@ static unsigned int tcp_model_timeout(struct sock *sk, { unsigned int linear_backoff_thresh, timeout; - linear_backoff_thresh = ilog2(TCP_RTO_MAX / rto_base); + linear_backoff_thresh = ilog2(tcp_rto_max(sk) / rto_base); if (boundary <= linear_backoff_thresh) timeout = ((2 << boundary) - 1) * rto_base; else timeout = ((2 << linear_backoff_thresh) - 1) * rto_base + - (boundary - linear_backoff_thresh) * TCP_RTO_MAX; + (boundary - linear_backoff_thresh) * tcp_rto_max(sk); return jiffies_to_msecs(timeout); } /** @@ -268,7 +268,7 @@ static int tcp_write_timeout(struct sock *sk) retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2); if (sock_flag(sk, SOCK_DEAD)) { - const bool alive = icsk->icsk_rto < TCP_RTO_MAX; + const bool alive = icsk->icsk_rto < tcp_rto_max(sk); retry_until = tcp_orphan_retries(sk, alive); do_reset = alive || @@ -416,7 +416,8 @@ static void tcp_probe_timer(struct sock *sk) } max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2); if (sock_flag(sk, SOCK_DEAD)) { - const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX; + unsigned int rto_max = tcp_rto_max(sk); + const bool alive = inet_csk_rto_backoff(icsk, rto_max) < rto_max; max_probes = tcp_orphan_retries(sk, alive); if (!alive && icsk->icsk_backoff >= max_probes) @@ -481,8 +482,8 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req) tcp_update_rto_stats(sk); if (!tp->retrans_stamp) tp->retrans_stamp = tcp_time_stamp_ts(tp); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - req->timeout << req->num_timeout, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + req->timeout << req->num_timeout, false); } static bool tcp_rtx_probe0_timed_out(const struct sock *sk, @@ -492,7 +493,7 @@ static bool tcp_rtx_probe0_timed_out(const struct sock *sk, const struct inet_connection_sock *icsk = inet_csk(sk); u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout); const struct tcp_sock *tp = tcp_sk(sk); - int timeout = TCP_RTO_MAX * 2; + int timeout = tcp_rto_max(sk) * 2; s32 rcv_delta; if (user_timeout) { @@ -626,9 +627,9 @@ void tcp_retransmit_timer(struct sock *sk) /* Retransmission failed because of local congestion, * Let senders fight for local resources conservatively. */ - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - TCP_RESOURCE_PROBE_INTERVAL, - TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + TCP_RESOURCE_PROBE_INTERVAL, + false); goto out; } @@ -665,7 +666,7 @@ out_reset_timer: icsk->icsk_backoff = 0; icsk->icsk_rto = clamp(__tcp_set_rto(tp), tcp_rto_min(sk), - TCP_RTO_MAX); + tcp_rto_max(sk)); } else if (sk->sk_state != TCP_SYN_SENT || tp->total_rto > READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts)) { @@ -673,10 +674,10 @@ out_reset_timer: * activated. */ icsk->icsk_backoff++; - icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); + icsk->icsk_rto = min(icsk->icsk_rto << 1, tcp_rto_max(sk)); } - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - tcp_clamp_rto_to_user_timeout(sk), TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + tcp_clamp_rto_to_user_timeout(sk), false); if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1) + 1, 0)) __sk_dst_reset(sk); @@ -749,7 +750,17 @@ void tcp_syn_ack_timeout(const struct request_sock *req) __NET_INC_STATS(net, LINUX_MIB_TCPTIMEOUTS); } -EXPORT_SYMBOL(tcp_syn_ack_timeout); +EXPORT_IPV6_MOD(tcp_syn_ack_timeout); + +void tcp_reset_keepalive_timer(struct sock *sk, unsigned long len) +{ + sk_reset_timer(sk, &sk->sk_timer, jiffies + len); +} + +static void tcp_delete_keepalive_timer(struct sock *sk) +{ + sk_stop_timer(sk, &sk->sk_timer); +} void tcp_set_keepalive(struct sock *sk, int val) { @@ -757,14 +768,13 @@ void tcp_set_keepalive(struct sock *sk, int val) return; if (val && !sock_flag(sk, SOCK_KEEPOPEN)) - inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk))); + tcp_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk))); else if (!val) - inet_csk_delete_keepalive_timer(sk); + tcp_delete_keepalive_timer(sk); } -EXPORT_SYMBOL_GPL(tcp_set_keepalive); - +EXPORT_IPV6_MOD_GPL(tcp_set_keepalive); -static void tcp_keepalive_timer (struct timer_list *t) +static void tcp_keepalive_timer(struct timer_list *t) { struct sock *sk = from_timer(sk, t, sk_timer); struct inet_connection_sock *icsk = inet_csk(sk); @@ -775,7 +785,7 @@ static void tcp_keepalive_timer (struct timer_list *t) bh_lock_sock(sk); if (sock_owned_by_user(sk)) { /* Try again later. */ - inet_csk_reset_keepalive_timer (sk, HZ/20); + tcp_reset_keepalive_timer(sk, HZ/20); goto out; } @@ -841,7 +851,7 @@ static void tcp_keepalive_timer (struct timer_list *t) } resched: - inet_csk_reset_keepalive_timer (sk, elapsed); + tcp_reset_keepalive_timer(sk, elapsed); goto out; death: diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index a9bb9ce5438e..17c7736d8349 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -121,13 +121,12 @@ #endif struct udp_table udp_table __read_mostly; -EXPORT_SYMBOL(udp_table); long sysctl_udp_mem[3] __read_mostly; -EXPORT_SYMBOL(sysctl_udp_mem); +EXPORT_IPV6_MOD(sysctl_udp_mem); atomic_long_t udp_memory_allocated ____cacheline_aligned_in_smp; -EXPORT_SYMBOL(udp_memory_allocated); +EXPORT_IPV6_MOD(udp_memory_allocated); DEFINE_PER_CPU(int, udp_memory_per_cpu_fw_alloc); EXPORT_PER_CPU_SYMBOL_GPL(udp_memory_per_cpu_fw_alloc); @@ -352,7 +351,7 @@ fail_unlock: fail: return error; } -EXPORT_SYMBOL(udp_lib_get_port); +EXPORT_IPV6_MOD(udp_lib_get_port); int udp_v4_get_port(struct sock *sk, unsigned short snum) { @@ -418,7 +417,7 @@ u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport, return __inet_ehashfn(laddr, lport, faddr, fport, udp_ehash_secret + net_hash_mix(net)); } -EXPORT_SYMBOL(udp_ehashfn); +EXPORT_IPV6_MOD(udp_ehashfn); /** * udp4_lib_lookup1() - Simplified lookup using primary hash (destination port) @@ -653,7 +652,7 @@ void udp_lib_hash4(struct sock *sk, u16 hash) spin_unlock_bh(&hslot->lock); } -EXPORT_SYMBOL(udp_lib_hash4); +EXPORT_IPV6_MOD(udp_lib_hash4); /* call with sock lock */ void udp4_hash4(struct sock *sk) @@ -669,7 +668,7 @@ void udp4_hash4(struct sock *sk) udp_lib_hash4(sk, hash); } -EXPORT_SYMBOL(udp4_hash4); +EXPORT_IPV6_MOD(udp4_hash4); #endif /* CONFIG_BASE_SMALL */ /* UDP is nearly always wildcards out the wazoo, it makes no sense to try @@ -809,11 +808,11 @@ static inline bool __udp_is_mcast_sock(struct net *net, const struct sock *sk, } DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key); -EXPORT_SYMBOL(udp_encap_needed_key); +EXPORT_IPV6_MOD(udp_encap_needed_key); #if IS_ENABLED(CONFIG_IPV6) DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); -EXPORT_SYMBOL(udpv6_encap_needed_key); +EXPORT_IPV6_MOD(udpv6_encap_needed_key); #endif void udp_encap_enable(void) @@ -1041,7 +1040,7 @@ void udp_flush_pending_frames(struct sock *sk) ip_flush_pending_frames(sk); } } -EXPORT_SYMBOL(udp_flush_pending_frames); +EXPORT_IPV6_MOD(udp_flush_pending_frames); /** * udp4_hwcsum - handle outgoing HW checksumming @@ -1229,7 +1228,7 @@ out: WRITE_ONCE(up->pending, 0); return err; } -EXPORT_SYMBOL(udp_push_pending_frames); +EXPORT_IPV6_MOD(udp_push_pending_frames); static int __udp_cmsg_send(struct cmsghdr *cmsg, u16 *gso_size) { @@ -1266,7 +1265,7 @@ int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size) return need_ip; } -EXPORT_SYMBOL_GPL(udp_cmsg_send); +EXPORT_IPV6_MOD_GPL(udp_cmsg_send); int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { @@ -1281,7 +1280,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int free = 0; int connected = 0; __be32 daddr, faddr, saddr; - u8 tos, scope; + u8 scope; __be16 dport; int err, is_udplite = IS_UDPLITE(sk); int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE; @@ -1405,7 +1404,6 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) faddr = ipc.opt->opt.faddr; connected = 0; } - tos = get_rttos(&ipc, inet); scope = ip_sendmsg_scope(inet, &ipc, msg); if (scope == RT_SCOPE_LINK) connected = 0; @@ -1442,7 +1440,8 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl4 = &fl4_stack; - flowi4_init_output(fl4, ipc.oif, ipc.sockc.mark, tos, scope, + flowi4_init_output(fl4, ipc.oif, ipc.sockc.mark, + ipc.tos & INET_DSCP_MASK, scope, sk->sk_protocol, flow_flags, faddr, saddr, dport, inet->inet_sport, sk->sk_uid); @@ -1561,7 +1560,7 @@ void udp_splice_eof(struct socket *sock) udp_push_pending_frames(sk); release_sock(sk); } -EXPORT_SYMBOL_GPL(udp_splice_eof); +EXPORT_IPV6_MOD_GPL(udp_splice_eof); #define UDP_SKB_IS_STATELESS 0x80000000 @@ -1678,7 +1677,7 @@ void udp_skb_destructor(struct sock *sk, struct sk_buff *skb) prefetch(&skb->data); udp_rmem_release(sk, udp_skb_truesize(skb), 1, false); } -EXPORT_SYMBOL(udp_skb_destructor); +EXPORT_IPV6_MOD(udp_skb_destructor); /* as above, but the caller held the rx queue lock, too */ static void udp_skb_dtor_locked(struct sock *sk, struct sk_buff *skb) @@ -1785,7 +1784,7 @@ drop: busylock_release(busy); return err; } -EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb); +EXPORT_IPV6_MOD_GPL(__udp_enqueue_schedule_skb); void udp_destruct_common(struct sock *sk) { @@ -1801,7 +1800,7 @@ void udp_destruct_common(struct sock *sk) } udp_rmem_release(sk, total, 0, true); } -EXPORT_SYMBOL_GPL(udp_destruct_common); +EXPORT_IPV6_MOD_GPL(udp_destruct_common); static void udp_destruct_sock(struct sock *sk) { @@ -1832,7 +1831,7 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) skb_release_head_state(skb); __consume_stateless_skb(skb); } -EXPORT_SYMBOL_GPL(skb_consume_udp); +EXPORT_IPV6_MOD_GPL(skb_consume_udp); static struct sk_buff *__first_packet_length(struct sock *sk, struct sk_buff_head *rcvq, @@ -1914,7 +1913,7 @@ int udp_ioctl(struct sock *sk, int cmd, int *karg) return 0; } -EXPORT_SYMBOL(udp_ioctl); +EXPORT_IPV6_MOD(udp_ioctl); struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, int *off, int *err) @@ -2010,7 +2009,7 @@ try_again: WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk)); return recv_actor(sk, skb); } -EXPORT_SYMBOL(udp_read_skb); +EXPORT_IPV6_MOD(udp_read_skb); /* * This should be easy, if there is something there we @@ -2137,7 +2136,7 @@ int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, &addr_len); } -EXPORT_SYMBOL(udp_pre_connect); +EXPORT_IPV6_MOD(udp_pre_connect); static int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { @@ -2186,7 +2185,7 @@ int udp_disconnect(struct sock *sk, int flags) release_sock(sk); return 0; } -EXPORT_SYMBOL(udp_disconnect); +EXPORT_IPV6_MOD(udp_disconnect); void udp_lib_unhash(struct sock *sk) { @@ -2216,7 +2215,7 @@ void udp_lib_unhash(struct sock *sk) spin_unlock_bh(&hslot->lock); } } -EXPORT_SYMBOL(udp_lib_unhash); +EXPORT_IPV6_MOD(udp_lib_unhash); /* * inet_rcv_saddr was changed, we must rehash secondary hash @@ -2280,7 +2279,7 @@ void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4) } } } -EXPORT_SYMBOL(udp_lib_rehash); +EXPORT_IPV6_MOD(udp_lib_rehash); void udp_v4_rehash(struct sock *sk) { @@ -2485,7 +2484,7 @@ bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) } return false; } -EXPORT_SYMBOL(udp_sk_rx_dst_set); +EXPORT_IPV6_MOD(udp_sk_rx_dst_set); /* * Multicasts and broadcasts go to each listener. @@ -3041,7 +3040,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, return err; } -EXPORT_SYMBOL(udp_lib_setsockopt); +EXPORT_IPV6_MOD(udp_lib_setsockopt); int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) @@ -3112,7 +3111,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, return -EFAULT; return 0; } -EXPORT_SYMBOL(udp_lib_getsockopt); +EXPORT_IPV6_MOD(udp_lib_getsockopt); int udp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) @@ -3154,7 +3153,7 @@ __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait) return mask; } -EXPORT_SYMBOL(udp_poll); +EXPORT_IPV6_MOD(udp_poll); int udp_abort(struct sock *sk, int err) { @@ -3177,7 +3176,7 @@ out: return 0; } -EXPORT_SYMBOL_GPL(udp_abort); +EXPORT_IPV6_MOD_GPL(udp_abort); struct proto udp_prot = { .name = "UDP", @@ -3311,7 +3310,7 @@ void *udp_seq_start(struct seq_file *seq, loff_t *pos) return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; } -EXPORT_SYMBOL(udp_seq_start); +EXPORT_IPV6_MOD(udp_seq_start); void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) { @@ -3325,7 +3324,7 @@ void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) ++*pos; return sk; } -EXPORT_SYMBOL(udp_seq_next); +EXPORT_IPV6_MOD(udp_seq_next); void udp_seq_stop(struct seq_file *seq, void *v) { @@ -3337,7 +3336,7 @@ void udp_seq_stop(struct seq_file *seq, void *v) if (state->bucket <= udptable->mask) spin_unlock_bh(&udptable->hash[state->bucket].lock); } -EXPORT_SYMBOL(udp_seq_stop); +EXPORT_IPV6_MOD(udp_seq_stop); /* ------------------------------------------------------------------------ */ static void udp4_format_sock(struct sock *sp, struct seq_file *f, @@ -3616,7 +3615,7 @@ const struct seq_operations udp_seq_ops = { .stop = udp_seq_stop, .show = udp4_seq_show, }; -EXPORT_SYMBOL(udp_seq_ops); +EXPORT_IPV6_MOD(udp_seq_ops); static struct udp_seq_afinfo udp4_seq_afinfo = { .family = AF_INET, diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 67d39114d9a6..0144d01417d9 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -340,12 +340,12 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule, if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto)) return 0; - if (fib_rule_port_range_set(&rule->sport_range) && - !fib_rule_port_inrange(&rule->sport_range, fl6->fl6_sport)) + if (!fib_rule_port_match(&rule->sport_range, rule->sport_mask, + fl6->fl6_sport)) return 0; - if (fib_rule_port_range_set(&rule->dport_range) && - !fib_rule_port_inrange(&rule->dport_range, fl6->fl6_dport)) + if (!fib_rule_port_match(&rule->dport_range, rule->dport_mask, + fl6->fl6_dport)) return 0; return 1; @@ -399,9 +399,9 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct nlattr **tb, struct netlink_ext_ack *extack) { + struct fib6_rule *rule6 = (struct fib6_rule *)rule; + struct net *net = rule->fr_net; int err = -EINVAL; - struct net *net = sock_net(skb->sk); - struct fib6_rule *rule6 = (struct fib6_rule *) rule; if (!inet_validate_dscp(frh->tos)) { NL_SET_ERR_MSG(extack, diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 8699d1a188dc..ecb5c4b8518f 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1680,7 +1680,7 @@ static void ndisc_fill_redirect_hdr_option(struct sk_buff *skb, void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) { struct net_device *dev = skb->dev; - struct net *net = dev_net(dev); + struct net *net = dev_net_rcu(dev); struct sock *sk = net->ipv6.ndisc_sk; int optlen = 0; struct inet_peer *peer; @@ -1695,8 +1695,8 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL; bool ret; - if (netif_is_l3_master(skb->dev)) { - dev = dev_get_by_index_rcu(dev_net(skb->dev), IPCB(skb)->iif); + if (netif_is_l3_master(dev)) { + dev = dev_get_by_index_rcu(net, IPCB(skb)->iif); if (!dev) return; } @@ -1734,10 +1734,8 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) goto release; } - rcu_read_lock(); peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr); ret = inet_peer_xrlim_allow(peer, 1*HZ); - rcu_read_unlock(); if (!ret) goto release; diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 46b8adf6e7f8..84d90dd8b3f0 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -119,9 +119,6 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return -EINVAL; ipcm6_init_sk(&ipc6, sk); - ipc6.sockc.priority = READ_ONCE(sk->sk_priority); - ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags); - ipc6.sockc.mark = READ_ONCE(sk->sk_mark); fl6.flowi6_oif = oif; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a45aba090aa4..fda640ebd53f 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -769,19 +769,16 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) hdrincl = inet_test_bit(HDRINCL, sk); + ipcm6_init_sk(&ipc6, sk); + /* * Get and verify the address. */ memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_mark = READ_ONCE(sk->sk_mark); + fl6.flowi6_mark = ipc6.sockc.mark; fl6.flowi6_uid = sk->sk_uid; - ipcm6_init(&ipc6); - ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags); - ipc6.sockc.mark = fl6.flowi6_mark; - ipc6.sockc.priority = READ_ONCE(sk->sk_priority); - if (sin6) { if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; @@ -891,9 +888,6 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (hdrincl) fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH; - if (ipc6.tclass < 0) - ipc6.tclass = np->tclass; - fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); @@ -904,9 +898,6 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (ipc6.hlimit < 0) ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); - if (ipc6.dontfrag < 0) - ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk); - if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2debdf085a3b..a80608260298 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -798,6 +798,8 @@ static void tcp_v6_init_req(struct request_sock *req, ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; + ireq->ir_rmt_addr = LOOPBACK4_IPV6; + ireq->ir_loc_addr = LOOPBACK4_IPV6; /* So that link locals have meaning */ if ((!sk_listener->sk_bound_dev_if || l3_slave) && @@ -1451,10 +1453,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * ip6_dst_store(newsk, dst, NULL, NULL); - newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; newnp->saddr = ireq->ir_v6_loc_addr; - newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; - newsk->sk_bound_dev_if = ireq->ir_iif; /* Now IPv6 options... @@ -1507,9 +1506,6 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * tcp_initialize_rcv_mss(newsk); - newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; - newinet->inet_rcv_saddr = LOOPBACK4_IPV6; - #ifdef CONFIG_TCP_MD5SIG l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c6ea438b5c75..3a0d6c5a8286 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1494,11 +1494,8 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int is_udplite = IS_UDPLITE(sk); int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); - ipcm6_init(&ipc6); + ipcm6_init_sk(&ipc6, sk); ipc6.gso_size = READ_ONCE(up->gso_size); - ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags); - ipc6.sockc.mark = READ_ONCE(sk->sk_mark); - ipc6.sockc.priority = READ_ONCE(sk->sk_priority); /* destination address check */ if (sin6) { @@ -1704,9 +1701,6 @@ do_udp_sendmsg: security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6)); - if (ipc6.tclass < 0) - ipc6.tclass = np->tclass; - fl6->flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6->flowlabel); dst = ip6_sk_dst_lookup_flow(sk, fl6, final_p, connected); @@ -1752,8 +1746,6 @@ back_from_confirm: WRITE_ONCE(up->pending, AF_INET6); do_append_data: - if (ipc6.dontfrag < 0) - ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk); up->len += ulen; err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), &ipc6, fl6, dst_rt6_info(dst), diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index f4c1da070826..b98d13584c81 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -547,7 +547,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.flowi6_mark = READ_ONCE(sk->sk_mark); fl6.flowi6_uid = sk->sk_uid; - ipcm6_init(&ipc6); + ipcm6_init_sk(&ipc6, sk); if (lsa) { if (addr_len < SIN6_LEN_RFC2133) @@ -634,9 +634,6 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); - if (ipc6.tclass < 0) - ipc6.tclass = np->tclass; - fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); @@ -648,9 +645,6 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (ipc6.hlimit < 0) ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); - if (ipc6.dontfrag < 0) - ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk); - if (msg->msg_flags & MSG_CONFIRM) goto do_confirm; diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c index a29ff901df75..b9e451197902 100644 --- a/net/mptcp/fastopen.c +++ b/net/mptcp/fastopen.c @@ -40,17 +40,17 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf tp->copied_seq += skb->len; subflow->ssn_offset += skb->len; - /* initialize a dummy sequence number, we will update it at MPC - * completion, if needed - */ + /* Only the sequence delta is relevant */ MPTCP_SKB_CB(skb)->map_seq = -skb->len; MPTCP_SKB_CB(skb)->end_seq = 0; MPTCP_SKB_CB(skb)->offset = 0; MPTCP_SKB_CB(skb)->has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; + MPTCP_SKB_CB(skb)->cant_coalesce = 1; mptcp_data_lock(sk); + DEBUG_NET_WARN_ON_ONCE(sock_owned_by_user_nocheck(sk)); - mptcp_set_owner_r(skb, sk); + skb_set_owner_r(skb, sk); __skb_queue_tail(&sk->sk_receive_queue, skb); mptcp_sk(sk)->bytes_received += skb->len; @@ -58,22 +58,3 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf mptcp_data_unlock(sk); } - -void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, - const struct mptcp_options_received *mp_opt) -{ - struct sock *sk = (struct sock *)msk; - struct sk_buff *skb; - - skb = skb_peek_tail(&sk->sk_receive_queue); - if (skb) { - WARN_ON_ONCE(MPTCP_SKB_CB(skb)->end_seq); - pr_debug("msk %p moving seq %llx -> %llx end_seq %llx -> %llx\n", sk, - MPTCP_SKB_CB(skb)->map_seq, MPTCP_SKB_CB(skb)->map_seq + msk->ack_seq, - MPTCP_SKB_CB(skb)->end_seq, MPTCP_SKB_CB(skb)->end_seq + msk->ack_seq); - MPTCP_SKB_CB(skb)->map_seq += msk->ack_seq; - MPTCP_SKB_CB(skb)->end_seq += msk->ack_seq; - } - - pr_debug("msk=%p ack_seq=%llx\n", msk, msk->ack_seq); -} diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 16c336c51940..b1f36dc1a091 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -10,6 +10,7 @@ #include "protocol.h" #include "mib.h" +#include "mptcp_pm_gen.h" /* path manager command handlers */ @@ -433,14 +434,62 @@ bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc) return mptcp_pm_nl_is_backup(msk, &skc_local); } -int mptcp_pm_get_addr(struct sk_buff *skb, struct genl_info *info) +static int mptcp_pm_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, + struct genl_info *info) { if (info->attrs[MPTCP_PM_ATTR_TOKEN]) - return mptcp_userspace_pm_get_addr(skb, info); - return mptcp_pm_nl_get_addr(skb, info); + return mptcp_userspace_pm_get_addr(id, addr, info); + return mptcp_pm_nl_get_addr(id, addr, info); } -int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb) +int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct mptcp_pm_addr_entry addr; + struct nlattr *attr; + struct sk_buff *msg; + void *reply; + int ret; + + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR)) + return -EINVAL; + + attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; + ret = mptcp_pm_parse_entry(attr, info, false, &addr); + if (ret < 0) + return ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0, + info->genlhdr->cmd); + if (!reply) { + GENL_SET_ERR_MSG(info, "not enough space in Netlink message"); + ret = -EMSGSIZE; + goto fail; + } + + ret = mptcp_pm_get_addr(addr.addr.id, &addr, info); + if (ret) { + NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found"); + goto fail; + } + + ret = mptcp_nl_fill_addr(msg, &addr); + if (ret) + goto fail; + + genlmsg_end(msg, reply); + ret = genlmsg_reply(msg, info); + return ret; + +fail: + nlmsg_free(msg); + return ret; +} + +static int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb) { const struct genl_info *info = genl_info_dump(cb); @@ -449,11 +498,34 @@ int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb) return mptcp_pm_nl_dump_addr(msg, cb); } -int mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info) +int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + return mptcp_pm_dump_addr(msg, cb); +} + +static int mptcp_pm_set_flags(struct genl_info *info) { + struct mptcp_pm_addr_entry loc = { .addr = { .family = AF_UNSPEC }, }; + struct nlattr *attr_loc; + int ret = -EINVAL; + + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR)) + return ret; + + attr_loc = info->attrs[MPTCP_PM_ATTR_ADDR]; + ret = mptcp_pm_parse_entry(attr_loc, info, false, &loc); + if (ret < 0) + return ret; + if (info->attrs[MPTCP_PM_ATTR_TOKEN]) - return mptcp_userspace_pm_set_flags(skb, info); - return mptcp_pm_nl_set_flags(skb, info); + return mptcp_userspace_pm_set_flags(&loc, info); + return mptcp_pm_nl_set_flags(&loc, info); +} + +int mptcp_pm_nl_set_flags_doit(struct sk_buff *skb, struct genl_info *info) +{ + return mptcp_pm_set_flags(info); } void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 572d160edca3..99705a9c2238 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1393,28 +1393,35 @@ static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr, int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; struct pm_nl_pernet *pernet = genl_info_pm_nl(info); struct mptcp_pm_addr_entry addr, *entry; + struct nlattr *attr; int ret; + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR)) + return -EINVAL; + + attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; ret = mptcp_pm_parse_entry(attr, info, true, &addr); if (ret < 0) return ret; if (addr.addr.port && !address_use_port(&addr)) { - GENL_SET_ERR_MSG(info, "flags must have signal and not subflow when using port"); + NL_SET_ERR_MSG_ATTR(info->extack, attr, + "flags must have signal and not subflow when using port"); return -EINVAL; } if (addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL && addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) { - GENL_SET_ERR_MSG(info, "flags mustn't have both signal and fullmesh"); + NL_SET_ERR_MSG_ATTR(info->extack, attr, + "flags mustn't have both signal and fullmesh"); return -EINVAL; } if (addr.flags & MPTCP_PM_ADDR_FLAG_IMPLICIT) { - GENL_SET_ERR_MSG(info, "can't create IMPLICIT endpoint"); + NL_SET_ERR_MSG_ATTR(info->extack, attr, + "can't create IMPLICIT endpoint"); return -EINVAL; } @@ -1587,12 +1594,16 @@ next: int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; struct pm_nl_pernet *pernet = genl_info_pm_nl(info); struct mptcp_pm_addr_entry addr, *entry; unsigned int addr_max; + struct nlattr *attr; int ret; + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR)) + return -EINVAL; + + attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; ret = mptcp_pm_parse_entry(attr, info, false, &addr); if (ret < 0) return ret; @@ -1608,7 +1619,7 @@ int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info) spin_lock_bh(&pernet->lock); entry = __lookup_addr_by_id(pernet, addr.addr.id); if (!entry) { - GENL_SET_ERR_MSG(info, "address not found"); + NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found"); spin_unlock_bh(&pernet->lock); return -EINVAL; } @@ -1762,61 +1773,24 @@ nla_put_failure: return -EMSGSIZE; } -int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info) +int mptcp_pm_nl_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, + struct genl_info *info) { - struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; struct pm_nl_pernet *pernet = genl_info_pm_nl(info); - struct mptcp_pm_addr_entry addr, *entry; - struct sk_buff *msg; - void *reply; - int ret; - - ret = mptcp_pm_parse_entry(attr, info, false, &addr); - if (ret < 0) - return ret; - - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return -ENOMEM; - - reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0, - info->genlhdr->cmd); - if (!reply) { - GENL_SET_ERR_MSG(info, "not enough space in Netlink message"); - ret = -EMSGSIZE; - goto fail; - } + struct mptcp_pm_addr_entry *entry; + int ret = -EINVAL; rcu_read_lock(); - entry = __lookup_addr_by_id(pernet, addr.addr.id); - if (!entry) { - GENL_SET_ERR_MSG(info, "address not found"); - ret = -EINVAL; - goto unlock_fail; + entry = __lookup_addr_by_id(pernet, id); + if (entry) { + *addr = *entry; + ret = 0; } - - ret = mptcp_nl_fill_addr(msg, entry); - if (ret) - goto unlock_fail; - - genlmsg_end(msg, reply); - ret = genlmsg_reply(msg, info); - rcu_read_unlock(); - return ret; - -unlock_fail: rcu_read_unlock(); -fail: - nlmsg_free(msg); return ret; } -int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info) -{ - return mptcp_pm_get_addr(skb, info); -} - int mptcp_pm_nl_dump_addr(struct sk_buff *msg, struct netlink_callback *cb) { @@ -1860,12 +1834,6 @@ int mptcp_pm_nl_dump_addr(struct sk_buff *msg, return msg->len; } -int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg, - struct netlink_callback *cb) -{ - return mptcp_pm_dump_addr(msg, cb); -} - static int parse_limit(struct genl_info *info, int id, unsigned int *limit) { struct nlattr *attr = info->attrs[id]; @@ -1875,7 +1843,9 @@ static int parse_limit(struct genl_info *info, int id, unsigned int *limit) *limit = nla_get_u32(attr); if (*limit > MPTCP_PM_ADDR_MAX) { - GENL_SET_ERR_MSG(info, "limit greater than maximum"); + NL_SET_ERR_MSG_ATTR_FMT(info->extack, attr, + "limit greater than maximum (%u)", + MPTCP_PM_ADDR_MAX); return -EINVAL; } return 0; @@ -1981,66 +1951,57 @@ next: return ret; } -int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info) +int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local, + struct genl_info *info) { - struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }; struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP | MPTCP_PM_ADDR_FLAG_FULLMESH; - struct net *net = sock_net(skb->sk); + struct net *net = genl_info_net(info); struct mptcp_pm_addr_entry *entry; struct pm_nl_pernet *pernet; u8 lookup_by_id = 0; u8 bkup = 0; - int ret; pernet = pm_nl_get_pernet(net); - ret = mptcp_pm_parse_entry(attr, info, false, &addr); - if (ret < 0) - return ret; - - if (addr.addr.family == AF_UNSPEC) { + if (local->addr.family == AF_UNSPEC) { lookup_by_id = 1; - if (!addr.addr.id) { - GENL_SET_ERR_MSG(info, "missing required inputs"); + if (!local->addr.id) { + NL_SET_ERR_MSG_ATTR(info->extack, attr, + "missing address ID"); return -EOPNOTSUPP; } } - if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP) + if (local->flags & MPTCP_PM_ADDR_FLAG_BACKUP) bkup = 1; spin_lock_bh(&pernet->lock); - entry = lookup_by_id ? __lookup_addr_by_id(pernet, addr.addr.id) : - __lookup_addr(pernet, &addr.addr); + entry = lookup_by_id ? __lookup_addr_by_id(pernet, local->addr.id) : + __lookup_addr(pernet, &local->addr); if (!entry) { spin_unlock_bh(&pernet->lock); - GENL_SET_ERR_MSG(info, "address not found"); + NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found"); return -EINVAL; } - if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && + if ((local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && (entry->flags & (MPTCP_PM_ADDR_FLAG_SIGNAL | MPTCP_PM_ADDR_FLAG_IMPLICIT))) { spin_unlock_bh(&pernet->lock); - GENL_SET_ERR_MSG(info, "invalid addr flags"); + NL_SET_ERR_MSG_ATTR(info->extack, attr, "invalid addr flags"); return -EINVAL; } - changed = (addr.flags ^ entry->flags) & mask; - entry->flags = (entry->flags & ~mask) | (addr.flags & mask); - addr = *entry; + changed = (local->flags ^ entry->flags) & mask; + entry->flags = (entry->flags & ~mask) | (local->flags & mask); + *local = *entry; spin_unlock_bh(&pernet->lock); - mptcp_nl_set_flags(net, &addr.addr, bkup, changed); + mptcp_nl_set_flags(net, &local->addr, bkup, changed); return 0; } -int mptcp_pm_nl_set_flags_doit(struct sk_buff *skb, struct genl_info *info) -{ - return mptcp_pm_set_flags(skb, info); -} - static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gfp) { genlmsg_multicast_netns(&mptcp_genl_family, net, diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index a3d477059b11..277cf092a870 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -175,14 +175,13 @@ bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, static struct mptcp_sock *mptcp_userspace_pm_get_sock(const struct genl_info *info) { - struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; struct mptcp_sock *msk; + struct nlattr *token; - if (!token) { - GENL_SET_ERR_MSG(info, "missing required token"); + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_TOKEN)) return NULL; - } + token = info->attrs[MPTCP_PM_ATTR_TOKEN]; msk = mptcp_token_get_sock(genl_info_net(info), nla_get_u32(token)); if (!msk) { NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); @@ -190,7 +189,8 @@ static struct mptcp_sock *mptcp_userspace_pm_get_sock(const struct genl_info *in } if (!mptcp_pm_is_userspace(msk)) { - GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); + NL_SET_ERR_MSG_ATTR(info->extack, token, + "userspace PM not selected"); sock_put((struct sock *)msk); return NULL; } @@ -200,16 +200,14 @@ static struct mptcp_sock *mptcp_userspace_pm_get_sock(const struct genl_info *in int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *addr = info->attrs[MPTCP_PM_ATTR_ADDR]; struct mptcp_pm_addr_entry addr_val; struct mptcp_sock *msk; + struct nlattr *addr; int err = -EINVAL; struct sock *sk; - if (!addr) { - GENL_SET_ERR_MSG(info, "missing required address"); + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR)) return err; - } msk = mptcp_userspace_pm_get_sock(info); if (!msk) @@ -217,21 +215,27 @@ int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) sk = (struct sock *)msk; + addr = info->attrs[MPTCP_PM_ATTR_ADDR]; err = mptcp_pm_parse_entry(addr, info, true, &addr_val); - if (err < 0) { - GENL_SET_ERR_MSG(info, "error parsing local address"); + if (err < 0) + goto announce_err; + + if (addr_val.addr.id == 0) { + NL_SET_ERR_MSG_ATTR(info->extack, addr, "invalid addr id"); + err = -EINVAL; goto announce_err; } - if (addr_val.addr.id == 0 || !(addr_val.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { - GENL_SET_ERR_MSG(info, "invalid addr id or flags"); + if (!(addr_val.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { + NL_SET_ERR_MSG_ATTR(info->extack, addr, "invalid addr flags"); err = -EINVAL; goto announce_err; } err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val, false); if (err < 0) { - GENL_SET_ERR_MSG(info, "did not match address and id"); + NL_SET_ERR_MSG_ATTR(info->extack, addr, + "did not match address and id"); goto announce_err; } @@ -253,8 +257,7 @@ int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) return err; } -static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk, - struct genl_info *info) +static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk) { struct mptcp_rm_list list = { .nr = 0 }; struct mptcp_subflow_context *subflow; @@ -269,10 +272,8 @@ static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk, break; } } - if (!has_id_0) { - GENL_SET_ERR_MSG(info, "address with id 0 not found"); + if (!has_id_0) goto remove_err; - } list.ids[list.nr++] = 0; @@ -309,18 +310,17 @@ void mptcp_pm_remove_addr_entry(struct mptcp_sock *msk, int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *id = info->attrs[MPTCP_PM_ATTR_LOC_ID]; struct mptcp_pm_addr_entry *match; struct mptcp_sock *msk; + struct nlattr *id; int err = -EINVAL; struct sock *sk; u8 id_val; - if (!id) { - GENL_SET_ERR_MSG(info, "missing required ID"); + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_LOC_ID)) return err; - } + id = info->attrs[MPTCP_PM_ATTR_LOC_ID]; id_val = nla_get_u8(id); msk = mptcp_userspace_pm_get_sock(info); @@ -330,7 +330,7 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) sk = (struct sock *)msk; if (id_val == 0) { - err = mptcp_userspace_pm_remove_id_zero_address(msk, info); + err = mptcp_userspace_pm_remove_id_zero_address(msk); goto out; } @@ -339,7 +339,6 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) spin_lock_bh(&msk->pm.lock); match = mptcp_userspace_pm_lookup_addr_by_id(msk, id_val); if (!match) { - GENL_SET_ERR_MSG(info, "address with specified id not found"); spin_unlock_bh(&msk->pm.lock); release_sock(sk); goto out; @@ -356,25 +355,28 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) err = 0; out: + if (err) + NL_SET_ERR_MSG_ATTR_FMT(info->extack, id, + "address with id %u not found", + id_val); + sock_put(sk); return err; } int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; - struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; struct mptcp_pm_addr_entry entry = { 0 }; struct mptcp_addr_info addr_r; + struct nlattr *raddr, *laddr; struct mptcp_pm_local local; struct mptcp_sock *msk; int err = -EINVAL; struct sock *sk; - if (!laddr || !raddr) { - GENL_SET_ERR_MSG(info, "missing required address(es)"); + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR) || + GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR_REMOTE)) return err; - } msk = mptcp_userspace_pm_get_sock(info); if (!msk) @@ -382,24 +384,22 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) sk = (struct sock *)msk; + laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; err = mptcp_pm_parse_entry(laddr, info, true, &entry); - if (err < 0) { - NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr"); + if (err < 0) goto create_err; - } if (entry.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { - GENL_SET_ERR_MSG(info, "invalid addr flags"); + NL_SET_ERR_MSG_ATTR(info->extack, laddr, "invalid addr flags"); err = -EINVAL; goto create_err; } entry.flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW; + raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; err = mptcp_pm_parse_addr(raddr, info, &addr_r); - if (err < 0) { - NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr"); + if (err < 0) goto create_err; - } if (!mptcp_pm_addr_families_match(sk, &entry.addr, &addr_r)) { GENL_SET_ERR_MSG(info, "families mismatch"); @@ -409,7 +409,8 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) err = mptcp_userspace_pm_append_new_local_addr(msk, &entry, false); if (err < 0) { - GENL_SET_ERR_MSG(info, "did not match address and id"); + NL_SET_ERR_MSG_ATTR(info->extack, laddr, + "did not match address and id"); goto create_err; } @@ -421,6 +422,9 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) err = __mptcp_subflow_connect(sk, &local, &addr_r); release_sock(sk); + if (err) + GENL_SET_ERR_MSG_FMT(info, "connect error: %d", err); + spin_lock_bh(&msk->pm.lock); if (err) mptcp_userspace_pm_delete_local_addr(msk, &entry); @@ -483,18 +487,16 @@ static struct sock *mptcp_nl_find_ssk(struct mptcp_sock *msk, int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; - struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; struct mptcp_pm_addr_entry addr_l; struct mptcp_addr_info addr_r; + struct nlattr *raddr, *laddr; struct mptcp_sock *msk; struct sock *sk, *ssk; int err = -EINVAL; - if (!laddr || !raddr) { - GENL_SET_ERR_MSG(info, "missing required address(es)"); + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR) || + GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR_REMOTE)) return err; - } msk = mptcp_userspace_pm_get_sock(info); if (!msk) @@ -502,17 +504,15 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info sk = (struct sock *)msk; + laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; err = mptcp_pm_parse_entry(laddr, info, true, &addr_l); - if (err < 0) { - NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr"); + if (err < 0) goto destroy_err; - } + raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; err = mptcp_pm_parse_addr(raddr, info, &addr_r); - if (err < 0) { - NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr"); + if (err < 0) goto destroy_err; - } #if IS_ENABLED(CONFIG_MPTCP_IPV6) if (addr_l.addr.family == AF_INET && ipv6_addr_v4mapped(&addr_r.addr6)) { @@ -530,8 +530,14 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info goto destroy_err; } - if (!addr_l.addr.port || !addr_r.port) { - GENL_SET_ERR_MSG(info, "missing local or remote port"); + if (!addr_l.addr.port) { + NL_SET_ERR_MSG_ATTR(info->extack, laddr, "missing local port"); + err = -EINVAL; + goto destroy_err; + } + + if (!addr_r.port) { + NL_SET_ERR_MSG_ATTR(info->extack, raddr, "missing remote port"); err = -EINVAL; goto destroy_err; } @@ -539,6 +545,7 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info lock_sock(sk); ssk = mptcp_nl_find_ssk(msk, &addr_l.addr, &addr_r); if (!ssk) { + GENL_SET_ERR_MSG(info, "subflow not found"); err = -ESRCH; goto release_sock; } @@ -557,46 +564,51 @@ destroy_err: return err; } -int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info) +int mptcp_userspace_pm_set_flags(struct mptcp_pm_addr_entry *local, + struct genl_info *info) { - struct mptcp_pm_addr_entry loc = { .addr = { .family = AF_UNSPEC }, }; - struct mptcp_pm_addr_entry rem = { .addr = { .family = AF_UNSPEC }, }; - struct nlattr *attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; - struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; + struct mptcp_addr_info rem = { .family = AF_UNSPEC, }; struct mptcp_pm_addr_entry *entry; + struct nlattr *attr, *attr_rem; struct mptcp_sock *msk; int ret = -EINVAL; struct sock *sk; u8 bkup = 0; + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR_REMOTE)) + return ret; + msk = mptcp_userspace_pm_get_sock(info); if (!msk) return ret; sk = (struct sock *)msk; - ret = mptcp_pm_parse_entry(attr, info, false, &loc); - if (ret < 0) + attr = info->attrs[MPTCP_PM_ATTR_ADDR]; + if (local->addr.family == AF_UNSPEC) { + NL_SET_ERR_MSG_ATTR(info->extack, attr, + "invalid local address family"); + ret = -EINVAL; goto set_flags_err; - - if (attr_rem) { - ret = mptcp_pm_parse_entry(attr_rem, info, false, &rem); - if (ret < 0) - goto set_flags_err; } - if (loc.addr.family == AF_UNSPEC || - rem.addr.family == AF_UNSPEC) { - GENL_SET_ERR_MSG(info, "invalid address families"); + attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; + ret = mptcp_pm_parse_addr(attr_rem, info, &rem); + if (ret < 0) + goto set_flags_err; + + if (rem.family == AF_UNSPEC) { + NL_SET_ERR_MSG_ATTR(info->extack, attr_rem, + "invalid remote address family"); ret = -EINVAL; goto set_flags_err; } - if (loc.flags & MPTCP_PM_ADDR_FLAG_BACKUP) + if (local->flags & MPTCP_PM_ADDR_FLAG_BACKUP) bkup = 1; spin_lock_bh(&msk->pm.lock); - entry = mptcp_userspace_pm_lookup_addr(msk, &loc.addr); + entry = mptcp_userspace_pm_lookup_addr(msk, &local->addr); if (entry) { if (bkup) entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP; @@ -606,9 +618,13 @@ int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info) spin_unlock_bh(&msk->pm.lock); lock_sock(sk); - ret = mptcp_pm_nl_mp_prio_send_ack(msk, &loc.addr, &rem.addr, bkup); + ret = mptcp_pm_nl_mp_prio_send_ack(msk, &local->addr, &rem, bkup); release_sock(sk); + /* mptcp_pm_nl_mp_prio_send_ack() only fails in one case */ + if (ret < 0) + GENL_SET_ERR_MSG(info, "subflow not found"); + set_flags_err: sock_put(sk); return ret; @@ -663,16 +679,13 @@ int mptcp_userspace_pm_dump_addr(struct sk_buff *msg, return ret; } -int mptcp_userspace_pm_get_addr(struct sk_buff *skb, +int mptcp_userspace_pm_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, struct genl_info *info) { - struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; - struct mptcp_pm_addr_entry addr, *entry; + struct mptcp_pm_addr_entry *entry; struct mptcp_sock *msk; - struct sk_buff *msg; int ret = -EINVAL; struct sock *sk; - void *reply; msk = mptcp_userspace_pm_get_sock(info); if (!msk) @@ -680,50 +693,16 @@ int mptcp_userspace_pm_get_addr(struct sk_buff *skb, sk = (struct sock *)msk; - ret = mptcp_pm_parse_entry(attr, info, false, &addr); - if (ret < 0) - goto out; - - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) { - ret = -ENOMEM; - goto out; - } - - reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0, - info->genlhdr->cmd); - if (!reply) { - GENL_SET_ERR_MSG(info, "not enough space in Netlink message"); - ret = -EMSGSIZE; - goto fail; - } - lock_sock(sk); spin_lock_bh(&msk->pm.lock); - entry = mptcp_userspace_pm_lookup_addr_by_id(msk, addr.addr.id); - if (!entry) { - GENL_SET_ERR_MSG(info, "address not found"); - ret = -EINVAL; - goto unlock_fail; + entry = mptcp_userspace_pm_lookup_addr_by_id(msk, id); + if (entry) { + *addr = *entry; + ret = 0; } - - ret = mptcp_nl_fill_addr(msg, entry); - if (ret) - goto unlock_fail; - - genlmsg_end(msg, reply); - ret = genlmsg_reply(msg, info); spin_unlock_bh(&msk->pm.lock); release_sock(sk); - sock_put(sk); - return ret; -unlock_fail: - spin_unlock_bh(&msk->pm.lock); - release_sock(sk); -fail: - nlmsg_free(msg); -out: sock_put(sk); return ret; } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 6bd819047470..6b61b7dee33b 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -118,24 +118,14 @@ static void mptcp_drop(struct sock *sk, struct sk_buff *skb) __kfree_skb(skb); } -static void mptcp_rmem_fwd_alloc_add(struct sock *sk, int size) -{ - WRITE_ONCE(mptcp_sk(sk)->rmem_fwd_alloc, - mptcp_sk(sk)->rmem_fwd_alloc + size); -} - -static void mptcp_rmem_charge(struct sock *sk, int size) -{ - mptcp_rmem_fwd_alloc_add(sk, -size); -} - static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to, struct sk_buff *from) { bool fragstolen; int delta; - if (MPTCP_SKB_CB(from)->offset || + if (unlikely(MPTCP_SKB_CB(to)->cant_coalesce) || + MPTCP_SKB_CB(from)->offset || ((to->len + from->len) > (sk->sk_rcvbuf >> 3)) || !skb_try_coalesce(to, from, &fragstolen, &delta)) return false; @@ -150,7 +140,7 @@ static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to, * negative one */ atomic_add(delta, &sk->sk_rmem_alloc); - mptcp_rmem_charge(sk, delta); + sk_mem_charge(sk, delta); kfree_skb_partial(from, fragstolen); return true; @@ -165,44 +155,6 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to, return mptcp_try_coalesce((struct sock *)msk, to, from); } -static void __mptcp_rmem_reclaim(struct sock *sk, int amount) -{ - amount >>= PAGE_SHIFT; - mptcp_rmem_charge(sk, amount << PAGE_SHIFT); - __sk_mem_reduce_allocated(sk, amount); -} - -static void mptcp_rmem_uncharge(struct sock *sk, int size) -{ - struct mptcp_sock *msk = mptcp_sk(sk); - int reclaimable; - - mptcp_rmem_fwd_alloc_add(sk, size); - reclaimable = msk->rmem_fwd_alloc - sk_unused_reserved_mem(sk); - - /* see sk_mem_uncharge() for the rationale behind the following schema */ - if (unlikely(reclaimable >= PAGE_SIZE)) - __mptcp_rmem_reclaim(sk, reclaimable); -} - -static void mptcp_rfree(struct sk_buff *skb) -{ - unsigned int len = skb->truesize; - struct sock *sk = skb->sk; - - atomic_sub(len, &sk->sk_rmem_alloc); - mptcp_rmem_uncharge(sk, len); -} - -void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk) -{ - skb_orphan(skb); - skb->sk = sk; - skb->destructor = mptcp_rfree; - atomic_add(skb->truesize, &sk->sk_rmem_alloc); - mptcp_rmem_charge(sk, skb->truesize); -} - /* "inspired" by tcp_data_queue_ofo(), main differences: * - use mptcp seqs * - don't cope with sacks @@ -315,25 +267,7 @@ merge_right: end: skb_condense(skb); - mptcp_set_owner_r(skb, sk); -} - -static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size) -{ - struct mptcp_sock *msk = mptcp_sk(sk); - int amt, amount; - - if (size <= msk->rmem_fwd_alloc) - return true; - - size -= msk->rmem_fwd_alloc; - amt = sk_mem_pages(size); - amount = amt << PAGE_SHIFT; - if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV)) - return false; - - mptcp_rmem_fwd_alloc_add(sk, amount); - return true; + skb_set_owner_r(skb, sk); } static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, @@ -351,7 +285,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, skb_orphan(skb); /* try to fetch required memory from subflow */ - if (!mptcp_rmem_schedule(sk, ssk, skb->truesize)) { + if (!sk_rmem_schedule(sk, skb, skb->truesize)) { MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED); goto drop; } @@ -366,6 +300,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, MPTCP_SKB_CB(skb)->end_seq = MPTCP_SKB_CB(skb)->map_seq + copy_len; MPTCP_SKB_CB(skb)->offset = offset; MPTCP_SKB_CB(skb)->has_rxtstamp = has_rxtstamp; + MPTCP_SKB_CB(skb)->cant_coalesce = 0; if (MPTCP_SKB_CB(skb)->map_seq == msk->ack_seq) { /* in sequence */ @@ -375,7 +310,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, if (tail && mptcp_try_coalesce(sk, tail, skb)) return true; - mptcp_set_owner_r(skb, sk); + skb_set_owner_r(skb, sk); __skb_queue_tail(&sk->sk_receive_queue, skb); return true; } else if (after64(MPTCP_SKB_CB(skb)->map_seq, msk->ack_seq)) { @@ -561,7 +496,7 @@ static void mptcp_cleanup_rbuf(struct mptcp_sock *msk, int copied) bool cleanup, rx_empty; cleanup = (space > 0) && (space >= (old_space << 1)) && copied; - rx_empty = !__mptcp_rmem(sk) && copied; + rx_empty = !sk_rmem_alloc_get(sk) && copied; mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); @@ -634,27 +569,13 @@ static void mptcp_dss_corruption(struct mptcp_sock *msk, struct sock *ssk) } static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, - struct sock *ssk, - unsigned int *bytes) + struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct sock *sk = (struct sock *)msk; - unsigned int moved = 0; bool more_data_avail; struct tcp_sock *tp; - bool done = false; - int sk_rbuf; - - sk_rbuf = READ_ONCE(sk->sk_rcvbuf); - - if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { - int ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf); - - if (unlikely(ssk_rbuf > sk_rbuf)) { - WRITE_ONCE(sk->sk_rcvbuf, ssk_rbuf); - sk_rbuf = ssk_rbuf; - } - } + bool ret = false; pr_debug("msk=%p ssk=%p\n", msk, ssk); tp = tcp_sk(ssk); @@ -664,20 +585,16 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, struct sk_buff *skb; bool fin; + if (sk_rmem_alloc_get(sk) > sk->sk_rcvbuf) + break; + /* try to move as much data as available */ map_remaining = subflow->map_data_len - mptcp_subflow_get_map_offset(subflow); skb = skb_peek(&ssk->sk_receive_queue); - if (!skb) { - /* With racing move_skbs_to_msk() and __mptcp_move_skbs(), - * a different CPU can have already processed the pending - * data, stop here or we can enter an infinite loop - */ - if (!moved) - done = true; + if (unlikely(!skb)) break; - } if (__mptcp_check_fallback(msk)) { /* Under fallback skbs have no MPTCP extension and TCP could @@ -690,19 +607,13 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, offset = seq - TCP_SKB_CB(skb)->seq; fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; - if (fin) { - done = true; + if (fin) seq++; - } if (offset < skb->len) { size_t len = skb->len - offset; - if (tp->urg_data) - done = true; - - if (__mptcp_move_skb(msk, ssk, skb, offset, len)) - moved += len; + ret = __mptcp_move_skb(msk, ssk, skb, offset, len) || ret; seq += len; if (unlikely(map_remaining < len)) { @@ -716,22 +627,16 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, } sk_eat_skb(ssk, skb); - done = true; } WRITE_ONCE(tp->copied_seq, seq); more_data_avail = mptcp_subflow_data_available(ssk); - if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf) { - done = true; - break; - } } while (more_data_avail); - if (moved > 0) + if (ret) msk->last_data_recv = tcp_jiffies32; - *bytes += moved; - return done; + return ret; } static bool __mptcp_ofo_queue(struct mptcp_sock *msk) @@ -825,9 +730,9 @@ void __mptcp_error_report(struct sock *sk) static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk) { struct sock *sk = (struct sock *)msk; - unsigned int moved = 0; + bool moved; - __mptcp_move_skbs_from_subflow(msk, ssk, &moved); + moved = __mptcp_move_skbs_from_subflow(msk, ssk); __mptcp_ofo_queue(msk); if (unlikely(ssk->sk_err)) { if (!sock_owned_by_user(sk)) @@ -843,14 +748,29 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk) */ if (mptcp_pending_data_fin(sk, NULL)) mptcp_schedule_work(sk); - return moved > 0; + return moved; +} + +static void __mptcp_rcvbuf_update(struct sock *sk, struct sock *ssk) +{ + if (unlikely(ssk->sk_rcvbuf > sk->sk_rcvbuf)) + WRITE_ONCE(sk->sk_rcvbuf, ssk->sk_rcvbuf); +} + +static void __mptcp_data_ready(struct sock *sk, struct sock *ssk) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + + __mptcp_rcvbuf_update(sk, ssk); + + /* Wake-up the reader only for in-sequence data */ + if (move_skbs_to_msk(msk, ssk) && mptcp_epollin_ready(sk)) + sk->sk_data_ready(sk); } void mptcp_data_ready(struct sock *sk, struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); - struct mptcp_sock *msk = mptcp_sk(sk); - int sk_rbuf, ssk_rbuf; /* The peer can send data while we are shutting down this * subflow at msk destruction time, but we must avoid enqueuing @@ -859,19 +779,11 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) if (unlikely(subflow->disposable)) return; - ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf); - sk_rbuf = READ_ONCE(sk->sk_rcvbuf); - if (unlikely(ssk_rbuf > sk_rbuf)) - sk_rbuf = ssk_rbuf; - - /* over limit? can't append more skbs to msk, Also, no need to wake-up*/ - if (__mptcp_rmem(sk) > sk_rbuf) - return; - - /* Wake-up the reader only for in-sequence data */ mptcp_data_lock(sk); - if (move_skbs_to_msk(msk, ssk) && mptcp_epollin_ready(sk)) - sk->sk_data_ready(sk); + if (!sock_owned_by_user(sk)) + __mptcp_data_ready(sk, ssk); + else + __set_bit(MPTCP_DEQUEUE, &mptcp_sk(sk)->cb_flags); mptcp_data_unlock(sk); } @@ -950,20 +862,6 @@ bool mptcp_schedule_work(struct sock *sk) return false; } -static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk) -{ - struct mptcp_subflow_context *subflow; - - msk_owned_by_me(msk); - - mptcp_for_each_subflow(msk, subflow) { - if (READ_ONCE(subflow->data_avail)) - return mptcp_subflow_tcp_sock(subflow); - } - - return NULL; -} - static bool mptcp_skb_can_collapse_to(u64 write_seq, const struct sk_buff *skb, const struct mptcp_ext *mpext) @@ -1944,16 +1842,17 @@ do_error: static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied); -static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, +static int __mptcp_recvmsg_mskq(struct sock *sk, struct msghdr *msg, size_t len, int flags, struct scm_timestamping_internal *tss, int *cmsg_flags) { + struct mptcp_sock *msk = mptcp_sk(sk); struct sk_buff *skb, *tmp; int copied = 0; - skb_queue_walk_safe(&msk->receive_queue, skb, tmp) { + skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) { u32 offset = MPTCP_SKB_CB(skb)->offset; u32 data_len = skb->len - offset; u32 count = min_t(size_t, len - copied, data_len); @@ -1985,10 +1884,11 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, } if (!(flags & MSG_PEEK)) { - /* we will bulk release the skb memory later */ + /* avoid the indirect call, we know the destructor is sock_wfree */ skb->destructor = NULL; - WRITE_ONCE(msk->rmem_released, msk->rmem_released + skb->truesize); - __skb_unlink(skb, &msk->receive_queue); + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); + sk_mem_uncharge(sk, skb->truesize); + __skb_unlink(skb, &sk->sk_receive_queue); __kfree_skb(skb); msk->bytes_consumed += count; } @@ -2101,66 +2001,65 @@ new_measure: msk->rcvq_space.time = mstamp; } -static void __mptcp_update_rmem(struct sock *sk) +static struct mptcp_subflow_context * +__mptcp_first_ready_from(struct mptcp_sock *msk, + struct mptcp_subflow_context *subflow) { - struct mptcp_sock *msk = mptcp_sk(sk); + struct mptcp_subflow_context *start_subflow = subflow; - if (!msk->rmem_released) - return; - - atomic_sub(msk->rmem_released, &sk->sk_rmem_alloc); - mptcp_rmem_uncharge(sk, msk->rmem_released); - WRITE_ONCE(msk->rmem_released, 0); + while (!READ_ONCE(subflow->data_avail)) { + subflow = mptcp_next_subflow(msk, subflow); + if (subflow == start_subflow) + return NULL; + } + return subflow; } -static void __mptcp_splice_receive_queue(struct sock *sk) +static bool __mptcp_move_skbs(struct sock *sk) { + struct mptcp_subflow_context *subflow; struct mptcp_sock *msk = mptcp_sk(sk); + bool ret = false; - skb_queue_splice_tail_init(&sk->sk_receive_queue, &msk->receive_queue); -} + if (list_empty(&msk->conn_list)) + return false; -static bool __mptcp_move_skbs(struct mptcp_sock *msk) -{ - struct sock *sk = (struct sock *)msk; - unsigned int moved = 0; - bool ret, done; + /* verify we can move any data from the subflow, eventually updating */ + if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) + mptcp_for_each_subflow(msk, subflow) + __mptcp_rcvbuf_update(sk, subflow->tcp_sock); - do { - struct sock *ssk = mptcp_subflow_recv_lookup(msk); + subflow = list_first_entry(&msk->conn_list, + struct mptcp_subflow_context, node); + for (;;) { + struct sock *ssk; bool slowpath; - /* we can have data pending in the subflows only if the msk - * receive buffer was full at subflow_data_ready() time, - * that is an unlikely slow path. + /* + * As an optimization avoid traversing the subflows list + * and ev. acquiring the subflow socket lock before baling out */ - if (likely(!ssk)) + if (sk_rmem_alloc_get(sk) > sk->sk_rcvbuf) break; - slowpath = lock_sock_fast(ssk); - mptcp_data_lock(sk); - __mptcp_update_rmem(sk); - done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved); - mptcp_data_unlock(sk); + subflow = __mptcp_first_ready_from(msk, subflow); + if (!subflow) + break; + ssk = mptcp_subflow_tcp_sock(subflow); + slowpath = lock_sock_fast(ssk); + ret = __mptcp_move_skbs_from_subflow(msk, ssk) || ret; if (unlikely(ssk->sk_err)) __mptcp_error_report(sk); unlock_sock_fast(ssk, slowpath); - } while (!done); - /* acquire the data lock only if some input data is pending */ - ret = moved > 0; - if (!RB_EMPTY_ROOT(&msk->out_of_order_queue) || - !skb_queue_empty_lockless(&sk->sk_receive_queue)) { - mptcp_data_lock(sk); - __mptcp_update_rmem(sk); - ret |= __mptcp_ofo_queue(msk); - __mptcp_splice_receive_queue(sk); - mptcp_data_unlock(sk); + subflow = mptcp_next_subflow(msk, subflow); } + + __mptcp_ofo_queue(msk); if (ret) mptcp_check_data_fin((struct sock *)msk); - return !skb_queue_empty(&msk->receive_queue); + return ret; } static unsigned int mptcp_inq_hint(const struct sock *sk) @@ -2168,7 +2067,7 @@ static unsigned int mptcp_inq_hint(const struct sock *sk) const struct mptcp_sock *msk = mptcp_sk(sk); const struct sk_buff *skb; - skb = skb_peek(&msk->receive_queue); + skb = skb_peek(&sk->sk_receive_queue); if (skb) { u64 hint_val = READ_ONCE(msk->ack_seq) - MPTCP_SKB_CB(skb)->map_seq; @@ -2214,7 +2113,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, while (copied < len) { int err, bytes_read; - bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied, flags, &tss, &cmsg_flags); + bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, &tss, &cmsg_flags); if (unlikely(bytes_read < 0)) { if (!copied) copied = bytes_read; @@ -2223,7 +2122,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, copied += bytes_read; - if (skb_queue_empty(&msk->receive_queue) && __mptcp_move_skbs(msk)) + if (skb_queue_empty(&sk->sk_receive_queue) && __mptcp_move_skbs(sk)) continue; /* only the MPTCP socket status is relevant here. The exit @@ -2249,7 +2148,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, /* race breaker: the shutdown could be after the * previous receive queue check */ - if (__mptcp_move_skbs(msk)) + if (__mptcp_move_skbs(sk)) continue; break; } @@ -2293,9 +2192,8 @@ out_err: } } - pr_debug("msk=%p rx queue empty=%d:%d copied=%d\n", - msk, skb_queue_empty_lockless(&sk->sk_receive_queue), - skb_queue_empty(&msk->receive_queue), copied); + pr_debug("msk=%p rx queue empty=%d copied=%d\n", + msk, skb_queue_empty(&sk->sk_receive_queue), copied); release_sock(sk); return copied; @@ -2822,11 +2720,8 @@ static void __mptcp_init_sock(struct sock *sk) INIT_LIST_HEAD(&msk->join_list); INIT_LIST_HEAD(&msk->rtx_queue); INIT_WORK(&msk->work, mptcp_worker); - __skb_queue_head_init(&msk->receive_queue); msk->out_of_order_queue = RB_ROOT; msk->first_pending = NULL; - WRITE_ONCE(msk->rmem_fwd_alloc, 0); - WRITE_ONCE(msk->rmem_released, 0); msk->timer_ival = TCP_RTO_MIN; msk->scaling_ratio = TCP_DEFAULT_SCALING_RATIO; @@ -3052,8 +2947,6 @@ static void __mptcp_destroy_sock(struct sock *sk) sk->sk_prot->destroy(sk); - WARN_ON_ONCE(READ_ONCE(msk->rmem_fwd_alloc)); - WARN_ON_ONCE(msk->rmem_released); sk_stream_kill_queues(sk); xfrm_sk_free_policy(sk); @@ -3405,18 +3298,12 @@ void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags) mptcp_for_each_subflow_safe(msk, subflow, tmp) __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), subflow, flags); - /* move to sk_receive_queue, sk_stream_kill_queues will purge it */ - mptcp_data_lock(sk); - skb_queue_splice_tail_init(&msk->receive_queue, &sk->sk_receive_queue); __skb_queue_purge(&sk->sk_receive_queue); skb_rbtree_purge(&msk->out_of_order_queue); - mptcp_data_unlock(sk); /* move all the rx fwd alloc into the sk_mem_reclaim_final in * inet_sock_destruct() will dispose it */ - sk_forward_alloc_add(sk, msk->rmem_fwd_alloc); - WRITE_ONCE(msk->rmem_fwd_alloc, 0); mptcp_token_destroy(msk); mptcp_pm_free_anno_list(msk); mptcp_free_local_addr_list(msk); @@ -3453,7 +3340,8 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk) #define MPTCP_FLAGS_PROCESS_CTX_NEED (BIT(MPTCP_PUSH_PENDING) | \ BIT(MPTCP_RETRANSMIT) | \ - BIT(MPTCP_FLUSH_JOIN_LIST)) + BIT(MPTCP_FLUSH_JOIN_LIST) | \ + BIT(MPTCP_DEQUEUE)) /* processes deferred events and flush wmem */ static void mptcp_release_cb(struct sock *sk) @@ -3487,6 +3375,11 @@ static void mptcp_release_cb(struct sock *sk) __mptcp_push_pending(sk, 0); if (flags & BIT(MPTCP_RETRANSMIT)) __mptcp_retrans(sk); + if ((flags & BIT(MPTCP_DEQUEUE)) && __mptcp_move_skbs(sk)) { + /* notify ack seq update */ + mptcp_cleanup_rbuf(msk, 0); + sk->sk_data_ready(sk); + } cond_resched(); spin_lock_bh(&sk->sk_lock.slock); @@ -3506,8 +3399,6 @@ static void mptcp_release_cb(struct sock *sk) if (__test_and_clear_bit(MPTCP_SYNC_SNDBUF, &msk->cb_flags)) __mptcp_sync_sndbuf(sk); } - - __mptcp_update_rmem(sk); } /* MP_JOIN client subflow must wait for 4th ack before sending any data: @@ -3678,12 +3569,6 @@ static void mptcp_shutdown(struct sock *sk, int how) __mptcp_wr_shutdown(sk); } -static int mptcp_forward_alloc_get(const struct sock *sk) -{ - return READ_ONCE(sk->sk_forward_alloc) + - READ_ONCE(mptcp_sk(sk)->rmem_fwd_alloc); -} - static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v) { const struct sock *sk = (void *)msk; @@ -3724,7 +3609,8 @@ static int mptcp_ioctl(struct sock *sk, int cmd, int *karg) return -EINVAL; lock_sock(sk); - __mptcp_move_skbs(msk); + if (__mptcp_move_skbs(sk)) + mptcp_cleanup_rbuf(msk, 0); *karg = mptcp_inq_hint(sk); release_sock(sk); break; @@ -3841,7 +3727,6 @@ static struct proto mptcp_prot = { .hash = mptcp_hash, .unhash = mptcp_unhash, .get_port = mptcp_get_port, - .forward_alloc_get = mptcp_forward_alloc_get, .stream_memory_free = mptcp_stream_memory_free, .sockets_allocated = &mptcp_sockets_allocated, diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index f6a207958459..ca65f8bff632 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -124,12 +124,14 @@ #define MPTCP_FLUSH_JOIN_LIST 5 #define MPTCP_SYNC_STATE 6 #define MPTCP_SYNC_SNDBUF 7 +#define MPTCP_DEQUEUE 8 struct mptcp_skb_cb { u64 map_seq; u64 end_seq; u32 offset; - u8 has_rxtstamp:1; + u8 has_rxtstamp; + u8 cant_coalesce; }; #define MPTCP_SKB_CB(__skb) ((struct mptcp_skb_cb *)&((__skb)->cb[0])) @@ -279,7 +281,6 @@ struct mptcp_sock { u64 rcv_data_fin_seq; u64 bytes_retrans; u64 bytes_consumed; - int rmem_fwd_alloc; int snd_burst; int old_wspace; u64 recovery_snd_nxt; /* in recovery mode accept up to this seq; @@ -294,7 +295,6 @@ struct mptcp_sock { u32 last_ack_recv; unsigned long timer_ival; u32 token; - int rmem_released; unsigned long flags; unsigned long cb_flags; bool recovery; /* closing subflow write queue reinjected */ @@ -324,7 +324,6 @@ struct mptcp_sock { struct work_struct work; struct sk_buff *ooo_last_skb; struct rb_root out_of_order_queue; - struct sk_buff_head receive_queue; struct list_head conn_list; struct list_head rtx_queue; struct mptcp_data_frag *first_pending; @@ -355,6 +354,8 @@ struct mptcp_sock { list_for_each_entry(__subflow, &((__msk)->conn_list), node) #define mptcp_for_each_subflow_safe(__msk, __subflow, __tmp) \ list_for_each_entry_safe(__subflow, __tmp, &((__msk)->conn_list), node) +#define mptcp_next_subflow(__msk, __subflow) \ + list_next_entry_circular(__subflow, &((__msk)->conn_list), node) extern struct genl_family mptcp_genl_family; @@ -381,14 +382,6 @@ static inline void msk_owned_by_me(const struct mptcp_sock *msk) #define mptcp_sk(ptr) container_of_const(ptr, struct mptcp_sock, sk.icsk_inet.sk) #endif -/* the msk socket don't use the backlog, also account for the bulk - * free memory - */ -static inline int __mptcp_rmem(const struct sock *sk) -{ - return atomic_read(&sk->sk_rmem_alloc) - READ_ONCE(mptcp_sk(sk)->rmem_released); -} - static inline int mptcp_win_from_space(const struct sock *sk, int space) { return __tcp_win_from_space(mptcp_sk(sk)->scaling_ratio, space); @@ -401,7 +394,8 @@ static inline int mptcp_space_from_win(const struct sock *sk, int win) static inline int __mptcp_space(const struct sock *sk) { - return mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk)); + return mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - + sk_rmem_alloc_get(sk)); } static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk) @@ -1038,9 +1032,10 @@ bool mptcp_lookup_subflow_by_saddr(const struct list_head *list, const struct mptcp_addr_info *saddr); bool mptcp_remove_anno_list_by_saddr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); -int mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info); -int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info); -int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local, + struct genl_info *info); +int mptcp_userspace_pm_set_flags(struct mptcp_pm_addr_entry *local, + struct genl_info *info); int mptcp_pm_announce_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, bool echo); @@ -1058,8 +1053,6 @@ void mptcp_event_pm_listener(const struct sock *ssk, enum mptcp_event_type event); bool mptcp_userspace_pm_active(const struct mptcp_sock *msk); -void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, - const struct mptcp_options_received *mp_opt); void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow, struct request_sock *req); int mptcp_nl_fill_addr(struct sk_buff *skb, @@ -1131,14 +1124,13 @@ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_in bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc); bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc); bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc); -int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb); int mptcp_pm_nl_dump_addr(struct sk_buff *msg, struct netlink_callback *cb); int mptcp_userspace_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb); -int mptcp_pm_get_addr(struct sk_buff *skb, struct genl_info *info); -int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info); -int mptcp_userspace_pm_get_addr(struct sk_buff *skb, +int mptcp_pm_nl_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, + struct genl_info *info); +int mptcp_userspace_pm_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, struct genl_info *info); static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index fd021cf8286e..d2caffa56bdd 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -802,9 +802,6 @@ void __mptcp_subflow_fully_established(struct mptcp_sock *msk, subflow_set_remote_key(msk, subflow, mp_opt); WRITE_ONCE(subflow->fully_established, true); WRITE_ONCE(msk->fully_established, true); - - if (subflow->is_mptfo) - __mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt); } static struct sock *subflow_syn_recv_sock(const struct sock *sk, @@ -1271,7 +1268,12 @@ out: subflow->map_valid = 0; } -/* sched mptcp worker to remove the subflow if no more data is pending */ +static bool subflow_is_done(const struct sock *sk) +{ + return sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE; +} + +/* sched mptcp worker for subflow cleanup if no more data is pending */ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ssk) { struct sock *sk = (struct sock *)msk; @@ -1281,8 +1283,18 @@ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ss inet_sk_state_load(sk) != TCP_ESTABLISHED))) return; - if (skb_queue_empty(&ssk->sk_receive_queue) && - !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) + if (!skb_queue_empty(&ssk->sk_receive_queue)) + return; + + if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) + mptcp_schedule_work(sk); + + /* when the fallback subflow closes the rx side, trigger a 'dummy' + * ingress data fin, so that the msk state will follow along + */ + if (__mptcp_check_fallback(msk) && subflow_is_done(ssk) && + msk->first == ssk && + mptcp_update_rcv_data_fin(msk, READ_ONCE(msk->ack_seq), true)) mptcp_schedule_work(sk); } @@ -1842,11 +1854,6 @@ static void __subflow_state_change(struct sock *sk) rcu_read_unlock(); } -static bool subflow_is_done(const struct sock *sk) -{ - return sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE; -} - static void subflow_state_change(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); @@ -1873,13 +1880,6 @@ static void subflow_state_change(struct sock *sk) subflow_error_report(sk); subflow_sched_work_if_closed(mptcp_sk(parent), sk); - - /* when the fallback subflow closes the rx side, trigger a 'dummy' - * ingress data fin, so that the msk state will follow along - */ - if (__mptcp_check_fallback(msk) && subflow_is_done(sk) && msk->first == sk && - mptcp_update_rcv_data_fin(msk, READ_ONCE(msk->ack_seq), true)) - mptcp_schedule_work(parent); } void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 85311226183a..f8f13058a46e 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -771,6 +771,7 @@ static int netlink_release(struct socket *sock) nlk->cb.done(&nlk->cb); module_put(nlk->cb.module); kfree_skb(nlk->cb.skb); + WRITE_ONCE(nlk->cb_running, false); } module_put(nlk->module); diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c index ba91284f4086..e6cf4eb06b46 100644 --- a/net/nfc/hci/llc.c +++ b/net/nfc/hci/llc.c @@ -78,17 +78,6 @@ static struct nfc_llc_engine *nfc_llc_name_to_engine(const char *name) return NULL; } -void nfc_llc_unregister(const char *name) -{ - struct nfc_llc_engine *llc_engine; - - llc_engine = nfc_llc_name_to_engine(name); - if (llc_engine == NULL) - return; - - nfc_llc_del_engine(llc_engine); -} - struct nfc_llc *nfc_llc_allocate(const char *name, struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv, rcv_to_hci_t rcv_to_hci, int tx_headroom, diff --git a/net/nfc/hci/llc.h b/net/nfc/hci/llc.h index d66271d211a5..09914608ec43 100644 --- a/net/nfc/hci/llc.h +++ b/net/nfc/hci/llc.h @@ -40,7 +40,6 @@ struct nfc_llc { void *nfc_llc_get_data(struct nfc_llc *llc); int nfc_llc_register(const char *name, const struct nfc_llc_ops *ops); -void nfc_llc_unregister(const char *name); int nfc_llc_nop_register(void); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c131e5ceea37..3e9ddf72cd03 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2102,8 +2102,8 @@ retry: skb->protocol = proto; skb->dev = dev; - skb->priority = READ_ONCE(sk->sk_priority); - skb->mark = READ_ONCE(sk->sk_mark); + skb->priority = sockc.priority; + skb->mark = sockc.mark; skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid); skb_setup_tx_timestamp(skb, &sockc); @@ -2634,8 +2634,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb->protocol = proto; skb->dev = dev; - skb->priority = READ_ONCE(po->sk.sk_priority); - skb->mark = READ_ONCE(po->sk.sk_mark); + skb->priority = sockc->priority; + skb->mark = sockc->mark; skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, po->sk.sk_clockid); skb_setup_tx_timestamp(skb, sockc); skb_zcopy_set_nouarg(skb, ph.raw); @@ -3039,7 +3039,6 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) goto out_unlock; sockcm_init(&sockc, sk); - sockc.mark = READ_ONCE(sk->sk_mark); if (msg->msg_controllen) { err = sock_cmsg_send(sk, msg, &sockc); if (unlikely(err)) diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 8996c73c9779..3f2e707a11d1 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -460,7 +460,7 @@ META_COLLECTOR(int_sk_fwd_alloc) *err = -1; return; } - dst->value = sk_forward_alloc_get(sk); + dst->value = READ_ONCE(sk->sk_forward_alloc); } META_COLLECTOR(int_sk_sndbuf) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 89d2bef96469..0edf25973072 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -875,7 +875,7 @@ static bool xsk_no_wakeup(struct sock *sk) #ifdef CONFIG_NET_RX_BUSY_POLL /* Prefer busy-polling, skip the wakeup. */ return READ_ONCE(sk->sk_prefer_busy_poll) && READ_ONCE(sk->sk_ll_usec) && - READ_ONCE(sk->sk_napi_id) >= MIN_NAPI_ID; + napi_id_valid(READ_ONCE(sk->sk_napi_id)); #else return false; #endif diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 1f7975b49657..c263fb7a68dc 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -699,18 +699,56 @@ void xp_free(struct xdp_buff_xsk *xskb) } EXPORT_SYMBOL(xp_free); -void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr) +static u64 __xp_raw_get_addr(const struct xsk_buff_pool *pool, u64 addr) +{ + return pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr; +} + +static void *__xp_raw_get_data(const struct xsk_buff_pool *pool, u64 addr) { - addr = pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr; return pool->addrs + addr; } + +void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr) +{ + return __xp_raw_get_data(pool, __xp_raw_get_addr(pool, addr)); +} EXPORT_SYMBOL(xp_raw_get_data); -dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr) +static dma_addr_t __xp_raw_get_dma(const struct xsk_buff_pool *pool, u64 addr) { - addr = pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr; return (pool->dma_pages[addr >> PAGE_SHIFT] & ~XSK_NEXT_PG_CONTIG_MASK) + (addr & ~PAGE_MASK); } + +dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr) +{ + return __xp_raw_get_dma(pool, __xp_raw_get_addr(pool, addr)); +} EXPORT_SYMBOL(xp_raw_get_dma); + +/** + * xp_raw_get_ctx - get &xdp_desc context + * @pool: XSk buff pool desc address belongs to + * @addr: desc address (from userspace) + * + * Helper for getting desc's DMA address and metadata pointer, if present. + * Saves one call on hotpath, double calculation of the actual address, + * and inline checks for metadata presence and sanity. + * + * Return: new &xdp_desc_ctx struct containing desc's DMA address and metadata + * pointer, if it is present and valid (initialized to %NULL otherwise). + */ +struct xdp_desc_ctx xp_raw_get_ctx(const struct xsk_buff_pool *pool, u64 addr) +{ + struct xdp_desc_ctx ret; + + addr = __xp_raw_get_addr(pool, addr); + + ret.dma = __xp_raw_get_dma(pool, addr); + ret.meta = __xsk_buff_get_metadata(pool, __xp_raw_get_data(pool, addr)); + + return ret; +} +EXPORT_SYMBOL(xp_raw_get_ctx); diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index e4be227d3ad6..4e82f3871473 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -87,6 +87,11 @@ enum { }; enum { + __NETDEV_A_IO_URING_PROVIDER_INFO_MAX, + NETDEV_A_IO_URING_PROVIDER_INFO_MAX = (__NETDEV_A_IO_URING_PROVIDER_INFO_MAX - 1) +}; + +enum { NETDEV_A_PAGE_POOL_ID = 1, NETDEV_A_PAGE_POOL_IFINDEX, NETDEV_A_PAGE_POOL_NAPI_ID, @@ -94,6 +99,7 @@ enum { NETDEV_A_PAGE_POOL_INFLIGHT_MEM, NETDEV_A_PAGE_POOL_DETACH_TIME, NETDEV_A_PAGE_POOL_DMABUF, + NETDEV_A_PAGE_POOL_IO_URING, __NETDEV_A_PAGE_POOL_MAX, NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1) @@ -131,11 +137,18 @@ enum { }; enum { + __NETDEV_A_XSK_INFO_MAX, + NETDEV_A_XSK_INFO_MAX = (__NETDEV_A_XSK_INFO_MAX - 1) +}; + +enum { NETDEV_A_QUEUE_ID = 1, NETDEV_A_QUEUE_IFINDEX, NETDEV_A_QUEUE_TYPE, NETDEV_A_QUEUE_NAPI_ID, NETDEV_A_QUEUE_DMABUF, + NETDEV_A_QUEUE_IO_URING, + NETDEV_A_QUEUE_XSK, __NETDEV_A_QUEUE_MAX, NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps index 0712b5e82eb7..f3269ce39e5b 100644 --- a/tools/net/ynl/Makefile.deps +++ b/tools/net/ynl/Makefile.deps @@ -17,10 +17,13 @@ get_hdr_inc=-D$(1) -include $(UAPI_PATH)/linux/$(2) CFLAGS_devlink:=$(call get_hdr_inc,_LINUX_DEVLINK_H_,devlink.h) CFLAGS_dpll:=$(call get_hdr_inc,_LINUX_DPLL_H,dpll.h) CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_H,ethtool.h) \ - $(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) + $(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) \ + $(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_GENERATED_H,ethtool_netlink_generated.h) CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h) CFLAGS_mptcp_pm:=$(call get_hdr_inc,_LINUX_MPTCP_PM_H,mptcp_pm.h) +CFLAGS_net_shaper:=$(call get_hdr_inc,_LINUX_NET_SHAPER_H,net_shaper.h) CFLAGS_netdev:=$(call get_hdr_inc,_LINUX_NETDEV_H,netdev.h) +CFLAGS_nl80211:=$(call get_hdr_inc,__LINUX_NL802121_H,nl80211.h) CFLAGS_nlctrl:=$(call get_hdr_inc,__LINUX_GENERIC_NETLINK_H,genetlink.h) CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_NETLINK_H,nfsd_netlink.h) CFLAGS_ovs_datapath:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h) diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py index 08f8bf89cfc2..dcc2c6b298d6 100644 --- a/tools/net/ynl/pyynl/lib/ynl.py +++ b/tools/net/ynl/pyynl/lib/ynl.py @@ -536,9 +536,11 @@ class YnlFamily(SpecFamily): try: return int(value) except (ValueError, TypeError) as e: - if 'enum' not in attr_spec: - raise e - return self._encode_enum(attr_spec, value) + if 'enum' in attr_spec: + return self._encode_enum(attr_spec, value) + if attr_spec.display_hint: + return self._from_string(value, attr_spec) + raise e def _add_attr(self, space, name, value, search_attrs): try: @@ -571,7 +573,10 @@ class YnlFamily(SpecFamily): if isinstance(value, bytes): attr_payload = value elif isinstance(value, str): - attr_payload = bytes.fromhex(value) + if attr.display_hint: + attr_payload = self._from_string(value, attr) + else: + attr_payload = bytes.fromhex(value) elif isinstance(value, dict) and attr.struct_name: attr_payload = self._encode_struct(attr.struct_name, value) else: @@ -627,6 +632,11 @@ class YnlFamily(SpecFamily): decoded = self._decode_struct(attr.raw, attr_spec.struct_name) elif attr_spec.sub_type: decoded = attr.as_c_array(attr_spec.sub_type) + if 'enum' in attr_spec: + decoded = [ self._decode_enum(x, attr_spec) for x in decoded ] + elif attr_spec.display_hint: + decoded = [ self._formatted_string(x, attr_spec.display_hint) + for x in decoded ] else: decoded = attr.as_bin() if attr_spec.display_hint: @@ -644,15 +654,17 @@ class YnlFamily(SpecFamily): subattrs = self._decode(NlAttrs(item.raw), attr_spec['nested-attributes']) decoded.append({ item.type: subattrs }) elif attr_spec["sub-type"] == 'binary': - subattrs = item.as_bin() + subattr = item.as_bin() if attr_spec.display_hint: - subattrs = self._formatted_string(subattrs, attr_spec.display_hint) - decoded.append(subattrs) + subattr = self._formatted_string(subattr, attr_spec.display_hint) + decoded.append(subattr) elif attr_spec["sub-type"] in NlAttr.type_formats: - subattrs = item.as_scalar(attr_spec['sub-type'], attr_spec.byte_order) - if attr_spec.display_hint: - subattrs = self._formatted_string(subattrs, attr_spec.display_hint) - decoded.append(subattrs) + subattr = item.as_scalar(attr_spec['sub-type'], attr_spec.byte_order) + if 'enum' in attr_spec: + subattr = self._decode_enum(subattr, attr_spec) + elif attr_spec.display_hint: + subattr = self._formatted_string(subattr, attr_spec.display_hint) + decoded.append(subattr) else: raise Exception(f'Unknown {attr_spec["sub-type"]} with name {attr_spec["name"]}') return decoded @@ -899,6 +911,18 @@ class YnlFamily(SpecFamily): formatted = raw return formatted + def _from_string(self, string, attr_spec): + if attr_spec.display_hint in ['ipv4', 'ipv6']: + ip = ipaddress.ip_address(string) + if attr_spec['type'] == 'binary': + raw = ip.packed + else: + raw = int(ip) + else: + raise Exception(f"Display hint '{attr_spec.display_hint}' not implemented" + f" when parsing '{attr_spec['name']}'") + return raw + def handle_ntf(self, decoded): msg = dict() if self.include_raw: diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index c2eabc90dce8..a1427c537030 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -74,6 +74,8 @@ class Type(SpecAttr): self.c_name = c_lower(self.name) if self.c_name in _C_KW: self.c_name += '_' + if self.c_name[0].isdigit(): + self.c_name = '_' + self.c_name # Added by resolve(): self.enum_name = None @@ -100,7 +102,7 @@ class Type(SpecAttr): if isinstance(value, int): return value if value in self.family.consts: - raise Exception("Resolving family constants not implemented, yet") + return self.family.consts[value]["value"] return limit_to_number(value) def get_limit_str(self, limit, default=None, suffix=''): @@ -110,6 +112,9 @@ class Type(SpecAttr): if isinstance(value, int): return str(value) + suffix if value in self.family.consts: + const = self.family.consts[value] + if const.get('header'): + return c_upper(value) return c_upper(f"{self.family['name']}-{value}") return c_upper(value) @@ -683,7 +688,10 @@ class TypeArrayNest(Type): raise Exception(f"Sub-type {self.attr['sub-type']} not supported yet") def _attr_typol(self): - return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, ' + if self.attr['sub-type'] in scalars: + return f'.type = YNL_PT_U{c_upper(self.sub_type[1:])}, ' + else: + return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, ' def _attr_get(self, ri, var): local_vars = ['const struct nlattr *attr2;'] @@ -885,7 +893,7 @@ class AttrSet(SpecAttrSet): elif elem['type'] == 'nest': t = TypeNest(self.family, self, elem, value) elif elem['type'] == 'indexed-array' and 'sub-type' in elem: - if elem["sub-type"] == 'nest': + if elem["sub-type"] in ['nest', 'u32']: t = TypeArrayNest(self.family, self, elem, value) else: raise Exception(f'new_attr: unsupported sub-type {elem["sub-type"]}') @@ -1437,7 +1445,7 @@ class CodeWriter: self._ifdef_block = config_option -scalars = {'u8', 'u16', 'u32', 'u64', 's32', 's64', 'uint', 'sint'} +scalars = {'u8', 'u16', 'u32', 'u64', 's8', 's16', 's32', 's64', 'uint', 'sint'} direction_to_suffix = { 'reply': '_rsp', @@ -1669,6 +1677,9 @@ def _multi_parse(ri, struct, init_lines, local_vars): if aspec["sub-type"] == 'nest': local_vars.append(f'const struct nlattr *attr_{aspec.c_name};') array_nests.add(arg) + elif aspec['sub-type'] in scalars: + local_vars.append(f'const struct nlattr *attr_{aspec.c_name};') + array_nests.add(arg) else: raise Exception(f'Not supported sub-type {aspec["sub-type"]}') if 'multi-attr' in aspec: @@ -1724,11 +1735,17 @@ def _multi_parse(ri, struct, init_lines, local_vars): ri.cw.p(f"dst->{aspec.c_name} = calloc(n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));") ri.cw.p(f"dst->n_{aspec.c_name} = n_{aspec.c_name};") ri.cw.p('i = 0;') - ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;") + if 'nested-attributes' in aspec: + ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;") ri.cw.block_start(line=f"ynl_attr_for_each_nested(attr, attr_{aspec.c_name})") - ri.cw.p(f"parg.data = &dst->{aspec.c_name}[i];") - ri.cw.p(f"if ({aspec.nested_render_name}_parse(&parg, attr, ynl_attr_type(attr)))") - ri.cw.p('return YNL_PARSE_CB_ERROR;') + if 'nested-attributes' in aspec: + ri.cw.p(f"parg.data = &dst->{aspec.c_name}[i];") + ri.cw.p(f"if ({aspec.nested_render_name}_parse(&parg, attr, ynl_attr_type(attr)))") + ri.cw.p('return YNL_PARSE_CB_ERROR;') + elif aspec.sub_type in scalars: + ri.cw.p(f"dst->{aspec.c_name}[i] = ynl_attr_get_{aspec.sub_type}(attr);") + else: + raise Exception(f"Nest parsing type not supported in {aspec['name']}") ri.cw.p('i++;') ri.cw.block_end() ri.cw.block_end() @@ -2549,6 +2566,9 @@ def render_uapi(family, cw): defines = [] for const in family['definitions']: + if const.get('header'): + continue + if const['type'] != 'const': cw.writes_defines(defines) defines = [] diff --git a/tools/testing/selftests/drivers/net/.gitignore b/tools/testing/selftests/drivers/net/.gitignore new file mode 100644 index 000000000000..ec746f374e85 --- /dev/null +++ b/tools/testing/selftests/drivers/net/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +xdp_helper diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 137470bdee0c..0c95bd944d56 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -1,13 +1,18 @@ # SPDX-License-Identifier: GPL-2.0 +CFLAGS += $(KHDR_INCLUDES) TEST_INCLUDES := $(wildcard lib/py/*.py) \ $(wildcard lib/sh/*.sh) \ ../../net/net_helper.sh \ ../../net/lib.sh \ +TEST_GEN_FILES := xdp_helper + TEST_PROGS := \ netcons_basic.sh \ + netcons_fragmented_msg.sh \ netcons_overflow.sh \ + netcons_sysdata.sh \ ping.py \ queues.py \ stats.py \ diff --git a/tools/testing/selftests/drivers/net/config b/tools/testing/selftests/drivers/net/config index a2d8af60876d..f27172ddee0a 100644 --- a/tools/testing/selftests/drivers/net/config +++ b/tools/testing/selftests/drivers/net/config @@ -4,3 +4,4 @@ CONFIG_CONFIGFS_FS=y CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_NETCONSOLE_EXTENDED_LOG=y +CONFIG_XDP_SOCKETS=y diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index 21ba64ce1e34..ae783e18be83 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -15,6 +15,7 @@ TEST_PROGS = \ nic_performance.py \ pp_alloc_fail.py \ rss_ctx.py \ + tso.py \ # TEST_FILES := \ diff --git a/tools/testing/selftests/drivers/net/hw/csum.py b/tools/testing/selftests/drivers/net/hw/csum.py index cb40497faee4..701aca1361e0 100755 --- a/tools/testing/selftests/drivers/net/hw/csum.py +++ b/tools/testing/selftests/drivers/net/hw/csum.py @@ -9,15 +9,12 @@ from lib.py import ksft_run, ksft_exit, KsftSkipEx from lib.py import EthtoolFamily, NetDrvEpEnv from lib.py import bkg, cmd, wait_port_listen -def test_receive(cfg, ipv4=False, extra_args=None): +def test_receive(cfg, ipver="6", extra_args=None): """Test local nic checksum receive. Remote host sends crafted packets.""" if not cfg.have_rx_csum: raise KsftSkipEx(f"Test requires rx checksum offload on {cfg.ifname}") - if ipv4: - ip_args = f"-4 -S {cfg.remote_v4} -D {cfg.v4}" - else: - ip_args = f"-6 -S {cfg.remote_v6} -D {cfg.v6}" + ip_args = f"-{ipver} -S {cfg.remote_addr_v[ipver]} -D {cfg.addr_v[ipver]}" rx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -n 100 {ip_args} -r 1 -R {extra_args}" tx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -n 100 {ip_args} -r 1 -T {extra_args}" @@ -27,17 +24,14 @@ def test_receive(cfg, ipv4=False, extra_args=None): cmd(tx_cmd, host=cfg.remote) -def test_transmit(cfg, ipv4=False, extra_args=None): +def test_transmit(cfg, ipver="6", extra_args=None): """Test local nic checksum transmit. Remote host verifies packets.""" if (not cfg.have_tx_csum_generic and - not (cfg.have_tx_csum_ipv4 and ipv4) and - not (cfg.have_tx_csum_ipv6 and not ipv4)): + not (cfg.have_tx_csum_ipv4 and ipver == "4") and + not (cfg.have_tx_csum_ipv6 and ipver == "6")): raise KsftSkipEx(f"Test requires tx checksum offload on {cfg.ifname}") - if ipv4: - ip_args = f"-4 -S {cfg.v4} -D {cfg.remote_v4}" - else: - ip_args = f"-6 -S {cfg.v6} -D {cfg.remote_v6}" + ip_args = f"-{ipver} -S {cfg.addr_v[ipver]} -D {cfg.remote_addr_v[ipver]}" # Cannot randomize input when calculating zero checksum if extra_args != "-U -Z": @@ -51,26 +45,20 @@ def test_transmit(cfg, ipv4=False, extra_args=None): cmd(tx_cmd) -def test_builder(name, cfg, ipv4=False, tx=False, extra_args=""): +def test_builder(name, cfg, ipver="6", tx=False, extra_args=""): """Construct specific tests from the common template. Most tests follow the same basic pattern, differing only in Direction of the test and optional flags passed to csum.""" def f(cfg): - if ipv4: - cfg.require_v4() - else: - cfg.require_v6() + cfg.require_ipver(ipver) if tx: - test_transmit(cfg, ipv4, extra_args) + test_transmit(cfg, ipver, extra_args) else: - test_receive(cfg, ipv4, extra_args) + test_receive(cfg, ipver, extra_args) - if ipv4: - f.__name__ = "ipv4_" + name - else: - f.__name__ = "ipv6_" + name + f.__name__ = f"ipv{ipver}_" + name return f @@ -100,19 +88,19 @@ def main() -> None: with NetDrvEpEnv(__file__, nsim_test=False) as cfg: check_nic_features(cfg) - cfg.bin_local = path.abspath(path.dirname(__file__) + "/../../../net/lib/csum") + cfg.bin_local = cfg.rpath("../../../net/lib/csum") cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) cases = [] - for ipv4 in [True, False]: - cases.append(test_builder("rx_tcp", cfg, ipv4, False, "-t")) - cases.append(test_builder("rx_tcp_invalid", cfg, ipv4, False, "-t -E")) + for ipver in ["4", "6"]: + cases.append(test_builder("rx_tcp", cfg, ipver, False, "-t")) + cases.append(test_builder("rx_tcp_invalid", cfg, ipver, False, "-t -E")) - cases.append(test_builder("rx_udp", cfg, ipv4, False, "")) - cases.append(test_builder("rx_udp_invalid", cfg, ipv4, False, "-E")) + cases.append(test_builder("rx_udp", cfg, ipver, False, "")) + cases.append(test_builder("rx_udp_invalid", cfg, ipver, False, "-E")) - cases.append(test_builder("tx_udp_csum_offload", cfg, ipv4, True, "-U")) - cases.append(test_builder("tx_udp_zero_checksum", cfg, ipv4, True, "-U -Z")) + cases.append(test_builder("tx_udp_csum_offload", cfg, ipver, True, "-U")) + cases.append(test_builder("tx_udp_zero_checksum", cfg, ipver, True, "-U -Z")) ksft_run(cases=cases, args=(cfg, )) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py index 1223f0f5c10c..3947e9157115 100755 --- a/tools/testing/selftests/drivers/net/hw/devmem.py +++ b/tools/testing/selftests/drivers/net/hw/devmem.py @@ -21,15 +21,15 @@ def require_devmem(cfg): @ksft_disruptive def check_rx(cfg) -> None: - cfg.require_v6() + cfg.require_ipver("6") require_devmem(cfg) port = rand_port() - listen_cmd = f"./ncdevmem -l -f {cfg.ifname} -s {cfg.v6} -p {port}" + listen_cmd = f"./ncdevmem -l -f {cfg.ifname} -s {cfg.addr_v['6']} -p {port}" with bkg(listen_cmd) as socat: wait_port_listen(port) - cmd(f"echo -e \"hello\\nworld\"| socat -u - TCP6:[{cfg.v6}]:{port}", host=cfg.remote, shell=True) + cmd(f"echo -e \"hello\\nworld\"| socat -u - TCP6:[{cfg.addr_v['6']}]:{port}", host=cfg.remote, shell=True) ksft_eq(socat.stdout.strip(), "hello\nworld") diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index 19a6969643f4..2bf14ac2b8c6 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -50,7 +50,6 @@ #include <linux/memfd.h> #include <linux/dma-buf.h> #include <linux/udmabuf.h> -#include <libmnl/libmnl.h> #include <linux/types.h> #include <linux/netlink.h> #include <linux/genetlink.h> diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index 319aaa004c40..d6e69d7d5e43 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -4,7 +4,8 @@ import datetime import random import re -from lib.py import ksft_run, ksft_pr, ksft_exit, ksft_eq, ksft_ne, ksft_ge, ksft_lt, ksft_true +from lib.py import ksft_run, ksft_pr, ksft_exit +from lib.py import ksft_eq, ksft_ne, ksft_ge, ksft_in, ksft_lt, ksft_true, ksft_raises from lib.py import NetDrvEpEnv from lib.py import EthtoolFamily, NetdevFamily from lib.py import KsftSkipEx, KsftFailEx @@ -58,6 +59,14 @@ def require_ntuple(cfg): raise KsftSkipEx("Ntuple filters not enabled on the device: " + str(features["ntuple-filters"])) +def require_context_cnt(cfg, need_cnt): + # There's no good API to get the context count, so the tests + # which try to add a lot opportunisitically set the count they + # discovered. Careful with test ordering! + if need_cnt and cfg.context_cnt and cfg.context_cnt < need_cnt: + raise KsftSkipEx(f"Test requires at least {need_cnt} contexts, but device only has {cfg.context_cnt}") + + # Get Rx packet counts for all queues, as a simple list of integers # if @prev is specified the prev counts will be subtracted def _get_rx_cnts(cfg, prev=None): @@ -456,6 +465,8 @@ def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None): raise ksft_pr(f"Failed to create context {i + 1}, trying to test what we got") ctx_cnt = i + if cfg.context_cnt is None: + cfg.context_cnt = ctx_cnt break _rss_key_check(cfg, context=ctx_id) @@ -511,8 +522,7 @@ def test_rss_context_out_of_order(cfg, ctx_cnt=4): """ require_ntuple(cfg) - - requested_ctx_cnt = ctx_cnt + require_context_cnt(cfg, 4) # Try to allocate more queues when necessary qcnt = len(_get_rx_cnts(cfg)) @@ -577,9 +587,6 @@ def test_rss_context_out_of_order(cfg, ctx_cnt=4): remove_ctx(-1) check_traffic() - if requested_ctx_cnt != ctx_cnt: - raise KsftSkipEx(f"Tested only {ctx_cnt} contexts, wanted {requested_ctx_cnt}") - def test_rss_context_overlap(cfg, other_ctx=0): """ @@ -588,6 +595,8 @@ def test_rss_context_overlap(cfg, other_ctx=0): """ require_ntuple(cfg) + if other_ctx: + require_context_cnt(cfg, 2) queue_cnt = len(_get_rx_cnts(cfg)) if queue_cnt < 4: @@ -649,6 +658,29 @@ def test_rss_context_overlap2(cfg): test_rss_context_overlap(cfg, True) +def test_flow_add_context_missing(cfg): + """ + Test that we are not allowed to add a rule pointing to an RSS context + which was never created. + """ + + require_ntuple(cfg) + + # Find a context which doesn't exist + for ctx_id in range(1, 100): + try: + get_rss(cfg, context=ctx_id) + except CmdExitFailure: + break + + with ksft_raises(CmdExitFailure) as cm: + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port 1234 context {ctx_id}" + ntuple_id = ethtool_create(cfg, "-N", flow) + ethtool(f"-N {cfg.ifname} delete {ntuple_id}") + if cm.exception: + ksft_in('Invalid argument', cm.exception.cmd.stderr) + + def test_delete_rss_context_busy(cfg): """ Test that deletion returns -EBUSY when an rss context is being used @@ -717,6 +749,7 @@ def test_rss_ntuple_addition(cfg): def main() -> None: with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + cfg.context_cnt = None cfg.ethnl = EthtoolFamily() cfg.netdevnl = NetdevFamily() @@ -726,6 +759,7 @@ def main() -> None: test_rss_context_dump, test_rss_context_queue_reconfigure, test_rss_context_overlap, test_rss_context_overlap2, test_rss_context_out_of_order, test_rss_context4_create_with_cfg, + test_flow_add_context_missing, test_delete_rss_context_busy, test_rss_ntuple_addition], args=(cfg, )) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py new file mode 100755 index 000000000000..e1ecb92f79d9 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/tso.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +"""Run the tools/testing/selftests/net/csum testsuite.""" + +import fcntl +import socket +import struct +import termios +import time + +from lib.py import ksft_pr, ksft_run, ksft_exit, KsftSkipEx, KsftXfailEx +from lib.py import ksft_eq, ksft_ge, ksft_lt +from lib.py import EthtoolFamily, NetdevFamily, NetDrvEpEnv +from lib.py import bkg, cmd, defer, ethtool, ip, rand_port, wait_port_listen + + +def sock_wait_drain(sock, max_wait=1000): + """Wait for all pending write data on the socket to get ACKed.""" + for _ in range(max_wait): + one = b'\0' * 4 + outq = fcntl.ioctl(sock.fileno(), termios.TIOCOUTQ, one) + outq = struct.unpack("I", outq)[0] + if outq == 0: + break + time.sleep(0.01) + ksft_eq(outq, 0) + + +def tcp_sock_get_retrans(sock): + """Get the number of retransmissions for the TCP socket.""" + info = sock.getsockopt(socket.SOL_TCP, socket.TCP_INFO, 512) + return struct.unpack("I", info[100:104])[0] + + +def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso): + cfg.require_cmd("socat", remote=True) + + port = rand_port() + listen_cmd = f"socat -{ipver} -t 2 -u TCP-LISTEN:{port},reuseport /dev/null,ignoreeof" + + with bkg(listen_cmd, host=cfg.remote) as nc: + wait_port_listen(port, host=cfg.remote) + + if ipver == "4": + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect((remote_v4, port)) + else: + sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) + sock.connect((remote_v6, port)) + + # Small send to make sure the connection is working. + sock.send("ping".encode()) + sock_wait_drain(sock) + + # Send 4MB of data, record the LSO packet count. + qstat_old = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + buf = b"0" * 1024 * 1024 * 4 + sock.send(buf) + sock_wait_drain(sock) + qstat_new = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + + # No math behind the 10 here, but try to catch cases where + # TCP falls back to non-LSO. + ksft_lt(tcp_sock_get_retrans(sock), 10) + sock.close() + + # Check that at least 90% of the data was sent as LSO packets. + # System noise may cause false negatives. Also header overheads + # will add up to 5% of extra packes... The check is best effort. + total_lso_wire = len(buf) * 0.90 // cfg.dev["mtu"] + total_lso_super = len(buf) * 0.90 // cfg.dev["tso_max_size"] + if should_lso: + if cfg.have_stat_super_count: + ksft_ge(qstat_new['tx-hw-gso-packets'] - + qstat_old['tx-hw-gso-packets'], + total_lso_super, + comment="Number of LSO super-packets with LSO enabled") + if cfg.have_stat_wire_count: + ksft_ge(qstat_new['tx-hw-gso-wire-packets'] - + qstat_old['tx-hw-gso-wire-packets'], + total_lso_wire, + comment="Number of LSO wire-packets with LSO enabled") + else: + if cfg.have_stat_super_count: + ksft_lt(qstat_new['tx-hw-gso-packets'] - + qstat_old['tx-hw-gso-packets'], + 15, comment="Number of LSO super-packets with LSO disabled") + if cfg.have_stat_wire_count: + ksft_lt(qstat_new['tx-hw-gso-wire-packets'] - + qstat_old['tx-hw-gso-wire-packets'], + 500, comment="Number of LSO wire-packets with LSO disabled") + + +def build_tunnel(cfg, outer_ipver, tun_info): + local_v4 = NetDrvEpEnv.nsim_v4_pfx + "1" + local_v6 = NetDrvEpEnv.nsim_v6_pfx + "1" + remote_v4 = NetDrvEpEnv.nsim_v4_pfx + "2" + remote_v6 = NetDrvEpEnv.nsim_v6_pfx + "2" + + local_addr = cfg.addr_v[outer_ipver] + remote_addr = cfg.remote_addr_v[outer_ipver] + + tun_type = tun_info[0] + tun_arg = tun_info[2] + ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {local_addr} remote {remote_addr} dev {cfg.ifname}") + defer(ip, f"link del {tun_type}-ksft") + ip(f"link set dev {tun_type}-ksft up") + ip(f"addr add {local_v4}/24 dev {tun_type}-ksft") + ip(f"addr add {local_v6}/64 dev {tun_type}-ksft") + + ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {remote_addr} remote {local_addr} dev {cfg.remote_ifname}", + host=cfg.remote) + defer(ip, f"link del {tun_type}-ksft", host=cfg.remote) + ip(f"link set dev {tun_type}-ksft up", host=cfg.remote) + ip(f"addr add {remote_v4}/24 dev {tun_type}-ksft", host=cfg.remote) + ip(f"addr add {remote_v6}/64 dev {tun_type}-ksft", host=cfg.remote) + + return remote_v4, remote_v6 + + +def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None): + """Construct specific tests from the common template.""" + def f(cfg): + cfg.require_ipver(outer_ipver) + + if not cfg.have_stat_super_count and \ + not cfg.have_stat_wire_count: + raise KsftSkipEx(f"Device does not support LSO queue stats") + + ipver = outer_ipver + if tun: + remote_v4, remote_v6 = build_tunnel(cfg, ipver, tun) + ipver = inner_ipver + else: + remote_v4 = cfg.remote_addr_v["4"] + remote_v6 = cfg.remote_addr_v["6"] + + tun_partial = tun and tun[1] + # Tunnel which can silently fall back to gso-partial + has_gso_partial = tun and 'tx-gso-partial' in cfg.features + + # For TSO4 via partial we need mangleid + if ipver == "4" and feature in cfg.partial_features: + ksft_pr("Testing with mangleid enabled") + if 'tx-tcp-mangleid-segmentation' not in cfg.features: + ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation on") + defer(ethtool, f"-K {cfg.ifname} tx-tcp-mangleid-segmentation off") + + # First test without the feature enabled. + ethtool(f"-K {cfg.ifname} {feature} off") + if has_gso_partial: + ethtool(f"-K {cfg.ifname} tx-gso-partial off") + run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=False) + + # Now test with the feature enabled. + # For compatible tunnels only - just GSO partial, not specific feature. + if has_gso_partial: + ethtool(f"-K {cfg.ifname} tx-gso-partial on") + run_one_stream(cfg, ipver, remote_v4, remote_v6, + should_lso=tun_partial) + + # Full feature enabled. + if feature in cfg.features: + ethtool(f"-K {cfg.ifname} {feature} on") + run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=True) + else: + raise KsftXfailEx(f"Device does not support {feature}") + + f.__name__ = name + ((outer_ipver + "_") if tun else "") + "ipv" + inner_ipver + return f + + +def query_nic_features(cfg) -> None: + """Query and cache the NIC features.""" + cfg.have_stat_super_count = False + cfg.have_stat_wire_count = False + + cfg.features = set() + features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}}) + for f in features["active"]["bits"]["bit"]: + cfg.features.add(f["name"]) + + # Check which features are supported via GSO partial + cfg.partial_features = set() + if 'tx-gso-partial' in cfg.features: + ethtool(f"-K {cfg.ifname} tx-gso-partial off") + + no_partial = set() + features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}}) + for f in features["active"]["bits"]["bit"]: + no_partial.add(f["name"]) + cfg.partial_features = cfg.features - no_partial + ethtool(f"-K {cfg.ifname} tx-gso-partial on") + + stats = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True) + if stats: + if 'tx-hw-gso-packets' in stats[0]: + ksft_pr("Detected qstat for LSO super-packets") + cfg.have_stat_super_count = True + if 'tx-hw-gso-wire-packets' in stats[0]: + ksft_pr("Detected qstat for LSO wire-packets") + cfg.have_stat_wire_count = True + + +def main() -> None: + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + cfg.ethnl = EthtoolFamily() + cfg.netnl = NetdevFamily() + + query_nic_features(cfg) + + test_info = ( + # name, v4/v6 ethtool_feature tun:(type, partial, args) + ("", "4", "tx-tcp-segmentation", None), + ("", "6", "tx-tcp6-segmentation", None), + ("vxlan", "", "tx-udp_tnl-segmentation", ("vxlan", True, "id 100 dstport 4789 noudpcsum")), + ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", False, "id 100 dstport 4789 udpcsum")), + ("gre", "4", "tx-gre-segmentation", ("ipgre", False, "")), + ("gre", "6", "tx-gre-segmentation", ("ip6gre", False, "")), + ) + + cases = [] + for outer_ipver in ["4", "6"]: + for info in test_info: + # Skip if test which only works for a specific IP version + if info[1] and outer_ipver != info[1]: + continue + + cases.append(test_builder(info[0], cfg, outer_ipver, info[2], + tun=info[3], inner_ipver="4")) + if info[3]: + cases.append(test_builder(info[0], cfg, outer_ipver, info[2], + tun=info[3], inner_ipver="6")) + + ksft_run(cases=cases, args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index 987e452d3a45..96b33b5ef9dd 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -10,41 +10,61 @@ from lib.py import NetNS, NetdevSimDev from .remote import Remote -def _load_env_file(src_path): - env = os.environ.copy() +class NetDrvEnvBase: + """ + Base class for a NIC / host envirnoments + """ + def __init__(self, src_path): + self.src_path = src_path + self.env = self._load_env_file() + + def rpath(self, path): + """ + Get an absolute path to a file based on a path relative to the directory + containing the test which constructed env. - src_dir = Path(src_path).parent.resolve() - if not (src_dir / "net.config").exists(): + For example, if the test.py is in the same directory as + a binary (built from helper.c), the test can use env.rpath("helper") + to get the absolute path to the binary + """ + src_dir = Path(self.src_path).parent.resolve() + return (src_dir / path).as_posix() + + def _load_env_file(self): + env = os.environ.copy() + + src_dir = Path(self.src_path).parent.resolve() + if not (src_dir / "net.config").exists(): + return ksft_setup(env) + + with open((src_dir / "net.config").as_posix(), 'r') as fp: + for line in fp.readlines(): + full_file = line + # Strip comments + pos = line.find("#") + if pos >= 0: + line = line[:pos] + line = line.strip() + if not line: + continue + pair = line.split('=', maxsplit=1) + if len(pair) != 2: + raise Exception("Can't parse configuration line:", full_file) + env[pair[0]] = pair[1] return ksft_setup(env) - with open((src_dir / "net.config").as_posix(), 'r') as fp: - for line in fp.readlines(): - full_file = line - # Strip comments - pos = line.find("#") - if pos >= 0: - line = line[:pos] - line = line.strip() - if not line: - continue - pair = line.split('=', maxsplit=1) - if len(pair) != 2: - raise Exception("Can't parse configuration line:", full_file) - env[pair[0]] = pair[1] - return ksft_setup(env) - - -class NetDrvEnv: + +class NetDrvEnv(NetDrvEnvBase): """ Class for a single NIC / host env, with no remote end """ def __init__(self, src_path, **kwargs): - self._ns = None + super().__init__(src_path) - self.env = _load_env_file(src_path) + self._ns = None if 'NETIF' in self.env: - self.dev = ip("link show dev " + self.env['NETIF'], json=True)[0] + self.dev = ip("-d link show dev " + self.env['NETIF'], json=True)[0] else: self._ns = NetdevSimDev(**kwargs) self.dev = self._ns.nsims[0].dev @@ -68,7 +88,7 @@ class NetDrvEnv: self._ns = None -class NetDrvEpEnv: +class NetDrvEpEnv(NetDrvEnvBase): """ Class for an environment with a local device and "remote endpoint" which can be used to send traffic in. @@ -82,8 +102,7 @@ class NetDrvEpEnv: nsim_v6_pfx = "2001:db8::" def __init__(self, src_path, nsim_test=None): - - self.env = _load_env_file(src_path) + super().__init__(src_path) self._stats_settle_time = None @@ -94,17 +113,20 @@ class NetDrvEpEnv: self._ns = None self._ns_peer = None + self.addr_v = { "4": None, "6": None } + self.remote_addr_v = { "4": None, "6": None } + if "NETIF" in self.env: if nsim_test is True: raise KsftXfailEx("Test only works on netdevsim") self._check_env() - self.dev = ip("link show dev " + self.env['NETIF'], json=True)[0] + self.dev = ip("-d link show dev " + self.env['NETIF'], json=True)[0] - self.v4 = self.env.get("LOCAL_V4") - self.v6 = self.env.get("LOCAL_V6") - self.remote_v4 = self.env.get("REMOTE_V4") - self.remote_v6 = self.env.get("REMOTE_V6") + self.addr_v["4"] = self.env.get("LOCAL_V4") + self.addr_v["6"] = self.env.get("LOCAL_V6") + self.remote_addr_v["4"] = self.env.get("REMOTE_V4") + self.remote_addr_v["6"] = self.env.get("REMOTE_V6") kind = self.env["REMOTE_TYPE"] args = self.env["REMOTE_ARGS"] else: @@ -115,26 +137,29 @@ class NetDrvEpEnv: self.dev = self._ns.nsims[0].dev - self.v4 = self.nsim_v4_pfx + "1" - self.v6 = self.nsim_v6_pfx + "1" - self.remote_v4 = self.nsim_v4_pfx + "2" - self.remote_v6 = self.nsim_v6_pfx + "2" + self.addr_v["4"] = self.nsim_v4_pfx + "1" + self.addr_v["6"] = self.nsim_v6_pfx + "1" + self.remote_addr_v["4"] = self.nsim_v4_pfx + "2" + self.remote_addr_v["6"] = self.nsim_v6_pfx + "2" kind = "netns" args = self._netns.name self.remote = Remote(kind, args, src_path) - self.addr = self.v6 if self.v6 else self.v4 - self.remote_addr = self.remote_v6 if self.remote_v6 else self.remote_v4 + self.addr_ipver = "6" if self.addr_v["6"] else "4" + self.addr = self.addr_v[self.addr_ipver] + self.remote_addr = self.remote_addr_v[self.addr_ipver] - self.addr_ipver = "6" if self.v6 else "4" # Bracketed addresses, some commands need IPv6 to be inside [] - self.baddr = f"[{self.v6}]" if self.v6 else self.v4 - self.remote_baddr = f"[{self.remote_v6}]" if self.remote_v6 else self.remote_v4 + self.baddr = f"[{self.addr_v['6']}]" if self.addr_v["6"] else self.addr_v["4"] + self.remote_baddr = f"[{self.remote_addr_v['6']}]" if self.remote_addr_v["6"] else self.remote_addr_v["4"] self.ifname = self.dev['ifname'] self.ifindex = self.dev['ifindex'] + # resolve remote interface name + self.remote_ifname = self.resolve_remote_ifc() + self._required_cmd = {} def create_local(self): @@ -181,6 +206,18 @@ class NetDrvEpEnv: raise Exception("Invalid environment, missing configuration:", missing, "Please see tools/testing/selftests/drivers/net/README.rst") + def resolve_remote_ifc(self): + v4 = v6 = None + if self.remote_addr_v["4"]: + v4 = ip("addr show to " + self.remote_addr_v["4"], json=True, host=self.remote) + if self.remote_addr_v["6"]: + v6 = ip("addr show to " + self.remote_addr_v["6"], json=True, host=self.remote) + if v4 and v6 and v4[0]["ifname"] != v6[0]["ifname"]: + raise Exception("Can't resolve remote interface name, v4 and v6 don't match") + if (v4 and len(v4) > 1) or (v6 and len(v6) > 1): + raise Exception("Can't resolve remote interface name, multiple interfaces match") + return v6[0]["ifname"] if v6 else v4[0]["ifname"] + def __enter__(self): return self @@ -204,13 +241,9 @@ class NetDrvEpEnv: del self.remote self.remote = None - def require_v4(self): - if not self.v4 or not self.remote_v4: - raise KsftSkipEx("Test requires IPv4 connectivity") - - def require_v6(self): - if not self.v6 or not self.remote_v6: - raise KsftSkipEx("Test requires IPv6 connectivity") + def require_ipver(self, ipver): + if not self.addr_v[ipver] or not self.remote_addr_v[ipver]: + raise KsftSkipEx(f"Test requires IPv{ipver} connectivity") def _require_cmd(self, comm, key, host=None): cached = self._required_cmd.get(comm, {}) diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index 3acaba41ac7b..3c96b022954d 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -110,6 +110,13 @@ function create_dynamic_target() { echo 1 > "${NETCONS_PATH}"/enabled } +# Do not append the release to the header of the message +function disable_release_append() { + echo 0 > "${NETCONS_PATH}"/enabled + echo 0 > "${NETCONS_PATH}"/release + echo 1 > "${NETCONS_PATH}"/enabled +} + function cleanup() { local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device" @@ -223,3 +230,20 @@ function check_for_dependencies() { exit "${ksft_skip}" fi } + +function check_for_taskset() { + if ! which taskset > /dev/null ; then + echo "SKIP: taskset(1) is not available" >&2 + exit "${ksft_skip}" + fi +} + +# This is necessary if running multiple tests in a row +function pkill_socat() { + PROCESS_NAME="socat UDP-LISTEN:6666,fork ${OUTPUT_FILE}" + # socat runs under timeout(1), kill it if it is still alive + # do not fail if socat doesn't exist anymore + set +e + pkill -f "${PROCESS_NAME}" + set -e +} diff --git a/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh b/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh new file mode 100755 index 000000000000..4a71e01a230c --- /dev/null +++ b/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh @@ -0,0 +1,122 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# Test netconsole's message fragmentation functionality. +# +# When a message exceeds the maximum packet size, netconsole splits it into +# multiple fragments for transmission. This test verifies: +# - Correct fragmentation of large messages +# - Proper reassembly of fragments at the receiver +# - Preservation of userdata across fragments +# - Behavior with and without kernel release version appending +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh + +modprobe netdevsim 2> /dev/null || true +modprobe netconsole 2> /dev/null || true + +# The content of kmsg will be save to the following file +OUTPUT_FILE="/tmp/${TARGET}" + +# set userdata to a long value. In this case, it is "1-2-3-4...50-" +USERDATA_VALUE=$(printf -- '%.2s-' {1..60}) + +# Convert the header string in a regexp, so, we can remove +# the second header as well. +# A header looks like "13,468,514729715,-,ncfrag=0/1135;". If +# release is appended, you might find something like:L +# "6.13.0-04048-g4f561a87745a,13,468,514729715,-,ncfrag=0/1135;" +function header_to_regex() { + # header is everything before ; + local HEADER="${1}" + REGEX=$(echo "${HEADER}" | cut -d'=' -f1) + echo "${REGEX}=[0-9]*\/[0-9]*;" +} + +# We have two headers in the message. Remove both to get the full message, +# and extract the full message. +function extract_msg() { + local MSGFILE="${1}" + # Extract the header, which is the very first thing that arrives in the + # first list. + HEADER=$(sed -n '1p' "${MSGFILE}" | cut -d';' -f1) + HEADER_REGEX=$(header_to_regex "${HEADER}") + + # Remove the two headers from the received message + # This will return the message without any header, similarly to what + # was sent. + sed "s/""${HEADER_REGEX}""//g" "${MSGFILE}" +} + +# Validate the message, which has two messages glued together. +# unwrap them to make sure all the characters were transmitted. +# File will look like the following: +# 13,468,514729715,-,ncfrag=0/1135;<message> +# key=<part of key>-13,468,514729715,-,ncfrag=967/1135;<rest of the key> +function validate_fragmented_result() { + # Discard the netconsole headers, and assemble the full message + RCVMSG=$(extract_msg "${1}") + + # check for the main message + if ! echo "${RCVMSG}" | grep -q "${MSG}"; then + echo "Message body doesn't match." >&2 + echo "msg received=" "${RCVMSG}" >&2 + exit "${ksft_fail}" + fi + + # check userdata + if ! echo "${RCVMSG}" | grep -q "${USERDATA_VALUE}"; then + echo "message userdata doesn't match" >&2 + echo "msg received=" "${RCVMSG}" >&2 + exit "${ksft_fail}" + fi + # test passed. hooray +} + +# Check for basic system dependency and exit if not found +check_for_dependencies +# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) +echo "6 5" > /proc/sys/kernel/printk +# Remove the namespace, interfaces and netconsole target on exit +trap cleanup EXIT +# Create one namespace and two interfaces +set_network +# Create a dynamic target for netconsole +create_dynamic_target +# Set userdata "key" with the "value" value +set_user_data + + +# TEST 1: Send message and userdata. They will fragment +# ======= +MSG=$(printf -- 'MSG%.3s=' {1..150}) + +# Listen for netconsole port inside the namespace and destination interface +listen_port_and_save_to "${OUTPUT_FILE}" & +# Wait for socat to start and listen to the port. +wait_local_port_listen "${NAMESPACE}" "${PORT}" udp +# Send the message +echo "${MSG}: ${TARGET}" > /dev/kmsg +# Wait until socat saves the file to disk +busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" +# Check if the message was not corrupted +validate_fragmented_result "${OUTPUT_FILE}" + +# TEST 2: Test with smaller message, and without release appended +# ======= +MSG=$(printf -- 'FOOBAR%.3s=' {1..100}) +# Let's disable release and test again. +disable_release_append + +listen_port_and_save_to "${OUTPUT_FILE}" & +wait_local_port_listen "${NAMESPACE}" "${PORT}" udp +echo "${MSG}: ${TARGET}" > /dev/kmsg +busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" +validate_fragmented_result "${OUTPUT_FILE}" +exit "${ksft_pass}" diff --git a/tools/testing/selftests/drivers/net/netcons_sysdata.sh b/tools/testing/selftests/drivers/net/netcons_sysdata.sh new file mode 100755 index 000000000000..2b78fd1f5982 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netcons_sysdata.sh @@ -0,0 +1,167 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# A test that makes sure that sysdata runtime CPU data is properly set +# when a message is sent. +# +# There are 3 different tests, every time sent using a random CPU. +# - Test #1 +# * Only enable cpu_nr sysdata feature. +# - Test #2 +# * Keep cpu_nr sysdata feature enable and enable userdata. +# - Test #3 +# * keep userdata enabled, and disable sysdata cpu_nr feature. +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh + +# Enable the sysdata cpu_nr feature +function set_cpu_nr() { + if [[ ! -f "${NETCONS_PATH}/userdata/cpu_nr_enabled" ]] + then + echo "Populate CPU configfs path not available in ${NETCONS_PATH}/userdata/cpu_nr_enabled" >&2 + exit "${ksft_skip}" + fi + + echo 1 > "${NETCONS_PATH}/userdata/cpu_nr_enabled" +} + +# Disable the sysdata cpu_nr feature +function unset_cpu_nr() { + echo 0 > "${NETCONS_PATH}/userdata/cpu_nr_enabled" +} + +# Test if MSG content and `cpu=${CPU}` exists in OUTPUT_FILE +function validate_sysdata_cpu_exists() { + # OUTPUT_FILE will contain something like: + # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM + # userdatakey=userdatavalue + # cpu=X + + if [ ! -f "$OUTPUT_FILE" ]; then + echo "FAIL: File was not generated." >&2 + exit "${ksft_fail}" + fi + + if ! grep -q "${MSG}" "${OUTPUT_FILE}"; then + echo "FAIL: ${MSG} not found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + # Check if cpu=XX exists in the file and matches the one used + # in taskset(1) + if ! grep -q "cpu=${CPU}\+" "${OUTPUT_FILE}"; then + echo "FAIL: 'cpu=${CPU}' not found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + rm "${OUTPUT_FILE}" + pkill_socat +} + +# Test if MSG content exists in OUTPUT_FILE but no `cpu=` string +function validate_sysdata_no_cpu() { + if [ ! -f "$OUTPUT_FILE" ]; then + echo "FAIL: File was not generated." >&2 + exit "${ksft_fail}" + fi + + if ! grep -q "${MSG}" "${OUTPUT_FILE}"; then + echo "FAIL: ${MSG} not found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + if grep -q "cpu=" "${OUTPUT_FILE}"; then + echo "FAIL: 'cpu= found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + rm "${OUTPUT_FILE}" +} + +# Start socat, send the message and wait for the file to show up in the file +# system +function runtest { + # Listen for netconsole port inside the namespace and destination + # interface + listen_port_and_save_to "${OUTPUT_FILE}" & + # Wait for socat to start and listen to the port. + wait_local_port_listen "${NAMESPACE}" "${PORT}" udp + # Send the message + taskset -c "${CPU}" echo "${MSG}: ${TARGET}" > /dev/kmsg + # Wait until socat saves the file to disk + busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" +} + +# ========== # +# Start here # +# ========== # + +modprobe netdevsim 2> /dev/null || true +modprobe netconsole 2> /dev/null || true + +# Check for basic system dependency and exit if not found +check_for_dependencies +# This test also depends on taskset(1). Check for it before starting the test +check_for_taskset + +# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) +echo "6 5" > /proc/sys/kernel/printk +# Remove the namespace, interfaces and netconsole target on exit +trap cleanup EXIT +# Create one namespace and two interfaces +set_network +# Create a dynamic target for netconsole +create_dynamic_target + +#==================================================== +# TEST #1 +# Send message from a random CPU +#==================================================== +# Random CPU in the system +CPU=$((RANDOM % $(nproc))) +OUTPUT_FILE="/tmp/${TARGET}_1" +MSG="Test #1 from CPU${CPU}" +# Enable the auto population of cpu_nr +set_cpu_nr +runtest +# Make sure the message was received in the dst part +# and exit +validate_sysdata_cpu_exists + +#==================================================== +# TEST #2 +# This test now adds userdata together with sysdata +# =================================================== +# Get a new random CPU +CPU=$((RANDOM % $(nproc))) +OUTPUT_FILE="/tmp/${TARGET}_2" +MSG="Test #2 from CPU${CPU}" +set_user_data +runtest +validate_sysdata_cpu_exists + +# =================================================== +# TEST #3 +# Unset cpu_nr, so, no CPU should be appended. +# userdata is still set +# =================================================== +CPU=$((RANDOM % $(nproc))) +OUTPUT_FILE="/tmp/${TARGET}_3" +MSG="Test #3 from CPU${CPU}" +# Enable the auto population of cpu_nr +unset_cpu_nr +runtest +# At this time, cpu= shouldn't be present in the msg +validate_sysdata_no_cpu + +exit "${ksft_pass}" diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py index eb83e7b48797..17dc11e9b6dd 100755 --- a/tools/testing/selftests/drivers/net/ping.py +++ b/tools/testing/selftests/drivers/net/ping.py @@ -8,17 +8,17 @@ from lib.py import bkg, cmd, wait_port_listen, rand_port def test_v4(cfg) -> None: - cfg.require_v4() + cfg.require_ipver("4") - cmd(f"ping -c 1 -W0.5 {cfg.remote_v4}") - cmd(f"ping -c 1 -W0.5 {cfg.v4}", host=cfg.remote) + cmd("ping -c 1 -W0.5 " + cfg.remote_addr_v["4"]) + cmd("ping -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote) def test_v6(cfg) -> None: - cfg.require_v6() + cfg.require_ipver("6") - cmd(f"ping -c 1 -W0.5 {cfg.remote_v6}") - cmd(f"ping -c 1 -W0.5 {cfg.v6}", host=cfg.remote) + cmd("ping -c 1 -W0.5 " + cfg.remote_addr_v["6"]) + cmd("ping -c 1 -W0.5 " + cfg.addr_v["6"], host=cfg.remote) def test_tcp(cfg) -> None: diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py index 38303da957ee..5fdfebc6415f 100755 --- a/tools/testing/selftests/drivers/net/queues.py +++ b/tools/testing/selftests/drivers/net/queues.py @@ -2,13 +2,16 @@ # SPDX-License-Identifier: GPL-2.0 from lib.py import ksft_disruptive, ksft_exit, ksft_run -from lib.py import ksft_eq, ksft_raises, KsftSkipEx +from lib.py import ksft_eq, ksft_raises, KsftSkipEx, KsftFailEx from lib.py import EthtoolFamily, NetdevFamily, NlError from lib.py import NetDrvEnv from lib.py import cmd, defer, ip import errno import glob - +import os +import socket +import struct +import subprocess def sys_get_queues(ifname, qtype='rx') -> int: folders = glob.glob(f'/sys/class/net/{ifname}/queues/{qtype}-*') @@ -21,6 +24,39 @@ def nl_get_queues(cfg, nl, qtype='rx'): return len([q for q in queues if q['type'] == qtype]) return None +def check_xdp(cfg, nl, xdp_queue_id=0) -> None: + test_dir = os.path.dirname(os.path.realpath(__file__)) + xdp = subprocess.Popen([f"{test_dir}/xdp_helper", f"{cfg.ifindex}", f"{xdp_queue_id}"], + stdin=subprocess.PIPE, stdout=subprocess.PIPE, bufsize=1, + text=True) + defer(xdp.kill) + + stdout, stderr = xdp.communicate(timeout=10) + rx = tx = False + + if xdp.returncode == 255: + raise KsftSkipEx('AF_XDP unsupported') + elif xdp.returncode > 0: + raise KsftFailEx('unable to create AF_XDP socket') + + queues = nl.queue_get({'ifindex': cfg.ifindex}, dump=True) + if not queues: + raise KsftSkipEx("Netlink reports no queues") + + for q in queues: + if q['id'] == 0: + if q['type'] == 'rx': + rx = True + if q['type'] == 'tx': + tx = True + + ksft_eq(q['xsk'], {}) + else: + if 'xsk' in q: + _fail("Check failed: xsk attribute set.") + + ksft_eq(rx, True) + ksft_eq(tx, True) def get_queues(cfg, nl) -> None: snl = NetdevFamily(recv_size=4096) @@ -81,7 +117,7 @@ def check_down(cfg, nl) -> None: def main() -> None: with NetDrvEnv(__file__, queue_count=100) as cfg: - ksft_run([get_queues, addremove_queues, check_down], args=(cfg, NetdevFamily())) + ksft_run([get_queues, addremove_queues, check_down, check_xdp], args=(cfg, NetdevFamily())) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/xdp_helper.c b/tools/testing/selftests/drivers/net/xdp_helper.c new file mode 100644 index 000000000000..cf06a88b830b --- /dev/null +++ b/tools/testing/selftests/drivers/net/xdp_helper.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <linux/if_xdp.h> +#include <linux/if_link.h> +#include <net/if.h> +#include <inttypes.h> + +#define UMEM_SZ (1U << 16) +#define NUM_DESC (UMEM_SZ / 2048) + +/* this is a simple helper program that creates an XDP socket and does the + * minimum necessary to get bind() to succeed. + * + * this test program is not intended to actually process packets, but could be + * extended in the future if that is actually needed. + * + * it is used by queues.py to ensure the xsk netlinux attribute is set + * correctly. + */ +int main(int argc, char **argv) +{ + struct xdp_umem_reg umem_reg = { 0 }; + struct sockaddr_xdp sxdp = { 0 }; + int num_desc = NUM_DESC; + void *umem_area; + int ifindex; + int sock_fd; + int queue; + char byte; + + if (argc != 3) { + fprintf(stderr, "Usage: %s ifindex queue_id", argv[0]); + return 1; + } + + sock_fd = socket(AF_XDP, SOCK_RAW, 0); + if (sock_fd < 0) { + perror("socket creation failed"); + /* if the kernel doesn't support AF_XDP, let the test program + * know with -1. All other error paths return 1. + */ + if (errno == EAFNOSUPPORT) + return -1; + return 1; + } + + ifindex = atoi(argv[1]); + queue = atoi(argv[2]); + + umem_area = mmap(NULL, UMEM_SZ, PROT_READ | PROT_WRITE, MAP_PRIVATE | + MAP_ANONYMOUS, -1, 0); + if (umem_area == MAP_FAILED) { + perror("mmap failed"); + return 1; + } + + umem_reg.addr = (uintptr_t)umem_area; + umem_reg.len = UMEM_SZ; + umem_reg.chunk_size = 2048; + umem_reg.headroom = 0; + + setsockopt(sock_fd, SOL_XDP, XDP_UMEM_REG, &umem_reg, + sizeof(umem_reg)); + setsockopt(sock_fd, SOL_XDP, XDP_UMEM_FILL_RING, &num_desc, + sizeof(num_desc)); + setsockopt(sock_fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &num_desc, + sizeof(num_desc)); + setsockopt(sock_fd, SOL_XDP, XDP_RX_RING, &num_desc, sizeof(num_desc)); + + sxdp.sxdp_family = AF_XDP; + sxdp.sxdp_ifindex = ifindex; + sxdp.sxdp_queue_id = queue; + sxdp.sxdp_flags = 0; + + if (bind(sock_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) != 0) { + munmap(umem_area, UMEM_SZ); + perror("bind failed"); + close(sock_fd); + return 1; + } + + /* give the parent program some data when the socket is ready*/ + fprintf(stdout, "%d\n", sock_fd); + + /* parent program will write a byte to stdin when its ready for this + * helper to exit + */ + read(STDIN_FILENO, &byte, 1); + + close(sock_fd); + return 0; +} diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 28a715a8ef2b..80dcae53ef55 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -42,6 +42,7 @@ socket so_incoming_cpu so_netns_cookie so_txtime +so_rcv_listener stress_reuseport_listen tap tcp_fastopen_backup_key diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 73ee88d6b043..8d6116b80cf1 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -7,7 +7,7 @@ CFLAGS += -I../../../../usr/include/ $(KHDR_INCLUDES) CFLAGS += -I../ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \ - rtnetlink.sh xfrm_policy.sh test_blackhole_dev.sh + rtnetlink.sh xfrm_policy.sh TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh @@ -33,9 +33,11 @@ TEST_PROGS += gro.sh TEST_PROGS += gre_gso.sh TEST_PROGS += cmsg_so_mark.sh TEST_PROGS += cmsg_so_priority.sh +TEST_PROGS += test_so_rcv.sh TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh TEST_PROGS += netns-name.sh TEST_PROGS += nl_netdev.py +TEST_PROGS += rtnetlink.py TEST_PROGS += srv6_end_dt46_l3vpn_test.sh TEST_PROGS += srv6_end_dt4_l3vpn_test.sh TEST_PROGS += srv6_end_dt6_l3vpn_test.sh @@ -75,6 +77,7 @@ TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun tap epoll_busy_ TEST_GEN_FILES += toeplitz TEST_GEN_FILES += cmsg_sender TEST_GEN_FILES += stress_reuseport_listen +TEST_GEN_FILES += so_rcv_listener TEST_PROGS += test_vxlan_vnifiltering.sh TEST_GEN_FILES += io_uring_zerocopy_tx TEST_PROGS += io_uring_zerocopy_tx.sh diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 899dbad0104b..4fcc38907e48 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -3667,7 +3667,7 @@ ipv6_addr_bind_novrf() # when it really should not a=${NSA_LO_IP6} log_start - show_hint "Tecnically should fail since address is not on device but kernel allows" + show_hint "Technically should fail since address is not on device but kernel allows" run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to out of scope local address" } @@ -3724,7 +3724,7 @@ ipv6_addr_bind_vrf() # passes when it really should not a=${VRF_IP6} log_start - show_hint "Tecnically should fail since address is not on device but kernel allows" + show_hint "Technically should fail since address is not on device but kernel allows" run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to VRF address with device bind" diff --git a/tools/testing/selftests/net/fdb_flush.sh b/tools/testing/selftests/net/fdb_flush.sh index d5e3abb8658c..9931a1e36e3d 100755 --- a/tools/testing/selftests/net/fdb_flush.sh +++ b/tools/testing/selftests/net/fdb_flush.sh @@ -583,7 +583,7 @@ vxlan_test_flush_by_remote_attributes() $IP link del dev vx10 $IP link add name vx10 type vxlan dstport "$VXPORT" external - # For multicat FDB entries, the VXLAN driver stores a linked list of + # For multicast FDB entries, the VXLAN driver stores a linked list of # remotes for a given key. Verify that only the expected remotes are # flushed. multicast_fdb_entries_add diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index 77c83d9508d3..bea1282e0281 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -741,7 +741,7 @@ ipv6_fcnal() run_cmd "$IP nexthop add id 52 via 2001:db8:92::3" log_test $? 2 "Create nexthop - gw only" - # gw is not reachable throught given dev + # gw is not reachable through given dev run_cmd "$IP nexthop add id 53 via 2001:db8:3::3 dev veth1" log_test $? 2 "Create nexthop - invalid gw+dev combination" diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index 847936363a12..06c51d7ceb4a 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -256,6 +256,24 @@ fib_rule6_test() fib_rule6_test_match_n_redirect "$match" "$match" \ "$getnomatch" "sport and dport redirect to table" \ "sport and dport no redirect to table" + + match="sport 100-200 dport 300-400" + getmatch="sport 100 dport 400" + getnomatch="sport 100 dport 401" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" \ + "sport and dport range redirect to table" \ + "sport and dport range no redirect to table" + fi + + ip rule help 2>&1 | grep sport | grep -q MASK + if [ $? -eq 0 ]; then + match="sport 0x0f00/0xff00 dport 0x000f/0x00ff" + getmatch="sport 0x0f11 dport 0x220f" + getnomatch="sport 0x1f11 dport 0x221f" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "sport and dport masked redirect to table" \ + "sport and dport masked no redirect to table" fi fib_check_iproute_support "ipproto" "ipproto" @@ -525,6 +543,24 @@ fib_rule4_test() fib_rule4_test_match_n_redirect "$match" "$match" \ "$getnomatch" "sport and dport redirect to table" \ "sport and dport no redirect to table" + + match="sport 100-200 dport 300-400" + getmatch="sport 100 dport 400" + getnomatch="sport 100 dport 401" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" \ + "sport and dport range redirect to table" \ + "sport and dport range no redirect to table" + fi + + ip rule help 2>&1 | grep sport | grep -q MASK + if [ $? -eq 0 ]; then + match="sport 0x0f00/0xff00 dport 0x000f/0x00ff" + getmatch="sport 0x0f11 dport 0x220f" + getnomatch="sport 0x1f11 dport 0x221f" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "sport and dport masked redirect to table" \ + "sport and dport masked no redirect to table" fi fib_check_iproute_support "ipproto" "ipproto" diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh index d9d587454d20..8c1597ebc2d3 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh @@ -149,7 +149,7 @@ cfg_test_host_common() check_err $? "Failed to add $name host entry" bridge mdb replace dev br0 port br0 grp $grp $state vid 10 &> /dev/null - check_fail $? "Managed to replace $name host entry" + check_err $? "Failed to replace $name host entry" bridge mdb del dev br0 port br0 grp $grp $state vid 10 bridge mdb get dev br0 grp $grp vid 10 &> /dev/null diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 8de80acf249e..508f3c700d71 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -291,16 +291,6 @@ if [[ "$CHECK_TC" = "yes" ]]; then check_tc_version fi -require_command() -{ - local cmd=$1; shift - - if [[ ! -x "$(command -v "$cmd")" ]]; then - echo "SKIP: $cmd not installed" - exit $ksft_skip - fi -} - # IPv6 support was added in v3.0 check_mtools_version() { diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh index 3f9d50f1ef9e..180c5eca556f 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -740,6 +740,8 @@ test_learning() vxlan_flood_test $mac $dst 0 10 0 + # The entry should age out when it only forwards traffic + $MZ $h1 -c 50 -d 1sec -p 64 -b $mac -B $dst -t icmp -q & sleep 60 bridge fdb show brport vx1 | grep $mac | grep -q self diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 0bd9a038a1f0..975be4fdbcdb 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -450,6 +450,25 @@ kill_process() { kill $pid && wait $pid; } 2>/dev/null } +check_command() +{ + local cmd=$1; shift + + if [[ ! -x "$(command -v "$cmd")" ]]; then + log_test_skip "$cmd not installed" + return $EXIT_STATUS + fi +} + +require_command() +{ + local cmd=$1; shift + + if ! check_command "$cmd"; then + exit $EXIT_STATUS + fi +} + ip_link_add() { local name=$1; shift diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py index 54d8f5eba810..729457859316 100644 --- a/tools/testing/selftests/net/lib/py/__init__.py +++ b/tools/testing/selftests/net/lib/py/__init__.py @@ -5,5 +5,5 @@ from .ksft import * from .netns import NetNS from .nsim import * from .utils import * -from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily +from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily from .ynl import NetshaperFamily diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py index ad1e36baee2a..8986c584cb37 100644 --- a/tools/testing/selftests/net/lib/py/ynl.py +++ b/tools/testing/selftests/net/lib/py/ynl.py @@ -42,6 +42,10 @@ class RtnlFamily(YnlFamily): super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(), schema='', recv_size=recv_size) +class RtnlAddrFamily(YnlFamily): + def __init__(self, recv_size=0): + super().__init__((SPEC_PATH / Path('rt_addr.yaml')).as_posix(), + schema='', recv_size=recv_size) class NetdevFamily(YnlFamily): def __init__(self, recv_size=0): diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 9c2a415976cb..2329c2f8519b 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -28,7 +28,7 @@ size=0 usage() { echo "Usage: $0 [ -b ] [ -c ] [ -d ] [ -i]" - echo -e "\t-b: bail out after first error, otherwise runs al testcases" + echo -e "\t-b: bail out after first error, otherwise runs all testcases" echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)" echo -e "\t-d: debug this script" echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'" diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py index 93e8cb671c3d..beaee5e4e2aa 100755 --- a/tools/testing/selftests/net/nl_netdev.py +++ b/tools/testing/selftests/net/nl_netdev.py @@ -35,6 +35,21 @@ def napi_list_check(nf) -> None: comment=f"queue count after reset queue {q} mode {i}") +def nsim_rxq_reset_down(nf) -> None: + """ + Test that the queue API supports resetting a queue + while the interface is down. We should convert this + test to testing real HW once more devices support + queue API. + """ + with NetdevSimDev(queue_count=4) as nsimdev: + nsim = nsimdev.nsims[0] + + ip(f"link set dev {nsim.ifname} down") + for i in [0, 2, 3]: + nsim.dfs_write("queue_reset", f"1 {i}") + + def page_pool_check(nf) -> None: with NetdevSimDev() as nsimdev: nsim = nsimdev.nsims[0] @@ -106,7 +121,8 @@ def page_pool_check(nf) -> None: def main() -> None: nf = NetdevFamily() - ksft_run([empty_check, lo_check, page_pool_check, napi_list_check], + ksft_run([empty_check, lo_check, page_pool_check, napi_list_check, + nsim_rxq_reset_down], args=(nf, )) ksft_exit() diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c index 404a2ce759ab..221270cee3ea 100644 --- a/tools/testing/selftests/net/psock_tpacket.c +++ b/tools/testing/selftests/net/psock_tpacket.c @@ -12,7 +12,7 @@ * * Datapath: * Open a pair of packet sockets and send resp. receive an a priori known - * packet pattern accross the sockets and check if it was received resp. + * packet pattern across the sockets and check if it was received resp. * sent correctly. Fanout in combination with RX_RING is currently not * tested here. * diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c index 066efd30e294..7b9bf8a7bbe1 100644 --- a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c +++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c @@ -112,7 +112,7 @@ TEST(reuseaddr_ports_exhausted_reusable_same_euid) ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind."); if (opts->reuseport[0] && opts->reuseport[1]) { - EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind because both sockets succeed to be listened."); + EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind because both sockets successfully listened."); } else { EXPECT_NE(-1, fd[1]) TH_LOG("should succeed to bind to connect to different destinations."); } diff --git a/tools/testing/selftests/net/rtnetlink.py b/tools/testing/selftests/net/rtnetlink.py new file mode 100755 index 000000000000..80950888800b --- /dev/null +++ b/tools/testing/selftests/net/rtnetlink.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from lib.py import ksft_exit, ksft_run, ksft_ge, RtnlAddrFamily +import socket + +IPV4_ALL_HOSTS_MULTICAST = b'\xe0\x00\x00\x01' + +def dump_mcaddr_check(rtnl: RtnlAddrFamily) -> None: + """ + Verify that at least one interface has the IPv4 all-hosts multicast address. + At least the loopback interface should have this address. + """ + + addresses = rtnl.getmaddrs({"ifa-family": socket.AF_INET}, dump=True) + + all_host_multicasts = [ + addr for addr in addresses if addr['ifa-multicast'] == IPV4_ALL_HOSTS_MULTICAST + ] + + ksft_ge(len(all_host_multicasts), 1, + "No interface found with the IPv4 all-hosts multicast address") + +def main() -> None: + rtnl = RtnlAddrFamily() + ksft_run([dump_mcaddr_check], args=(rtnl, )) + ksft_exit() + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/net/so_rcv_listener.c b/tools/testing/selftests/net/so_rcv_listener.c new file mode 100644 index 000000000000..bc5841192aa6 --- /dev/null +++ b/tools/testing/selftests/net/so_rcv_listener.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <errno.h> +#include <netdb.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <linux/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> + +#ifndef SO_RCVPRIORITY +#define SO_RCVPRIORITY 82 +#endif + +struct options { + __u32 val; + int name; + int rcvname; + const char *host; + const char *service; +} opt; + +static void __attribute__((noreturn)) usage(const char *bin) +{ + printf("Usage: %s [opts] <dst host> <dst port / service>\n", bin); + printf("Options:\n" + "\t\t-M val Test SO_RCVMARK\n" + "\t\t-P val Test SO_RCVPRIORITY\n" + ""); + exit(EXIT_FAILURE); +} + +static void parse_args(int argc, char *argv[]) +{ + int o; + + while ((o = getopt(argc, argv, "M:P:")) != -1) { + switch (o) { + case 'M': + opt.val = atoi(optarg); + opt.name = SO_MARK; + opt.rcvname = SO_RCVMARK; + break; + case 'P': + opt.val = atoi(optarg); + opt.name = SO_PRIORITY; + opt.rcvname = SO_RCVPRIORITY; + break; + default: + usage(argv[0]); + break; + } + } + + if (optind != argc - 2) + usage(argv[0]); + + opt.host = argv[optind]; + opt.service = argv[optind + 1]; +} + +int main(int argc, char *argv[]) +{ + int err = 0; + int recv_fd = -1; + int ret_value = 0; + __u32 recv_val; + struct cmsghdr *cmsg; + char cbuf[CMSG_SPACE(sizeof(__u32))]; + char recv_buf[CMSG_SPACE(sizeof(__u32))]; + struct iovec iov[1]; + struct msghdr msg; + struct sockaddr_in recv_addr4; + struct sockaddr_in6 recv_addr6; + + parse_args(argc, argv); + + int family = strchr(opt.host, ':') ? AF_INET6 : AF_INET; + + recv_fd = socket(family, SOCK_DGRAM, IPPROTO_UDP); + if (recv_fd < 0) { + perror("Can't open recv socket"); + ret_value = -errno; + goto cleanup; + } + + err = setsockopt(recv_fd, SOL_SOCKET, opt.rcvname, &opt.val, sizeof(opt.val)); + if (err < 0) { + perror("Recv setsockopt error"); + ret_value = -errno; + goto cleanup; + } + + if (family == AF_INET) { + memset(&recv_addr4, 0, sizeof(recv_addr4)); + recv_addr4.sin_family = family; + recv_addr4.sin_port = htons(atoi(opt.service)); + + if (inet_pton(family, opt.host, &recv_addr4.sin_addr) <= 0) { + perror("Invalid IPV4 address"); + ret_value = -errno; + goto cleanup; + } + + err = bind(recv_fd, (struct sockaddr *)&recv_addr4, sizeof(recv_addr4)); + } else { + memset(&recv_addr6, 0, sizeof(recv_addr6)); + recv_addr6.sin6_family = family; + recv_addr6.sin6_port = htons(atoi(opt.service)); + + if (inet_pton(family, opt.host, &recv_addr6.sin6_addr) <= 0) { + perror("Invalid IPV6 address"); + ret_value = -errno; + goto cleanup; + } + + err = bind(recv_fd, (struct sockaddr *)&recv_addr6, sizeof(recv_addr6)); + } + + if (err < 0) { + perror("Recv bind error"); + ret_value = -errno; + goto cleanup; + } + + iov[0].iov_base = recv_buf; + iov[0].iov_len = sizeof(recv_buf); + + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = iov; + msg.msg_iovlen = 1; + msg.msg_control = cbuf; + msg.msg_controllen = sizeof(cbuf); + + err = recvmsg(recv_fd, &msg, 0); + if (err < 0) { + perror("Message receive error"); + ret_value = -errno; + goto cleanup; + } + + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; cmsg = CMSG_NXTHDR(&msg, cmsg)) { + if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == opt.name) { + recv_val = *(__u32 *)CMSG_DATA(cmsg); + printf("Received value: %u\n", recv_val); + + if (recv_val != opt.val) { + fprintf(stderr, "Error: expected value: %u, got: %u\n", + opt.val, recv_val); + ret_value = -EINVAL; + } + goto cleanup; + } + } + + fprintf(stderr, "Error: No matching cmsg received\n"); + ret_value = -ENOMSG; + +cleanup: + if (recv_fd >= 0) + close(recv_fd); + + return ret_value; +} diff --git a/tools/testing/selftests/net/test_blackhole_dev.sh b/tools/testing/selftests/net/test_blackhole_dev.sh deleted file mode 100755 index 3119b80e711f..000000000000 --- a/tools/testing/selftests/net/test_blackhole_dev.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 -# Runs blackhole-dev test using blackhole-dev kernel module - -if /sbin/modprobe -q test_blackhole_dev ; then - /sbin/modprobe -q -r test_blackhole_dev; - echo "test_blackhole_dev: ok"; -else - echo "test_blackhole_dev: [FAIL]"; - exit 1; -fi diff --git a/tools/testing/selftests/net/test_so_rcv.sh b/tools/testing/selftests/net/test_so_rcv.sh new file mode 100755 index 000000000000..d8aa4362879d --- /dev/null +++ b/tools/testing/selftests/net/test_so_rcv.sh @@ -0,0 +1,73 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +HOSTS=("127.0.0.1" "::1") +PORT=1234 +TOTAL_TESTS=0 +FAILED_TESTS=0 + +declare -A TESTS=( + ["SO_RCVPRIORITY"]="-P 2" + ["SO_RCVMARK"]="-M 3" +) + +check_result() { + ((TOTAL_TESTS++)) + if [ "$1" -ne 0 ]; then + ((FAILED_TESTS++)) + fi +} + +cleanup() +{ + cleanup_ns $NS +} + +trap cleanup EXIT + +setup_ns NS + +for HOST in "${HOSTS[@]}"; do + PROTOCOL="IPv4" + if [[ "$HOST" == "::1" ]]; then + PROTOCOL="IPv6" + fi + for test_name in "${!TESTS[@]}"; do + echo "Running $test_name test, $PROTOCOL" + arg=${TESTS[$test_name]} + + ip netns exec $NS ./so_rcv_listener $arg $HOST $PORT & + LISTENER_PID=$! + + sleep 0.5 + + if ! ip netns exec $NS ./cmsg_sender $arg $HOST $PORT; then + echo "Sender failed for $test_name, $PROTOCOL" + kill "$LISTENER_PID" 2>/dev/null + wait "$LISTENER_PID" + check_result 1 + continue + fi + + wait "$LISTENER_PID" + LISTENER_EXIT_CODE=$? + + if [ "$LISTENER_EXIT_CODE" -eq 0 ]; then + echo "Rcv test OK for $test_name, $PROTOCOL" + check_result 0 + else + echo "Rcv test FAILED for $test_name, $PROTOCOL" + check_result 1 + fi + done +done + +if [ "$FAILED_TESTS" -ne 0 ]; then + echo "FAIL - $FAILED_TESTS/$TOTAL_TESTS tests failed" + exit ${KSFT_FAIL} +else + echo "OK - All $TOTAL_TESTS tests passed" + exit ${KSFT_PASS} +fi diff --git a/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh index 2d442cdab11e..062f957950af 100755 --- a/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh +++ b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh @@ -1,29 +1,114 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -# Check FDB default-remote handling across "ip link set". +ALL_TESTS=" + test_set_remote + test_change_mc_remote +" +source lib.sh check_remotes() { local what=$1; shift local N=$(bridge fdb sh dev vx | grep 00:00:00:00:00:00 | wc -l) - echo -ne "expected two remotes after $what\t" - if [[ $N != 2 ]]; then - echo "[FAIL]" - EXIT_STATUS=1 + ((N == 2)) + check_err $? "expected 2 remotes after $what, got $N" +} + +# Check FDB default-remote handling across "ip link set". +test_set_remote() +{ + RET=0 + + ip_link_add vx up type vxlan id 2000 dstport 4789 + bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent + bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent + check_remotes "fdb append" + + ip link set dev vx type vxlan remote 192.0.2.30 + check_remotes "link set" + + log_test 'FDB default-remote handling across "ip link set"' +} + +fmt_remote() +{ + local addr=$1; shift + + if [[ $addr == 224.* ]]; then + echo "group $addr" else - echo "[ OK ]" + echo "remote $addr" fi } -ip link add name vx up type vxlan id 2000 dstport 4789 -bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent -bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent -check_remotes "fdb append" +change_remote() +{ + local remote=$1; shift + + ip link set dev vx type vxlan $(fmt_remote $remote) dev v1 +} + +check_membership() +{ + local check_vec=("$@") + + local memberships + memberships=$( + netstat -n --groups | + sed -n '/^v1\b/p' | + grep -o '[^ ]*$' + ) + check_err $? "Couldn't obtain group memberships" + + local item + for item in "${check_vec[@]}"; do + eval "local $item" + echo "$memberships" | grep -q "\b$group\b" + check_err_fail $fail $? "$group is_ex reported in IGMP query response" + done +} + +test_change_mc_remote() +{ + check_command netstat || return + + ip_link_add v1 up type veth peer name v2 + ip_link_set_up v2 + + RET=0 + + ip_link_add vx up type vxlan dstport 4789 \ + local 192.0.2.1 $(fmt_remote 224.1.1.1) dev v1 vni 1000 + + check_membership "group=224.1.1.1 fail=0" \ + "group=224.1.1.2 fail=1" \ + "group=224.1.1.3 fail=1" + + log_test "MC group report after VXLAN creation" + + RET=0 + + change_remote 224.1.1.2 + check_membership "group=224.1.1.1 fail=1" \ + "group=224.1.1.2 fail=0" \ + "group=224.1.1.3 fail=1" + + log_test "MC group report after changing VXLAN remote MC->MC" + + RET=0 + + change_remote 192.0.2.2 + check_membership "group=224.1.1.1 fail=1" \ + "group=224.1.1.2 fail=1" \ + "group=224.1.1.3 fail=1" + + log_test "MC group report after changing VXLAN remote MC->UC" +} + +trap defer_scopes_cleanup EXIT -ip link set dev vx type vxlan remote 192.0.2.30 -check_remotes "link set" +tests_run -ip link del dev vx exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/ynl.mk b/tools/testing/selftests/net/ynl.mk index 12e7cae251be..e907c2751956 100644 --- a/tools/testing/selftests/net/ynl.mk +++ b/tools/testing/selftests/net/ynl.mk @@ -27,7 +27,8 @@ $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig: $(OUTPUT)/libynl.a: $(YNL_SPECS) $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig $(Q)rm -f $(top_srcdir)/tools/net/ynl/libynl.a - $(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl GENS="$(YNL_GENS)" libynl.a + $(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl \ + GENS="$(YNL_GENS)" RSTS="" libynl.a $(Q)cp $(top_srcdir)/tools/net/ynl/libynl.a $(OUTPUT)/libynl.a EXTRA_CLEAN += \ |