-rw-r--r--  Documentation/ABI/testing/sysfs-devices-system-cpu | 1
-rw-r--r--  Documentation/admin-guide/hw-vuln/index.rst | 1
-rw-r--r--  Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst | 246
-rw-r--r--  Documentation/admin-guide/kernel-parameters.txt | 37
-rw-r--r--  Documentation/bpf/btf.rst | 43
-rw-r--r--  Documentation/bpf/instruction-set.rst | 2
-rw-r--r--  Documentation/devicetree/bindings/hwmon/ti,tmp401.yaml | 5
-rw-r--r--  Documentation/devicetree/bindings/interrupt-controller/socionext,uniphier-aidet.yaml | 1
-rw-r--r--  Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml | 404
-rw-r--r--  Documentation/devicetree/bindings/net/dsa/mt7530.txt | 327
-rw-r--r--  Documentation/devicetree/bindings/net/dsa/renesas,rzn1-a5psw.yaml | 134
-rw-r--r--  Documentation/devicetree/bindings/net/pcs/renesas,rzn1-miic.yaml | 171
-rw-r--r--  Documentation/devicetree/bindings/net/snps,dwmac.yaml | 5
-rw-r--r--  Documentation/devicetree/bindings/net/ti,dp83867.yaml | 18
-rw-r--r--  Documentation/devicetree/bindings/net/xlnx,emaclite.yaml | 63
-rw-r--r--  Documentation/filesystems/ext4/attributes.rst | 68
-rw-r--r--  Documentation/filesystems/ext4/bigalloc.rst | 2
-rw-r--r--  Documentation/filesystems/ext4/bitmaps.rst | 6
-rw-r--r--  Documentation/filesystems/ext4/blockgroup.rst | 30
-rw-r--r--  Documentation/filesystems/ext4/blockmap.rst | 2
-rw-r--r--  Documentation/filesystems/ext4/checksums.rst | 26
-rw-r--r--  Documentation/filesystems/ext4/directory.rst | 166
-rw-r--r--  Documentation/filesystems/ext4/eainode.rst | 10
-rw-r--r--  Documentation/filesystems/ext4/group_descr.rst | 126
-rw-r--r--  Documentation/filesystems/ext4/ifork.rst | 60
-rw-r--r--  Documentation/filesystems/ext4/inlinedata.rst | 8
-rw-r--r--  Documentation/filesystems/ext4/inodes.rst | 306
-rw-r--r--  Documentation/filesystems/ext4/journal.rst | 214
-rw-r--r--  Documentation/filesystems/ext4/mmp.rst | 36
-rw-r--r--  Documentation/filesystems/ext4/overview.rst | 2
-rw-r--r--  Documentation/filesystems/ext4/special_inodes.rst | 8
-rw-r--r--  Documentation/filesystems/ext4/super.rst | 550
-rw-r--r--  Documentation/filesystems/netfs_library.rst | 33
-rw-r--r--  Documentation/loongarch/introduction.rst | 15
-rw-r--r--  Documentation/loongarch/irq-chip-model.rst | 22
-rw-r--r--  Documentation/networking/bonding.rst | 11
-rw-r--r--  Documentation/networking/ip-sysctl.rst | 37
-rw-r--r--  Documentation/networking/phy.rst | 2
-rw-r--r--  Documentation/networking/tls.rst | 25
-rw-r--r--  Documentation/process/changes.rst | 12
-rw-r--r--  Documentation/translations/zh_CN/loongarch/introduction.rst | 14
-rw-r--r--  Documentation/translations/zh_CN/loongarch/irq-chip-model.rst | 14
-rw-r--r--  MAINTAINERS | 63
-rw-r--r--  Makefile | 2
-rw-r--r--  arch/arm/boot/dts/at91-sama5d3_ksz9477_evb.dts | 5
-rw-r--r--  arch/arm/boot/dts/r9a06g032-rzn1d400-db.dts | 117
-rw-r--r--  arch/arm/boot/dts/r9a06g032.dtsi | 108
-rw-r--r--  arch/arm/net/bpf_jit_32.c | 16
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts | 48
-rw-r--r--  arch/arm64/include/asm/kvm_host.h | 5
-rw-r--r--  arch/arm64/include/asm/virt.h | 3
-rw-r--r--  arch/arm64/kernel/cpufeature.c | 11
-rw-r--r--  arch/arm64/kernel/entry-ftrace.S | 1
-rw-r--r--  arch/arm64/kernel/ftrace.c | 137
-rw-r--r--  arch/arm64/kernel/setup.c | 7
-rw-r--r--  arch/arm64/kvm/arch_timer.c | 3
-rw-r--r--  arch/arm64/kvm/arm.c | 10
-rw-r--r--  arch/arm64/kvm/fpsimd.c | 2
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/mem_protect.c | 4
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/sys_regs.c | 42
-rw-r--r--  arch/arm64/kvm/vgic/vgic-mmio-v2.c | 4
-rw-r--r--  arch/arm64/kvm/vgic/vgic-mmio-v3.c | 40
-rw-r--r--  arch/arm64/kvm/vgic/vgic-mmio.c | 40
-rw-r--r--  arch/arm64/kvm/vgic/vgic-mmio.h | 3
-rw-r--r--  arch/arm64/kvm/vmid.c | 2
-rw-r--r--  arch/arm64/mm/cache.S | 2
-rw-r--r--  arch/loongarch/Kconfig | 1
-rw-r--r--  arch/loongarch/include/asm/hardirq.h | 2
-rw-r--r--  arch/loongarch/include/asm/percpu.h | 1
-rw-r--r--  arch/loongarch/include/asm/smp.h | 23
-rw-r--r--  arch/loongarch/include/asm/timex.h | 7
-rw-r--r--  arch/loongarch/kernel/acpi.c | 4
-rw-r--r--  arch/loongarch/kernel/cacheinfo.c | 1
-rw-r--r--  arch/loongarch/kernel/irq.c | 7
-rw-r--r--  arch/loongarch/kernel/process.c | 14
-rw-r--r--  arch/loongarch/kernel/setup.c | 5
-rw-r--r--  arch/loongarch/kernel/smp.c | 2
-rw-r--r--  arch/loongarch/kernel/vmlinux.lds.S | 1
-rw-r--r--  arch/riscv/Kconfig | 9
-rw-r--r--  arch/riscv/Kconfig.erratas | 1
-rw-r--r--  arch/riscv/boot/dts/microchip/mpfs.dtsi | 9
-rw-r--r--  arch/riscv/kernel/cpufeature.c | 5
-rw-r--r--  arch/riscv/kvm/vmid.c | 2
-rw-r--r--  arch/riscv/net/bpf_jit.h | 1
-rw-r--r--  arch/riscv/net/bpf_jit_core.c | 8
-rw-r--r--  arch/um/drivers/virt-pci.c | 7
-rw-r--r--  arch/x86/coco/tdx/tdx.c | 187
-rw-r--r--  arch/x86/hyperv/hv_init.c | 6
-rw-r--r--  arch/x86/hyperv/ivm.c | 84
-rw-r--r--  arch/x86/include/asm/cpufeatures.h | 1
-rw-r--r--  arch/x86/include/asm/e820/api.h | 5
-rw-r--r--  arch/x86/include/asm/efi.h | 2
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 67
-rw-r--r--  arch/x86/include/asm/mshyperv.h | 4
-rw-r--r--  arch/x86/include/asm/msr-index.h | 25
-rw-r--r--  arch/x86/include/asm/nospec-branch.h | 2
-rw-r--r--  arch/x86/include/asm/pci_x86.h | 8
-rw-r--r--  arch/x86/include/asm/setup.h | 38
-rw-r--r--  arch/x86/kernel/Makefile | 4
-rw-r--r--  arch/x86/kernel/cpu/bugs.c | 235
-rw-r--r--  arch/x86/kernel/cpu/common.c | 52
-rw-r--r--  arch/x86/kernel/ftrace_64.S | 11
-rw-r--r--  arch/x86/kernel/resource.c | 14
-rw-r--r--  arch/x86/kernel/setup.c | 5
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S | 4
-rw-r--r--  arch/x86/kvm/lapic.c | 27
-rw-r--r--  arch/x86/kvm/mmu/mmu.c | 2
-rw-r--r--  arch/x86/kvm/svm/avic.c | 171
-rw-r--r--  arch/x86/kvm/svm/nested.c | 39
-rw-r--r--  arch/x86/kvm/svm/svm.c | 8
-rw-r--r--  arch/x86/kvm/svm/svm.h | 4
-rw-r--r--  arch/x86/kvm/vmx/vmx.c | 76
-rw-r--r--  arch/x86/kvm/vmx/vmx.h | 2
-rw-r--r--  arch/x86/kvm/x86.c | 5
-rw-r--r--  arch/x86/net/bpf_jit_comp.c | 3
-rw-r--r--  arch/x86/pci/acpi.c | 8
-rw-r--r--  block/bfq-iosched.c | 6
-rw-r--r--  block/bio.c | 20
-rw-r--r--  block/blk-mq-sched.c | 1
-rw-r--r--  block/blk-mq.c | 29
-rw-r--r--  block/kyber-iosched.c | 3
-rw-r--r--  block/mq-deadline.c | 3
-rw-r--r--  certs/.gitignore | 2
-rw-r--r--  certs/Makefile | 24
-rw-r--r--  certs/blacklist.c | 8
-rw-r--r--  certs/blacklist_hashes.c | 4
-rw-r--r--  certs/common.h | 9
-rw-r--r--  certs/system_keyring.c | 6
-rw-r--r--  crypto/Kconfig | 1
-rw-r--r--  crypto/Makefile | 2
-rw-r--r--  crypto/asymmetric_keys/Kconfig | 10
-rw-r--r--  crypto/asymmetric_keys/Makefile | 2
-rw-r--r--  crypto/asymmetric_keys/selftest.c | 224
-rw-r--r--  crypto/asymmetric_keys/x509_loader.c (renamed from certs/common.c) | 8
-rw-r--r--  crypto/asymmetric_keys/x509_parser.h | 9
-rw-r--r--  crypto/asymmetric_keys/x509_public_key.c | 8
-rw-r--r--  drivers/atm/iphase.c | 2
-rw-r--r--  drivers/base/cpu.c | 8
-rw-r--r--  drivers/base/init.c | 2
-rw-r--r--  drivers/bus/fsl-mc/fsl-mc-bus.c | 6
-rw-r--r--  drivers/char/Kconfig | 50
-rw-r--r--  drivers/char/hw_random/virtio-rng.c | 2
-rw-r--r--  drivers/char/lp.c | 2
-rw-r--r--  drivers/char/random.c | 39
-rw-r--r--  drivers/clocksource/hyperv_timer.c | 1
-rw-r--r--  drivers/comedi/drivers/vmk80xx.c | 2
-rw-r--r--  drivers/dma-buf/udmabuf.c | 5
-rw-r--r--  drivers/firewire/core-cdev.c | 2
-rw-r--r--  drivers/firewire/core-device.c | 6
-rw-r--r--  drivers/firmware/efi/sysfb_efi.c | 2
-rw-r--r--  drivers/gpio/gpio-crystalcove.c | 70
-rw-r--r--  drivers/gpio/gpio-dln2.c | 23
-rw-r--r--  drivers/gpio/gpio-dwapb.c | 7
-rw-r--r--  drivers/gpio/gpio-merrifield.c | 22
-rw-r--r--  drivers/gpio/gpio-sch.c | 35
-rw-r--r--  drivers/gpio/gpio-wcove.c | 10
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2
-rw-r--r--  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_drv.c | 6
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_mic.c | 42
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 3
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt.c | 1
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_sysfs.c | 29
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_sysfs.h | 6
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_types.h | 3
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 2
-rw-r--r--  drivers/gpu/drm/i915/i915_sysfs.c | 17
-rw-r--r--  drivers/gpu/drm/i915/i915_vma.c | 48
-rw-r--r--  drivers/gpu/drm/ttm/ttm_bo.c | 22
-rw-r--r--  drivers/gpu/drm/ttm/ttm_device.c | 6
-rw-r--r--  drivers/gpu/drm/ttm/ttm_resource.c | 52
-rw-r--r--  drivers/hid/hid-hyperv.c | 5
-rw-r--r--  drivers/hv/channel_mgmt.c | 18
-rw-r--r--  drivers/hv/hv_kvp.c | 2
-rw-r--r--  drivers/hv/vmbus_drv.c | 4
-rw-r--r--  drivers/hwmon/asus-ec-sensors.c | 2
-rw-r--r--  drivers/hwmon/occ/common.c | 5
-rw-r--r--  drivers/i2c/busses/i2c-designware-common.c | 3
-rw-r--r--  drivers/i2c/busses/i2c-designware-platdrv.c | 13
-rw-r--r--  drivers/i2c/busses/i2c-mt65xx.c | 9
-rw-r--r--  drivers/i2c/busses/i2c-npcm7xx.c | 3
-rw-r--r--  drivers/infiniband/hw/mlx5/dm.c | 53
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c | 1
-rw-r--r--  drivers/irqchip/Kconfig | 2
-rw-r--r--  drivers/irqchip/irq-apple-aic.c | 2
-rw-r--r--  drivers/irqchip/irq-gic-realview.c | 1
-rw-r--r--  drivers/irqchip/irq-gic-v3.c | 7
-rw-r--r--  drivers/irqchip/irq-loongson-liointc.c | 8
-rw-r--r--  drivers/irqchip/irq-realtek-rtl.c | 2
-rw-r--r--  drivers/irqchip/irq-uniphier-aidet.c | 1
-rw-r--r--  drivers/isdn/hardware/mISDN/hfcsusb.c | 2
-rw-r--r--  drivers/md/dm-core.h | 11
-rw-r--r--  drivers/md/dm-log.c | 3
-rw-r--r--  drivers/md/dm-raid.c | 2
-rw-r--r--  drivers/md/dm-rq.c | 2
-rw-r--r--  drivers/md/dm-table.c | 11
-rw-r--r--  drivers/md/dm.c | 134
-rw-r--r--  drivers/md/dm.h | 2
-rw-r--r--  drivers/md/md.c | 14
-rw-r--r--  drivers/md/md.h | 2
-rw-r--r--  drivers/md/raid5-ppl.c | 4
-rw-r--r--  drivers/misc/atmel-ssc.c | 4
-rw-r--r--  drivers/misc/cardreader/rts5261.c | 2
-rw-r--r--  drivers/misc/eeprom/at25.c | 93
-rw-r--r--  drivers/misc/mei/hbm.c | 3
-rw-r--r--  drivers/misc/mei/hw-me-regs.h | 2
-rw-r--r--  drivers/misc/mei/hw-me.c | 2
-rw-r--r--  drivers/misc/mei/pci-me.c | 2
-rw-r--r--  drivers/mmc/host/mtk-sd.c | 20
-rw-r--r--  drivers/mmc/host/sdhci-pci-o2micro.c | 2
-rw-r--r--  drivers/net/amt.c | 8
-rw-r--r--  drivers/net/bonding/bond_main.c | 31
-rw-r--r--  drivers/net/bonding/bond_netlink.c | 15
-rw-r--r--  drivers/net/bonding/bond_options.c | 33
-rw-r--r--  drivers/net/dsa/Kconfig | 9
-rw-r--r--  drivers/net/dsa/Makefile | 1
-rw-r--r--  drivers/net/dsa/microchip/Kconfig | 42
-rw-r--r--  drivers/net/dsa/microchip/Makefile | 10
-rw-r--r--  drivers/net/dsa/microchip/ksz8.h | 48
-rw-r--r--  drivers/net/dsa/microchip/ksz8795.c | 362
-rw-r--r--  drivers/net/dsa/microchip/ksz8795_reg.h | 28
-rw-r--r--  drivers/net/dsa/microchip/ksz8863_smi.c | 2
-rw-r--r--  drivers/net/dsa/microchip/ksz9477.c | 303
-rw-r--r--  drivers/net/dsa/microchip/ksz9477.h | 60
-rw-r--r--  drivers/net/dsa/microchip/ksz9477_i2c.c | 6
-rw-r--r--  drivers/net/dsa/microchip/ksz9477_reg.h | 13
-rw-r--r--  drivers/net/dsa/microchip/ksz9477_spi.c | 150
-rw-r--r--  drivers/net/dsa/microchip/ksz_common.c | 596
-rw-r--r--  drivers/net/dsa/microchip/ksz_common.h | 118
-rw-r--r--  drivers/net/dsa/microchip/ksz_spi.c (renamed from drivers/net/dsa/microchip/ksz8795_spi.c) | 89
-rw-r--r--  drivers/net/dsa/mt7530.c | 82
-rw-r--r--  drivers/net/dsa/mt7530.h | 1
-rw-r--r--  drivers/net/dsa/mv88e6xxx/chip.c | 39
-rw-r--r--  drivers/net/dsa/mv88e6xxx/chip.h | 3
-rw-r--r--  drivers/net/dsa/mv88e6xxx/port.c | 36
-rw-r--r--  drivers/net/dsa/mv88e6xxx/port.h | 2
-rw-r--r--  drivers/net/dsa/ocelot/felix_vsc9959.c | 83
-rw-r--r--  drivers/net/dsa/qca/ar9331.c | 17
-rw-r--r--  drivers/net/dsa/qca8k.c | 22
-rw-r--r--  drivers/net/dsa/qca8k.h | 2
-rw-r--r--  drivers/net/dsa/realtek/rtl8365mb.c | 299
-rw-r--r--  drivers/net/dsa/rzn1_a5psw.c | 1062
-rw-r--r--  drivers/net/dsa/rzn1_a5psw.h | 259
-rw-r--r--  drivers/net/dsa/sja1105/sja1105_main.c | 2
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-platform.c | 4
-rw-r--r--  drivers/net/ethernet/atheros/ag71xx.c | 2
-rw-r--r--  drivers/net/ethernet/broadcom/bcm63xx_enet.c | 4
-rw-r--r--  drivers/net/ethernet/broadcom/bgmac-bcma.c | 1
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2
-rw-r--r--  drivers/net/ethernet/cadence/macb_main.c | 4
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 2
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c | 2
-rw-r--r--  drivers/net/ethernet/fungible/funeth/funeth_ethtool.c | 2
-rw-r--r--  drivers/net/ethernet/fungible/funeth/funeth_main.c | 3
-rw-r--r--  drivers/net/ethernet/fungible/funeth/funeth_tx.c | 23
-rw-r--r--  drivers/net/ethernet/fungible/funeth/funeth_txrx.h | 1
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hnae3.h | 1
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 2
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 18
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 101
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h | 1
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_devlink.c | 4
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 115
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_main.c | 7
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_txrx.c | 104
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 2
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_xsk.c | 17
-rw-r--r--  drivers/net/ethernet/intel/iavf/iavf_main.c | 2
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_ethtool.c | 49
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_lib.c | 42
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_main.c | 49
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_ptp.c | 2
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_ptp.h | 31
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_tc_lib.c | 5
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_vf_lib.c | 5
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_virtchnl.c | 53
-rw-r--r--  drivers/net/ethernet/intel/igb/igb_main.c | 19
-rw-r--r--  drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c | 3
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 33
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 18
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c | 42
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/main.c | 7
-rw-r--r--  drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c | 4
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/Makefile | 1
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/cmd.h | 53
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/core.c | 12
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/core.h | 12
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/minimal.c | 3
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/pci.c | 42
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/port.h | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/reg.h | 995
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 42
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 8
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c | 271
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 901
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h | 76
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_router_xm.c | 812
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 162
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c | 8
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/trap.h | 4
-rw-r--r--  drivers/net/ethernet/microchip/lan743x_ethtool.c | 63
-rw-r--r--  drivers/net/ethernet/microchip/lan743x_ethtool.h | 26
-rw-r--r--  drivers/net/ethernet/microchip/lan743x_main.c | 378
-rw-r--r--  drivers/net/ethernet/microchip/lan743x_main.h | 106
-rw-r--r--  drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c | 55
-rw-r--r--  drivers/net/ethernet/microsoft/mana/gdma.h | 10
-rw-r--r--  drivers/net/ethernet/microsoft/mana/gdma_main.c | 39
-rw-r--r--  drivers/net/ethernet/microsoft/mana/hw_channel.c | 18
-rw-r--r--  drivers/net/ethernet/microsoft/mana/hw_channel.h | 5
-rw-r--r--  drivers/net/ethernet/microsoft/mana/mana.h | 70
-rw-r--r--  drivers/net/ethernet/microsoft/mana/mana_bpf.c | 64
-rw-r--r--  drivers/net/ethernet/microsoft/mana/mana_en.c | 148
-rw-r--r--  drivers/net/ethernet/microsoft/mana/mana_ethtool.c | 12
-rw-r--r--  drivers/net/ethernet/mscc/ocelot.c | 1
-rw-r--r--  drivers/net/ethernet/mscc/ocelot_ptp.c | 8
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfd3/dp.c | 16
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfd3/rings.c | 1
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfd3/xsk.c | 8
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfdk/dp.c | 16
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfdk/rings.c | 1
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_main.c | 4
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_net.h | 20
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 9
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_net_dp.h | 2
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 53
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_net_xsk.c | 8
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h | 26
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfpcore/nfp_dev.c | 4
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h | 2
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c | 30
-rw-r--r--  drivers/net/ethernet/qualcomm/emac/emac-mac.c | 2
-rw-r--r--  drivers/net/ethernet/sfc/mcdi.c | 2
-rw-r--r--  drivers/net/ethernet/sfc/siena/mcdi.c | 2
-rw-r--r--  drivers/net/ethernet/sfc/siena/mcdi_pcol.h | 10
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/Kconfig | 3
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 34
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c | 157
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 12
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 14
-rw-r--r--  drivers/net/ethernet/xilinx/xilinx_axienet.h | 51
-rw-r--r--  drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 29
-rw-r--r--  drivers/net/hamradio/6pack.c | 9
-rw-r--r--  drivers/net/ipa/gsi.c | 229
-rw-r--r--  drivers/net/ipa/gsi.h | 13
-rw-r--r--  drivers/net/ipa/gsi_private.h | 24
-rw-r--r--  drivers/net/ipa/gsi_trans.c | 108
-rw-r--r--  drivers/net/ipa/gsi_trans.h | 15
-rw-r--r--  drivers/net/ipa/ipa_cmd.c | 8
-rw-r--r--  drivers/net/ipa/ipa_endpoint.c | 27
-rw-r--r--  drivers/net/ipa/ipa_endpoint.h | 4
-rw-r--r--  drivers/net/pcs/Kconfig | 12
-rw-r--r--  drivers/net/pcs/Makefile | 1
-rw-r--r--  drivers/net/pcs/pcs-lynx.c | 80
-rw-r--r--  drivers/net/pcs/pcs-rzn1-miic.c | 520
-rw-r--r--  drivers/net/pcs/pcs-xpcs.c | 168
-rw-r--r--  drivers/net/pcs/pcs-xpcs.h | 1
-rw-r--r--  drivers/net/phy/Kconfig | 7
-rw-r--r--  drivers/net/phy/Makefile | 1
-rw-r--r--  drivers/net/phy/aquantia_main.c | 35
-rw-r--r--  drivers/net/phy/at803x.c | 6
-rw-r--r--  drivers/net/phy/bcm-phy-lib.h | 19
-rw-r--r--  drivers/net/phy/bcm-phy-ptp.c | 944
-rw-r--r--  drivers/net/phy/broadcom.c | 33
-rw-r--r--  drivers/net/phy/dp83867.c | 55
-rw-r--r--  drivers/net/phy/dp83td510.c | 49
-rw-r--r--  drivers/net/phy/fixed_phy.c | 1
-rw-r--r--  drivers/net/phy/marvell-88x2222.c | 2
-rw-r--r--  drivers/net/phy/marvell.c | 10
-rw-r--r--  drivers/net/phy/mxl-gpy.c | 109
-rw-r--r--  drivers/net/phy/nxp-tja11xx.c | 11
-rw-r--r--  drivers/net/phy/phy_device.c | 18
-rw-r--r--  drivers/net/phy/phylink.c | 1
-rw-r--r--  drivers/net/phy/sfp.c | 10
-rw-r--r--  drivers/net/phy/smsc.c | 19
-rw-r--r--  drivers/net/ppp/ppp_generic.c | 2
-rw-r--r--  drivers/net/usb/ax88179_178a.c | 26
-rw-r--r--  drivers/net/usb/smsc95xx.c | 5
-rw-r--r--  drivers/net/usb/usbnet.c | 8
-rw-r--r--  drivers/net/veth.c | 4
-rw-r--r--  drivers/net/virtio_net.c | 25
-rw-r--r--  drivers/net/vmxnet3/vmxnet3_drv.c | 9
-rw-r--r--  drivers/net/vmxnet3/vmxnet3_ethtool.c | 6
-rw-r--r--  drivers/nvme/host/core.c | 5
-rw-r--r--  drivers/nvme/host/nvme.h | 28
-rw-r--r--  drivers/nvme/host/pci.c | 43
-rw-r--r--  drivers/platform/mellanox/Kconfig | 2
-rw-r--r--  drivers/platform/mellanox/nvsw-sn2201.c | 2
-rw-r--r--  drivers/platform/x86/barco-p50-gpio.c | 5
-rw-r--r--  drivers/platform/x86/gigabyte-wmi.c | 2
-rw-r--r--  drivers/platform/x86/hp-wmi.c | 29
-rw-r--r--  drivers/platform/x86/intel/hid.c | 6
-rw-r--r--  drivers/platform/x86/intel/pmc/core.c | 1
-rw-r--r--  drivers/platform/x86/intel/pmt/crashlog.c | 2
-rw-r--r--  drivers/scsi/ibmvscsi/ibmvfc.c | 82
-rw-r--r--  drivers/scsi/ibmvscsi/ibmvfc.h | 2
-rw-r--r--  drivers/scsi/ipr.c | 4
-rw-r--r--  drivers/scsi/lpfc/lpfc_crtn.h | 4
-rw-r--r--  drivers/scsi/lpfc/lpfc_ct.c | 2
-rw-r--r--  drivers/scsi/lpfc/lpfc_els.c | 21
-rw-r--r--  drivers/scsi/lpfc/lpfc_hw4.h | 3
-rw-r--r--  drivers/scsi/lpfc/lpfc_init.c | 2
-rw-r--r--  drivers/scsi/lpfc/lpfc_nportdisc.c | 3
-rw-r--r--  drivers/scsi/lpfc/lpfc_nvme.c | 52
-rw-r--r--  drivers/scsi/lpfc/lpfc_scsi.c | 6
-rw-r--r--  drivers/scsi/lpfc/lpfc_sli.c | 25
-rw-r--r--  drivers/scsi/lpfc/lpfc_version.h | 2
-rw-r--r--  drivers/scsi/mpt3sas/mpt3sas_base.c | 23
-rw-r--r--  drivers/scsi/pmcraid.c | 2
-rw-r--r--  drivers/scsi/scsi_debug.c | 22
-rw-r--r--  drivers/scsi/scsi_transport_iscsi.c | 7
-rw-r--r--  drivers/scsi/sd.c | 2
-rw-r--r--  drivers/scsi/storvsc_drv.c | 27
-rw-r--r--  drivers/scsi/vmw_pvscsi.h | 4
-rw-r--r--  drivers/staging/olpc_dcon/Kconfig | 2
-rw-r--r--  drivers/staging/r8188eu/core/rtw_xmit.c | 20
-rw-r--r--  drivers/staging/r8188eu/os_dep/ioctl_linux.c | 2
-rw-r--r--  drivers/staging/rtl8723bs/os_dep/ioctl_linux.c | 6
-rw-r--r--  drivers/tty/goldfish.c | 2
-rw-r--r--  drivers/tty/n_gsm.c | 2
-rw-r--r--  drivers/tty/serial/8250/8250_port.c | 2
-rw-r--r--  drivers/tty/serial/qcom_geni_serial.c | 1
-rw-r--r--  drivers/tty/serial/serial_core.c | 9
-rw-r--r--  drivers/ufs/core/ufshcd.c | 76
-rw-r--r--  drivers/usb/cdns3/cdnsp-ring.c | 19
-rw-r--r--  drivers/usb/dwc2/hcd.c | 2
-rw-r--r--  drivers/usb/dwc3/core.c | 9
-rw-r--r--  drivers/usb/dwc3/dwc3-pci.c | 1
-rw-r--r--  drivers/usb/dwc3/gadget.c | 26
-rw-r--r--  drivers/usb/gadget/function/f_fs.c | 40
-rw-r--r--  drivers/usb/gadget/function/u_ether.c | 12
-rw-r--r--  drivers/usb/gadget/udc/lpc32xx_udc.c | 1
-rw-r--r--  drivers/usb/host/xhci.c | 15
-rw-r--r--  drivers/usb/serial/io_ti.c | 2
-rw-r--r--  drivers/usb/serial/io_usbvend.h | 1
-rw-r--r--  drivers/usb/serial/option.c | 6
-rw-r--r--  drivers/vdpa/mlx5/net/mlx5_vnet.c | 9
-rw-r--r--  drivers/vdpa/vdpa_user/vduse_dev.c | 7
-rw-r--r--  drivers/vhost/vdpa.c | 2
-rw-r--r--  drivers/vhost/vringh.c | 10
-rw-r--r--  drivers/virtio/virtio_mmio.c | 3
-rw-r--r--  drivers/virtio/virtio_pci_modern_dev.c | 2
-rw-r--r--  drivers/watchdog/gxp-wdt.c | 1
-rw-r--r--  fs/9p/fid.c | 22
-rw-r--r--  fs/9p/v9fs.h | 2
-rw-r--r--  fs/9p/vfs_addr.c | 26
-rw-r--r--  fs/9p/vfs_inode.c | 11
-rw-r--r--  fs/9p/vfs_inode_dotl.c | 3
-rw-r--r--  fs/afs/dynroot.c | 2
-rw-r--r--  fs/afs/file.c | 6
-rw-r--r--  fs/afs/inode.c | 5
-rw-r--r--  fs/afs/internal.h | 2
-rw-r--r--  fs/afs/volume.c | 3
-rw-r--r--  fs/afs/write.c | 2
-rw-r--r--  fs/attr.c | 26
-rw-r--r--  fs/btrfs/disk-io.c | 13
-rw-r--r--  fs/btrfs/super.c | 47
-rw-r--r--  fs/ceph/addr.c | 12
-rw-r--r--  fs/ceph/cache.h | 2
-rw-r--r--  fs/ceph/inode.c | 2
-rw-r--r--  fs/cifs/cifsfs.c | 2
-rw-r--r--  fs/cifs/cifsfs.h | 2
-rw-r--r--  fs/cifs/connect.c | 4
-rw-r--r--  fs/cifs/fscache.h | 2
-rw-r--r--  fs/cifs/misc.c | 27
-rw-r--r--  fs/cifs/sess.c | 8
-rw-r--r--  fs/cifs/smb2pdu.c | 5
-rw-r--r--  fs/cifs/trace.h | 38
-rw-r--r--  fs/ext2/dir.c | 9
-rw-r--r--  fs/ext4/inode.c | 2
-rw-r--r--  fs/ext4/mballoc.c | 26
-rw-r--r--  fs/ext4/migrate.c | 2
-rw-r--r--  fs/ext4/namei.c | 3
-rw-r--r--  fs/ext4/page-io.c | 2
-rw-r--r--  fs/ext4/resize.c | 10
-rw-r--r--  fs/ext4/super.c | 172
-rw-r--r--  fs/ext4/xattr.c | 3
-rw-r--r--  fs/io_uring.c | 347
-rw-r--r--  fs/jbd2/transaction.c | 2
-rw-r--r--  fs/netfs/buffered_read.c | 5
-rw-r--r--  fs/netfs/objects.c | 6
-rw-r--r--  fs/nfs/callback_proc.c | 1
-rw-r--r--  fs/nfs/dir.c | 1
-rw-r--r--  fs/nfs/nfs4file.c | 1
-rw-r--r--  fs/nfs/pnfs.c | 21
-rw-r--r--  fs/nfs/pnfs.h | 1
-rw-r--r--  fs/nfsd/filecache.c | 9
-rw-r--r--  fs/xfs/libxfs/xfs_attr.c | 9
-rw-r--r--  fs/xfs/libxfs/xfs_attr.h | 12
-rw-r--r--  fs/xfs/libxfs/xfs_attr_leaf.c | 2
-rw-r--r--  fs/xfs/libxfs/xfs_da_btree.h | 4
-rw-r--r--  fs/xfs/xfs_attr_item.c | 15
-rw-r--r--  fs/xfs/xfs_ioctl.c | 3
-rw-r--r--  fs/xfs/xfs_xattr.c | 17
-rw-r--r--  include/drm/drm_atomic.h | 1
-rw-r--r--  include/drm/ttm/ttm_resource.h | 8
-rw-r--r--  include/dt-bindings/net/pcs-rzn1-miic.h | 33
-rw-r--r--  include/keys/asymmetric-type.h | 3
-rw-r--r--  include/linux/backing-dev.h | 2
-rw-r--r--  include/linux/bio.h | 1
-rw-r--r--  include/linux/blkdev.h | 4
-rw-r--r--  include/linux/bpf.h | 105
-rw-r--r--  include/linux/bpf_verifier.h | 2
-rw-r--r--  include/linux/btf.h | 28
-rw-r--r--  include/linux/cpu.h | 3
-rw-r--r--  include/linux/crc-itu-t.h | 2
-rw-r--r--  include/linux/filter.h | 34
-rw-r--r--  include/linux/mii.h | 35
-rw-r--r--  include/linux/mlx5/device.h | 36
-rw-r--r--  include/linux/mlx5/driver.h | 1
-rw-r--r--  include/linux/mlx5/fs.h | 14
-rw-r--r--  include/linux/mlx5/mlx5_ifc.h | 151
-rw-r--r--  include/linux/mm_types.h | 5
-rw-r--r--  include/linux/mroute_base.h | 15
-rw-r--r--  include/linux/netfs.h | 25
-rw-r--r--  include/linux/objtool.h | 6
-rw-r--r--  include/linux/pcs-rzn1-miic.h | 18
-rw-r--r--  include/linux/pcs/pcs-xpcs.h | 3
-rw-r--r--  include/linux/phy.h | 3
-rw-r--r--  include/linux/printk.h | 5
-rw-r--r--  include/linux/random.h | 3
-rw-r--r--  include/linux/serial_core.h | 1
-rw-r--r--  include/linux/skbuff.h | 24
-rw-r--r--  include/linux/skmsg.h | 1
-rw-r--r--  include/linux/socket.h | 4
-rw-r--r--  include/linux/sunrpc/xdr.h | 16
-rw-r--r--  include/linux/vdpa.h | 5
-rw-r--r--  include/linux/visorbus.h | 344
-rw-r--r--  include/linux/vmalloc.h | 1
-rw-r--r--  include/linux/workqueue.h | 66
-rw-r--r--  include/linux/xarray.h | 1
-rw-r--r--  include/net/af_unix.h | 5
-rw-r--r--  include/net/bond_options.h | 11
-rw-r--r--  include/net/bonding.h | 1
-rw-r--r--  include/net/dsa.h | 5
-rw-r--r--  include/net/inet_connection_sock.h | 3
-rw-r--r--  include/net/inet_hashtables.h | 68
-rw-r--r--  include/net/inet_sock.h | 5
-rw-r--r--  include/net/net_namespace.h | 2
-rw-r--r--  include/net/netns/unix.h | 6
-rw-r--r--  include/net/raw.h | 18
-rw-r--r--  include/net/rawv6.h | 7
-rw-r--r--  include/net/sock.h | 21
-rw-r--r--  include/net/tcp.h | 1
-rw-r--r--  include/soc/mscc/ocelot.h | 7
-rw-r--r--  include/trace/events/workqueue.h | 8
-rw-r--r--  include/uapi/linux/bpf.h | 88
-rw-r--r--  include/uapi/linux/btf.h | 17
-rw-r--r--  include/uapi/linux/if_ether.h | 1
-rw-r--r--  include/uapi/linux/if_link.h | 1
-rw-r--r--  include/uapi/linux/io_uring.h | 6
-rw-r--r--  include/uapi/rdma/mlx5_user_ioctl_verbs.h | 1
-rw-r--r--  kernel/auditsc.c | 2
-rw-r--r--  kernel/bpf/btf.c | 147
-rw-r--r--  kernel/bpf/cgroup.c | 70
-rw-r--r--  kernel/bpf/core.c | 17
-rw-r--r--  kernel/bpf/helpers.c | 12
-rw-r--r--  kernel/bpf/percpu_freelist.c | 20
-rw-r--r--  kernel/bpf/syscall.c | 7
-rw-r--r--  kernel/bpf/verifier.c | 49
-rw-r--r--  kernel/cfi.c | 22
-rw-r--r--  kernel/events/core.c | 16
-rw-r--r--  kernel/irq/chip.c | 5
-rw-r--r--  kernel/locking/lockdep.c | 2
-rw-r--r--  kernel/panic.c | 2
-rw-r--r--  kernel/printk/internal.h | 2
-rw-r--r--  kernel/printk/printk.c | 8
-rw-r--r--  kernel/printk/printk_safe.c | 32
-rw-r--r--  kernel/reboot.c | 2
-rw-r--r--  kernel/sched/core.c | 36
-rw-r--r--  kernel/sched/sched.h | 5
-rw-r--r--  kernel/trace/bpf_trace.c | 64
-rw-r--r--  kernel/trace/ftrace.c | 13
-rw-r--r--  kernel/trace/rethook.c | 9
-rw-r--r--  kernel/trace/trace_uprobe.c | 5
-rw-r--r--  kernel/workqueue.c | 16
-rw-r--r--  lib/Kconfig | 3
-rw-r--r--  lib/Kconfig.ubsan | 2
-rw-r--r--  lib/Makefile | 1
-rw-r--r--  lib/crc-itu-t.c | 2
-rw-r--r--  lib/crypto/Kconfig | 1
-rw-r--r--  lib/iov_iter.c | 20
-rw-r--r--  lib/memneq.c (renamed from crypto/memneq.c) | 0
-rw-r--r--  lib/vsprintf.c | 3
-rw-r--r--  lib/xarray.c | 5
-rw-r--r--  mm/backing-dev.c | 11
-rw-r--r--  mm/filemap.c | 9
-rw-r--r--  mm/huge_memory.c | 3
-rw-r--r--  mm/readahead.c | 2
-rw-r--r--  mm/slub.c | 43
-rw-r--r--  mm/usercopy.c | 26
-rw-r--r--  mm/vmalloc.c | 2
-rw-r--r--  net/ax25/af_ax25.c | 33
-rw-r--r--  net/ax25/ax25_dev.c | 5
-rw-r--r--  net/bpf/test_run.c | 6
-rw-r--r--  net/bridge/br_mdb.c | 15
-rw-r--r--  net/core/dev.c | 25
-rw-r--r--  net/core/filter.c | 164
-rw-r--r--  net/core/net-sysfs.c | 1
-rw-r--r--  net/core/skbuff.c | 20
-rw-r--r--  net/core/skmsg.c | 6
-rw-r--r--  net/core/sock.c | 9
-rw-r--r--  net/core/sock_map.c | 23
-rw-r--r--  net/dccp/proto.c | 33
-rw-r--r--  net/dsa/Kconfig | 7
-rw-r--r--  net/dsa/Makefile | 1
-rw-r--r--  net/dsa/slave.c | 18
-rw-r--r--  net/dsa/tag_rzn1_a5psw.c | 113
-rw-r--r--  net/ethtool/eeprom.c | 2
-rw-r--r--  net/ethtool/ioctl.c | 17
-rw-r--r--  net/ipv4/af_inet.c | 2
-rw-r--r--  net/ipv4/esp4.c | 4
-rw-r--r--  net/ipv4/inet_connection_sock.c | 247
-rw-r--r--  net/ipv4/inet_hashtables.c | 193
-rw-r--r--  net/ipv4/ip_gre.c | 15
-rw-r--r--  net/ipv4/ip_output.c | 8
-rw-r--r--  net/ipv4/ipmr.c | 215
-rw-r--r--  net/ipv4/ipmr_base.c | 53
-rw-r--r--  net/ipv4/ping.c | 46
-rw-r--r--  net/ipv4/raw.c | 172
-rw-r--r--  net/ipv4/raw_diag.c | 57
-rw-r--r--  net/ipv4/tcp.c | 48
-rw-r--r--  net/ipv4/tcp_bpf.c | 4
-rw-r--r--  net/ipv4/tcp_input.c | 3
-rw-r--r--  net/ipv4/tcp_output.c | 7
-rw-r--r--  net/ipv6/af_inet6.c | 3
-rw-r--r--  net/ipv6/ip6_gre.c | 15
-rw-r--r--  net/ipv6/ip6mr.c | 202
-rw-r--r--  net/ipv6/raw.c | 120
-rw-r--r--  net/netfilter/nf_dup_netdev.c | 25
-rw-r--r--  net/netfilter/nfnetlink_cttimeout.c | 2
-rw-r--r--  net/netfilter/nft_meta.c | 13
-rw-r--r--  net/netfilter/nft_numgen.c | 12
-rw-r--r--  net/openvswitch/flow.c | 2
-rw-r--r--  net/sched/sch_netem.c | 4
-rw-r--r--  net/socket.c | 15
-rw-r--r--  net/sunrpc/clnt.c | 1
-rw-r--r--  net/sunrpc/xdr.c | 37
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_rw.c | 4
-rw-r--r--  net/tipc/core.c | 3
-rw-r--r--  net/tipc/name_table.c | 11
-rw-r--r--  net/tipc/name_table.h | 1
-rw-r--r--  net/tls/tls_main.c | 2
-rw-r--r--  net/unix/af_unix.c | 228
-rw-r--r--  net/unix/diag.c | 49
-rw-r--r--  net/xdp/xdp_umem.c | 6
-rw-r--r--  net/xdp/xsk.c | 16
-rw-r--r--  samples/bpf/xdp_fwd_user.c | 55
-rw-r--r--  samples/bpf/xdp_router_ipv4.bpf.c | 9
-rw-r--r--  samples/fprobe/fprobe_example.c | 29
-rw-r--r--  scripts/Makefile.build | 4
-rwxr-xr-x  scripts/bpf_doc.py | 4
-rwxr-xr-x  scripts/check-local-export | 36
-rwxr-xr-x  scripts/faddr2line | 45
-rw-r--r--  scripts/gdb/linux/config.py | 6
-rw-r--r--  scripts/nsdeps | 5
-rw-r--r--  security/selinux/hooks.c | 11
-rw-r--r--  sound/core/memalloc.c | 23
-rw-r--r--  sound/hda/hdac_i915.c | 15
-rw-r--r--  sound/hda/intel-dsp-config.c | 12
-rw-r--r--  sound/hda/intel-nhlt.c | 17
-rw-r--r--  sound/pci/hda/hda_auto_parser.c | 7
-rw-r--r--  sound/pci/hda/hda_local.h | 1
-rw-r--r--  sound/pci/hda/patch_conexant.c | 4
-rw-r--r--  sound/pci/hda/patch_realtek.c | 36
-rw-r--r--  sound/pci/hda/patch_via.c | 4
-rw-r--r--  sound/usb/mixer_us16x08.c | 6
-rw-r--r--  sound/x86/intel_hdmi_audio.c | 15
-rw-r--r--  tools/arch/arm64/include/asm/cputype.h | 12
-rw-r--r--  tools/arch/x86/include/asm/cpufeatures.h | 1
-rw-r--r--  tools/arch/x86/include/asm/msr-index.h | 25
-rw-r--r--  tools/arch/x86/include/uapi/asm/kvm.h | 11
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool-cgroup.rst | 16
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool-prog.rst | 5
-rw-r--r--  tools/bpf/bpftool/Makefile | 2
-rw-r--r--  tools/bpf/bpftool/bash-completion/bpftool | 18
-rw-r--r--  tools/bpf/bpftool/btf.c | 57
-rw-r--r--  tools/bpf/bpftool/btf_dumper.c | 29
-rw-r--r--  tools/bpf/bpftool/cgroup.c | 53
-rw-r--r--  tools/bpf/bpftool/common.c | 90
-rw-r--r--  tools/bpf/bpftool/feature.c | 89
-rw-r--r--  tools/bpf/bpftool/gen.c | 4
-rw-r--r--  tools/bpf/bpftool/link.c | 61
-rw-r--r--  tools/bpf/bpftool/main.c | 2
-rw-r--r--  tools/bpf/bpftool/main.h | 22
-rw-r--r--  tools/bpf/bpftool/map.c | 84
-rw-r--r--  tools/bpf/bpftool/pids.c | 1
-rw-r--r--  tools/bpf/bpftool/prog.c | 79
-rw-r--r--  tools/bpf/bpftool/struct_ops.c | 2
-rw-r--r--  tools/include/linux/objtool.h | 6
-rw-r--r--  tools/include/uapi/linux/bpf.h | 88
-rw-r--r--  tools/include/uapi/linux/btf.h | 17
-rw-r--r--  tools/include/uapi/linux/if_link.h | 1
-rw-r--r--  tools/include/uapi/linux/prctl.h | 9
-rw-r--r--  tools/lib/bpf/btf.c | 229
-rw-r--r--  tools/lib/bpf/btf.h | 32
-rw-r--r--  tools/lib/bpf/btf_dump.c | 137
-rw-r--r--  tools/lib/bpf/libbpf.c | 296
-rw-r--r--  tools/lib/bpf/libbpf.h | 38
-rw-r--r--  tools/lib/bpf/libbpf.map | 8
-rw-r--r--  tools/lib/bpf/libbpf_internal.h | 7
-rw-r--r--  tools/lib/bpf/linker.c | 7
-rw-r--r--  tools/lib/bpf/relo_core.c | 113
-rw-r--r--  tools/lib/bpf/relo_core.h | 4
-rw-r--r--  tools/lib/bpf/usdt.c | 123
-rw-r--r--  tools/lib/perf/evsel.c | 17
-rw-r--r--  tools/perf/tests/bp_account.c | 16
-rw-r--r--  tools/perf/tests/expr.c | 2
-rw-r--r--  tools/perf/tests/shell/lib/perf_csv_output_lint.py | 48
-rwxr-xr-x  tools/perf/tests/shell/stat+csv_output.sh | 69
-rwxr-xr-x  tools/perf/tests/shell/test_arm_callgraph_fp.sh | 2
-rw-r--r--  tools/perf/tests/topology.c | 2
-rw-r--r--  tools/perf/trace/beauty/include/linux/socket.h | 7
-rw-r--r--  tools/perf/util/arm-spe.c | 22
-rw-r--r--  tools/perf/util/expr.l | 2
-rw-r--r--  tools/perf/util/metricgroup.c | 9
-rw-r--r--  tools/perf/util/unwind-libunwind-local.c | 2
-rw-r--r--  tools/testing/selftests/bpf/.gitignore | 1
-rw-r--r--  tools/testing/selftests/bpf/Makefile | 28
-rw-r--r--  tools/testing/selftests/bpf/bench.c | 2
-rw-r--r--  tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c | 96
-rwxr-xr-x  tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh | 11
-rw-r--r--  tools/testing/selftests/bpf/btf_helpers.c | 25
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/attach_probe.c | 49
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/bpf_cookie.c | 78
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/btf.c | 153
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/btf_write.c | 126
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/core_reloc.c | 65
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/fexit_stress.c | 32
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c | 3
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/libbpf_str.c | 207
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/tailcalls.c | 55
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/tc_redirect.c | 8
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c | 183
-rw-r--r--  tools/testing/selftests/bpf/progs/bpf_hashmap_full_update_bench.c | 40
-rw-r--r--  tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val.c | 3
-rw-r--r--  tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___diff.c | 3
-rw-r--r--  tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___err_missing.c | 3
-rw-r--r--  tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___val3_missing.c | 3
-rw-r--r--  tools/testing/selftests/bpf/progs/core_reloc_types.h | 78
-rw-r--r--  tools/testing/selftests/bpf/progs/kprobe_multi.c | 24
-rw-r--r--  tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c | 42
-rw-r--r--  tools/testing/selftests/bpf/progs/test_attach_probe.c | 60
-rw-r--r--  tools/testing/selftests/bpf/progs/test_core_reloc_enum64val.c | 70
-rw-r--r--  tools/testing/selftests/bpf/progs/test_tc_dtime.c | 53
-rw-r--r--  tools/testing/selftests/bpf/progs/test_varlen.c | 8
-rw-r--r--  tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c | 833
-rwxr-xr-x  tools/testing/selftests/bpf/test_bpftool_synctypes.py | 166
-rw-r--r--  tools/testing/selftests/bpf/test_btf.h | 1
-rwxr-xr-x  tools/testing/selftests/bpf/test_xdping.sh | 4
-rw-r--r--  tools/testing/selftests/bpf/xdp_synproxy.c | 466
-rw-r--r--  tools/testing/selftests/dma/Makefile | 1
-rw-r--r--  tools/testing/selftests/dma/dma_map_benchmark.c | 2
-rw-r--r--  tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh | 107
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh | 31
l---------  tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh | 1
-rw-r--r--  tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh | 15
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh | 29
-rw-r--r--  tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh | 34
-rw-r--r--  tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh | 17
-rw-r--r--  tools/testing/selftests/kvm/Makefile | 49
-rw-r--r--  tools/testing/selftests/kvm/dirty_log_perf_test.c | 10
-rw-r--r--  tools/testing/selftests/kvm/include/perf_test_util.h | 9
-rw-r--r--  tools/testing/selftests/kvm/include/x86_64/processor.h | 25
-rw-r--r--  tools/testing/selftests/kvm/include/x86_64/vmx.h | 6
-rw-r--r--  tools/testing/selftests/kvm/lib/perf_test_util.c | 53
-rw-r--r--  tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c | 112
-rw-r--r--  tools/testing/selftests/kvm/lib/x86_64/processor.c | 31
-rw-r--r--  tools/testing/selftests/kvm/lib/x86_64/vmx.c | 149
-rw-r--r--  tools/testing/selftests/kvm/max_guest_memory_test.c | 2
-rw-r--r--  tools/testing/selftests/kvm/x86_64/mmu_role_test.c | 2
-rw-r--r--  tools/testing/selftests/lib.mk | 25
-rw-r--r--  tools/testing/selftests/net/.gitignore | 1
-rw-r--r--  tools/testing/selftests/net/Makefile | 2
-rw-r--r--  tools/testing/selftests/net/bind_bhash_test.c | 119
-rwxr-xr-x  tools/testing/selftests/net/fcnal-test.sh | 61
-rwxr-xr-x  tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh | 7
-rwxr-xr-x  tools/testing/selftests/netfilter/nft_concat_range.sh | 2
-rw-r--r--  tools/testing/selftests/tc-testing/.gitignore | 1
-rw-r--r--  tools/testing/selftests/vm/gup_test.c | 4
-rw-r--r--  tools/testing/selftests/vm/ksm_tests.c | 2
-rw-r--r--  tools/testing/selftests/wireguard/qemu/Makefile | 28
-rw-r--r--  tools/testing/selftests/wireguard/qemu/init.c | 3
-rw-r--r--  tools/testing/selftests/wireguard/qemu/kernel.config | 3
-rw-r--r--  virt/kvm/kvm_main.c | 8
785 files changed, 20728 insertions, 10226 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 2ad01cad7f1c..bcc974d276dc 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -526,6 +526,7 @@ What: /sys/devices/system/cpu/vulnerabilities
/sys/devices/system/cpu/vulnerabilities/srbds
/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
/sys/devices/system/cpu/vulnerabilities/itlb_multihit
+ /sys/devices/system/cpu/vulnerabilities/mmio_stale_data
Date: January 2018
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Information about CPU vulnerabilities
diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst
index 8cbc711cda93..4df436e7c417 100644
--- a/Documentation/admin-guide/hw-vuln/index.rst
+++ b/Documentation/admin-guide/hw-vuln/index.rst
@@ -17,3 +17,4 @@ are configurable at compile, boot or run time.
special-register-buffer-data-sampling.rst
core-scheduling.rst
l1d_flush.rst
+ processor_mmio_stale_data.rst
diff --git a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
new file mode 100644
index 000000000000..9393c50b5afc
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
@@ -0,0 +1,246 @@
+=========================================
+Processor MMIO Stale Data Vulnerabilities
+=========================================
+
+Processor MMIO Stale Data Vulnerabilities are a class of memory-mapped I/O
+(MMIO) vulnerabilities that can expose data. The sequences of operations for
+exposing data range from simple to very complex. Because most of the
+vulnerabilities require the attacker to have access to MMIO, many environments
+are not affected. System environments using virtualization where MMIO access is
+provided to untrusted guests may need mitigation. These vulnerabilities are
+not transient execution attacks. However, these vulnerabilities may propagate
+stale data into core fill buffers where the data can subsequently be inferred
+by an unmitigated transient execution attack. Mitigation for these
+vulnerabilities includes a combination of microcode update and software
+changes, depending on the platform and usage model. Some of these mitigations
+are similar to those used to mitigate Microarchitectural Data Sampling (MDS) or
+those used to mitigate Special Register Buffer Data Sampling (SRBDS).
+
+Data Propagators
+================
+Propagators are operations that result in stale data being copied or moved from
+one microarchitectural buffer or register to another. Processor MMIO Stale Data
+Vulnerabilities are operations that may result in stale data being directly
+read into an architectural, software-visible state or sampled from a buffer or
+register.
+
+Fill Buffer Stale Data Propagator (FBSDP)
+-----------------------------------------
+Stale data may propagate from fill buffers (FB) into the non-coherent portion
+of the uncore on some non-coherent writes. Fill buffer propagation by itself
+does not make stale data architecturally visible. Stale data must be propagated
+to a location where it is subject to reading or sampling.
+
+Sideband Stale Data Propagator (SSDP)
+-------------------------------------
+The sideband stale data propagator (SSDP) is limited to the client (including
+Intel Xeon server E3) uncore implementation. The sideband response buffer is
+shared by all client cores. For non-coherent reads that go to sideband
+destinations, the uncore logic returns 64 bytes of data to the core, including
+both requested data and unrequested stale data, from a transaction buffer and
+the sideband response buffer. As a result, stale data from the sideband
+response and transaction buffers may now reside in a core fill buffer.
+
+Primary Stale Data Propagator (PSDP)
+------------------------------------
+The primary stale data propagator (PSDP) is limited to the client (including
+Intel Xeon server E3) uncore implementation. Similar to the sideband response
+buffer, the primary response buffer is shared by all client cores. For some
+processors, MMIO primary reads will return 64 bytes of data to the core fill
+buffer including both requested data and unrequested stale data. This is
+similar to the sideband stale data propagator.
+
+Vulnerabilities
+===============
+Device Register Partial Write (DRPW) (CVE-2022-21166)
+-----------------------------------------------------
+Some endpoint MMIO registers incorrectly handle writes that are smaller than
+the register size. Instead of aborting the write or only copying the correct
+subset of bytes (for example, 2 bytes for a 2-byte write), more bytes than
+specified by the write transaction may be written to the register. On
+processors affected by FBSDP, this may expose stale data from the fill buffers
+of the core that created the write transaction.
+
+Shared Buffers Data Sampling (SBDS) (CVE-2022-21125)
+----------------------------------------------------
+After propagators may have moved data around the uncore and copied stale data
+into client core fill buffers, processors affected by MFBDS can leak data from
+the fill buffer. It is limited to the client (including Intel Xeon server E3)
+uncore implementation.
+
+Shared Buffers Data Read (SBDR) (CVE-2022-21123)
+------------------------------------------------
+It is similar to Shared Buffer Data Sampling (SBDS) except that the data is
+directly read into the architectural software-visible state. It is limited to
+the client (including Intel Xeon server E3) uncore implementation.
+
+Affected Processors
+===================
+Not all CPUs are affected by all the variants. For instance, most
+processors for the server market (excluding Intel Xeon E3 processors) are
+affected only by Device Register Partial Write (DRPW).
+
+Below is the list of affected Intel processors [#f1]_:
+
+ =================== ============ =========
+ Common name Family_Model Steppings
+ =================== ============ =========
+ HASWELL_X 06_3FH 2,4
+ SKYLAKE_L 06_4EH 3
+ BROADWELL_X 06_4FH All
+ SKYLAKE_X 06_55H 3,4,6,7,11
+ BROADWELL_D 06_56H 3,4,5
+ SKYLAKE 06_5EH 3
+ ICELAKE_X 06_6AH 4,5,6
+ ICELAKE_D 06_6CH 1
+ ICELAKE_L 06_7EH 5
+ ATOM_TREMONT_D 06_86H All
+ LAKEFIELD 06_8AH 1
+ KABYLAKE_L 06_8EH 9 to 12
+ ATOM_TREMONT 06_96H 1
+ ATOM_TREMONT_L 06_9CH 0
+ KABYLAKE 06_9EH 9 to 13
+ COMETLAKE 06_A5H 2,3,5
+ COMETLAKE_L 06_A6H 0,1
+ ROCKETLAKE 06_A7H 1
+ =================== ============ =========
+
+If a CPU is in the affected processor list, but not affected by a variant, this
+is indicated by new bits in MSR IA32_ARCH_CAPABILITIES. As described in a later
+section, mitigation largely remains the same for all the variants, i.e. to
+clear the CPU fill buffers via the VERW instruction.
+
+New bits in MSRs
+================
+Newer processors, and microcode updates on existing affected processors, add
+new bits to the IA32_ARCH_CAPABILITIES MSR. These bits can be used to enumerate
+specific variants of Processor MMIO Stale Data vulnerabilities and mitigation
+capability.
+
+MSR IA32_ARCH_CAPABILITIES
+--------------------------
+Bit 13 - SBDR_SSDP_NO - When set, processor is not affected by either the
+ Shared Buffers Data Read (SBDR) vulnerability or the sideband stale
+ data propagator (SSDP).
+Bit 14 - FBSDP_NO - When set, processor is not affected by the Fill Buffer
+ Stale Data Propagator (FBSDP).
+Bit 15 - PSDP_NO - When set, processor is not affected by Primary Stale Data
+ Propagator (PSDP).
+Bit 17 - FB_CLEAR - When set, VERW instruction will overwrite CPU fill buffer
+ values as part of MD_CLEAR operations. Processors that do not
+ enumerate MDS_NO (meaning they are affected by MDS) but that do
+ enumerate support for both L1D_FLUSH and MD_CLEAR implicitly enumerate
+ FB_CLEAR as part of their MD_CLEAR support.
+Bit 18 - FB_CLEAR_CTRL - Processor supports read and write to MSR
+ IA32_MCU_OPT_CTRL[FB_CLEAR_DIS]. On such processors, the FB_CLEAR_DIS
+ bit can be set to cause the VERW instruction to not perform the
+ FB_CLEAR action. Not all processors that support FB_CLEAR will support
+ FB_CLEAR_CTRL.
+
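+For illustration, system software can test these enumeration bits with mask
+definitions like the following (the names mirror the kernel's
+arch/x86/include/asm/msr-index.h; treat this as a sketch, not a complete
+driver)::
+
+	#define ARCH_CAP_SBDR_SSDP_NO	BIT(13)
+	#define ARCH_CAP_FBSDP_NO	BIT(14)
+	#define ARCH_CAP_PSDP_NO	BIT(15)
+	#define ARCH_CAP_FB_CLEAR	BIT(17)
+	#define ARCH_CAP_FB_CLEAR_CTRL	BIT(18)
+
+	u64 ia32_cap = x86_read_arch_cap_msr();
+
+	if (!(ia32_cap & ARCH_CAP_FBSDP_NO)) {
+		/* Stale data may propagate through fill buffers. */
+	}
+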
+MSR IA32_MCU_OPT_CTRL
+---------------------
+Bit 3 - FB_CLEAR_DIS - When set, VERW instruction does not perform the FB_CLEAR
+action. This may be useful to reduce the performance impact of FB_CLEAR in
+cases where system software deems it warranted (for example, when performance
+is more critical, or the untrusted software has no MMIO access). Note that
+FB_CLEAR_DIS has no impact on enumeration (for example, it does not change
+FB_CLEAR or MD_CLEAR enumeration) and it may not be supported on all processors
+that enumerate FB_CLEAR.
+
+Mitigation
+==========
+Like MDS, all variants of Processor MMIO Stale Data vulnerabilities have the
+same mitigation strategy to force the CPU to clear the affected buffers before
+an attacker can extract the secrets.
+
+This is achieved by using the otherwise unused and obsolete VERW instruction in
+combination with a microcode update. The microcode clears the affected CPU
+buffers when the VERW instruction is executed.
+
+The kernel reuses the MDS function to invoke the buffer clearing:
+
+ mds_clear_cpu_buffers()
+
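+The helper boils down to executing VERW with a valid data segment selector as
+a memory operand. A minimal sketch of what it does (illustrative only; the
+authoritative version lives in arch/x86/include/asm/nospec-branch.h)::
+
+	static __always_inline void mds_clear_cpu_buffers(void)
+	{
+		/* VERW with a memory operand referencing a valid data
+		 * segment triggers the microcode-assisted buffer clear;
+		 * "cc" is clobbered because VERW modifies ZF.
+		 */
+		static const u16 ds = __KERNEL_DS;
+
+		asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
+	}
+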
+On MDS affected CPUs, the kernel already invokes CPU buffer clear on
+kernel/userspace, hypervisor/guest and C-state (idle) transitions. No
+additional mitigation is needed on such CPUs.
+
+For CPUs not affected by MDS or TAA, mitigation is needed only against an
+attacker with MMIO capability. Therefore, VERW is not required for
+kernel/userspace transitions. For the virtualization case, VERW is only needed
+at VMENTER for a guest with MMIO capability.
+
+Mitigation points
+-----------------
+Return to user space
+^^^^^^^^^^^^^^^^^^^^
+Same mitigation as MDS when affected by MDS/TAA, otherwise no mitigation
+needed.
+
+C-State transition
+^^^^^^^^^^^^^^^^^^
+Control register writes by CPU during C-state transition can propagate data
+from fill buffer to uncore buffers. Execute VERW before C-state transition to
+clear CPU fill buffers.
+
+Guest entry point
+^^^^^^^^^^^^^^^^^
+Same mitigation as MDS when processor is also affected by MDS/TAA, otherwise
+execute VERW at VMENTER only for MMIO capable guests. On CPUs not affected by
+MDS/TAA, guest without MMIO access cannot extract secrets using Processor MMIO
+Stale Data vulnerabilities, so there is no need to execute VERW for such guests.
+
+Mitigation control on the kernel command line
+---------------------------------------------
+The kernel command line allows controlling the Processor MMIO Stale Data
+mitigations at boot time with the option "mmio_stale_data=". The valid
+arguments for this option are:
+
+ ========== =================================================================
+ full If the CPU is vulnerable, enable mitigation; CPU buffer clearing
+ on exit to userspace and when entering a VM. Idle transitions are
+ protected as well. It does not automatically disable SMT.
+ full,nosmt Same as full, with SMT disabled on vulnerable CPUs. This is the
+ complete mitigation.
+ off Disables mitigation completely.
+ ========== =================================================================
+
+If the CPU is affected and mmio_stale_data=off is not supplied on the kernel
+command line, then the kernel selects the appropriate mitigation.
+
+Mitigation status information
+-----------------------------
+The Linux kernel provides a sysfs interface to enumerate the current
+vulnerability status of the system: whether the system is vulnerable, and
+which mitigations are active. The relevant sysfs file is:
+
+ /sys/devices/system/cpu/vulnerabilities/mmio_stale_data
+
+The possible values in this file are:
+
+ .. list-table::
+
+ * - 'Not affected'
+ - The processor is not vulnerable
+ * - 'Vulnerable'
+ - The processor is vulnerable, but no mitigation enabled
+ * - 'Vulnerable: Clear CPU buffers attempted, no microcode'
+ - The processor is vulnerable, but microcode is not updated. The
+ mitigation is enabled on a best effort basis.
+ * - 'Mitigation: Clear CPU buffers'
+ - The processor is vulnerable and the CPU buffer clearing mitigation is
+ enabled.
+
+If the processor is vulnerable then the following information is appended to
+the above information:
+
+ ======================== ===========================================
+ 'SMT vulnerable' SMT is enabled
+ 'SMT disabled' SMT is disabled
+ 'SMT Host state unknown' Kernel runs in a VM, Host SMT state unknown
+ ======================== ===========================================
+
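+For example, a vulnerable host with the mitigation active and SMT enabled
+reports a line of the form (illustrative output)::
+
+	Mitigation: Clear CPU buffers; SMT vulnerable
+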
+References
+----------
+.. [#f1] Affected Processors
+ https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 8090130b544b..2522b11e593f 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2469,7 +2469,6 @@
protected: nVHE-based mode with support for guests whose
state is kept private from the host.
- Not valid if the kernel is running in EL2.
Defaults to VHE/nVHE based on hardware support. Setting
mode to "protected" will disable kexec and hibernation
@@ -3176,6 +3175,7 @@
srbds=off [X86,INTEL]
no_entry_flush [PPC]
no_uaccess_flush [PPC]
+ mmio_stale_data=off [X86]
Exceptions:
This does not have any effect on
@@ -3197,6 +3197,7 @@
Equivalent to: l1tf=flush,nosmt [X86]
mds=full,nosmt [X86]
tsx_async_abort=full,nosmt [X86]
+ mmio_stale_data=full,nosmt [X86]
mminit_loglevel=
[KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
@@ -3206,6 +3207,40 @@
log everything. Information is printed at KERN_DEBUG
so loglevel=8 may also need to be specified.
+ mmio_stale_data=
+ [X86,INTEL] Control mitigation for the Processor
+ MMIO Stale Data vulnerabilities.
+
+ Processor MMIO Stale Data is a class of
+ vulnerabilities that may expose data after an MMIO
+ operation. Exposed data could originate or end in
+ the same CPU buffers as affected by MDS and TAA.
+ Therefore, similar to MDS and TAA, the mitigation
+ is to clear the affected CPU buffers.
+
+ This parameter controls the mitigation. The
+ options are:
+
+ full - Enable mitigation on vulnerable CPUs
+
+ full,nosmt - Enable mitigation and disable SMT on
+ vulnerable CPUs.
+
+ off - Unconditionally disable mitigation
+
+			On machines affected by MDS or TAA, an active
+			MDS or TAA mitigation can override
+			mmio_stale_data=off, as these vulnerabilities
+			are mitigated with the same mechanism. To fully
+			disable this mitigation, you need to specify
+			mds=off and tsx_async_abort=off too.
+
+ Not specifying this option is equivalent to
+ mmio_stale_data=full.
+
+ For details see:
+ Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
+
module.sig_enforce
[KNL] When CONFIG_MODULE_SIG is set, this means that
modules without (valid) signatures will fail to load.
diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst
index 7940da9bc6c1..f49aeef62d0c 100644
--- a/Documentation/bpf/btf.rst
+++ b/Documentation/bpf/btf.rst
@@ -74,7 +74,7 @@ sequentially and type id is assigned to each recognized type starting from id
#define BTF_KIND_ARRAY 3 /* Array */
#define BTF_KIND_STRUCT 4 /* Struct */
#define BTF_KIND_UNION 5 /* Union */
- #define BTF_KIND_ENUM 6 /* Enumeration */
+ #define BTF_KIND_ENUM 6 /* Enumeration up to 32-bit values */
#define BTF_KIND_FWD 7 /* Forward */
#define BTF_KIND_TYPEDEF 8 /* Typedef */
#define BTF_KIND_VOLATILE 9 /* Volatile */
@@ -87,6 +87,7 @@ sequentially and type id is assigned to each recognized type starting from id
#define BTF_KIND_FLOAT 16 /* Floating point */
#define BTF_KIND_DECL_TAG 17 /* Decl Tag */
#define BTF_KIND_TYPE_TAG 18 /* Type Tag */
+ #define BTF_KIND_ENUM64 19 /* Enumeration up to 64-bit values */
Note that the type section encodes debug info, not just pure types.
``BTF_KIND_FUNC`` is not a type, and it represents a defined subprogram.
@@ -101,10 +102,10 @@ Each type contains the following common data::
* bits 24-28: kind (e.g. int, ptr, array...etc)
* bits 29-30: unused
* bit 31: kind_flag, currently used by
- * struct, union and fwd
+ * struct, union, fwd, enum and enum64.
*/
__u32 info;
- /* "size" is used by INT, ENUM, STRUCT and UNION.
+ /* "size" is used by INT, ENUM, STRUCT, UNION and ENUM64.
* "size" tells the size of the type it is describing.
*
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
@@ -281,10 +282,10 @@ modes exist:
``struct btf_type`` encoding requirement:
* ``name_off``: 0 or offset to a valid C identifier
- * ``info.kind_flag``: 0
+ * ``info.kind_flag``: 0 for unsigned, 1 for signed
* ``info.kind``: BTF_KIND_ENUM
* ``info.vlen``: number of enum values
- * ``size``: 4
+ * ``size``: 1/2/4/8
``btf_type`` is followed by ``info.vlen`` number of ``struct btf_enum``.::
@@ -297,6 +298,10 @@ The ``btf_enum`` encoding:
* ``name_off``: offset to a valid C identifier
* ``val``: any value
+If the original enum value is signed and the size is less than 4,
+that value will be sign-extended to 4 bytes. If the size is 8,
+the value will be truncated to 4 bytes.
+
2.2.7 BTF_KIND_FWD
~~~~~~~~~~~~~~~~~~
@@ -493,7 +498,7 @@ the attribute is applied to a ``struct``/``union`` member or
a ``func`` argument, and ``btf_decl_tag.component_idx`` should be a
valid index (starting from 0) pointing to a member or an argument.
-2.2.17 BTF_KIND_TYPE_TAG
+2.2.18 BTF_KIND_TYPE_TAG
~~~~~~~~~~~~~~~~~~~~~~~~
``struct btf_type`` encoding requirement:
@@ -516,6 +521,32 @@ type_tag, then zero or more const/volatile/restrict/typedef
and finally the base type. The base type is one of
int, ptr, array, struct, union, enum, func_proto and float types.
+2.2.19 BTF_KIND_ENUM64
+~~~~~~~~~~~~~~~~~~~~~~
+
+``struct btf_type`` encoding requirement:
+ * ``name_off``: 0 or offset to a valid C identifier
+ * ``info.kind_flag``: 0 for unsigned, 1 for signed
+ * ``info.kind``: BTF_KIND_ENUM64
+ * ``info.vlen``: number of enum values
+ * ``size``: 1/2/4/8
+
+``btf_type`` is followed by ``info.vlen`` number of ``struct btf_enum64``.::
+
+ struct btf_enum64 {
+ __u32 name_off;
+ __u32 val_lo32;
+ __u32 val_hi32;
+ };
+
+The ``btf_enum64`` encoding:
+ * ``name_off``: offset to a valid C identifier
+ * ``val_lo32``: lower 32-bit value for a 64-bit value
+ * ``val_hi32``: high 32-bit value for a 64-bit value
+
+If the original enum value is signed and the size is less than 8,
+that value will be sign-extended to 8 bytes.
+
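+A consumer reconstructs the full 64-bit value from the two halves like this
+(an illustrative snippet, where ``e`` points to a ``struct btf_enum64``)::
+
+	__u64 val = ((__u64)e->val_hi32 << 32) | e->val_lo32;
+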
3. BTF Kernel API
=================
diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst
index 1de6a57c7e1e..9e27fbdb2206 100644
--- a/Documentation/bpf/instruction-set.rst
+++ b/Documentation/bpf/instruction-set.rst
@@ -127,7 +127,7 @@ BPF_XOR | BPF_K | BPF_ALU64 means::
Byte swap instructions
----------------------
-The byte swap instructions use an instruction class of ``BFP_ALU`` and a 4-bit
+The byte swap instructions use an instruction class of ``BPF_ALU`` and a 4-bit
code field of ``BPF_END``.
The byte swap instructions operate on the destination register
diff --git a/Documentation/devicetree/bindings/hwmon/ti,tmp401.yaml b/Documentation/devicetree/bindings/hwmon/ti,tmp401.yaml
index fe0ac08faa1a..0e8ddf0ad789 100644
--- a/Documentation/devicetree/bindings/hwmon/ti,tmp401.yaml
+++ b/Documentation/devicetree/bindings/hwmon/ti,tmp401.yaml
@@ -40,9 +40,8 @@ properties:
value to be used for converting remote channel measurements to
temperature.
$ref: /schemas/types.yaml#/definitions/int32
- items:
- minimum: -128
- maximum: 127
+ minimum: -128
+ maximum: 127
ti,beta-compensation:
description:
diff --git a/Documentation/devicetree/bindings/interrupt-controller/socionext,uniphier-aidet.yaml b/Documentation/devicetree/bindings/interrupt-controller/socionext,uniphier-aidet.yaml
index f89ebde76dab..de7c5e59bae1 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/socionext,uniphier-aidet.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/socionext,uniphier-aidet.yaml
@@ -30,6 +30,7 @@ properties:
- socionext,uniphier-ld11-aidet
- socionext,uniphier-ld20-aidet
- socionext,uniphier-pxs3-aidet
+ - socionext,uniphier-nx1-aidet
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml b/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml
new file mode 100644
index 000000000000..a3bf432960d8
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml
@@ -0,0 +1,404 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/dsa/mediatek,mt7530.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mediatek MT7530 Ethernet switch
+
+maintainers:
+ - Sean Wang <sean.wang@mediatek.com>
+ - Landen Chao <Landen.Chao@mediatek.com>
+ - DENG Qingfang <dqfext@gmail.com>
+
+description: |
+  Port 5 of the mt7530 and mt7621 switches is muxed between:
+  1. GMAC5: GMAC5 can interface with another external MAC or PHY.
+  2. PHY of port 0 or port 4: The PHY interfaces with an external MAC like the
+  2nd GMAC of the SOC. Used in many setups where port 0/4 becomes the WAN port.
+  Note: On an MT7621 SOC with integrated switch, the 2nd GMAC can only be
+  connected to GMAC5 when the gpios for RGMII2 (GPIO 22-33) are not used and
+  not connected to an external component!
+
+ Port 5 modes/configurations:
+ 1. Port 5 is disabled and isolated: An external phy can interface to the 2nd
+ GMAC of the SOC.
+  In the case of a built-in MT7530 switch, port 5 shares the RGMII bus with the
+  2nd GMAC and an optional external phy. Mind the GPIO/pinctrl settings of the
+  SOC!
+  2. Port 5 is muxed to the PHY of port 0/4: Port 0/4 interfaces with the 2nd
+  GMAC. It is a simple MAC to PHY interface; port 5 needs to be set up for
+  xMII mode and RGMII delay.
+  3. Port 5 is muxed to GMAC5 and can interface to an external phy.
+  Port 5 becomes an extra switch port.
+  This only works on platforms where the external phy TX<->RX lines are
+  swapped, like in the Ubiquiti ER-X-SFP.
+  4. Port 5 is muxed to GMAC5 and interfaces with the 2nd GMAC as a 2nd CPU
+  port. Currently a 2nd CPU port is not supported by DSA code.
+
+ Depending on how the external PHY is wired:
+ 1. normal: The PHY can only connect to the 2nd GMAC but not to the switch.
+ 2. swapped: RGMII TX and RX are swapped; the external phy interfaces with the
+ switch as an ethernet port, but can't interface to the 2nd GMAC.
+
+ The port 5 mode is configured based on the DT.
+
+ The driver tries to look up the phy-handle of the 2nd GMAC of the master
+ device. When the phy-handle matches the PHY of port 0 or 4, port 5 is set
+ up as mode 2. phy-mode must be set; see also example 2 below!
+ * mt7621: phy-mode = "rgmii-txid";
+ * mt7623: phy-mode = "rgmii";
+
+ CPU ports need a phy-mode property:
+ Allowed values on mt7530 and mt7621:
+ - "rgmii"
+ - "trgmii"
+ On mt7531:
+ - "1000base-x"
+ - "2500base-x"
+ - "rgmii"
+ - "sgmii"
+
+
+properties:
+ compatible:
+ enum:
+ - mediatek,mt7530
+ - mediatek,mt7531
+ - mediatek,mt7621
+
+ core-supply:
+ description:
+ Phandle to the regulator node necessary for the core power.
+
+ "#gpio-cells":
+ const: 2
+
+ gpio-controller:
+ type: boolean
+ description:
+ if defined, MT7530's LED controller will run in GPIO mode.
+
+ "#interrupt-cells":
+ const: 1
+
+ interrupt-controller: true
+
+ interrupts:
+ maxItems: 1
+
+ io-supply:
+ description:
+ Phandle to the regulator node necessary for the I/O power.
+ See Documentation/devicetree/bindings/regulator/mt6323-regulator.txt
+ for details for the regulator setup on these boards.
+
+ mediatek,mcm:
+ type: boolean
+ description:
+ if defined, indicates that the MT7530 is the part on the multi-chip
+ module belonging to the MT7623A, rather than the standalone chip used
+ on the MT7623N reference board.
+
+ reset-gpios:
+ maxItems: 1
+
+ reset-names:
+ const: mcm
+
+ resets:
+ description:
+ Phandle pointing to the system reset controller with line index for
+ the ethsys.
+ maxItems: 1
+
+patternProperties:
+ "^(ethernet-)?ports$":
+ type: object
+
+ patternProperties:
+ "^(ethernet-)?port@[0-9]+$":
+ type: object
+ description: Ethernet switch ports
+
+ unevaluatedProperties: false
+
+ properties:
+ reg:
+ description:
+ The port address must be 5 or 6 for the CPU port and from 0
+ to 5 for user ports.
+
+ allOf:
+ - $ref: dsa-port.yaml#
+ - if:
+ properties:
+ label:
+ items:
+ - const: cpu
+ then:
+ required:
+ - reg
+ - phy-mode
+
+required:
+ - compatible
+ - reg
+
+allOf:
+ - $ref: "dsa.yaml#"
+ - if:
+ required:
+ - mediatek,mcm
+ then:
+ required:
+ - resets
+ - reset-names
+
+ - dependencies:
+ interrupt-controller: [ interrupts ]
+
+ - if:
+ properties:
+ compatible:
+ items:
+ - const: mediatek,mt7530
+ then:
+ required:
+ - core-supply
+ - io-supply
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ switch@0 {
+ compatible = "mediatek,mt7530";
+ reg = <0>;
+
+ core-supply = <&mt6323_vpa_reg>;
+ io-supply = <&mt6323_vemc3v3_reg>;
+ reset-gpios = <&pio 33 GPIO_ACTIVE_HIGH>;
+
+ ethernet-ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ label = "lan0";
+ };
+
+ port@1 {
+ reg = <1>;
+ label = "lan1";
+ };
+
+ port@2 {
+ reg = <2>;
+ label = "lan2";
+ };
+
+ port@3 {
+ reg = <3>;
+ label = "lan3";
+ };
+
+ port@4 {
+ reg = <4>;
+ label = "wan";
+ };
+
+ port@6 {
+ reg = <6>;
+ label = "cpu";
+ ethernet = <&gmac0>;
+ phy-mode = "trgmii";
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ };
+ };
+ };
+ };
+ };
+
+ - |
+ //Example 2: MT7621: Port 4 is WAN port: 2nd GMAC -> Port 5 -> PHY port 4.
+
+ ethernet {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ gmac0: mac@0 {
+ compatible = "mediatek,eth-mac";
+ reg = <0>;
+ phy-mode = "rgmii";
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ pause;
+ };
+ };
+
+ gmac1: mac@1 {
+ compatible = "mediatek,eth-mac";
+ reg = <1>;
+ phy-mode = "rgmii-txid";
+ phy-handle = <&phy4>;
+ };
+
+ mdio: mdio-bus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ /* Internal phy */
+ phy4: ethernet-phy@4 {
+ reg = <4>;
+ };
+
+ mt7530: switch@1f {
+ compatible = "mediatek,mt7621";
+ reg = <0x1f>;
+ mediatek,mcm;
+
+ resets = <&rstctrl 2>;
+ reset-names = "mcm";
+
+ ethernet-ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ label = "lan0";
+ };
+
+ port@1 {
+ reg = <1>;
+ label = "lan1";
+ };
+
+ port@2 {
+ reg = <2>;
+ label = "lan2";
+ };
+
+ port@3 {
+ reg = <3>;
+ label = "lan3";
+ };
+
+ /* Commented out. Port 4 is handled by 2nd GMAC.
+ port@4 {
+ reg = <4>;
+ label = "lan4";
+ };
+ */
+
+ port@6 {
+ reg = <6>;
+ label = "cpu";
+ ethernet = <&gmac0>;
+ phy-mode = "rgmii";
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ pause;
+ };
+ };
+ };
+ };
+ };
+ };
+
+ - |
+ //Example 3: MT7621: Port 5 is connected to external PHY: Port 5 -> external PHY.
+
+ ethernet {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ gmac_0: mac@0 {
+ compatible = "mediatek,eth-mac";
+ reg = <0>;
+ phy-mode = "rgmii";
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ pause;
+ };
+ };
+
+ mdio0: mdio-bus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ /* External phy */
+ ephy5: ethernet-phy@7 {
+ reg = <7>;
+ };
+
+ switch@1f {
+ compatible = "mediatek,mt7621";
+ reg = <0x1f>;
+ mediatek,mcm;
+
+ resets = <&rstctrl 2>;
+ reset-names = "mcm";
+
+ ethernet-ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ label = "lan0";
+ };
+
+ port@1 {
+ reg = <1>;
+ label = "lan1";
+ };
+
+ port@2 {
+ reg = <2>;
+ label = "lan2";
+ };
+
+ port@3 {
+ reg = <3>;
+ label = "lan3";
+ };
+
+ port@4 {
+ reg = <4>;
+ label = "lan4";
+ };
+
+ port@5 {
+ reg = <5>;
+ label = "lan5";
+ phy-mode = "rgmii";
+ phy-handle = <&ephy5>;
+ };
+
+ cpu_port0: port@6 {
+ reg = <6>;
+ label = "cpu";
+ ethernet = <&gmac_0>;
+ phy-mode = "rgmii";
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ pause;
+ };
+ };
+ };
+ };
+ };
+ };
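[Editor's note] For readers wiring this binding up in a driver, here is a hypothetical probe fragment honouring the schema rule above (mediatek,mcm implies resets/reset-names; otherwise a reset GPIO is used). It is a sketch built on the usual OF/reset/GPIO consumer APIs, not the actual mt7530 driver code::

  #include <linux/device.h>
  #include <linux/err.h>
  #include <linux/gpio/consumer.h>
  #include <linux/of.h>
  #include <linux/reset.h>

  /* Hypothetical fragment: pick the reset method the binding mandates. */
  static int mt7530_get_reset(struct device *dev)
  {
          if (of_property_read_bool(dev->of_node, "mediatek,mcm")) {
                  /* multi-chip module: reset comes from the "mcm" line */
                  struct reset_control *rst =
                          devm_reset_control_get(dev, "mcm");
                  return PTR_ERR_OR_ZERO(rst);
          }

          /* standalone chip: an optional reset-gpios line instead */
          return PTR_ERR_OR_ZERO(devm_gpiod_get_optional(dev, "reset",
                                                         GPIOD_OUT_LOW));
  }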
diff --git a/Documentation/devicetree/bindings/net/dsa/mt7530.txt b/Documentation/devicetree/bindings/net/dsa/mt7530.txt
deleted file mode 100644
index 18247ebfc487..000000000000
--- a/Documentation/devicetree/bindings/net/dsa/mt7530.txt
+++ /dev/null
@@ -1,327 +0,0 @@
-Mediatek MT7530 Ethernet switch
-================================
-
-Required properties:
-
-- compatible: may be compatible = "mediatek,mt7530"
- or compatible = "mediatek,mt7621"
- or compatible = "mediatek,mt7531"
-- #address-cells: Must be 1.
-- #size-cells: Must be 0.
-- mediatek,mcm: Boolean; if defined, indicates that either MT7530 is the part
- on multi-chip module belong to MT7623A has or the remotely standalone
- chip as the function MT7623N reference board provided for.
-
-If compatible mediatek,mt7530 is set then the following properties are required
-
-- core-supply: Phandle to the regulator node necessary for the core power.
-- io-supply: Phandle to the regulator node necessary for the I/O power.
- See Documentation/devicetree/bindings/regulator/mt6323-regulator.txt
- for details for the regulator setup on these boards.
-
-If the property mediatek,mcm isn't defined, following property is required
-
-- reset-gpios: Should be a gpio specifier for a reset line.
-
-Else, following properties are required
-
-- resets : Phandle pointing to the system reset controller with
- line index for the ethsys.
-- reset-names : Should be set to "mcm".
-
-Required properties for the child nodes within ports container:
-
-- reg: Port address described must be 6 for CPU port and from 0 to 5 for
- user ports.
-- phy-mode: String, the following values are acceptable for port labeled
- "cpu":
- If compatible mediatek,mt7530 or mediatek,mt7621 is set,
- must be either "trgmii" or "rgmii"
- If compatible mediatek,mt7531 is set,
- must be either "sgmii", "1000base-x" or "2500base-x"
-
-Port 5 of mt7530 and mt7621 switch is muxed between:
-1. GMAC5: GMAC5 can interface with another external MAC or PHY.
-2. PHY of port 0 or port 4: PHY interfaces with an external MAC like 2nd GMAC
- of the SOC. Used in many setups where port 0/4 becomes the WAN port.
- Note: On a MT7621 SOC with integrated switch: 2nd GMAC can only connected to
- GMAC5 when the gpios for RGMII2 (GPIO 22-33) are not used and not
- connected to external component!
-
-Port 5 modes/configurations:
-1. Port 5 is disabled and isolated: An external phy can interface to the 2nd
- GMAC of the SOC.
- In the case of a build-in MT7530 switch, port 5 shares the RGMII bus with 2nd
- GMAC and an optional external phy. Mind the GPIO/pinctl settings of the SOC!
-2. Port 5 is muxed to PHY of port 0/4: Port 0/4 interfaces with 2nd GMAC.
- It is a simple MAC to PHY interface, port 5 needs to be setup for xMII mode
- and RGMII delay.
-3. Port 5 is muxed to GMAC5 and can interface to an external phy.
- Port 5 becomes an extra switch port.
- Only works on platform where external phy TX<->RX lines are swapped.
- Like in the Ubiquiti ER-X-SFP.
-4. Port 5 is muxed to GMAC5 and interfaces with the 2nd GAMC as 2nd CPU port.
- Currently a 2nd CPU port is not supported by DSA code.
-
-Depending on how the external PHY is wired:
-1. normal: The PHY can only connect to 2nd GMAC but not to the switch
-2. swapped: RGMII TX, RX are swapped; external phy interface with the switch as
- a ethernet port. But can't interface to the 2nd GMAC.
-
-Based on the DT the port 5 mode is configured.
-
-Driver tries to lookup the phy-handle of the 2nd GMAC of the master device.
-When phy-handle matches PHY of port 0 or 4 then port 5 set-up as mode 2.
-phy-mode must be set, see also example 2 below!
- * mt7621: phy-mode = "rgmii-txid";
- * mt7623: phy-mode = "rgmii";
-
-Optional properties:
-
-- gpio-controller: Boolean; if defined, MT7530's LED controller will run on
- GPIO mode.
-- #gpio-cells: Must be 2 if gpio-controller is defined.
-- interrupt-controller: Boolean; Enables the internal interrupt controller.
-
-If interrupt-controller is defined, the following properties are required.
-
-- #interrupt-cells: Must be 1.
-- interrupts: Parent interrupt for the interrupt controller.
-
-See Documentation/devicetree/bindings/net/dsa/dsa.txt for a list of additional
-required, optional properties and how the integrated switch subnodes must
-be specified.
-
-Example:
-
- &mdio0 {
- switch@0 {
- compatible = "mediatek,mt7530";
- #address-cells = <1>;
- #size-cells = <0>;
- reg = <0>;
-
- core-supply = <&mt6323_vpa_reg>;
- io-supply = <&mt6323_vemc3v3_reg>;
- reset-gpios = <&pio 33 0>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
- reg = <0>;
- port@0 {
- reg = <0>;
- label = "lan0";
- };
-
- port@1 {
- reg = <1>;
- label = "lan1";
- };
-
- port@2 {
- reg = <2>;
- label = "lan2";
- };
-
- port@3 {
- reg = <3>;
- label = "lan3";
- };
-
- port@4 {
- reg = <4>;
- label = "wan";
- };
-
- port@6 {
- reg = <6>;
- label = "cpu";
- ethernet = <&gmac0>;
- phy-mode = "trgmii";
- fixed-link {
- speed = <1000>;
- full-duplex;
- };
- };
- };
- };
- };
-
-Example 2: MT7621: Port 4 is WAN port: 2nd GMAC -> Port 5 -> PHY port 4.
-
-&eth {
- gmac0: mac@0 {
- compatible = "mediatek,eth-mac";
- reg = <0>;
- phy-mode = "rgmii";
-
- fixed-link {
- speed = <1000>;
- full-duplex;
- pause;
- };
- };
-
- gmac1: mac@1 {
- compatible = "mediatek,eth-mac";
- reg = <1>;
- phy-mode = "rgmii-txid";
- phy-handle = <&phy4>;
- };
-
- mdio: mdio-bus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- /* Internal phy */
- phy4: ethernet-phy@4 {
- reg = <4>;
- };
-
- mt7530: switch@1f {
- compatible = "mediatek,mt7621";
- #address-cells = <1>;
- #size-cells = <0>;
- reg = <0x1f>;
- pinctrl-names = "default";
- mediatek,mcm;
-
- resets = <&rstctrl 2>;
- reset-names = "mcm";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
- label = "lan0";
- };
-
- port@1 {
- reg = <1>;
- label = "lan1";
- };
-
- port@2 {
- reg = <2>;
- label = "lan2";
- };
-
- port@3 {
- reg = <3>;
- label = "lan3";
- };
-
-/* Commented out. Port 4 is handled by 2nd GMAC.
- port@4 {
- reg = <4>;
- label = "lan4";
- };
-*/
-
- cpu_port0: port@6 {
- reg = <6>;
- label = "cpu";
- ethernet = <&gmac0>;
- phy-mode = "rgmii";
-
- fixed-link {
- speed = <1000>;
- full-duplex;
- pause;
- };
- };
- };
- };
- };
-};
-
-Example 3: MT7621: Port 5 is connected to external PHY: Port 5 -> external PHY.
-
-&eth {
- gmac0: mac@0 {
- compatible = "mediatek,eth-mac";
- reg = <0>;
- phy-mode = "rgmii";
-
- fixed-link {
- speed = <1000>;
- full-duplex;
- pause;
- };
- };
-
- mdio: mdio-bus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- /* External phy */
- ephy5: ethernet-phy@7 {
- reg = <7>;
- };
-
- mt7530: switch@1f {
- compatible = "mediatek,mt7621";
- #address-cells = <1>;
- #size-cells = <0>;
- reg = <0x1f>;
- pinctrl-names = "default";
- mediatek,mcm;
-
- resets = <&rstctrl 2>;
- reset-names = "mcm";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
- label = "lan0";
- };
-
- port@1 {
- reg = <1>;
- label = "lan1";
- };
-
- port@2 {
- reg = <2>;
- label = "lan2";
- };
-
- port@3 {
- reg = <3>;
- label = "lan3";
- };
-
- port@4 {
- reg = <4>;
- label = "lan4";
- };
-
- port@5 {
- reg = <5>;
- label = "lan5";
- phy-mode = "rgmii";
- phy-handle = <&ephy5>;
- };
-
- cpu_port0: port@6 {
- reg = <6>;
- label = "cpu";
- ethernet = <&gmac0>;
- phy-mode = "rgmii";
-
- fixed-link {
- speed = <1000>;
- full-duplex;
- pause;
- };
- };
- };
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/net/dsa/renesas,rzn1-a5psw.yaml b/Documentation/devicetree/bindings/net/dsa/renesas,rzn1-a5psw.yaml
new file mode 100644
index 000000000000..103b1ef5af1b
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/dsa/renesas,rzn1-a5psw.yaml
@@ -0,0 +1,134 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/dsa/renesas,rzn1-a5psw.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas RZ/N1 Advanced 5-port Ethernet switch
+
+maintainers:
+ - Clément Léger <clement.leger@bootlin.com>
+
+description: |
+ The advanced 5-port switch is present on the Renesas RZ/N1 SoC family and
+ handles 4 external ports plus 1 CPU management port.
+
+allOf:
+ - $ref: dsa.yaml#
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - renesas,r9a06g032-a5psw
+ - const: renesas,rzn1-a5psw
+
+ reg:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
+ mdio:
+ $ref: /schemas/net/mdio.yaml#
+ unevaluatedProperties: false
+
+ clocks:
+ items:
+ - description: AHB clock used for the switch register interface
+ - description: Switch system clock
+
+ clock-names:
+ items:
+ - const: hclk
+ - const: clk
+
+ ethernet-ports:
+ type: object
+ properties:
+ '#address-cells':
+ const: 1
+ '#size-cells':
+ const: 0
+
+ patternProperties:
+ "^(ethernet-)?port@[0-4]$":
+ type: object
+ description: Ethernet switch ports
+
+ properties:
+ pcs-handle:
+ description:
+ phandle pointing to a PCS sub-node compatible with
+ renesas,rzn1-miic.yaml#
+ $ref: /schemas/types.yaml#/definitions/phandle
+
+unevaluatedProperties: false
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - power-domains
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+ #include <dt-bindings/clock/r9a06g032-sysctrl.h>
+
+ switch@44050000 {
+ compatible = "renesas,r9a06g032-a5psw", "renesas,rzn1-a5psw";
+ reg = <0x44050000 0x10000>;
+ clocks = <&sysctrl R9A06G032_HCLK_SWITCH>, <&sysctrl R9A06G032_CLK_SWITCH>;
+ clock-names = "hclk", "clk";
+ power-domains = <&sysctrl>;
+
+ dsa,member = <0 0>;
+
+ ethernet-ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ label = "lan0";
+ phy-handle = <&switch0phy3>;
+ pcs-handle = <&mii_conv4>;
+ };
+
+ port@1 {
+ reg = <1>;
+ label = "lan1";
+ phy-handle = <&switch0phy1>;
+ pcs-handle = <&mii_conv3>;
+ };
+
+ port@4 {
+ reg = <4>;
+ ethernet = <&gmac2>;
+ label = "cpu";
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ };
+ };
+ };
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ reset-gpios = <&gpio0a 2 GPIO_ACTIVE_HIGH>;
+ reset-delay-us = <15>;
+ clock-frequency = <2500000>;
+
+ switch0phy1: ethernet-phy@1{
+ reg = <1>;
+ };
+
+ switch0phy3: ethernet-phy@3{
+ reg = <3>;
+ };
+ };
+ };
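[Editor's note] As a hedged sketch of how a consumer might walk the port nodes described above (a hypothetical helper, not the a5psw driver itself)::

  #include <linux/errno.h>
  #include <linux/of.h>

  /* Read a port's address and resolve its optional pcs-handle to the
   * MII converter node (see the mii_conv references in the example).
   */
  static int a5psw_parse_port(struct device_node *port)
  {
          struct device_node *pcs;
          u32 reg;

          if (of_property_read_u32(port, "reg", &reg))
                  return -EINVAL;

          pcs = of_parse_phandle(port, "pcs-handle", 0);
          if (pcs)
                  of_node_put(pcs); /* a real driver would keep and use it */

          return 0;
  }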
diff --git a/Documentation/devicetree/bindings/net/pcs/renesas,rzn1-miic.yaml b/Documentation/devicetree/bindings/net/pcs/renesas,rzn1-miic.yaml
new file mode 100644
index 000000000000..2d33bbab7163
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/pcs/renesas,rzn1-miic.yaml
@@ -0,0 +1,171 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/pcs/renesas,rzn1-miic.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas RZ/N1 MII converter
+
+maintainers:
+ - Clément Léger <clement.leger@bootlin.com>
+
+description: |
+ This MII converter is present on the Renesas RZ/N1 SoC family. It is
+ responsible for MII passthrough or for converting MII to RMII/RGMII.
+
+properties:
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 0
+
+ compatible:
+ items:
+ - enum:
+ - renesas,r9a06g032-miic
+ - const: renesas,rzn1-miic
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: MII reference clock
+ - description: RGMII reference clock
+ - description: RMII reference clock
+ - description: AHB clock used for the MII converter register interface
+
+ clock-names:
+ items:
+ - const: mii_ref
+ - const: rgmii_ref
+ - const: rmii_ref
+ - const: hclk
+
+ renesas,miic-switch-portin:
+ description: MII Switch PORTIN configuration. This value should use one of
+ the values defined in dt-bindings/net/pcs-rzn1-miic.h.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [1, 2]
+
+ power-domains:
+ maxItems: 1
+
+patternProperties:
+ "^mii-conv@[0-5]$":
+ type: object
+ description: MII converter port
+
+ properties:
+ reg:
+ description: MII Converter port number.
+ enum: [1, 2, 3, 4, 5]
+
+ renesas,miic-input:
+ description: Converter input port configuration. This value should use
+ one of the values defined in dt-bindings/net/pcs-rzn1-miic.h.
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ required:
+ - reg
+ - renesas,miic-input
+
+ additionalProperties: false
+
+ allOf:
+ - if:
+ properties:
+ reg:
+ const: 1
+ then:
+ properties:
+ renesas,miic-input:
+ const: 0
+ - if:
+ properties:
+ reg:
+ const: 2
+ then:
+ properties:
+ renesas,miic-input:
+ enum: [1, 11]
+ - if:
+ properties:
+ reg:
+ const: 3
+ then:
+ properties:
+ renesas,miic-input:
+ enum: [7, 10]
+ - if:
+ properties:
+ reg:
+ const: 4
+ then:
+ properties:
+ renesas,miic-input:
+ enum: [4, 6, 9, 13]
+ - if:
+ properties:
+ reg:
+ const: 5
+ then:
+ properties:
+ renesas,miic-input:
+ enum: [3, 5, 8, 12]
+
+required:
+ - '#address-cells'
+ - '#size-cells'
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - power-domains
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/net/pcs-rzn1-miic.h>
+ #include <dt-bindings/clock/r9a06g032-sysctrl.h>
+
+ eth-miic@44030000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "renesas,r9a06g032-miic", "renesas,rzn1-miic";
+ reg = <0x44030000 0x10000>;
+ clocks = <&sysctrl R9A06G032_CLK_MII_REF>,
+ <&sysctrl R9A06G032_CLK_RGMII_REF>,
+ <&sysctrl R9A06G032_CLK_RMII_REF>,
+ <&sysctrl R9A06G032_HCLK_SWITCH_RG>;
+ clock-names = "mii_ref", "rgmii_ref", "rmii_ref", "hclk";
+ renesas,miic-switch-portin = <MIIC_GMAC2_PORT>;
+ power-domains = <&sysctrl>;
+
+ mii_conv1: mii-conv@1 {
+ renesas,miic-input = <MIIC_GMAC1_PORT>;
+ reg = <1>;
+ };
+
+ mii_conv2: mii-conv@2 {
+ renesas,miic-input = <MIIC_SWITCH_PORTD>;
+ reg = <2>;
+ };
+
+ mii_conv3: mii-conv@3 {
+ renesas,miic-input = <MIIC_SWITCH_PORTC>;
+ reg = <3>;
+ };
+
+ mii_conv4: mii-conv@4 {
+ renesas,miic-input = <MIIC_SWITCH_PORTB>;
+ reg = <4>;
+ };
+
+ mii_conv5: mii-conv@5 {
+ renesas,miic-input = <MIIC_SWITCH_PORTA>;
+ reg = <5>;
+ };
+ };
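[Editor's note] The per-port if/then blocks above encode a fixed port-to-input mapping; the same constraints restated as a small validation sketch, with the numeric selector values taken directly from the schema::

  #include <stdbool.h>
  #include <stdint.h>

  /* Each MII converter port only accepts a fixed set of
   * renesas,miic-input selectors.
   */
  static bool miic_input_valid(uint32_t port, uint32_t input)
  {
          switch (port) {
          case 1: return input == 0;
          case 2: return input == 1 || input == 11;
          case 3: return input == 7 || input == 10;
          case 4: return input == 4 || input == 6 ||
                         input == 9 || input == 13;
          case 5: return input == 3 || input == 5 ||
                         input == 8 || input == 12;
          default: return false;
          }
  }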
diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index 36c85eb3dc0d..491597c02edf 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -65,6 +65,8 @@ properties:
- ingenic,x2000-mac
- loongson,ls2k-dwmac
- loongson,ls7a-dwmac
+ - renesas,r9a06g032-gmac
+ - renesas,rzn1-gmac
- rockchip,px30-gmac
- rockchip,rk3128-gmac
- rockchip,rk3228-gmac
@@ -135,6 +137,9 @@ properties:
reset-names:
const: stmmaceth
+ power-domains:
+ maxItems: 1
+
mac-mode:
$ref: ethernet-controller.yaml#/properties/phy-connection-type
description:
diff --git a/Documentation/devicetree/bindings/net/ti,dp83867.yaml b/Documentation/devicetree/bindings/net/ti,dp83867.yaml
index 047d757e8d82..76ff08a477ba 100644
--- a/Documentation/devicetree/bindings/net/ti,dp83867.yaml
+++ b/Documentation/devicetree/bindings/net/ti,dp83867.yaml
@@ -31,6 +31,16 @@ properties:
reg:
maxItems: 1
+ nvmem-cells:
+ maxItems: 1
+ description:
+ Nvmem data cell containing the value to write to the
+ IO_IMPEDANCE_CTRL field of the IO_MUX_CFG register.
+
+ nvmem-cell-names:
+ items:
+ - const: io_impedance_ctrl
+
ti,min-output-impedance:
type: boolean
description: |
@@ -42,9 +52,11 @@ properties:
description: |
MAC Interface Impedance control to set the programmable output impedance
to a maximum value (70 ohms).
- Note: ti,min-output-impedance and ti,max-output-impedance are mutually
- exclusive. When both properties are present ti,max-output-impedance
- takes precedence.
+ Note: The io_impedance_ctrl nvmem cell and the
+ ti,min-output-impedance and ti,max-output-impedance properties
+ are mutually exclusive. If more than one is present, the nvmem
+ cell takes precedence over ti,max-output-impedance, which in
+ turn takes precedence over ti,min-output-impedance.
tx-fifo-depth:
$ref: /schemas/types.yaml#/definitions/uint32
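[Editor's note] A hedged sketch of the precedence rule in the note above (a hypothetical helper, not the dp83867 driver's actual code)::

  #include <linux/device.h>
  #include <linux/err.h>
  #include <linux/nvmem-consumer.h>
  #include <linux/of.h>

  enum imped_src { IMPED_NVMEM, IMPED_MAX, IMPED_MIN, IMPED_DEFAULT };

  /* nvmem cell beats ti,max-output-impedance, which beats
   * ti,min-output-impedance.
   */
  static enum imped_src dp83867_pick_impedance(struct device *dev)
  {
          struct nvmem_cell *cell = nvmem_cell_get(dev, "io_impedance_ctrl");

          if (!IS_ERR(cell)) {
                  nvmem_cell_put(cell);
                  return IMPED_NVMEM;
          }
          if (of_property_read_bool(dev->of_node, "ti,max-output-impedance"))
                  return IMPED_MAX;
          if (of_property_read_bool(dev->of_node, "ti,min-output-impedance"))
                  return IMPED_MIN;
          return IMPED_DEFAULT;
  }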
diff --git a/Documentation/devicetree/bindings/net/xlnx,emaclite.yaml b/Documentation/devicetree/bindings/net/xlnx,emaclite.yaml
new file mode 100644
index 000000000000..92d8ade988f6
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/xlnx,emaclite.yaml
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/xlnx,emaclite.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Xilinx Emaclite Ethernet controller
+
+maintainers:
+ - Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
+ - Harini Katakam <harini.katakam@amd.com>
+
+allOf:
+ - $ref: ethernet-controller.yaml#
+
+properties:
+ compatible:
+ enum:
+ - xlnx,opb-ethernetlite-1.01.a
+ - xlnx,opb-ethernetlite-1.01.b
+ - xlnx,xps-ethernetlite-1.00.a
+ - xlnx,xps-ethernetlite-2.00.a
+ - xlnx,xps-ethernetlite-2.01.a
+ - xlnx,xps-ethernetlite-3.00.a
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ phy-handle: true
+
+ local-mac-address: true
+
+ xlnx,tx-ping-pong:
+ type: boolean
+ description: hardware supports a tx ping-pong buffer.
+
+ xlnx,rx-ping-pong:
+ type: boolean
+ description: hardware supports an rx ping-pong buffer.
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - phy-handle
+
+additionalProperties: false
+
+examples:
+ - |
+ axi_ethernetlite_1: ethernet@40e00000 {
+ compatible = "xlnx,xps-ethernetlite-3.00.a";
+ reg = <0x40e00000 0x10000>;
+ interrupt-parent = <&axi_intc_1>;
+ interrupts = <1>;
+ local-mac-address = [00 00 00 00 00 00];
+ phy-handle = <&phy0>;
+ xlnx,rx-ping-pong;
+ xlnx,tx-ping-pong;
+ };
diff --git a/Documentation/filesystems/ext4/attributes.rst b/Documentation/filesystems/ext4/attributes.rst
index 871d2da7a0a9..87814696a65b 100644
--- a/Documentation/filesystems/ext4/attributes.rst
+++ b/Documentation/filesystems/ext4/attributes.rst
@@ -13,8 +13,8 @@ disappeared as of Linux 3.0.
There are two places where extended attributes can be found. The first
place is between the end of each inode entry and the beginning of the
-next inode entry. For example, if inode.i\_extra\_isize = 28 and
-sb.inode\_size = 256, then there are 256 - (128 + 28) = 100 bytes
+next inode entry. For example, if inode.i_extra_isize = 28 and
+sb.inode_size = 256, then there are 256 - (128 + 28) = 100 bytes
available for in-inode extended attribute storage. The second place
where extended attributes can be found is in the block pointed to by
``inode.i_file_acl``. As of Linux 3.11, it is not possible for this
@@ -38,8 +38,8 @@ Extended attributes, when stored after the inode, have a header
- Name
- Description
* - 0x0
- - \_\_le32
- - h\_magic
+ - __le32
+ - h_magic
- Magic number for identification, 0xEA020000. This value is set by the
Linux driver, though e2fsprogs doesn't seem to check it(?)
@@ -55,28 +55,28 @@ The beginning of an extended attribute block is in
- Name
- Description
* - 0x0
- - \_\_le32
- - h\_magic
+ - __le32
+ - h_magic
- Magic number for identification, 0xEA020000.
* - 0x4
- - \_\_le32
- - h\_refcount
+ - __le32
+ - h_refcount
- Reference count.
* - 0x8
- - \_\_le32
- - h\_blocks
+ - __le32
+ - h_blocks
- Number of disk blocks used.
* - 0xC
- - \_\_le32
- - h\_hash
+ - __le32
+ - h_hash
- Hash value of all attributes.
* - 0x10
- - \_\_le32
- - h\_checksum
+ - __le32
+ - h_checksum
- Checksum of the extended attribute block.
* - 0x14
- - \_\_u32
- - h\_reserved[3]
+ - __u32
+ - h_reserved[3]
- Zero.
The checksum is calculated against the FS UUID, the 64-bit block number
@@ -100,46 +100,46 @@ Attributes stored inside an inode do not need be stored in sorted order.
- Name
- Description
* - 0x0
- - \_\_u8
- - e\_name\_len
+ - __u8
+ - e_name_len
- Length of name.
* - 0x1
- - \_\_u8
- - e\_name\_index
+ - __u8
+ - e_name_index
- Attribute name index. There is a discussion of this below.
* - 0x2
- - \_\_le16
- - e\_value\_offs
+ - __le16
+ - e_value_offs
- Location of this attribute's value on the disk block where it is stored.
Multiple attributes can share the same value. For an inode attribute
this value is relative to the start of the first entry; for a block this
value is relative to the start of the block (i.e. the header).
* - 0x4
- - \_\_le32
- - e\_value\_inum
+ - __le32
+ - e_value_inum
- The inode where the value is stored. Zero indicates the value is in the
same block as this entry. This field is only used if the
- INCOMPAT\_EA\_INODE feature is enabled.
+ INCOMPAT_EA_INODE feature is enabled.
* - 0x8
- - \_\_le32
- - e\_value\_size
+ - __le32
+ - e_value_size
- Length of attribute value.
* - 0xC
- - \_\_le32
- - e\_hash
+ - __le32
+ - e_hash
- Hash value of attribute name and attribute value. The kernel doesn't
update the hash for in-inode attributes, so for that case this value
must be zero, because e2fsck validates any non-zero hash regardless of
where the xattr lives.
* - 0x10
- char
- - e\_name[e\_name\_len]
+ - e_name[e_name_len]
- Attribute name. Does not include trailing NULL.
Attribute values can follow the end of the entry table. There appears to
be a requirement that they be aligned to 4-byte boundaries. The values
are stored starting at the end of the block and grow towards the
-xattr\_header/xattr\_entry table. When the two collide, the overflow is
+xattr_header/xattr_entry table. When the two collide, the overflow is
put into a separate disk block. If the disk block fills up, the
filesystem returns -ENOSPC.
@@ -167,15 +167,15 @@ the key name. Here is a map of name index values to key prefixes:
* - 1
- “user.”
* - 2
- - “system.posix\_acl\_access”
+ - “system.posix_acl_access”
* - 3
- - “system.posix\_acl\_default”
+ - “system.posix_acl_default”
* - 4
- “trusted.”
* - 6
- “security.”
* - 7
- - “system.” (inline\_data only?)
+ - “system.” (inline_data only?)
* - 8
- “system.richacl” (SuSE kernels only?)
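[Editor's note] The entry layout tabulated above maps directly onto a C struct; a sketch using plain fixed-width stand-ins for the kernel's __le types::

  #include <stdint.h>

  typedef uint8_t  __u8;
  typedef uint16_t __le16; /* little-endian on disk */
  typedef uint32_t __le32;

  /* 16-byte entry header (offsets 0x0..0xF), then the name bytes. */
  struct ext4_xattr_entry {
          __u8   e_name_len;   /* 0x0: length of attribute name */
          __u8   e_name_index; /* 0x1: name prefix index (table above) */
          __le16 e_value_offs; /* 0x2: offset of value within its block */
          __le32 e_value_inum; /* 0x4: EA inode number, or 0 */
          __le32 e_value_size; /* 0x8: length of attribute value */
          __le32 e_hash;       /* 0xC: hash of name and value */
          char   e_name[];     /* 0x10: name, no trailing NUL */
  };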
diff --git a/Documentation/filesystems/ext4/bigalloc.rst b/Documentation/filesystems/ext4/bigalloc.rst
index 72075aa608e4..976a180b209c 100644
--- a/Documentation/filesystems/ext4/bigalloc.rst
+++ b/Documentation/filesystems/ext4/bigalloc.rst
@@ -23,7 +23,7 @@ means that a block group addresses 32 gigabytes instead of 128 megabytes,
also shrinking the amount of file system overhead for metadata.
The administrator can set a block cluster size at mkfs time (which is
-stored in the s\_log\_cluster\_size field in the superblock); from then
+stored in the s_log_cluster_size field in the superblock); from then
on, the block bitmaps track clusters, not individual blocks. This means
that block groups can be several gigabytes in size (instead of just
128MiB); however, the minimum allocation unit becomes a cluster, not a
diff --git a/Documentation/filesystems/ext4/bitmaps.rst b/Documentation/filesystems/ext4/bitmaps.rst
index c7546dbc197a..91c45d86e9bb 100644
--- a/Documentation/filesystems/ext4/bitmaps.rst
+++ b/Documentation/filesystems/ext4/bitmaps.rst
@@ -9,15 +9,15 @@ group.
The inode bitmap records which entries in the inode table are in use.
As with most bitmaps, one bit represents the usage status of one data
-block or inode table entry. This implies a block group size of 8 \*
-number\_of\_bytes\_in\_a\_logical\_block.
+block or inode table entry. This implies a block group size of 8 *
+number_of_bytes_in_a_logical_block.
NOTE: If ``BLOCK_UNINIT`` is set for a given block group, various parts
of the kernel and e2fsprogs code pretend that the block bitmap contains
zeros (i.e. all blocks in the group are free). However, it is not
necessarily the case that no blocks are in use -- if ``meta_bg`` is set,
the bitmaps and group descriptor live inside the group. Unfortunately,
-ext2fs\_test\_block\_bitmap2() will return '0' for those locations,
+ext2fs_test_block_bitmap2() will return '0' for those locations,
which produces confusing debugfs output.
Inode Table
diff --git a/Documentation/filesystems/ext4/blockgroup.rst b/Documentation/filesystems/ext4/blockgroup.rst
index d5d652addce5..46d78f860623 100644
--- a/Documentation/filesystems/ext4/blockgroup.rst
+++ b/Documentation/filesystems/ext4/blockgroup.rst
@@ -56,39 +56,39 @@ established that the super block and the group descriptor table, if
present, will be at the beginning of the block group. The bitmaps and
the inode table can be anywhere, and it is quite possible for the
bitmaps to come after the inode table, or for both to be in different
-groups (flex\_bg). Leftover space is used for file data blocks, indirect
+groups (flex_bg). Leftover space is used for file data blocks, indirect
block maps, extent tree blocks, and extended attributes.
Flexible Block Groups
---------------------
Starting in ext4, there is a new feature called flexible block groups
-(flex\_bg). In a flex\_bg, several block groups are tied together as one
+(flex_bg). In a flex_bg, several block groups are tied together as one
logical block group; the bitmap spaces and the inode table space in the
-first block group of the flex\_bg are expanded to include the bitmaps
-and inode tables of all other block groups in the flex\_bg. For example,
-if the flex\_bg size is 4, then group 0 will contain (in order) the
+first block group of the flex_bg are expanded to include the bitmaps
+and inode tables of all other block groups in the flex_bg. For example,
+if the flex_bg size is 4, then group 0 will contain (in order) the
superblock, group descriptors, data block bitmaps for groups 0-3, inode
bitmaps for groups 0-3, inode tables for groups 0-3, and the remaining
space in group 0 is for file data. The effect of this is to group the
block group metadata close together for faster loading, and to enable
large files to be continuous on disk. Backup copies of the superblock
and group descriptors are always at the beginning of block groups, even
-if flex\_bg is enabled. The number of block groups that make up a
-flex\_bg is given by 2 ^ ``sb.s_log_groups_per_flex``.
+if flex_bg is enabled. The number of block groups that make up a
+flex_bg is given by 2 ^ ``sb.s_log_groups_per_flex``.
Meta Block Groups
-----------------
-Without the option META\_BG, for safety concerns, all block group
+Without the META_BG option, for safety reasons, all block group
descriptor copies are kept in the first block group. Given the default
128 MiB (2^27 bytes) block group size and 64-byte group descriptors, ext4
can have at most 2^27/64 = 2^21 block groups. This limits the entire
filesystem size to 2^21 * 2^27 = 2^48 bytes or 256 TiB.
The solution to this problem is to use the metablock group feature
-(META\_BG), which is already in ext3 for all 2.6 releases. With the
-META\_BG feature, ext4 filesystems are partitioned into many metablock
+(META_BG), which is already in ext3 for all 2.6 releases. With the
+META_BG feature, ext4 filesystems are partitioned into many metablock
groups. Each metablock group is a cluster of block groups whose group
descriptor structures can be stored in a single disk block. For ext4
filesystems with 4 KB block size, a single metablock group partition
@@ -110,7 +110,7 @@ bytes, a meta-block group contains 32 block groups for filesystems with
a 1KB block size, and 128 block groups for filesystems with a 4KB
blocksize. Filesystems can either be created using this new block group
descriptor layout, or existing filesystems can be resized on-line, and
-the field s\_first\_meta\_bg in the superblock will indicate the first
+the field s_first_meta_bg in the superblock will indicate the first
block group using this new layout.
Please see an important note about ``BLOCK_UNINIT`` in the section about
@@ -121,15 +121,15 @@ Lazy Block Group Initialization
A new feature for ext4 are three block group descriptor flags that
enable mkfs to skip initializing other parts of the block group
-metadata. Specifically, the INODE\_UNINIT and BLOCK\_UNINIT flags mean
+metadata. Specifically, the INODE_UNINIT and BLOCK_UNINIT flags mean
that the inode and block bitmaps for that group can be calculated and
therefore the on-disk bitmap blocks are not initialized. This is
generally the case for an empty block group or a block group containing
-only fixed-location block group metadata. The INODE\_ZEROED flag means
+only fixed-location block group metadata. The INODE_ZEROED flag means
that the inode table has been initialized; mkfs will unset this flag and
rely on the kernel to initialize the inode tables in the background.
By not writing zeroes to the bitmaps and inode table, mkfs time is
-reduced considerably. Note the feature flag is RO\_COMPAT\_GDT\_CSUM,
-but the dumpe2fs output prints this as “uninit\_bg”. They are the same
+reduced considerably. Note the feature flag is RO_COMPAT_GDT_CSUM,
+but the dumpe2fs output prints this as “uninit_bg”. They are the same
thing.
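[Editor's note] The size limits quoted above reduce to straightforward arithmetic; a runnable check (the flex_bg shift value is a hypothetical sample)::

  #include <stdint.h>
  #include <stdio.h>

  int main(void)
  {
          /* Numbers from the text above: 128 MiB groups, 64-byte
           * descriptors, all descriptor copies in group 0 without
           * META_BG.
           */
          uint64_t group_bytes = 1ULL << 27;               /* 128 MiB */
          uint64_t max_groups  = group_bytes / 64;         /* 2^21 */
          uint64_t max_fs      = max_groups * group_bytes; /* 2^48 */

          printf("max block groups: %llu\n", (unsigned long long)max_groups);
          printf("max fs size: %llu bytes (256 TiB)\n",
                 (unsigned long long)max_fs);

          /* flex_bg: groups per flex group is 2^sb.s_log_groups_per_flex;
           * the shift below is a hypothetical sample value.
           */
          unsigned int log_groups_per_flex = 4;
          printf("groups per flex_bg: %u\n", 1U << log_groups_per_flex);
          return 0;
  }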
diff --git a/Documentation/filesystems/ext4/blockmap.rst b/Documentation/filesystems/ext4/blockmap.rst
index 30e25750d88a..2bd990402a5c 100644
--- a/Documentation/filesystems/ext4/blockmap.rst
+++ b/Documentation/filesystems/ext4/blockmap.rst
@@ -1,7 +1,7 @@
.. SPDX-License-Identifier: GPL-2.0
+---------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-| i.i\_block Offset | Where It Points |
+| i.i_block Offset | Where It Points |
+=====================+==============================================================================================================================================================================================================================+
| 0 to 11 | Direct map to file blocks 0 to 11. |
+---------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
diff --git a/Documentation/filesystems/ext4/checksums.rst b/Documentation/filesystems/ext4/checksums.rst
index 5519e253810d..e232749daf5f 100644
--- a/Documentation/filesystems/ext4/checksums.rst
+++ b/Documentation/filesystems/ext4/checksums.rst
@@ -4,7 +4,7 @@ Checksums
---------
Starting in early 2012, metadata checksums were added to all major ext4
-and jbd2 data structures. The associated feature flag is metadata\_csum.
+and jbd2 data structures. The associated feature flag is metadata_csum.
The desired checksum algorithm is indicated in the superblock, though as
of October 2012 the only supported algorithm is crc32c. Some data
structures did not have space to fit a full 32-bit checksum, so only the
@@ -20,7 +20,7 @@ encounters directory blocks that lack sufficient empty space to add a
checksum, it will request that you run ``e2fsck -D`` to have the
directories rebuilt with checksums. This has the added benefit of
removing slack space from the directory files and rebalancing the htree
-indexes. If you \_ignore\_ this step, your directories will not be
+indexes. If you _ignore_ this step, your directories will not be
protected by a checksum!
The following table describes the data elements that go into each type
@@ -35,39 +35,39 @@ of checksum. The checksum function is whatever the superblock describes
- Length
- Ingredients
* - Superblock
- - \_\_le32
+ - __le32
- The entire superblock up to the checksum field. The UUID lives inside
the superblock.
* - MMP
- - \_\_le32
+ - __le32
- UUID + the entire MMP block up to the checksum field.
* - Extended Attributes
- - \_\_le32
+ - __le32
- UUID + the entire extended attribute block. The checksum field is set to
zero.
* - Directory Entries
- - \_\_le32
+ - __le32
- UUID + inode number + inode generation + the directory block up to the
fake entry enclosing the checksum field.
* - HTREE Nodes
- - \_\_le32
+ - __le32
- UUID + inode number + inode generation + all valid extents + HTREE tail.
The checksum field is set to zero.
* - Extents
- - \_\_le32
+ - __le32
- UUID + inode number + inode generation + the entire extent block up to
the checksum field.
* - Bitmaps
- - \_\_le32 or \_\_le16
+ - __le32 or __le16
- UUID + the entire bitmap. Checksums are stored in the group descriptor,
and truncated if the group descriptor size is 32 bytes (i.e. not 64bit)
* - Inodes
- - \_\_le32
+ - __le32
- UUID + inode number + inode generation + the entire inode. The checksum
field is set to zero. Each inode has its own checksum.
* - Group Descriptors
- - \_\_le16
- - If metadata\_csum, then UUID + group number + the entire descriptor;
- else if gdt\_csum, then crc16(UUID + group number + the entire
+ - __le16
+ - If metadata_csum, then UUID + group number + the entire descriptor;
+ else if gdt_csum, then crc16(UUID + group number + the entire
descriptor). In all cases, only the lower 16 bits are stored.
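[Editor's note] Most rows in the table follow the same "UUID first, then the object" folding; a hedged sketch of that pattern (the initial crc32c seed here is an assumption, not a statement about the on-disk format)::

  #include <stddef.h>
  #include <stdint.h>

  /* crc32c() stands for any crc32c implementation. */
  typedef uint32_t (*crc32c_fn)(uint32_t crc, const void *buf, size_t len);

  static uint32_t ext4_style_csum(crc32c_fn crc32c, const uint8_t uuid[16],
                                  const void *obj, size_t len)
  {
          uint32_t crc = crc32c(0, uuid, 16); /* fold in the FS UUID */
          return crc32c(crc, obj, len);       /* then the object itself */
  }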
diff --git a/Documentation/filesystems/ext4/directory.rst b/Documentation/filesystems/ext4/directory.rst
index 55f618b37144..6eece8e31df8 100644
--- a/Documentation/filesystems/ext4/directory.rst
+++ b/Documentation/filesystems/ext4/directory.rst
@@ -42,24 +42,24 @@ is at most 263 bytes long, though on disk you'll need to reference
- Name
- Description
* - 0x0
- - \_\_le32
+ - __le32
- inode
- Number of the inode that this directory entry points to.
* - 0x4
- - \_\_le16
- - rec\_len
+ - __le16
+ - rec_len
- Length of this directory entry. Must be a multiple of 4.
* - 0x6
- - \_\_le16
- - name\_len
+ - __le16
+ - name_len
- Length of the file name.
* - 0x8
- char
- - name[EXT4\_NAME\_LEN]
+ - name[EXT4_NAME_LEN]
- File name.
Since file names cannot be longer than 255 bytes, the new directory
-entry format shortens the name\_len field and uses the space for a file
+entry format shortens the name_len field and uses the space for a file
type flag, probably to avoid having to load every inode during directory
tree traversal. This format is ``ext4_dir_entry_2``, which is at most
263 bytes long, though on disk you'll need to reference
@@ -74,24 +74,24 @@ tree traversal. This format is ``ext4_dir_entry_2``, which is at most
- Name
- Description
* - 0x0
- - \_\_le32
+ - __le32
- inode
- Number of the inode that this directory entry points to.
* - 0x4
- - \_\_le16
- - rec\_len
+ - __le16
+ - rec_len
- Length of this directory entry.
* - 0x6
- - \_\_u8
- - name\_len
+ - __u8
+ - name_len
- Length of the file name.
* - 0x7
- - \_\_u8
- - file\_type
+ - __u8
+ - file_type
- File type code, see ftype_ table below.
* - 0x8
- char
- - name[EXT4\_NAME\_LEN]
+ - name[EXT4_NAME_LEN]
- File name.
.. _ftype:
@@ -137,19 +137,19 @@ entry uses this extension, it may be up to 271 bytes.
- Name
- Description
* - 0x0
- - \_\_le32
+ - __le32
- hash
- The hash of the directory name
* - 0x4
- - \_\_le32
- - minor\_hash
+ - __le32
+ - minor_hash
- The minor hash of the directory name
In order to add checksums to these classic directory blocks, a phony
``struct ext4_dir_entry`` is placed at the end of each leaf block to
hold the checksum. The directory entry is 12 bytes long. The inode
-number and name\_len fields are set to zero to fool old software into
+number and name_len fields are set to zero to fool old software into
ignoring an apparently empty directory entry, and the checksum is stored
in the place where the name normally goes. The structure is
``struct ext4_dir_entry_tail``:
@@ -163,24 +163,24 @@ in the place where the name normally goes. The structure is
- Name
- Description
* - 0x0
- - \_\_le32
- - det\_reserved\_zero1
+ - __le32
+ - det_reserved_zero1
- Inode number, which must be zero.
* - 0x4
- - \_\_le16
- - det\_rec\_len
+ - __le16
+ - det_rec_len
- Length of this directory entry, which must be 12.
* - 0x6
- - \_\_u8
- - det\_reserved\_zero2
+ - __u8
+ - det_reserved_zero2
- Length of the file name, which must be zero.
* - 0x7
- - \_\_u8
- - det\_reserved\_ft
+ - __u8
+ - det_reserved_ft
- File type, which must be 0xDE.
* - 0x8
- - \_\_le32
- - det\_checksum
+ - __le32
+ - det_checksum
- Directory leaf block checksum.
The leaf directory block checksum is calculated against the FS UUID, the
@@ -194,7 +194,7 @@ Hash Tree Directories
A linear array of directory entries isn't great for performance, so a
new feature was added to ext3 to provide a faster (but peculiar)
balanced tree keyed off a hash of the directory entry name. If the
-EXT4\_INDEX\_FL (0x1000) flag is set in the inode, this directory uses a
+EXT4_INDEX_FL (0x1000) flag is set in the inode, this directory uses a
hashed btree (htree) to organize and find directory entries. For
backwards read-only compatibility with ext2, this tree is actually
hidden inside the directory file, masquerading as “empty” directory data
@@ -206,14 +206,14 @@ rest of the directory block is empty so that it moves on.
The root of the tree always lives in the first data block of the
directory. By ext2 custom, the '.' and '..' entries must appear at the
beginning of this first block, so they are put here as two
-``struct ext4_dir_entry_2``\ s and not stored in the tree. The rest of
+``struct ext4_dir_entry_2`` entries and not stored in the tree. The rest of
the root node contains metadata about the tree and finally a hash->block
map to find nodes that are lower in the htree. If
``dx_root.info.indirect_levels`` is non-zero then the htree has two
levels; the data block pointed to by the root node's map is an interior
node, which is indexed by a minor hash. Interior nodes in this tree
contain a zeroed-out ``struct ext4_dir_entry_2`` followed by a
-minor\_hash->block map to find leafe nodes. Leaf nodes contain a linear
+minor_hash->block map to find leaf nodes. Leaf nodes contain a linear
array of all ``struct ext4_dir_entry_2``; all of these entries
(presumably) hash to the same value. If there is an overflow, the
entries simply overflow into the next leaf node, and the
@@ -245,83 +245,83 @@ of a data block:
- Name
- Description
* - 0x0
- - \_\_le32
+ - __le32
- dot.inode
- inode number of this directory.
* - 0x4
- - \_\_le16
- - dot.rec\_len
+ - __le16
+ - dot.rec_len
- Length of this record, 12.
* - 0x6
- u8
- - dot.name\_len
+ - dot.name_len
- Length of the name, 1.
* - 0x7
- u8
- - dot.file\_type
+ - dot.file_type
- File type of this entry, 0x2 (directory) (if the feature flag is set).
* - 0x8
- char
- dot.name[4]
- - “.\\0\\0\\0”
+ - “.\0\0\0”
* - 0xC
- - \_\_le32
+ - __le32
- dotdot.inode
- inode number of parent directory.
* - 0x10
- - \_\_le16
- - dotdot.rec\_len
- - block\_size - 12. The record length is long enough to cover all htree
+ - __le16
+ - dotdot.rec_len
+ - block_size - 12. The record length is long enough to cover all htree
data.
* - 0x12
- u8
- - dotdot.name\_len
+ - dotdot.name_len
- Length of the name, 2.
* - 0x13
- u8
- - dotdot.file\_type
+ - dotdot.file_type
- File type of this entry, 0x2 (directory) (if the feature flag is set).
* - 0x14
- char
- - dotdot\_name[4]
- - “..\\0\\0”
+ - dotdot_name[4]
+ - “..\0\0”
* - 0x18
- - \_\_le32
- - struct dx\_root\_info.reserved\_zero
+ - __le32
+ - struct dx_root_info.reserved_zero
- Zero.
* - 0x1C
- u8
- - struct dx\_root\_info.hash\_version
+ - struct dx_root_info.hash_version
- Hash type, see dirhash_ table below.
* - 0x1D
- u8
- - struct dx\_root\_info.info\_length
+ - struct dx_root_info.info_length
- Length of the tree information, 0x8.
* - 0x1E
- u8
- - struct dx\_root\_info.indirect\_levels
- - Depth of the htree. Cannot be larger than 3 if the INCOMPAT\_LARGEDIR
+ - struct dx_root_info.indirect_levels
+ - Depth of the htree. Cannot be larger than 3 if the INCOMPAT_LARGEDIR
feature is set; cannot be larger than 2 otherwise.
* - 0x1F
- u8
- - struct dx\_root\_info.unused\_flags
+ - struct dx_root_info.unused_flags
-
* - 0x20
- - \_\_le16
+ - __le16
- limit
- - Maximum number of dx\_entries that can follow this header, plus 1 for
+ - Maximum number of dx_entries that can follow this header, plus 1 for
the header itself.
* - 0x22
- - \_\_le16
+ - __le16
- count
- - Actual number of dx\_entries that follow this header, plus 1 for the
+ - Actual number of dx_entries that follow this header, plus 1 for the
header itself.
* - 0x24
- - \_\_le32
+ - __le32
- block
- The block number (within the directory file) that goes with hash=0.
* - 0x28
- - struct dx\_entry
+ - struct dx_entry
- entries[0]
- As many 8-byte ``struct dx_entry`` as fits in the rest of the data block.
@@ -362,38 +362,38 @@ also the full length of a data block:
- Name
- Description
* - 0x0
- - \_\_le32
+ - __le32
- fake.inode
- Zero, to make it look like this entry is not in use.
* - 0x4
- - \_\_le16
- - fake.rec\_len
- - The size of the block, in order to hide all of the dx\_node data.
+ - __le16
+ - fake.rec_len
+ - The size of the block, in order to hide all of the dx_node data.
* - 0x6
- u8
- - name\_len
+ - name_len
- Zero. There is no name for this “unused” directory entry.
* - 0x7
- u8
- - file\_type
+ - file_type
- Zero. There is no file type for this “unused” directory entry.
* - 0x8
- - \_\_le16
+ - __le16
- limit
- - Maximum number of dx\_entries that can follow this header, plus 1 for
+ - Maximum number of dx_entries that can follow this header, plus 1 for
the header itself.
* - 0xA
- - \_\_le16
+ - __le16
- count
- - Actual number of dx\_entries that follow this header, plus 1 for the
+ - Actual number of dx_entries that follow this header, plus 1 for the
header itself.
* - 0xE
- - \_\_le32
+ - __le32
- block
- The block number (within the directory file) that goes with the lowest
hash value of this block. This value is stored in the parent block.
* - 0x12
- - struct dx\_entry
+ - struct dx_entry
- entries[0]
- As many 8-byte ``struct dx_entry`` as fits in the rest of the data block.
@@ -410,11 +410,11 @@ long:
- Name
- Description
* - 0x0
- - \_\_le32
+ - __le32
- hash
- Hash code.
* - 0x4
- - \_\_le32
+ - __le32
- block
- Block number (within the directory file, not filesystem blocks) of the
next node in the htree.
@@ -423,13 +423,13 @@ long:
author.)
If metadata checksums are enabled, the last 8 bytes of the directory
-block (precisely the length of one dx\_entry) are used to store a
+block (precisely the length of one dx_entry) are used to store a
``struct dx_tail``, which contains the checksum. The ``limit`` and
-``count`` entries in the dx\_root/dx\_node structures are adjusted as
-necessary to fit the dx\_tail into the block. If there is no space for
-the dx\_tail, the user is notified to run e2fsck -D to rebuild the
+``count`` entries in the dx_root/dx_node structures are adjusted as
+necessary to fit the dx_tail into the block. If there is no space for
+the dx_tail, the user is notified to run e2fsck -D to rebuild the
directory index (which will ensure that there's space for the checksum).
-The dx\_tail structure is 8 bytes long and looks like this:
+The dx_tail structure is 8 bytes long and looks like this:
.. list-table::
:widths: 8 8 24 40
@@ -441,13 +441,13 @@ The dx\_tail structure is 8 bytes long and looks like this:
- Description
* - 0x0
- u32
- - dt\_reserved
+ - dt_reserved
- Zero.
* - 0x4
- - \_\_le32
- - dt\_checksum
+ - __le32
+ - dt_checksum
- Checksum of the htree directory block.
The checksum is calculated against the FS UUID, the htree index header
-(dx\_root or dx\_node), all of the htree indices (dx\_entry) that are in
-use, and the tail block (dx\_tail).
+(dx_root or dx_node), all of the htree indices (dx_entry) that are in
+use, and the tail block (dx_tail).
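[Editor's note] For reference, the ``ext4_dir_entry_2`` layout from the table earlier in this hunk, as a C sketch with plain fixed-width stand-ins for the kernel's __le types::

  #include <stdint.h>

  typedef uint32_t __le32;
  typedef uint16_t __le16;

  #define EXT4_NAME_LEN 255

  /* 8-byte fixed header plus up to 255 name bytes = at most 263 bytes. */
  struct ext4_dir_entry_2 {
          __le32  inode;               /* 0x0: inode number, 0 if unused */
          __le16  rec_len;             /* 0x4: length of this entry */
          uint8_t name_len;            /* 0x6: length of the file name */
          uint8_t file_type;           /* 0x7: ftype code (table above) */
          char    name[EXT4_NAME_LEN]; /* 0x8: name, not NUL-terminated */
  };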
diff --git a/Documentation/filesystems/ext4/eainode.rst b/Documentation/filesystems/ext4/eainode.rst
index ecc0d01a0a72..7a2ef26b064a 100644
--- a/Documentation/filesystems/ext4/eainode.rst
+++ b/Documentation/filesystems/ext4/eainode.rst
@@ -5,14 +5,14 @@ Large Extended Attribute Values
To enable ext4 to store extended attribute values that do not fit in the
inode or in the single extended attribute block attached to an inode,
-the EA\_INODE feature allows us to store the value in the data blocks of
+the EA_INODE feature allows us to store the value in the data blocks of
a regular file inode. This “EA inode” is linked only from the extended
attribute name index and must not appear in a directory entry. The
-inode's i\_atime field is used to store a checksum of the xattr value;
-and i\_ctime/i\_version store a 64-bit reference count, which enables
+inode's i_atime field is used to store a checksum of the xattr value;
+and i_ctime/i_version store a 64-bit reference count, which enables
sharing of large xattr values between multiple owning inodes. For
backward compatibility with older versions of this feature, the
-i\_mtime/i\_generation *may* store a back-reference to the inode number
-and i\_generation of the **one** owning inode (in cases where the EA
+i_mtime/i_generation *may* store a back-reference to the inode number
+and i_generation of the **one** owning inode (in cases where the EA
inode is not referenced by multiple inodes) to verify that the EA inode
is the correct one being accessed.
diff --git a/Documentation/filesystems/ext4/group_descr.rst b/Documentation/filesystems/ext4/group_descr.rst
index 7ba6114e7f5c..392ec44f8fb0 100644
--- a/Documentation/filesystems/ext4/group_descr.rst
+++ b/Documentation/filesystems/ext4/group_descr.rst
@@ -7,34 +7,34 @@ Each block group on the filesystem has one of these descriptors
associated with it. As noted in the Layout section above, the group
descriptors (if present) are the second item in the block group. The
standard configuration is for each block group to contain a full copy of
-the block group descriptor table unless the sparse\_super feature flag
+the block group descriptor table unless the sparse_super feature flag
is set.
Notice how the group descriptor records the location of both bitmaps and
the inode table (i.e. they can float). This means that within a block
group, the only data structures with fixed locations are the superblock
-and the group descriptor table. The flex\_bg mechanism uses this
+and the group descriptor table. The flex_bg mechanism uses this
property to group several block groups into a flex group and lay out all
of the groups' bitmaps and inode tables into one long run in the first
group of the flex group.
-If the meta\_bg feature flag is set, then several block groups are
-grouped together into a meta group. Note that in the meta\_bg case,
+If the meta_bg feature flag is set, then several block groups are
+grouped together into a meta group. Note that in the meta_bg case,
however, the first and last two block groups within the larger meta
group contain only group descriptors for the groups inside the meta
group.
-flex\_bg and meta\_bg do not appear to be mutually exclusive features.
+flex_bg and meta_bg do not appear to be mutually exclusive features.
In ext2, ext3, and ext4 (when the 64bit feature is not enabled), the
block group descriptor was only 32 bytes long and therefore ends at
-bg\_checksum. On an ext4 filesystem with the 64bit feature enabled, the
+bg_checksum. On an ext4 filesystem with the 64bit feature enabled, the
block group descriptor expands to at least the 64 bytes described below;
the size is stored in the superblock.
-If gdt\_csum is set and metadata\_csum is not set, the block group
+If gdt_csum is set and metadata_csum is not set, the block group
checksum is the crc16 of the FS UUID, the group number, and the group
-descriptor structure. If metadata\_csum is set, then the block group
+descriptor structure. If metadata_csum is set, then the block group
checksum is the lower 16 bits of the checksum of the FS UUID, the group
number, and the group descriptor structure. Both block and inode bitmap
checksums are calculated against the FS UUID, the group number, and the
@@ -51,59 +51,59 @@ The block group descriptor is laid out in ``struct ext4_group_desc``.
- Name
- Description
* - 0x0
- - \_\_le32
- - bg\_block\_bitmap\_lo
+ - __le32
+ - bg_block_bitmap_lo
- Lower 32-bits of location of block bitmap.
* - 0x4
- - \_\_le32
- - bg\_inode\_bitmap\_lo
+ - __le32
+ - bg_inode_bitmap_lo
- Lower 32-bits of location of inode bitmap.
* - 0x8
- - \_\_le32
- - bg\_inode\_table\_lo
+ - __le32
+ - bg_inode_table_lo
- Lower 32-bits of location of inode table.
* - 0xC
- - \_\_le16
- - bg\_free\_blocks\_count\_lo
+ - __le16
+ - bg_free_blocks_count_lo
- Lower 16-bits of free block count.
* - 0xE
- - \_\_le16
- - bg\_free\_inodes\_count\_lo
+ - __le16
+ - bg_free_inodes_count_lo
- Lower 16-bits of free inode count.
* - 0x10
- - \_\_le16
- - bg\_used\_dirs\_count\_lo
+ - __le16
+ - bg_used_dirs_count_lo
- Lower 16-bits of directory count.
* - 0x12
- - \_\_le16
- - bg\_flags
+ - __le16
+ - bg_flags
- Block group flags. See the bgflags_ table below.
* - 0x14
- - \_\_le32
- - bg\_exclude\_bitmap\_lo
+ - __le32
+ - bg_exclude_bitmap_lo
- Lower 32-bits of location of snapshot exclusion bitmap.
* - 0x18
- - \_\_le16
- - bg\_block\_bitmap\_csum\_lo
+ - __le16
+ - bg_block_bitmap_csum_lo
- Lower 16-bits of the block bitmap checksum.
* - 0x1A
- - \_\_le16
- - bg\_inode\_bitmap\_csum\_lo
+ - __le16
+ - bg_inode_bitmap_csum_lo
- Lower 16-bits of the inode bitmap checksum.
* - 0x1C
- - \_\_le16
- - bg\_itable\_unused\_lo
+ - __le16
+ - bg_itable_unused_lo
- Lower 16-bits of unused inode count. If set, we needn't scan past the
- ``(sb.s_inodes_per_group - gdt.bg_itable_unused)``\ th entry in the
+ ``(sb.s_inodes_per_group - gdt.bg_itable_unused)``-th entry in the
inode table for this group.
* - 0x1E
- - \_\_le16
- - bg\_checksum
- - Group descriptor checksum; crc16(sb\_uuid+group\_num+bg\_desc) if the
- RO\_COMPAT\_GDT\_CSUM feature is set, or
- crc32c(sb\_uuid+group\_num+bg\_desc) & 0xFFFF if the
- RO\_COMPAT\_METADATA\_CSUM feature is set. The bg\_checksum
- field in bg\_desc is skipped when calculating crc16 checksum,
+ - __le16
+ - bg_checksum
+ - Group descriptor checksum; crc16(sb_uuid+group_num+bg_desc) if the
+ RO_COMPAT_GDT_CSUM feature is set, or
+ crc32c(sb_uuid+group_num+bg_desc) & 0xFFFF if the
+ RO_COMPAT_METADATA_CSUM feature is set. The bg_checksum
+ field in bg_desc is skipped when calculating crc16 checksum,
and set to zero if crc32c checksum is used.
* -
-
@@ -111,48 +111,48 @@ The block group descriptor is laid out in ``struct ext4_group_desc``.
- These fields only exist if the 64bit feature is enabled and s_desc_size
> 32.
* - 0x20
- - \_\_le32
- - bg\_block\_bitmap\_hi
+ - __le32
+ - bg_block_bitmap_hi
- Upper 32-bits of location of block bitmap.
* - 0x24
- - \_\_le32
- - bg\_inode\_bitmap\_hi
+ - __le32
+ - bg_inode_bitmap_hi
- Upper 32-bits of location of inodes bitmap.
* - 0x28
- - \_\_le32
- - bg\_inode\_table\_hi
+ - __le32
+ - bg_inode_table_hi
- Upper 32-bits of location of inodes table.
* - 0x2C
- - \_\_le16
- - bg\_free\_blocks\_count\_hi
+ - __le16
+ - bg_free_blocks_count_hi
- Upper 16-bits of free block count.
* - 0x2E
- - \_\_le16
- - bg\_free\_inodes\_count\_hi
+ - __le16
+ - bg_free_inodes_count_hi
- Upper 16-bits of free inode count.
* - 0x30
- - \_\_le16
- - bg\_used\_dirs\_count\_hi
+ - __le16
+ - bg_used_dirs_count_hi
- Upper 16-bits of directory count.
* - 0x32
- - \_\_le16
- - bg\_itable\_unused\_hi
+ - __le16
+ - bg_itable_unused_hi
- Upper 16-bits of unused inode count.
* - 0x34
- - \_\_le32
- - bg\_exclude\_bitmap\_hi
+ - __le32
+ - bg_exclude_bitmap_hi
- Upper 32-bits of location of snapshot exclusion bitmap.
* - 0x38
- - \_\_le16
- - bg\_block\_bitmap\_csum\_hi
+ - __le16
+ - bg_block_bitmap_csum_hi
- Upper 16-bits of the block bitmap checksum.
* - 0x3A
- - \_\_le16
- - bg\_inode\_bitmap\_csum\_hi
+ - __le16
+ - bg_inode_bitmap_csum_hi
- Upper 16-bits of the inode bitmap checksum.
* - 0x3C
- - \_\_u32
- - bg\_reserved
+ - __u32
+ - bg_reserved
- Padding to 64 bytes.
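A minimal C sketch of how the ``_lo``/``_hi`` halves above recombine once
the 64bit feature is enabled and ``s_desc_size`` > 32 (the helper name is
illustrative)::

    #include <stdint.h>

    /* Combine the low and high halves of a 64bit-mode descriptor field,
     * e.g. bg_block_bitmap_lo/_hi, after little-endian conversion. */
    static uint64_t bg_combine(uint32_t lo, uint32_t hi)
    {
        return ((uint64_t)hi << 32) | lo;
    }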
.. _bgflags:
@@ -166,8 +166,8 @@ Block group flags can be any combination of the following:
* - Value
- Description
* - 0x1
- - inode table and bitmap are not initialized (EXT4\_BG\_INODE\_UNINIT).
+ - inode table and bitmap are not initialized (EXT4_BG_INODE_UNINIT).
* - 0x2
- - block bitmap is not initialized (EXT4\_BG\_BLOCK\_UNINIT).
+ - block bitmap is not initialized (EXT4_BG_BLOCK_UNINIT).
* - 0x4
- - inode table is zeroed (EXT4\_BG\_INODE\_ZEROED).
+ - inode table is zeroed (EXT4_BG_INODE_ZEROED).
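As a sketch, the flag values above can be tested directly; the constant
names follow the table and the helper is illustrative::

    #include <stdbool.h>
    #include <stdint.h>

    #define EXT4_BG_INODE_UNINIT 0x1  /* inode table/bitmap not initialized */
    #define EXT4_BG_BLOCK_UNINIT 0x2  /* block bitmap not initialized */
    #define EXT4_BG_INODE_ZEROED 0x4  /* inode table is zeroed */

    static bool block_bitmap_needs_init(uint16_t bg_flags)
    {
        return (bg_flags & EXT4_BG_BLOCK_UNINIT) != 0;
    }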
diff --git a/Documentation/filesystems/ext4/ifork.rst b/Documentation/filesystems/ext4/ifork.rst
index b9816d5a896b..dc31f505e6c8 100644
--- a/Documentation/filesystems/ext4/ifork.rst
+++ b/Documentation/filesystems/ext4/ifork.rst
@@ -1,6 +1,6 @@
.. SPDX-License-Identifier: GPL-2.0
-The Contents of inode.i\_block
+The Contents of inode.i_block
------------------------------
Depending on the type of file an inode describes, the 60 bytes of
@@ -47,7 +47,7 @@ In ext4, the file to logical block map has been replaced with an extent
tree. Under the old scheme, allocating a contiguous run of 1,000 blocks
requires an indirect block to map all 1,000 entries; with extents, the
mapping is reduced to a single ``struct ext4_extent`` with
-``ee_len = 1000``. If flex\_bg is enabled, it is possible to allocate
+``ee_len = 1000``. If flex_bg is enabled, it is possible to allocate
very large files with a single extent, at a considerable reduction in
metadata block use, and some improvement in disk efficiency. The inode
must have the extents flag (0x80000) flag set for this feature to be in
@@ -76,28 +76,28 @@ which is 12 bytes long:
- Name
- Description
* - 0x0
- - \_\_le16
- - eh\_magic
+ - __le16
+ - eh_magic
- Magic number, 0xF30A.
* - 0x2
- - \_\_le16
- - eh\_entries
+ - __le16
+ - eh_entries
- Number of valid entries following the header.
* - 0x4
- - \_\_le16
- - eh\_max
+ - __le16
+ - eh_max
- Maximum number of entries that could follow the header.
* - 0x6
- - \_\_le16
- - eh\_depth
+ - __le16
+ - eh_depth
- Depth of this extent node in the extent tree. 0 = this extent node
points to data blocks; otherwise, this extent node points to other
extent nodes. The extent tree can be at most 5 levels deep: a logical
block number can be at most ``2^32``, and the smallest ``n`` that
satisfies ``4*(((blocksize - 12)/12)^n) >= 2^32`` is 5.
* - 0x8
- - \_\_le32
- - eh\_generation
+ - __le32
+ - eh_generation
- Generation of the tree. (Used by Lustre, but not standard ext4).
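To see where the depth bound comes from: ``i_block`` is 60 bytes, so the
root holds ``(60 - 12)/12 = 4`` entries, and with the minimum 1 KiB block
size each interior node holds ``(1024 - 12)/12 = 84`` entries. Then
``4*(84^4)`` is roughly ``2.0 * 10^8``, which is still below ``2^32``,
while ``4*(84^5)`` is roughly ``1.7 * 10^10``, which is not; hence five
levels in the worst case, with larger block sizes needing fewer.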
Internal nodes of the extent tree, also known as index nodes, are
@@ -112,22 +112,22 @@ recorded as ``struct ext4_extent_idx``, and are 12 bytes long:
- Name
- Description
* - 0x0
- - \_\_le32
- - ei\_block
+ - __le32
+ - ei_block
- This index node covers file blocks from 'block' onward.
* - 0x4
- - \_\_le32
- - ei\_leaf\_lo
+ - __le32
+ - ei_leaf_lo
- Lower 32-bits of the block number of the extent node that is the next
level lower in the tree. The tree node pointed to can be either another
internal node or a leaf node, described below.
* - 0x8
- - \_\_le16
- - ei\_leaf\_hi
+ - __le16
+ - ei_leaf_hi
- Upper 16-bits of the previous field.
* - 0xA
- - \_\_u16
- - ei\_unused
+ - __u16
+ - ei_unused
-
Leaf nodes of the extent tree are recorded as ``struct ext4_extent``,
@@ -142,24 +142,24 @@ and are also 12 bytes long:
- Name
- Description
* - 0x0
- - \_\_le32
- - ee\_block
+ - __le32
+ - ee_block
- First file block number that this extent covers.
* - 0x4
- - \_\_le16
- - ee\_len
+ - __le16
+ - ee_len
- Number of blocks covered by extent. If the value of this field is <=
32768, the extent is initialized. If the value of the field is > 32768,
the extent is uninitialized and the actual extent length is ``ee_len`` -
32768. Therefore, the maximum length of an initialized extent is 32768
blocks, and the maximum length of an uninitialized extent is 32767.
* - 0x6
- - \_\_le16
- - ee\_start\_hi
+ - __le16
+ - ee_start_hi
- Upper 16-bits of the block number to which this extent points.
* - 0x8
- - \_\_le32
- - ee\_start\_lo
+ - __le32
+ - ee_start_lo
- Lower 32-bits of the block number to which this extent points.
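A short C sketch of the ``ee_len`` rules above (fields shown after
little-endian conversion; names are illustrative)::

    #include <stdbool.h>
    #include <stdint.h>

    struct extent {
        uint32_t ee_block;
        uint16_t ee_len;
        uint16_t ee_start_hi;
        uint32_t ee_start_lo;
    };

    /* An extent is uninitialized (unwritten) when ee_len > 32768. */
    static bool extent_unwritten(const struct extent *ex)
    {
        return ex->ee_len > 32768;
    }

    /* Actual length in blocks, with the uninitialized bias removed. */
    static uint16_t extent_len(const struct extent *ex)
    {
        return extent_unwritten(ex) ? ex->ee_len - 32768 : ex->ee_len;
    }

    /* 48-bit physical start block from the split hi/lo fields. */
    static uint64_t extent_start(const struct extent *ex)
    {
        return ((uint64_t)ex->ee_start_hi << 32) | ex->ee_start_lo;
    }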
Prior to the introduction of metadata checksums, the extent header +
@@ -182,8 +182,8 @@ including) the checksum itself.
- Name
- Description
* - 0x0
- - \_\_le32
- - eb\_checksum
+ - __le32
+ - eb_checksum
- Checksum of the extent block, crc32c(uuid+inum+igeneration+extentblock)
Inline Data
diff --git a/Documentation/filesystems/ext4/inlinedata.rst b/Documentation/filesystems/ext4/inlinedata.rst
index d1075178ce0b..a728af0d2fd0 100644
--- a/Documentation/filesystems/ext4/inlinedata.rst
+++ b/Documentation/filesystems/ext4/inlinedata.rst
@@ -11,12 +11,12 @@ file is smaller than 60 bytes, then the data are stored inline in
attribute space, then it might be found as an extended attribute
“system.data” within the inode body (“ibody EA”). This of course
constrains the amount of extended attributes one can attach to an inode.
-If the data size increases beyond i\_block + ibody EA, a regular block
+If the data size increases beyond i_block + ibody EA, a regular block
is allocated and the contents moved to that block.
Pending a change to compact the extended attribute key used to store
inline data, one ought to be able to store 160 bytes of data in a
-256-byte inode (as of June 2015, when i\_extra\_isize is 28). Prior to
+256-byte inode (as of June 2015, when i_extra_isize is 28). Prior to
that, the limit was 156 bytes due to inefficient use of inode space.
The inline data feature requires the presence of an extended attribute
@@ -25,12 +25,12 @@ for “system.data”, even if the attribute value is zero length.
Inline Directories
~~~~~~~~~~~~~~~~~~
-The first four bytes of i\_block are the inode number of the parent
+The first four bytes of i_block are the inode number of the parent
directory. Following that is a 56-byte space for an array of directory
entries; see ``struct ext4_dir_entry``. If there is a “system.data”
attribute in the inode body, the EA value is an array of
``struct ext4_dir_entry`` as well. Note that for inline directories, the
-i\_block and EA space are treated as separate dirent blocks; directory
+i_block and EA space are treated as separate dirent blocks; directory
entries cannot span the two.
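As a sketch, reading the parent inode number back out of an inline
directory (assuming a little-endian host for brevity; the helper is
illustrative)::

    #include <stdint.h>
    #include <string.h>

    /* The first four bytes of i_block hold the parent directory's
     * inode number, stored little-endian. */
    static uint32_t inline_dir_parent(const uint8_t i_block[60])
    {
        uint32_t parent;
        memcpy(&parent, i_block, sizeof(parent));
        return parent;
    }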
Inline directory entries are not checksummed, as the inode checksum
diff --git a/Documentation/filesystems/ext4/inodes.rst b/Documentation/filesystems/ext4/inodes.rst
index 6c5ce666e63f..cfc6c1659931 100644
--- a/Documentation/filesystems/ext4/inodes.rst
+++ b/Documentation/filesystems/ext4/inodes.rst
@@ -38,138 +38,138 @@ The inode table entry is laid out in ``struct ext4_inode``.
- Name
- Description
* - 0x0
- - \_\_le16
- - i\_mode
+ - __le16
+ - i_mode
- File mode. See the table i_mode_ below.
* - 0x2
- - \_\_le16
- - i\_uid
+ - __le16
+ - i_uid
- Lower 16-bits of Owner UID.
* - 0x4
- - \_\_le32
- - i\_size\_lo
+ - __le32
+ - i_size_lo
- Lower 32-bits of size in bytes.
* - 0x8
- - \_\_le32
- - i\_atime
- - Last access time, in seconds since the epoch. However, if the EA\_INODE
+ - __le32
+ - i_atime
+ - Last access time, in seconds since the epoch. However, if the EA_INODE
inode flag is set, this inode stores an extended attribute value and
this field contains the checksum of the value.
* - 0xC
- - \_\_le32
- - i\_ctime
+ - __le32
+ - i_ctime
- Last inode change time, in seconds since the epoch. However, if the
- EA\_INODE inode flag is set, this inode stores an extended attribute
+ EA_INODE inode flag is set, this inode stores an extended attribute
value and this field contains the lower 32 bits of the attribute value's
reference count.
* - 0x10
- - \_\_le32
- - i\_mtime
+ - __le32
+ - i_mtime
- Last data modification time, in seconds since the epoch. However, if the
- EA\_INODE inode flag is set, this inode stores an extended attribute
+ EA_INODE inode flag is set, this inode stores an extended attribute
value and this field contains the number of the inode that owns the
extended attribute.
* - 0x14
- - \_\_le32
- - i\_dtime
+ - __le32
+ - i_dtime
- Deletion Time, in seconds since the epoch.
* - 0x18
- - \_\_le16
- - i\_gid
+ - __le16
+ - i_gid
- Lower 16-bits of GID.
* - 0x1A
- - \_\_le16
- - i\_links\_count
+ - __le16
+ - i_links_count
- Hard link count. Normally, ext4 does not permit an inode to have more
than 65,000 hard links. This applies to files as well as directories,
which means that there cannot be more than 64,998 subdirectories in a
directory (each subdirectory's '..' entry counts as a hard link, as does
- the '.' entry in the directory itself). With the DIR\_NLINK feature
+ the '.' entry in the directory itself). With the DIR_NLINK feature
enabled, ext4 supports more than 64,998 subdirectories by setting this
field to 1 to indicate that the number of hard links is not known.
* - 0x1C
- - \_\_le32
- - i\_blocks\_lo
- - Lower 32-bits of “block” count. If the huge\_file feature flag is not
+ - __le32
+ - i_blocks_lo
+ - Lower 32-bits of “block” count. If the huge_file feature flag is not
set on the filesystem, the file consumes ``i_blocks_lo`` 512-byte blocks
- on disk. If huge\_file is set and EXT4\_HUGE\_FILE\_FL is NOT set in
+ on disk. If huge_file is set and EXT4_HUGE_FILE_FL is NOT set in
``inode.i_flags``, then the file consumes ``i_blocks_lo + (i_blocks_hi
- << 32)`` 512-byte blocks on disk. If huge\_file is set and
- EXT4\_HUGE\_FILE\_FL IS set in ``inode.i_flags``, then this file
+ << 32)`` 512-byte blocks on disk. If huge_file is set and
+ EXT4_HUGE_FILE_FL IS set in ``inode.i_flags``, then this file
consumes (``i_blocks_lo + i_blocks_hi`` << 32) filesystem blocks on
disk.
* - 0x20
- - \_\_le32
- - i\_flags
+ - __le32
+ - i_flags
- Inode flags. See the table i_flags_ below.
* - 0x24
- 4 bytes
- - i\_osd1
+ - i_osd1
- See the table i_osd1_ for more details.
* - 0x28
- 60 bytes
- - i\_block[EXT4\_N\_BLOCKS=15]
- - Block map or extent tree. See the section “The Contents of inode.i\_block”.
+ - i_block[EXT4_N_BLOCKS=15]
+ - Block map or extent tree. See the section “The Contents of inode.i_block”.
* - 0x64
- - \_\_le32
- - i\_generation
+ - __le32
+ - i_generation
- File version (for NFS).
* - 0x68
- - \_\_le32
- - i\_file\_acl\_lo
+ - __le32
+ - i_file_acl_lo
- Lower 32-bits of extended attribute block. ACLs are of course one of
many possible extended attributes; I think the name of this field is a
result of the first use of extended attributes being for ACLs.
* - 0x6C
- - \_\_le32
- - i\_size\_high / i\_dir\_acl
+ - __le32
+ - i_size_high / i_dir_acl
- Upper 32-bits of file/directory size. In ext2/3 this field was named
- i\_dir\_acl, though it was usually set to zero and never used.
+ i_dir_acl, though it was usually set to zero and never used.
* - 0x70
- - \_\_le32
- - i\_obso\_faddr
+ - __le32
+ - i_obso_faddr
- (Obsolete) fragment address.
* - 0x74
- 12 bytes
- - i\_osd2
+ - i_osd2
- See the table i_osd2_ for more details.
* - 0x80
- - \_\_le16
- - i\_extra\_isize
+ - __le16
+ - i_extra_isize
- Size of this inode - 128. Alternately, the size of the extended inode
fields beyond the original ext2 inode, including this field.
* - 0x82
- - \_\_le16
- - i\_checksum\_hi
+ - __le16
+ - i_checksum_hi
- Upper 16-bits of the inode checksum.
* - 0x84
- - \_\_le32
- - i\_ctime\_extra
+ - __le32
+ - i_ctime_extra
- Extra change time bits. This provides sub-second precision. See Inode
Timestamps section.
* - 0x88
- - \_\_le32
- - i\_mtime\_extra
+ - __le32
+ - i_mtime_extra
- Extra modification time bits. This provides sub-second precision.
* - 0x8C
- - \_\_le32
- - i\_atime\_extra
+ - __le32
+ - i_atime_extra
- Extra access time bits. This provides sub-second precision.
* - 0x90
- - \_\_le32
- - i\_crtime
+ - __le32
+ - i_crtime
- File creation time, in seconds since the epoch.
* - 0x94
- - \_\_le32
- - i\_crtime\_extra
+ - __le32
+ - i_crtime_extra
- Extra file creation time bits. This provides sub-second precision.
* - 0x98
- - \_\_le32
- - i\_version\_hi
+ - __le32
+ - i_version_hi
- Upper 32-bits for version number.
* - 0x9C
- - \_\_le32
- - i\_projid
+ - __le32
+ - i_projid
- Project ID.
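A C sketch of the three ``i_blocks_lo`` cases above (0x40000 is
EXT4_HUGE_FILE_FL, per the i_flags table below; the helper is
illustrative)::

    #include <stdint.h>

    static uint64_t inode_disk_bytes(uint32_t blocks_lo, uint16_t blocks_hi,
                                     uint32_t i_flags, int huge_file_feature,
                                     uint32_t block_size)
    {
        uint64_t n = ((uint64_t)blocks_hi << 32) | blocks_lo;

        if (!huge_file_feature)
            return (uint64_t)blocks_lo * 512;   /* hi half unused */
        if (i_flags & 0x40000)                  /* EXT4_HUGE_FILE_FL set */
            return n * block_size;              /* units of fs blocks */
        return n * 512;                         /* units of 512-byte sectors */
    }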
.. _i_mode:
@@ -183,45 +183,45 @@ The ``i_mode`` value is a combination of the following flags:
* - Value
- Description
* - 0x1
- - S\_IXOTH (Others may execute)
+ - S_IXOTH (Others may execute)
* - 0x2
- - S\_IWOTH (Others may write)
+ - S_IWOTH (Others may write)
* - 0x4
- - S\_IROTH (Others may read)
+ - S_IROTH (Others may read)
* - 0x8
- - S\_IXGRP (Group members may execute)
+ - S_IXGRP (Group members may execute)
* - 0x10
- - S\_IWGRP (Group members may write)
+ - S_IWGRP (Group members may write)
* - 0x20
- - S\_IRGRP (Group members may read)
+ - S_IRGRP (Group members may read)
* - 0x40
- - S\_IXUSR (Owner may execute)
+ - S_IXUSR (Owner may execute)
* - 0x80
- - S\_IWUSR (Owner may write)
+ - S_IWUSR (Owner may write)
* - 0x100
- - S\_IRUSR (Owner may read)
+ - S_IRUSR (Owner may read)
* - 0x200
- - S\_ISVTX (Sticky bit)
+ - S_ISVTX (Sticky bit)
* - 0x400
- - S\_ISGID (Set GID)
+ - S_ISGID (Set GID)
* - 0x800
- - S\_ISUID (Set UID)
+ - S_ISUID (Set UID)
* -
- These are mutually-exclusive file types:
* - 0x1000
- - S\_IFIFO (FIFO)
+ - S_IFIFO (FIFO)
* - 0x2000
- - S\_IFCHR (Character device)
+ - S_IFCHR (Character device)
* - 0x4000
- - S\_IFDIR (Directory)
+ - S_IFDIR (Directory)
* - 0x6000
- - S\_IFBLK (Block device)
+ - S_IFBLK (Block device)
* - 0x8000
- - S\_IFREG (Regular file)
+ - S_IFREG (Regular file)
* - 0xA000
- - S\_IFLNK (Symbolic link)
+ - S_IFLNK (Symbolic link)
* - 0xC000
- - S\_IFSOCK (Socket)
+ - S_IFSOCK (Socket)
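Since the type codes above are mutually exclusive, the conventional
0xF000 mask isolates them; a minimal sketch::

    #include <stdbool.h>
    #include <stdint.h>

    static bool inode_is_dir(uint16_t i_mode)
    {
        return (i_mode & 0xF000) == 0x4000;   /* S_IFDIR */
    }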
.. _i_flags:
@@ -234,56 +234,56 @@ The ``i_flags`` field is a combination of these values:
* - Value
- Description
* - 0x1
- - This file requires secure deletion (EXT4\_SECRM\_FL). (not implemented)
+ - This file requires secure deletion (EXT4_SECRM_FL). (not implemented)
* - 0x2
- This file should be preserved, should undeletion be desired
- (EXT4\_UNRM\_FL). (not implemented)
+ (EXT4_UNRM_FL). (not implemented)
* - 0x4
- - File is compressed (EXT4\_COMPR\_FL). (not really implemented)
+ - File is compressed (EXT4_COMPR_FL). (not really implemented)
* - 0x8
- - All writes to the file must be synchronous (EXT4\_SYNC\_FL).
+ - All writes to the file must be synchronous (EXT4_SYNC_FL).
* - 0x10
- - File is immutable (EXT4\_IMMUTABLE\_FL).
+ - File is immutable (EXT4_IMMUTABLE_FL).
* - 0x20
- - File can only be appended (EXT4\_APPEND\_FL).
+ - File can only be appended (EXT4_APPEND_FL).
* - 0x40
- - The dump(1) utility should not dump this file (EXT4\_NODUMP\_FL).
+ - The dump(1) utility should not dump this file (EXT4_NODUMP_FL).
* - 0x80
- - Do not update access time (EXT4\_NOATIME\_FL).
+ - Do not update access time (EXT4_NOATIME_FL).
* - 0x100
- - Dirty compressed file (EXT4\_DIRTY\_FL). (not used)
+ - Dirty compressed file (EXT4_DIRTY_FL). (not used)
* - 0x200
- - File has one or more compressed clusters (EXT4\_COMPRBLK\_FL). (not used)
+ - File has one or more compressed clusters (EXT4_COMPRBLK_FL). (not used)
* - 0x400
- - Do not compress file (EXT4\_NOCOMPR\_FL). (not used)
+ - Do not compress file (EXT4_NOCOMPR_FL). (not used)
* - 0x800
- - Encrypted inode (EXT4\_ENCRYPT\_FL). This bit value previously was
- EXT4\_ECOMPR\_FL (compression error), which was never used.
+ - Encrypted inode (EXT4_ENCRYPT_FL). This bit value previously was
+ EXT4_ECOMPR_FL (compression error), which was never used.
* - 0x1000
- - Directory has hashed indexes (EXT4\_INDEX\_FL).
+ - Directory has hashed indexes (EXT4_INDEX_FL).
* - 0x2000
- - AFS magic directory (EXT4\_IMAGIC\_FL).
+ - AFS magic directory (EXT4_IMAGIC_FL).
* - 0x4000
- File data must always be written through the journal
- (EXT4\_JOURNAL\_DATA\_FL).
+ (EXT4_JOURNAL_DATA_FL).
* - 0x8000
- - File tail should not be merged (EXT4\_NOTAIL\_FL). (not used by ext4)
+ - File tail should not be merged (EXT4_NOTAIL_FL). (not used by ext4)
* - 0x10000
- All directory entry data should be written synchronously (see
- ``dirsync``) (EXT4\_DIRSYNC\_FL).
+ ``dirsync``) (EXT4_DIRSYNC_FL).
* - 0x20000
- - Top of directory hierarchy (EXT4\_TOPDIR\_FL).
+ - Top of directory hierarchy (EXT4_TOPDIR_FL).
* - 0x40000
- - This is a huge file (EXT4\_HUGE\_FILE\_FL).
+ - This is a huge file (EXT4_HUGE_FILE_FL).
* - 0x80000
- - Inode uses extents (EXT4\_EXTENTS\_FL).
+ - Inode uses extents (EXT4_EXTENTS_FL).
* - 0x100000
- - Verity protected file (EXT4\_VERITY\_FL).
+ - Verity protected file (EXT4_VERITY_FL).
* - 0x200000
- Inode stores a large extended attribute value in its data blocks
- (EXT4\_EA\_INODE\_FL).
+ (EXT4_EA_INODE_FL).
* - 0x400000
- - This file has blocks allocated past EOF (EXT4\_EOFBLOCKS\_FL).
+ - This file has blocks allocated past EOF (EXT4_EOFBLOCKS_FL).
(deprecated)
* - 0x01000000
- Inode is a snapshot (``EXT4_SNAPFILE_FL``). (not in mainline)
@@ -294,21 +294,21 @@ The ``i_flags`` field is a combination of these values:
- Snapshot shrink has completed (``EXT4_SNAPFILE_SHRUNK_FL``). (not in
mainline)
* - 0x10000000
- - Inode has inline data (EXT4\_INLINE\_DATA\_FL).
+ - Inode has inline data (EXT4_INLINE_DATA_FL).
* - 0x20000000
- - Create children with the same project ID (EXT4\_PROJINHERIT\_FL).
+ - Create children with the same project ID (EXT4_PROJINHERIT_FL).
* - 0x80000000
- - Reserved for ext4 library (EXT4\_RESERVED\_FL).
+ - Reserved for ext4 library (EXT4_RESERVED_FL).
* -
- Aggregate flags:
* - 0x705BDFFF
- User-visible flags.
* - 0x604BC0FF
- - User-modifiable flags. Note that while EXT4\_JOURNAL\_DATA\_FL and
- EXT4\_EXTENTS\_FL can be set with setattr, they are not in the kernel's
- EXT4\_FL\_USER\_MODIFIABLE mask, since it needs to handle the setting of
+ - User-modifiable flags. Note that while EXT4_JOURNAL_DATA_FL and
+ EXT4_EXTENTS_FL can be set with setattr, they are not in the kernel's
+ EXT4_FL_USER_MODIFIABLE mask, since it needs to handle the setting of
these flags in a special manner and they are masked out of the set of
- flags that are saved directly to i\_flags.
+ flags that are saved directly to i_flags.
.. _i_osd1:
@@ -325,9 +325,9 @@ Linux:
- Name
- Description
* - 0x0
- - \_\_le32
- - l\_i\_version
- - Inode version. However, if the EA\_INODE inode flag is set, this inode
+ - __le32
+ - l_i_version
+ - Inode version. However, if the EA_INODE inode flag is set, this inode
stores an extended attribute value and this field contains the upper 32
bits of the attribute value's reference count.
@@ -342,8 +342,8 @@ Hurd:
- Name
- Description
* - 0x0
- - \_\_le32
- - h\_i\_translator
+ - __le32
+ - h_i_translator
- ??
Masix:
@@ -357,8 +357,8 @@ Masix:
- Name
- Description
* - 0x0
- - \_\_le32
- - m\_i\_reserved
+ - __le32
+ - m_i_reserved
- ??
.. _i_osd2:
@@ -376,30 +376,30 @@ Linux:
- Name
- Description
* - 0x0
- - \_\_le16
- - l\_i\_blocks\_high
+ - __le16
+ - l_i_blocks_high
- Upper 16-bits of the block count. Please see the note attached to
- i\_blocks\_lo.
+ i_blocks_lo.
* - 0x2
- - \_\_le16
- - l\_i\_file\_acl\_high
+ - __le16
+ - l_i_file_acl_high
- Upper 16-bits of the extended attribute block (historically, the file
ACL location). See the Extended Attributes section below.
* - 0x4
- - \_\_le16
- - l\_i\_uid\_high
+ - __le16
+ - l_i_uid_high
- Upper 16-bits of the Owner UID.
* - 0x6
- - \_\_le16
- - l\_i\_gid\_high
+ - __le16
+ - l_i_gid_high
- Upper 16-bits of the GID.
* - 0x8
- - \_\_le16
- - l\_i\_checksum\_lo
+ - __le16
+ - l_i_checksum_lo
- Lower 16-bits of the inode checksum.
* - 0xA
- - \_\_le16
- - l\_i\_reserved
+ - __le16
+ - l_i_reserved
- Unused.
Hurd:
@@ -413,24 +413,24 @@ Hurd:
- Name
- Description
* - 0x0
- - \_\_le16
- - h\_i\_reserved1
+ - __le16
+ - h_i_reserved1
- ??
* - 0x2
- - \_\_u16
- - h\_i\_mode\_high
+ - __u16
+ - h_i_mode_high
- Upper 16-bits of the file mode.
* - 0x4
- - \_\_le16
- - h\_i\_uid\_high
+ - __le16
+ - h_i_uid_high
- Upper 16-bits of the Owner UID.
* - 0x6
- - \_\_le16
- - h\_i\_gid\_high
+ - __le16
+ - h_i_gid_high
- Upper 16-bits of the GID.
* - 0x8
- - \_\_u32
- - h\_i\_author
+ - __u32
+ - h_i_author
- Author code?
Masix:
@@ -444,17 +444,17 @@ Masix:
- Name
- Description
* - 0x0
- - \_\_le16
- - h\_i\_reserved1
+ - __le16
+ - h_i_reserved1
- ??
* - 0x2
- - \_\_u16
- - m\_i\_file\_acl\_high
+ - __u16
+ - m_i_file_acl_high
- Upper 16-bits of the extended attribute block (historically, the file
ACL location).
* - 0x4
- - \_\_u32
- - m\_i\_reserved2[2]
+ - __u32
+ - m_i_reserved2[2]
- ??
Inode Size
@@ -466,11 +466,11 @@ In ext2 and ext3, the inode structure size was fixed at 128 bytes
on-disk inode at format time for all inodes in the filesystem to provide
space beyond the end of the original ext2 inode. The on-disk inode
record size is recorded in the superblock as ``s_inode_size``. The
-number of bytes actually used by struct ext4\_inode beyond the original
+number of bytes actually used by struct ext4_inode beyond the original
128-byte ext2 inode is recorded in the ``i_extra_isize`` field for each
-inode, which allows struct ext4\_inode to grow for a new kernel without
+inode, which allows struct ext4_inode to grow for a new kernel without
having to upgrade all of the on-disk inodes. Access to fields beyond
-EXT2\_GOOD\_OLD\_INODE\_SIZE should be verified to be within
+EXT2_GOOD_OLD_INODE_SIZE should be verified to be within
``i_extra_isize``. By default, ext4 inode records are 256 bytes, and (as
of August 2019) the inode structure is 160 bytes
(``i_extra_isize = 32``). The extra space between the end of the inode
@@ -516,7 +516,7 @@ creation time (crtime); this field is 64-bits wide and decoded in the
same manner as 64-bit [cma]time. Neither crtime nor dtime are accessible
through the regular stat() interface, though debugfs will report them.
-We use the 32-bit signed time value plus (2^32 \* (extra epoch bits)).
+We use the 32-bit signed time value plus (2^32 * (extra epoch bits)).
In other words:
.. list-table::
@@ -525,8 +525,8 @@ In other words:
* - Extra epoch bits
- MSB of 32-bit time
- - Adjustment for signed 32-bit to 64-bit tv\_sec
- - Decoded 64-bit tv\_sec
+ - Adjustment for signed 32-bit to 64-bit tv_sec
+ - Decoded 64-bit tv_sec
- valid time range
* - 0 0
- 1
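A sketch of the decoding rule quoted above, assuming the usual ext4
layout in which the two extra epoch bits are the low bits of the
corresponding ``_extra`` field::

    #include <stdint.h>

    static int64_t decode_tv_sec(uint32_t seconds, uint32_t extra)
    {
        int64_t epoch = extra & 0x3;              /* extra epoch bits */
        return (int64_t)(int32_t)seconds + (epoch << 32);
    }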
diff --git a/Documentation/filesystems/ext4/journal.rst b/Documentation/filesystems/ext4/journal.rst
index 5fad38860f17..a6bef5293a60 100644
--- a/Documentation/filesystems/ext4/journal.rst
+++ b/Documentation/filesystems/ext4/journal.rst
@@ -63,8 +63,8 @@ Generally speaking, the journal has this format:
:header-rows: 1
* - Superblock
- - descriptor\_block (data\_blocks or revocation\_block) [more data or
- revocations] commmit\_block
+ - descriptor_block (data_blocks or revocation_block) [more data or
+ revocations] commit_block
- [more transactions...]
* -
- One transaction
@@ -93,8 +93,8 @@ superblock.
* - 1024 bytes of padding
- ext4 Superblock
- Journal Superblock
- - descriptor\_block (data\_blocks or revocation\_block) [more data or
- revocations] commmit\_block
+ - descriptor_block (data_blocks or revocation_block) [more data or
+ revocations] commit_block
- [more transactions...]
* -
-
@@ -117,17 +117,17 @@ Every block in the journal starts with a common 12-byte header
- Name
- Description
* - 0x0
- - \_\_be32
- - h\_magic
+ - __be32
+ - h_magic
- jbd2 magic number, 0xC03B3998.
* - 0x4
- - \_\_be32
- - h\_blocktype
+ - __be32
+ - h_blocktype
- Description of what this block contains. See the jbd2_blocktype_ table
below.
* - 0x8
- - \_\_be32
- - h\_sequence
+ - __be32
+ - h_sequence
- The transaction ID that goes with this block.
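For reference, the 12-byte header above as a C sketch (all fields are
big-endian on disk)::

    #include <stdint.h>

    struct journal_header_s {
        uint32_t h_magic;       /* 0xC03B3998 */
        uint32_t h_blocktype;
        uint32_t h_sequence;    /* transaction ID */
    };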
.. _jbd2_blocktype:
@@ -177,99 +177,99 @@ which is 1024 bytes long:
-
- Static information describing the journal.
* - 0x0
- - journal\_header\_t (12 bytes)
- - s\_header
+ - journal_header_t (12 bytes)
+ - s_header
- Common header identifying this as a superblock.
* - 0xC
- - \_\_be32
- - s\_blocksize
+ - __be32
+ - s_blocksize
- Journal device block size.
* - 0x10
- - \_\_be32
- - s\_maxlen
+ - __be32
+ - s_maxlen
- Total number of blocks in this journal.
* - 0x14
- - \_\_be32
- - s\_first
+ - __be32
+ - s_first
- First block of log information.
* -
-
-
- Dynamic information describing the current state of the log.
* - 0x18
- - \_\_be32
- - s\_sequence
+ - __be32
+ - s_sequence
- First commit ID expected in log.
* - 0x1C
- - \_\_be32
- - s\_start
+ - __be32
+ - s_start
- Block number of the start of log. Contrary to the comments, this field
being zero does not imply that the journal is clean!
* - 0x20
- - \_\_be32
- - s\_errno
- - Error value, as set by jbd2\_journal\_abort().
+ - __be32
+ - s_errno
+ - Error value, as set by jbd2_journal_abort().
* -
-
-
- The remaining fields are only valid in a v2 superblock.
* - 0x24
- - \_\_be32
- - s\_feature\_compat;
+ - __be32
+ - s_feature_compat;
- Compatible feature set. See the table jbd2_compat_ below.
* - 0x28
- - \_\_be32
- - s\_feature\_incompat
+ - __be32
+ - s_feature_incompat
- Incompatible feature set. See the table jbd2_incompat_ below.
* - 0x2C
- - \_\_be32
- - s\_feature\_ro\_compat
+ - __be32
+ - s_feature_ro_compat
- Read-only compatible feature set. There aren't any of these currently.
* - 0x30
- - \_\_u8
- - s\_uuid[16]
+ - __u8
+ - s_uuid[16]
- 128-bit uuid for journal. This is compared against the copy in the ext4
super block at mount time.
* - 0x40
- - \_\_be32
- - s\_nr\_users
+ - __be32
+ - s_nr_users
- Number of file systems sharing this journal.
* - 0x44
- - \_\_be32
- - s\_dynsuper
+ - __be32
+ - s_dynsuper
- Location of dynamic super block copy. (Not used?)
* - 0x48
- - \_\_be32
- - s\_max\_transaction
+ - __be32
+ - s_max_transaction
- Limit of journal blocks per transaction. (Not used?)
* - 0x4C
- - \_\_be32
- - s\_max\_trans\_data
+ - __be32
+ - s_max_trans_data
- Limit of data blocks per transaction. (Not used?)
* - 0x50
- - \_\_u8
- - s\_checksum\_type
+ - __u8
+ - s_checksum_type
- Checksum algorithm used for the journal. See jbd2_checksum_type_ for
more info.
* - 0x51
- - \_\_u8[3]
- - s\_padding2
+ - __u8[3]
+ - s_padding2
-
* - 0x54
- - \_\_be32
- - s\_num\_fc\_blocks
+ - __be32
+ - s_num_fc_blocks
- Number of fast commit blocks in the journal.
* - 0x58
- - \_\_u32
- - s\_padding[42]
+ - __u32
+ - s_padding[42]
-
* - 0xFC
- - \_\_be32
- - s\_checksum
+ - __be32
+ - s_checksum
- Checksum of the entire superblock, with this field set to zero.
* - 0x100
- - \_\_u8
- - s\_users[16\*48]
+ - __u8
+ - s_users[16*48]
- IDs of all file systems sharing the log. e2fsprogs/Linux don't allow
shared external journals, but I imagine Lustre (or ocfs2?), which use
the jbd2 code, might.
@@ -286,7 +286,7 @@ The journal compat features are any combination of the following:
- Description
* - 0x1
- Journal maintains checksums on the data blocks.
- (JBD2\_FEATURE\_COMPAT\_CHECKSUM)
+ (JBD2_FEATURE_COMPAT_CHECKSUM)
.. _jbd2_incompat:
@@ -299,23 +299,23 @@ The journal incompat features are any combination of the following:
* - Value
- Description
* - 0x1
- - Journal has block revocation records. (JBD2\_FEATURE\_INCOMPAT\_REVOKE)
+ - Journal has block revocation records. (JBD2_FEATURE_INCOMPAT_REVOKE)
* - 0x2
- Journal can deal with 64-bit block numbers.
- (JBD2\_FEATURE\_INCOMPAT\_64BIT)
+ (JBD2_FEATURE_INCOMPAT_64BIT)
* - 0x4
- - Journal commits asynchronously. (JBD2\_FEATURE\_INCOMPAT\_ASYNC\_COMMIT)
+ - Journal commits asynchronously. (JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)
* - 0x8
- This journal uses v2 of the checksum on-disk format. Each journal
metadata block gets its own checksum, and the block tags in the
descriptor table contain checksums for each of the data blocks in the
- journal. (JBD2\_FEATURE\_INCOMPAT\_CSUM\_V2)
+ journal. (JBD2_FEATURE_INCOMPAT_CSUM_V2)
* - 0x10
- This journal uses v3 of the checksum on-disk format. This is the same as
v2, but the journal block tag size is fixed regardless of the size of
- block numbers. (JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3)
+ block numbers. (JBD2_FEATURE_INCOMPAT_CSUM_V3)
* - 0x20
- - Journal has fast commit blocks. (JBD2\_FEATURE\_INCOMPAT\_FAST\_COMMIT)
+ - Journal has fast commit blocks. (JBD2_FEATURE_INCOMPAT_FAST_COMMIT)
.. _jbd2_checksum_type:
@@ -355,11 +355,11 @@ Descriptor blocks consume at least 36 bytes, but use a full block:
- Name
- Descriptor
* - 0x0
- - journal\_header\_t
+ - journal_header_t
- (open coded)
- Common block header.
* - 0xC
- - struct journal\_block\_tag\_s
+ - struct journal_block_tag_s
- open coded array[]
- Enough tags either to fill up the block or to describe all the data
blocks that follow this descriptor block.
@@ -367,7 +367,7 @@ Descriptor blocks consume at least 36 bytes, but use a full block:
Journal block tags have any of the following formats, depending on which
journal feature and block tag flags are set.
-If JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 is set, the journal block tag is
+If JBD2_FEATURE_INCOMPAT_CSUM_V3 is set, the journal block tag is
defined as ``struct journal_block_tag3_s``, which looks like the
following. The size is 16 or 32 bytes.
@@ -380,24 +380,24 @@ following. The size is 16 or 32 bytes.
- Name
- Descriptor
* - 0x0
- - \_\_be32
- - t\_blocknr
+ - __be32
+ - t_blocknr
- Lower 32-bits of the location of where the corresponding data block
should end up on disk.
* - 0x4
- - \_\_be32
- - t\_flags
+ - __be32
+ - t_flags
- Flags that go with the descriptor. See the table jbd2_tag_flags_ for
more info.
* - 0x8
- - \_\_be32
- - t\_blocknr\_high
+ - __be32
+ - t_blocknr_high
- Upper 32-bits of the location of where the corresponding data block
- should end up on disk. This is zero if JBD2\_FEATURE\_INCOMPAT\_64BIT is
+ should end up on disk. This is zero if JBD2_FEATURE_INCOMPAT_64BIT is
not enabled.
* - 0xC
- - \_\_be32
- - t\_checksum
+ - __be32
+ - t_checksum
- Checksum of the journal UUID, the sequence number, and the data block.
* -
-
@@ -433,7 +433,7 @@ The journal tag flags are any combination of the following:
* - 0x8
- This is the last tag in this descriptor block.
-If JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 is NOT set, the journal block tag
+If JBD2_FEATURE_INCOMPAT_CSUM_V3 is NOT set, the journal block tag
is defined as ``struct journal_block_tag_s``, which looks like the
following. The size is 8, 12, 24, or 28 bytes:
@@ -446,18 +446,18 @@ following. The size is 8, 12, 24, or 28 bytes:
- Name
- Descriptor
* - 0x0
- - \_\_be32
- - t\_blocknr
+ - __be32
+ - t_blocknr
- Lower 32-bits of the location of where the corresponding data block
should end up on disk.
* - 0x4
- - \_\_be16
- - t\_checksum
+ - __be16
+ - t_checksum
- Checksum of the journal UUID, the sequence number, and the data block.
Note that only the lower 16 bits are stored.
* - 0x6
- - \_\_be16
- - t\_flags
+ - __be16
+ - t_flags
- Flags that go with the descriptor. See the table jbd2_tag_flags_ for
more info.
* -
@@ -466,8 +466,8 @@ following. The size is 8, 12, 24, or 28 bytes:
- This next field is only present if the super block indicates support for
64-bit block numbers.
* - 0x8
- - \_\_be32
- - t\_blocknr\_high
+ - __be32
+ - t_blocknr_high
- Upper 32-bits of the location of where the corresponding data block
should end up on disk.
* -
@@ -483,8 +483,8 @@ following. The size is 8, 12, 24, or 28 bytes:
``j_uuid`` field in ``struct journal_s``, but only tune2fs touches that
field.
-If JBD2\_FEATURE\_INCOMPAT\_CSUM\_V2 or
-JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 are set, the end of the block is a
+If JBD2_FEATURE_INCOMPAT_CSUM_V2 or
+JBD2_FEATURE_INCOMPAT_CSUM_V3 are set, the end of the block is a
``struct jbd2_journal_block_tail``, which looks like this:
.. list-table::
@@ -496,8 +496,8 @@ JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 are set, the end of the block is a
- Name
- Descriptor
* - 0x0
- - \_\_be32
- - t\_checksum
+ - __be32
+ - t_checksum
- Checksum of the journal UUID + the descriptor block, with this field set
to zero.
@@ -538,25 +538,25 @@ length, but use a full block:
- Name
- Description
* - 0x0
- - journal\_header\_t
- - r\_header
+ - journal_header_t
+ - r_header
- Common block header.
* - 0xC
- - \_\_be32
- - r\_count
+ - __be32
+ - r_count
- Number of bytes used in this block.
* - 0x10
- - \_\_be32 or \_\_be64
+ - __be32 or __be64
- blocks[0]
- Blocks to revoke.
-After r\_count is a linear array of block numbers that are effectively
+After r_count is a linear array of block numbers that are effectively
revoked by this transaction. The size of each block number is 8 bytes if
the superblock advertises 64-bit block number support, or 4 bytes
otherwise.
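A sketch of walking those records, assuming 4-byte block numbers (the
64-bit feature off); the ``be32`` helper is illustrative::

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t be32(const uint8_t *p)
    {
        return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
               ((uint32_t)p[2] << 8) | p[3];
    }

    /* r_count at offset 0xC counts bytes used; records start at 0x10. */
    static void walk_revoke_block(const uint8_t *blk)
    {
        uint32_t count = be32(blk + 0xC);
        for (uint32_t off = 0x10; off + 4 <= count; off += 4)
            printf("revoked block %u\n", be32(blk + off));
    }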
-If JBD2\_FEATURE\_INCOMPAT\_CSUM\_V2 or
-JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 are set, the end of the revocation
+If JBD2_FEATURE_INCOMPAT_CSUM_V2 or
+JBD2_FEATURE_INCOMPAT_CSUM_V3 are set, the end of the revocation
block is a ``struct jbd2_journal_revoke_tail``, which has this format:
.. list-table::
@@ -568,8 +568,8 @@ block is a ``struct jbd2_journal_revoke_tail``, which has this format:
- Name
- Description
* - 0x0
- - \_\_be32
- - r\_checksum
+ - __be32
+ - r_checksum
- Checksum of the journal UUID + revocation block
Commit Block
@@ -592,38 +592,38 @@ bytes long (but uses a full block):
- Name
- Descriptor
* - 0x0
- - journal\_header\_s
+ - journal_header_s
- (open coded)
- Common block header.
* - 0xC
- unsigned char
- - h\_chksum\_type
+ - h_chksum_type
- The type of checksum to use to verify the integrity of the data blocks
in the transaction. See jbd2_checksum_type_ for more info.
* - 0xD
- unsigned char
- - h\_chksum\_size
+ - h_chksum_size
- The number of bytes used by the checksum. Most likely 4.
* - 0xE
- unsigned char
- - h\_padding[2]
+ - h_padding[2]
-
* - 0x10
- - \_\_be32
- - h\_chksum[JBD2\_CHECKSUM\_BYTES]
+ - __be32
+ - h_chksum[JBD2_CHECKSUM_BYTES]
- 32 bytes of space to store checksums. If
- JBD2\_FEATURE\_INCOMPAT\_CSUM\_V2 or JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3
+ JBD2_FEATURE_INCOMPAT_CSUM_V2 or JBD2_FEATURE_INCOMPAT_CSUM_V3
are set, the first ``__be32`` is the checksum of the journal UUID and
the entire commit block, with this field zeroed. If
- JBD2\_FEATURE\_COMPAT\_CHECKSUM is set, the first ``__be32`` is the
+ JBD2_FEATURE_COMPAT_CHECKSUM is set, the first ``__be32`` is the
crc32 of all the blocks already written to the transaction.
* - 0x30
- - \_\_be64
- - h\_commit\_sec
+ - __be64
+ - h_commit_sec
- The time that the transaction was committed, in seconds since the epoch.
* - 0x38
- - \_\_be32
- - h\_commit\_nsec
+ - __be32
+ - h_commit_nsec
- Nanoseconds component of the above timestamp.
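The commit block above as a C sketch; JBD2_CHECKSUM_BYTES is taken to be
8, matching the 32 bytes of checksum space noted in the table::

    #include <stdint.h>

    struct commit_header {
        uint32_t h_magic;        /* 0x0: common journal header, big-endian */
        uint32_t h_blocktype;
        uint32_t h_sequence;
        uint8_t  h_chksum_type;  /* 0xC */
        uint8_t  h_chksum_size;  /* 0xD */
        uint8_t  h_padding[2];   /* 0xE */
        uint32_t h_chksum[8];    /* 0x10: 32 bytes of checksum space */
        uint64_t h_commit_sec;   /* 0x30 */
        uint32_t h_commit_nsec;  /* 0x38 */
    };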
Fast commits
diff --git a/Documentation/filesystems/ext4/mmp.rst b/Documentation/filesystems/ext4/mmp.rst
index 25660981d93c..174dd6538737 100644
--- a/Documentation/filesystems/ext4/mmp.rst
+++ b/Documentation/filesystems/ext4/mmp.rst
@@ -7,8 +7,8 @@ Multiple mount protection (MMP) is a feature that protects the
filesystem against multiple hosts trying to use the filesystem
simultaneously. When a filesystem is opened (for mounting, or fsck,
etc.), the MMP code running on the node (call it node A) checks a
-sequence number. If the sequence number is EXT4\_MMP\_SEQ\_CLEAN, the
-open continues. If the sequence number is EXT4\_MMP\_SEQ\_FSCK, then
+sequence number. If the sequence number is EXT4_MMP_SEQ_CLEAN, the
+open continues. If the sequence number is EXT4_MMP_SEQ_FSCK, then
fsck is (hopefully) running, and open fails immediately. Otherwise, the
open code will wait for twice the specified MMP check interval and check
the sequence number again. If the sequence number has changed, then the
@@ -40,38 +40,38 @@ The MMP structure (``struct mmp_struct``) is as follows:
- Name
- Description
* - 0x0
- - \_\_le32
- - mmp\_magic
+ - __le32
+ - mmp_magic
- Magic number for MMP, 0x004D4D50 (“MMP”).
* - 0x4
- - \_\_le32
- - mmp\_seq
+ - __le32
+ - mmp_seq
- Sequence number, updated periodically.
* - 0x8
- - \_\_le64
- - mmp\_time
+ - __le64
+ - mmp_time
- Time that the MMP block was last updated.
* - 0x10
- char[64]
- - mmp\_nodename
+ - mmp_nodename
- Hostname of the node that opened the filesystem.
* - 0x50
- char[32]
- - mmp\_bdevname
+ - mmp_bdevname
- Block device name of the filesystem.
* - 0x70
- - \_\_le16
- - mmp\_check\_interval
+ - __le16
+ - mmp_check_interval
- The MMP re-check interval, in seconds.
* - 0x72
- - \_\_le16
- - mmp\_pad1
+ - __le16
+ - mmp_pad1
- Zero.
* - 0x74
- - \_\_le32[226]
- - mmp\_pad2
+ - __le32[226]
+ - mmp_pad2
- Zero.
* - 0x3FC
- - \_\_le32
- - mmp\_checksum
+ - __le32
+ - mmp_checksum
- Checksum of the MMP block.
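The table above as a C sketch (offsets noted in comments; multi-byte
fields are little-endian on disk)::

    #include <stdint.h>

    struct mmp_struct {
        uint32_t mmp_magic;            /* 0x0: 0x004D4D50 */
        uint32_t mmp_seq;              /* 0x4 */
        uint64_t mmp_time;             /* 0x8 */
        char     mmp_nodename[64];     /* 0x10 */
        char     mmp_bdevname[32];     /* 0x50 */
        uint16_t mmp_check_interval;   /* 0x70 */
        uint16_t mmp_pad1;             /* 0x72 */
        uint32_t mmp_pad2[226];        /* 0x74 */
        uint32_t mmp_checksum;         /* 0x3FC */
    };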
diff --git a/Documentation/filesystems/ext4/overview.rst b/Documentation/filesystems/ext4/overview.rst
index 123ebfde47ee..0fad6eda6e15 100644
--- a/Documentation/filesystems/ext4/overview.rst
+++ b/Documentation/filesystems/ext4/overview.rst
@@ -7,7 +7,7 @@ An ext4 file system is split into a series of block groups. To reduce
performance difficulties due to fragmentation, the block allocator tries
very hard to keep each file's blocks within the same group, thereby
reducing seek times. The size of a block group is specified in
-``sb.s_blocks_per_group`` blocks, though it can also calculated as 8 \*
+``sb.s_blocks_per_group`` blocks, though it can also be calculated as 8 *
``block_size_in_bytes``. With the default block size of 4KiB, each group
will contain 32,768 blocks, for a length of 128MiB. The number of block
groups is the size of the device divided by the size of a block group.
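As a quick sketch of that arithmetic (the function name is
illustrative)::

    #include <stdint.h>

    /* One block bitmap block holds 8 bits per byte, so a group spans
     * 8 * block_size blocks: 32768 blocks (128 MiB) at 4 KiB. */
    static uint32_t blocks_per_group(uint32_t block_size_bytes)
    {
        return 8 * block_size_bytes;
    }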
diff --git a/Documentation/filesystems/ext4/special_inodes.rst b/Documentation/filesystems/ext4/special_inodes.rst
index 94f304e3a0a7..fc0636901fa0 100644
--- a/Documentation/filesystems/ext4/special_inodes.rst
+++ b/Documentation/filesystems/ext4/special_inodes.rst
@@ -34,7 +34,7 @@ ext4 reserves some inode for special features, as follows:
* - 10
- Replica inode, used for some non-upstream feature?
* - 11
- - Traditional first non-reserved inode. Usually this is the lost+found directory. See s\_first\_ino in the superblock.
+ - Traditional first non-reserved inode. Usually this is the lost+found directory. See s_first_ino in the superblock.
Note that there are also some inodes allocated from non-reserved inode numbers
for other filesystem features which are not referenced from standard directory
@@ -47,9 +47,9 @@ hierarchy. These are generally reference from the superblock. They are:
* - Superblock field
- Description
- * - s\_lpf\_ino
+ * - s_lpf_ino
- Inode number of lost+found directory.
- * - s\_prj\_quota\_inum
+ * - s_prj_quota_inum
- Inode number of quota file tracking project quotas
- * - s\_orphan\_file\_inum
+ * - s_orphan_file_inum
- Inode number of file tracking orphan inodes.
diff --git a/Documentation/filesystems/ext4/super.rst b/Documentation/filesystems/ext4/super.rst
index f6a548e957bb..268888522e35 100644
--- a/Documentation/filesystems/ext4/super.rst
+++ b/Documentation/filesystems/ext4/super.rst
@@ -7,7 +7,7 @@ The superblock records various information about the enclosing
filesystem, such as block counts, inode counts, supported features,
maintenance information, and more.
-If the sparse\_super feature flag is set, redundant copies of the
+If the sparse_super feature flag is set, redundant copies of the
superblock and group descriptors are kept only in the groups whose group
number is either 0 or a power of 3, 5, or 7. If the flag is not set,
redundant copies are kept in all groups.
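A sketch of that placement rule (helper names are illustrative; exponent
0 counts, so group 1 also carries backups)::

    #include <stdbool.h>
    #include <stdint.h>

    static bool is_power_of(uint64_t g, uint64_t base)
    {
        while (g > 1 && g % base == 0)
            g /= base;
        return g == 1;
    }

    /* True if group g keeps superblock/descriptor backups under
     * sparse_super: group 0, or a power of 3, 5, or 7. */
    static bool group_has_backup(uint64_t g)
    {
        return g == 0 || is_power_of(g, 3) ||
               is_power_of(g, 5) || is_power_of(g, 7);
    }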
@@ -27,107 +27,107 @@ The ext4 superblock is laid out as follows in
- Name
- Description
* - 0x0
- - \_\_le32
- - s\_inodes\_count
+ - __le32
+ - s_inodes_count
- Total inode count.
* - 0x4
- - \_\_le32
- - s\_blocks\_count\_lo
+ - __le32
+ - s_blocks_count_lo
- Total block count.
* - 0x8
- - \_\_le32
- - s\_r\_blocks\_count\_lo
+ - __le32
+ - s_r_blocks_count_lo
- This number of blocks can only be allocated by the super-user.
* - 0xC
- - \_\_le32
- - s\_free\_blocks\_count\_lo
+ - __le32
+ - s_free_blocks_count_lo
- Free block count.
* - 0x10
- - \_\_le32
- - s\_free\_inodes\_count
+ - __le32
+ - s_free_inodes_count
- Free inode count.
* - 0x14
- - \_\_le32
- - s\_first\_data\_block
+ - __le32
+ - s_first_data_block
- First data block. This must be at least 1 for 1k-block filesystems and
is typically 0 for all other block sizes.
* - 0x18
- - \_\_le32
- - s\_log\_block\_size
- - Block size is 2 ^ (10 + s\_log\_block\_size).
+ - __le32
+ - s_log_block_size
+ - Block size is 2 ^ (10 + s_log_block_size).
* - 0x1C
- - \_\_le32
- - s\_log\_cluster\_size
- - Cluster size is 2 ^ (10 + s\_log\_cluster\_size) blocks if bigalloc is
- enabled. Otherwise s\_log\_cluster\_size must equal s\_log\_block\_size.
+ - __le32
+ - s_log_cluster_size
+ - Cluster size is 2 ^ (10 + s_log_cluster_size) blocks if bigalloc is
+ enabled. Otherwise s_log_cluster_size must equal s_log_block_size.
* - 0x20
- - \_\_le32
- - s\_blocks\_per\_group
+ - __le32
+ - s_blocks_per_group
- Blocks per group.
* - 0x24
- - \_\_le32
- - s\_clusters\_per\_group
+ - __le32
+ - s_clusters_per_group
- Clusters per group, if bigalloc is enabled. Otherwise
- s\_clusters\_per\_group must equal s\_blocks\_per\_group.
+ s_clusters_per_group must equal s_blocks_per_group.
* - 0x28
- - \_\_le32
- - s\_inodes\_per\_group
+ - __le32
+ - s_inodes_per_group
- Inodes per group.
* - 0x2C
- - \_\_le32
- - s\_mtime
+ - __le32
+ - s_mtime
- Mount time, in seconds since the epoch.
* - 0x30
- - \_\_le32
- - s\_wtime
+ - __le32
+ - s_wtime
- Write time, in seconds since the epoch.
* - 0x34
- - \_\_le16
- - s\_mnt\_count
+ - __le16
+ - s_mnt_count
- Number of mounts since the last fsck.
* - 0x36
- - \_\_le16
- - s\_max\_mnt\_count
+ - __le16
+ - s_max_mnt_count
- Number of mounts beyond which a fsck is needed.
* - 0x38
- - \_\_le16
- - s\_magic
+ - __le16
+ - s_magic
- Magic signature, 0xEF53
* - 0x3A
- - \_\_le16
- - s\_state
+ - __le16
+ - s_state
- File system state. See super_state_ for more info.
* - 0x3C
- - \_\_le16
- - s\_errors
+ - __le16
+ - s_errors
- Behaviour when detecting errors. See super_errors_ for more info.
* - 0x3E
- - \_\_le16
- - s\_minor\_rev\_level
+ - __le16
+ - s_minor_rev_level
- Minor revision level.
* - 0x40
- - \_\_le32
- - s\_lastcheck
+ - __le32
+ - s_lastcheck
- Time of last check, in seconds since the epoch.
* - 0x44
- - \_\_le32
- - s\_checkinterval
+ - __le32
+ - s_checkinterval
- Maximum time between checks, in seconds.
* - 0x48
- - \_\_le32
- - s\_creator\_os
+ - __le32
+ - s_creator_os
- Creator OS. See the table super_creator_ for more info.
* - 0x4C
- - \_\_le32
- - s\_rev\_level
+ - __le32
+ - s_rev_level
- Revision level. See the table super_revision_ for more info.
* - 0x50
- - \_\_le16
- - s\_def\_resuid
+ - __le16
+ - s_def_resuid
- Default uid for reserved blocks.
* - 0x52
- - \_\_le16
- - s\_def\_resgid
+ - __le16
+ - s_def_resgid
- Default gid for reserved blocks.
* -
-
@@ -143,50 +143,50 @@ The ext4 superblock is laid out as follows in
about a feature in either the compatible or incompatible feature set, it
must abort and not try to meddle with things it doesn't understand...
* - 0x54
- - \_\_le32
- - s\_first\_ino
+ - __le32
+ - s_first_ino
- First non-reserved inode.
* - 0x58
- - \_\_le16
- - s\_inode\_size
+ - __le16
+ - s_inode_size
- Size of inode structure, in bytes.
* - 0x5A
- - \_\_le16
- - s\_block\_group\_nr
+ - __le16
+ - s_block_group_nr
- Block group # of this superblock.
* - 0x5C
- - \_\_le32
- - s\_feature\_compat
+ - __le32
+ - s_feature_compat
- Compatible feature set flags. Kernel can still read/write this fs even
if it doesn't understand a flag; fsck should not do that. See the
super_compat_ table for more info.
* - 0x60
- - \_\_le32
- - s\_feature\_incompat
+ - __le32
+ - s_feature_incompat
- Incompatible feature set. If the kernel or fsck doesn't understand one
of these bits, it should stop. See the super_incompat_ table for more
info.
* - 0x64
- - \_\_le32
- - s\_feature\_ro\_compat
+ - __le32
+ - s_feature_ro_compat
- Readonly-compatible feature set. If the kernel doesn't understand one of
these bits, it can still mount read-only. See the super_rocompat_ table
for more info.
* - 0x68
- - \_\_u8
- - s\_uuid[16]
+ - __u8
+ - s_uuid[16]
- 128-bit UUID for volume.
* - 0x78
- char
- - s\_volume\_name[16]
+ - s_volume_name[16]
- Volume label.
* - 0x88
- char
- - s\_last\_mounted[64]
+ - s_last_mounted[64]
- Directory where filesystem was last mounted.
* - 0xC8
- - \_\_le32
- - s\_algorithm\_usage\_bitmap
+ - __le32
+ - s_algorithm_usage_bitmap
- For compression (Not used in e2fsprogs/Linux)
* -
-
@@ -194,18 +194,18 @@ The ext4 superblock is laid out as follows in
- Performance hints. Directory preallocation should only happen if the
EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on.
* - 0xCC
- - \_\_u8
- - s\_prealloc\_blocks
+ - __u8
+ - s_prealloc_blocks
- #. of blocks to try to preallocate for ... files? (Not used in
e2fsprogs/Linux)
* - 0xCD
- - \_\_u8
- - s\_prealloc\_dir\_blocks
+ - __u8
+ - s_prealloc_dir_blocks
- #. of blocks to preallocate for directories. (Not used in
e2fsprogs/Linux)
* - 0xCE
- - \_\_le16
- - s\_reserved\_gdt\_blocks
+ - __le16
+ - s_reserved_gdt_blocks
- Number of reserved GDT entries for future filesystem expansion.
* -
-
@@ -213,281 +213,281 @@ The ext4 superblock is laid out as follows in
- Journalling support is valid only if EXT4_FEATURE_COMPAT_HAS_JOURNAL is
set.
* - 0xD0
- - \_\_u8
- - s\_journal\_uuid[16]
+ - __u8
+ - s_journal_uuid[16]
- UUID of journal superblock
* - 0xE0
- - \_\_le32
- - s\_journal\_inum
+ - __le32
+ - s_journal_inum
- inode number of journal file.
* - 0xE4
- - \_\_le32
- - s\_journal\_dev
+ - __le32
+ - s_journal_dev
- Device number of journal file, if the external journal feature flag is
set.
* - 0xE8
- - \_\_le32
- - s\_last\_orphan
+ - __le32
+ - s_last_orphan
- Start of list of orphaned inodes to delete.
* - 0xEC
- - \_\_le32
- - s\_hash\_seed[4]
+ - __le32
+ - s_hash_seed[4]
- HTREE hash seed.
* - 0xFC
- - \_\_u8
- - s\_def\_hash\_version
+ - __u8
+ - s_def_hash_version
- Default hash algorithm to use for directory hashes. See super_def_hash_
for more info.
* - 0xFD
- - \_\_u8
- - s\_jnl\_backup\_type
- - If this value is 0 or EXT3\_JNL\_BACKUP\_BLOCKS (1), then the
+ - __u8
+ - s_jnl_backup_type
+ - If this value is 0 or EXT3_JNL_BACKUP_BLOCKS (1), then the
``s_jnl_blocks`` field contains a duplicate copy of the inode's
``i_block[]`` array and ``i_size``.
* - 0xFE
- - \_\_le16
- - s\_desc\_size
+ - __le16
+ - s_desc_size
- Size of group descriptors, in bytes, if the 64bit incompat feature flag
is set.
* - 0x100
- - \_\_le32
- - s\_default\_mount\_opts
+ - __le32
+ - s_default_mount_opts
- Default mount options. See the super_mountopts_ table for more info.
* - 0x104
- - \_\_le32
- - s\_first\_meta\_bg
- - First metablock block group, if the meta\_bg feature is enabled.
+ - __le32
+ - s_first_meta_bg
+ - First metablock block group, if the meta_bg feature is enabled.
* - 0x108
- - \_\_le32
- - s\_mkfs\_time
+ - __le32
+ - s_mkfs_time
- When the filesystem was created, in seconds since the epoch.
* - 0x10C
- - \_\_le32
- - s\_jnl\_blocks[17]
+ - __le32
+ - s_jnl_blocks[17]
- Backup copy of the journal inode's ``i_block[]`` array in the first 15
- elements and i\_size\_high and i\_size in the 16th and 17th elements,
+ elements and i_size_high and i_size in the 16th and 17th elements,
respectively.
* -
-
-
- 64bit support is valid only if EXT4_FEATURE_COMPAT_64BIT is set.
* - 0x150
- - \_\_le32
- - s\_blocks\_count\_hi
+ - __le32
+ - s_blocks_count_hi
- High 32-bits of the block count.
* - 0x154
- - \_\_le32
- - s\_r\_blocks\_count\_hi
+ - __le32
+ - s_r_blocks_count_hi
- High 32-bits of the reserved block count.
* - 0x158
- - \_\_le32
- - s\_free\_blocks\_count\_hi
+ - __le32
+ - s_free_blocks_count_hi
- High 32-bits of the free block count.
* - 0x15C
- - \_\_le16
- - s\_min\_extra\_isize
+ - __le16
+ - s_min_extra_isize
- All inodes have at least # bytes.
* - 0x15E
- - \_\_le16
- - s\_want\_extra\_isize
+ - __le16
+ - s_want_extra_isize
- New inodes should reserve # bytes.
* - 0x160
- - \_\_le32
- - s\_flags
+ - __le32
+ - s_flags
- Miscellaneous flags. See the super_flags_ table for more info.
* - 0x164
- - \_\_le16
- - s\_raid\_stride
+ - __le16
+ - s_raid_stride
- RAID stride. This is the number of logical blocks read from or written
to the disk before moving to the next disk. This affects the placement
of filesystem metadata, which will hopefully make RAID storage faster.
* - 0x166
- - \_\_le16
- - s\_mmp\_interval
+ - __le16
+ - s_mmp_interval
- #. seconds to wait in multi-mount prevention (MMP) checking. In theory,
MMP is a mechanism to record in the superblock which host and device
have mounted the filesystem, in order to prevent multiple mounts. This
feature does not seem to be implemented...
* - 0x168
- - \_\_le64
- - s\_mmp\_block
+ - __le64
+ - s_mmp_block
- Block # for multi-mount protection data.
* - 0x170
- - \_\_le32
- - s\_raid\_stripe\_width
+ - __le32
+ - s_raid_stripe_width
- RAID stripe width. This is the number of logical blocks read from or
written to the disk before coming back to the current disk. This is used
by the block allocator to try to reduce the number of read-modify-write
operations in a RAID5/6.
* - 0x174
- - \_\_u8
- - s\_log\_groups\_per\_flex
+ - __u8
+ - s_log_groups_per_flex
- Size of a flexible block group is 2 ^ ``s_log_groups_per_flex``.
* - 0x175
- - \_\_u8
- - s\_checksum\_type
+ - __u8
+ - s_checksum_type
- Metadata checksum algorithm type. The only valid value is 1 (crc32c).
* - 0x176
- - \_\_le16
- - s\_reserved\_pad
+ - __le16
+ - s_reserved_pad
-
* - 0x178
- - \_\_le64
- - s\_kbytes\_written
+ - __le64
+ - s_kbytes_written
- Number of KiB written to this filesystem over its lifetime.
* - 0x180
- - \_\_le32
- - s\_snapshot\_inum
+ - __le32
+ - s_snapshot_inum
- inode number of active snapshot. (Not used in e2fsprogs/Linux.)
* - 0x184
- - \_\_le32
- - s\_snapshot\_id
+ - __le32
+ - s_snapshot_id
- Sequential ID of active snapshot. (Not used in e2fsprogs/Linux.)
* - 0x188
- - \_\_le64
- - s\_snapshot\_r\_blocks\_count
+ - __le64
+ - s_snapshot_r_blocks_count
- Number of blocks reserved for active snapshot's future use. (Not used in
e2fsprogs/Linux.)
* - 0x190
- - \_\_le32
- - s\_snapshot\_list
+ - __le32
+ - s_snapshot_list
- inode number of the head of the on-disk snapshot list. (Not used in
e2fsprogs/Linux.)
* - 0x194
- - \_\_le32
- - s\_error\_count
+ - __le32
+ - s_error_count
- Number of errors seen.
* - 0x198
- - \_\_le32
- - s\_first\_error\_time
+ - __le32
+ - s_first_error_time
- First time an error happened, in seconds since the epoch.
* - 0x19C
- - \_\_le32
- - s\_first\_error\_ino
+ - __le32
+ - s_first_error_ino
- inode involved in first error.
* - 0x1A0
- - \_\_le64
- - s\_first\_error\_block
+ - __le64
+ - s_first_error_block
- Number of the block involved in the first error.
* - 0x1A8
- - \_\_u8
- - s\_first\_error\_func[32]
+ - __u8
+ - s_first_error_func[32]
- Name of function where the error happened.
* - 0x1C8
- - \_\_le32
- - s\_first\_error\_line
+ - __le32
+ - s_first_error_line
- Line number where error happened.
* - 0x1CC
- - \_\_le32
- - s\_last\_error\_time
+ - __le32
+ - s_last_error_time
- Time of most recent error, in seconds since the epoch.
* - 0x1D0
- - \_\_le32
- - s\_last\_error\_ino
+ - __le32
+ - s_last_error_ino
- inode involved in most recent error.
* - 0x1D4
- - \_\_le32
- - s\_last\_error\_line
+ - __le32
+ - s_last_error_line
- Line number where most recent error happened.
* - 0x1D8
- - \_\_le64
- - s\_last\_error\_block
+ - __le64
+ - s_last_error_block
- Number of the block involved in the most recent error.
* - 0x1E0
- - \_\_u8
- - s\_last\_error\_func[32]
+ - __u8
+ - s_last_error_func[32]
- Name of function where the most recent error happened.
* - 0x200
- - \_\_u8
- - s\_mount\_opts[64]
+ - __u8
+ - s_mount_opts[64]
- ASCIIZ string of mount options.
* - 0x240
- - \_\_le32
- - s\_usr\_quota\_inum
+ - __le32
+ - s_usr_quota_inum
- Inode number of user `quota <quota>`__ file.
* - 0x244
- - \_\_le32
- - s\_grp\_quota\_inum
+ - __le32
+ - s_grp_quota_inum
- Inode number of group `quota <quota>`__ file.
* - 0x248
- - \_\_le32
- - s\_overhead\_blocks
+ - __le32
+ - s_overhead_blocks
- Overhead blocks/clusters in fs. (Huh? This field is always zero, which
means that the kernel calculates it dynamically.)
* - 0x24C
- - \_\_le32
- - s\_backup\_bgs[2]
- - Block groups containing superblock backups (if sparse\_super2)
+ - __le32
+ - s_backup_bgs[2]
+ - Block groups containing superblock backups (if sparse_super2)
* - 0x254
- - \_\_u8
- - s\_encrypt\_algos[4]
+ - __u8
+ - s_encrypt_algos[4]
- Encryption algorithms in use. There can be up to four algorithms in use
at any time; valid algorithm codes are given in the super_encrypt_ table
below.
* - 0x258
- - \_\_u8
- - s\_encrypt\_pw\_salt[16]
+ - __u8
+ - s_encrypt_pw_salt[16]
- Salt for the string2key algorithm for encryption.
* - 0x268
- - \_\_le32
- - s\_lpf\_ino
+ - __le32
+ - s_lpf_ino
- Inode number of lost+found
* - 0x26C
- - \_\_le32
- - s\_prj\_quota\_inum
+ - __le32
+ - s_prj_quota_inum
- Inode that tracks project quotas.
* - 0x270
- - \_\_le32
- - s\_checksum\_seed
- - Checksum seed used for metadata\_csum calculations. This value is
- crc32c(~0, $orig\_fs\_uuid).
+ - __le32
+ - s_checksum_seed
+ - Checksum seed used for metadata_csum calculations. This value is
+ crc32c(~0, $orig_fs_uuid).
* - 0x274
- - \_\_u8
- - s\_wtime_hi
+ - __u8
+ - s_wtime_hi
- Upper 8 bits of the s_wtime field.
* - 0x275
- - \_\_u8
- - s\_mtime_hi
+ - __u8
+ - s_mtime_hi
- Upper 8 bits of the s_mtime field.
* - 0x276
- - \_\_u8
- - s\_mkfs_time_hi
+ - __u8
+ - s_mkfs_time_hi
- Upper 8 bits of the s_mkfs_time field.
* - 0x277
- - \_\_u8
- - s\_lastcheck_hi
+ - __u8
+ - s_lastcheck_hi
- Upper 8 bits of the s_lastcheck field.
* - 0x278
- - \_\_u8
- - s\_first_error_time_hi
+ - __u8
+ - s_first_error_time_hi
- Upper 8 bits of the s_first_error_time field.
* - 0x279
- - \_\_u8
- - s\_last_error_time_hi
+ - __u8
+ - s_last_error_time_hi
- Upper 8 bits of the s_last_error_time field.
* - 0x27A
- - \_\_u8
- - s\_pad[2]
+ - __u8
+ - s_pad[2]
- Zero padding.
* - 0x27C
- - \_\_le16
- - s\_encoding
+ - __le16
+ - s_encoding
- Filename charset encoding.
* - 0x27E
- - \_\_le16
- - s\_encoding_flags
+ - __le16
+ - s_encoding_flags
- Filename charset encoding flags.
* - 0x280
- - \_\_le32
- - s\_orphan\_file\_inum
+ - __le32
+ - s_orphan_file_inum
- Orphan file inode number.
* - 0x284
- - \_\_le32
- - s\_reserved[94]
+ - __le32
+ - s_reserved[94]
- Padding to the end of the block.
* - 0x3FC
- - \_\_le32
- - s\_checksum
+ - __le32
+ - s_checksum
- Superblock checksum.
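A minimal sketch of the size formula from the ``s_log_block_size`` row
above::

    #include <stdint.h>

    /* Block size is 2 ^ (10 + s_log_block_size); e.g. a logged value
     * of 2 gives the common 4096-byte block. */
    static uint32_t ext4_block_size(uint32_t s_log_block_size)
    {
        return 1024u << s_log_block_size;
    }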
.. _super_state:
@@ -574,44 +574,44 @@ following:
* - Value
- Description
* - 0x1
- - Directory preallocation (COMPAT\_DIR\_PREALLOC).
+ - Directory preallocation (COMPAT_DIR_PREALLOC).
* - 0x2
- “imagic inodes”. Not clear from the code what this does
- (COMPAT\_IMAGIC\_INODES).
+ (COMPAT_IMAGIC_INODES).
* - 0x4
- - Has a journal (COMPAT\_HAS\_JOURNAL).
+ - Has a journal (COMPAT_HAS_JOURNAL).
* - 0x8
- - Supports extended attributes (COMPAT\_EXT\_ATTR).
+ - Supports extended attributes (COMPAT_EXT_ATTR).
* - 0x10
- Has reserved GDT blocks for filesystem expansion
- (COMPAT\_RESIZE\_INODE). Requires RO\_COMPAT\_SPARSE\_SUPER.
+ (COMPAT_RESIZE_INODE). Requires RO_COMPAT_SPARSE_SUPER.
* - 0x20
- - Has directory indices (COMPAT\_DIR\_INDEX).
+ - Has directory indices (COMPAT_DIR_INDEX).
* - 0x40
- “Lazy BG”. Not in Linux kernel, seems to have been for uninitialized
- block groups? (COMPAT\_LAZY\_BG)
+ block groups? (COMPAT_LAZY_BG)
* - 0x80
- - “Exclude inode”. Not used. (COMPAT\_EXCLUDE\_INODE).
+ - “Exclude inode”. Not used. (COMPAT_EXCLUDE_INODE).
* - 0x100
- “Exclude bitmap”. Seems to be used to indicate the presence of
snapshot-related exclude bitmaps? Not defined in kernel or used in
- e2fsprogs (COMPAT\_EXCLUDE\_BITMAP).
+ e2fsprogs (COMPAT_EXCLUDE_BITMAP).
* - 0x200
- - Sparse Super Block, v2. If this flag is set, the SB field s\_backup\_bgs
+ - Sparse Super Block, v2. If this flag is set, the SB field s_backup_bgs
points to the two block groups that contain backup superblocks
- (COMPAT\_SPARSE\_SUPER2).
+ (COMPAT_SPARSE_SUPER2).
* - 0x400
- Fast commits supported. Although fast commit blocks are
backward incompatible, fast commit blocks are not always
present in the journal. If fast commit blocks are present in
the journal, JBD2 incompat feature
- (JBD2\_FEATURE\_INCOMPAT\_FAST\_COMMIT) gets
- set (COMPAT\_FAST\_COMMIT).
+ (JBD2_FEATURE_INCOMPAT_FAST_COMMIT) gets
+ set (COMPAT_FAST_COMMIT).
* - 0x1000
- Orphan file allocated. This is the special file for more efficient
tracking of unlinked but still open inodes. When there may be any
entries in the file, we additionally set the proper rocompat feature
- (RO\_COMPAT\_ORPHAN\_PRESENT).
+ (RO_COMPAT_ORPHAN_PRESENT).
.. _super_incompat:
@@ -625,45 +625,45 @@ following:
* - Value
- Description
* - 0x1
- - Compression (INCOMPAT\_COMPRESSION).
+ - Compression (INCOMPAT_COMPRESSION).
* - 0x2
- - Directory entries record the file type. See ext4\_dir\_entry\_2 below
- (INCOMPAT\_FILETYPE).
+ - Directory entries record the file type. See ext4_dir_entry_2 below
+ (INCOMPAT_FILETYPE).
* - 0x4
- - Filesystem needs recovery (INCOMPAT\_RECOVER).
+ - Filesystem needs recovery (INCOMPAT_RECOVER).
* - 0x8
- - Filesystem has a separate journal device (INCOMPAT\_JOURNAL\_DEV).
+ - Filesystem has a separate journal device (INCOMPAT_JOURNAL_DEV).
* - 0x10
- Meta block groups. See the earlier discussion of this feature
- (INCOMPAT\_META\_BG).
+ (INCOMPAT_META_BG).
* - 0x40
- - Files in this filesystem use extents (INCOMPAT\_EXTENTS).
+ - Files in this filesystem use extents (INCOMPAT_EXTENTS).
* - 0x80
- - Enable a filesystem size of 2^64 blocks (INCOMPAT\_64BIT).
+ - Enable a filesystem size of 2^64 blocks (INCOMPAT_64BIT).
* - 0x100
- - Multiple mount protection (INCOMPAT\_MMP).
+ - Multiple mount protection (INCOMPAT_MMP).
* - 0x200
- Flexible block groups. See the earlier discussion of this feature
- (INCOMPAT\_FLEX\_BG).
+ (INCOMPAT_FLEX_BG).
* - 0x400
- Inodes can be used to store large extended attribute values
- (INCOMPAT\_EA\_INODE).
+ (INCOMPAT_EA_INODE).
* - 0x1000
- - Data in directory entry (INCOMPAT\_DIRDATA). (Not implemented?)
+ - Data in directory entry (INCOMPAT_DIRDATA). (Not implemented?)
* - 0x2000
- Metadata checksum seed is stored in the superblock. This feature enables
- the administrator to change the UUID of a metadata\_csum filesystem
+ the administrator to change the UUID of a metadata_csum filesystem
while the filesystem is mounted; without it, the checksum definition
- requires all metadata blocks to be rewritten (INCOMPAT\_CSUM\_SEED).
+ requires all metadata blocks to be rewritten (INCOMPAT_CSUM_SEED).
* - 0x4000
- - Large directory >2GB or 3-level htree (INCOMPAT\_LARGEDIR). Prior to
+ - Large directory >2GB or 3-level htree (INCOMPAT_LARGEDIR). Prior to
this feature, directories could not be larger than 4GiB and could not
have an htree more than 2 levels deep. If this feature is enabled,
directories can be larger than 4GiB and have a maximum htree depth of 3.
* - 0x8000
- - Data in inode (INCOMPAT\_INLINE\_DATA).
+ - Data in inode (INCOMPAT_INLINE_DATA).
* - 0x10000
- - Encrypted inodes are present on the filesystem. (INCOMPAT\_ENCRYPT).
+ - Encrypted inodes are present on the filesystem. (INCOMPAT_ENCRYPT).
.. _super_rocompat:
@@ -678,54 +678,54 @@ the following:
- Description
* - 0x1
- Sparse superblocks. See the earlier discussion of this feature
- (RO\_COMPAT\_SPARSE\_SUPER).
+ (RO_COMPAT_SPARSE_SUPER).
* - 0x2
- This filesystem has been used to store a file greater than 2GiB
- (RO\_COMPAT\_LARGE\_FILE).
+ (RO_COMPAT_LARGE_FILE).
* - 0x4
- - Not used in kernel or e2fsprogs (RO\_COMPAT\_BTREE\_DIR).
+ - Not used in kernel or e2fsprogs (RO_COMPAT_BTREE_DIR).
* - 0x8
- This filesystem has files whose sizes are represented in units of
logical blocks, not 512-byte sectors. This implies a very large file
- indeed! (RO\_COMPAT\_HUGE\_FILE)
+ indeed! (RO_COMPAT_HUGE_FILE)
* - 0x10
- Group descriptors have checksums. In addition to detecting corruption,
this is useful for lazy formatting with uninitialized groups
- (RO\_COMPAT\_GDT\_CSUM).
+ (RO_COMPAT_GDT_CSUM).
* - 0x20
- Indicates that the old ext3 32,000 subdirectory limit no longer applies
- (RO\_COMPAT\_DIR\_NLINK). A directory's i\_links\_count will be set to 1
+ (RO_COMPAT_DIR_NLINK). A directory's i_links_count will be set to 1
if it is incremented past 64,999.
* - 0x40
- Indicates that large inodes exist on this filesystem
- (RO\_COMPAT\_EXTRA\_ISIZE).
+ (RO_COMPAT_EXTRA_ISIZE).
* - 0x80
- - This filesystem has a snapshot (RO\_COMPAT\_HAS\_SNAPSHOT).
+ - This filesystem has a snapshot (RO_COMPAT_HAS_SNAPSHOT).
* - 0x100
- - `Quota <Quota>`__ (RO\_COMPAT\_QUOTA).
+ - `Quota <Quota>`__ (RO_COMPAT_QUOTA).
* - 0x200
- This filesystem supports “bigalloc”, which means that file extents are
tracked in units of clusters (of blocks) instead of blocks
- (RO\_COMPAT\_BIGALLOC).
+ (RO_COMPAT_BIGALLOC).
* - 0x400
- This filesystem supports metadata checksumming.
- (RO\_COMPAT\_METADATA\_CSUM; implies RO\_COMPAT\_GDT\_CSUM, though
- GDT\_CSUM must not be set)
+ (RO_COMPAT_METADATA_CSUM; implies RO_COMPAT_GDT_CSUM, though
+ GDT_CSUM must not be set)
* - 0x800
- Filesystem supports replicas. This feature is neither in the kernel nor
- e2fsprogs. (RO\_COMPAT\_REPLICA)
+ e2fsprogs. (RO_COMPAT_REPLICA)
* - 0x1000
- Read-only filesystem image; the kernel will not mount this image
read-write and most tools will refuse to write to the image.
- (RO\_COMPAT\_READONLY)
+ (RO_COMPAT_READONLY)
* - 0x2000
- - Filesystem tracks project quotas. (RO\_COMPAT\_PROJECT)
+ - Filesystem tracks project quotas. (RO_COMPAT_PROJECT)
* - 0x8000
- - Verity inodes may be present on the filesystem. (RO\_COMPAT\_VERITY)
+ - Verity inodes may be present on the filesystem. (RO_COMPAT_VERITY)
* - 0x10000
- Indicates that the orphan file may have valid orphan entries and thus we need
to clean them up when mounting the filesystem
- (RO\_COMPAT\_ORPHAN\_PRESENT).
+ (RO_COMPAT_ORPHAN_PRESENT).
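To make the compat/incompat/ro_compat split concrete, a hedged sketch of the
mount-time check an implementation performs; the ``SUPPORTED_*`` masks here
are illustrative subsets, not values taken from the kernel::

    #include <stdbool.h>
    #include <stdint.h>

    /* Illustrative subsets only -- not the kernel's actual support masks. */
    #define SUPPORTED_INCOMPAT  0x000002C2u /* FILETYPE|EXTENTS|64BIT|FLEX_BG */
    #define SUPPORTED_RO_COMPAT 0x00000473u /* features this tool keeps valid */

    static bool can_mount(uint32_t incompat, uint32_t ro_compat, bool *read_only)
    {
        /* Unknown incompat bits: the filesystem must not be mounted at all. */
        if (incompat & ~SUPPORTED_INCOMPAT)
            return false;
        /* Unknown ro_compat bits: mounting is safe, but only read-only. */
        *read_only = (ro_compat & ~SUPPORTED_RO_COMPAT) != 0;
        return true;
    }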
.. _super_def_hash:
@@ -761,36 +761,36 @@ The ``s_default_mount_opts`` field is any combination of the following:
* - Value
- Description
* - 0x0001
- - Print debugging info upon (re)mount. (EXT4\_DEFM\_DEBUG)
+ - Print debugging info upon (re)mount. (EXT4_DEFM_DEBUG)
* - 0x0002
- New files take the gid of the containing directory (instead of the fsgid
- of the current process). (EXT4\_DEFM\_BSDGROUPS)
+ of the current process). (EXT4_DEFM_BSDGROUPS)
* - 0x0004
- - Support userspace-provided extended attributes. (EXT4\_DEFM\_XATTR\_USER)
+ - Support userspace-provided extended attributes. (EXT4_DEFM_XATTR_USER)
* - 0x0008
- - Support POSIX access control lists (ACLs). (EXT4\_DEFM\_ACL)
+ - Support POSIX access control lists (ACLs). (EXT4_DEFM_ACL)
* - 0x0010
- - Do not support 32-bit UIDs. (EXT4\_DEFM\_UID16)
+ - Do not support 32-bit UIDs. (EXT4_DEFM_UID16)
* - 0x0020
- All data and metadata are committed to the journal.
- (EXT4\_DEFM\_JMODE\_DATA)
+ (EXT4_DEFM_JMODE_DATA)
* - 0x0040
- All data are flushed to the disk before metadata are committed to the
- journal. (EXT4\_DEFM\_JMODE\_ORDERED)
+ journal. (EXT4_DEFM_JMODE_ORDERED)
* - 0x0060
- Data ordering is not preserved; data may be written after the metadata
- has been written. (EXT4\_DEFM\_JMODE\_WBACK)
+ has been written. (EXT4_DEFM_JMODE_WBACK)
* - 0x0100
- - Disable write flushes. (EXT4\_DEFM\_NOBARRIER)
+ - Disable write flushes. (EXT4_DEFM_NOBARRIER)
* - 0x0200
- Track which blocks in a filesystem are metadata and therefore should not
be used as data blocks. This option will be enabled by default on 3.18,
- hopefully. (EXT4\_DEFM\_BLOCK\_VALIDITY)
+ hopefully. (EXT4_DEFM_BLOCK_VALIDITY)
* - 0x0400
- Enable DISCARD support, where the storage device is told about blocks
- becoming unused. (EXT4\_DEFM\_DISCARD)
+ becoming unused. (EXT4_DEFM_DISCARD)
* - 0x0800
- - Disable delayed allocation. (EXT4\_DEFM\_NODELALLOC)
+ - Disable delayed allocation. (EXT4_DEFM_NODELALLOC)
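Note that the three ``EXT4_DEFM_JMODE_*`` values above (0x0020, 0x0040,
0x0060) share a single two-bit field rather than being independent flags, so
the field must be masked before comparing. A small sketch, with an
illustrative helper name::

    #include <stdint.h>

    #define EXT4_DEFM_JMODE_MASK 0x0060u /* bits shared by the three modes */

    static const char *default_jmode(uint32_t defm)
    {
        switch (defm & EXT4_DEFM_JMODE_MASK) {
        case 0x0020: return "data=journal";   /* EXT4_DEFM_JMODE_DATA    */
        case 0x0040: return "data=ordered";   /* EXT4_DEFM_JMODE_ORDERED */
        case 0x0060: return "data=writeback"; /* EXT4_DEFM_JMODE_WBACK   */
        default:     return "unset";
        }
    }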
.. _super_flags:
@@ -820,12 +820,12 @@ The ``s_encrypt_algos`` list can contain any of the following:
* - Value
- Description
* - 0
- - Invalid algorithm (ENCRYPTION\_MODE\_INVALID).
+ - Invalid algorithm (ENCRYPTION_MODE_INVALID).
* - 1
- - 256-bit AES in XTS mode (ENCRYPTION\_MODE\_AES\_256\_XTS).
+ - 256-bit AES in XTS mode (ENCRYPTION_MODE_AES_256_XTS).
* - 2
- - 256-bit AES in GCM mode (ENCRYPTION\_MODE\_AES\_256\_GCM).
+ - 256-bit AES in GCM mode (ENCRYPTION_MODE_AES_256_GCM).
* - 3
- - 256-bit AES in CBC mode (ENCRYPTION\_MODE\_AES\_256\_CBC).
+ - 256-bit AES in CBC mode (ENCRYPTION_MODE_AES_256_CBC).
Total size of the superblock is 1024 bytes.
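For the ``s_checksum`` field, the superblock checksum is conventionally
crc32c with seed ``~0`` over the block up to, but not including, the checksum
itself at offset 0x3FC; a sketch assuming a generic ``crc32c()`` helper rather
than any particular library::

    #include <stddef.h>
    #include <stdint.h>

    uint32_t crc32c(uint32_t seed, const void *buf, size_t len); /* assumed */

    static uint32_t sb_checksum(const uint8_t sb[1024])
    {
        /* 0x3FC is the offset of s_checksum within the superblock. */
        return crc32c(~0u, sb, 0x3FC);
    }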
diff --git a/Documentation/filesystems/netfs_library.rst b/Documentation/filesystems/netfs_library.rst
index 3276c3d55142..4d19b19bcc08 100644
--- a/Documentation/filesystems/netfs_library.rst
+++ b/Documentation/filesystems/netfs_library.rst
@@ -79,7 +79,7 @@ To help deal with the per-inode context, a number of helper functions are
provided. Firstly, a function to perform basic initialisation on a context and
set the operations table pointer::
- void netfs_inode_init(struct inode *inode,
+ void netfs_inode_init(struct netfs_inode *ctx,
const struct netfs_request_ops *ops);
then a function to cast from the VFS inode structure to the netfs context::
@@ -89,7 +89,7 @@ then a function to cast from the VFS inode structure to the netfs context::
and finally, a function to get the cache cookie pointer from the context
attached to an inode (or NULL if fscache is disabled)::
- struct fscache_cookie *netfs_i_cookie(struct inode *inode);
+ struct fscache_cookie *netfs_i_cookie(struct netfs_inode *ctx);
Buffered Read Helpers
@@ -136,8 +136,9 @@ Three read helpers are provided::
void netfs_readahead(struct readahead_control *ractl);
int netfs_read_folio(struct file *file,
- struct folio *folio);
- int netfs_write_begin(struct file *file,
+ struct folio *folio);
+ int netfs_write_begin(struct netfs_inode *ctx,
+ struct file *file,
struct address_space *mapping,
loff_t pos,
unsigned int len,
@@ -157,9 +158,10 @@ The helpers manage the read request, calling back into the network filesystem
through the supplied table of operations. Waits will be performed as
necessary before returning for helpers that are meant to be synchronous.
-If an error occurs and netfs_priv is non-NULL, ops->cleanup() will be called to
-deal with it. If some parts of the request are in progress when an error
-occurs, the request will get partially completed if sufficient data is read.
+If an error occurs, ->free_request() will be called to clean up the
+allocated netfs_io_request struct. If some parts of the request are in
+progress when an error occurs, the request will get partially completed if
+sufficient data is read.
Additionally, there is::
@@ -207,8 +209,7 @@ The above fields are the ones the netfs can use. They are:
* ``netfs_priv``
The network filesystem's private data. The value for this can be passed in
- to the helper functions or set during the request. The ->cleanup() op will
- be called if this is non-NULL at the end.
+ to the helper functions or set during the request.
* ``start``
* ``len``
@@ -293,6 +294,7 @@ through which it can issue requests and negotiate::
struct netfs_request_ops {
void (*init_request)(struct netfs_io_request *rreq, struct file *file);
+ void (*free_request)(struct netfs_io_request *rreq);
int (*begin_cache_operation)(struct netfs_io_request *rreq);
void (*expand_readahead)(struct netfs_io_request *rreq);
bool (*clamp_length)(struct netfs_io_subrequest *subreq);
@@ -301,7 +303,6 @@ through which it can issue requests and negotiate::
int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
struct folio *folio, void **_fsdata);
void (*done)(struct netfs_io_request *rreq);
- void (*cleanup)(struct address_space *mapping, void *netfs_priv);
};
The operations are as follows:
@@ -309,7 +310,12 @@ The operations are as follows:
* ``init_request()``
[Optional] This is called to initialise the request structure. It is given
- the file for reference and can modify the ->netfs_priv value.
+ the file for reference.
+
+ * ``free_request()``
+
+ [Optional] This is called as the request is being deallocated so that the
+ filesystem can clean up any state it has attached there.
* ``begin_cache_operation()``
@@ -383,11 +389,6 @@ The operations are as follows:
[Optional] This is called after the folios in the request have all been
unlocked (and marked uptodate if applicable).
- * ``cleanup``
-
- [Optional] This is called as the request is being deallocated so that the
- filesystem can clean up ->netfs_priv.
-
Read Helper Procedure
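To illustrate the reworked ops table, a hedged sketch of a filesystem using
``->free_request()`` in place of the removed ``->cleanup()``; all ``myfs``
names are hypothetical::

    #include <linux/fs.h>
    #include <linux/netfs.h>
    #include <linux/slab.h>

    struct myfs_req_state { unsigned long debug_id; }; /* hypothetical */

    static void myfs_init_request(struct netfs_io_request *rreq, struct file *file)
    {
        /* Allocate per-request state; released in ->free_request(). */
        rreq->netfs_priv = kzalloc(sizeof(struct myfs_req_state), GFP_KERNEL);
    }

    static void myfs_free_request(struct netfs_io_request *rreq)
    {
        /* Called exactly once as the request is deallocated. */
        kfree(rreq->netfs_priv);
    }

    static const struct netfs_request_ops myfs_req_ops = {
        .init_request = myfs_init_request,
        .free_request = myfs_free_request,
        /* mandatory ops such as ->issue_read() omitted for brevity */
    };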
diff --git a/Documentation/loongarch/introduction.rst b/Documentation/loongarch/introduction.rst
index 2bf40ad370df..216b3f390e80 100644
--- a/Documentation/loongarch/introduction.rst
+++ b/Documentation/loongarch/introduction.rst
@@ -45,10 +45,12 @@ Name Alias Usage Preserved
``$r23``-``$r31`` ``$s0``-``$s8`` Static registers Yes
================= =============== =================== ============
-Note: The register ``$r21`` is reserved in the ELF psABI, but used by the Linux
-kernel for storing the percpu base address. It normally has no ABI name, but is
-called ``$u0`` in the kernel. You may also see ``$v0`` or ``$v1`` in some old code,
-however they are deprecated aliases of ``$a0`` and ``$a1`` respectively.
+.. Note::
+ The register ``$r21`` is reserved in the ELF psABI, but used by the Linux
+ kernel for storing the percpu base address. It normally has no ABI name,
+ but is called ``$u0`` in the kernel. You may also see ``$v0`` or ``$v1``
+ in some old code, however they are deprecated aliases of ``$a0`` and ``$a1``
+ respectively.
FPRs
----
@@ -69,8 +71,9 @@ Name Alias Usage Preserved
``$f24``-``$f31`` ``$fs0``-``$fs7`` Static registers Yes
================= ================== =================== ============
-Note: You may see ``$fv0`` or ``$fv1`` in some old code, however they are deprecated
-aliases of ``$fa0`` and ``$fa1`` respectively.
+.. Note::
+ You may see ``$fv0`` or ``$fv1`` in some old code, however they are
+ deprecated aliases of ``$fa0`` and ``$fa1`` respectively.
VRs
----
diff --git a/Documentation/loongarch/irq-chip-model.rst b/Documentation/loongarch/irq-chip-model.rst
index 8d88f7ab2e5e..7988f4192363 100644
--- a/Documentation/loongarch/irq-chip-model.rst
+++ b/Documentation/loongarch/irq-chip-model.rst
@@ -145,12 +145,16 @@ Documentation of Loongson's LS7A chipset:
https://github.com/loongson/LoongArch-Documentation/releases/latest/download/Loongson-7A1000-usermanual-2.00-EN.pdf (in English)
-Note: CPUINTC is CSR.ECFG/CSR.ESTAT and its interrupt controller described
-in Section 7.4 of "LoongArch Reference Manual, Vol 1"; LIOINTC is "Legacy I/O
-Interrupts" described in Section 11.1 of "Loongson 3A5000 Processor Reference
-Manual"; EIOINTC is "Extended I/O Interrupts" described in Section 11.2 of
-"Loongson 3A5000 Processor Reference Manual"; HTVECINTC is "HyperTransport
-Interrupts" described in Section 14.3 of "Loongson 3A5000 Processor Reference
-Manual"; PCH-PIC/PCH-MSI is "Interrupt Controller" described in Section 5 of
-"Loongson 7A1000 Bridge User Manual"; PCH-LPC is "LPC Interrupts" described in
-Section 24.3 of "Loongson 7A1000 Bridge User Manual".
+.. Note::
+ - CPUINTC is CSR.ECFG/CSR.ESTAT and its interrupt controller described
+ in Section 7.4 of "LoongArch Reference Manual, Vol 1";
+ - LIOINTC is "Legacy I/OInterrupts" described in Section 11.1 of
+ "Loongson 3A5000 Processor Reference Manual";
+ - EIOINTC is "Extended I/O Interrupts" described in Section 11.2 of
+ "Loongson 3A5000 Processor Reference Manual";
+ - HTVECINTC is "HyperTransport Interrupts" described in Section 14.3 of
+ "Loongson 3A5000 Processor Reference Manual";
+ - PCH-PIC/PCH-MSI is "Interrupt Controller" described in Section 5 of
+ "Loongson 7A1000 Bridge User Manual";
+ - PCH-LPC is "LPC Interrupts" described in Section 24.3 of
+ "Loongson 7A1000 Bridge User Manual".
diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst
index 43be3782e5df..53a18ff7cf23 100644
--- a/Documentation/networking/bonding.rst
+++ b/Documentation/networking/bonding.rst
@@ -780,6 +780,17 @@ peer_notif_delay
value is 0 which means to match the value of the link monitor
interval.
+prio
+ Slave priority. A higher number means higher priority.
+ The primary slave has the highest priority. This option also
+ follows the primary_reselect rules.
+
+ This option can only be configured via netlink, and is only valid
+ for active-backup (1), balance-tlb (5) and balance-alb (6) modes.
+ Valid values are in the range of a signed 32-bit integer.
+
+ The default value is 0.
+
primary
A string (eth0, eth2, etc) specifying which slave is the
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 04216564a03c..9f41961d11d5 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -2925,6 +2925,43 @@ plpmtud_probe_interval - INTEGER
Default: 0
+reconf_enable - BOOLEAN
+ Enable or disable the Stream Reconfiguration extension specified in
+ RFC6525. This extension provides the ability to "reset" a stream, and
+ it includes the parameters "Outgoing/Incoming SSN Reset", "SSN/TSN
+ Reset" and "Add Outgoing/Incoming Streams".
+
+ - 1: Enable extension.
+ - 0: Disable extension.
+
+ Default: 0
+
+intl_enable - BOOLEAN
+ Enable or disable the User Message Interleaving extension specified in
+ RFC8260. This extension allows the interleaving of user messages sent
+ on different streams. With this feature enabled, I-DATA chunks will
+ replace DATA chunks to carry user messages if the peer also supports
+ it. Note that to use this feature, one needs to set this option to 1
+ and also set the socket options SCTP_FRAGMENT_INTERLEAVE to 2 and
+ SCTP_INTERLEAVING_SUPPORTED to 1 (see the setsockopt() sketch below).
+
+ - 1: Enable extension.
+ - 0: Disable extension.
+
+ Default: 0
+
+ecn_enable - BOOLEAN
+ Control use of Explicit Congestion Notification (ECN) by SCTP.
+ Like in TCP, ECN is used only when both ends of the SCTP connection
+ indicate support for it. This feature is useful in avoiding losses
+ due to congestion by allowing supporting routers to signal congestion
+ before having to drop packets.
+
+ - 1: Enable ECN.
+ - 0: Disable ECN.
+
+ Default: 1
+
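As a companion to the ``intl_enable`` entry above, a sketch of the per-socket
half of the setup; the constants come from the Linux SCTP uAPI, and older
lksctp-tools headers may lack ``SCTP_INTERLEAVING_SUPPORTED``, hence the
guarded define::

    #include <netinet/sctp.h> /* lksctp-tools */
    #include <sys/socket.h>

    #ifndef SCTP_INTERLEAVING_SUPPORTED
    #define SCTP_INTERLEAVING_SUPPORTED 125 /* value from the Linux uAPI */
    #endif

    static int enable_interleaving(int sk)
    {
        int level = 2; /* SCTP_FRAGMENT_INTERLEAVE: complete interleaving */
        struct sctp_assoc_value iv = { .assoc_id = 0, .assoc_value = 1 };

        if (setsockopt(sk, IPPROTO_SCTP, SCTP_FRAGMENT_INTERLEAVE,
                       &level, sizeof(level)) < 0)
            return -1;
        return setsockopt(sk, IPPROTO_SCTP, SCTP_INTERLEAVING_SUPPORTED,
                          &iv, sizeof(iv));
    }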
``/proc/sys/net/core/*``
========================
diff --git a/Documentation/networking/phy.rst b/Documentation/networking/phy.rst
index d43da709bf40..704f31da5167 100644
--- a/Documentation/networking/phy.rst
+++ b/Documentation/networking/phy.rst
@@ -104,7 +104,7 @@ Whenever possible, use the PHY side RGMII delay for these reasons:
* PHY device drivers in PHYLIB being reusable by nature, being able to
configure correctly a specified delay enables more designs with similar delay
- requirements to be operate correctly
+ requirements to be operated correctly
For cases where the PHY is not capable of providing this delay, but the
Ethernet MAC driver is capable of doing so, the correct phy_interface_t value
diff --git a/Documentation/networking/tls.rst b/Documentation/networking/tls.rst
index 8cb2cd4e2a80..be8e10c14b05 100644
--- a/Documentation/networking/tls.rst
+++ b/Documentation/networking/tls.rst
@@ -214,6 +214,31 @@ of calling send directly after a handshake using gnutls.
Since it doesn't implement a full record layer, control
messages are not supported.
+Optional optimizations
+----------------------
+
+There are certain condition-specific optimizations the TLS ULP can make,
+if requested. Those optimizations are either not universally beneficial
+or may impact correctness, hence they require an opt-in.
+All options are set per-socket using setsockopt(), and their
+state can be checked using getsockopt() and via socket diag (``ss``).
+
+TLS_TX_ZEROCOPY_RO
+~~~~~~~~~~~~~~~~~~
+
+For device offload only. Allow sendfile() data to be transmitted directly
+to the NIC without making an in-kernel copy. This allows true zero-copy
+behavior when device offload is enabled.
+
+The application must make sure that the data is not modified between being
+submitted and transmission completing. In other words, this is mostly
+applicable if the data sent on a socket via sendfile() is read-only.
+
+Modifying the data may result in different versions of the data being used
+for the original TCP transmission and TCP retransmissions. To the receiver
+this will look as if the TLS records had been tampered with and will result
+in record authentication failures.
+
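A sketch of the opt-in described above, for a socket that already has the TLS
ULP and a TX key installed; ``SOL_TLS`` is defined locally because not all
libc headers carry it::

    #include <linux/tls.h>
    #include <sys/socket.h>

    #ifndef SOL_TLS
    #define SOL_TLS 282 /* matches the kernel's definition */
    #endif
    #ifndef TLS_TX_ZEROCOPY_RO
    #define TLS_TX_ZEROCOPY_RO 3 /* value from the Linux uAPI */
    #endif

    static int enable_tx_zerocopy_ro(int sk)
    {
        int one = 1;

        /* Only meaningful once TX device offload is active on 'sk'. */
        return setsockopt(sk, SOL_TLS, TLS_TX_ZEROCOPY_RO, &one, sizeof(one));
    }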
Statistics
==========
diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst
index 34415ae1af1b..19c286c23786 100644
--- a/Documentation/process/changes.rst
+++ b/Documentation/process/changes.rst
@@ -32,6 +32,7 @@ you probably needn't concern yourself with pcmciautils.
GNU C 5.1 gcc --version
Clang/LLVM (optional) 11.0.0 clang --version
GNU make 3.81 make --version
+bash 4.2 bash --version
binutils 2.23 ld -v
flex 2.5.35 flex --version
bison 2.0 bison --version
@@ -84,6 +85,12 @@ Make
You will need GNU make 3.81 or later to build the kernel.
+Bash
+----
+
+Some bash scripts are used for the kernel build.
+Bash 4.2 or newer is needed.
+
Binutils
--------
@@ -362,6 +369,11 @@ Make
- <ftp://ftp.gnu.org/gnu/make/>
+Bash
+----
+
+- <ftp://ftp.gnu.org/gnu/bash/>
+
Binutils
--------
diff --git a/Documentation/translations/zh_CN/loongarch/introduction.rst b/Documentation/translations/zh_CN/loongarch/introduction.rst
index e31a1a928c48..11686ee0caeb 100644
--- a/Documentation/translations/zh_CN/loongarch/introduction.rst
+++ b/Documentation/translations/zh_CN/loongarch/introduction.rst
@@ -46,10 +46,11 @@ LA64中每个寄存器为64位宽。 ``$r0`` 的内容总是固定为0,而其他
``$r23``-``$r31`` ``$s0``-``$s8`` 静态寄存器 是
================= =============== =================== ==========
-注意:``$r21``寄存器在ELF psABI中保留未使用,但是在Linux内核用于保存每CPU
-变量基地址。该寄存器没有ABI命名,不过在内核中称为``$u0``。在一些遗留代码
-中有时可能见到``$v0``和``$v1``,它们是``$a0``和``$a1``的别名,属于已经废弃
-的用法。
+.. note::
+ 注意: ``$r21`` 寄存器在ELF psABI中保留未使用,但是在Linux内核用于保
+ 存每CPU变量基地址。该寄存器没有ABI命名,不过在内核中称为 ``$u0`` 。在
+ 一些遗留代码中有时可能见到 ``$v0`` 和 ``$v1`` ,它们是 ``$a0`` 和
+ ``$a1`` 的别名,属于已经废弃的用法。
浮点寄存器
----------
@@ -68,8 +69,9 @@ LA64中每个寄存器为64位宽。 ``$r0`` 的内容总是固定为0,而其他
``$f24``-``$f31`` ``$fs0``-``$fs7`` 静态寄存器 是
================= ================== =================== ==========
-注意:在一些遗留代码中有时可能见到 ``$v0`` 和 ``$v1`` ,它们是 ``$a0``
-和 ``$a1`` 的别名,属于已经废弃的用法。
+.. note::
+ 注意:在一些遗留代码中有时可能见到 ``$v0`` 和 ``$v1`` ,它们是
+ ``$a0`` 和 ``$a1`` 的别名,属于已经废弃的用法。
向量寄存器
diff --git a/Documentation/translations/zh_CN/loongarch/irq-chip-model.rst b/Documentation/translations/zh_CN/loongarch/irq-chip-model.rst
index 2a4c3ad38be4..fb5d23b49ed5 100644
--- a/Documentation/translations/zh_CN/loongarch/irq-chip-model.rst
+++ b/Documentation/translations/zh_CN/loongarch/irq-chip-model.rst
@@ -147,9 +147,11 @@ PCH-LPC::
https://github.com/loongson/LoongArch-Documentation/releases/latest/download/Loongson-7A1000-usermanual-2.00-EN.pdf (英文版)
-注:CPUINTC即《龙芯架构参考手册卷一》第7.4节所描述的CSR.ECFG/CSR.ESTAT寄存器及其中断
-控制逻辑;LIOINTC即《龙芯3A5000处理器使用手册》第11.1节所描述的“传统I/O中断”;EIOINTC
-即《龙芯3A5000处理器使用手册》第11.2节所描述的“扩展I/O中断”;HTVECINTC即《龙芯3A5000
-处理器使用手册》第14.3节所描述的“HyperTransport中断”;PCH-PIC/PCH-MSI即《龙芯7A1000桥
-片用户手册》第5章所描述的“中断控制器”;PCH-LPC即《龙芯7A1000桥片用户手册》第24.3节所
-描述的“LPC中断”。
+.. note::
+ - CPUINTC:即《龙芯架构参考手册卷一》第7.4节所描述的CSR.ECFG/CSR.ESTAT寄存器及其
+   中断控制逻辑;
+ - LIOINTC:即《龙芯3A5000处理器使用手册》第11.1节所描述的“传统I/O中断”;
+ - EIOINTC:即《龙芯3A5000处理器使用手册》第11.2节所描述的“扩展I/O中断”;
+ - HTVECINTC:即《龙芯3A5000处理器使用手册》第14.3节所描述的“HyperTransport中断”;
+ - PCH-PIC/PCH-MSI:即《龙芯7A1000桥片用户手册》第5章所描述的“中断控制器”;
+ - PCH-LPC:即《龙芯7A1000桥片用户手册》第24.3节所描述的“LPC中断”。
diff --git a/MAINTAINERS b/MAINTAINERS
index 2d1cf1718140..36f0a205c54a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3661,7 +3661,7 @@ BPF JIT for ARM
M: Shubham Bansal <illusionist.neo@gmail.com>
L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
-S: Maintained
+S: Odd Fixes
F: arch/arm/net/
BPF JIT for ARM64
@@ -3685,14 +3685,15 @@ BPF JIT for NFP NICs
M: Jakub Kicinski <kuba@kernel.org>
L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
-S: Supported
+S: Odd Fixes
F: drivers/net/ethernet/netronome/nfp/bpf/
BPF JIT for POWERPC (32-BIT AND 64-BIT)
M: Naveen N. Rao <naveen.n.rao@linux.ibm.com>
+M: Michael Ellerman <mpe@ellerman.id.au>
L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
-S: Maintained
+S: Supported
F: arch/powerpc/net/
BPF JIT for RISC-V (32-bit)
@@ -3718,7 +3719,7 @@ M: Heiko Carstens <hca@linux.ibm.com>
M: Vasily Gorbik <gor@linux.ibm.com>
L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
-S: Maintained
+S: Supported
F: arch/s390/net/
X: arch/s390/net/pnet.c
@@ -3726,14 +3727,14 @@ BPF JIT for SPARC (32-BIT AND 64-BIT)
M: David S. Miller <davem@davemloft.net>
L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
-S: Maintained
+S: Odd Fixes
F: arch/sparc/net/
BPF JIT for X86 32-BIT
M: Wang YanQing <udknight@gmail.com>
L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
-S: Maintained
+S: Odd Fixes
F: arch/x86/net/bpf_jit_comp32.c
BPF JIT for X86 64-BIT
@@ -3756,6 +3757,19 @@ F: include/linux/bpf_lsm.h
F: kernel/bpf/bpf_lsm.c
F: security/bpf/
+BPF L7 FRAMEWORK
+M: John Fastabend <john.fastabend@gmail.com>
+M: Jakub Sitnicki <jakub@cloudflare.com>
+L: netdev@vger.kernel.org
+L: bpf@vger.kernel.org
+S: Maintained
+F: include/linux/skmsg.h
+F: net/core/skmsg.c
+F: net/core/sock_map.c
+F: net/ipv4/tcp_bpf.c
+F: net/ipv4/udp_bpf.c
+F: net/unix/unix_bpf.c
+
BPFTOOL
M: Quentin Monnet <quentin@isovalent.com>
L: bpf@vger.kernel.org
@@ -7660,6 +7674,7 @@ F: include/uapi/scsi/fc/
FILE LOCKING (flock() and fcntl()/lockf())
M: Jeff Layton <jlayton@kernel.org>
+M: Chuck Lever <chuck.lever@oracle.com>
L: linux-fsdevel@vger.kernel.org
S: Maintained
F: fs/fcntl.c
@@ -9281,6 +9296,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git
F: Documentation/devicetree/bindings/i2c/i2c.txt
F: Documentation/i2c/
F: drivers/i2c/*
+F: include/dt-bindings/i2c/i2c.h
F: include/linux/i2c-dev.h
F: include/linux/i2c-smbus.h
F: include/linux/i2c.h
@@ -9296,6 +9312,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git
F: Documentation/devicetree/bindings/i2c/
F: drivers/i2c/algos/
F: drivers/i2c/busses/
+F: include/dt-bindings/i2c/
I2C-TAOS-EVM DRIVER
M: Jean Delvare <jdelvare@suse.com>
@@ -10752,6 +10769,7 @@ W: http://kernelnewbies.org/KernelJanitors
KERNEL NFSD, SUNRPC, AND LOCKD SERVERS
M: Chuck Lever <chuck.lever@oracle.com>
+M: Jeff Layton <jlayton@kernel.org>
L: linux-nfs@vger.kernel.org
S: Supported
W: http://nfs.sourceforge.net/
@@ -10876,7 +10894,6 @@ F: arch/riscv/include/asm/kvm*
F: arch/riscv/include/uapi/asm/kvm*
F: arch/riscv/kvm/
F: tools/testing/selftests/kvm/*/riscv/
-F: tools/testing/selftests/kvm/riscv/
KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
M: Christian Borntraeger <borntraeger@linux.ibm.com>
@@ -11101,20 +11118,6 @@ S: Maintained
F: include/net/l3mdev.h
F: net/l3mdev
-L7 BPF FRAMEWORK
-M: John Fastabend <john.fastabend@gmail.com>
-M: Daniel Borkmann <daniel@iogearbox.net>
-M: Jakub Sitnicki <jakub@cloudflare.com>
-L: netdev@vger.kernel.org
-L: bpf@vger.kernel.org
-S: Maintained
-F: include/linux/skmsg.h
-F: net/core/skmsg.c
-F: net/core/sock_map.c
-F: net/ipv4/tcp_bpf.c
-F: net/ipv4/udp_bpf.c
-F: net/unix/unix_bpf.c
-
LANDLOCK SECURITY MODULE
M: Mickaël Salaün <mic@digikod.net>
L: linux-security-module@vger.kernel.org
@@ -13805,6 +13808,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
F: Documentation/devicetree/bindings/net/
F: drivers/connector/
F: drivers/net/
+F: include/dt-bindings/net/
F: include/linux/etherdevice.h
F: include/linux/fcdevice.h
F: include/linux/fddidevice.h
@@ -13956,7 +13960,6 @@ F: net/ipv6/tcp*.c
NETWORKING [TLS]
M: Boris Pismenny <borisp@nvidia.com>
M: John Fastabend <john.fastabend@gmail.com>
-M: Daniel Borkmann <daniel@iogearbox.net>
M: Jakub Kicinski <kuba@kernel.org>
L: netdev@vger.kernel.org
S: Maintained
@@ -14873,6 +14876,7 @@ F: include/dt-bindings/
OPENCOMPUTE PTP CLOCK DRIVER
M: Jonathan Lemon <jonathan.lemon@gmail.com>
+M: Vadim Fedorenko <vadfed@fb.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/ptp/ptp_ocp.c
@@ -17059,6 +17063,19 @@ S: Supported
F: Documentation/devicetree/bindings/iio/adc/renesas,rzg2l-adc.yaml
F: drivers/iio/adc/rzg2l_adc.c
+RENESAS RZ/N1 A5PSW SWITCH DRIVER
+M: Clément Léger <clement.leger@bootlin.com>
+L: linux-renesas-soc@vger.kernel.org
+L: netdev@vger.kernel.org
+S: Maintained
+F: Documentation/devicetree/bindings/net/dsa/renesas,rzn1-a5psw.yaml
+F: Documentation/devicetree/bindings/net/pcs/renesas,rzn1-miic.yaml
+F: drivers/net/dsa/rzn1_a5psw*
+F: drivers/net/pcs/pcs-rzn1-miic.c
+F: include/dt-bindings/net/pcs-rzn1-miic.h
+F: include/linux/pcs-rzn1-miic.h
+F: net/dsa/tag_rzn1_a5psw.c
+
RENESAS RZ/N1 RTC CONTROLLER DRIVER
M: Miquel Raynal <miquel.raynal@bootlin.com>
L: linux-rtc@vger.kernel.org
@@ -19309,7 +19326,7 @@ R: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
R: Mika Westerberg <mika.westerberg@linux.intel.com>
R: Jan Dabros <jsd@semihalf.com>
L: linux-i2c@vger.kernel.org
-S: Maintained
+S: Supported
F: drivers/i2c/busses/i2c-designware-*
SYNOPSYS DESIGNWARE MMC/SD/SDIO DRIVER
diff --git a/Makefile b/Makefile
index b2e93c1a8021..513c1fbf7888 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 5
PATCHLEVEL = 19
SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc3
NAME = Superb Owl
# *DOCUMENTATION*
diff --git a/arch/arm/boot/dts/at91-sama5d3_ksz9477_evb.dts b/arch/arm/boot/dts/at91-sama5d3_ksz9477_evb.dts
index 443e8b022897..14af1fd6d247 100644
--- a/arch/arm/boot/dts/at91-sama5d3_ksz9477_evb.dts
+++ b/arch/arm/boot/dts/at91-sama5d3_ksz9477_evb.dts
@@ -120,26 +120,31 @@
port@0 {
reg = <0>;
label = "lan1";
+ phy-mode = "internal";
};
port@1 {
reg = <1>;
label = "lan2";
+ phy-mode = "internal";
};
port@2 {
reg = <2>;
label = "lan3";
+ phy-mode = "internal";
};
port@3 {
reg = <3>;
label = "lan4";
+ phy-mode = "internal";
};
port@4 {
reg = <4>;
label = "lan5";
+ phy-mode = "internal";
};
port@5 {
diff --git a/arch/arm/boot/dts/r9a06g032-rzn1d400-db.dts b/arch/arm/boot/dts/r9a06g032-rzn1d400-db.dts
index 3f8f3ce87e12..4227aba70c30 100644
--- a/arch/arm/boot/dts/r9a06g032-rzn1d400-db.dts
+++ b/arch/arm/boot/dts/r9a06g032-rzn1d400-db.dts
@@ -8,6 +8,8 @@
/dts-v1/;
+#include <dt-bindings/pinctrl/rzn1-pinctrl.h>
+#include <dt-bindings/net/pcs-rzn1-miic.h>
#include "r9a06g032.dtsi"
/ {
@@ -31,3 +33,118 @@
timeout-sec = <60>;
status = "okay";
};
+
+&gmac2 {
+ status = "okay";
+ phy-mode = "gmii";
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ };
+};
+
+&switch {
+ status = "okay";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_mdio1>, <&pins_eth3>, <&pins_eth4>;
+
+ dsa,member = <0 0>;
+
+ mdio {
+ clock-frequency = <2500000>;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ switch0phy4: ethernet-phy@4 {
+ reg = <4>;
+ micrel,led-mode = <1>;
+ };
+
+ switch0phy5: ethernet-phy@5 {
+ reg = <5>;
+ micrel,led-mode = <1>;
+ };
+ };
+};
+
+&switch_port0 {
+ label = "lan0";
+ phy-mode = "mii";
+ phy-handle = <&switch0phy5>;
+ status = "okay";
+};
+
+&switch_port1 {
+ label = "lan1";
+ phy-mode = "mii";
+ phy-handle = <&switch0phy4>;
+ status = "okay";
+};
+
+&switch_port4 {
+ status = "okay";
+};
+
+&eth_miic {
+ status = "okay";
+ renesas,miic-switch-portin = <MIIC_GMAC2_PORT>;
+};
+
+&mii_conv4 {
+ renesas,miic-input = <MIIC_SWITCH_PORTB>;
+ status = "okay";
+};
+
+&mii_conv5 {
+ renesas,miic-input = <MIIC_SWITCH_PORTA>;
+ status = "okay";
+};
+
+&pinctrl{
+ pins_mdio1: pins_mdio1 {
+ pinmux = <
+ RZN1_PINMUX(152, RZN1_FUNC_MDIO1_SWITCH)
+ RZN1_PINMUX(153, RZN1_FUNC_MDIO1_SWITCH)
+ >;
+ };
+ pins_eth3: pins_eth3 {
+ pinmux = <
+ RZN1_PINMUX(36, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII)
+ RZN1_PINMUX(37, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII)
+ RZN1_PINMUX(38, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII)
+ RZN1_PINMUX(39, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII)
+ RZN1_PINMUX(40, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII)
+ RZN1_PINMUX(41, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII)
+ RZN1_PINMUX(42, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII)
+ RZN1_PINMUX(43, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII)
+ RZN1_PINMUX(44, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII)
+ RZN1_PINMUX(45, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII)
+ RZN1_PINMUX(46, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII)
+ RZN1_PINMUX(47, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII)
+ >;
+ drive-strength = <6>;
+ bias-disable;
+ };
+ pins_eth4: pins_eth4 {
+ pinmux = <
+ RZN1_PINMUX(48, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII)
+ RZN1_PINMUX(49, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII)
+ RZN1_PINMUX(50, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII)
+ RZN1_PINMUX(51, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII)
+ RZN1_PINMUX(52, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII)
+ RZN1_PINMUX(53, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII)
+ RZN1_PINMUX(54, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII)
+ RZN1_PINMUX(55, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII)
+ RZN1_PINMUX(56, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII)
+ RZN1_PINMUX(57, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII)
+ RZN1_PINMUX(58, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII)
+ RZN1_PINMUX(59, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII)
+ >;
+ drive-strength = <6>;
+ bias-disable;
+ };
+};
diff --git a/arch/arm/boot/dts/r9a06g032.dtsi b/arch/arm/boot/dts/r9a06g032.dtsi
index d3665910958b..5b97fa85474f 100644
--- a/arch/arm/boot/dts/r9a06g032.dtsi
+++ b/arch/arm/boot/dts/r9a06g032.dtsi
@@ -304,6 +304,114 @@
data-width = <8>;
};
+ gmac2: ethernet@44002000 {
+ compatible = "renesas,r9a06g032-gmac", "renesas,rzn1-gmac", "snps,dwmac";
+ reg = <0x44002000 0x2000>;
+ interrupt-parent = <&gic>;
+ interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq", "eth_wake_irq", "eth_lpi";
+ clocks = <&sysctrl R9A06G032_HCLK_GMAC1>;
+ clock-names = "stmmaceth";
+ power-domains = <&sysctrl>;
+ snps,multicast-filter-bins = <256>;
+ snps,perfect-filter-entries = <128>;
+ tx-fifo-depth = <2048>;
+ rx-fifo-depth = <4096>;
+ status = "disabled";
+ };
+
+ eth_miic: eth-miic@44030000 {
+ compatible = "renesas,r9a06g032-miic", "renesas,rzn1-miic";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x44030000 0x10000>;
+ clocks = <&sysctrl R9A06G032_CLK_MII_REF>,
+ <&sysctrl R9A06G032_CLK_RGMII_REF>,
+ <&sysctrl R9A06G032_CLK_RMII_REF>,
+ <&sysctrl R9A06G032_HCLK_SWITCH_RG>;
+ clock-names = "mii_ref", "rgmii_ref", "rmii_ref", "hclk";
+ power-domains = <&sysctrl>;
+ status = "disabled";
+
+ mii_conv1: mii-conv@1 {
+ reg = <1>;
+ status = "disabled";
+ };
+
+ mii_conv2: mii-conv@2 {
+ reg = <2>;
+ status = "disabled";
+ };
+
+ mii_conv3: mii-conv@3 {
+ reg = <3>;
+ status = "disabled";
+ };
+
+ mii_conv4: mii-conv@4 {
+ reg = <4>;
+ status = "disabled";
+ };
+
+ mii_conv5: mii-conv@5 {
+ reg = <5>;
+ status = "disabled";
+ };
+ };
+
+ switch: switch@44050000 {
+ compatible = "renesas,r9a06g032-a5psw", "renesas,rzn1-a5psw";
+ reg = <0x44050000 0x10000>;
+ clocks = <&sysctrl R9A06G032_HCLK_SWITCH>,
+ <&sysctrl R9A06G032_CLK_SWITCH>;
+ clock-names = "hclk", "clk";
+ power-domains = <&sysctrl>;
+ status = "disabled";
+
+ ethernet-ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ switch_port0: port@0 {
+ reg = <0>;
+ pcs-handle = <&mii_conv5>;
+ status = "disabled";
+ };
+
+ switch_port1: port@1 {
+ reg = <1>;
+ pcs-handle = <&mii_conv4>;
+ status = "disabled";
+ };
+
+ switch_port2: port@2 {
+ reg = <2>;
+ pcs-handle = <&mii_conv3>;
+ status = "disabled";
+ };
+
+ switch_port3: port@3 {
+ reg = <3>;
+ pcs-handle = <&mii_conv2>;
+ status = "disabled";
+ };
+
+ switch_port4: port@4 {
+ reg = <4>;
+ ethernet = <&gmac2>;
+ label = "cpu";
+ phy-mode = "internal";
+ status = "disabled";
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ };
+ };
+ };
+ };
+
gic: interrupt-controller@44101000 {
compatible = "arm,gic-400", "arm,cortex-a7-gic";
interrupt-controller;
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 9e457156ad4d..6a1c9fca5260 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -712,22 +712,6 @@ static inline void emit_alu_r(const u8 dst, const u8 src, const bool is64,
}
}
-/* ALU operation (32 bit)
- * dst = dst (op) src
- */
-static inline void emit_a32_alu_r(const s8 dst, const s8 src,
- struct jit_ctx *ctx, const bool is64,
- const bool hi, const u8 op) {
- const s8 *tmp = bpf2a32[TMP_REG_1];
- s8 rn, rd;
-
- rn = arm_bpf_get_reg32(src, tmp[1], ctx);
- rd = arm_bpf_get_reg32(dst, tmp[0], ctx);
- /* ALU operation */
- emit_alu_r(rd, rn, is64, hi, op, ctx);
- arm_bpf_put_reg32(dst, rd, ctx);
-}
-
/* ALU operation (64 bit) */
static inline void emit_a32_alu_r64(const bool is64, const s8 dst[],
const s8 src[], struct jit_ctx *ctx,
diff --git a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts
index 40cf2236c0b6..7df8cfb1d3b9 100644
--- a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts
@@ -394,6 +394,54 @@
status = "disabled";
};
+&mdio0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ switch@0 {
+ compatible = "mediatek,mt7531";
+ reg = <0>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@1 {
+ reg = <1>;
+ label = "lan0";
+ };
+
+ port@2 {
+ reg = <2>;
+ label = "lan1";
+ };
+
+ port@3 {
+ reg = <3>;
+ label = "lan2";
+ };
+
+ port@4 {
+ reg = <4>;
+ label = "lan3";
+ };
+
+ port@5 {
+ reg = <5>;
+ label = "cpu";
+ ethernet = <&gmac0>;
+ phy-mode = "rgmii";
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ pause;
+ };
+ };
+ };
+ };
+};
+
&mdio1 {
rgmii_phy1: ethernet-phy@0 {
compatible = "ethernet-phy-ieee802.3-c22";
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 47a1e25e25bb..de32152cea04 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -363,11 +363,6 @@ struct kvm_vcpu_arch {
struct kvm_pmu pmu;
/*
- * Anything that is not used directly from assembly code goes
- * here.
- */
-
- /*
* Guest registers we preserve during guest debugging.
*
* These shadow registers are updated by the kvm_handle_sys_reg
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 3c8af033a997..0e80db4327b6 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -113,6 +113,9 @@ static __always_inline bool has_vhe(void)
/*
* Code only run in VHE/NVHE hyp context can assume VHE is present or
* absent. Otherwise fall back to caps.
+ * This allows the compiler to discard VHE-specific code from the
+ * nVHE object, reducing the number of external symbol references
+ * needed to link.
*/
if (is_vhe_hyp_code())
return true;
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 42ea2bd856c6..8d88433de81d 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1974,15 +1974,7 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
#ifdef CONFIG_KVM
static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused)
{
- if (kvm_get_mode() != KVM_MODE_PROTECTED)
- return false;
-
- if (is_kernel_in_hyp_mode()) {
- pr_warn("Protected KVM not available with VHE\n");
- return false;
- }
-
- return true;
+ return kvm_get_mode() == KVM_MODE_PROTECTED;
}
#endif /* CONFIG_KVM */
@@ -3109,7 +3101,6 @@ void cpu_set_feature(unsigned int num)
WARN_ON(num >= MAX_CPU_FEATURES);
elf_hwcap |= BIT(num);
}
-EXPORT_SYMBOL_GPL(cpu_set_feature);
bool cpu_have_feature(unsigned int num)
{
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index d42a205ef625..bd5df50e4643 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -102,7 +102,6 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
* x19-x29 per the AAPCS, and we created frame records upon entry, so we need
* to restore x0-x8, x29, and x30.
*/
-ftrace_common_return:
/* Restore function arguments */
ldp x0, x1, [sp]
ldp x2, x3, [sp, #S_X2]
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index f447c4a36f69..ea5dc7c90f46 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -78,47 +78,76 @@ static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
}
/*
- * Turn on the call to ftrace_caller() in instrumented function
+ * Find the address the callsite must branch to in order to reach '*addr'.
+ *
+ * Due to the limited range of 'BL' instructions, modules may be placed too far
+ * away to branch directly and must use a PLT.
+ *
+ * Returns true when '*addr' contains a reachable target address, or has been
+ * modified to contain a PLT address. Returns false otherwise.
*/
-int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+static bool ftrace_find_callable_addr(struct dyn_ftrace *rec,
+ struct module *mod,
+ unsigned long *addr)
{
unsigned long pc = rec->ip;
- u32 old, new;
- long offset = (long)pc - (long)addr;
+ long offset = (long)*addr - (long)pc;
+ struct plt_entry *plt;
- if (offset < -SZ_128M || offset >= SZ_128M) {
- struct module *mod;
- struct plt_entry *plt;
+ /*
+ * When the target is within range of the 'BL' instruction, use 'addr'
+ * as-is and branch to that directly.
+ */
+ if (offset >= -SZ_128M && offset < SZ_128M)
+ return true;
- if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
- return -EINVAL;
+ /*
+ * When the target is outside of the range of a 'BL' instruction, we
+ * must use a PLT to reach it. We can only place PLTs for modules, and
+ * only when module PLT support is built-in.
+ */
+ if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
+ return false;
- /*
- * On kernels that support module PLTs, the offset between the
- * branch instruction and its target may legally exceed the
- * range of an ordinary relative 'bl' opcode. In this case, we
- * need to branch via a trampoline in the module.
- *
- * NOTE: __module_text_address() must be called with preemption
- * disabled, but we can rely on ftrace_lock to ensure that 'mod'
- * retains its validity throughout the remainder of this code.
- */
+ /*
+ * 'mod' is only set at module load time, but if we end up
+ * dealing with an out-of-range condition, we can assume it
+ * is due to a module being loaded far away from the kernel.
+ *
+ * NOTE: __module_text_address() must be called with preemption
+ * disabled, but we can rely on ftrace_lock to ensure that 'mod'
+ * retains its validity throughout the remainder of this code.
+ */
+ if (!mod) {
preempt_disable();
mod = __module_text_address(pc);
preempt_enable();
+ }
- if (WARN_ON(!mod))
- return -EINVAL;
+ if (WARN_ON(!mod))
+ return false;
- plt = get_ftrace_plt(mod, addr);
- if (!plt) {
- pr_err("ftrace: no module PLT for %ps\n", (void *)addr);
- return -EINVAL;
- }
-
- addr = (unsigned long)plt;
+ plt = get_ftrace_plt(mod, *addr);
+ if (!plt) {
+ pr_err("ftrace: no module PLT for %ps\n", (void *)*addr);
+ return false;
}
+ *addr = (unsigned long)plt;
+ return true;
+}
+
+/*
+ * Turn on the call to ftrace_caller() in instrumented function
+ */
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long pc = rec->ip;
+ u32 old, new;
+
+ if (!ftrace_find_callable_addr(rec, NULL, &addr))
+ return -EINVAL;
+
old = aarch64_insn_gen_nop();
new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
@@ -132,6 +161,11 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long pc = rec->ip;
u32 old, new;
+ if (!ftrace_find_callable_addr(rec, NULL, &old_addr))
+ return -EINVAL;
+ if (!ftrace_find_callable_addr(rec, NULL, &addr))
+ return -EINVAL;
+
old = aarch64_insn_gen_branch_imm(pc, old_addr,
AARCH64_INSN_BRANCH_LINK);
new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
@@ -181,54 +215,15 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
unsigned long addr)
{
unsigned long pc = rec->ip;
- bool validate = true;
u32 old = 0, new;
- long offset = (long)pc - (long)addr;
- if (offset < -SZ_128M || offset >= SZ_128M) {
- u32 replaced;
-
- if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
- return -EINVAL;
-
- /*
- * 'mod' is only set at module load time, but if we end up
- * dealing with an out-of-range condition, we can assume it
- * is due to a module being loaded far away from the kernel.
- */
- if (!mod) {
- preempt_disable();
- mod = __module_text_address(pc);
- preempt_enable();
-
- if (WARN_ON(!mod))
- return -EINVAL;
- }
-
- /*
- * The instruction we are about to patch may be a branch and
- * link instruction that was redirected via a PLT entry. In
- * this case, the normal validation will fail, but we can at
- * least check that we are dealing with a branch and link
- * instruction that points into the right module.
- */
- if (aarch64_insn_read((void *)pc, &replaced))
- return -EFAULT;
-
- if (!aarch64_insn_is_bl(replaced) ||
- !within_module(pc + aarch64_get_branch_offset(replaced),
- mod))
- return -EINVAL;
-
- validate = false;
- } else {
- old = aarch64_insn_gen_branch_imm(pc, addr,
- AARCH64_INSN_BRANCH_LINK);
- }
+ if (!ftrace_find_callable_addr(rec, mod, &addr))
+ return -EINVAL;
+ old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
new = aarch64_insn_gen_nop();
- return ftrace_modify_code(pc, old, new, validate);
+ return ftrace_modify_code(pc, old, new, true);
}
void arch_ftrace_update_code(int command)
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index cf3a759f10d4..fea3223704b6 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -303,14 +303,13 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
early_fixmap_init();
early_ioremap_init();
+ setup_machine_fdt(__fdt_pointer);
+
/*
* Initialise the static keys early as they may be enabled by the
- * cpufeature code, early parameters, and DT setup.
+ * cpufeature code and early parameters.
*/
jump_label_init();
-
- setup_machine_fdt(__fdt_pointer);
-
parse_early_param();
/*
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 4e39ace073af..3b8d062e30ea 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -1230,6 +1230,9 @@ bool kvm_arch_timer_get_input_level(int vintid)
struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
struct arch_timer_context *timer;
+ if (WARN(!vcpu, "No vcpu context!\n"))
+ return false;
+
if (vintid == vcpu_vtimer(vcpu)->irq.irq)
timer = vcpu_vtimer(vcpu);
else if (vintid == vcpu_ptimer(vcpu)->irq.irq)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 400bb0fe2745..a0188144a122 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -150,8 +150,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (ret)
goto out_free_stage2_pgd;
- if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL))
+ if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) {
+ ret = -ENOMEM;
goto out_free_stage2_pgd;
+ }
cpumask_copy(kvm->arch.supported_cpus, cpu_possible_mask);
kvm_vgic_early_init(kvm);
@@ -2271,7 +2273,11 @@ static int __init early_kvm_mode_cfg(char *arg)
return -EINVAL;
if (strcmp(arg, "protected") == 0) {
- kvm_mode = KVM_MODE_PROTECTED;
+ if (!is_kernel_in_hyp_mode())
+ kvm_mode = KVM_MODE_PROTECTED;
+ else
+ pr_warn_once("Protected KVM not available with VHE\n");
+
return 0;
}
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 3d251a4d2cf7..6012b08ecb14 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -80,6 +80,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
vcpu->arch.flags &= ~KVM_ARM64_FP_ENABLED;
vcpu->arch.flags |= KVM_ARM64_FP_HOST;
+ vcpu->arch.flags &= ~KVM_ARM64_HOST_SVE_ENABLED;
if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
@@ -93,6 +94,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
* operations. Do this for ZA as well for now for simplicity.
*/
if (system_supports_sme()) {
+ vcpu->arch.flags &= ~KVM_ARM64_HOST_SME_ENABLED;
if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
vcpu->arch.flags |= KVM_ARM64_HOST_SME_ENABLED;
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 78edf077fa3b..1e78acf9662e 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -314,15 +314,11 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
enum kvm_pgtable_prot prot)
{
- hyp_assert_lock_held(&host_kvm.lock);
-
return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
}
int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
{
- hyp_assert_lock_held(&host_kvm.lock);
-
return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt,
addr, size, &host_s2_pool, owner_id);
}
diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
index b6d86e423319..35a4331ba5f3 100644
--- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -243,15 +243,9 @@ u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id)
case SYS_ID_AA64MMFR2_EL1:
return get_pvm_id_aa64mmfr2(vcpu);
default:
- /*
- * Should never happen because all cases are covered in
- * pvm_sys_reg_descs[].
- */
- WARN_ON(1);
- break;
+ /* Unhandled ID register, RAZ */
+ return 0;
}
-
- return 0;
}
static u64 read_id_reg(const struct kvm_vcpu *vcpu,
@@ -332,6 +326,16 @@ static bool pvm_gic_read_sre(struct kvm_vcpu *vcpu,
/* Mark the specified system register as an AArch64 feature id register. */
#define AARCH64(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch64 }
+/*
+ * sys_reg_desc initialiser for architecturally unallocated cpufeature ID
+ * register with encoding Op0=3, Op1=0, CRn=0, CRm=crm, Op2=op2
+ * (1 <= crm < 8, 0 <= Op2 < 8).
+ */
+#define ID_UNALLOCATED(crm, op2) { \
+ Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2), \
+ .access = pvm_access_id_aarch64, \
+}
+
/* Mark the specified system register as Read-As-Zero/Write-Ignored */
#define RAZ_WI(REG) { SYS_DESC(REG), .access = pvm_access_raz_wi }
@@ -375,24 +379,46 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = {
AARCH32(SYS_MVFR0_EL1),
AARCH32(SYS_MVFR1_EL1),
AARCH32(SYS_MVFR2_EL1),
+ ID_UNALLOCATED(3,3),
AARCH32(SYS_ID_PFR2_EL1),
AARCH32(SYS_ID_DFR1_EL1),
AARCH32(SYS_ID_MMFR5_EL1),
+ ID_UNALLOCATED(3,7),
/* AArch64 ID registers */
/* CRm=4 */
AARCH64(SYS_ID_AA64PFR0_EL1),
AARCH64(SYS_ID_AA64PFR1_EL1),
+ ID_UNALLOCATED(4,2),
+ ID_UNALLOCATED(4,3),
AARCH64(SYS_ID_AA64ZFR0_EL1),
+ ID_UNALLOCATED(4,5),
+ ID_UNALLOCATED(4,6),
+ ID_UNALLOCATED(4,7),
AARCH64(SYS_ID_AA64DFR0_EL1),
AARCH64(SYS_ID_AA64DFR1_EL1),
+ ID_UNALLOCATED(5,2),
+ ID_UNALLOCATED(5,3),
AARCH64(SYS_ID_AA64AFR0_EL1),
AARCH64(SYS_ID_AA64AFR1_EL1),
+ ID_UNALLOCATED(5,6),
+ ID_UNALLOCATED(5,7),
AARCH64(SYS_ID_AA64ISAR0_EL1),
AARCH64(SYS_ID_AA64ISAR1_EL1),
+ AARCH64(SYS_ID_AA64ISAR2_EL1),
+ ID_UNALLOCATED(6,3),
+ ID_UNALLOCATED(6,4),
+ ID_UNALLOCATED(6,5),
+ ID_UNALLOCATED(6,6),
+ ID_UNALLOCATED(6,7),
AARCH64(SYS_ID_AA64MMFR0_EL1),
AARCH64(SYS_ID_AA64MMFR1_EL1),
AARCH64(SYS_ID_AA64MMFR2_EL1),
+ ID_UNALLOCATED(7,3),
+ ID_UNALLOCATED(7,4),
+ ID_UNALLOCATED(7,5),
+ ID_UNALLOCATED(7,6),
+ ID_UNALLOCATED(7,7),
/* Scalable Vector Registers are restricted. */
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v2.c b/arch/arm64/kvm/vgic/vgic-mmio-v2.c
index 77a67e9d3d14..e070cda86e12 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v2.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v2.c
@@ -429,11 +429,11 @@ static const struct vgic_register_region vgic_v2_dist_registers[] = {
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET,
vgic_mmio_read_pending, vgic_mmio_write_spending,
- NULL, vgic_uaccess_write_spending, 1,
+ vgic_uaccess_read_pending, vgic_uaccess_write_spending, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR,
vgic_mmio_read_pending, vgic_mmio_write_cpending,
- NULL, vgic_uaccess_write_cpending, 1,
+ vgic_uaccess_read_pending, vgic_uaccess_write_cpending, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET,
vgic_mmio_read_active, vgic_mmio_write_sactive,
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index f7aa7bcd6fb8..f15e29cc63ce 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -353,42 +353,6 @@ static unsigned long vgic_mmio_read_v3_idregs(struct kvm_vcpu *vcpu,
return 0;
}
-static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len)
-{
- u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
- u32 value = 0;
- int i;
-
- /*
- * pending state of interrupt is latched in pending_latch variable.
- * Userspace will save and restore pending state and line_level
- * separately.
- * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst
- * for handling of ISPENDR and ICPENDR.
- */
- for (i = 0; i < len * 8; i++) {
- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
- bool state = irq->pending_latch;
-
- if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
- int err;
-
- err = irq_get_irqchip_state(irq->host_irq,
- IRQCHIP_STATE_PENDING,
- &state);
- WARN_ON(err);
- }
-
- if (state)
- value |= (1U << i);
-
- vgic_put_irq(vcpu->kvm, irq);
- }
-
- return value;
-}
-
static int vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
@@ -666,7 +630,7 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = {
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR,
vgic_mmio_read_pending, vgic_mmio_write_spending,
- vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1,
+ vgic_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR,
vgic_mmio_read_pending, vgic_mmio_write_cpending,
@@ -750,7 +714,7 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = {
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISPENDR0,
vgic_mmio_read_pending, vgic_mmio_write_spending,
- vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4,
+ vgic_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICPENDR0,
vgic_mmio_read_pending, vgic_mmio_write_cpending,
diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c
index 49837d3a3ef5..997d0fce2088 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio.c
@@ -226,8 +226,9 @@ int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu,
return 0;
}
-unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len)
+static unsigned long __read_pending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ bool is_user)
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
u32 value = 0;
@@ -239,6 +240,15 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
unsigned long flags;
bool val;
+ /*
+ * When used from userspace with a GICv3 model:
+ *
+ * Pending state of interrupt is latched in pending_latch
+ * variable. Userspace will save and restore pending state
+ * and line_level separately.
+ * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst
+ * for handling of ISPENDR and ICPENDR.
+ */
raw_spin_lock_irqsave(&irq->irq_lock, flags);
if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
int err;
@@ -248,10 +258,20 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
IRQCHIP_STATE_PENDING,
&val);
WARN_RATELIMIT(err, "IRQ %d", irq->host_irq);
- } else if (vgic_irq_is_mapped_level(irq)) {
+ } else if (!is_user && vgic_irq_is_mapped_level(irq)) {
val = vgic_get_phys_line_level(irq);
} else {
- val = irq_is_pending(irq);
+ switch (vcpu->kvm->arch.vgic.vgic_model) {
+ case KVM_DEV_TYPE_ARM_VGIC_V3:
+ if (is_user) {
+ val = irq->pending_latch;
+ break;
+ }
+ fallthrough;
+ default:
+ val = irq_is_pending(irq);
+ break;
+ }
}
value |= ((u32)val << i);
@@ -263,6 +283,18 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
return value;
}
+unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len)
+{
+ return __read_pending(vcpu, addr, len, false);
+}
+
+unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len)
+{
+ return __read_pending(vcpu, addr, len, true);
+}
+
static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
{
return (vgic_irq_is_sgi(irq->intid) &&
diff --git a/arch/arm64/kvm/vgic/vgic-mmio.h b/arch/arm64/kvm/vgic/vgic-mmio.h
index 3fa696f198a3..6082d4b66d39 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio.h
+++ b/arch/arm64/kvm/vgic/vgic-mmio.h
@@ -149,6 +149,9 @@ int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu,
unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len);
+unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len);
+
void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val);
diff --git a/arch/arm64/kvm/vmid.c b/arch/arm64/kvm/vmid.c
index 8d5f0506fd87..d78ae63d7c15 100644
--- a/arch/arm64/kvm/vmid.c
+++ b/arch/arm64/kvm/vmid.c
@@ -66,7 +66,7 @@ static void flush_context(void)
* the next context-switch, we broadcast TLB flush + I-cache
* invalidation over the inner shareable domain on rollover.
*/
- kvm_call_hyp(__kvm_flush_vm_context);
+ kvm_call_hyp(__kvm_flush_vm_context);
}
static bool check_update_reserved_vmid(u64 vmid, u64 newvmid)
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 0ea6cc25dc66..21c907987080 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -218,8 +218,6 @@ SYM_FUNC_ALIAS(__dma_flush_area, __pi___dma_flush_area)
*/
SYM_FUNC_START(__pi___dma_map_area)
add x1, x0, x1
- cmp w2, #DMA_FROM_DEVICE
- b.eq __pi_dcache_inval_poc
b __pi_dcache_clean_poc
SYM_FUNC_END(__pi___dma_map_area)
SYM_FUNC_ALIAS(__dma_map_area, __pi___dma_map_area)
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 80657bf83b05..1920d52653b4 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -343,6 +343,7 @@ config NR_CPUS
config NUMA
bool "NUMA Support"
+ select SMP
select ACPI_NUMA if ACPI
help
Say Y to compile the kernel with NUMA (Non-Uniform Memory Access)
diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h
index befe8184aa08..0ef3b18f8980 100644
--- a/arch/loongarch/include/asm/hardirq.h
+++ b/arch/loongarch/include/asm/hardirq.h
@@ -19,7 +19,7 @@ typedef struct {
unsigned int __softirq_pending;
} ____cacheline_aligned irq_cpustat_t;
-DECLARE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat);
+DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
#define __ARCH_IRQ_STAT
diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h
index 34f15a6fb1e7..e6569f18c6dd 100644
--- a/arch/loongarch/include/asm/percpu.h
+++ b/arch/loongarch/include/asm/percpu.h
@@ -6,6 +6,7 @@
#define __ASM_PERCPU_H
#include <asm/cmpxchg.h>
+#include <asm/loongarch.h>
/* Use r21 for fast access */
register unsigned long __my_cpu_offset __asm__("$r21");
diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h
index 551e1f37c705..71189b28bfb2 100644
--- a/arch/loongarch/include/asm/smp.h
+++ b/arch/loongarch/include/asm/smp.h
@@ -9,10 +9,16 @@
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/linkage.h>
-#include <linux/smp.h>
#include <linux/threads.h>
#include <linux/cpumask.h>
+extern int smp_num_siblings;
+extern int num_processors;
+extern int disabled_cpus;
+extern cpumask_t cpu_sibling_map[];
+extern cpumask_t cpu_core_map[];
+extern cpumask_t cpu_foreign_map[];
+
void loongson3_smp_setup(void);
void loongson3_prepare_cpus(unsigned int max_cpus);
void loongson3_boot_secondary(int cpu, struct task_struct *idle);
@@ -25,26 +31,11 @@ int loongson3_cpu_disable(void);
void loongson3_cpu_die(unsigned int cpu);
#endif
-#ifdef CONFIG_SMP
-
static inline void plat_smp_setup(void)
{
loongson3_smp_setup();
}
-#else /* !CONFIG_SMP */
-
-static inline void plat_smp_setup(void) { }
-
-#endif /* !CONFIG_SMP */
-
-extern int smp_num_siblings;
-extern int num_processors;
-extern int disabled_cpus;
-extern cpumask_t cpu_sibling_map[];
-extern cpumask_t cpu_core_map[];
-extern cpumask_t cpu_foreign_map[];
-
static inline int raw_smp_processor_id(void)
{
#if defined(__VDSO__)
diff --git a/arch/loongarch/include/asm/timex.h b/arch/loongarch/include/asm/timex.h
index d3ed99a4fdbd..fb41e9e7a222 100644
--- a/arch/loongarch/include/asm/timex.h
+++ b/arch/loongarch/include/asm/timex.h
@@ -12,13 +12,6 @@
#include <asm/cpu.h>
#include <asm/cpu-features.h>
-/*
- * Standard way to access the cycle counter.
- * Currently only used on SMP for scheduling.
- *
- * We know that all SMP capable CPUs have cycle counters.
- */
-
typedef unsigned long cycles_t;
#define get_cycles get_cycles
diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c
index b16c3dea5eeb..bb729ee8a237 100644
--- a/arch/loongarch/kernel/acpi.c
+++ b/arch/loongarch/kernel/acpi.c
@@ -138,6 +138,7 @@ void __init acpi_boot_table_init(void)
}
}
+#ifdef CONFIG_SMP
static int set_processor_mask(u32 id, u32 flags)
{
@@ -166,15 +167,18 @@ static int set_processor_mask(u32 id, u32 flags)
return cpu;
}
+#endif
static void __init acpi_process_madt(void)
{
+#ifdef CONFIG_SMP
int i;
for (i = 0; i < NR_CPUS; i++) {
__cpu_number_map[i] = -1;
__cpu_logical_map[i] = -1;
}
+#endif
loongson_sysconf.nr_cpus = num_processors;
}
diff --git a/arch/loongarch/kernel/cacheinfo.c b/arch/loongarch/kernel/cacheinfo.c
index 8c9fe29e98f0..b38f5489d094 100644
--- a/arch/loongarch/kernel/cacheinfo.c
+++ b/arch/loongarch/kernel/cacheinfo.c
@@ -4,6 +4,7 @@
*
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
*/
+#include <asm/cpu-info.h>
#include <linux/cacheinfo.h>
/* Populates leaf and increments to next leaf */
diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
index 4b671d305ede..b34b8d792aa4 100644
--- a/arch/loongarch/kernel/irq.c
+++ b/arch/loongarch/kernel/irq.c
@@ -22,6 +22,8 @@
#include <asm/setup.h>
DEFINE_PER_CPU(unsigned long, irq_stack);
+DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
+EXPORT_PER_CPU_SYMBOL(irq_stat);
struct irq_domain *cpu_domain;
struct irq_domain *liointc_domain;
@@ -56,8 +58,11 @@ int arch_show_interrupts(struct seq_file *p, int prec)
void __init init_IRQ(void)
{
- int i, r, ipi_irq;
+ int i;
+#ifdef CONFIG_SMP
+ int r, ipi_irq;
static int ipi_dummy_dev;
+#endif
unsigned int order = get_order(IRQ_STACK_SIZE);
struct page *page;
diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
index 6d944d65f600..bfa0dfe8b7d7 100644
--- a/arch/loongarch/kernel/process.c
+++ b/arch/loongarch/kernel/process.c
@@ -120,10 +120,12 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
/*
* Copy architecture-specific thread state
*/
-int copy_thread(unsigned long clone_flags, unsigned long usp,
- unsigned long kthread_arg, struct task_struct *p, unsigned long tls)
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
unsigned long childksp;
+ unsigned long tls = args->tls;
+ unsigned long usp = args->stack;
+ unsigned long clone_flags = args->flags;
struct pt_regs *childregs, *regs = current_pt_regs();
childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE - 32;
@@ -136,12 +138,12 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
p->thread.csr_crmd = csr_read32(LOONGARCH_CSR_CRMD);
p->thread.csr_prmd = csr_read32(LOONGARCH_CSR_PRMD);
p->thread.csr_ecfg = csr_read32(LOONGARCH_CSR_ECFG);
- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(args->fn)) {
/* kernel thread */
- p->thread.reg23 = usp; /* fn */
- p->thread.reg24 = kthread_arg;
p->thread.reg03 = childksp;
- p->thread.reg01 = (unsigned long) ret_from_kernel_thread;
+ p->thread.reg23 = (unsigned long)args->fn;
+ p->thread.reg24 = (unsigned long)args->fn_arg;
+ p->thread.reg01 = (unsigned long)ret_from_kernel_thread;
memset(childregs, 0, sizeof(struct pt_regs));
childregs->csr_euen = p->thread.csr_euen;
childregs->csr_crmd = p->thread.csr_crmd;
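copy_thread() above now pulls its parameters from struct kernel_clone_args; a non-NULL args->fn marks a kernel thread, so the child is set up to enter fn(fn_arg) via ret_from_kernel_thread instead of returning to user mode. A toy model of that dispatch (the field names mirror the kernel struct, everything else is illustrative):

#include <stdio.h>

/* Trimmed-down model of struct kernel_clone_args. */
struct clone_args_toy {
	unsigned long flags;
	unsigned long stack;	/* user stack pointer for user threads */
	unsigned long tls;
	int (*fn)(void *);	/* non-NULL => kernel thread */
	void *fn_arg;
};

static int dummy_fn(void *arg) { (void)arg; return 0; }

static void setup_child(const struct clone_args_toy *args)
{
	if (args->fn)	/* replaces the old PF_KTHREAD | PF_IO_WORKER test */
		printf("kernel thread: enter fn(%p)\n", args->fn_arg);
	else
		printf("user thread: usp=%#lx tls=%#lx\n",
		       args->stack, args->tls);
}

int main(void)
{
	struct clone_args_toy kthread = { .fn = dummy_fn, .fn_arg = (void *)0x2 };
	struct clone_args_toy uthread = { .stack = 0x7fff0000, .tls = 0x1000 };

	setup_child(&kthread);
	setup_child(&uthread);
	return 0;
}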
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index 185e4035811a..c74860b53375 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -39,7 +39,6 @@
#include <asm/pgalloc.h>
#include <asm/sections.h>
#include <asm/setup.h>
-#include <asm/smp.h>
#include <asm/time.h>
#define SMBIOS_BIOSSIZE_OFFSET 0x09
@@ -349,8 +348,6 @@ static void __init prefill_possible_map(void)
nr_cpu_ids = possible;
}
-#else
-static inline void prefill_possible_map(void) {}
#endif
void __init setup_arch(char **cmdline_p)
@@ -367,8 +364,10 @@ void __init setup_arch(char **cmdline_p)
arch_mem_init(cmdline_p);
resource_init();
+#ifdef CONFIG_SMP
plat_smp_setup();
prefill_possible_map();
+#endif
paging_init();
}
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index b8c53b755a25..73cec62504fb 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -66,8 +66,6 @@ static cpumask_t cpu_core_setup_map;
struct secondary_data cpuboot_data;
static DEFINE_PER_CPU(int, cpu_state);
-DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
-EXPORT_PER_CPU_SYMBOL(irq_stat);
enum ipi_msg_type {
IPI_RESCHEDULE,
diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S
index 9d508158fe1a..78311a6101a3 100644
--- a/arch/loongarch/kernel/vmlinux.lds.S
+++ b/arch/loongarch/kernel/vmlinux.lds.S
@@ -101,6 +101,7 @@ SECTIONS
STABS_DEBUG
DWARF_DEBUG
+ ELF_DETAILS
.gptab.sdata : {
*(.gptab.data)
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index c22f58155948..32ffef9f6e5b 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -364,8 +364,13 @@ config RISCV_ISA_SVPBMT
select RISCV_ALTERNATIVE
default y
help
- Adds support to dynamically detect the presence of the SVPBMT extension
- (Supervisor-mode: page-based memory types) and enable its usage.
+ Adds support to dynamically detect the presence of the SVPBMT
+ ISA-extension (Supervisor-mode: page-based memory types) and
+ enable its usage.
+
+ The memory type for a page contains a combination of attributes
+ that indicate the cacheability, idempotency, and ordering
+ properties for access to that page.
The SVPBMT extension is only available on 64Bit cpus.
diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas
index ebfcd5cc6eaf..457ac72c9b36 100644
--- a/arch/riscv/Kconfig.erratas
+++ b/arch/riscv/Kconfig.erratas
@@ -35,6 +35,7 @@ config ERRATA_SIFIVE_CIP_1200
config ERRATA_THEAD
bool "T-HEAD errata"
+ depends on !XIP_KERNEL
select RISCV_ALTERNATIVE
help
All T-HEAD errata Kconfig depend on this Kconfig. Disabling
diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi
index 737e0e70c432..0ec9a143dfd4 100644
--- a/arch/riscv/boot/dts/microchip/mpfs.dtsi
+++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi
@@ -192,6 +192,15 @@
riscv,ndev = <186>;
};
+ pdma: dma-controller@3000000 {
+ compatible = "sifive,fu540-c000-pdma", "sifive,pdma0";
+ reg = <0x0 0x3000000 0x0 0x8000>;
+ interrupt-parent = <&plic>;
+ interrupts = <5 6>, <7 8>, <9 10>, <11 12>;
+ dma-channels = <4>;
+ #dma-cells = <1>;
+ };
+
clkcfg: clkcfg@20002000 {
compatible = "microchip,mpfs-clkcfg";
reg = <0x0 0x20002000 0x0 0x1000>, <0x0 0x3E001000 0x0 0x1000>;
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index a6f62a6d1edd..12b05ce164bb 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -293,7 +293,6 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
unsigned int stage)
{
u32 cpu_req_feature = cpufeature_probe(stage);
- u32 cpu_apply_feature = 0;
struct alt_entry *alt;
u32 tmp;
@@ -307,10 +306,8 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
}
tmp = (1U << alt->errata_id);
- if (cpu_req_feature & tmp) {
+ if (cpu_req_feature & tmp)
patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len);
- cpu_apply_feature |= tmp;
- }
}
}
#endif
diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c
index 9f764df125db..6cd93995fb65 100644
--- a/arch/riscv/kvm/vmid.c
+++ b/arch/riscv/kvm/vmid.c
@@ -97,7 +97,7 @@ void kvm_riscv_gstage_vmid_update(struct kvm_vcpu *vcpu)
* We ran out of VMIDs so we increment vmid_version and
* start assigning VMIDs from 1.
*
- * This also means existing VMIDs assignement to all Guest
+ * This also means existing VMIDs assignment to all Guest
* instances is invalid and we have to force VMID re-assignment
* for all Guest instances. The Guest instances that were not
* running will automatically pick-up new VMIDs because will
diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h
index 2a3715bf29fe..d926e0f7ef57 100644
--- a/arch/riscv/net/bpf_jit.h
+++ b/arch/riscv/net/bpf_jit.h
@@ -69,6 +69,7 @@ struct rv_jit_context {
struct bpf_prog *prog;
u16 *insns; /* RV insns */
int ninsns;
+ int body_len;
int epilogue_offset;
int *offset; /* BPF to RV */
int nexentries;
diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
index be743d700aa7..737baf8715da 100644
--- a/arch/riscv/net/bpf_jit_core.c
+++ b/arch/riscv/net/bpf_jit_core.c
@@ -44,7 +44,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
unsigned int prog_size = 0, extable_size = 0;
bool tmp_blinded = false, extra_pass = false;
struct bpf_prog *tmp, *orig_prog = prog;
- int pass = 0, prev_ninsns = 0, i;
+ int pass = 0, prev_ninsns = 0, prologue_len, i;
struct rv_jit_data *jit_data;
struct rv_jit_context *ctx;
@@ -95,6 +95,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
prog = orig_prog;
goto out_offset;
}
+ ctx->body_len = ctx->ninsns;
bpf_jit_build_prologue(ctx);
ctx->epilogue_offset = ctx->ninsns;
bpf_jit_build_epilogue(ctx);
@@ -161,6 +162,11 @@ skip_init_ctx:
if (!prog->is_func || extra_pass) {
bpf_jit_binary_lock_ro(jit_data->header);
+ prologue_len = ctx->epilogue_offset - ctx->body_len;
+ for (i = 0; i < prog->len; i++)
+ ctx->offset[i] = ninsns_rvoff(prologue_len +
+ ctx->offset[i]);
+ bpf_prog_fill_jited_linfo(prog, ctx->offset);
out_offset:
kfree(ctx->offset);
kfree(jit_data);
diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c
index 5c092a9153ea..027847023184 100644
--- a/arch/um/drivers/virt-pci.c
+++ b/arch/um/drivers/virt-pci.c
@@ -544,6 +544,8 @@ static int um_pci_init_vqs(struct um_pci_device *dev)
dev->cmd_vq = vqs[0];
dev->irq_vq = vqs[1];
+ virtio_device_ready(dev->vdev);
+
for (i = 0; i < NUM_IRQ_MSGS; i++) {
void *msg = kzalloc(MAX_IRQ_MSG_SIZE, GFP_KERNEL);
@@ -587,7 +589,7 @@ static int um_pci_virtio_probe(struct virtio_device *vdev)
dev->irq = irq_alloc_desc(numa_node_id());
if (dev->irq < 0) {
err = dev->irq;
- goto error;
+ goto err_reset;
}
um_pci_devices[free].dev = dev;
vdev->priv = dev;
@@ -604,6 +606,9 @@ static int um_pci_virtio_probe(struct virtio_device *vdev)
um_pci_rescan();
return 0;
+err_reset:
+ virtio_reset_device(vdev);
+ vdev->config->del_vqs(vdev);
error:
mutex_unlock(&um_pci_mtx);
kfree(dev);
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 03deb4d6920d..928dcf7a20d9 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -124,6 +124,51 @@ static u64 get_cc_mask(void)
return BIT_ULL(gpa_width - 1);
}
+/*
+ * The TDX module spec states that #VE may be injected for a limited set of
+ * reasons:
+ *
+ * - Emulation of the architectural #VE injection on EPT violation;
+ *
+ * - As a result of guest TD execution of a disallowed instruction,
+ * a disallowed MSR access, or CPUID virtualization;
+ *
+ * - A notification to the guest TD about anomalous behavior;
+ *
+ * The last one is opt-in and is not used by the kernel.
+ *
+ * The Intel Software Developer's Manual describes cases when instruction
+ * length field can be used in section "Information for VM Exits Due to
+ * Instruction Execution".
+ *
+ * For TDX, it ultimately means GET_VEINFO provides reliable instruction length
+ * information if #VE occurred due to instruction execution, but not for EPT
+ * violations.
+ */
+static int ve_instr_len(struct ve_info *ve)
+{
+ switch (ve->exit_reason) {
+ case EXIT_REASON_HLT:
+ case EXIT_REASON_MSR_READ:
+ case EXIT_REASON_MSR_WRITE:
+ case EXIT_REASON_CPUID:
+ case EXIT_REASON_IO_INSTRUCTION:
+ /* It is safe to use ve->instr_len for #VE due to instructions */
+ return ve->instr_len;
+ case EXIT_REASON_EPT_VIOLATION:
+ /*
+ * For EPT violations, ve->insn_len is not defined. For those,
+ * the kernel must decode instructions manually and should not
+ * be using this function.
+ */
+ WARN_ONCE(1, "ve->instr_len is not defined for EPT violations");
+ return 0;
+ default:
+ WARN_ONCE(1, "Unexpected #VE-type: %lld\n", ve->exit_reason);
+ return ve->instr_len;
+ }
+}
+
static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti)
{
struct tdx_hypercall_args args = {
@@ -147,7 +192,7 @@ static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti)
return __tdx_hypercall(&args, do_sti ? TDX_HCALL_ISSUE_STI : 0);
}
-static bool handle_halt(void)
+static int handle_halt(struct ve_info *ve)
{
/*
* Since non safe halt is mainly used in CPU offlining
@@ -158,9 +203,9 @@ static bool handle_halt(void)
const bool do_sti = false;
if (__halt(irq_disabled, do_sti))
- return false;
+ return -EIO;
- return true;
+ return ve_instr_len(ve);
}
void __cpuidle tdx_safe_halt(void)
@@ -180,7 +225,7 @@ void __cpuidle tdx_safe_halt(void)
WARN_ONCE(1, "HLT instruction emulation failed\n");
}
-static bool read_msr(struct pt_regs *regs)
+static int read_msr(struct pt_regs *regs, struct ve_info *ve)
{
struct tdx_hypercall_args args = {
.r10 = TDX_HYPERCALL_STANDARD,
@@ -194,14 +239,14 @@ static bool read_msr(struct pt_regs *regs)
* (GHCI), section titled "TDG.VP.VMCALL<Instruction.RDMSR>".
*/
if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
- return false;
+ return -EIO;
regs->ax = lower_32_bits(args.r11);
regs->dx = upper_32_bits(args.r11);
- return true;
+ return ve_instr_len(ve);
}
-static bool write_msr(struct pt_regs *regs)
+static int write_msr(struct pt_regs *regs, struct ve_info *ve)
{
struct tdx_hypercall_args args = {
.r10 = TDX_HYPERCALL_STANDARD,
@@ -215,10 +260,13 @@ static bool write_msr(struct pt_regs *regs)
* can be found in TDX Guest-Host-Communication Interface
* (GHCI) section titled "TDG.VP.VMCALL<Instruction.WRMSR>".
*/
- return !__tdx_hypercall(&args, 0);
+ if (__tdx_hypercall(&args, 0))
+ return -EIO;
+
+ return ve_instr_len(ve);
}
-static bool handle_cpuid(struct pt_regs *regs)
+static int handle_cpuid(struct pt_regs *regs, struct ve_info *ve)
{
struct tdx_hypercall_args args = {
.r10 = TDX_HYPERCALL_STANDARD,
@@ -236,7 +284,7 @@ static bool handle_cpuid(struct pt_regs *regs)
*/
if (regs->ax < 0x40000000 || regs->ax > 0x4FFFFFFF) {
regs->ax = regs->bx = regs->cx = regs->dx = 0;
- return true;
+ return ve_instr_len(ve);
}
/*
@@ -245,7 +293,7 @@ static bool handle_cpuid(struct pt_regs *regs)
* (GHCI), section titled "VP.VMCALL<Instruction.CPUID>".
*/
if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
- return false;
+ return -EIO;
/*
* As per TDX GHCI CPUID ABI, r12-r15 registers contain contents of
@@ -257,7 +305,7 @@ static bool handle_cpuid(struct pt_regs *regs)
regs->cx = args.r14;
regs->dx = args.r15;
- return true;
+ return ve_instr_len(ve);
}
static bool mmio_read(int size, unsigned long addr, unsigned long *val)
@@ -283,10 +331,10 @@ static bool mmio_write(int size, unsigned long addr, unsigned long val)
EPT_WRITE, addr, val);
}
-static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
+static int handle_mmio(struct pt_regs *regs, struct ve_info *ve)
{
+ unsigned long *reg, val, vaddr;
char buffer[MAX_INSN_SIZE];
- unsigned long *reg, val;
struct insn insn = {};
enum mmio_type mmio;
int size, extend_size;
@@ -294,34 +342,49 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
/* Only in-kernel MMIO is supported */
if (WARN_ON_ONCE(user_mode(regs)))
- return false;
+ return -EFAULT;
if (copy_from_kernel_nofault(buffer, (void *)regs->ip, MAX_INSN_SIZE))
- return false;
+ return -EFAULT;
if (insn_decode(&insn, buffer, MAX_INSN_SIZE, INSN_MODE_64))
- return false;
+ return -EINVAL;
mmio = insn_decode_mmio(&insn, &size);
if (WARN_ON_ONCE(mmio == MMIO_DECODE_FAILED))
- return false;
+ return -EINVAL;
if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS) {
reg = insn_get_modrm_reg_ptr(&insn, regs);
if (!reg)
- return false;
+ return -EINVAL;
}
- ve->instr_len = insn.length;
+ /*
+ * Reject EPT violation #VEs that split pages.
+ *
+ * MMIO accesses are supposed to be naturally aligned and therefore
+ * never cross page boundaries. Seeing split page accesses indicates
+ * a bug or a load_unaligned_zeropad() that stepped into an MMIO page.
+ *
+ * load_unaligned_zeropad() will recover using exception fixups.
+ */
+ vaddr = (unsigned long)insn_get_addr_ref(&insn, regs);
+ if (vaddr / PAGE_SIZE != (vaddr + size - 1) / PAGE_SIZE)
+ return -EFAULT;
/* Handle writes first */
switch (mmio) {
case MMIO_WRITE:
memcpy(&val, reg, size);
- return mmio_write(size, ve->gpa, val);
+ if (!mmio_write(size, ve->gpa, val))
+ return -EIO;
+ return insn.length;
case MMIO_WRITE_IMM:
val = insn.immediate.value;
- return mmio_write(size, ve->gpa, val);
+ if (!mmio_write(size, ve->gpa, val))
+ return -EIO;
+ return insn.length;
case MMIO_READ:
case MMIO_READ_ZERO_EXTEND:
case MMIO_READ_SIGN_EXTEND:
@@ -334,15 +397,15 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
* decoded or handled properly. It was likely not using io.h
* helpers or accessed MMIO accidentally.
*/
- return false;
+ return -EINVAL;
default:
WARN_ONCE(1, "Unknown insn_decode_mmio() decode value?");
- return false;
+ return -EINVAL;
}
/* Handle reads */
if (!mmio_read(size, ve->gpa, &val))
- return false;
+ return -EIO;
switch (mmio) {
case MMIO_READ:
@@ -364,13 +427,13 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
default:
/* All other cases have to be covered with the first switch() */
WARN_ON_ONCE(1);
- return false;
+ return -EINVAL;
}
if (extend_size)
memset(reg, extend_val, extend_size);
memcpy(reg, &val, size);
- return true;
+ return insn.length;
}
static bool handle_in(struct pt_regs *regs, int size, int port)
@@ -421,13 +484,14 @@ static bool handle_out(struct pt_regs *regs, int size, int port)
*
* Returns the consumed instruction length on success, or -errno on failure.
*/
-static bool handle_io(struct pt_regs *regs, u32 exit_qual)
+static int handle_io(struct pt_regs *regs, struct ve_info *ve)
{
+ u32 exit_qual = ve->exit_qual;
int size, port;
- bool in;
+ bool in, ret;
if (VE_IS_IO_STRING(exit_qual))
- return false;
+ return -EIO;
in = VE_IS_IO_IN(exit_qual);
size = VE_GET_IO_SIZE(exit_qual);
@@ -435,9 +499,13 @@ static bool handle_io(struct pt_regs *regs, u32 exit_qual)
if (in)
- return handle_in(regs, size, port);
+ ret = handle_in(regs, size, port);
else
- return handle_out(regs, size, port);
+ ret = handle_out(regs, size, port);
+ if (!ret)
+ return -EIO;
+
+ return ve_instr_len(ve);
}
/*
@@ -447,13 +515,19 @@ static bool handle_io(struct pt_regs *regs, u32 exit_qual)
__init bool tdx_early_handle_ve(struct pt_regs *regs)
{
struct ve_info ve;
+ int insn_len;
tdx_get_ve_info(&ve);
if (ve.exit_reason != EXIT_REASON_IO_INSTRUCTION)
return false;
- return handle_io(regs, ve.exit_qual);
+ insn_len = handle_io(regs, &ve);
+ if (insn_len < 0)
+ return false;
+
+ regs->ip += insn_len;
+ return true;
}
void tdx_get_ve_info(struct ve_info *ve)
@@ -486,54 +560,65 @@ void tdx_get_ve_info(struct ve_info *ve)
ve->instr_info = upper_32_bits(out.r10);
}
-/* Handle the user initiated #VE */
-static bool virt_exception_user(struct pt_regs *regs, struct ve_info *ve)
+/*
+ * Handle the user initiated #VE.
+ *
+ * On success, returns the number of bytes RIP should be incremented (>=0)
+ * or -errno on error.
+ */
+static int virt_exception_user(struct pt_regs *regs, struct ve_info *ve)
{
switch (ve->exit_reason) {
case EXIT_REASON_CPUID:
- return handle_cpuid(regs);
+ return handle_cpuid(regs, ve);
default:
pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
- return false;
+ return -EIO;
}
}
-/* Handle the kernel #VE */
-static bool virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve)
+/*
+ * Handle the kernel #VE.
+ *
+ * On success, returns the number of bytes RIP should be incremented (>=0)
+ * or -errno on error.
+ */
+static int virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve)
{
switch (ve->exit_reason) {
case EXIT_REASON_HLT:
- return handle_halt();
+ return handle_halt(ve);
case EXIT_REASON_MSR_READ:
- return read_msr(regs);
+ return read_msr(regs, ve);
case EXIT_REASON_MSR_WRITE:
- return write_msr(regs);
+ return write_msr(regs, ve);
case EXIT_REASON_CPUID:
- return handle_cpuid(regs);
+ return handle_cpuid(regs, ve);
case EXIT_REASON_EPT_VIOLATION:
return handle_mmio(regs, ve);
case EXIT_REASON_IO_INSTRUCTION:
- return handle_io(regs, ve->exit_qual);
+ return handle_io(regs, ve);
default:
pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
- return false;
+ return -EIO;
}
}
bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve)
{
- bool ret;
+ int insn_len;
if (user_mode(regs))
- ret = virt_exception_user(regs, ve);
+ insn_len = virt_exception_user(regs, ve);
else
- ret = virt_exception_kernel(regs, ve);
+ insn_len = virt_exception_kernel(regs, ve);
+ if (insn_len < 0)
+ return false;
/* After successful #VE handling, move the IP */
- if (ret)
- regs->ip += ve->instr_len;
+ regs->ip += insn_len;
- return ret;
+ return true;
}
static bool tdx_tlb_flush_required(bool private)
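The split-page rejection added to handle_mmio() above is simple address arithmetic: an access is refused unless [vaddr, vaddr + size) sits entirely inside one page. A standalone sketch of the predicate (PAGE_SIZE hardcoded for illustration):

#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Mirrors the check in handle_mmio(): a split-page access is rejected,
 * which is how a load_unaligned_zeropad() that strays into an MMIO page
 * gets pushed back to its exception fixup. */
static int crosses_page(unsigned long vaddr, int size)
{
	return vaddr / PAGE_SIZE != (vaddr + size - 1) / PAGE_SIZE;
}

int main(void)
{
	/* 8-byte read starting 4 bytes before a page boundary */
	printf("%d\n", crosses_page(0x1000 - 4, 8));	/* 1: rejected */
	printf("%d\n", crosses_page(0x1000, 8));	/* 0: allowed */
	return 0;
}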
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 8b392b6b7b93..3de6d8b53367 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -13,6 +13,7 @@
#include <linux/io.h>
#include <asm/apic.h>
#include <asm/desc.h>
+#include <asm/sev.h>
#include <asm/hypervisor.h>
#include <asm/hyperv-tlfs.h>
#include <asm/mshyperv.h>
@@ -405,6 +406,11 @@ void __init hyperv_init(void)
}
if (hv_isolation_type_snp()) {
+ /* Negotiate GHCB Version. */
+ if (!hv_ghcb_negotiate_protocol())
+ hv_ghcb_terminate(SEV_TERM_SET_GEN,
+ GHCB_SEV_ES_PROT_UNSUPPORTED);
+
hv_ghcb_pg = alloc_percpu(union hv_ghcb *);
if (!hv_ghcb_pg)
goto free_vp_assist_page;
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 2b994117581e..1dbcbd9da74d 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -53,6 +53,8 @@ union hv_ghcb {
} hypercall;
} __packed __aligned(HV_HYP_PAGE_SIZE);
+static u16 hv_ghcb_version __ro_after_init;
+
u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
{
union hv_ghcb *hv_ghcb;
@@ -96,12 +98,85 @@ u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
return status;
}
+static inline u64 rd_ghcb_msr(void)
+{
+ return __rdmsr(MSR_AMD64_SEV_ES_GHCB);
+}
+
+static inline void wr_ghcb_msr(u64 val)
+{
+ native_wrmsrl(MSR_AMD64_SEV_ES_GHCB, val);
+}
+
+static enum es_result hv_ghcb_hv_call(struct ghcb *ghcb, u64 exit_code,
+ u64 exit_info_1, u64 exit_info_2)
+{
+ /* Fill in protocol and format specifiers */
+ ghcb->protocol_version = hv_ghcb_version;
+ ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;
+
+ ghcb_set_sw_exit_code(ghcb, exit_code);
+ ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
+ ghcb_set_sw_exit_info_2(ghcb, exit_info_2);
+
+ VMGEXIT();
+
+ if (ghcb->save.sw_exit_info_1 & GENMASK_ULL(31, 0))
+ return ES_VMM_ERROR;
+ else
+ return ES_OK;
+}
+
+void hv_ghcb_terminate(unsigned int set, unsigned int reason)
+{
+ u64 val = GHCB_MSR_TERM_REQ;
+
+ /* Tell the hypervisor what went wrong. */
+ val |= GHCB_SEV_TERM_REASON(set, reason);
+
+ /* Request Guest Termination from Hypervisor */
+ wr_ghcb_msr(val);
+ VMGEXIT();
+
+ while (true)
+ asm volatile("hlt\n" : : : "memory");
+}
+
+bool hv_ghcb_negotiate_protocol(void)
+{
+ u64 ghcb_gpa;
+ u64 val;
+
+ /* Save ghcb page gpa. */
+ ghcb_gpa = rd_ghcb_msr();
+
+ /* Do the GHCB protocol version negotiation */
+ wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ);
+ VMGEXIT();
+ val = rd_ghcb_msr();
+
+ if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP)
+ return false;
+
+ if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN ||
+ GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX)
+ return false;
+
+ hv_ghcb_version = min_t(size_t, GHCB_MSR_PROTO_MAX(val),
+ GHCB_PROTOCOL_MAX);
+
+ /* Write ghcb page back after negotiating protocol. */
+ wr_ghcb_msr(ghcb_gpa);
+ VMGEXIT();
+
+ return true;
+}
+
void hv_ghcb_msr_write(u64 msr, u64 value)
{
union hv_ghcb *hv_ghcb;
void **ghcb_base;
unsigned long flags;
- struct es_em_ctxt ctxt;
if (!hv_ghcb_pg)
return;
@@ -120,8 +195,7 @@ void hv_ghcb_msr_write(u64 msr, u64 value)
ghcb_set_rax(&hv_ghcb->ghcb, lower_32_bits(value));
ghcb_set_rdx(&hv_ghcb->ghcb, upper_32_bits(value));
- if (sev_es_ghcb_hv_call(&hv_ghcb->ghcb, false, &ctxt,
- SVM_EXIT_MSR, 1, 0))
+ if (hv_ghcb_hv_call(&hv_ghcb->ghcb, SVM_EXIT_MSR, 1, 0))
pr_warn("Fail to write msr via ghcb %llx.\n", msr);
local_irq_restore(flags);
@@ -133,7 +207,6 @@ void hv_ghcb_msr_read(u64 msr, u64 *value)
union hv_ghcb *hv_ghcb;
void **ghcb_base;
unsigned long flags;
- struct es_em_ctxt ctxt;
/* Check size of union hv_ghcb here. */
BUILD_BUG_ON(sizeof(union hv_ghcb) != HV_HYP_PAGE_SIZE);
@@ -152,8 +225,7 @@ void hv_ghcb_msr_read(u64 msr, u64 *value)
}
ghcb_set_rcx(&hv_ghcb->ghcb, msr);
- if (sev_es_ghcb_hv_call(&hv_ghcb->ghcb, false, &ctxt,
- SVM_EXIT_MSR, 0, 0))
+ if (hv_ghcb_hv_call(&hv_ghcb->ghcb, SVM_EXIT_MSR, 0, 0))
pr_warn("Fail to read msr via ghcb %llx.\n", msr);
else
*value = (u64)lower_32_bits(hv_ghcb->ghcb.save.rax)
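hv_ghcb_negotiate_protocol() above follows the GHCB MSR protocol: write the SEV_INFO request, VMGEXIT, then decode the response. A sketch of the field decode, using the layout the kernel's GHCB_MSR_PROTO_MIN/MAX helpers assume (bits 63:48 max version, 47:32 min version, 11:0 response code); the value below is fabricated:

#include <stdio.h>

#define GHCB_MSR_INFO(v)      ((v) & 0xfffULL)
#define GHCB_MSR_PROTO_MIN(v) (((v) >> 32) & 0xffffULL)
#define GHCB_MSR_PROTO_MAX(v) (((v) >> 48) & 0xffffULL)

int main(void)
{
	/* Fabricated SEV_INFO response: max proto 2, min proto 1, code 0x001 */
	unsigned long long val = (2ULL << 48) | (1ULL << 32) | 0x001;

	printf("resp=%#llx min=%llu max=%llu\n", GHCB_MSR_INFO(val),
	       GHCB_MSR_PROTO_MIN(val), GHCB_MSR_PROTO_MAX(val));
	return 0;
}

The negotiated version is then clamped to what the guest supports, as the min_t() against GHCB_PROTOCOL_MAX in the hunk shows.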
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 393f2bbb5e3a..03acc823838a 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -446,5 +446,6 @@
#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort (TAA) */
#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
+#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h
index 5a39ed59b6db..e8f58ddd06d9 100644
--- a/arch/x86/include/asm/e820/api.h
+++ b/arch/x86/include/asm/e820/api.h
@@ -4,9 +4,6 @@
#include <asm/e820/types.h>
-struct device;
-struct resource;
-
extern struct e820_table *e820_table;
extern struct e820_table *e820_table_kexec;
extern struct e820_table *e820_table_firmware;
@@ -46,8 +43,6 @@ extern void e820__register_nosave_regions(unsigned long limit_pfn);
extern int e820__get_entry_type(u64 start, u64 end);
-extern void remove_e820_regions(struct device *dev, struct resource *avail);
-
/*
* Returns true iff the specified range [start,end) is completely contained inside
* the ISA region.
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 71943dce691e..9636742a80f2 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -323,7 +323,7 @@ static inline u32 efi64_convert_status(efi_status_t status)
#define __efi64_argmap_get_memory_space_descriptor(phys, desc) \
(__efi64_split(phys), (desc))
-#define __efi64_argmap_set_memory_space_descriptor(phys, size, flags) \
+#define __efi64_argmap_set_memory_space_attributes(phys, size, flags) \
(__efi64_split(phys), __efi64_split(size), __efi64_split(flags))
/*
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3a240a64ac68..9217bd6cf0d1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1047,14 +1047,77 @@ struct kvm_x86_msr_filter {
};
enum kvm_apicv_inhibit {
+
+ /********************************************************************/
+ /* INHIBITs that are relevant to both Intel's APICv and AMD's AVIC. */
+ /********************************************************************/
+
+ /*
+ * APIC acceleration is disabled by a module parameter
+ * and/or not supported in hardware.
+ */
APICV_INHIBIT_REASON_DISABLE,
+
+ /*
+ * APIC acceleration is inhibited because AutoEOI feature is
+ * being used by a HyperV guest.
+ */
APICV_INHIBIT_REASON_HYPERV,
+
+ /*
+ * APIC acceleration is inhibited because the userspace didn't yet
+ * enable the kernel/split irqchip.
+ */
+ APICV_INHIBIT_REASON_ABSENT,
+
+ /*
+ * APIC acceleration is inhibited because KVM_GUESTDBG_BLOCKIRQ
+ * (an out-of-band debug measure that blocks all interrupts on this
+ * vCPU) was enabled, to avoid AVIC/APICv bypassing it.
+ */
+ APICV_INHIBIT_REASON_BLOCKIRQ,
+
+ /*
+ * For simplicity, APIC acceleration is inhibited the first time
+ * either the APIC ID or the APIC base is changed by the guest
+ * from its reset value.
+ */
+ APICV_INHIBIT_REASON_APIC_ID_MODIFIED,
+ APICV_INHIBIT_REASON_APIC_BASE_MODIFIED,
+
+ /******************************************************/
+ /* INHIBITs that are relevant only to the AMD's AVIC. */
+ /******************************************************/
+
+ /*
+ * AVIC is inhibited on a vCPU because it runs a nested guest.
+ *
+ * This is needed because unlike APICv, the peers of this vCPU
+ * cannot use the doorbell mechanism to signal interrupts via AVIC when
+ * a vCPU runs nested.
+ */
APICV_INHIBIT_REASON_NESTED,
+
+ /*
+ * On SVM, the wait for the IRQ window is implemented with a pending
+ * vIRQ, which cannot be injected while AVIC is enabled; thus AVIC
+ * is inhibited while KVM waits for the IRQ window.
+ */
APICV_INHIBIT_REASON_IRQWIN,
+
+ /*
+ * PIT (i8254) 're-inject' mode relies on EOI intercept,
+ * which AVIC doesn't support for edge-triggered interrupts.
+ */
APICV_INHIBIT_REASON_PIT_REINJ,
+
+ /*
+ * AVIC is inhibited because the guest has x2apic in its CPUID.
+ */
APICV_INHIBIT_REASON_X2APIC,
- APICV_INHIBIT_REASON_BLOCKIRQ,
- APICV_INHIBIT_REASON_ABSENT,
+
+ /*
+ * AVIC is disabled because SEV doesn't support it.
+ */
APICV_INHIBIT_REASON_SEV,
};
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index a82f603d4312..61f0c206bff0 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -179,9 +179,13 @@ int hv_set_mem_host_visibility(unsigned long addr, int numpages, bool visible);
#ifdef CONFIG_AMD_MEM_ENCRYPT
void hv_ghcb_msr_write(u64 msr, u64 value);
void hv_ghcb_msr_read(u64 msr, u64 *value);
+bool hv_ghcb_negotiate_protocol(void);
+void hv_ghcb_terminate(unsigned int set, unsigned int reason);
#else
static inline void hv_ghcb_msr_write(u64 msr, u64 value) {}
static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {}
+static inline bool hv_ghcb_negotiate_protocol(void) { return false; }
+static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {}
#endif
extern bool hv_isolation_type_snp(void);
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 403e83b4adc8..d27e0581b777 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -116,6 +116,30 @@
* Not susceptible to
* TSX Async Abort (TAA) vulnerabilities.
*/
+#define ARCH_CAP_SBDR_SSDP_NO BIT(13) /*
+ * Not susceptible to SBDR and SSDP
+ * variants of Processor MMIO stale data
+ * vulnerabilities.
+ */
+#define ARCH_CAP_FBSDP_NO BIT(14) /*
+ * Not susceptible to FBSDP variant of
+ * Processor MMIO stale data
+ * vulnerabilities.
+ */
+#define ARCH_CAP_PSDP_NO BIT(15) /*
+ * Not susceptible to PSDP variant of
+ * Processor MMIO stale data
+ * vulnerabilities.
+ */
+#define ARCH_CAP_FB_CLEAR BIT(17) /*
+ * VERW clears CPU fill buffer
+ * even on MDS_NO CPUs.
+ */
+#define ARCH_CAP_FB_CLEAR_CTRL BIT(18) /*
+ * MSR_IA32_MCU_OPT_CTRL[FB_CLEAR_DIS]
+ * bit available to control VERW
+ * behavior.
+ */
#define MSR_IA32_FLUSH_CMD 0x0000010b
#define L1D_FLUSH BIT(0) /*
@@ -133,6 +157,7 @@
#define MSR_IA32_MCU_OPT_CTRL 0x00000123
#define RNGDS_MITG_DIS BIT(0) /* SRBDS support */
#define RTM_ALLOW BIT(1) /* TSX development mode */
+#define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */
#define MSR_IA32_SYSENTER_CS 0x00000174
#define MSR_IA32_SYSENTER_ESP 0x00000175
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index acbaeaf83b61..da251a5645b0 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -269,6 +269,8 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
+DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);
+
#include <asm/segment.h>
/**
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index f52a886d35cf..70533fdcbf02 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -69,6 +69,8 @@ void pcibios_scan_specific_bus(int busn);
/* pci-irq.c */
+struct pci_dev;
+
struct irq_info {
u8 bus, devfn; /* Bus, device and function */
struct {
@@ -246,3 +248,9 @@ static inline void mmio_config_writel(void __iomem *pos, u32 val)
# define x86_default_pci_init_irq NULL
# define x86_default_pci_fixup_irqs NULL
#endif
+
+#if defined(CONFIG_PCI) && defined(CONFIG_ACPI)
+extern bool pci_use_e820;
+#else
+#define pci_use_e820 false
+#endif
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 7590ac2570b9..f8b9ee97a891 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -108,19 +108,16 @@ extern unsigned long _brk_end;
void *extend_brk(size_t size, size_t align);
/*
- * Reserve space in the brk section. The name must be unique within the file,
- * and somewhat descriptive. The size is in bytes.
+ * Reserve space in the .brk section, which is a block of memory from which the
+ * caller is allowed to allocate very early (before even memblock is available)
+ * by calling extend_brk(). All allocated memory will eventually be converted
+ * to memblock. Any leftover unallocated memory will be freed.
*
- * The allocation is done using inline asm (rather than using a section
- * attribute on a normal variable) in order to allow the use of @nobits, so
- * that it doesn't take up any space in the vmlinux file.
+ * The size is in bytes.
*/
-#define RESERVE_BRK(name, size) \
- asm(".pushsection .brk_reservation,\"aw\",@nobits\n\t" \
- ".brk." #name ":\n\t" \
- ".skip " __stringify(size) "\n\t" \
- ".size .brk." #name ", " __stringify(size) "\n\t" \
- ".popsection\n\t")
+#define RESERVE_BRK(name, size) \
+ __section(".bss..brk") __aligned(1) __used \
+ static char __brk_##name[size]
extern void probe_roms(void);
#ifdef __i386__
@@ -133,12 +130,19 @@ asmlinkage void __init x86_64_start_reservations(char *real_mode_data);
#endif /* __i386__ */
#endif /* _SETUP */
-#else
-#define RESERVE_BRK(name,sz) \
- .pushsection .brk_reservation,"aw",@nobits; \
-.brk.name: \
-1: .skip sz; \
- .size .brk.name,.-1b; \
+
+#else /* __ASSEMBLY */
+
+.macro __RESERVE_BRK name, size
+ .pushsection .bss..brk, "aw"
+SYM_DATA_START(__brk_\name)
+ .skip \size
+SYM_DATA_END(__brk_\name)
.popsection
+.endm
+
+#define RESERVE_BRK(name, size) __RESERVE_BRK name, size
+
#endif /* __ASSEMBLY__ */
+
#endif /* _ASM_X86_SETUP_H */
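The new RESERVE_BRK() above turns the reservation from inline asm into a plain zero-filled array placed in a dedicated .bss..brk input section (collected by the vmlinux.lds.S change further down). A userspace toy showing the expansion; the section name and size are only illustrative:

#include <stdio.h>

/* Toy expansion of the new C-level RESERVE_BRK() definition. */
#define RESERVE_BRK(name, size) \
	static char __brk_##name[size] \
	__attribute__((__section__(".bss..brk"), __aligned__(1), __used__))

RESERVE_BRK(demo, 4096);	/* mirrors e.g. RESERVE_BRK(dmi_alloc, 65536) */

int main(void)
{
	printf("reserved %zu bytes at %p\n", sizeof(__brk_demo),
	       (void *)__brk_demo);
	return 0;
}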
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 03364dc40d8d..4c8b6ae802ac 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -36,10 +36,6 @@ KCSAN_SANITIZE := n
OBJECT_FILES_NON_STANDARD_test_nx.o := y
-ifdef CONFIG_FRAME_POINTER
-OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
-endif
-
# If instrumentation of this dir is enabled, boot hangs during first second.
# Probably could be more selective here, but note that files related to irqs,
# boot, dumpstack/stacktrace, etc are either non-interesting or can lead to
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index d879a6c93609..74c62cc47a5f 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -41,8 +41,10 @@ static void __init spectre_v2_select_mitigation(void);
static void __init ssb_select_mitigation(void);
static void __init l1tf_select_mitigation(void);
static void __init mds_select_mitigation(void);
-static void __init mds_print_mitigation(void);
+static void __init md_clear_update_mitigation(void);
+static void __init md_clear_select_mitigation(void);
static void __init taa_select_mitigation(void);
+static void __init mmio_select_mitigation(void);
static void __init srbds_select_mitigation(void);
static void __init l1d_flush_select_mitigation(void);
@@ -85,6 +87,10 @@ EXPORT_SYMBOL_GPL(mds_idle_clear);
*/
DEFINE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
+/* Controls CPU Fill buffer clear before KVM guest MMIO accesses */
+DEFINE_STATIC_KEY_FALSE(mmio_stale_data_clear);
+EXPORT_SYMBOL_GPL(mmio_stale_data_clear);
+
void __init check_bugs(void)
{
identify_boot_cpu();
@@ -117,17 +123,10 @@ void __init check_bugs(void)
spectre_v2_select_mitigation();
ssb_select_mitigation();
l1tf_select_mitigation();
- mds_select_mitigation();
- taa_select_mitigation();
+ md_clear_select_mitigation();
srbds_select_mitigation();
l1d_flush_select_mitigation();
- /*
- * As MDS and TAA mitigations are inter-related, print MDS
- * mitigation until after TAA mitigation selection is done.
- */
- mds_print_mitigation();
-
arch_smt_update();
#ifdef CONFIG_X86_32
@@ -267,14 +266,6 @@ static void __init mds_select_mitigation(void)
}
}
-static void __init mds_print_mitigation(void)
-{
- if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off())
- return;
-
- pr_info("%s\n", mds_strings[mds_mitigation]);
-}
-
static int __init mds_cmdline(char *str)
{
if (!boot_cpu_has_bug(X86_BUG_MDS))
@@ -329,7 +320,7 @@ static void __init taa_select_mitigation(void)
/* TSX previously disabled by tsx=off */
if (!boot_cpu_has(X86_FEATURE_RTM)) {
taa_mitigation = TAA_MITIGATION_TSX_DISABLED;
- goto out;
+ return;
}
if (cpu_mitigations_off()) {
@@ -343,7 +334,7 @@ static void __init taa_select_mitigation(void)
*/
if (taa_mitigation == TAA_MITIGATION_OFF &&
mds_mitigation == MDS_MITIGATION_OFF)
- goto out;
+ return;
if (boot_cpu_has(X86_FEATURE_MD_CLEAR))
taa_mitigation = TAA_MITIGATION_VERW;
@@ -375,18 +366,6 @@ static void __init taa_select_mitigation(void)
if (taa_nosmt || cpu_mitigations_auto_nosmt())
cpu_smt_disable(false);
-
- /*
- * Update MDS mitigation, if necessary, as the mds_user_clear is
- * now enabled for TAA mitigation.
- */
- if (mds_mitigation == MDS_MITIGATION_OFF &&
- boot_cpu_has_bug(X86_BUG_MDS)) {
- mds_mitigation = MDS_MITIGATION_FULL;
- mds_select_mitigation();
- }
-out:
- pr_info("%s\n", taa_strings[taa_mitigation]);
}
static int __init tsx_async_abort_parse_cmdline(char *str)
@@ -411,6 +390,151 @@ static int __init tsx_async_abort_parse_cmdline(char *str)
early_param("tsx_async_abort", tsx_async_abort_parse_cmdline);
#undef pr_fmt
+#define pr_fmt(fmt) "MMIO Stale Data: " fmt
+
+enum mmio_mitigations {
+ MMIO_MITIGATION_OFF,
+ MMIO_MITIGATION_UCODE_NEEDED,
+ MMIO_MITIGATION_VERW,
+};
+
+/* Default mitigation for Processor MMIO Stale Data vulnerabilities */
+static enum mmio_mitigations mmio_mitigation __ro_after_init = MMIO_MITIGATION_VERW;
+static bool mmio_nosmt __ro_after_init = false;
+
+static const char * const mmio_strings[] = {
+ [MMIO_MITIGATION_OFF] = "Vulnerable",
+ [MMIO_MITIGATION_UCODE_NEEDED] = "Vulnerable: Clear CPU buffers attempted, no microcode",
+ [MMIO_MITIGATION_VERW] = "Mitigation: Clear CPU buffers",
+};
+
+static void __init mmio_select_mitigation(void)
+{
+ u64 ia32_cap;
+
+ if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) ||
+ cpu_mitigations_off()) {
+ mmio_mitigation = MMIO_MITIGATION_OFF;
+ return;
+ }
+
+ if (mmio_mitigation == MMIO_MITIGATION_OFF)
+ return;
+
+ ia32_cap = x86_read_arch_cap_msr();
+
+ /*
+ * Enable CPU buffer clear mitigation for host and VMM, if also affected
+ * by MDS or TAA. Otherwise, enable mitigation for VMM only.
+ */
+ if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) &&
+ boot_cpu_has(X86_FEATURE_RTM)))
+ static_branch_enable(&mds_user_clear);
+ else
+ static_branch_enable(&mmio_stale_data_clear);
+
+ /*
+ * If Processor-MMIO-Stale-Data bug is present and Fill Buffer data can
+ * be propagated to uncore buffers, clearing the Fill buffers on idle
+ * is required irrespective of SMT state.
+ */
+ if (!(ia32_cap & ARCH_CAP_FBSDP_NO))
+ static_branch_enable(&mds_idle_clear);
+
+ /*
+ * Check if the system has the right microcode.
+ *
+ * CPU Fill buffer clear mitigation is enumerated by either an explicit
+ * FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS
+ * affected systems.
+ */
+ if ((ia32_cap & ARCH_CAP_FB_CLEAR) ||
+ (boot_cpu_has(X86_FEATURE_MD_CLEAR) &&
+ boot_cpu_has(X86_FEATURE_FLUSH_L1D) &&
+ !(ia32_cap & ARCH_CAP_MDS_NO)))
+ mmio_mitigation = MMIO_MITIGATION_VERW;
+ else
+ mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED;
+
+ if (mmio_nosmt || cpu_mitigations_auto_nosmt())
+ cpu_smt_disable(false);
+}
+
+static int __init mmio_stale_data_parse_cmdline(char *str)
+{
+ if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
+ return 0;
+
+ if (!str)
+ return -EINVAL;
+
+ if (!strcmp(str, "off")) {
+ mmio_mitigation = MMIO_MITIGATION_OFF;
+ } else if (!strcmp(str, "full")) {
+ mmio_mitigation = MMIO_MITIGATION_VERW;
+ } else if (!strcmp(str, "full,nosmt")) {
+ mmio_mitigation = MMIO_MITIGATION_VERW;
+ mmio_nosmt = true;
+ }
+
+ return 0;
+}
+early_param("mmio_stale_data", mmio_stale_data_parse_cmdline);
+
+#undef pr_fmt
+#define pr_fmt(fmt) "" fmt
+
+static void __init md_clear_update_mitigation(void)
+{
+ if (cpu_mitigations_off())
+ return;
+
+ if (!static_key_enabled(&mds_user_clear))
+ goto out;
+
+ /*
+ * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data
+ * mitigation, if necessary.
+ */
+ if (mds_mitigation == MDS_MITIGATION_OFF &&
+ boot_cpu_has_bug(X86_BUG_MDS)) {
+ mds_mitigation = MDS_MITIGATION_FULL;
+ mds_select_mitigation();
+ }
+ if (taa_mitigation == TAA_MITIGATION_OFF &&
+ boot_cpu_has_bug(X86_BUG_TAA)) {
+ taa_mitigation = TAA_MITIGATION_VERW;
+ taa_select_mitigation();
+ }
+ if (mmio_mitigation == MMIO_MITIGATION_OFF &&
+ boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) {
+ mmio_mitigation = MMIO_MITIGATION_VERW;
+ mmio_select_mitigation();
+ }
+out:
+ if (boot_cpu_has_bug(X86_BUG_MDS))
+ pr_info("MDS: %s\n", mds_strings[mds_mitigation]);
+ if (boot_cpu_has_bug(X86_BUG_TAA))
+ pr_info("TAA: %s\n", taa_strings[taa_mitigation]);
+ if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
+ pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]);
+}
+
+static void __init md_clear_select_mitigation(void)
+{
+ mds_select_mitigation();
+ taa_select_mitigation();
+ mmio_select_mitigation();
+
+ /*
+ * As MDS, TAA and MMIO Stale Data mitigations are inter-related, update
+ * and print their mitigation after MDS, TAA and MMIO Stale Data
+ * mitigation selection is done.
+ */
+ md_clear_update_mitigation();
+}
+
+#undef pr_fmt
#define pr_fmt(fmt) "SRBDS: " fmt
enum srbds_mitigations {
@@ -478,11 +602,13 @@ static void __init srbds_select_mitigation(void)
return;
/*
- * Check to see if this is one of the MDS_NO systems supporting
- * TSX that are only exposed to SRBDS when TSX is enabled.
+ * Check to see if this is one of the MDS_NO systems supporting TSX that
+ * are only exposed to SRBDS when TSX is enabled or when the CPU is
+ * affected by the Processor MMIO Stale Data vulnerability.
*/
ia32_cap = x86_read_arch_cap_msr();
- if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM))
+ if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) &&
+ !boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
srbds_mitigation = SRBDS_MITIGATION_TSX_OFF;
else if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
srbds_mitigation = SRBDS_MITIGATION_HYPERVISOR;
@@ -1116,6 +1242,8 @@ static void update_indir_branch_cond(void)
/* Update the static key controlling the MDS CPU buffer clear in idle */
static void update_mds_branch_idle(void)
{
+ u64 ia32_cap = x86_read_arch_cap_msr();
+
/*
* Enable the idle clearing if SMT is active on CPUs which are
* affected only by MSBDS and not any other MDS variant.
@@ -1127,14 +1255,17 @@ static void update_mds_branch_idle(void)
if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY))
return;
- if (sched_smt_active())
+ if (sched_smt_active()) {
static_branch_enable(&mds_idle_clear);
- else
+ } else if (mmio_mitigation == MMIO_MITIGATION_OFF ||
+ (ia32_cap & ARCH_CAP_FBSDP_NO)) {
static_branch_disable(&mds_idle_clear);
+ }
}
#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n"
#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n"
+#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n"
void cpu_bugs_smt_update(void)
{
@@ -1179,6 +1310,16 @@ void cpu_bugs_smt_update(void)
break;
}
+ switch (mmio_mitigation) {
+ case MMIO_MITIGATION_VERW:
+ case MMIO_MITIGATION_UCODE_NEEDED:
+ if (sched_smt_active())
+ pr_warn_once(MMIO_MSG_SMT);
+ break;
+ case MMIO_MITIGATION_OFF:
+ break;
+ }
+
mutex_unlock(&spec_ctrl_mutex);
}
@@ -1781,6 +1922,20 @@ static ssize_t tsx_async_abort_show_state(char *buf)
sched_smt_active() ? "vulnerable" : "disabled");
}
+static ssize_t mmio_stale_data_show_state(char *buf)
+{
+ if (mmio_mitigation == MMIO_MITIGATION_OFF)
+ return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]);
+
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
+ return sysfs_emit(buf, "%s; SMT Host state unknown\n",
+ mmio_strings[mmio_mitigation]);
+ }
+
+ return sysfs_emit(buf, "%s; SMT %s\n", mmio_strings[mmio_mitigation],
+ sched_smt_active() ? "vulnerable" : "disabled");
+}
+
static char *stibp_state(void)
{
if (spectre_v2_in_eibrs_mode(spectre_v2_enabled))
@@ -1881,6 +2036,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
case X86_BUG_SRBDS:
return srbds_show_state(buf);
+ case X86_BUG_MMIO_STALE_DATA:
+ return mmio_stale_data_show_state(buf);
+
default:
break;
}
@@ -1932,4 +2090,9 @@ ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *
{
return cpu_show_common(dev, attr, buf, X86_BUG_SRBDS);
}
+
+ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
+}
#endif
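The microcode check in mmio_select_mitigation() above boils down to one predicate: VERW-based clearing is trusted if the CPU enumerates FB_CLEAR, or if it is MDS-affected (no MDS_NO) and enumerates both MD_CLEAR and L1D_FLUSH. A toy version of that decision and the resulting sysfs string:

#include <stdbool.h>
#include <stdio.h>

/* Same condition as in mmio_select_mitigation(), lifted out for clarity. */
static bool verw_mitigation_usable(bool fb_clear, bool md_clear,
				   bool flush_l1d, bool mds_no)
{
	return fb_clear || (md_clear && flush_l1d && !mds_no);
}

int main(void)
{
	/* MDS_NO part without FB_CLEAR: mitigation needs newer microcode */
	printf("%s\n", verw_mitigation_usable(false, true, true, true) ?
	       "Mitigation: Clear CPU buffers" :
	       "Vulnerable: Clear CPU buffers attempted, no microcode");
	return 0;
}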
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c296cb1c0113..4730b0a58f24 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1211,18 +1211,42 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
X86_FEATURE_ANY, issues)
#define SRBDS BIT(0)
+/* CPU is affected by X86_BUG_MMIO_STALE_DATA */
+#define MMIO BIT(1)
+/* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */
+#define MMIO_SBDS BIT(2)
static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS),
+ VULNBL_INTEL_STEPPINGS(HASWELL_X, BIT(2) | BIT(4), MMIO),
+ VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x5), MMIO),
VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS),
+ VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO),
VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO),
VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE_X, BIT(3) | BIT(4) | BIT(6) |
+ BIT(7) | BIT(0xB), MMIO),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO),
VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS),
- VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0xC), SRBDS),
- VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0xD), SRBDS),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0x8), SRBDS),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0x8), SRBDS),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x1, 0x1), MMIO),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0x6), MMIO),
+ VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS),
+ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
+ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO),
+ VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
+ VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO),
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO),
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS),
{}
};
@@ -1243,6 +1267,13 @@ u64 x86_read_arch_cap_msr(void)
return ia32_cap;
}
+static bool arch_cap_mmio_immune(u64 ia32_cap)
+{
+ return (ia32_cap & ARCH_CAP_FBSDP_NO &&
+ ia32_cap & ARCH_CAP_PSDP_NO &&
+ ia32_cap & ARCH_CAP_SBDR_SSDP_NO);
+}
+
static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
{
u64 ia32_cap = x86_read_arch_cap_msr();
@@ -1296,12 +1327,27 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
/*
* SRBDS affects CPUs which support RDRAND or RDSEED and are listed
* in the vulnerability blacklist.
+ *
+ * Some of the implications and mitigations of Shared Buffers Data
+ * Sampling (SBDS) are similar to SRBDS. Give SBDS the same treatment
+ * as SRBDS.
*/
if ((cpu_has(c, X86_FEATURE_RDRAND) ||
cpu_has(c, X86_FEATURE_RDSEED)) &&
- cpu_matches(cpu_vuln_blacklist, SRBDS))
+ cpu_matches(cpu_vuln_blacklist, SRBDS | MMIO_SBDS))
setup_force_cpu_bug(X86_BUG_SRBDS);
+ /*
+ * Processor MMIO Stale Data bug enumeration
+ *
+ * The affected CPU list is generally enough to enumerate the
+ * vulnerability, but in the virtualization case also check the
+ * ARCH_CAP MSR bits: the VMM may not want the guest to enumerate the bug.
+ */
+ if (cpu_matches(cpu_vuln_blacklist, MMIO) &&
+ !arch_cap_mmio_immune(ia32_cap))
+ setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
+
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
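The stepping masks in the blacklist entries above are plain bitmasks over stepping numbers: X86_STEPPINGS(min, max) expands to a contiguous GENMASK-style range, and entries such as BIT(2) | BIT(4) select individual steppings. A quick check of two of the masks used above:

#include <stdio.h>

#define BIT(n)	(1UL << (n))
/* GENMASK-style range of steppings [mins, maxs], inclusive. */
#define X86_STEPPINGS(mins, maxs) \
	(((1UL << ((maxs) + 1)) - 1) & ~((1UL << (mins)) - 1))

int main(void)
{
	printf("X86_STEPPINGS(0x3, 0x5) = %#lx\n", X86_STEPPINGS(0x3, 0x5)); /* 0x38 */
	printf("BIT(2) | BIT(4)        = %#lx\n", BIT(2) | BIT(4));          /* 0x14 */
	return 0;
}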
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 4ec13608d3c6..dfeb227de561 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -175,6 +175,7 @@ SYM_INNER_LABEL(ftrace_caller_end, SYM_L_GLOBAL)
jmp ftrace_epilogue
SYM_FUNC_END(ftrace_caller);
+STACK_FRAME_NON_STANDARD_FP(ftrace_caller)
SYM_FUNC_START(ftrace_epilogue)
/*
@@ -282,6 +283,7 @@ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL)
jmp ftrace_epilogue
SYM_FUNC_END(ftrace_regs_caller)
+STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller)
#else /* ! CONFIG_DYNAMIC_FTRACE */
@@ -311,10 +313,14 @@ trace:
jmp ftrace_stub
SYM_FUNC_END(__fentry__)
EXPORT_SYMBOL(__fentry__)
+STACK_FRAME_NON_STANDARD_FP(__fentry__)
+
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-SYM_FUNC_START(return_to_handler)
+SYM_CODE_START(return_to_handler)
+ UNWIND_HINT_EMPTY
+ ANNOTATE_NOENDBR
subq $16, %rsp
/* Save the return values */
@@ -339,7 +345,6 @@ SYM_FUNC_START(return_to_handler)
int3
.Ldo_rop:
mov %rdi, (%rsp)
- UNWIND_HINT_FUNC
RET
-SYM_FUNC_END(return_to_handler)
+SYM_CODE_END(return_to_handler)
#endif
diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c
index db2b350a37b7..bba1abd05bfe 100644
--- a/arch/x86/kernel/resource.c
+++ b/arch/x86/kernel/resource.c
@@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/dev_printk.h>
#include <linux/ioport.h>
+#include <linux/printk.h>
#include <asm/e820/api.h>
+#include <asm/pci_x86.h>
static void resource_clip(struct resource *res, resource_size_t start,
resource_size_t end)
@@ -24,14 +25,14 @@ static void resource_clip(struct resource *res, resource_size_t start,
res->start = end + 1;
}
-void remove_e820_regions(struct device *dev, struct resource *avail)
+static void remove_e820_regions(struct resource *avail)
{
int i;
struct e820_entry *entry;
u64 e820_start, e820_end;
struct resource orig = *avail;
- if (!(avail->flags & IORESOURCE_MEM))
+ if (!pci_use_e820)
return;
for (i = 0; i < e820_table->nr_entries; i++) {
@@ -41,7 +42,7 @@ void remove_e820_regions(struct device *dev, struct resource *avail)
resource_clip(avail, e820_start, e820_end);
if (orig.start != avail->start || orig.end != avail->end) {
- dev_info(dev, "clipped %pR to %pR for e820 entry [mem %#010Lx-%#010Lx]\n",
+ pr_info("clipped %pR to %pR for e820 entry [mem %#010Lx-%#010Lx]\n",
&orig, avail, e820_start, e820_end);
orig = *avail;
}
@@ -55,6 +56,9 @@ void arch_remove_reservations(struct resource *avail)
* the low 1MB unconditionally, as this area is needed for some ISA
* cards requiring a memory range, e.g. the i82365 PCMCIA controller.
*/
- if (avail->flags & IORESOURCE_MEM)
+ if (avail->flags & IORESOURCE_MEM) {
resource_clip(avail, BIOS_ROM_BASE, BIOS_ROM_END);
+
+ remove_e820_regions(avail);
+ }
}
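resource_clip() is only partially visible in this hunk; it removes the overlap with a given [start, end] window from a resource, keeping whichever side of the window remains larger. A standalone sketch of that behavior under those assumptions, with a fabricated host bridge window and e820 entry:

#include <stdio.h>

struct res { unsigned long start, end; };

/* Sketch of resource_clip() semantics as used above to clip PCI host
 * bridge windows against e820 entries. */
static void resource_clip(struct res *res, unsigned long start, unsigned long end)
{
	unsigned long low = 0, high = 0;

	if (res->end < start || res->start > end)
		return;				/* no conflict */
	if (res->start < start)
		low = start - res->start;	/* usable space below */
	if (res->end > end)
		high = res->end - end;		/* usable space above */
	if (low > high)
		res->end = start - 1;		/* keep the low side */
	else
		res->start = end + 1;		/* keep the high side */
}

int main(void)
{
	struct res avail = { 0xc0000000, 0xdfffffff };

	resource_clip(&avail, 0xc0000000, 0xc00fffff);	/* fake e820 entry */
	printf("clipped to [%#lx-%#lx]\n", avail.start, avail.end);
	return 0;
}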
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 3ebb85327edb..bd6c6fd373ae 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -67,11 +67,6 @@ RESERVE_BRK(dmi_alloc, 65536);
#endif
-/*
- * Range of the BSS area. The size of the BSS area is determined
- * at link time, with RESERVE_BRK() facility reserving additional
- * chunks.
- */
unsigned long _brk_start = (unsigned long)__brk_base;
unsigned long _brk_end = (unsigned long)__brk_base;
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index f5f6dc2e8007..81aba718ecd5 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -385,10 +385,10 @@ SECTIONS
__end_of_kernel_reserve = .;
. = ALIGN(PAGE_SIZE);
- .brk : AT(ADDR(.brk) - LOAD_OFFSET) {
+ .brk (NOLOAD) : AT(ADDR(.brk) - LOAD_OFFSET) {
__brk_base = .;
. += 64 * 1024; /* 64k alignment slop space */
- *(.brk_reservation) /* areas brk users have reserved */
+ *(.bss..brk) /* areas brk users have reserved */
__brk_limit = .;
}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index f1bdac3f5aa8..0e68b4c937fc 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2039,6 +2039,19 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
}
}
+static void kvm_lapic_xapic_id_updated(struct kvm_lapic *apic)
+{
+ struct kvm *kvm = apic->vcpu->kvm;
+
+ if (KVM_BUG_ON(apic_x2apic_mode(apic), kvm))
+ return;
+
+ if (kvm_xapic_id(apic) == apic->vcpu->vcpu_id)
+ return;
+
+ kvm_set_apicv_inhibit(apic->vcpu->kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
+}
+
static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
{
int ret = 0;
@@ -2047,10 +2060,12 @@ static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
switch (reg) {
case APIC_ID: /* Local APIC ID */
- if (!apic_x2apic_mode(apic))
+ if (!apic_x2apic_mode(apic)) {
kvm_apic_set_xapic_id(apic, val >> 24);
- else
+ kvm_lapic_xapic_id_updated(apic);
+ } else {
ret = 1;
+ }
break;
case APIC_TASKPRI:
@@ -2336,8 +2351,10 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
MSR_IA32_APICBASE_BASE;
if ((value & MSR_IA32_APICBASE_ENABLE) &&
- apic->base_address != APIC_DEFAULT_PHYS_BASE)
- pr_warn_once("APIC base relocation is unsupported by KVM");
+ apic->base_address != APIC_DEFAULT_PHYS_BASE) {
+ kvm_set_apicv_inhibit(apic->vcpu->kvm,
+ APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
+ }
}
void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
@@ -2648,6 +2665,8 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);
__kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
}
+ } else {
+ kvm_lapic_xapic_id_updated(vcpu->arch.apic);
}
return 0;
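
The lapic.c hunks above add a new APICv inhibit that fires whenever a guest moves its xAPIC ID away from the vCPU ID. Reduced to its essentials, the check looks like the stand-alone C sketch below; the struct and helper names are invented for illustration and are not KVM's.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the KVM state consulted by the hunk above. */
struct fake_apic {
	bool x2apic;            /* plays the role of apic_x2apic_mode() */
	unsigned int xapic_id;  /* plays the role of kvm_xapic_id() */
	unsigned int vcpu_id;
};

/* True when the APIC_ID_MODIFIED inhibit should be raised. */
static bool needs_apic_id_inhibit(const struct fake_apic *apic)
{
	if (apic->x2apic)
		return false;   /* x2APIC IDs are read-only, nothing to check */
	return apic->xapic_id != apic->vcpu_id;
}

int main(void)
{
	struct fake_apic a = { .x2apic = false, .xapic_id = 5, .vcpu_id = 3 };
	printf("inhibit APICv: %s\n", needs_apic_id_inhibit(&a) ? "yes" : "no");
	return 0;
}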
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index e826ee9138fa..17252f39bd7c 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3411,7 +3411,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
root = mmu_alloc_root(vcpu, i << (30 - PAGE_SHIFT),
i << 30, PT32_ROOT_LEVEL, true);
mmu->pae_root[i] = root | PT_PRESENT_MASK |
- shadow_me_mask;
+ shadow_me_value;
}
mmu->root.hpa = __pa(mmu->pae_root);
} else {
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 54fe03714f8a..d1bc5820ea46 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -291,58 +291,91 @@ void avic_ring_doorbell(struct kvm_vcpu *vcpu)
static int avic_kick_target_vcpus_fast(struct kvm *kvm, struct kvm_lapic *source,
u32 icrl, u32 icrh, u32 index)
{
- u32 dest, apic_id;
- struct kvm_vcpu *vcpu;
+ u32 l1_physical_id, dest;
+ struct kvm_vcpu *target_vcpu;
int dest_mode = icrl & APIC_DEST_MASK;
int shorthand = icrl & APIC_SHORT_MASK;
struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
- u32 *avic_logical_id_table = page_address(kvm_svm->avic_logical_id_table_page);
if (shorthand != APIC_DEST_NOSHORT)
return -EINVAL;
- /*
- * The AVIC incomplete IPI #vmexit info provides index into
- * the physical APIC ID table, which can be used to derive
- * guest physical APIC ID.
- */
+ if (apic_x2apic_mode(source))
+ dest = icrh;
+ else
+ dest = GET_APIC_DEST_FIELD(icrh);
+
if (dest_mode == APIC_DEST_PHYSICAL) {
- apic_id = index;
+ /* broadcast destination, use slow path */
+ if (apic_x2apic_mode(source) && dest == X2APIC_BROADCAST)
+ return -EINVAL;
+ if (!apic_x2apic_mode(source) && dest == APIC_BROADCAST)
+ return -EINVAL;
+
+ l1_physical_id = dest;
+
+ if (WARN_ON_ONCE(l1_physical_id != index))
+ return -EINVAL;
+
} else {
- if (!apic_x2apic_mode(source)) {
- /* For xAPIC logical mode, the index is for logical APIC table. */
- apic_id = avic_logical_id_table[index] & 0x1ff;
+ u32 bitmap, cluster;
+ int logid_index;
+
+ if (apic_x2apic_mode(source)) {
+ /* 16 bit dest mask, 16 bit cluster id */
+ bitmap = dest & 0xFFFF;
+ cluster = (dest >> 16) << 4;
+ } else if (kvm_lapic_get_reg(source, APIC_DFR) == APIC_DFR_FLAT) {
+ /* 8 bit dest mask */
+ bitmap = dest;
+ cluster = 0;
} else {
- return -EINVAL;
+ /* 4 bit dest mask, 4 bit cluster id */
+ bitmap = dest & 0xF;
+ cluster = (dest >> 4) << 2;
}
- }
- /*
- * Assuming vcpu ID is the same as physical apic ID,
- * and use it to retrieve the target vCPU.
- */
- vcpu = kvm_get_vcpu_by_id(kvm, apic_id);
- if (!vcpu)
- return -EINVAL;
+ if (unlikely(!bitmap))
+ /* guest bug: nobody to send the logical interrupt to */
+ return 0;
- if (apic_x2apic_mode(vcpu->arch.apic))
- dest = icrh;
- else
- dest = GET_APIC_DEST_FIELD(icrh);
+ if (!is_power_of_2(bitmap))
+ /* multiple logical destinations, use slow path */
+ return -EINVAL;
- /*
- * Try matching the destination APIC ID with the vCPU.
- */
- if (kvm_apic_match_dest(vcpu, source, shorthand, dest, dest_mode)) {
- vcpu->arch.apic->irr_pending = true;
- svm_complete_interrupt_delivery(vcpu,
- icrl & APIC_MODE_MASK,
- icrl & APIC_INT_LEVELTRIG,
- icrl & APIC_VECTOR_MASK);
- return 0;
+ logid_index = cluster + __ffs(bitmap);
+
+ if (apic_x2apic_mode(source)) {
+ l1_physical_id = logid_index;
+ } else {
+ u32 *avic_logical_id_table =
+ page_address(kvm_svm->avic_logical_id_table_page);
+
+ u32 logid_entry = avic_logical_id_table[logid_index];
+
+ if (WARN_ON_ONCE(index != logid_index))
+ return -EINVAL;
+
+ /* guest bug: non-existing/reserved logical destination */
+ if (unlikely(!(logid_entry & AVIC_LOGICAL_ID_ENTRY_VALID_MASK)))
+ return 0;
+
+ l1_physical_id = logid_entry &
+ AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
+ }
}
- return -EINVAL;
+ target_vcpu = kvm_get_vcpu_by_id(kvm, l1_physical_id);
+ if (unlikely(!target_vcpu))
+ /* guest bug: non-existing vCPU is a target of this IPI */
+ return 0;
+
+ target_vcpu->arch.apic->irr_pending = true;
+ svm_complete_interrupt_delivery(target_vcpu,
+ icrl & APIC_MODE_MASK,
+ icrl & APIC_INT_LEVELTRIG,
+ icrl & APIC_VECTOR_MASK);
+ return 0;
}
static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
@@ -508,35 +541,6 @@ static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
return ret;
}
-static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
-{
- u64 *old, *new;
- struct vcpu_svm *svm = to_svm(vcpu);
- u32 id = kvm_xapic_id(vcpu->arch.apic);
-
- if (vcpu->vcpu_id == id)
- return 0;
-
- old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id);
- new = avic_get_physical_id_entry(vcpu, id);
- if (!new || !old)
- return 1;
-
- /* We need to move physical_id_entry to new offset */
- *new = *old;
- *old = 0ULL;
- to_svm(vcpu)->avic_physical_id_cache = new;
-
- /*
- * Also update the guest physical APIC ID in the logical
- * APIC ID table entry if already setup the LDR.
- */
- if (svm->ldr_reg)
- avic_handle_ldr_update(vcpu);
-
- return 0;
-}
-
static void avic_handle_dfr_update(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -555,10 +559,6 @@ static int avic_unaccel_trap_write(struct kvm_vcpu *vcpu)
AVIC_UNACCEL_ACCESS_OFFSET_MASK;
switch (offset) {
- case APIC_ID:
- if (avic_handle_apic_id_update(vcpu))
- return 0;
- break;
case APIC_LDR:
if (avic_handle_ldr_update(vcpu))
return 0;
@@ -650,8 +650,6 @@ int avic_init_vcpu(struct vcpu_svm *svm)
void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu)
{
- if (avic_handle_apic_id_update(vcpu) != 0)
- return;
avic_handle_dfr_update(vcpu);
avic_handle_ldr_update(vcpu);
}
@@ -910,7 +908,9 @@ bool avic_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)
BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
BIT(APICV_INHIBIT_REASON_X2APIC) |
BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |
- BIT(APICV_INHIBIT_REASON_SEV);
+ BIT(APICV_INHIBIT_REASON_SEV) |
+ BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) |
+ BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
return supported & BIT(reason);
}
@@ -946,7 +946,7 @@ out:
return ret;
}
-void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
u64 entry;
int h_physical_id = kvm_cpu_get_apicid(cpu);
@@ -978,7 +978,7 @@ void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);
}
-void __avic_vcpu_put(struct kvm_vcpu *vcpu)
+void avic_vcpu_put(struct kvm_vcpu *vcpu)
{
u64 entry;
struct vcpu_svm *svm = to_svm(vcpu);
@@ -997,25 +997,6 @@ void __avic_vcpu_put(struct kvm_vcpu *vcpu)
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
}
-static void avic_vcpu_load(struct kvm_vcpu *vcpu)
-{
- int cpu = get_cpu();
-
- WARN_ON(cpu != vcpu->cpu);
-
- __avic_vcpu_load(vcpu, cpu);
-
- put_cpu();
-}
-
-static void avic_vcpu_put(struct kvm_vcpu *vcpu)
-{
- preempt_disable();
-
- __avic_vcpu_put(vcpu);
-
- preempt_enable();
-}
void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
{
@@ -1042,7 +1023,7 @@ void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
vmcb_mark_dirty(vmcb, VMCB_AVIC);
if (activated)
- avic_vcpu_load(vcpu);
+ avic_vcpu_load(vcpu, vcpu->cpu);
else
avic_vcpu_put(vcpu);
@@ -1075,5 +1056,5 @@ void avic_vcpu_unblocking(struct kvm_vcpu *vcpu)
if (!kvm_vcpu_apicv_active(vcpu))
return;
- avic_vcpu_load(vcpu);
+ avic_vcpu_load(vcpu, vcpu->cpu);
}
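
The logical-destination decoding introduced in avic_kick_target_vcpus_fast() above distinguishes three encodings, and the fast path only proceeds when the member bitmap is a power of two (a single destination). Below is a stand-alone sketch of the decode step; the enum and function names are invented for illustration, and the flat-mode mask is written out explicitly even though the xAPIC destination field is 8 bits to begin with.

#include <stdint.h>
#include <stdio.h>

enum logical_mode { X2APIC_LOGICAL, XAPIC_FLAT, XAPIC_CLUSTER };

/* Split a logical destination into (cluster base, member bitmap). */
static void decode_logical(enum logical_mode m, uint32_t dest,
			   uint32_t *bitmap, uint32_t *cluster)
{
	switch (m) {
	case X2APIC_LOGICAL:	/* 16 bit dest mask, 16 bit cluster id */
		*bitmap = dest & 0xFFFF;
		*cluster = (dest >> 16) << 4;
		break;
	case XAPIC_FLAT:	/* 8 bit dest mask, single cluster */
		*bitmap = dest & 0xFF;
		*cluster = 0;
		break;
	case XAPIC_CLUSTER:	/* 4 bit dest mask, 4 bit cluster id */
		*bitmap = dest & 0xF;
		*cluster = (dest >> 4) << 2;
		break;
	}
}

int main(void)
{
	uint32_t bitmap, cluster;

	/* one member (bit 0) of x2APIC cluster 3 */
	decode_logical(X2APIC_LOGICAL, 0x00030001u, &bitmap, &cluster);
	printf("bitmap=%#x cluster=%#x\n", bitmap, cluster);
	return 0;
}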
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 3361258640a2..ba7cd26f438f 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -616,6 +616,8 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
struct kvm_vcpu *vcpu = &svm->vcpu;
struct vmcb *vmcb01 = svm->vmcb01.ptr;
struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
+ u32 pause_count12;
+ u32 pause_thresh12;
/*
* Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2,
@@ -671,27 +673,25 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
if (!nested_vmcb_needs_vls_intercept(svm))
vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
+ pause_count12 = svm->pause_filter_enabled ? svm->nested.ctl.pause_filter_count : 0;
+ pause_thresh12 = svm->pause_threshold_enabled ? svm->nested.ctl.pause_filter_thresh : 0;
if (kvm_pause_in_guest(svm->vcpu.kvm)) {
- /* use guest values since host doesn't use them */
- vmcb02->control.pause_filter_count =
- svm->pause_filter_enabled ?
- svm->nested.ctl.pause_filter_count : 0;
+ /* use guest values since host doesn't intercept PAUSE */
+ vmcb02->control.pause_filter_count = pause_count12;
+ vmcb02->control.pause_filter_thresh = pause_thresh12;
- vmcb02->control.pause_filter_thresh =
- svm->pause_threshold_enabled ?
- svm->nested.ctl.pause_filter_thresh : 0;
-
- } else if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) {
- /* use host values when guest doesn't use them */
+ } else {
+ /* start from host values otherwise */
vmcb02->control.pause_filter_count = vmcb01->control.pause_filter_count;
vmcb02->control.pause_filter_thresh = vmcb01->control.pause_filter_thresh;
- } else {
- /*
- * Intercept every PAUSE otherwise and
- * ignore both host and guest values
- */
- vmcb02->control.pause_filter_count = 0;
- vmcb02->control.pause_filter_thresh = 0;
+
+ /* ... but ensure filtering is disabled if so requested. */
+ if (vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) {
+ if (!pause_count12)
+ vmcb02->control.pause_filter_count = 0;
+ if (!pause_thresh12)
+ vmcb02->control.pause_filter_thresh = 0;
+ }
}
nested_svm_transition_tlb_flush(vcpu);
@@ -951,8 +951,11 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
vmcb12->control.event_inj = svm->nested.ctl.event_inj;
vmcb12->control.event_inj_err = svm->nested.ctl.event_inj_err;
- if (!kvm_pause_in_guest(vcpu->kvm) && vmcb02->control.pause_filter_count)
+ if (!kvm_pause_in_guest(vcpu->kvm)) {
vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count;
+ vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
+ }
nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);
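
The nested pause-filter rework above boils down to a small merge rule: L1's values win when the host doesn't intercept PAUSE, host values apply otherwise, except that L1 can force filtering off by intercepting PAUSE with zeroed fields. A stand-alone model of that rule follows; the names are invented, and guest12 stands for L1's values after the capability gating done by pause_count12/pause_thresh12 in the hunk.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct pf { uint32_t count, thresh; };

static struct pf merge_pause_filter(bool pause_in_guest, bool l1_intercepts_pause,
				    struct pf host, struct pf guest12)
{
	if (pause_in_guest)		/* host doesn't intercept PAUSE: guest values win */
		return guest12;

	struct pf out = host;		/* otherwise start from host values ... */
	if (l1_intercepts_pause) {	/* ... but honour L1's request to disable filtering */
		if (!guest12.count)
			out.count = 0;
		if (!guest12.thresh)
			out.thresh = 0;
	}
	return out;
}

int main(void)
{
	struct pf host = { 3000, 128 }, guest = { 0, 0 };
	struct pf r = merge_pause_filter(false, true, host, guest);

	printf("count=%u thresh=%u\n", r.count, r.thresh);	/* 0 0 */
	return 0;
}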
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 1dc02cdf6960..87da90360bc7 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -921,7 +921,7 @@ static void grow_ple_window(struct kvm_vcpu *vcpu)
struct vmcb_control_area *control = &svm->vmcb->control;
int old = control->pause_filter_count;
- if (kvm_pause_in_guest(vcpu->kvm) || !old)
+ if (kvm_pause_in_guest(vcpu->kvm))
return;
control->pause_filter_count = __grow_ple_window(old,
@@ -942,7 +942,7 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
struct vmcb_control_area *control = &svm->vmcb->control;
int old = control->pause_filter_count;
- if (kvm_pause_in_guest(vcpu->kvm) || !old)
+ if (kvm_pause_in_guest(vcpu->kvm))
return;
control->pause_filter_count =
@@ -1400,13 +1400,13 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
indirect_branch_prediction_barrier();
}
if (kvm_vcpu_apicv_active(vcpu))
- __avic_vcpu_load(vcpu, cpu);
+ avic_vcpu_load(vcpu, cpu);
}
static void svm_vcpu_put(struct kvm_vcpu *vcpu)
{
if (kvm_vcpu_apicv_active(vcpu))
- __avic_vcpu_put(vcpu);
+ avic_vcpu_put(vcpu);
svm_prepare_host_switch(vcpu);
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 500348c1cb35..1bddd336a27e 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -610,8 +610,8 @@ void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb);
int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu);
int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu);
int avic_init_vcpu(struct vcpu_svm *svm);
-void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
-void __avic_vcpu_put(struct kvm_vcpu *vcpu);
+void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
+void avic_vcpu_put(struct kvm_vcpu *vcpu);
void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu);
void avic_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 9bd86ecccdab..3a919e49129b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -229,6 +229,9 @@ static const struct {
#define L1D_CACHE_ORDER 4
static void *vmx_l1d_flush_pages;
+/* Control for disabling CPU Fill buffer clear */
+static bool __read_mostly vmx_fb_clear_ctrl_available;
+
static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
{
struct page *page;
@@ -360,6 +363,60 @@ static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
}
+static void vmx_setup_fb_clear_ctrl(void)
+{
+ u64 msr;
+
+ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES) &&
+ !boot_cpu_has_bug(X86_BUG_MDS) &&
+ !boot_cpu_has_bug(X86_BUG_TAA)) {
+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
+ if (msr & ARCH_CAP_FB_CLEAR_CTRL)
+ vmx_fb_clear_ctrl_available = true;
+ }
+}
+
+static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx)
+{
+ u64 msr;
+
+ if (!vmx->disable_fb_clear)
+ return;
+
+ rdmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
+ msr |= FB_CLEAR_DIS;
+ wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
+ /* Cache the MSR value to avoid reading it later */
+ vmx->msr_ia32_mcu_opt_ctrl = msr;
+}
+
+static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
+{
+ if (!vmx->disable_fb_clear)
+ return;
+
+ vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS;
+ wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl);
+}
+
+static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
+{
+ vmx->disable_fb_clear = vmx_fb_clear_ctrl_available;
+
+ /*
+ * If the guest will not execute VERW, there is no need to set
+ * FB_CLEAR_DIS at VM entry. Skip the MSR read/write when the guest
+ * has no reason to execute VERW.
+ */
+ if ((vcpu->arch.arch_capabilities & ARCH_CAP_FB_CLEAR) ||
+ ((vcpu->arch.arch_capabilities & ARCH_CAP_MDS_NO) &&
+ (vcpu->arch.arch_capabilities & ARCH_CAP_TAA_NO) &&
+ (vcpu->arch.arch_capabilities & ARCH_CAP_PSDP_NO) &&
+ (vcpu->arch.arch_capabilities & ARCH_CAP_FBSDP_NO) &&
+ (vcpu->arch.arch_capabilities & ARCH_CAP_SBDR_SSDP_NO)))
+ vmx->disable_fb_clear = false;
+}
+
static const struct kernel_param_ops vmentry_l1d_flush_ops = {
.set = vmentry_l1d_flush_set,
.get = vmentry_l1d_flush_get,
@@ -2252,6 +2309,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
ret = kvm_set_msr_common(vcpu, msr_info);
}
+ /* FB_CLEAR may have changed, also update the FB_CLEAR_DIS behavior */
+ if (msr_index == MSR_IA32_ARCH_CAPABILITIES)
+ vmx_update_fb_clear_dis(vcpu, vmx);
+
return ret;
}
@@ -4553,6 +4614,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
vpid_sync_context(vmx->vpid);
+
+ vmx_update_fb_clear_dis(vcpu, vmx);
}
static void vmx_enable_irq_window(struct kvm_vcpu *vcpu)
@@ -6772,6 +6835,11 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
vmx_l1d_flush(vcpu);
else if (static_branch_unlikely(&mds_user_clear))
mds_clear_cpu_buffers();
+ else if (static_branch_unlikely(&mmio_stale_data_clear) &&
+ kvm_arch_has_assigned_device(vcpu->kvm))
+ mds_clear_cpu_buffers();
+
+ vmx_disable_fb_clear(vmx);
if (vcpu->arch.cr2 != native_read_cr2())
native_write_cr2(vcpu->arch.cr2);
@@ -6781,6 +6849,8 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
vcpu->arch.cr2 = native_read_cr2();
+ vmx_enable_fb_clear(vmx);
+
guest_state_exit_irqoff();
}
@@ -7709,7 +7779,9 @@ static bool vmx_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)
ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
BIT(APICV_INHIBIT_REASON_ABSENT) |
BIT(APICV_INHIBIT_REASON_HYPERV) |
- BIT(APICV_INHIBIT_REASON_BLOCKIRQ);
+ BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |
+ BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) |
+ BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
return supported & BIT(reason);
}
@@ -8212,6 +8284,8 @@ static int __init vmx_init(void)
return r;
}
+ vmx_setup_fb_clear_ctrl();
+
for_each_possible_cpu(cpu) {
INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
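
The FB_CLEAR_DIS handling added above only pays the MSR read/write cost when it buys something: the bit is pointless if the control is unavailable, if the guest is told to VERW anyway, or if the guest believes it is unaffected by all the relevant issues. A stand-alone sketch of that decision follows; the CAP_* constants are illustrative bit assignments, not the real MSR_IA32_ARCH_CAPABILITIES positions.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define CAP_FB_CLEAR		(1u << 0)
#define CAP_MDS_NO		(1u << 1)
#define CAP_TAA_NO		(1u << 2)
#define CAP_PSDP_NO		(1u << 3)
#define CAP_FBSDP_NO		(1u << 4)
#define CAP_SBDR_SSDP_NO	(1u << 5)

/* Mirrors the decision in vmx_update_fb_clear_dis() above. */
static bool want_fb_clear_dis(bool ctrl_available, uint32_t guest_caps)
{
	uint32_t all_no = CAP_MDS_NO | CAP_TAA_NO | CAP_PSDP_NO |
			  CAP_FBSDP_NO | CAP_SBDR_SSDP_NO;

	if (!ctrl_available)
		return false;
	if (guest_caps & CAP_FB_CLEAR)		/* guest will VERW: keep it effective */
		return false;
	if ((guest_caps & all_no) == all_no)	/* guest believes it is unaffected */
		return false;
	return true;
}

int main(void)
{
	uint32_t all_no = CAP_MDS_NO | CAP_TAA_NO | CAP_PSDP_NO |
			  CAP_FBSDP_NO | CAP_SBDR_SSDP_NO;

	printf("%d\n", want_fb_clear_dis(true, all_no));	/* 0: skip the MSR dance */
	printf("%d\n", want_fb_clear_dis(true, 0));		/* 1: set FB_CLEAR_DIS */
	return 0;
}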
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index b98c7e96697a..8d2342ede0c5 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -348,6 +348,8 @@ struct vcpu_vmx {
u64 msr_ia32_feature_control_valid_bits;
/* SGX Launch Control public key hash */
u64 msr_ia32_sgxlepubkeyhash[4];
+ u64 msr_ia32_mcu_opt_ctrl;
+ bool disable_fb_clear;
struct pt_desc pt_desc;
struct lbr_desc lbr_desc;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 03fbfbbec460..1910e1e78b15 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1617,6 +1617,9 @@ static u64 kvm_get_arch_capabilities(void)
*/
}
+ /* Guests don't need to know "Fill buffer clear control" exists */
+ data &= ~ARCH_CAP_FB_CLEAR_CTRL;
+
return data;
}
@@ -9850,6 +9853,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
return;
down_read(&vcpu->kvm->arch.apicv_update_lock);
+ preempt_disable();
activate = kvm_vcpu_apicv_activated(vcpu);
@@ -9870,6 +9874,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
kvm_make_request(KVM_REQ_EVENT, vcpu);
out:
+ preempt_enable();
up_read(&vcpu->kvm->arch.apicv_update_lock);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index f298b18a9a3d..c98b8c0ed3b8 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1420,8 +1420,9 @@ st: if (is_imm8(insn->off))
case BPF_JMP | BPF_CALL:
func = (u8 *) __bpf_call_base + imm32;
if (tail_call_reachable) {
+ /* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
EMIT3_off32(0x48, 0x8B, 0x85,
- -(bpf_prog->aux->stack_depth + 8));
+ -round_up(bpf_prog->aux->stack_depth, 8) - 8);
if (!imm32 || emit_call(&prog, func, image + addrs[i - 1] + 7))
return -EINVAL;
} else {
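
The JIT fix above makes the tail-call-count spill-slot offset agree with the prologue, which rounds the stack depth up to a multiple of 8; with an unaligned depth the old -(stack_depth + 8) offset pointed into the middle of the slot. A quick stand-alone check of the offset arithmetic:

#include <stdio.h>

static int round_up8(int x) { return (x + 7) & ~7; }

int main(void)
{
	for (int depth = 0; depth <= 16; depth += 5) {
		int old_off = -(depth + 8);			/* buggy offset */
		int new_off = -round_up8(depth) - 8;		/* fixed, always 8-aligned */
		printf("depth=%2d old=%4d new=%4d\n", depth, old_off, new_off);
	}
	return 0;
}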
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index a4f43054bc79..2f82480fd430 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -8,7 +8,6 @@
#include <linux/pci-acpi.h>
#include <asm/numa.h>
#include <asm/pci_x86.h>
-#include <asm/e820/api.h>
struct pci_root_info {
struct acpi_pci_root_info common;
@@ -20,7 +19,7 @@ struct pci_root_info {
#endif
};
-static bool pci_use_e820 = true;
+bool pci_use_e820 = true;
static bool pci_use_crs = true;
static bool pci_ignore_seg;
@@ -387,11 +386,6 @@ static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci)
status = acpi_pci_probe_root_resources(ci);
- if (pci_use_e820) {
- resource_list_for_each_entry(entry, &ci->resources)
- remove_e820_regions(&device->dev, entry->res);
- }
-
if (pci_use_crs) {
resource_list_for_each_entry_safe(entry, tmp, &ci->resources)
if (resource_is_pcicfg_ioport(entry->res))
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 0d46cb728bbf..e6d7e6b01a05 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -7046,6 +7046,7 @@ static void bfq_exit_queue(struct elevator_queue *e)
spin_unlock_irq(&bfqd->lock);
#endif
+ blk_stat_disable_accounting(bfqd->queue);
wbt_enable_default(bfqd->queue);
kfree(bfqd);
@@ -7188,7 +7189,12 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
bfq_init_root_group(bfqd->root_group, bfqd);
bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group);
+ /* We dispatch request-queue-wide instead of per hw queue */
+ blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);
+
wbt_disable_default(q);
+ blk_stat_enable_accounting(q);
+
return 0;
out_free:
diff --git a/block/bio.c b/block/bio.c
index f92d0223247b..51c99f2c5c90 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1747,26 +1747,6 @@ bad:
}
EXPORT_SYMBOL(bioset_init);
-/*
- * Initialize and setup a new bio_set, based on the settings from
- * another bio_set.
- */
-int bioset_init_from_src(struct bio_set *bs, struct bio_set *src)
-{
- int flags;
-
- flags = 0;
- if (src->bvec_pool.min_nr)
- flags |= BIOSET_NEED_BVECS;
- if (src->rescue_workqueue)
- flags |= BIOSET_NEED_RESCUER;
- if (src->cache)
- flags |= BIOSET_PERCPU_CACHE;
-
- return bioset_init(bs, src->bio_pool.min_nr, src->front_pad, flags);
-}
-EXPORT_SYMBOL(bioset_init_from_src);
-
static int __init init_bio(void)
{
int i;
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 9e56a69422b6..eb3c65a21362 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -564,6 +564,7 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
int ret;
if (!e) {
+ blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q);
q->elevator = NULL;
q->nr_requests = q->tag_set->queue_depth;
return 0;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index e9bf950983c7..33145ba52c96 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -579,6 +579,8 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
if (!blk_mq_hw_queue_mapped(data.hctx))
goto out_queue_exit;
cpu = cpumask_first_and(data.hctx->cpumask, cpu_online_mask);
+ if (cpu >= nr_cpu_ids)
+ goto out_queue_exit;
data.ctx = __blk_mq_get_ctx(q, cpu);
if (!q->elevator)
@@ -2141,20 +2143,6 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
EXPORT_SYMBOL(blk_mq_run_hw_queue);
/*
- * Is the request queue handled by an IO scheduler that does not respect
- * hardware queues when dispatching?
- */
-static bool blk_mq_has_sqsched(struct request_queue *q)
-{
- struct elevator_queue *e = q->elevator;
-
- if (e && e->type->ops.dispatch_request &&
- !(e->type->elevator_features & ELEVATOR_F_MQ_AWARE))
- return true;
- return false;
-}
-
-/*
* Return preferred queue to dispatch from (if any) for non-mq aware IO
* scheduler.
*/
@@ -2186,7 +2174,7 @@ void blk_mq_run_hw_queues(struct request_queue *q, bool async)
unsigned long i;
sq_hctx = NULL;
- if (blk_mq_has_sqsched(q))
+ if (blk_queue_sq_sched(q))
sq_hctx = blk_mq_get_sq_hctx(q);
queue_for_each_hw_ctx(q, hctx, i) {
if (blk_mq_hctx_stopped(hctx))
@@ -2214,7 +2202,7 @@ void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs)
unsigned long i;
sq_hctx = NULL;
- if (blk_mq_has_sqsched(q))
+ if (blk_queue_sq_sched(q))
sq_hctx = blk_mq_get_sq_hctx(q);
queue_for_each_hw_ctx(q, hctx, i) {
if (blk_mq_hctx_stopped(hctx))
@@ -3443,8 +3431,9 @@ static void blk_mq_exit_hctx(struct request_queue *q,
if (blk_mq_hw_queue_mapped(hctx))
blk_mq_tag_idle(hctx);
- blk_mq_clear_flush_rq_mapping(set->tags[hctx_idx],
- set->queue_depth, flush_rq);
+ if (blk_queue_init_done(q))
+ blk_mq_clear_flush_rq_mapping(set->tags[hctx_idx],
+ set->queue_depth, flush_rq);
if (set->ops->exit_request)
set->ops->exit_request(set, flush_rq, hctx_idx);
@@ -4438,12 +4427,14 @@ static bool blk_mq_elv_switch_none(struct list_head *head,
if (!qe)
return false;
+ /* q->elevator needs protection from ->sysfs_lock */
+ mutex_lock(&q->sysfs_lock);
+
INIT_LIST_HEAD(&qe->node);
qe->q = q;
qe->type = q->elevator->type;
list_add(&qe->node, head);
- mutex_lock(&q->sysfs_lock);
/*
* After elevator_switch_mq, the previous elevator_queue will be
* released by elevator_release. The reference of the io scheduler
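
The blk-mq change above replaces the elevator-feature probe with a queue flag that single-queue schedulers (bfq, mq-deadline) set at init time and that kyber and the no-elevator path clear. A stand-alone sketch of the flag pattern follows, with invented helper names and an arbitrary bit value.

#include <stdbool.h>
#include <stdio.h>

#define FAKE_FLAG_SQ_SCHED 1u	/* plays the role of QUEUE_FLAG_SQ_SCHED */

struct fake_queue { unsigned int flags; };

static void set_sq_sched(struct fake_queue *q)   { q->flags |=  FAKE_FLAG_SQ_SCHED; }
static void clear_sq_sched(struct fake_queue *q) { q->flags &= ~FAKE_FLAG_SQ_SCHED; }
static bool is_sq_sched(const struct fake_queue *q)
{
	return q->flags & FAKE_FLAG_SQ_SCHED;
}

int main(void)
{
	struct fake_queue q = { 0 };

	set_sq_sched(&q);		/* bfq/mq-deadline do this in init_sched */
	if (is_sq_sched(&q))		/* run_hw_queues then picks one sq_hctx */
		puts("dispatch from a single hctx");
	clear_sq_sched(&q);		/* kyber / no elevator */
	return 0;
}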
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index 70ff2a599ef6..8f7c745b4a57 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -421,6 +421,8 @@ static int kyber_init_sched(struct request_queue *q, struct elevator_type *e)
blk_stat_enable_accounting(q);
+ blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q);
+
eq->elevator_data = kqd;
q->elevator = eq;
@@ -1033,7 +1035,6 @@ static struct elevator_type kyber_sched = {
#endif
.elevator_attrs = kyber_sched_attrs,
.elevator_name = "kyber",
- .elevator_features = ELEVATOR_F_MQ_AWARE,
.elevator_owner = THIS_MODULE,
};
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 6ed602b2f80a..1a9e835e816c 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -642,6 +642,9 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
spin_lock_init(&dd->lock);
spin_lock_init(&dd->zone_lock);
+ /* We dispatch request-queue-wide instead of per hw queue */
+ blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);
+
q->elevator = eq;
return 0;
diff --git a/certs/.gitignore b/certs/.gitignore
index 56637aceaf81..cec5465f31c1 100644
--- a/certs/.gitignore
+++ b/certs/.gitignore
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-/blacklist_hashes_checked
+/blacklist_hash_list
/extract-cert
/x509_certificate_list
/x509_revocation_list
diff --git a/certs/Makefile b/certs/Makefile
index cb1a9da3fc58..88a73b28d254 100644
--- a/certs/Makefile
+++ b/certs/Makefile
@@ -3,26 +3,26 @@
# Makefile for the linux kernel signature checking certificates.
#
-obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o common.o
-obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist.o common.o
+obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o
+obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist.o
obj-$(CONFIG_SYSTEM_REVOCATION_LIST) += revocation_certificates.o
ifneq ($(CONFIG_SYSTEM_BLACKLIST_HASH_LIST),)
-quiet_cmd_check_blacklist_hashes = CHECK $(patsubst "%",%,$(2))
- cmd_check_blacklist_hashes = $(AWK) -f $(srctree)/scripts/check-blacklist-hashes.awk $(2); touch $@
-$(eval $(call config_filename,SYSTEM_BLACKLIST_HASH_LIST))
+$(obj)/blacklist_hashes.o: $(obj)/blacklist_hash_list
+CFLAGS_blacklist_hashes.o := -I $(obj)
-$(obj)/blacklist_hashes.o: $(obj)/blacklist_hashes_checked
+quiet_cmd_check_and_copy_blacklist_hash_list = GEN $@
+ cmd_check_and_copy_blacklist_hash_list = \
+ $(AWK) -f $(srctree)/scripts/check-blacklist-hashes.awk $(CONFIG_SYSTEM_BLACKLIST_HASH_LIST) >&2; \
+ cat $(CONFIG_SYSTEM_BLACKLIST_HASH_LIST) > $@
-CFLAGS_blacklist_hashes.o += -I$(srctree)
-
-targets += blacklist_hashes_checked
-$(obj)/blacklist_hashes_checked: $(SYSTEM_BLACKLIST_HASH_LIST_SRCPREFIX)$(SYSTEM_BLACKLIST_HASH_LIST_FILENAME) scripts/check-blacklist-hashes.awk FORCE
- $(call if_changed,check_blacklist_hashes,$(SYSTEM_BLACKLIST_HASH_LIST_SRCPREFIX)$(CONFIG_SYSTEM_BLACKLIST_HASH_LIST))
+$(obj)/blacklist_hash_list: $(CONFIG_SYSTEM_BLACKLIST_HASH_LIST) FORCE
+ $(call if_changed,check_and_copy_blacklist_hash_list)
obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist_hashes.o
else
obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist_nohashes.o
endif
+targets += blacklist_hash_list
quiet_cmd_extract_certs = CERT $@
cmd_extract_certs = $(obj)/extract-cert $(extract-cert-in) $@
@@ -33,7 +33,7 @@ $(obj)/system_certificates.o: $(obj)/x509_certificate_list
$(obj)/x509_certificate_list: $(CONFIG_SYSTEM_TRUSTED_KEYS) $(obj)/extract-cert FORCE
$(call if_changed,extract_certs)
-targets += x509_certificate_list blacklist_hashes_checked
+targets += x509_certificate_list
# If module signing is requested, say by allyesconfig, but a key has not been
# supplied, then one will need to be generated to make sure the build does not
diff --git a/certs/blacklist.c b/certs/blacklist.c
index 25094ea73600..41f10601cc72 100644
--- a/certs/blacklist.c
+++ b/certs/blacklist.c
@@ -15,10 +15,9 @@
#include <linux/err.h>
#include <linux/seq_file.h>
#include <linux/uidgid.h>
-#include <linux/verification.h>
+#include <keys/asymmetric-type.h>
#include <keys/system_keyring.h>
#include "blacklist.h"
-#include "common.h"
/*
* According to crypto/asymmetric_keys/x509_cert_parser.c:x509_note_pkey_algo(),
@@ -365,8 +364,9 @@ static __init int load_revocation_certificate_list(void)
if (revocation_certificate_list_size)
pr_notice("Loading compiled-in revocation X.509 certificates\n");
- return load_certificate_list(revocation_certificate_list, revocation_certificate_list_size,
- blacklist_keyring);
+ return x509_load_certificate_list(revocation_certificate_list,
+ revocation_certificate_list_size,
+ blacklist_keyring);
}
late_initcall(load_revocation_certificate_list);
#endif
diff --git a/certs/blacklist_hashes.c b/certs/blacklist_hashes.c
index 344892337be0..86d66fe11348 100644
--- a/certs/blacklist_hashes.c
+++ b/certs/blacklist_hashes.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "blacklist.h"
-const char __initdata *const blacklist_hashes[] = {
-#include CONFIG_SYSTEM_BLACKLIST_HASH_LIST
+const char __initconst *const blacklist_hashes[] = {
+#include "blacklist_hash_list"
, NULL
};
diff --git a/certs/common.h b/certs/common.h
deleted file mode 100644
index abdb5795936b..000000000000
--- a/certs/common.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-
-#ifndef _CERT_COMMON_H
-#define _CERT_COMMON_H
-
-int load_certificate_list(const u8 cert_list[], const unsigned long list_size,
- const struct key *keyring);
-
-#endif
diff --git a/certs/system_keyring.c b/certs/system_keyring.c
index 05b66ce9d1c9..5042cc54fa5e 100644
--- a/certs/system_keyring.c
+++ b/certs/system_keyring.c
@@ -16,7 +16,6 @@
#include <keys/asymmetric-type.h>
#include <keys/system_keyring.h>
#include <crypto/pkcs7.h>
-#include "common.h"
static struct key *builtin_trusted_keys;
#ifdef CONFIG_SECONDARY_TRUSTED_KEYRING
@@ -183,7 +182,8 @@ __init int load_module_cert(struct key *keyring)
pr_notice("Loading compiled-in module X.509 certificates\n");
- return load_certificate_list(system_certificate_list, module_cert_size, keyring);
+ return x509_load_certificate_list(system_certificate_list,
+ module_cert_size, keyring);
}
/*
@@ -204,7 +204,7 @@ static __init int load_system_certificate_list(void)
size = system_certificate_list_size - module_cert_size;
#endif
- return load_certificate_list(p, size, builtin_trusted_keys);
+ return x509_load_certificate_list(p, size, builtin_trusted_keys);
}
late_initcall(load_system_certificate_list);
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 19197469cfab..1d44893a997b 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -15,6 +15,7 @@ source "crypto/async_tx/Kconfig"
#
menuconfig CRYPTO
tristate "Cryptographic API"
+ select LIB_MEMNEQ
help
This option provides the core Cryptographic API.
diff --git a/crypto/Makefile b/crypto/Makefile
index 43bc33e247d1..ceaaa9f34145 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -4,7 +4,7 @@
#
obj-$(CONFIG_CRYPTO) += crypto.o
-crypto-y := api.o cipher.o compress.o memneq.o
+crypto-y := api.o cipher.o compress.o
obj-$(CONFIG_CRYPTO_ENGINE) += crypto_engine.o
obj-$(CONFIG_CRYPTO_FIPS) += fips.o
diff --git a/crypto/asymmetric_keys/Kconfig b/crypto/asymmetric_keys/Kconfig
index 460bc5d0a828..3df3fe4ed95f 100644
--- a/crypto/asymmetric_keys/Kconfig
+++ b/crypto/asymmetric_keys/Kconfig
@@ -75,4 +75,14 @@ config SIGNED_PE_FILE_VERIFICATION
This option provides support for verifying the signature(s) on a
signed PE binary.
+config FIPS_SIGNATURE_SELFTEST
+ bool "Run FIPS selftests on the X.509+PKCS7 signature verification"
+ help
+ This option causes some selftests to be run on the signature
+ verification code, using some built-in data. This is required
+ for FIPS.
+ depends on KEYS
+ depends on ASYMMETRIC_KEY_TYPE
+ depends on PKCS7_MESSAGE_PARSER
+
endif # ASYMMETRIC_KEY_TYPE
diff --git a/crypto/asymmetric_keys/Makefile b/crypto/asymmetric_keys/Makefile
index c38424f55b08..0d1fa1b692c6 100644
--- a/crypto/asymmetric_keys/Makefile
+++ b/crypto/asymmetric_keys/Makefile
@@ -20,7 +20,9 @@ x509_key_parser-y := \
x509.asn1.o \
x509_akid.asn1.o \
x509_cert_parser.o \
+ x509_loader.o \
x509_public_key.o
+x509_key_parser-$(CONFIG_FIPS_SIGNATURE_SELFTEST) += selftest.o
$(obj)/x509_cert_parser.o: \
$(obj)/x509.asn1.h \
diff --git a/crypto/asymmetric_keys/selftest.c b/crypto/asymmetric_keys/selftest.c
new file mode 100644
index 000000000000..fa0bf7f24284
--- /dev/null
+++ b/crypto/asymmetric_keys/selftest.c
@@ -0,0 +1,224 @@
+/* Self-testing for signature checking.
+ *
+ * Copyright (C) 2022 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/cred.h>
+#include <linux/key.h>
+#include <crypto/pkcs7.h>
+#include "x509_parser.h"
+
+struct certs_test {
+ const u8 *data;
+ size_t data_len;
+ const u8 *pkcs7;
+ size_t pkcs7_len;
+};
+
+/*
+ * Set of X.509 certificates to provide public keys for the tests. These will
+ * be loaded into a temporary keyring for the duration of the testing.
+ */
+static const __initconst u8 certs_selftest_keys[] = {
+ "\x30\x82\x05\x55\x30\x82\x03\x3d\xa0\x03\x02\x01\x02\x02\x14\x73"
+ "\x98\xea\x98\x2d\xd0\x2e\xa8\xb1\xcf\x57\xc7\xf2\x97\xb3\xe6\x1a"
+ "\xfc\x8c\x0a\x30\x0d\x06\x09\x2a\x86\x48\x86\xf7\x0d\x01\x01\x0b"
+ "\x05\x00\x30\x34\x31\x32\x30\x30\x06\x03\x55\x04\x03\x0c\x29\x43"
+ "\x65\x72\x74\x69\x66\x69\x63\x61\x74\x65\x20\x76\x65\x72\x69\x66"
+ "\x69\x63\x61\x74\x69\x6f\x6e\x20\x73\x65\x6c\x66\x2d\x74\x65\x73"
+ "\x74\x69\x6e\x67\x20\x6b\x65\x79\x30\x20\x17\x0d\x32\x32\x30\x35"
+ "\x31\x38\x32\x32\x33\x32\x34\x31\x5a\x18\x0f\x32\x31\x32\x32\x30"
+ "\x34\x32\x34\x32\x32\x33\x32\x34\x31\x5a\x30\x34\x31\x32\x30\x30"
+ "\x06\x03\x55\x04\x03\x0c\x29\x43\x65\x72\x74\x69\x66\x69\x63\x61"
+ "\x74\x65\x20\x76\x65\x72\x69\x66\x69\x63\x61\x74\x69\x6f\x6e\x20"
+ "\x73\x65\x6c\x66\x2d\x74\x65\x73\x74\x69\x6e\x67\x20\x6b\x65\x79"
+ "\x30\x82\x02\x22\x30\x0d\x06\x09\x2a\x86\x48\x86\xf7\x0d\x01\x01"
+ "\x01\x05\x00\x03\x82\x02\x0f\x00\x30\x82\x02\x0a\x02\x82\x02\x01"
+ "\x00\xcc\xac\x49\xdd\x3b\xca\xb0\x15\x7e\x84\x6a\xb2\x0a\x69\x5f"
+ "\x1c\x0a\x61\x82\x3b\x4f\x2c\xa3\x95\x2c\x08\x58\x4b\xb1\x5d\x99"
+ "\xe0\xc3\xc1\x79\xc2\xb3\xeb\xc0\x1e\x6d\x3e\x54\x1d\xbd\xb7\x92"
+ "\x7b\x4d\xb5\x95\x58\xb2\x52\x2e\xc6\x24\x4b\x71\x63\x80\x32\x77"
+ "\xa7\x38\x5e\xdb\x72\xae\x6e\x0d\xec\xfb\xb6\x6d\x01\x7f\xe9\x55"
+ "\x66\xdf\xbf\x1d\x76\x78\x02\x31\xe8\xe5\x07\xf8\xb7\x82\x5c\x0d"
+ "\xd4\xbb\xfb\xa2\x59\x0d\x2e\x3a\x78\x95\x3a\x8b\x46\x06\x47\x44"
+ "\x46\xd7\xcd\x06\x6a\x41\x13\xe3\x19\xf6\xbb\x6e\x38\xf4\x83\x01"
+ "\xa3\xbf\x4a\x39\x4f\xd7\x0a\xe9\x38\xb3\xf5\x94\x14\x4e\xdd\xf7"
+ "\x43\xfd\x24\xb2\x49\x3c\xa5\xf7\x7a\x7c\xd4\x45\x3d\x97\x75\x68"
+ "\xf1\xed\x4c\x42\x0b\x70\xca\x85\xf3\xde\xe5\x88\x2c\xc5\xbe\xb6"
+ "\x97\x34\xba\x24\x02\xcd\x8b\x86\x9f\xa9\x73\xca\x73\xcf\x92\x81"
+ "\xee\x75\x55\xbb\x18\x67\x5c\xff\x3f\xb5\xdd\x33\x1b\x0c\xe9\x78"
+ "\xdb\x5c\xcf\xaa\x5c\x43\x42\xdf\x5e\xa9\x6d\xec\xd7\xd7\xff\xe6"
+ "\xa1\x3a\x92\x1a\xda\xae\xf6\x8c\x6f\x7b\xd5\xb4\x6e\x06\xe9\x8f"
+ "\xe8\xde\x09\x31\x89\xed\x0e\x11\xa1\xfa\x8a\xe9\xe9\x64\x59\x62"
+ "\x53\xda\xd1\x70\xbe\x11\xd4\x99\x97\x11\xcf\x99\xde\x0b\x9d\x94"
+ "\x7e\xaa\xb8\x52\xea\x37\xdb\x90\x7e\x35\xbd\xd9\xfe\x6d\x0a\x48"
+ "\x70\x28\xdd\xd5\x0d\x7f\x03\x80\x93\x14\x23\x8f\xb9\x22\xcd\x7c"
+ "\x29\xfe\xf1\x72\xb5\x5c\x0b\x12\xcf\x9c\x15\xf6\x11\x4c\x7a\x45"
+ "\x25\x8c\x45\x0a\x34\xac\x2d\x9a\x81\xca\x0b\x13\x22\xcd\xeb\x1a"
+ "\x38\x88\x18\x97\x96\x08\x81\xaa\xcc\x8f\x0f\x8a\x32\x7b\x76\x68"
+ "\x03\x68\x43\xbf\x11\xba\x55\x60\xfd\x80\x1c\x0d\x9b\x69\xb6\x09"
+ "\x72\xbc\x0f\x41\x2f\x07\x82\xc6\xe3\xb2\x13\x91\xc4\x6d\x14\x95"
+ "\x31\xbe\x19\xbd\xbc\xed\xe1\x4c\x74\xa2\xe0\x78\x0b\xbb\x94\xec"
+ "\x4c\x53\x3a\xa2\xb5\x84\x1d\x4b\x65\x7e\xdc\xf7\xdb\x36\x7d\xbe"
+ "\x9e\x3b\x36\x66\x42\x66\x76\x35\xbf\xbe\xf0\xc1\x3c\x7c\xe9\x42"
+ "\x5c\x24\x53\x03\x05\xa8\x67\x24\x50\x02\x75\xff\x24\x46\x3b\x35"
+ "\x89\x76\xe6\x70\xda\xc5\x51\x8c\x9a\xe5\x05\xb0\x0b\xd0\x2d\xd4"
+ "\x7d\x57\x75\x94\x6b\xf9\x0a\xad\x0e\x41\x00\x15\xd0\x4f\xc0\x7f"
+ "\x90\x2d\x18\x48\x8f\x28\xfe\x5d\xa7\xcd\x99\x9e\xbd\x02\x6c\x8a"
+ "\x31\xf3\x1c\xc7\x4b\xe6\x93\xcd\x42\xa2\xe4\x68\x10\x47\x9d\xfc"
+ "\x21\x02\x03\x01\x00\x01\xa3\x5d\x30\x5b\x30\x0c\x06\x03\x55\x1d"
+ "\x13\x01\x01\xff\x04\x02\x30\x00\x30\x0b\x06\x03\x55\x1d\x0f\x04"
+ "\x04\x03\x02\x07\x80\x30\x1d\x06\x03\x55\x1d\x0e\x04\x16\x04\x14"
+ "\xf5\x87\x03\xbb\x33\xce\x1b\x73\xee\x02\xec\xcd\xee\x5b\x88\x17"
+ "\x51\x8f\xe3\xdb\x30\x1f\x06\x03\x55\x1d\x23\x04\x18\x30\x16\x80"
+ "\x14\xf5\x87\x03\xbb\x33\xce\x1b\x73\xee\x02\xec\xcd\xee\x5b\x88"
+ "\x17\x51\x8f\xe3\xdb\x30\x0d\x06\x09\x2a\x86\x48\x86\xf7\x0d\x01"
+ "\x01\x0b\x05\x00\x03\x82\x02\x01\x00\xc0\x2e\x12\x41\x7b\x73\x85"
+ "\x16\xc8\xdb\x86\x79\xe8\xf5\xcd\x44\xf4\xc6\xe2\x81\x23\x5e\x47"
+ "\xcb\xab\x25\xf1\x1e\x58\x3e\x31\x7f\x78\xad\x85\xeb\xfe\x14\x88"
+ "\x60\xf7\x7f\xd2\x26\xa2\xf4\x98\x2a\xfd\xba\x05\x0c\x20\x33\x12"
+ "\xcc\x4d\x14\x61\x64\x81\x93\xd3\x33\xed\xc8\xff\xf1\x78\xcc\x5f"
+ "\x51\x9f\x09\xd7\xbe\x0d\x5c\x74\xfd\x9b\xdf\x52\x4a\xc9\xa8\x71"
+ "\x25\x33\x04\x10\x67\x36\xd0\xb3\x0b\xc9\xa1\x40\x72\xae\x41\x7b"
+ "\x68\xe6\xe4\x7b\xd0\x28\xf7\x6d\xe7\x3f\x50\xfc\x91\x7c\x91\x56"
+ "\xd4\xdf\xa6\xbb\xe8\x4d\x1b\x58\xaa\x28\xfa\xc1\x19\xeb\x11\x2f"
+ "\x24\x8b\x7c\xc5\xa9\x86\x26\xaa\x6e\xb7\x9b\xd5\xf8\x06\xfb\x02"
+ "\x52\x7b\x9c\x9e\xa1\xe0\x07\x8b\x5e\xe4\xb8\x55\x29\xf6\x48\x52"
+ "\x1c\x1b\x54\x2d\x46\xd8\xe5\x71\xb9\x60\xd1\x45\xb5\x92\x89\x8a"
+ "\x63\x58\x2a\xb3\xc6\xb2\x76\xe2\x3c\x82\x59\x04\xae\x5a\xc4\x99"
+ "\x7b\x2e\x4b\x46\x57\xb8\x29\x24\xb2\xfd\xee\x2c\x0d\xa4\x83\xfa"
+ "\x65\x2a\x07\x35\x8b\x97\xcf\xbd\x96\x2e\xd1\x7e\x6c\xc2\x1e\x87"
+ "\xb6\x6c\x76\x65\xb5\xb2\x62\xda\x8b\xe9\x73\xe3\xdb\x33\xdd\x13"
+ "\x3a\x17\x63\x6a\x76\xde\x8d\x8f\xe0\x47\x61\x28\x3a\x83\xff\x8f"
+ "\xe7\xc7\xe0\x4a\xa3\xe5\x07\xcf\xe9\x8c\x35\x35\x2e\xe7\x80\x66"
+ "\x31\xbf\x91\x58\x0a\xe1\x25\x3d\x38\xd3\xa4\xf0\x59\x34\x47\x07"
+ "\x62\x0f\xbe\x30\xdd\x81\x88\x58\xf0\x28\xb0\x96\xe5\x82\xf8\x05"
+ "\xb7\x13\x01\xbc\xfa\xc6\x1f\x86\x72\xcc\xf9\xee\x8e\xd9\xd6\x04"
+ "\x8c\x24\x6c\xbf\x0f\x5d\x37\x39\xcf\x45\xc1\x93\x3a\xd2\xed\x5c"
+ "\x58\x79\x74\x86\x62\x30\x7e\x8e\xbb\xdd\x7a\xa9\xed\xca\x40\xcb"
+ "\x62\x47\xf4\xb4\x9f\x52\x7f\x72\x63\xa8\xf0\x2b\xaf\x45\x2a\x48"
+ "\x19\x6d\xe3\xfb\xf9\x19\x66\x69\xc8\xcc\x62\x87\x6c\x53\x2b\x2d"
+ "\x6e\x90\x6c\x54\x3a\x82\x25\x41\xcb\x18\x6a\xa4\x22\xa8\xa1\xc4"
+ "\x47\xd7\x81\x00\x1c\x15\x51\x0f\x1a\xaf\xef\x9f\xa6\x61\x8c\xbd"
+ "\x6b\x8b\xed\xe6\xac\x0e\xb6\x3a\x4c\x92\xe6\x0f\x91\x0a\x0f\x71"
+ "\xc7\xa0\xb9\x0d\x3a\x17\x5a\x6f\x35\xc8\xe7\x50\x4f\x46\xe8\x70"
+ "\x60\x48\x06\x82\x8b\x66\x58\xe6\x73\x91\x9c\x12\x3d\x35\x8e\x46"
+ "\xad\x5a\xf5\xb3\xdb\x69\x21\x04\xfd\xd3\x1c\xdf\x94\x9d\x56\xb0"
+ "\x0a\xd1\x95\x76\x8d\xec\x9e\xdd\x0b\x15\x97\x64\xad\xe5\xf2\x62"
+ "\x02\xfc\x9e\x5f\x56\x42\x39\x05\xb3"
+};
+
+/*
+ * Signed data and detached signature blobs that form the verification tests.
+ */
+static const __initconst u8 certs_selftest_1_data[] = {
+ "\x54\x68\x69\x73\x20\x69\x73\x20\x73\x6f\x6d\x65\x20\x74\x65\x73"
+ "\x74\x20\x64\x61\x74\x61\x20\x75\x73\x65\x64\x20\x66\x6f\x72\x20"
+ "\x73\x65\x6c\x66\x2d\x74\x65\x73\x74\x69\x6e\x67\x20\x63\x65\x72"
+ "\x74\x69\x66\x69\x63\x61\x74\x65\x20\x76\x65\x72\x69\x66\x69\x63"
+ "\x61\x74\x69\x6f\x6e\x2e\x0a"
+};
+
+static const __initconst u8 certs_selftest_1_pkcs7[] = {
+ "\x30\x82\x02\xab\x06\x09\x2a\x86\x48\x86\xf7\x0d\x01\x07\x02\xa0"
+ "\x82\x02\x9c\x30\x82\x02\x98\x02\x01\x01\x31\x0d\x30\x0b\x06\x09"
+ "\x60\x86\x48\x01\x65\x03\x04\x02\x01\x30\x0b\x06\x09\x2a\x86\x48"
+ "\x86\xf7\x0d\x01\x07\x01\x31\x82\x02\x75\x30\x82\x02\x71\x02\x01"
+ "\x01\x30\x4c\x30\x34\x31\x32\x30\x30\x06\x03\x55\x04\x03\x0c\x29"
+ "\x43\x65\x72\x74\x69\x66\x69\x63\x61\x74\x65\x20\x76\x65\x72\x69"
+ "\x66\x69\x63\x61\x74\x69\x6f\x6e\x20\x73\x65\x6c\x66\x2d\x74\x65"
+ "\x73\x74\x69\x6e\x67\x20\x6b\x65\x79\x02\x14\x73\x98\xea\x98\x2d"
+ "\xd0\x2e\xa8\xb1\xcf\x57\xc7\xf2\x97\xb3\xe6\x1a\xfc\x8c\x0a\x30"
+ "\x0b\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01\x30\x0d\x06\x09"
+ "\x2a\x86\x48\x86\xf7\x0d\x01\x01\x01\x05\x00\x04\x82\x02\x00\xac"
+ "\xb0\xf2\x07\xd6\x99\x6d\xc0\xc0\xd9\x8d\x31\x0d\x7e\x04\xeb\xc3"
+ "\x88\x90\xc4\x58\x46\xd4\xe2\xa0\xa3\x25\xe3\x04\x50\x37\x85\x8c"
+ "\x91\xc6\xfc\xc5\xd4\x92\xfd\x05\xd8\xb8\xa3\xb8\xba\x89\x13\x00"
+ "\x88\x79\x99\x51\x6b\x5b\x28\x31\xc0\xb3\x1b\x7a\x68\x2c\x00\xdb"
+ "\x4b\x46\x11\xf3\xfa\x50\x8e\x19\x89\xa2\x4c\xda\x4c\x89\x01\x11"
+ "\x89\xee\xd3\xc8\xc1\xe7\xa7\xf6\xb2\xa2\xf8\x65\xb8\x35\x20\x33"
+ "\xba\x12\x62\xd5\xbd\xaa\x71\xe5\x5b\xc0\x6a\x32\xff\x6a\x2e\x23"
+ "\xef\x2b\xb6\x58\xb1\xfb\x5f\x82\x34\x40\x6d\x9f\xbc\x27\xac\x37"
+ "\x23\x99\xcf\x7d\x20\xb2\x39\x01\xc0\x12\xce\xd7\x5d\x2f\xb6\xab"
+ "\xb5\x56\x4f\xef\xf4\x72\x07\x58\x65\xa9\xeb\x1f\x75\x1c\x5f\x0c"
+ "\x88\xe0\xa4\xe2\xcd\x73\x2b\x9e\xb2\x05\x7e\x12\xf8\xd0\x66\x41"
+ "\xcc\x12\x63\xd4\xd6\xac\x9b\x1d\x14\x77\x8d\x1c\x57\xd5\x27\xc6"
+ "\x49\xa2\x41\x43\xf3\x59\x29\xe5\xcb\xd1\x75\xbc\x3a\x97\x2a\x72"
+ "\x22\x66\xc5\x3b\xc1\xba\xfc\x53\x18\x98\xe2\x21\x64\xc6\x52\x87"
+ "\x13\xd5\x7c\x42\xe8\xfb\x9c\x9a\x45\x32\xd5\xa5\x22\x62\x9d\xd4"
+ "\xcb\xa4\xfa\x77\xbb\x50\x24\x0b\x8b\x88\x99\x15\x56\xa9\x1e\x92"
+ "\xbf\x5d\x94\x77\xb6\xf1\x67\x01\x60\x06\x58\x5c\xdf\x18\x52\x79"
+ "\x37\x30\x93\x7d\x87\x04\xf1\xe0\x55\x59\x52\xf3\xc2\xb1\x1c\x5b"
+ "\x12\x7c\x49\x87\xfb\xf7\xed\xdd\x95\x71\xec\x4b\x1a\x85\x08\xb0"
+ "\xa0\x36\xc4\x7b\xab\x40\xe0\xf1\x98\xcc\xaf\x19\x40\x8f\x47\x6f"
+ "\xf0\x6c\x84\x29\x7f\x7f\x04\x46\xcb\x08\x0f\xe0\xc1\xc9\x70\x6e"
+ "\x95\x3b\xa4\xbc\x29\x2b\x53\x67\x45\x1b\x0d\xbc\x13\xa5\x76\x31"
+ "\xaf\xb9\xd0\xe0\x60\x12\xd2\xf4\xb7\x7c\x58\x7e\xf6\x2d\xbb\x24"
+ "\x14\x5a\x20\x24\xa8\x12\xdf\x25\xbd\x42\xce\x96\x7c\x2e\xba\x14"
+ "\x1b\x81\x9f\x18\x45\xa4\xc6\x70\x3e\x0e\xf0\xd3\x7b\x9c\x10\xbe"
+ "\xb8\x7a\x89\xc5\x9e\xd9\x97\xdf\xd7\xe7\xc6\x1d\xc0\x20\x6c\xb8"
+ "\x1e\x3a\x63\xb8\x39\x8e\x8e\x62\xd5\xd2\xb4\xcd\xff\x46\xfc\x8e"
+ "\xec\x07\x35\x0c\xff\xb0\x05\xe6\xf4\xe5\xfe\xa2\xe3\x0a\xe6\x36"
+ "\xa7\x4a\x7e\x62\x1d\xc4\x50\x39\x35\x4e\x28\xcb\x4a\xfb\x9d\xdb"
+ "\xdd\x23\xd6\x53\xb1\x74\x77\x12\xf7\x9c\xf0\x9a\x6b\xf7\xa9\x64"
+ "\x2d\x86\x21\x2a\xcf\xc6\x54\xf5\xc9\xad\xfa\xb5\x12\xb4\xf3\x51"
+ "\x77\x55\x3c\x6f\x0c\x32\xd3\x8c\x44\x39\x71\x25\xfe\x96\xd2"
+};
+
+/*
+ * List of tests to be run.
+ */
+#define TEST(data, pkcs7) { data, sizeof(data) - 1, pkcs7, sizeof(pkcs7) - 1 }
+static const struct certs_test certs_tests[] __initconst = {
+ TEST(certs_selftest_1_data, certs_selftest_1_pkcs7),
+};
+
+int __init fips_signature_selftest(void)
+{
+ struct key *keyring;
+ int ret, i;
+
+ pr_notice("Running certificate verification selftests\n");
+
+ keyring = keyring_alloc(".certs_selftest",
+ GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(),
+ (KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ |
+ KEY_USR_SEARCH,
+ KEY_ALLOC_NOT_IN_QUOTA,
+ NULL, NULL);
+ if (IS_ERR(keyring))
+ panic("Can't allocate certs selftest keyring: %ld\n",
+ PTR_ERR(keyring));
+
+ ret = x509_load_certificate_list(certs_selftest_keys,
+ sizeof(certs_selftest_keys) - 1, keyring);
+ if (ret < 0)
+ panic("Can't allocate certs selftest keyring: %d\n", ret);
+
+ for (i = 0; i < ARRAY_SIZE(certs_tests); i++) {
+ const struct certs_test *test = &certs_tests[i];
+ struct pkcs7_message *pkcs7;
+
+ pkcs7 = pkcs7_parse_message(test->pkcs7, test->pkcs7_len);
+ if (IS_ERR(pkcs7))
+ panic("Certs selftest %d: pkcs7_parse_message() = %d\n", i, ret);
+
+ pkcs7_supply_detached_data(pkcs7, test->data, test->data_len);
+
+ ret = pkcs7_verify(pkcs7, VERIFYING_MODULE_SIGNATURE);
+ if (ret < 0)
+ panic("Certs selftest %d: pkcs7_verify() = %d\n", i, ret);
+
+ ret = pkcs7_validate_trust(pkcs7, keyring);
+ if (ret < 0)
+ panic("Certs selftest %d: pkcs7_validate_trust() = %d\n", i, ret);
+
+ pkcs7_free_message(pkcs7);
+ }
+
+ key_put(keyring);
+ return 0;
+}
diff --git a/certs/common.c b/crypto/asymmetric_keys/x509_loader.c
index 16a220887a53..1bc169dee22e 100644
--- a/certs/common.c
+++ b/crypto/asymmetric_keys/x509_loader.c
@@ -2,11 +2,11 @@
#include <linux/kernel.h>
#include <linux/key.h>
-#include "common.h"
+#include <keys/asymmetric-type.h>
-int load_certificate_list(const u8 cert_list[],
- const unsigned long list_size,
- const struct key *keyring)
+int x509_load_certificate_list(const u8 cert_list[],
+ const unsigned long list_size,
+ const struct key *keyring)
{
key_ref_t key;
const u8 *p, *end;
diff --git a/crypto/asymmetric_keys/x509_parser.h b/crypto/asymmetric_keys/x509_parser.h
index 97a886cbe01c..a299c9c56f40 100644
--- a/crypto/asymmetric_keys/x509_parser.h
+++ b/crypto/asymmetric_keys/x509_parser.h
@@ -41,6 +41,15 @@ struct x509_certificate {
};
/*
+ * selftest.c
+ */
+#ifdef CONFIG_FIPS_SIGNATURE_SELFTEST
+extern int __init fips_signature_selftest(void);
+#else
+static inline int fips_signature_selftest(void) { return 0; }
+#endif
+
+/*
* x509_cert_parser.c
*/
extern void x509_free_certificate(struct x509_certificate *cert);
diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c
index 77ed4e93ad56..0b4943a4592b 100644
--- a/crypto/asymmetric_keys/x509_public_key.c
+++ b/crypto/asymmetric_keys/x509_public_key.c
@@ -244,9 +244,15 @@ static struct asymmetric_key_parser x509_key_parser = {
/*
* Module stuff
*/
static int __init x509_key_init(void)
{
- return register_asymmetric_key_parser(&x509_key_parser);
+ int ret;
+
+ ret = register_asymmetric_key_parser(&x509_key_parser);
+ if (ret < 0)
+ return ret;
+ return fips_signature_selftest();
}
static void __exit x509_key_exit(void)
diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c
index 3e726ee91fdc..324148686953 100644
--- a/drivers/atm/iphase.c
+++ b/drivers/atm/iphase.c
@@ -739,7 +739,7 @@ static u16 ia_eeprom_get (IADEV *iadev, u32 addr)
u32 t;
int i;
/*
- * Read the first bit that was clocked with the falling edge of the
+ * Read the first bit that was clocked with the falling edge of
* the last command data clock
*/
NVRAM_CMD(IAREAD + addr);
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 2ef23fce0860..a97776ea9d99 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -564,6 +564,12 @@ ssize_t __weak cpu_show_srbds(struct device *dev,
return sysfs_emit(buf, "Not affected\n");
}
+ssize_t __weak cpu_show_mmio_stale_data(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "Not affected\n");
+}
+
static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
@@ -573,6 +579,7 @@ static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL);
static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL);
static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL);
+static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL);
static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_meltdown.attr,
@@ -584,6 +591,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_tsx_async_abort.attr,
&dev_attr_itlb_multihit.attr,
&dev_attr_srbds.attr,
+ &dev_attr_mmio_stale_data.attr,
NULL
};
diff --git a/drivers/base/init.c b/drivers/base/init.c
index d8d0fe687111..397eb9880cec 100644
--- a/drivers/base/init.c
+++ b/drivers/base/init.c
@@ -8,6 +8,7 @@
#include <linux/init.h>
#include <linux/memory.h>
#include <linux/of.h>
+#include <linux/backing-dev.h>
#include "base.h"
@@ -20,6 +21,7 @@
void __init driver_init(void)
{
/* These are the core pieces */
+ bdi_init(&noop_backing_dev_info);
devtmpfs_init();
devices_init();
buses_init();
diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c
index e81a9700cfd0..6143dbf31f31 100644
--- a/drivers/bus/fsl-mc/fsl-mc-bus.c
+++ b/drivers/bus/fsl-mc/fsl-mc-bus.c
@@ -1239,14 +1239,14 @@ error_cleanup_mc_io:
static int fsl_mc_bus_remove(struct platform_device *pdev)
{
struct fsl_mc *mc = platform_get_drvdata(pdev);
+ struct fsl_mc_io *mc_io;
if (!fsl_mc_is_root_dprc(&mc->root_mc_bus_dev->dev))
return -EINVAL;
+ mc_io = mc->root_mc_bus_dev->mc_io;
fsl_mc_device_remove(mc->root_mc_bus_dev);
-
- fsl_destroy_mc_io(mc->root_mc_bus_dev->mc_io);
- mc->root_mc_bus_dev->mc_io = NULL;
+ fsl_destroy_mc_io(mc_io);
bus_unregister_notifier(&fsl_mc_bus_type, &fsl_mc_nb);
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 69fd31ffb847..0b6c03643ddc 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -429,28 +429,40 @@ config ADI
driver include crash and makedumpfile.
config RANDOM_TRUST_CPU
- bool "Trust the CPU manufacturer to initialize Linux's CRNG"
+ bool "Initialize RNG using CPU RNG instructions"
+ default y
depends on ARCH_RANDOM
- default n
help
- Assume that CPU manufacturer (e.g., Intel or AMD for RDSEED or
- RDRAND, IBM for the S390 and Power PC architectures) is trustworthy
- for the purposes of initializing Linux's CRNG. Since this is not
- something that can be independently audited, this amounts to trusting
- that CPU manufacturer (perhaps with the insistence or mandate
- of a Nation State's intelligence or law enforcement agencies)
- has not installed a hidden back door to compromise the CPU's
- random number generation facilities. This can also be configured
- at boot with "random.trust_cpu=on/off".
+ Initialize the RNG using random numbers supplied by the CPU's
+ RNG instructions (e.g. RDRAND), if supported and available. These
+ random numbers are never used directly, but are rather hashed into
+ the main input pool, and this happens regardless of whether or not
+ this option is enabled. Instead, this option controls whether
+ they are credited and hence can initialize the RNG. Additionally,
+ other sources of randomness are always used, regardless of this
+ setting. Enabling this implies trusting that the CPU can supply high
+ quality and non-backdoored random numbers.
+
+ Say Y here unless you have reason to mistrust your CPU or believe
+ its RNG facilities may be faulty. This may also be configured at
+ boot time with "random.trust_cpu=on/off".
config RANDOM_TRUST_BOOTLOADER
- bool "Trust the bootloader to initialize Linux's CRNG"
- help
- Some bootloaders can provide entropy to increase the kernel's initial
- device randomness. Say Y here to assume the entropy provided by the
- booloader is trustworthy so it will be added to the kernel's entropy
- pool. Otherwise, say N here so it will be regarded as device input that
- only mixes the entropy pool. This can also be configured at boot with
- "random.trust_bootloader=on/off".
+ bool "Initialize RNG using bootloader-supplied seed"
+ default y
+ help
+ Initialize the RNG using a seed supplied by the bootloader or boot
+ environment (e.g. EFI or a bootloader-generated device tree). This
+ seed is not used directly, but is rather hashed into the main input
+ pool, and this happens regardless of whether or not this option is
+ enabled. Instead, this option controls whether the seed is credited
+ and hence can initialize the RNG. Additionally, other sources of
+ randomness are always used, regardless of this setting. Enabling
+ this implies trusting that the bootloader can supply high quality and
+ non-backdoored seeds.
+
+ Say Y here unless you have reason to mistrust your bootloader or
+ believe its RNG facilities may be faulty. This may also be configured
+ at boot time with "random.trust_bootloader=on/off".
endmenu
diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index e856df7e285c..a6f3a8a2aca6 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -159,6 +159,8 @@ static int probe_common(struct virtio_device *vdev)
goto err_find;
}
+ virtio_device_ready(vdev);
+
/* we always have a pending entropy request */
request_entropy(vi);
diff --git a/drivers/char/lp.c b/drivers/char/lp.c
index 0e22e3b0a04e..38aad99ebb61 100644
--- a/drivers/char/lp.c
+++ b/drivers/char/lp.c
@@ -1019,7 +1019,7 @@ static struct parport_driver lp_driver = {
static int __init lp_init(void)
{
- int i, err = 0;
+ int i, err;
if (parport_nr[0] == LP_PARPORT_OFF)
return 0;
diff --git a/drivers/char/random.c b/drivers/char/random.c
index b691b9d59503..655e327d425e 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -650,7 +650,8 @@ static void __cold _credit_init_bits(size_t bits)
if (orig < POOL_READY_BITS && new >= POOL_READY_BITS) {
crng_reseed(); /* Sets crng_init to CRNG_READY under base_crng.lock. */
- execute_in_process_context(crng_set_ready, &set_ready);
+ if (static_key_initialized)
+ execute_in_process_context(crng_set_ready, &set_ready);
wake_up_interruptible(&crng_init_wait);
kill_fasync(&fasync, SIGIO, POLL_IN);
pr_notice("crng init done\n");
@@ -724,9 +725,8 @@ static void __cold _credit_init_bits(size_t bits)
*
**********************************************************************/
-static bool used_arch_random;
-static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU);
-static bool trust_bootloader __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER);
+static bool trust_cpu __initdata = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU);
+static bool trust_bootloader __initdata = IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER);
static int __init parse_trust_cpu(char *arg)
{
return kstrtobool(arg, &trust_cpu);
@@ -776,7 +776,7 @@ static struct notifier_block pm_notifier = { .notifier_call = random_pm_notifica
int __init random_init(const char *command_line)
{
ktime_t now = ktime_get_real();
- unsigned int i, arch_bytes;
+ unsigned int i, arch_bits;
unsigned long entropy;
#if defined(LATENT_ENTROPY_PLUGIN)
@@ -784,12 +784,12 @@ int __init random_init(const char *command_line)
_mix_pool_bytes(compiletime_seed, sizeof(compiletime_seed));
#endif
- for (i = 0, arch_bytes = BLAKE2S_BLOCK_SIZE;
+ for (i = 0, arch_bits = BLAKE2S_BLOCK_SIZE * 8;
i < BLAKE2S_BLOCK_SIZE; i += sizeof(entropy)) {
if (!arch_get_random_seed_long_early(&entropy) &&
!arch_get_random_long_early(&entropy)) {
entropy = random_get_entropy();
- arch_bytes -= sizeof(entropy);
+ arch_bits -= sizeof(entropy) * 8;
}
_mix_pool_bytes(&entropy, sizeof(entropy));
}
@@ -798,11 +798,18 @@ int __init random_init(const char *command_line)
_mix_pool_bytes(command_line, strlen(command_line));
add_latent_entropy();
+ /*
+ * If we were initialized by the bootloader before jump labels are
+ * initialized, then we should enable the static branch here, where
+ * it's guaranteed that jump labels have been initialized.
+ */
+ if (!static_branch_likely(&crng_is_ready) && crng_init >= CRNG_READY)
+ crng_set_ready(NULL);
+
if (crng_ready())
crng_reseed();
else if (trust_cpu)
- credit_init_bits(arch_bytes * 8);
- used_arch_random = arch_bytes * 8 >= POOL_READY_BITS;
+ _credit_init_bits(arch_bits);
WARN_ON(register_pm_notifier(&pm_notifier));
@@ -812,17 +819,6 @@ int __init random_init(const char *command_line)
}
/*
- * Returns whether arch randomness has been mixed into the initial
- * state of the RNG, regardless of whether or not that randomness
- * was credited. Knowing this is only good for a very limited set
- * of uses, such as early init printk pointer obfuscation.
- */
-bool rng_has_arch_random(void)
-{
- return used_arch_random;
-}
-
-/*
* Add device- or boot-specific data to the input pool to help
* initialize it.
*
@@ -865,13 +861,12 @@ EXPORT_SYMBOL_GPL(add_hwgenerator_randomness);
* Handle random seed passed by bootloader, and credit it if
* CONFIG_RANDOM_TRUST_BOOTLOADER is set.
*/
-void __cold add_bootloader_randomness(const void *buf, size_t len)
+void __init add_bootloader_randomness(const void *buf, size_t len)
{
mix_pool_bytes(buf, len);
if (trust_bootloader)
credit_init_bits(len * 8);
}
-EXPORT_SYMBOL_GPL(add_bootloader_randomness);
#if IS_ENABLED(CONFIG_VMGENID)
static BLOCKING_NOTIFIER_HEAD(vmfork_chain);
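
The random.c hunks above revolve around jump-label ordering: crng_set_ready() flips a static branch, and flipping one is only safe after jump_label_init() has run, which is why the early path checks static_key_initialized and random_init() catches up later. A minimal sketch of that pattern, with hypothetical names rather than the driver's actual symbols:

#include <linux/jump_label.h>

static DEFINE_STATIC_KEY_FALSE(feature_ready);
static bool feature_pending;	/* hypothetical catch-up flag */

static void try_enable_feature(void)
{
	/*
	 * Enabling a static branch patches kernel text, so it must
	 * wait until jump_label_init() has set static_key_initialized.
	 */
	if (static_key_initialized)
		static_branch_enable(&feature_ready);
	else
		feature_pending = true;	/* enable later from an __init
					 * hook, as random_init() does */
}
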
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
index ff188ab68496..bb47610bbd1c 100644
--- a/drivers/clocksource/hyperv_timer.c
+++ b/drivers/clocksource/hyperv_timer.c
@@ -565,4 +565,3 @@ void __init hv_init_clocksource(void)
hv_sched_clock_offset = hv_read_reference_counter();
hv_setup_sched_clock(read_hv_sched_clock_msr);
}
-EXPORT_SYMBOL_GPL(hv_init_clocksource);
diff --git a/drivers/comedi/drivers/vmk80xx.c b/drivers/comedi/drivers/vmk80xx.c
index 46023adc5395..4536ed43f65b 100644
--- a/drivers/comedi/drivers/vmk80xx.c
+++ b/drivers/comedi/drivers/vmk80xx.c
@@ -684,7 +684,7 @@ static int vmk80xx_alloc_usb_buffers(struct comedi_device *dev)
if (!devpriv->usb_rx_buf)
return -ENOMEM;
- size = max(usb_endpoint_maxp(devpriv->ep_rx), MIN_BUF_SIZE);
+ size = max(usb_endpoint_maxp(devpriv->ep_tx), MIN_BUF_SIZE);
devpriv->usb_tx_buf = kzalloc(size, GFP_KERNEL);
if (!devpriv->usb_tx_buf)
return -ENOMEM;
diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
index e7330684d3b8..9631f2fd2faf 100644
--- a/drivers/dma-buf/udmabuf.c
+++ b/drivers/dma-buf/udmabuf.c
@@ -32,8 +32,11 @@ static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct udmabuf *ubuf = vma->vm_private_data;
+ pgoff_t pgoff = vmf->pgoff;
- vmf->page = ubuf->pages[vmf->pgoff];
+ if (pgoff >= ubuf->pagecount)
+ return VM_FAULT_SIGBUS;
+ vmf->page = ubuf->pages[pgoff];
get_page(vmf->page);
return 0;
}
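
The udmabuf change is the standard bounds check a fault handler needs before indexing a page array: vmf->pgoff is derived from the faulting address, which userspace controls through mmap() and mremap(). A stripped-down sketch of the guard, with a hypothetical buffer type:

struct my_buf {			/* hypothetical backing store */
	pgoff_t pagecount;
	struct page **pages;
};

static vm_fault_t my_vm_fault(struct vm_fault *vmf)
{
	struct my_buf *buf = vmf->vma->vm_private_data;
	pgoff_t pgoff = vmf->pgoff;

	/* Reject offsets past the backing array instead of reading
	 * beyond it; the VMA may be larger than the buffer. */
	if (pgoff >= buf->pagecount)
		return VM_FAULT_SIGBUS;

	vmf->page = buf->pages[pgoff];
	get_page(vmf->page);	/* fault path takes a page reference */
	return 0;
}
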
diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index c9fe5903725a..9c89f7d53e99 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -1211,7 +1211,7 @@ static int ioctl_get_cycle_timer2(struct client *client, union ioctl_arg *arg)
struct fw_cdev_get_cycle_timer2 *a = &arg->get_cycle_timer2;
struct fw_card *card = client->device->card;
struct timespec64 ts = {0, 0};
- u32 cycle_time;
+ u32 cycle_time = 0;
int ret = 0;
local_irq_disable();
diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c
index 90ed8fdaba75..adddd8c45d0c 100644
--- a/drivers/firewire/core-device.c
+++ b/drivers/firewire/core-device.c
@@ -372,8 +372,7 @@ static ssize_t rom_index_show(struct device *dev,
struct fw_device *device = fw_device(dev->parent);
struct fw_unit *unit = fw_unit(dev);
- return snprintf(buf, PAGE_SIZE, "%d\n",
- (int)(unit->directory - device->config_rom));
+ return sysfs_emit(buf, "%td\n", unit->directory - device->config_rom);
}
static struct device_attribute fw_unit_attributes[] = {
@@ -403,8 +402,7 @@ static ssize_t guid_show(struct device *dev,
int ret;
down_read(&fw_device_rwsem);
- ret = snprintf(buf, PAGE_SIZE, "0x%08x%08x\n",
- device->config_rom[3], device->config_rom[4]);
+ ret = sysfs_emit(buf, "0x%08x%08x\n", device->config_rom[3], device->config_rom[4]);
up_read(&fw_device_rwsem);
return ret;
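
The firewire hunks follow the tree-wide conversion from snprintf(buf, PAGE_SIZE, ...) to sysfs_emit(), which encodes the sysfs contract (a PAGE_SIZE buffer, written from offset zero) in a single helper. A hedged example of the idiom:

static ssize_t value_show(struct device *dev,
			  struct device_attribute *attr, char *buf)
{
	struct my_priv *p = dev_get_drvdata(dev);	/* hypothetical */

	/* sysfs_emit() bounds the write to PAGE_SIZE and returns the
	 * number of bytes emitted; no manual size bookkeeping. */
	return sysfs_emit(buf, "%d\n", p->value);
}
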
diff --git a/drivers/firmware/efi/sysfb_efi.c b/drivers/firmware/efi/sysfb_efi.c
index 4c7c9dd7733f..7882d4b3f2be 100644
--- a/drivers/firmware/efi/sysfb_efi.c
+++ b/drivers/firmware/efi/sysfb_efi.c
@@ -26,8 +26,6 @@
#include <linux/sysfb.h>
#include <video/vga.h>
-#include <asm/efi.h>
-
enum {
OVERRIDE_NONE = 0x0,
OVERRIDE_BASE = 0x1,
diff --git a/drivers/gpio/gpio-crystalcove.c b/drivers/gpio/gpio-crystalcove.c
index b55c74a5e064..1ee62cd58582 100644
--- a/drivers/gpio/gpio-crystalcove.c
+++ b/drivers/gpio/gpio-crystalcove.c
@@ -15,6 +15,7 @@
#include <linux/platform_device.h>
#include <linux/regmap.h>
#include <linux/seq_file.h>
+#include <linux/types.h>
#define CRYSTALCOVE_GPIO_NUM 16
#define CRYSTALCOVE_VGPIO_NUM 95
@@ -110,8 +111,7 @@ static inline int to_reg(int gpio, enum ctrl_register reg_type)
return reg + gpio % 8;
}
-static void crystalcove_update_irq_mask(struct crystalcove_gpio *cg,
- int gpio)
+static void crystalcove_update_irq_mask(struct crystalcove_gpio *cg, int gpio)
{
u8 mirqs0 = gpio < 8 ? MGPIO0IRQS0 : MGPIO1IRQS0;
int mask = BIT(gpio % 8);
@@ -140,8 +140,7 @@ static int crystalcove_gpio_dir_in(struct gpio_chip *chip, unsigned int gpio)
return regmap_write(cg->regmap, reg, CTLO_INPUT_SET);
}
-static int crystalcove_gpio_dir_out(struct gpio_chip *chip, unsigned int gpio,
- int value)
+static int crystalcove_gpio_dir_out(struct gpio_chip *chip, unsigned int gpio, int value)
{
struct crystalcove_gpio *cg = gpiochip_get_data(chip);
int reg = to_reg(gpio, CTRL_OUT);
@@ -168,8 +167,7 @@ static int crystalcove_gpio_get(struct gpio_chip *chip, unsigned int gpio)
return val & 0x1;
}
-static void crystalcove_gpio_set(struct gpio_chip *chip,
- unsigned int gpio, int value)
+static void crystalcove_gpio_set(struct gpio_chip *chip, unsigned int gpio, int value)
{
struct crystalcove_gpio *cg = gpiochip_get_data(chip);
int reg = to_reg(gpio, CTRL_OUT);
@@ -185,10 +183,10 @@ static void crystalcove_gpio_set(struct gpio_chip *chip,
static int crystalcove_irq_type(struct irq_data *data, unsigned int type)
{
- struct crystalcove_gpio *cg =
- gpiochip_get_data(irq_data_get_irq_chip_data(data));
+ struct crystalcove_gpio *cg = gpiochip_get_data(irq_data_get_irq_chip_data(data));
+ irq_hw_number_t hwirq = irqd_to_hwirq(data);
- if (data->hwirq >= CRYSTALCOVE_GPIO_NUM)
+ if (hwirq >= CRYSTALCOVE_GPIO_NUM)
return 0;
switch (type) {
@@ -215,22 +213,20 @@ static int crystalcove_irq_type(struct irq_data *data, unsigned int type)
static void crystalcove_bus_lock(struct irq_data *data)
{
- struct crystalcove_gpio *cg =
- gpiochip_get_data(irq_data_get_irq_chip_data(data));
+ struct crystalcove_gpio *cg = gpiochip_get_data(irq_data_get_irq_chip_data(data));
mutex_lock(&cg->buslock);
}
static void crystalcove_bus_sync_unlock(struct irq_data *data)
{
- struct crystalcove_gpio *cg =
- gpiochip_get_data(irq_data_get_irq_chip_data(data));
- int gpio = data->hwirq;
+ struct crystalcove_gpio *cg = gpiochip_get_data(irq_data_get_irq_chip_data(data));
+ irq_hw_number_t hwirq = irqd_to_hwirq(data);
if (cg->update & UPDATE_IRQ_TYPE)
- crystalcove_update_irq_ctrl(cg, gpio);
+ crystalcove_update_irq_ctrl(cg, hwirq);
if (cg->update & UPDATE_IRQ_MASK)
- crystalcove_update_irq_mask(cg, gpio);
+ crystalcove_update_irq_mask(cg, hwirq);
cg->update = 0;
mutex_unlock(&cg->buslock);
@@ -238,34 +234,43 @@ static void crystalcove_bus_sync_unlock(struct irq_data *data)
static void crystalcove_irq_unmask(struct irq_data *data)
{
- struct crystalcove_gpio *cg =
- gpiochip_get_data(irq_data_get_irq_chip_data(data));
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(data);
+ struct crystalcove_gpio *cg = gpiochip_get_data(gc);
+ irq_hw_number_t hwirq = irqd_to_hwirq(data);
- if (data->hwirq < CRYSTALCOVE_GPIO_NUM) {
- cg->set_irq_mask = false;
- cg->update |= UPDATE_IRQ_MASK;
- }
+ if (hwirq >= CRYSTALCOVE_GPIO_NUM)
+ return;
+
+ gpiochip_enable_irq(gc, hwirq);
+
+ cg->set_irq_mask = false;
+ cg->update |= UPDATE_IRQ_MASK;
}
static void crystalcove_irq_mask(struct irq_data *data)
{
- struct crystalcove_gpio *cg =
- gpiochip_get_data(irq_data_get_irq_chip_data(data));
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(data);
+ struct crystalcove_gpio *cg = gpiochip_get_data(gc);
+ irq_hw_number_t hwirq = irqd_to_hwirq(data);
- if (data->hwirq < CRYSTALCOVE_GPIO_NUM) {
- cg->set_irq_mask = true;
- cg->update |= UPDATE_IRQ_MASK;
- }
+ if (hwirq >= CRYSTALCOVE_GPIO_NUM)
+ return;
+
+ cg->set_irq_mask = true;
+ cg->update |= UPDATE_IRQ_MASK;
+
+ gpiochip_disable_irq(gc, hwirq);
}
-static struct irq_chip crystalcove_irqchip = {
+static const struct irq_chip crystalcove_irqchip = {
.name = "Crystal Cove",
.irq_mask = crystalcove_irq_mask,
.irq_unmask = crystalcove_irq_unmask,
.irq_set_type = crystalcove_irq_type,
.irq_bus_lock = crystalcove_bus_lock,
.irq_bus_sync_unlock = crystalcove_bus_sync_unlock,
- .flags = IRQCHIP_SKIP_SET_WAKE,
+ .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_IMMUTABLE,
+ GPIOCHIP_IRQ_RESOURCE_HELPERS,
};
static irqreturn_t crystalcove_gpio_irq_handler(int irq, void *data)
@@ -293,8 +298,7 @@ static irqreturn_t crystalcove_gpio_irq_handler(int irq, void *data)
return IRQ_HANDLED;
}
-static void crystalcove_gpio_dbg_show(struct seq_file *s,
- struct gpio_chip *chip)
+static void crystalcove_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip)
{
struct crystalcove_gpio *cg = gpiochip_get_data(chip);
int gpio, offset;
@@ -353,7 +357,7 @@ static int crystalcove_gpio_probe(struct platform_device *pdev)
cg->regmap = pmic->regmap;
girq = &cg->chip.irq;
- girq->chip = &crystalcove_irqchip;
+ gpio_irq_chip_set_chip(girq, &crystalcove_irqchip);
/* This will let us handle the parent IRQ in the driver */
girq->parent_handler = NULL;
girq->num_parents = 0;
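
The crystalcove conversion, and the dln2, merrifield, sch and wcove ones below, all apply the same immutable-irqchip recipe: make the irq_chip const, add IRQCHIP_IMMUTABLE and GPIOCHIP_IRQ_RESOURCE_HELPERS, call gpiochip_enable_irq()/gpiochip_disable_irq() from the unmask/mask paths, and register via gpio_irq_chip_set_chip() instead of assigning girq->chip. A condensed sketch with hypothetical names:

static void my_irq_mask(struct irq_data *d)
{
	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
	irq_hw_number_t hwirq = irqd_to_hwirq(d);

	/* ...mask the line in hardware... */
	gpiochip_disable_irq(gc, hwirq);   /* line no longer used as IRQ */
}

static void my_irq_unmask(struct irq_data *d)
{
	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
	irq_hw_number_t hwirq = irqd_to_hwirq(d);

	gpiochip_enable_irq(gc, hwirq);    /* mark line as in use by IRQ */
	/* ...unmask the line in hardware... */
}

static const struct irq_chip my_irqchip = {
	.name		= "my-gpio",
	.irq_mask	= my_irq_mask,
	.irq_unmask	= my_irq_unmask,
	.flags		= IRQCHIP_IMMUTABLE,
	GPIOCHIP_IRQ_RESOURCE_HELPERS,     /* request/release resources */
};

/* in probe: gpio_irq_chip_set_chip(&chip.irq, &my_irqchip); */
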
diff --git a/drivers/gpio/gpio-dln2.c b/drivers/gpio/gpio-dln2.c
index 08b9e2cf4f2d..71fa437b491f 100644
--- a/drivers/gpio/gpio-dln2.c
+++ b/drivers/gpio/gpio-dln2.c
@@ -46,7 +46,6 @@
struct dln2_gpio {
struct platform_device *pdev;
struct gpio_chip gpio;
- struct irq_chip irqchip;
/*
* Cache pin direction to save us one transfer, since the hardware has
@@ -306,6 +305,7 @@ static void dln2_irq_unmask(struct irq_data *irqd)
struct dln2_gpio *dln2 = gpiochip_get_data(gc);
int pin = irqd_to_hwirq(irqd);
+ gpiochip_enable_irq(gc, pin);
set_bit(pin, dln2->unmasked_irqs);
}
@@ -316,6 +316,7 @@ static void dln2_irq_mask(struct irq_data *irqd)
int pin = irqd_to_hwirq(irqd);
clear_bit(pin, dln2->unmasked_irqs);
+ gpiochip_disable_irq(gc, pin);
}
static int dln2_irq_set_type(struct irq_data *irqd, unsigned type)
@@ -384,6 +385,17 @@ static void dln2_irq_bus_unlock(struct irq_data *irqd)
mutex_unlock(&dln2->irq_lock);
}
+static const struct irq_chip dln2_irqchip = {
+ .name = "dln2-irq",
+ .irq_mask = dln2_irq_mask,
+ .irq_unmask = dln2_irq_unmask,
+ .irq_set_type = dln2_irq_set_type,
+ .irq_bus_lock = dln2_irq_bus_lock,
+ .irq_bus_sync_unlock = dln2_irq_bus_unlock,
+ .flags = IRQCHIP_IMMUTABLE,
+ GPIOCHIP_IRQ_RESOURCE_HELPERS,
+};
+
static void dln2_gpio_event(struct platform_device *pdev, u16 echo,
const void *data, int len)
{
@@ -465,15 +477,8 @@ static int dln2_gpio_probe(struct platform_device *pdev)
dln2->gpio.direction_output = dln2_gpio_direction_output;
dln2->gpio.set_config = dln2_gpio_set_config;
- dln2->irqchip.name = "dln2-irq",
- dln2->irqchip.irq_mask = dln2_irq_mask,
- dln2->irqchip.irq_unmask = dln2_irq_unmask,
- dln2->irqchip.irq_set_type = dln2_irq_set_type,
- dln2->irqchip.irq_bus_lock = dln2_irq_bus_lock,
- dln2->irqchip.irq_bus_sync_unlock = dln2_irq_bus_unlock,
-
girq = &dln2->gpio.irq;
- girq->chip = &dln2->irqchip;
+ gpio_irq_chip_set_chip(girq, &dln2_irqchip);
/* The event comes from the outside so no parent handler */
girq->parent_handler = NULL;
girq->num_parents = 0;
diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c
index 04afe728e187..c22fcaa44a61 100644
--- a/drivers/gpio/gpio-dwapb.c
+++ b/drivers/gpio/gpio-dwapb.c
@@ -662,10 +662,9 @@ static int dwapb_get_clks(struct dwapb_gpio *gpio)
gpio->clks[1].id = "db";
err = devm_clk_bulk_get_optional(gpio->dev, DWAPB_NR_CLOCKS,
gpio->clks);
- if (err) {
- dev_err(gpio->dev, "Cannot get APB/Debounce clocks\n");
- return err;
- }
+ if (err)
+ return dev_err_probe(gpio->dev, err,
+ "Cannot get APB/Debounce clocks\n");
err = clk_bulk_prepare_enable(DWAPB_NR_CLOCKS, gpio->clks);
if (err) {
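
dev_err_probe(), adopted in the dwapb hunk, folds the message and the return value into one call and deliberately stays quiet for -EPROBE_DEFER, recording the reason in debugfs instead of flooding the log on every deferral. A sketch of the shape it gives error paths:

static int my_probe_step(struct device *dev)	/* hypothetical */
{
	struct clk *clk = devm_clk_get_optional(dev, NULL);

	if (IS_ERR(clk))
		/* Logs real errors, is silent on -EPROBE_DEFER, and
		 * returns the error code either way. */
		return dev_err_probe(dev, PTR_ERR(clk),
				     "Cannot get clock\n");
	return 0;
}
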
diff --git a/drivers/gpio/gpio-merrifield.c b/drivers/gpio/gpio-merrifield.c
index f3d1baeacbe9..72ac09a59702 100644
--- a/drivers/gpio/gpio-merrifield.c
+++ b/drivers/gpio/gpio-merrifield.c
@@ -220,10 +220,8 @@ static void mrfld_irq_ack(struct irq_data *d)
raw_spin_unlock_irqrestore(&priv->lock, flags);
}
-static void mrfld_irq_unmask_mask(struct irq_data *d, bool unmask)
+static void mrfld_irq_unmask_mask(struct mrfld_gpio *priv, u32 gpio, bool unmask)
{
- struct mrfld_gpio *priv = irq_data_get_irq_chip_data(d);
- u32 gpio = irqd_to_hwirq(d);
void __iomem *gimr = gpio_reg(&priv->chip, gpio, GIMR);
unsigned long flags;
u32 value;
@@ -241,12 +239,20 @@ static void mrfld_irq_unmask_mask(struct irq_data *d, bool unmask)
static void mrfld_irq_mask(struct irq_data *d)
{
- mrfld_irq_unmask_mask(d, false);
+ struct mrfld_gpio *priv = irq_data_get_irq_chip_data(d);
+ u32 gpio = irqd_to_hwirq(d);
+
+ mrfld_irq_unmask_mask(priv, gpio, false);
+ gpiochip_disable_irq(&priv->chip, gpio);
}
static void mrfld_irq_unmask(struct irq_data *d)
{
- mrfld_irq_unmask_mask(d, true);
+ struct mrfld_gpio *priv = irq_data_get_irq_chip_data(d);
+ u32 gpio = irqd_to_hwirq(d);
+
+ gpiochip_enable_irq(&priv->chip, gpio);
+ mrfld_irq_unmask_mask(priv, gpio, true);
}
static int mrfld_irq_set_type(struct irq_data *d, unsigned int type)
@@ -329,13 +335,15 @@ static int mrfld_irq_set_wake(struct irq_data *d, unsigned int on)
return 0;
}
-static struct irq_chip mrfld_irqchip = {
+static const struct irq_chip mrfld_irqchip = {
.name = "gpio-merrifield",
.irq_ack = mrfld_irq_ack,
.irq_mask = mrfld_irq_mask,
.irq_unmask = mrfld_irq_unmask,
.irq_set_type = mrfld_irq_set_type,
.irq_set_wake = mrfld_irq_set_wake,
+ .flags = IRQCHIP_IMMUTABLE,
+ GPIOCHIP_IRQ_RESOURCE_HELPERS,
};
static void mrfld_irq_handler(struct irq_desc *desc)
@@ -482,7 +490,7 @@ static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id
return retval;
girq = &priv->chip.irq;
- girq->chip = &mrfld_irqchip;
+ gpio_irq_chip_set_chip(girq, &mrfld_irqchip);
girq->init_hw = mrfld_irq_init_hw;
girq->parent_handler = mrfld_irq_handler;
girq->num_parents = 1;
diff --git a/drivers/gpio/gpio-sch.c b/drivers/gpio/gpio-sch.c
index acda4c5052d3..8a83f7bf4382 100644
--- a/drivers/gpio/gpio-sch.c
+++ b/drivers/gpio/gpio-sch.c
@@ -38,7 +38,6 @@
struct sch_gpio {
struct gpio_chip chip;
- struct irq_chip irqchip;
spinlock_t lock;
unsigned short iobase;
unsigned short resume_base;
@@ -218,11 +217,9 @@ static void sch_irq_ack(struct irq_data *d)
spin_unlock_irqrestore(&sch->lock, flags);
}
-static void sch_irq_mask_unmask(struct irq_data *d, int val)
+static void sch_irq_mask_unmask(struct gpio_chip *gc, irq_hw_number_t gpio_num, int val)
{
- struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
struct sch_gpio *sch = gpiochip_get_data(gc);
- irq_hw_number_t gpio_num = irqd_to_hwirq(d);
unsigned long flags;
spin_lock_irqsave(&sch->lock, flags);
@@ -232,14 +229,32 @@ static void sch_irq_mask_unmask(struct irq_data *d, int val)
static void sch_irq_mask(struct irq_data *d)
{
- sch_irq_mask_unmask(d, 0);
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ irq_hw_number_t gpio_num = irqd_to_hwirq(d);
+
+ sch_irq_mask_unmask(gc, gpio_num, 0);
+ gpiochip_disable_irq(gc, gpio_num);
}
static void sch_irq_unmask(struct irq_data *d)
{
- sch_irq_mask_unmask(d, 1);
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ irq_hw_number_t gpio_num = irqd_to_hwirq(d);
+
+ gpiochip_enable_irq(gc, gpio_num);
+ sch_irq_mask_unmask(gc, gpio_num, 1);
}
+static const struct irq_chip sch_irqchip = {
+ .name = "sch_gpio",
+ .irq_ack = sch_irq_ack,
+ .irq_mask = sch_irq_mask,
+ .irq_unmask = sch_irq_unmask,
+ .irq_set_type = sch_irq_type,
+ .flags = IRQCHIP_IMMUTABLE,
+ GPIOCHIP_IRQ_RESOURCE_HELPERS,
+};
+
static u32 sch_gpio_gpe_handler(acpi_handle gpe_device, u32 gpe, void *context)
{
struct sch_gpio *sch = context;
@@ -367,14 +382,8 @@ static int sch_gpio_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, sch);
- sch->irqchip.name = "sch_gpio";
- sch->irqchip.irq_ack = sch_irq_ack;
- sch->irqchip.irq_mask = sch_irq_mask;
- sch->irqchip.irq_unmask = sch_irq_unmask;
- sch->irqchip.irq_set_type = sch_irq_type;
-
girq = &sch->chip.irq;
- girq->chip = &sch->irqchip;
+ gpio_irq_chip_set_chip(girq, &sch_irqchip);
girq->num_parents = 0;
girq->parents = NULL;
girq->parent_handler = NULL;
diff --git a/drivers/gpio/gpio-wcove.c b/drivers/gpio/gpio-wcove.c
index 16a0fae1e32e..c18b6b47384f 100644
--- a/drivers/gpio/gpio-wcove.c
+++ b/drivers/gpio/gpio-wcove.c
@@ -299,6 +299,8 @@ static void wcove_irq_unmask(struct irq_data *data)
if (gpio >= WCOVE_GPIO_NUM)
return;
+ gpiochip_enable_irq(chip, gpio);
+
wg->set_irq_mask = false;
wg->update |= UPDATE_IRQ_MASK;
}
@@ -314,15 +316,19 @@ static void wcove_irq_mask(struct irq_data *data)
wg->set_irq_mask = true;
wg->update |= UPDATE_IRQ_MASK;
+
+ gpiochip_disable_irq(chip, gpio);
}
-static struct irq_chip wcove_irqchip = {
+static const struct irq_chip wcove_irqchip = {
.name = "Whiskey Cove",
.irq_mask = wcove_irq_mask,
.irq_unmask = wcove_irq_unmask,
.irq_set_type = wcove_irq_type,
.irq_bus_lock = wcove_bus_lock,
.irq_bus_sync_unlock = wcove_bus_sync_unlock,
+ .flags = IRQCHIP_IMMUTABLE,
+ GPIOCHIP_IRQ_RESOURCE_HELPERS,
};
static irqreturn_t wcove_gpio_irq_handler(int irq, void *data)
@@ -452,7 +458,7 @@ static int wcove_gpio_probe(struct platform_device *pdev)
}
girq = &wg->chip.irq;
- girq->chip = &wcove_irqchip;
+ gpio_irq_chip_set_chip(girq, &wcove_irqchip);
/* This will let us handle the parent IRQ in the driver */
girq->parent_handler = NULL;
girq->num_parents = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 801f6fa692e9..6de63ea6687e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -642,7 +642,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
atomic64_read(&adev->visible_pin_size),
vram_gtt.vram_size);
vram_gtt.gtt_size = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT)->size;
- vram_gtt.gtt_size *= PAGE_SIZE;
vram_gtt.gtt_size -= atomic64_read(&adev->gart_pin_size);
return copy_to_user(out, &vram_gtt,
min((size_t)size, sizeof(vram_gtt))) ? -EFAULT : 0;
@@ -675,7 +674,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
mem.cpu_accessible_vram.usable_heap_size * 3 / 4;
mem.gtt.total_heap_size = gtt_man->size;
- mem.gtt.total_heap_size *= PAGE_SIZE;
mem.gtt.usable_heap_size = mem.gtt.total_heap_size -
atomic64_read(&adev->gart_pin_size);
mem.gtt.heap_usage = ttm_resource_manager_usage(gtt_man);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 70be67a56673..39b425d83bb1 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2812,7 +2812,7 @@ static struct drm_mode_config_helper_funcs amdgpu_dm_mode_config_helperfuncs = {
static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
{
- u32 max_cll, min_cll, max, min, q, r;
+ u32 max_avg, min_cll, max, min, q, r;
struct amdgpu_dm_backlight_caps *caps;
struct amdgpu_display_manager *dm;
struct drm_connector *conn_base;
@@ -2842,7 +2842,7 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
caps = &dm->backlight_caps[i];
caps->ext_caps = &aconnector->dc_link->dpcd_sink_ext_caps;
caps->aux_support = false;
- max_cll = conn_base->hdr_sink_metadata.hdmi_type1.max_cll;
+ max_avg = conn_base->hdr_sink_metadata.hdmi_type1.max_fall;
min_cll = conn_base->hdr_sink_metadata.hdmi_type1.min_cll;
if (caps->ext_caps->bits.oled == 1 /*||
@@ -2870,8 +2870,8 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
* The results of the above expressions can be verified at
* pre_computed_values.
*/
- q = max_cll >> 5;
- r = max_cll % 32;
+ q = max_avg >> 5;
+ r = max_avg % 32;
max = (1 << q) * pre_computed_values[r];
// min luminance: maxLum * (CV/255)^2 / 100
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index 424ea23eec32..16c539657f73 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -177,15 +177,15 @@ static struct exynos_drm_driver_info exynos_drm_drivers[] = {
DRV_PTR(mixer_driver, CONFIG_DRM_EXYNOS_MIXER),
DRM_COMPONENT_DRIVER
}, {
- DRV_PTR(mic_driver, CONFIG_DRM_EXYNOS_MIC),
- DRM_COMPONENT_DRIVER
- }, {
DRV_PTR(dp_driver, CONFIG_DRM_EXYNOS_DP),
DRM_COMPONENT_DRIVER
}, {
DRV_PTR(dsi_driver, CONFIG_DRM_EXYNOS_DSI),
DRM_COMPONENT_DRIVER
}, {
+ DRV_PTR(mic_driver, CONFIG_DRM_EXYNOS_MIC),
+ DRM_COMPONENT_DRIVER
+ }, {
DRV_PTR(hdmi_driver, CONFIG_DRM_EXYNOS_HDMI),
DRM_COMPONENT_DRIVER
}, {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_mic.c b/drivers/gpu/drm/exynos/exynos_drm_mic.c
index 9e06f8e2a863..09ce28ee08d9 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_mic.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_mic.c
@@ -26,6 +26,7 @@
#include <drm/drm_print.h>
#include "exynos_drm_drv.h"
+#include "exynos_drm_crtc.h"
/* Sysreg registers for MIC */
#define DSD_CFG_MUX 0x1004
@@ -100,9 +101,7 @@ struct exynos_mic {
bool i80_mode;
struct videomode vm;
- struct drm_encoder *encoder;
struct drm_bridge bridge;
- struct drm_bridge *next_bridge;
bool enabled;
};
@@ -229,8 +228,6 @@ static void mic_set_reg_on(struct exynos_mic *mic, bool enable)
writel(reg, mic->reg + MIC_OP);
}
-static void mic_disable(struct drm_bridge *bridge) { }
-
static void mic_post_disable(struct drm_bridge *bridge)
{
struct exynos_mic *mic = bridge->driver_private;
@@ -297,34 +294,30 @@ unlock:
mutex_unlock(&mic_mutex);
}
-static void mic_enable(struct drm_bridge *bridge) { }
-
-static int mic_attach(struct drm_bridge *bridge,
- enum drm_bridge_attach_flags flags)
-{
- struct exynos_mic *mic = bridge->driver_private;
-
- return drm_bridge_attach(bridge->encoder, mic->next_bridge,
- &mic->bridge, flags);
-}
-
static const struct drm_bridge_funcs mic_bridge_funcs = {
- .disable = mic_disable,
.post_disable = mic_post_disable,
.mode_set = mic_mode_set,
.pre_enable = mic_pre_enable,
- .enable = mic_enable,
- .attach = mic_attach,
};
static int exynos_mic_bind(struct device *dev, struct device *master,
void *data)
{
struct exynos_mic *mic = dev_get_drvdata(dev);
+ struct drm_device *drm_dev = data;
+ struct exynos_drm_crtc *crtc = exynos_drm_crtc_get_by_type(drm_dev,
+ EXYNOS_DISPLAY_TYPE_LCD);
+ struct drm_encoder *e, *encoder = NULL;
+
+ drm_for_each_encoder(e, drm_dev)
+ if (e->possible_crtcs == drm_crtc_mask(&crtc->base))
+ encoder = e;
+ if (!encoder)
+ return -ENODEV;
mic->bridge.driver_private = mic;
- return 0;
+ return drm_bridge_attach(encoder, &mic->bridge, NULL, 0);
}
static void exynos_mic_unbind(struct device *dev, struct device *master,
@@ -388,7 +381,6 @@ static int exynos_mic_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct exynos_mic *mic;
- struct device_node *remote;
struct resource res;
int ret, i;
@@ -432,16 +424,6 @@ static int exynos_mic_probe(struct platform_device *pdev)
}
}
- remote = of_graph_get_remote_node(dev->of_node, 1, 0);
- mic->next_bridge = of_drm_find_bridge(remote);
- if (IS_ERR(mic->next_bridge)) {
- DRM_DEV_ERROR(dev, "mic: Failed to find next bridge\n");
- ret = PTR_ERR(mic->next_bridge);
- goto err;
- }
-
- of_node_put(remote);
-
platform_set_drvdata(pdev, mic);
mic->bridge.funcs = &mic_bridge_funcs;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index c326bd2b444f..30fe847c6664 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -999,7 +999,8 @@ static int eb_validate_vmas(struct i915_execbuffer *eb)
}
}
- err = dma_resv_reserve_fences(vma->obj->base.resv, 1);
+ /* Reserve enough slots to accommodate composite fences */
+ err = dma_resv_reserve_fences(vma->obj->base.resv, eb->num_batches);
if (err)
return err;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 53307ca0eed0..51a0fe60c050 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -785,6 +785,7 @@ void intel_gt_driver_unregister(struct intel_gt *gt)
{
intel_wakeref_t wakeref;
+ intel_gt_sysfs_unregister(gt);
intel_rps_driver_unregister(&gt->rps);
intel_gsc_fini(&gt->gsc);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
index 8ec8bc660c8c..9e4ebf53379b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
@@ -24,7 +24,7 @@ bool is_object_gt(struct kobject *kobj)
static struct intel_gt *kobj_to_gt(struct kobject *kobj)
{
- return container_of(kobj, struct kobj_gt, base)->gt;
+ return container_of(kobj, struct intel_gt, sysfs_gt);
}
struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
@@ -72,9 +72,9 @@ static struct attribute *id_attrs[] = {
};
ATTRIBUTE_GROUPS(id);
+/* A kobject needs a release() method even if it does nothing */
static void kobj_gt_release(struct kobject *kobj)
{
- kfree(kobj);
}
static struct kobj_type kobj_gt_type = {
@@ -85,8 +85,6 @@ static struct kobj_type kobj_gt_type = {
void intel_gt_sysfs_register(struct intel_gt *gt)
{
- struct kobj_gt *kg;
-
/*
* We need to make things right with the
* ABI compatibility. The files were originally
@@ -98,25 +96,22 @@ void intel_gt_sysfs_register(struct intel_gt *gt)
if (gt_is_root(gt))
intel_gt_sysfs_pm_init(gt, gt_get_parent_obj(gt));
- kg = kzalloc(sizeof(*kg), GFP_KERNEL);
- if (!kg)
+ /* init and xfer ownership to sysfs tree */
+ if (kobject_init_and_add(&gt->sysfs_gt, &kobj_gt_type,
+ gt->i915->sysfs_gt, "gt%d", gt->info.id))
goto exit_fail;
- kobject_init(&kg->base, &kobj_gt_type);
- kg->gt = gt;
-
- /* xfer ownership to sysfs tree */
- if (kobject_add(&kg->base, gt->i915->sysfs_gt, "gt%d", gt->info.id))
- goto exit_kobj_put;
-
- intel_gt_sysfs_pm_init(gt, &kg->base);
+ intel_gt_sysfs_pm_init(gt, &gt->sysfs_gt);
return;
-exit_kobj_put:
- kobject_put(&kg->base);
-
exit_fail:
+ kobject_put(&gt->sysfs_gt);
drm_warn(&gt->i915->drm,
"failed to initialize gt%d sysfs root\n", gt->info.id);
}
+
+void intel_gt_sysfs_unregister(struct intel_gt *gt)
+{
+ kobject_put(&gt->sysfs_gt);
+}
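
The intel_gt_sysfs rework replaces a separately allocated kobject wrapper with a kobject embedded in struct intel_gt, so kobj_to_gt() becomes a plain container_of() and the release() method legitimately does nothing: the memory belongs to the surrounding structure, whose lifetime is managed elsewhere, while teardown still pairs the init with kobject_put(). A reduced sketch of the pattern:

struct my_gt {				/* hypothetical owner structure */
	int id;
	struct kobject sysfs_kobj;	/* embedded, not kmalloc'ed */
};

static void my_kobj_release(struct kobject *kobj)
{
	/* Intentionally empty: the kobject is embedded and freed with
	 * its container. The kobject core still requires release(). */
}

static struct kobj_type my_ktype = {
	.release = my_kobj_release,
};

static struct my_gt *to_my_gt(struct kobject *kobj)
{
	return container_of(kobj, struct my_gt, sysfs_kobj);
}

/* setup:    kobject_init_and_add(&gt->sysfs_kobj, &my_ktype,
 *                                parent, "gt%d", gt->id);
 * teardown: kobject_put(&gt->sysfs_kobj); */
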
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
index 9471b26752cf..a99aa7e8b01a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
@@ -13,11 +13,6 @@
struct intel_gt;
-struct kobj_gt {
- struct kobject base;
- struct intel_gt *gt;
-};
-
bool is_object_gt(struct kobject *kobj);
struct drm_i915_private *kobj_to_i915(struct kobject *kobj);
@@ -28,6 +23,7 @@ intel_gt_create_kobj(struct intel_gt *gt,
const char *name);
void intel_gt_sysfs_register(struct intel_gt *gt);
+void intel_gt_sysfs_unregister(struct intel_gt *gt);
struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
const char *name);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index b06611c1d4ad..edd7a3cf5f5f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -224,6 +224,9 @@ struct intel_gt {
} mocs;
struct intel_pxp pxp;
+
+ /* gt/gtN sysfs */
+ struct kobject sysfs_gt;
};
enum intel_gt_scratch_field {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index d078f884b5e3..f0d7b57b741e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -156,7 +156,7 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
[INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) },
[INTEL_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) },
};
- static const struct uc_fw_platform_requirement *fw_blobs;
+ const struct uc_fw_platform_requirement *fw_blobs;
enum intel_platform p = INTEL_INFO(i915)->platform;
u32 fw_count;
u8 rev = INTEL_REVID(i915);
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 8521daba212a..1e2750210831 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -166,7 +166,14 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
struct device *kdev = kobj_to_dev(kobj);
struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
struct i915_gpu_coredump *gpu;
- ssize_t ret;
+ ssize_t ret = 0;
+
+ /*
+ * FIXME: Concurrent clients triggering resets and reading + clearing
+ * dumps can cause inconsistent sysfs reads when a user calls in with a
+ * non-zero offset to complete a prior partial read but the
+ * gpu_coredump has been cleared or replaced.
+ */
gpu = i915_first_error_state(i915);
if (IS_ERR(gpu)) {
@@ -178,8 +185,10 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
const char *str = "No error state collected\n";
size_t len = strlen(str);
- ret = min_t(size_t, count, len - off);
- memcpy(buf, str + off, ret);
+ if (off < len) {
+ ret = min_t(size_t, count, len - off);
+ memcpy(buf, str + off, ret);
+ }
}
return ret;
@@ -259,4 +268,6 @@ void i915_teardown_sysfs(struct drm_i915_private *dev_priv)
device_remove_bin_file(kdev, &dpf_attrs_1);
device_remove_bin_file(kdev, &dpf_attrs);
+
+ kobject_put(dev_priv->sysfs_gt);
}
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 4f6db539571a..0bffb70b3c5f 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -23,6 +23,7 @@
*/
#include <linux/sched/mm.h>
+#include <linux/dma-fence-array.h>
#include <drm/drm_gem.h>
#include "display/intel_frontbuffer.h"
@@ -1823,6 +1824,21 @@ int _i915_vma_move_to_active(struct i915_vma *vma,
if (unlikely(err))
return err;
+ /*
+ * Reserve fences slot early to prevent an allocation after preparing
+ * the workload and associating fences with dma_resv.
+ */
+ if (fence && !(flags & __EXEC_OBJECT_NO_RESERVE)) {
+ struct dma_fence *curr;
+ int idx;
+
+ dma_fence_array_for_each(curr, idx, fence)
+ ;
+ err = dma_resv_reserve_fences(vma->obj->base.resv, idx);
+ if (unlikely(err))
+ return err;
+ }
+
if (flags & EXEC_OBJECT_WRITE) {
struct intel_frontbuffer *front;
@@ -1832,31 +1848,23 @@ int _i915_vma_move_to_active(struct i915_vma *vma,
i915_active_add_request(&front->write, rq);
intel_frontbuffer_put(front);
}
+ }
- if (!(flags & __EXEC_OBJECT_NO_RESERVE)) {
- err = dma_resv_reserve_fences(vma->obj->base.resv, 1);
- if (unlikely(err))
- return err;
- }
+ if (fence) {
+ struct dma_fence *curr;
+ enum dma_resv_usage usage;
+ int idx;
- if (fence) {
- dma_resv_add_fence(vma->obj->base.resv, fence,
- DMA_RESV_USAGE_WRITE);
+ obj->read_domains = 0;
+ if (flags & EXEC_OBJECT_WRITE) {
+ usage = DMA_RESV_USAGE_WRITE;
obj->write_domain = I915_GEM_DOMAIN_RENDER;
- obj->read_domains = 0;
- }
- } else {
- if (!(flags & __EXEC_OBJECT_NO_RESERVE)) {
- err = dma_resv_reserve_fences(vma->obj->base.resv, 1);
- if (unlikely(err))
- return err;
+ } else {
+ usage = DMA_RESV_USAGE_READ;
}
- if (fence) {
- dma_resv_add_fence(vma->obj->base.resv, fence,
- DMA_RESV_USAGE_READ);
- obj->write_domain = 0;
- }
+ dma_fence_array_for_each(curr, idx, fence)
+ dma_resv_add_fence(vma->obj->base.resv, curr, usage);
}
if (flags & EXEC_OBJECT_NEEDS_FENCE && vma->fence)
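
The i915_vma.c rework must know how many fences a possibly composite fence expands to before calling dma_resv_reserve_fences(). dma_fence_array_for_each() walks the members of a dma_fence_array and visits a plain fence exactly once, so an empty loop body leaves the tally in the index variable; a sketch of that counting trick:

#include <linux/dma-fence-array.h>

static int reserve_for_fence(struct dma_resv *resv,
			     struct dma_fence *fence)
{
	struct dma_fence *curr;
	int idx;

	/* Empty body: the iterator leaves the member count in idx
	 * (1 for a non-array fence). */
	dma_fence_array_for_each(curr, idx, fence)
		;

	return dma_resv_reserve_fences(resv, idx);
}
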
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 75d308ec173d..406e9c324e76 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -109,11 +109,11 @@ void ttm_bo_set_bulk_move(struct ttm_buffer_object *bo,
return;
spin_lock(&bo->bdev->lru_lock);
- if (bo->bulk_move && bo->resource)
- ttm_lru_bulk_move_del(bo->bulk_move, bo->resource);
+ if (bo->resource)
+ ttm_resource_del_bulk_move(bo->resource, bo);
bo->bulk_move = bulk;
- if (bo->bulk_move && bo->resource)
- ttm_lru_bulk_move_add(bo->bulk_move, bo->resource);
+ if (bo->resource)
+ ttm_resource_add_bulk_move(bo->resource, bo);
spin_unlock(&bo->bdev->lru_lock);
}
EXPORT_SYMBOL(ttm_bo_set_bulk_move);
@@ -689,8 +689,11 @@ void ttm_bo_pin(struct ttm_buffer_object *bo)
{
dma_resv_assert_held(bo->base.resv);
WARN_ON_ONCE(!kref_read(&bo->kref));
- if (!(bo->pin_count++) && bo->bulk_move && bo->resource)
- ttm_lru_bulk_move_del(bo->bulk_move, bo->resource);
+ spin_lock(&bo->bdev->lru_lock);
+ if (bo->resource)
+ ttm_resource_del_bulk_move(bo->resource, bo);
+ ++bo->pin_count;
+ spin_unlock(&bo->bdev->lru_lock);
}
EXPORT_SYMBOL(ttm_bo_pin);
@@ -707,8 +710,11 @@ void ttm_bo_unpin(struct ttm_buffer_object *bo)
if (WARN_ON_ONCE(!bo->pin_count))
return;
- if (!(--bo->pin_count) && bo->bulk_move && bo->resource)
- ttm_lru_bulk_move_add(bo->bulk_move, bo->resource);
+ spin_lock(&bo->bdev->lru_lock);
+ --bo->pin_count;
+ if (bo->resource)
+ ttm_resource_add_bulk_move(bo->resource, bo);
+ spin_unlock(&bo->bdev->lru_lock);
}
EXPORT_SYMBOL(ttm_bo_unpin);
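
The ttm_bo.c hunks take bdev->lru_lock around the pin count so that pin_count and LRU/bulk-move membership change atomically, using the helpers introduced in the ttm_resource.c hunk below; without that, an evictor holding lru_lock could pick a buffer whose pin_count was just raised but which is still enqueued. What ttm_bo_pin() now maintains, in outline rather than the literal driver code:

static void pin_locked(struct ttm_buffer_object *bo)
{
	spin_lock(&bo->bdev->lru_lock);
	if (bo->resource)	/* leave the shrinker's bulk-move lists */
		ttm_resource_del_bulk_move(bo->resource, bo);
	++bo->pin_count;	/* counted under the same lock */
	spin_unlock(&bo->bdev->lru_lock);
}
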
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index a0562ab386f5..e7147e304637 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -156,8 +156,12 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
ttm_resource_manager_for_each_res(man, &cursor, res) {
struct ttm_buffer_object *bo = res->bo;
- uint32_t num_pages = PFN_UP(bo->base.size);
+ uint32_t num_pages;
+ if (!bo)
+ continue;
+
+ num_pages = PFN_UP(bo->base.size);
ret = ttm_bo_swapout(bo, ctx, gfp_flags);
/* ttm_bo_swapout has dropped the lru_lock */
if (!ret)
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index 65889b3caf50..20f9adcc3235 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -91,8 +91,8 @@ static void ttm_lru_bulk_move_pos_tail(struct ttm_lru_bulk_move_pos *pos,
}
/* Add the resource to a bulk_move cursor */
-void ttm_lru_bulk_move_add(struct ttm_lru_bulk_move *bulk,
- struct ttm_resource *res)
+static void ttm_lru_bulk_move_add(struct ttm_lru_bulk_move *bulk,
+ struct ttm_resource *res)
{
struct ttm_lru_bulk_move_pos *pos = ttm_lru_bulk_move_pos(bulk, res);
@@ -105,8 +105,8 @@ void ttm_lru_bulk_move_add(struct ttm_lru_bulk_move *bulk,
}
/* Remove the resource from a bulk_move range */
-void ttm_lru_bulk_move_del(struct ttm_lru_bulk_move *bulk,
- struct ttm_resource *res)
+static void ttm_lru_bulk_move_del(struct ttm_lru_bulk_move *bulk,
+ struct ttm_resource *res)
{
struct ttm_lru_bulk_move_pos *pos = ttm_lru_bulk_move_pos(bulk, res);
@@ -122,6 +122,22 @@ void ttm_lru_bulk_move_del(struct ttm_lru_bulk_move *bulk,
}
}
+/* Add the resource to a bulk move if the BO is configured for it */
+void ttm_resource_add_bulk_move(struct ttm_resource *res,
+ struct ttm_buffer_object *bo)
+{
+ if (bo->bulk_move && !bo->pin_count)
+ ttm_lru_bulk_move_add(bo->bulk_move, res);
+}
+
+/* Remove the resource from a bulk move if the BO is configured for it */
+void ttm_resource_del_bulk_move(struct ttm_resource *res,
+ struct ttm_buffer_object *bo)
+{
+ if (bo->bulk_move && !bo->pin_count)
+ ttm_lru_bulk_move_del(bo->bulk_move, res);
+}
+
/* Move a resource to the LRU or bulk tail */
void ttm_resource_move_to_lru_tail(struct ttm_resource *res)
{
@@ -169,15 +185,14 @@ void ttm_resource_init(struct ttm_buffer_object *bo,
res->bus.is_iomem = false;
res->bus.caching = ttm_cached;
res->bo = bo;
- INIT_LIST_HEAD(&res->lru);
man = ttm_manager_type(bo->bdev, place->mem_type);
spin_lock(&bo->bdev->lru_lock);
- man->usage += res->num_pages << PAGE_SHIFT;
- if (bo->bulk_move)
- ttm_lru_bulk_move_add(bo->bulk_move, res);
+ if (bo->pin_count)
+ list_add_tail(&res->lru, &bo->bdev->pinned);
else
- ttm_resource_move_to_lru_tail(res);
+ list_add_tail(&res->lru, &man->lru[bo->priority]);
+ man->usage += res->num_pages << PAGE_SHIFT;
spin_unlock(&bo->bdev->lru_lock);
}
EXPORT_SYMBOL(ttm_resource_init);
@@ -210,8 +225,16 @@ int ttm_resource_alloc(struct ttm_buffer_object *bo,
{
struct ttm_resource_manager *man =
ttm_manager_type(bo->bdev, place->mem_type);
+ int ret;
+
+ ret = man->func->alloc(man, bo, place, res_ptr);
+ if (ret)
+ return ret;
- return man->func->alloc(man, bo, place, res_ptr);
+ spin_lock(&bo->bdev->lru_lock);
+ ttm_resource_add_bulk_move(*res_ptr, bo);
+ spin_unlock(&bo->bdev->lru_lock);
+ return 0;
}
void ttm_resource_free(struct ttm_buffer_object *bo, struct ttm_resource **res)
@@ -221,12 +244,9 @@ void ttm_resource_free(struct ttm_buffer_object *bo, struct ttm_resource **res)
if (!*res)
return;
- if (bo->bulk_move) {
- spin_lock(&bo->bdev->lru_lock);
- ttm_lru_bulk_move_del(bo->bulk_move, *res);
- spin_unlock(&bo->bdev->lru_lock);
- }
-
+ spin_lock(&bo->bdev->lru_lock);
+ ttm_resource_del_bulk_move(*res, bo);
+ spin_unlock(&bo->bdev->lru_lock);
man = ttm_manager_type(bo->bdev, (*res)->mem_type);
man->func->free(man, *res);
*res = NULL;
diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c
index 978ee2aab2d4..e0bc73124196 100644
--- a/drivers/hid/hid-hyperv.c
+++ b/drivers/hid/hid-hyperv.c
@@ -199,7 +199,8 @@ static void mousevsc_on_receive_device_info(struct mousevsc_dev *input_device,
if (!input_device->hid_desc)
goto cleanup;
- input_device->report_desc_size = desc->desc[0].wDescriptorLength;
+ input_device->report_desc_size = le16_to_cpu(
+ desc->desc[0].wDescriptorLength);
if (input_device->report_desc_size == 0) {
input_device->dev_info_status = -EINVAL;
goto cleanup;
@@ -217,7 +218,7 @@ static void mousevsc_on_receive_device_info(struct mousevsc_dev *input_device,
memcpy(input_device->report_desc,
((unsigned char *)desc) + desc->bLength,
- desc->desc[0].wDescriptorLength);
+ le16_to_cpu(desc->desc[0].wDescriptorLength));
/* Send the ack */
memset(&ack, 0, sizeof(struct mousevsc_prt_msg));
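
USB descriptors are little-endian on the wire, so multi-byte fields such as wDescriptorLength must pass through le16_to_cpu() before any comparison or copy; on a big-endian host the raw value would be byte-swapped. A sketch of the idiom, with hypothetical bounds:

static int copy_report_desc(void *dst, size_t dst_len,
			    const struct hid_descriptor *desc,
			    const void *src)
{
	/* wDescriptorLength is __le16 (wire order); convert first,
	 * or big-endian hosts read e.g. 0x0100 as 0x0001. */
	size_t len = le16_to_cpu(desc->desc[0].wDescriptorLength);

	if (len == 0 || len > dst_len)
		return -EINVAL;
	memcpy(dst, src, len);
	return 0;
}
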
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index b60f13481bdc..5b120402d405 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -21,6 +21,7 @@
#include <linux/cpu.h>
#include <linux/hyperv.h>
#include <asm/mshyperv.h>
+#include <linux/sched/isolation.h>
#include "hyperv_vmbus.h"
@@ -638,6 +639,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
*/
if (newchannel->offermsg.offer.sub_channel_index == 0) {
mutex_unlock(&vmbus_connection.channel_mutex);
+ cpus_read_unlock();
/*
* Don't call free_channel(), because newchannel->kobj
* is not initialized yet.
@@ -728,16 +730,20 @@ static void init_vp_index(struct vmbus_channel *channel)
u32 i, ncpu = num_online_cpus();
cpumask_var_t available_mask;
struct cpumask *allocated_mask;
+ const struct cpumask *hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ);
u32 target_cpu;
int numa_node;
if (!perf_chn ||
- !alloc_cpumask_var(&available_mask, GFP_KERNEL)) {
+ !alloc_cpumask_var(&available_mask, GFP_KERNEL) ||
+ cpumask_empty(hk_mask)) {
/*
* If the channel is not a performance critical
* channel, bind it to VMBUS_CONNECT_CPU.
* In case alloc_cpumask_var() fails, bind it to
* VMBUS_CONNECT_CPU.
+		 * If all the CPUs are isolated, bind it to
+ * VMBUS_CONNECT_CPU.
*/
channel->target_cpu = VMBUS_CONNECT_CPU;
if (perf_chn)
@@ -758,17 +764,19 @@ static void init_vp_index(struct vmbus_channel *channel)
}
allocated_mask = &hv_context.hv_numa_map[numa_node];
- if (cpumask_equal(allocated_mask, cpumask_of_node(numa_node))) {
+retry:
+ cpumask_xor(available_mask, allocated_mask, cpumask_of_node(numa_node));
+ cpumask_and(available_mask, available_mask, hk_mask);
+
+ if (cpumask_empty(available_mask)) {
/*
* We have cycled through all the CPUs in the node;
* reset the allocated map.
*/
cpumask_clear(allocated_mask);
+ goto retry;
}
- cpumask_xor(available_mask, allocated_mask,
- cpumask_of_node(numa_node));
-
target_cpu = cpumask_first(available_mask);
cpumask_set_cpu(target_cpu, allocated_mask);
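
The init_vp_index() change intersects the candidate CPUs with the managed-IRQ housekeeping mask, so isolated CPUs (isolcpus=managed_irq,...) never receive a performance-critical VMBus channel; once every eligible CPU in the node has been used, the allocated map is cleared and the search retried. A reduced sketch of that selection loop (the caller must ensure, as the driver does up front, that at least one housekeeping CPU exists, or the retry would spin):

#include <linux/sched/isolation.h>

static u32 pick_target_cpu(struct cpumask *allocated,
			   const struct cpumask *node_mask,
			   struct cpumask *scratch)
{
	const struct cpumask *hk = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ);
	u32 cpu;

retry:
	/* candidates = node CPUs not yet allocated, minus isolated ones */
	cpumask_xor(scratch, allocated, node_mask);
	cpumask_and(scratch, scratch, hk);
	if (cpumask_empty(scratch)) {
		cpumask_clear(allocated);	/* start a new round */
		goto retry;
	}

	cpu = cpumask_first(scratch);
	cpumask_set_cpu(cpu, allocated);
	return cpu;
}
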
diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index c698592b83e4..d35b60c06114 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -394,7 +394,7 @@ kvp_send_key(struct work_struct *dummy)
in_msg = kvp_transaction.kvp_msg;
/*
- * The key/value strings sent from the host are encoded in
+ * The key/value strings sent from the host are encoded
* in utf16; convert it to utf8 strings.
* The host assures us that the utf16 strings will not exceed
* the max lengths specified. We will however, reserve room
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 714d549b7b46..547ae334e5cd 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -21,6 +21,7 @@
#include <linux/kernel_stat.h>
#include <linux/clockchips.h>
#include <linux/cpu.h>
+#include <linux/sched/isolation.h>
#include <linux/sched/task_stack.h>
#include <linux/delay.h>
@@ -1770,6 +1771,9 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
if (target_cpu >= nr_cpumask_bits)
return -EINVAL;
+ if (!cpumask_test_cpu(target_cpu, housekeeping_cpumask(HK_TYPE_MANAGED_IRQ)))
+ return -EINVAL;
+
/* No CPUs should come up or down during this. */
cpus_read_lock();
diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c
index 57e11b2bab74..3633ab691662 100644
--- a/drivers/hwmon/asus-ec-sensors.c
+++ b/drivers/hwmon/asus-ec-sensors.c
@@ -259,7 +259,7 @@ static const struct ec_board_info board_info[] = {
},
{
.board_names = {
- "ROG CROSSHAIR VIII FORMULA"
+ "ROG CROSSHAIR VIII FORMULA",
"ROG CROSSHAIR VIII HERO",
"ROG CROSSHAIR VIII HERO (WI-FI)",
},
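
The asus-ec-sensors fix deserves a second look: adjacent C string literals concatenate silently, so the missing comma merged two board names into the single string "ROG CROSSHAIR VIII FORMULAROG CROSSHAIR VIII HERO" and shortened the array by one entry, with no compiler diagnostic. Illustration:

static const char * const boards[] = {
	"FOO",	/* without this comma, "FOO" "BAR" would silently
		 * fuse into the single entry "FOOBAR" */
	"BAR",
};
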
diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c
index d78f4bebc718..ea070b91e5b9 100644
--- a/drivers/hwmon/occ/common.c
+++ b/drivers/hwmon/occ/common.c
@@ -1228,10 +1228,15 @@ EXPORT_SYMBOL_GPL(occ_setup);
void occ_shutdown(struct occ *occ)
{
+ mutex_lock(&occ->lock);
+
occ_shutdown_sysfs(occ);
if (occ->hwmon)
hwmon_device_unregister(occ->hwmon);
+ occ->hwmon = NULL;
+
+ mutex_unlock(&occ->lock);
}
EXPORT_SYMBOL_GPL(occ_shutdown);
diff --git a/drivers/i2c/busses/i2c-designware-common.c b/drivers/i2c/busses/i2c-designware-common.c
index e7d316b1401a..c023b691441e 100644
--- a/drivers/i2c/busses/i2c-designware-common.c
+++ b/drivers/i2c/busses/i2c-designware-common.c
@@ -477,9 +477,6 @@ int i2c_dw_prepare_clk(struct dw_i2c_dev *dev, bool prepare)
{
int ret;
- if (IS_ERR(dev->clk))
- return PTR_ERR(dev->clk);
-
if (prepare) {
/* Optional interface clock */
ret = clk_prepare_enable(dev->pclk);
diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index 70ade5306e45..ba043b547393 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -320,8 +320,17 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
goto exit_reset;
}
- dev->clk = devm_clk_get(&pdev->dev, NULL);
- if (!i2c_dw_prepare_clk(dev, true)) {
+ dev->clk = devm_clk_get_optional(&pdev->dev, NULL);
+ if (IS_ERR(dev->clk)) {
+ ret = PTR_ERR(dev->clk);
+ goto exit_reset;
+ }
+
+ ret = i2c_dw_prepare_clk(dev, true);
+ if (ret)
+ goto exit_reset;
+
+ if (dev->clk) {
u64 clk_khz;
dev->get_clk_rate_khz = i2c_dw_get_clk_rate_khz;
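
devm_clk_get_optional(), adopted in the platdrv hunk, returns NULL rather than an error when no clock is described, so "no clock" stops being a failure and IS_ERR() only fires for genuine errors. The clk API treats a NULL clk as a no-op throughout, which keeps the rest of the code unconditional; a sketch:

static int my_clk_setup(struct device *dev, unsigned long *rate)
{
	struct clk *clk = devm_clk_get_optional(dev, NULL);
	int ret;

	if (IS_ERR(clk))		/* real failure, incl. -EPROBE_DEFER */
		return PTR_ERR(clk);

	ret = clk_prepare_enable(clk);	/* NULL clk: harmless no-op */
	if (ret)
		return ret;

	if (clk)			/* rate only meaningful if present */
		*rate = clk_get_rate(clk);
	return 0;
}
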
diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c
index bdecb78bfc26..8e6985354fd5 100644
--- a/drivers/i2c/busses/i2c-mt65xx.c
+++ b/drivers/i2c/busses/i2c-mt65xx.c
@@ -1420,17 +1420,22 @@ static int mtk_i2c_probe(struct platform_device *pdev)
if (ret < 0) {
dev_err(&pdev->dev,
"Request I2C IRQ %d fail\n", irq);
- return ret;
+ goto err_bulk_unprepare;
}
i2c_set_adapdata(&i2c->adap, i2c);
ret = i2c_add_adapter(&i2c->adap);
if (ret)
- return ret;
+ goto err_bulk_unprepare;
platform_set_drvdata(pdev, i2c);
return 0;
+
+err_bulk_unprepare:
+ clk_bulk_unprepare(I2C_MT65XX_CLK_MAX, i2c->clocks);
+
+ return ret;
}
static int mtk_i2c_remove(struct platform_device *pdev)
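
The mt65xx fix restates the usual probe() unwinding rule: once clk_bulk_prepare() has succeeded, every later failure must release the clocks, so the late error paths funnel through one cleanup label rather than returning directly. The shape of the pattern, all identifiers being placeholders:

struct my_priv {			/* hypothetical driver state */
	int num_clks;
	struct clk_bulk_data *clks;
};

static int my_probe(struct my_priv *priv)
{
	int ret;

	ret = clk_bulk_prepare(priv->num_clks, priv->clks);
	if (ret)
		return ret;		/* nothing acquired yet */

	ret = my_setup_irq(priv);	/* hypothetical later step */
	if (ret)
		goto err_unprepare;	/* undo in reverse order */

	ret = my_register(priv);	/* hypothetical final step */
	if (ret)
		goto err_unprepare;

	return 0;

err_unprepare:
	clk_bulk_unprepare(priv->num_clks, priv->clks);
	return ret;
}
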
diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c
index 5960ccde6574..aede9d551130 100644
--- a/drivers/i2c/busses/i2c-npcm7xx.c
+++ b/drivers/i2c/busses/i2c-npcm7xx.c
@@ -2372,8 +2372,7 @@ static struct platform_driver npcm_i2c_bus_driver = {
static int __init npcm_i2c_init(void)
{
npcm_i2c_debugfs_dir = debugfs_create_dir("npcm_i2c", NULL);
- platform_driver_register(&npcm_i2c_bus_driver);
- return 0;
+ return platform_driver_register(&npcm_i2c_bus_driver);
}
module_init(npcm_i2c_init);
diff --git a/drivers/infiniband/hw/mlx5/dm.c b/drivers/infiniband/hw/mlx5/dm.c
index 001d766cf291..3669c90b2dad 100644
--- a/drivers/infiniband/hw/mlx5/dm.c
+++ b/drivers/infiniband/hw/mlx5/dm.c
@@ -336,9 +336,15 @@ err_copy:
static enum mlx5_sw_icm_type get_icm_type(int uapi_type)
{
- return uapi_type == MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM ?
- MLX5_SW_ICM_TYPE_STEERING :
- MLX5_SW_ICM_TYPE_HEADER_MODIFY;
+ switch (uapi_type) {
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ return MLX5_SW_ICM_TYPE_HEADER_MODIFY;
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
+ return MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN;
+ case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+ default:
+ return MLX5_SW_ICM_TYPE_STEERING;
+ }
}
static struct ib_dm *handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
@@ -347,11 +353,32 @@ static struct ib_dm *handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
int type)
{
struct mlx5_core_dev *dev = to_mdev(ctx->device)->mdev;
- enum mlx5_sw_icm_type icm_type = get_icm_type(type);
+ enum mlx5_sw_icm_type icm_type;
struct mlx5_ib_dm_icm *dm;
u64 act_size;
int err;
+ if (!capable(CAP_SYS_RAWIO) || !capable(CAP_NET_RAW))
+ return ERR_PTR(-EPERM);
+
+ switch (type) {
+ case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner) ||
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner) ||
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) ||
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner_v2)))
+ return ERR_PTR(-EOPNOTSUPP);
+ break;
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
+ if (!MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) ||
+ !MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner_v2))
+ return ERR_PTR(-EOPNOTSUPP);
+ break;
+ default:
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
dm = kzalloc(sizeof(*dm), GFP_KERNEL);
if (!dm)
return ERR_PTR(-ENOMEM);
@@ -359,19 +386,6 @@ static struct ib_dm *handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
dm->base.type = type;
dm->base.ibdm.device = ctx->device;
- if (!capable(CAP_SYS_RAWIO) || !capable(CAP_NET_RAW)) {
- err = -EPERM;
- goto free;
- }
-
- if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner) ||
- MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner) ||
- MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) ||
- MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner_v2))) {
- err = -EOPNOTSUPP;
- goto free;
- }
-
/* Allocation size must be a multiple of the basic block size
* and a power of 2.
*/
@@ -379,6 +393,8 @@ static struct ib_dm *handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
act_size = roundup_pow_of_two(act_size);
dm->base.size = act_size;
+ icm_type = get_icm_type(type);
+
err = mlx5_dm_sw_icm_alloc(dev, icm_type, act_size, attr->alignment,
to_mucontext(ctx)->devx_uid,
&dm->base.dev_addr, &dm->obj_id);
@@ -420,8 +436,8 @@ struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
case MLX5_IB_UAPI_DM_TYPE_MEMIC:
return handle_alloc_dm_memic(context, attr, attrs);
case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
- return handle_alloc_dm_sw_icm(context, attr, attrs, type);
case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
return handle_alloc_dm_sw_icm(context, attr, attrs, type);
default:
return ERR_PTR(-EOPNOTSUPP);
@@ -474,6 +490,7 @@ static int mlx5_ib_dealloc_dm(struct ib_dm *ibdm,
return 0;
case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
return mlx5_dm_icm_dealloc(ctx, to_icm(ibdm));
default:
return -EOPNOTSUPP;
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 1e7653c997b5..aedfd7ff4846 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1083,6 +1083,7 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
break;
case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS)
return ERR_PTR(-EINVAL);
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 4ab1038b5482..1f23a6be7d88 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -298,7 +298,7 @@ config XTENSA_MX
config XILINX_INTC
bool "Xilinx Interrupt Controller IP"
- depends on MICROBLAZE || ARCH_ZYNQ || ARCH_ZYNQMP
+ depends on OF
select IRQ_DOMAIN
help
Support for the Xilinx Interrupt Controller IP core.
diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c
index 12dd48727a15..5ac83185ff47 100644
--- a/drivers/irqchip/irq-apple-aic.c
+++ b/drivers/irqchip/irq-apple-aic.c
@@ -1035,6 +1035,7 @@ static void build_fiq_affinity(struct aic_irq_chip *ic, struct device_node *aff)
continue;
cpu = of_cpu_node_to_id(cpu_node);
+ of_node_put(cpu_node);
if (WARN_ON(cpu < 0))
continue;
@@ -1143,6 +1144,7 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p
for_each_child_of_node(affs, chld)
build_fiq_affinity(irqc, chld);
}
+ of_node_put(affs);
set_handle_irq(aic_handle_irq);
set_handle_fiq(aic_handle_fiq);
diff --git a/drivers/irqchip/irq-gic-realview.c b/drivers/irqchip/irq-gic-realview.c
index b4c1924f0255..38fab02ffe9d 100644
--- a/drivers/irqchip/irq-gic-realview.c
+++ b/drivers/irqchip/irq-gic-realview.c
@@ -57,6 +57,7 @@ realview_gic_of_init(struct device_node *node, struct device_node *parent)
/* The PB11MPCore GIC needs to be configured in the syscon */
map = syscon_node_to_regmap(np);
+ of_node_put(np);
if (!IS_ERR(map)) {
/* new irq mode with no DCC */
regmap_write(map, REALVIEW_SYS_LOCK_OFFSET,
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 2be8dea6b6b0..5c1cf907ee68 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -1932,7 +1932,7 @@ static void __init gic_populate_ppi_partitions(struct device_node *gic_node)
gic_data.ppi_descs = kcalloc(gic_data.ppi_nr, sizeof(*gic_data.ppi_descs), GFP_KERNEL);
if (!gic_data.ppi_descs)
- return;
+ goto out_put_node;
nr_parts = of_get_child_count(parts_node);
@@ -1973,12 +1973,15 @@ static void __init gic_populate_ppi_partitions(struct device_node *gic_node)
continue;
cpu = of_cpu_node_to_id(cpu_node);
- if (WARN_ON(cpu < 0))
+ if (WARN_ON(cpu < 0)) {
+ of_node_put(cpu_node);
continue;
+ }
pr_cont("%pOF[%d] ", cpu_node, cpu);
cpumask_set_cpu(cpu, &part->mask);
+ of_node_put(cpu_node);
}
pr_cont("}\n");
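
Several irqchip hunks here (apple-aic, gic-realview, gic-v3, realtek-rtl) close the same leak class: of_* lookup helpers return a device_node with its refcount raised, and every exit path, including the early-continue and error ones, must drop it with of_node_put(). A sketch, with a hypothetical property name:

static int cpu_from_phandle(struct device_node *node)
{
	struct device_node *np = of_parse_phandle(node, "cpus", 0);
	int cpu;

	if (!np)
		return -EINVAL;	/* no reference taken, nothing to put */

	cpu = of_cpu_node_to_id(np);
	of_node_put(np);	/* drop the reference before branching so
				 * no path can leak it */
	return cpu < 0 ? -ENODEV : cpu;
}
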
diff --git a/drivers/irqchip/irq-loongson-liointc.c b/drivers/irqchip/irq-loongson-liointc.c
index aed88857d90f..8d05d8bcf56f 100644
--- a/drivers/irqchip/irq-loongson-liointc.c
+++ b/drivers/irqchip/irq-loongson-liointc.c
@@ -39,6 +39,12 @@
#define LIOINTC_ERRATA_IRQ 10
+#if defined(CONFIG_MIPS)
+#define liointc_core_id get_ebase_cpunum()
+#else
+#define liointc_core_id get_csr_cpuid()
+#endif
+
struct liointc_handler_data {
struct liointc_priv *priv;
u32 parent_int_map;
@@ -57,7 +63,7 @@ static void liointc_chained_handle_irq(struct irq_desc *desc)
struct liointc_handler_data *handler = irq_desc_get_handler_data(desc);
struct irq_chip *chip = irq_desc_get_chip(desc);
struct irq_chip_generic *gc = handler->priv->gc;
- int core = cpu_logical_map(smp_processor_id()) % LIOINTC_NUM_CORES;
+ int core = liointc_core_id % LIOINTC_NUM_CORES;
u32 pending;
chained_irq_enter(chip, desc);
diff --git a/drivers/irqchip/irq-realtek-rtl.c b/drivers/irqchip/irq-realtek-rtl.c
index 50a56820c99b..56bf502d9c67 100644
--- a/drivers/irqchip/irq-realtek-rtl.c
+++ b/drivers/irqchip/irq-realtek-rtl.c
@@ -134,9 +134,9 @@ static int __init map_interrupts(struct device_node *node, struct irq_domain *do
if (!cpu_ictl)
return -EINVAL;
ret = of_property_read_u32(cpu_ictl, "#interrupt-cells", &tmp);
+ of_node_put(cpu_ictl);
if (ret || tmp != 1)
return -EINVAL;
- of_node_put(cpu_ictl);
cpu_int = be32_to_cpup(imap + 2);
if (cpu_int > 7 || cpu_int < 2)
diff --git a/drivers/irqchip/irq-uniphier-aidet.c b/drivers/irqchip/irq-uniphier-aidet.c
index 89121b39be26..716b1bb88bf2 100644
--- a/drivers/irqchip/irq-uniphier-aidet.c
+++ b/drivers/irqchip/irq-uniphier-aidet.c
@@ -237,6 +237,7 @@ static const struct of_device_id uniphier_aidet_match[] = {
{ .compatible = "socionext,uniphier-ld11-aidet" },
{ .compatible = "socionext,uniphier-ld20-aidet" },
{ .compatible = "socionext,uniphier-pxs3-aidet" },
+ { .compatible = "socionext,uniphier-nx1-aidet" },
{ /* sentinel */ }
};
diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c
index cd5642cef01f..651f2f8f685b 100644
--- a/drivers/isdn/hardware/mISDN/hfcsusb.c
+++ b/drivers/isdn/hardware/mISDN/hfcsusb.c
@@ -1557,7 +1557,7 @@ reset_hfcsusb(struct hfcsusb *hw)
write_reg(hw, HFCUSB_USB_SIZE, (hw->packet_size / 8) |
((hw->packet_size / 8) << 4));
- /* set USB_SIZE_I to match the the wMaxPacketSize for ISO transfers */
+ /* set USB_SIZE_I to match the wMaxPacketSize for ISO transfers */
write_reg(hw, HFCUSB_USB_SIZE_I, hw->iso_packet_size);
/* enable PCM/GCI master mode */
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index d21648a923ea..54c0473a51dd 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -33,6 +33,14 @@ struct dm_kobject_holder {
* access their members!
*/
+/*
+ * For mempools pre-allocation at the table loading time.
+ */
+struct dm_md_mempools {
+ struct bio_set bs;
+ struct bio_set io_bs;
+};
+
struct mapped_device {
struct mutex suspend_lock;
@@ -110,8 +118,7 @@ struct mapped_device {
/*
* io objects are allocated from here.
*/
- struct bio_set io_bs;
- struct bio_set bs;
+ struct dm_md_mempools *mempools;
/* kobject and completion */
struct dm_kobject_holder kobj_holder;
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 06f328928a7f..2dda05aada23 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -415,8 +415,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
/*
* Work out how many "unsigned long"s we need to hold the bitset.
*/
- bitset_size = dm_round_up(region_count,
- sizeof(*lc->clean_bits) << BYTE_SHIFT);
+ bitset_size = dm_round_up(region_count, BITS_PER_LONG);
bitset_size >>= BYTE_SHIFT;
lc->bitset_uint32_count = bitset_size / sizeof(*lc->clean_bits);
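
The dm-log fix corrects the bitset's rounding granularity: the old expression rounded the region count to 32-bit units (sizeof(*lc->clean_bits) << BYTE_SHIFT), but the kernel's bitmap operations work on unsigned longs, so on 64-bit the allocation must cover a whole number of longs; rounding to BITS_PER_LONG before the bits-to-bytes shift achieves that. The generic helper expresses the same computation; a sketch:

#include <linux/bitops.h>

static size_t bitset_bytes(unsigned long nbits)
{
	/* round up to whole unsigned longs, then convert to bytes */
	return BITS_TO_LONGS(nbits) * sizeof(unsigned long);
}
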
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 5e41fbae3f6b..9526ccbedafb 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3725,7 +3725,7 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) {
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- md_reap_sync_thread(mddev, false);
+ md_reap_sync_thread(mddev);
}
} else if (decipher_sync_action(mddev, mddev->recovery) != st_idle)
return -EBUSY;
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 6087cdcaad46..a83b98a8d2a9 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -319,7 +319,7 @@ static int setup_clone(struct request *clone, struct request *rq,
{
int r;
- r = blk_rq_prep_clone(clone, rq, &tio->md->bs, gfp_mask,
+ r = blk_rq_prep_clone(clone, rq, &tio->md->mempools->bs, gfp_mask,
dm_rq_bio_constructor, tio);
if (r)
return r;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 0e833a154b31..bd539afbfe88 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1038,17 +1038,6 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *
return 0;
}
-void dm_table_free_md_mempools(struct dm_table *t)
-{
- dm_free_md_mempools(t->mempools);
- t->mempools = NULL;
-}
-
-struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t)
-{
- return t->mempools;
-}
-
static int setup_indexes(struct dm_table *t)
{
int i;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index dfb0a551bd88..b6b25d319ef7 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -136,14 +136,6 @@ static int get_swap_bios(void)
return latch;
}
-/*
- * For mempools pre-allocation at the table loading time.
- */
-struct dm_md_mempools {
- struct bio_set bs;
- struct bio_set io_bs;
-};
-
struct table_device {
struct list_head list;
refcount_t count;
@@ -563,6 +555,10 @@ static void dm_start_io_acct(struct dm_io *io, struct bio *clone)
unsigned long flags;
/* Can afford locking given DM_TIO_IS_DUPLICATE_BIO */
spin_lock_irqsave(&io->lock, flags);
+ if (dm_io_flagged(io, DM_IO_ACCOUNTED)) {
+ spin_unlock_irqrestore(&io->lock, flags);
+ return;
+ }
dm_io_set_flag(io, DM_IO_ACCOUNTED);
spin_unlock_irqrestore(&io->lock, flags);
}
@@ -581,7 +577,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
struct dm_target_io *tio;
struct bio *clone;
- clone = bio_alloc_clone(NULL, bio, GFP_NOIO, &md->io_bs);
+ clone = bio_alloc_clone(NULL, bio, GFP_NOIO, &md->mempools->io_bs);
/* Set default bdev, but target must bio_set_dev() before issuing IO */
clone->bi_bdev = md->disk->part0;
@@ -628,7 +624,8 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti,
} else {
struct mapped_device *md = ci->io->md;
- clone = bio_alloc_clone(NULL, ci->bio, gfp_mask, &md->bs);
+ clone = bio_alloc_clone(NULL, ci->bio, gfp_mask,
+ &md->mempools->bs);
if (!clone)
return NULL;
/* Set default bdev, but target must bio_set_dev() before issuing IO */
@@ -718,18 +715,18 @@ static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
}
static inline struct dm_table *dm_get_live_table_bio(struct mapped_device *md,
- int *srcu_idx, struct bio *bio)
+ int *srcu_idx, unsigned bio_opf)
{
- if (bio->bi_opf & REQ_NOWAIT)
+ if (bio_opf & REQ_NOWAIT)
return dm_get_live_table_fast(md);
else
return dm_get_live_table(md, srcu_idx);
}
static inline void dm_put_live_table_bio(struct mapped_device *md, int srcu_idx,
- struct bio *bio)
+ unsigned bio_opf)
{
- if (bio->bi_opf & REQ_NOWAIT)
+ if (bio_opf & REQ_NOWAIT)
dm_put_live_table_fast(md);
else
dm_put_live_table(md, srcu_idx);
@@ -1023,23 +1020,19 @@ static void clone_endio(struct bio *bio)
struct dm_io *io = tio->io;
struct mapped_device *md = io->md;
- if (likely(bio->bi_bdev != md->disk->part0)) {
- struct request_queue *q = bdev_get_queue(bio->bi_bdev);
-
- if (unlikely(error == BLK_STS_TARGET)) {
- if (bio_op(bio) == REQ_OP_DISCARD &&
- !bdev_max_discard_sectors(bio->bi_bdev))
- disable_discard(md);
- else if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
- !q->limits.max_write_zeroes_sectors)
- disable_write_zeroes(md);
- }
-
- if (static_branch_unlikely(&zoned_enabled) &&
- unlikely(blk_queue_is_zoned(q)))
- dm_zone_endio(io, bio);
+ if (unlikely(error == BLK_STS_TARGET)) {
+ if (bio_op(bio) == REQ_OP_DISCARD &&
+ !bdev_max_discard_sectors(bio->bi_bdev))
+ disable_discard(md);
+ else if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
+ !bdev_write_zeroes_sectors(bio->bi_bdev))
+ disable_write_zeroes(md);
}
+ if (static_branch_unlikely(&zoned_enabled) &&
+ unlikely(blk_queue_is_zoned(bdev_get_queue(bio->bi_bdev))))
+ dm_zone_endio(io, bio);
+
if (endio) {
int r = endio(ti, bio, &error);
switch (r) {
@@ -1620,7 +1613,12 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci)
ti = dm_table_find_target(ci->map, ci->sector);
if (unlikely(!ti))
return BLK_STS_IOERR;
- else if (unlikely(ci->is_abnormal_io))
+
+ if (unlikely((ci->bio->bi_opf & REQ_NOWAIT) != 0) &&
+ unlikely(!dm_target_supports_nowait(ti->type)))
+ return BLK_STS_NOTSUPP;
+
+ if (unlikely(ci->is_abnormal_io))
return __process_abnormal_io(ci, ti);
/*
@@ -1722,8 +1720,9 @@ static void dm_submit_bio(struct bio *bio)
struct mapped_device *md = bio->bi_bdev->bd_disk->private_data;
int srcu_idx;
struct dm_table *map;
+ unsigned bio_opf = bio->bi_opf;
- map = dm_get_live_table_bio(md, &srcu_idx, bio);
+ map = dm_get_live_table_bio(md, &srcu_idx, bio_opf);
/* If suspended, or map not yet available, queue this IO for later */
if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) ||
@@ -1739,7 +1738,7 @@ static void dm_submit_bio(struct bio *bio)
dm_split_and_process_bio(md, map, bio);
out:
- dm_put_live_table_bio(md, srcu_idx, bio);
+ dm_put_live_table_bio(md, srcu_idx, bio_opf);
}
static bool dm_poll_dm_io(struct dm_io *io, struct io_comp_batch *iob,
@@ -1876,8 +1875,7 @@ static void cleanup_mapped_device(struct mapped_device *md)
{
if (md->wq)
destroy_workqueue(md->wq);
- bioset_exit(&md->bs);
- bioset_exit(&md->io_bs);
+ dm_free_md_mempools(md->mempools);
if (md->dax_dev) {
dax_remove_host(md->disk);
@@ -2049,48 +2047,6 @@ static void free_dev(struct mapped_device *md)
kvfree(md);
}
-static int __bind_mempools(struct mapped_device *md, struct dm_table *t)
-{
- struct dm_md_mempools *p = dm_table_get_md_mempools(t);
- int ret = 0;
-
- if (dm_table_bio_based(t)) {
- /*
- * The md may already have mempools that need changing.
- * If so, reload bioset because front_pad may have changed
- * because a different table was loaded.
- */
- bioset_exit(&md->bs);
- bioset_exit(&md->io_bs);
-
- } else if (bioset_initialized(&md->bs)) {
- /*
- * There's no need to reload with request-based dm
- * because the size of front_pad doesn't change.
- * Note for future: If you are to reload bioset,
- * prep-ed requests in the queue may refer
- * to bio from the old bioset, so you must walk
- * through the queue to unprep.
- */
- goto out;
- }
-
- BUG_ON(!p ||
- bioset_initialized(&md->bs) ||
- bioset_initialized(&md->io_bs));
-
- ret = bioset_init_from_src(&md->bs, &p->bs);
- if (ret)
- goto out;
- ret = bioset_init_from_src(&md->io_bs, &p->io_bs);
- if (ret)
- bioset_exit(&md->bs);
-out:
- /* mempool bind completed, no longer need any mempools in the table */
- dm_table_free_md_mempools(t);
- return ret;
-}
-
/*
* Bind a table to the device.
*/
@@ -2144,12 +2100,28 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
* immutable singletons - used to optimize dm_mq_queue_rq.
*/
md->immutable_target = dm_table_get_immutable_target(t);
- }
- ret = __bind_mempools(md, t);
- if (ret) {
- old_map = ERR_PTR(ret);
- goto out;
+ /*
+ * There is no need to reload with request-based dm because the
+ * size of front_pad doesn't change.
+ *
+ * Note for future: If you are to reload bioset, prep-ed
+ * requests in the queue may refer to bio from the old bioset,
+ * so you must walk through the queue to unprep.
+ */
+ if (!md->mempools) {
+ md->mempools = t->mempools;
+ t->mempools = NULL;
+ }
+ } else {
+ /*
+ * The md may already have mempools that need changing.
+ * If so, reload bioset because front_pad may have changed
+ * because a different table was loaded.
+ */
+ dm_free_md_mempools(md->mempools);
+ md->mempools = t->mempools;
+ t->mempools = NULL;
}
ret = dm_table_set_restrictions(t, md->queue, limits);
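
Several dm.c hunks share one idea: anything dm_submit_bio() needs after dm_split_and_process_bio() must be snapshotted up front, since the bio may be requeued or completed underneath it. A hedged sketch of the pattern:

    unsigned bio_opf = bio->bi_opf;   /* snapshot while the bio is still ours */

    map = dm_get_live_table_bio(md, &srcu_idx, bio_opf);
    dm_split_and_process_bio(md, map, bio);  /* bio may be gone afterwards */
    dm_put_live_table_bio(md, srcu_idx, bio_opf);
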
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 3f89664fea01..a8405ce305a9 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -71,8 +71,6 @@ struct dm_target *dm_table_get_immutable_target(struct dm_table *t);
struct dm_target *dm_table_get_wildcard_target(struct dm_table *t);
bool dm_table_bio_based(struct dm_table *t);
bool dm_table_request_based(struct dm_table *t);
-void dm_table_free_md_mempools(struct dm_table *t);
-struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
void dm_lock_md_type(struct mapped_device *md);
void dm_unlock_md_type(struct mapped_device *md);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 8273ac5eef06..c7ecb0bffda0 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -4831,7 +4831,7 @@ action_store(struct mddev *mddev, const char *page, size_t len)
flush_workqueue(md_misc_wq);
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- md_reap_sync_thread(mddev, true);
+ md_reap_sync_thread(mddev);
}
mddev_unlock(mddev);
}
@@ -6197,7 +6197,7 @@ static void __md_stop_writes(struct mddev *mddev)
flush_workqueue(md_misc_wq);
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- md_reap_sync_thread(mddev, true);
+ md_reap_sync_thread(mddev);
}
del_timer_sync(&mddev->safemode_timer);
@@ -9303,7 +9303,7 @@ void md_check_recovery(struct mddev *mddev)
* ->spare_active and clear saved_raid_disk
*/
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- md_reap_sync_thread(mddev, true);
+ md_reap_sync_thread(mddev);
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
@@ -9338,7 +9338,7 @@ void md_check_recovery(struct mddev *mddev)
goto unlock;
}
if (mddev->sync_thread) {
- md_reap_sync_thread(mddev, true);
+ md_reap_sync_thread(mddev);
goto unlock;
}
/* Set RUNNING before clearing NEEDED to avoid
@@ -9411,18 +9411,14 @@ void md_check_recovery(struct mddev *mddev)
}
EXPORT_SYMBOL(md_check_recovery);
-void md_reap_sync_thread(struct mddev *mddev, bool reconfig_mutex_held)
+void md_reap_sync_thread(struct mddev *mddev)
{
struct md_rdev *rdev;
sector_t old_dev_sectors = mddev->dev_sectors;
bool is_reshaped = false;
- if (reconfig_mutex_held)
- mddev_unlock(mddev);
/* resync has finished, collect result */
md_unregister_thread(&mddev->sync_thread);
- if (reconfig_mutex_held)
- mddev_lock_nointr(mddev);
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
mddev->degraded != mddev->raid_disks) {
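
Dropping the reconfig_mutex_held flag moves the locking burden wholly onto callers; a minimal sketch of the resulting contract, assuming the mddev_lock_nointr()/mddev_unlock() helpers from drivers/md/md.h (illustrative, not a verbatim caller):

    mddev_lock_nointr(mddev);
    if (mddev->sync_thread) {
            set_bit(MD_RECOVERY_INTR, &mddev->recovery);
            md_reap_sync_thread(mddev);   /* no longer drops the mutex itself */
    }
    mddev_unlock(mddev);
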
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 5f62c46ac2d3..cf2cbb17acbd 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -719,7 +719,7 @@ extern struct md_thread *md_register_thread(
extern void md_unregister_thread(struct md_thread **threadp);
extern void md_wakeup_thread(struct md_thread *thread);
extern void md_check_recovery(struct mddev *mddev);
-extern void md_reap_sync_thread(struct mddev *mddev, bool reconfig_mutex_held);
+extern void md_reap_sync_thread(struct mddev *mddev);
extern int mddev_init_writes_pending(struct mddev *mddev);
extern bool md_write_start(struct mddev *mddev, struct bio *bi);
extern void md_write_inc(struct mddev *mddev, struct bio *bi);
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 973e2e06f19c..0a2e4806b1ec 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -629,9 +629,9 @@ static void ppl_do_flush(struct ppl_io_unit *io)
if (bdev) {
struct bio *bio;
- bio = bio_alloc_bioset(bdev, 0, GFP_NOIO,
+ bio = bio_alloc_bioset(bdev, 0,
REQ_OP_WRITE | REQ_PREFLUSH,
- &ppl_conf->flush_bs);
+ GFP_NOIO, &ppl_conf->flush_bs);
bio->bi_private = io;
bio->bi_end_io = ppl_flush_endio;
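
The bug fixed here is a silent argument transposition: in this era bio_alloc_bioset() takes (bdev, nr_vecs, opf, gfp_mask, bs), and since opf and gfp_mask are both plain integer types, passing them swapped compiles without a warning. The corrected call with the roles annotated, as a hedged sketch:

    bio = bio_alloc_bioset(bdev, 0,
                           REQ_OP_WRITE | REQ_PREFLUSH, /* opf: 3rd */
                           GFP_NOIO,                    /* gfp_mask: 4th */
                           &ppl_conf->flush_bs);
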
diff --git a/drivers/misc/atmel-ssc.c b/drivers/misc/atmel-ssc.c
index d6cd5537126c..69f9b0336410 100644
--- a/drivers/misc/atmel-ssc.c
+++ b/drivers/misc/atmel-ssc.c
@@ -232,9 +232,9 @@ static int ssc_probe(struct platform_device *pdev)
clk_disable_unprepare(ssc->clk);
ssc->irq = platform_get_irq(pdev, 0);
- if (!ssc->irq) {
+ if (ssc->irq < 0) {
dev_dbg(&pdev->dev, "could not get irq\n");
- return -ENXIO;
+ return ssc->irq;
}
mutex_lock(&user_lock);
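
platform_get_irq() signals failure with a negative errno, not with 0, so the old `!ssc->irq` test never fired and the hardcoded -ENXIO would have masked the real reason (notably -EPROBE_DEFER). The canonical pattern the hunk adopts, sketched:

    int irq = platform_get_irq(pdev, 0);

    if (irq < 0)
            return irq;     /* propagate the real errno, e.g. -EPROBE_DEFER */
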
diff --git a/drivers/misc/cardreader/rts5261.c b/drivers/misc/cardreader/rts5261.c
index 749cc5a46d13..b1e76030cafd 100644
--- a/drivers/misc/cardreader/rts5261.c
+++ b/drivers/misc/cardreader/rts5261.c
@@ -407,6 +407,8 @@ static void rts5261_init_from_hw(struct rtsx_pcr *pcr)
// default
setting_reg1 = PCR_SETTING_REG1;
setting_reg2 = PCR_SETTING_REG2;
+ } else {
+ return;
}
pci_read_config_dword(pdev, setting_reg2, &lval2);
diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c
index 8d169a35cf13..c9c56fd194c1 100644
--- a/drivers/misc/eeprom/at25.c
+++ b/drivers/misc/eeprom/at25.c
@@ -79,6 +79,11 @@ static int at25_ee_read(void *priv, unsigned int offset,
{
struct at25_data *at25 = priv;
char *buf = val;
+ size_t max_chunk = spi_max_transfer_size(at25->spi);
+ size_t num_msgs = DIV_ROUND_UP(count, max_chunk);
+ size_t nr_bytes = 0;
+ unsigned int msg_offset;
+ size_t msg_count;
u8 *cp;
ssize_t status;
struct spi_transfer t[2];
@@ -92,54 +97,59 @@ static int at25_ee_read(void *priv, unsigned int offset,
if (unlikely(!count))
return -EINVAL;
- cp = at25->command;
+ msg_offset = (unsigned int)offset;
+ msg_count = min(count, max_chunk);
+ while (num_msgs) {
+ cp = at25->command;
- instr = AT25_READ;
- if (at25->chip.flags & EE_INSTR_BIT3_IS_ADDR)
- if (offset >= BIT(at25->addrlen * 8))
- instr |= AT25_INSTR_BIT3;
+ instr = AT25_READ;
+ if (at25->chip.flags & EE_INSTR_BIT3_IS_ADDR)
+ if (msg_offset >= BIT(at25->addrlen * 8))
+ instr |= AT25_INSTR_BIT3;
- mutex_lock(&at25->lock);
+ mutex_lock(&at25->lock);
- *cp++ = instr;
-
- /* 8/16/24-bit address is written MSB first */
- switch (at25->addrlen) {
- default: /* case 3 */
- *cp++ = offset >> 16;
- fallthrough;
- case 2:
- *cp++ = offset >> 8;
- fallthrough;
- case 1:
- case 0: /* can't happen: for better code generation */
- *cp++ = offset >> 0;
- }
+ *cp++ = instr;
- spi_message_init(&m);
- memset(t, 0, sizeof(t));
+ /* 8/16/24-bit address is written MSB first */
+ switch (at25->addrlen) {
+ default: /* case 3 */
+ *cp++ = msg_offset >> 16;
+ fallthrough;
+ case 2:
+ *cp++ = msg_offset >> 8;
+ fallthrough;
+ case 1:
+ case 0: /* can't happen: for better code generation */
+ *cp++ = msg_offset >> 0;
+ }
- t[0].tx_buf = at25->command;
- t[0].len = at25->addrlen + 1;
- spi_message_add_tail(&t[0], &m);
+ spi_message_init(&m);
+ memset(t, 0, sizeof(t));
- t[1].rx_buf = buf;
- t[1].len = count;
- spi_message_add_tail(&t[1], &m);
+ t[0].tx_buf = at25->command;
+ t[0].len = at25->addrlen + 1;
+ spi_message_add_tail(&t[0], &m);
- /*
- * Read it all at once.
- *
- * REVISIT that's potentially a problem with large chips, if
- * other devices on the bus need to be accessed regularly or
- * this chip is clocked very slowly.
- */
- status = spi_sync(at25->spi, &m);
- dev_dbg(&at25->spi->dev, "read %zu bytes at %d --> %zd\n",
- count, offset, status);
+ t[1].rx_buf = buf + nr_bytes;
+ t[1].len = msg_count;
+ spi_message_add_tail(&t[1], &m);
- mutex_unlock(&at25->lock);
- return status;
+ status = spi_sync(at25->spi, &m);
+
+ mutex_unlock(&at25->lock);
+
+ if (status)
+ return status;
+
+ --num_msgs;
+ msg_offset += msg_count;
+ nr_bytes += msg_count;
+ }
+
+ dev_dbg(&at25->spi->dev, "read %zu bytes at %d\n",
+ count, offset);
+ return 0;
}
/* Read extra registers as ID or serial number */
@@ -190,6 +200,7 @@ ATTRIBUTE_GROUPS(sernum);
static int at25_ee_write(void *priv, unsigned int off, void *val, size_t count)
{
struct at25_data *at25 = priv;
+ size_t maxsz = spi_max_transfer_size(at25->spi);
const char *buf = val;
int status = 0;
unsigned buf_size;
@@ -253,6 +264,8 @@ static int at25_ee_write(void *priv, unsigned int off, void *val, size_t count)
segment = buf_size - (offset % buf_size);
if (segment > count)
segment = count;
+ if (segment > maxsz)
+ segment = maxsz;
memcpy(cp, buf, segment);
status = spi_write(at25->spi, bounce,
segment + at25->addrlen + 1);
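
The rewritten read path is the standard chunk-by-max-transfer loop; a minimal, self-contained sketch of the same pattern under assumed names (do_one_read() is a hypothetical stand-in for the spi_sync() message round-trip):

    #include <stddef.h>

    /* hypothetical single-transfer primitive; returns 0 or a -errno */
    int do_one_read(unsigned int offset, void *buf, size_t len);

    /* read `count` bytes at `offset`, never exceeding `max_chunk` per
     * transfer -- the general shape the new at25_ee_read() loop follows */
    static int chunked_read(unsigned int offset, char *buf, size_t count,
                            size_t max_chunk)
    {
            size_t done = 0;

            while (done < count) {
                    size_t len = count - done;
                    int ret;

                    if (len > max_chunk)
                            len = max_chunk;
                    ret = do_one_read(offset + done, buf + done, len);
                    if (ret)
                            return ret;     /* fail fast; no partial success */
                    done += len;
            }
            return 0;
    }
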
diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c
index cebcca6d6d3e..cf2b8261da14 100644
--- a/drivers/misc/mei/hbm.c
+++ b/drivers/misc/mei/hbm.c
@@ -1351,7 +1351,8 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
if (dev->dev_state != MEI_DEV_INIT_CLIENTS ||
dev->hbm_state != MEI_HBM_CAP_SETUP) {
- if (dev->dev_state == MEI_DEV_POWER_DOWN) {
+ if (dev->dev_state == MEI_DEV_POWER_DOWN ||
+ dev->dev_state == MEI_DEV_POWERING_DOWN) {
dev_dbg(dev->dev, "hbm: capabilities response: on shutdown, ignoring\n");
return 0;
}
diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h
index 64ce3f830262..15e8e2b322b1 100644
--- a/drivers/misc/mei/hw-me-regs.h
+++ b/drivers/misc/mei/hw-me-regs.h
@@ -109,6 +109,8 @@
#define MEI_DEV_ID_ADP_P 0x51E0 /* Alder Lake Point P */
#define MEI_DEV_ID_ADP_N 0x54E0 /* Alder Lake Point N */
+#define MEI_DEV_ID_RPL_S 0x7A68 /* Raptor Lake Point S */
+
/*
* MEI HW Section
*/
diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c
index 9870bf717979..befa491e3344 100644
--- a/drivers/misc/mei/hw-me.c
+++ b/drivers/misc/mei/hw-me.c
@@ -1154,6 +1154,8 @@ static int mei_me_hw_reset(struct mei_device *dev, bool intr_enable)
ret = mei_me_d0i3_exit_sync(dev);
if (ret)
return ret;
+ } else {
+ hw->pg_state = MEI_PG_OFF;
}
}
diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c
index 33e58821e478..5435604327a7 100644
--- a/drivers/misc/mei/pci-me.c
+++ b/drivers/misc/mei/pci-me.c
@@ -116,6 +116,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = {
{MEI_PCI_DEVICE(MEI_DEV_ID_ADP_P, MEI_ME_PCH15_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_ADP_N, MEI_ME_PCH15_CFG)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_RPL_S, MEI_ME_PCH15_CFG)},
+
/* required last entry */
{0, }
};
diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
index 195dc897188b..9da4489dc345 100644
--- a/drivers/mmc/host/mtk-sd.c
+++ b/drivers/mmc/host/mtk-sd.c
@@ -1356,7 +1356,7 @@ static void msdc_data_xfer_next(struct msdc_host *host, struct mmc_request *mrq)
msdc_request_done(host, mrq);
}
-static bool msdc_data_xfer_done(struct msdc_host *host, u32 events,
+static void msdc_data_xfer_done(struct msdc_host *host, u32 events,
struct mmc_request *mrq, struct mmc_data *data)
{
struct mmc_command *stop;
@@ -1376,7 +1376,7 @@ static bool msdc_data_xfer_done(struct msdc_host *host, u32 events,
spin_unlock_irqrestore(&host->lock, flags);
if (done)
- return true;
+ return;
stop = data->stop;
if (check_data || (stop && stop->error)) {
@@ -1385,12 +1385,15 @@ static bool msdc_data_xfer_done(struct msdc_host *host, u32 events,
sdr_set_field(host->base + MSDC_DMA_CTRL, MSDC_DMA_CTRL_STOP,
1);
+ ret = readl_poll_timeout_atomic(host->base + MSDC_DMA_CTRL, val,
+ !(val & MSDC_DMA_CTRL_STOP), 1, 20000);
+ if (ret)
+ dev_dbg(host->dev, "DMA stop timed out\n");
+
ret = readl_poll_timeout_atomic(host->base + MSDC_DMA_CFG, val,
!(val & MSDC_DMA_CFG_STS), 1, 20000);
- if (ret) {
- dev_dbg(host->dev, "DMA stop timed out\n");
- return false;
- }
+ if (ret)
+ dev_dbg(host->dev, "DMA inactive timed out\n");
sdr_clr_bits(host->base + MSDC_INTEN, data_ints_mask);
dev_dbg(host->dev, "DMA stop\n");
@@ -1415,9 +1418,7 @@ static bool msdc_data_xfer_done(struct msdc_host *host, u32 events,
}
msdc_data_xfer_next(host, mrq);
- done = true;
}
- return done;
}
static void msdc_set_buswidth(struct msdc_host *host, u32 width)
@@ -2416,6 +2417,9 @@ static void msdc_cqe_disable(struct mmc_host *mmc, bool recovery)
if (recovery) {
sdr_set_field(host->base + MSDC_DMA_CTRL,
MSDC_DMA_CTRL_STOP, 1);
+ if (WARN_ON(readl_poll_timeout(host->base + MSDC_DMA_CTRL, val,
+ !(val & MSDC_DMA_CTRL_STOP), 1, 3000)))
+ return;
if (WARN_ON(readl_poll_timeout(host->base + MSDC_DMA_CFG, val,
!(val & MSDC_DMA_CFG_STS), 1, 3000)))
return;
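
Both new waits use the same iopoll helper: readl_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) from <linux/iopoll.h> re-reads the register into val until cond becomes true, returning 0 on success or -ETIMEDOUT. A sketch of the pattern with hypothetical DEMO_* names:

    #include <linux/iopoll.h>

    u32 val;
    int ret;

    /* poll every 1 us, give up after 20 ms (same bounds as the hunk) */
    ret = readl_poll_timeout_atomic(host->base + DEMO_CTRL, val,
                                    !(val & DEMO_STOP), 1, 20000);
    if (ret)
            dev_dbg(host->dev, "DEMO engine stop timed out\n");
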
diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c
index 92c20cb8074a..0d4d343dbb77 100644
--- a/drivers/mmc/host/sdhci-pci-o2micro.c
+++ b/drivers/mmc/host/sdhci-pci-o2micro.c
@@ -152,6 +152,8 @@ static int sdhci_o2_get_cd(struct mmc_host *mmc)
if (!(sdhci_readw(host, O2_PLL_DLL_WDT_CONTROL1) & O2_PLL_LOCK_STATUS))
sdhci_o2_enable_internal_clock(host);
+ else
+ sdhci_o2_wait_card_detect_stable(host);
return !!(sdhci_readl(host, SDHCI_PRESENT_STATE) & SDHCI_CARD_PRESENT);
}
diff --git a/drivers/net/amt.c b/drivers/net/amt.c
index be2719a3ba70..732f4c0daa73 100644
--- a/drivers/net/amt.c
+++ b/drivers/net/amt.c
@@ -1373,11 +1373,11 @@ static void amt_add_srcs(struct amt_dev *amt, struct amt_tunnel_list *tunnel,
int i;
if (!v6) {
- igmp_grec = (struct igmpv3_grec *)grec;
+ igmp_grec = grec;
nsrcs = ntohs(igmp_grec->grec_nsrcs);
} else {
#if IS_ENABLED(CONFIG_IPV6)
- mld_grec = (struct mld2_grec *)grec;
+ mld_grec = grec;
nsrcs = ntohs(mld_grec->grec_nsrcs);
#else
return;
@@ -1458,11 +1458,11 @@ static void amt_lookup_act_srcs(struct amt_tunnel_list *tunnel,
int i, j;
if (!v6) {
- igmp_grec = (struct igmpv3_grec *)grec;
+ igmp_grec = grec;
nsrcs = ntohs(igmp_grec->grec_nsrcs);
} else {
#if IS_ENABLED(CONFIG_IPV6)
- mld_grec = (struct mld2_grec *)grec;
+ mld_grec = grec;
nsrcs = ntohs(mld_grec->grec_nsrcs);
#else
return;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 3d427183ec8e..e75acb14d066 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1026,12 +1026,38 @@ out:
}
+/**
+ * bond_choose_primary_or_current - select the primary or high priority slave
+ * @bond: our bonding struct
+ *
+ * - Check if there is a primary link. If the primary link is set and up,
+ * go on and do link reselection.
+ *
+ * - If the primary link is not set or is down, find the highest-priority link.
+ * If the highest-priority link is not the current slave, treat it as the
+ * primary link and do link reselection.
+ */
static struct slave *bond_choose_primary_or_current(struct bonding *bond)
{
struct slave *prim = rtnl_dereference(bond->primary_slave);
struct slave *curr = rtnl_dereference(bond->curr_active_slave);
+ struct slave *slave, *hprio = NULL;
+ struct list_head *iter;
if (!prim || prim->link != BOND_LINK_UP) {
+ bond_for_each_slave(bond, slave, iter) {
+ if (slave->link == BOND_LINK_UP) {
+ hprio = hprio ?: slave;
+ if (slave->prio > hprio->prio)
+ hprio = slave;
+ }
+ }
+
+ if (hprio && hprio != curr) {
+ prim = hprio;
+ goto link_reselect;
+ }
+
if (!curr || curr->link != BOND_LINK_UP)
return NULL;
return curr;
@@ -1042,6 +1068,7 @@ static struct slave *bond_choose_primary_or_current(struct bonding *bond)
return prim;
}
+link_reselect:
if (!curr || curr->link != BOND_LINK_UP)
return prim;
@@ -3684,9 +3711,11 @@ re_arm:
if (!rtnl_trylock())
return;
- if (should_notify_peers)
+ if (should_notify_peers) {
+ bond->send_peer_notif--;
call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
bond->dev);
+ }
if (should_notify_rtnl) {
bond_slave_state_notify(bond);
bond_slave_link_notify(bond);
diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
index 5a6f44455b95..c2d080fc4fc4 100644
--- a/drivers/net/bonding/bond_netlink.c
+++ b/drivers/net/bonding/bond_netlink.c
@@ -27,6 +27,7 @@ static size_t bond_get_slave_size(const struct net_device *bond_dev,
nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_AD_AGGREGATOR_ID */
nla_total_size(sizeof(u8)) + /* IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE */
nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE */
+ nla_total_size(sizeof(s32)) + /* IFLA_BOND_SLAVE_PRIO */
0;
}
@@ -53,6 +54,9 @@ static int bond_fill_slave_info(struct sk_buff *skb,
if (nla_put_u16(skb, IFLA_BOND_SLAVE_QUEUE_ID, slave->queue_id))
goto nla_put_failure;
+ if (nla_put_s32(skb, IFLA_BOND_SLAVE_PRIO, slave->prio))
+ goto nla_put_failure;
+
if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) {
const struct aggregator *agg;
const struct port *ad_port;
@@ -117,6 +121,7 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = {
static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = {
[IFLA_BOND_SLAVE_QUEUE_ID] = { .type = NLA_U16 },
+ [IFLA_BOND_SLAVE_PRIO] = { .type = NLA_S32 },
};
static int bond_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -157,6 +162,16 @@ static int bond_slave_changelink(struct net_device *bond_dev,
return err;
}
+ if (data[IFLA_BOND_SLAVE_PRIO]) {
+ int prio = nla_get_s32(data[IFLA_BOND_SLAVE_PRIO]);
+
+ bond_opt_slave_initval(&newval, &slave_dev, prio);
+ err = __bond_opt_set(bond, BOND_OPT_PRIO, &newval,
+ data[IFLA_BOND_SLAVE_PRIO], extack);
+ if (err)
+ return err;
+ }
+
return 0;
}
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 96eef19cffc4..3498db1c1b3c 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -40,6 +40,8 @@ static int bond_option_arp_validate_set(struct bonding *bond,
const struct bond_opt_value *newval);
static int bond_option_arp_all_targets_set(struct bonding *bond,
const struct bond_opt_value *newval);
+static int bond_option_prio_set(struct bonding *bond,
+ const struct bond_opt_value *newval);
static int bond_option_primary_set(struct bonding *bond,
const struct bond_opt_value *newval);
static int bond_option_primary_reselect_set(struct bonding *bond,
@@ -365,6 +367,16 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = {
.values = bond_intmax_tbl,
.set = bond_option_miimon_set
},
+ [BOND_OPT_PRIO] = {
+ .id = BOND_OPT_PRIO,
+ .name = "prio",
+ .desc = "Link priority for failover re-selection",
+ .flags = BOND_OPTFLAG_RAWVAL,
+ .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_ACTIVEBACKUP) |
+ BIT(BOND_MODE_TLB) |
+ BIT(BOND_MODE_ALB)),
+ .set = bond_option_prio_set
+ },
[BOND_OPT_PRIMARY] = {
.id = BOND_OPT_PRIMARY,
.name = "primary",
@@ -1306,6 +1318,27 @@ static int bond_option_missed_max_set(struct bonding *bond,
return 0;
}
+static int bond_option_prio_set(struct bonding *bond,
+ const struct bond_opt_value *newval)
+{
+ struct slave *slave;
+
+ slave = bond_slave_get_rtnl(newval->slave_dev);
+ if (!slave) {
+ netdev_dbg(newval->slave_dev, "%s called on NULL slave\n", __func__);
+ return -ENODEV;
+ }
+ slave->prio = newval->value;
+
+ if (rtnl_dereference(bond->primary_slave))
+ slave_warn(bond->dev, slave->dev,
+ "prio updated, but will not affect failover re-selection as primary slave have been set\n");
+ else
+ bond_select_active_slave(bond);
+
+ return 0;
+}
+
static int bond_option_primary_set(struct bonding *bond,
const struct bond_opt_value *newval)
{
diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig
index 6d1fcb08bba1..702d68ae435a 100644
--- a/drivers/net/dsa/Kconfig
+++ b/drivers/net/dsa/Kconfig
@@ -70,6 +70,15 @@ config NET_DSA_QCA8K
source "drivers/net/dsa/realtek/Kconfig"
+config NET_DSA_RZN1_A5PSW
+ tristate "Renesas RZ/N1 A5PSW Ethernet switch support"
+ depends on OF && ARCH_RZN1
+ select NET_DSA_TAG_RZN1_A5PSW
+ select PCS_RZN1_MIIC
+ help
+ This driver supports the A5PSW switch, which is embedded in the
+ Renesas RZ/N1 SoC.
+
config NET_DSA_SMSC_LAN9303
tristate
select NET_DSA_TAG_LAN9303
diff --git a/drivers/net/dsa/Makefile b/drivers/net/dsa/Makefile
index e73838c12256..b32907afa702 100644
--- a/drivers/net/dsa/Makefile
+++ b/drivers/net/dsa/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_NET_DSA_LANTIQ_GSWIP) += lantiq_gswip.o
obj-$(CONFIG_NET_DSA_MT7530) += mt7530.o
obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o
obj-$(CONFIG_NET_DSA_QCA8K) += qca8k.o
+obj-$(CONFIG_NET_DSA_RZN1_A5PSW) += rzn1_a5psw.o
obj-$(CONFIG_NET_DSA_SMSC_LAN9303) += lan9303-core.o
obj-$(CONFIG_NET_DSA_SMSC_LAN9303_I2C) += lan9303_i2c.o
obj-$(CONFIG_NET_DSA_SMSC_LAN9303_MDIO) += lan9303_mdio.o
diff --git a/drivers/net/dsa/microchip/Kconfig b/drivers/net/dsa/microchip/Kconfig
index c9e2a8989556..2edb88080790 100644
--- a/drivers/net/dsa/microchip/Kconfig
+++ b/drivers/net/dsa/microchip/Kconfig
@@ -1,49 +1,29 @@
# SPDX-License-Identifier: GPL-2.0-only
-config NET_DSA_MICROCHIP_KSZ_COMMON
- select NET_DSA_TAG_KSZ
- tristate
-
-menuconfig NET_DSA_MICROCHIP_KSZ9477
- tristate "Microchip KSZ9477 series switch support"
+menuconfig NET_DSA_MICROCHIP_KSZ_COMMON
+ tristate "Microchip KSZ8795/KSZ9477 series switch support"
depends on NET_DSA
- select NET_DSA_MICROCHIP_KSZ_COMMON
+ select NET_DSA_TAG_KSZ
help
- This driver adds support for Microchip KSZ9477 switch chips.
+ This driver adds support for the Microchip KSZ9477 series and
+ KSZ8795/KSZ88x3 switch chips.
config NET_DSA_MICROCHIP_KSZ9477_I2C
- tristate "KSZ9477 series I2C connected switch driver"
- depends on NET_DSA_MICROCHIP_KSZ9477 && I2C
+ tristate "KSZ series I2C connected switch driver"
+ depends on NET_DSA_MICROCHIP_KSZ_COMMON && I2C
select REGMAP_I2C
help
Select to enable support for registering switches configured through I2C.
-config NET_DSA_MICROCHIP_KSZ9477_SPI
- tristate "KSZ9477 series SPI connected switch driver"
- depends on NET_DSA_MICROCHIP_KSZ9477 && SPI
+config NET_DSA_MICROCHIP_KSZ_SPI
+ tristate "KSZ series SPI connected switch driver"
+ depends on NET_DSA_MICROCHIP_KSZ_COMMON && SPI
select REGMAP_SPI
help
Select to enable support for registering switches configured through SPI.
-menuconfig NET_DSA_MICROCHIP_KSZ8795
- tristate "Microchip KSZ8795 series switch support"
- depends on NET_DSA
- select NET_DSA_MICROCHIP_KSZ_COMMON
- help
- This driver adds support for Microchip KSZ8795/KSZ88X3 switch chips.
-
-config NET_DSA_MICROCHIP_KSZ8795_SPI
- tristate "KSZ8795 series SPI connected switch driver"
- depends on NET_DSA_MICROCHIP_KSZ8795 && SPI
- select REGMAP_SPI
- help
- This driver accesses KSZ8795 chip through SPI.
-
- It is required to use the KSZ8795 switch driver as the only access
- is through SPI.
-
config NET_DSA_MICROCHIP_KSZ8863_SMI
tristate "KSZ series SMI connected switch driver"
- depends on NET_DSA_MICROCHIP_KSZ8795
+ depends on NET_DSA_MICROCHIP_KSZ_COMMON
select MDIO_BITBANG
help
Select to enable support for registering switches configured through
diff --git a/drivers/net/dsa/microchip/Makefile b/drivers/net/dsa/microchip/Makefile
index 2a03b21a3386..b2ba7c1bcb93 100644
--- a/drivers/net/dsa/microchip/Makefile
+++ b/drivers/net/dsa/microchip/Makefile
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_NET_DSA_MICROCHIP_KSZ_COMMON) += ksz_common.o
-obj-$(CONFIG_NET_DSA_MICROCHIP_KSZ9477) += ksz9477.o
+obj-$(CONFIG_NET_DSA_MICROCHIP_KSZ_COMMON) += ksz_switch.o
+ksz_switch-objs := ksz_common.o
+ksz_switch-objs += ksz9477.o
+ksz_switch-objs += ksz8795.o
obj-$(CONFIG_NET_DSA_MICROCHIP_KSZ9477_I2C) += ksz9477_i2c.o
-obj-$(CONFIG_NET_DSA_MICROCHIP_KSZ9477_SPI) += ksz9477_spi.o
-obj-$(CONFIG_NET_DSA_MICROCHIP_KSZ8795) += ksz8795.o
-obj-$(CONFIG_NET_DSA_MICROCHIP_KSZ8795_SPI) += ksz8795_spi.o
+obj-$(CONFIG_NET_DSA_MICROCHIP_KSZ_SPI) += ksz_spi.o
obj-$(CONFIG_NET_DSA_MICROCHIP_KSZ8863_SMI) += ksz8863_smi.o
diff --git a/drivers/net/dsa/microchip/ksz8.h b/drivers/net/dsa/microchip/ksz8.h
index cae76f5e7787..de246989c81b 100644
--- a/drivers/net/dsa/microchip/ksz8.h
+++ b/drivers/net/dsa/microchip/ksz8.h
@@ -9,6 +9,8 @@
#define __KSZ8XXX_H
#include <linux/types.h>
+#include <net/dsa.h>
+#include "ksz_common.h"
enum ksz_regs {
REG_IND_CTRL_0,
@@ -68,4 +70,50 @@ struct ksz8 {
void *priv;
};
+int ksz8_setup(struct dsa_switch *ds);
+u32 ksz8_get_port_addr(int port, int offset);
+void ksz8_cfg_port_member(struct ksz_device *dev, int port, u8 member);
+void ksz8_flush_dyn_mac_table(struct ksz_device *dev, int port);
+void ksz8_port_setup(struct ksz_device *dev, int port, bool cpu_port);
+void ksz8_r_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 *val);
+void ksz8_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val);
+int ksz8_r_dyn_mac_table(struct ksz_device *dev, u16 addr, u8 *mac_addr,
+ u8 *fid, u8 *src_port, u8 *timestamp, u16 *entries);
+int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr,
+ struct alu_struct *alu);
+void ksz8_w_sta_mac_table(struct ksz_device *dev, u16 addr,
+ struct alu_struct *alu);
+void ksz8_r_mib_cnt(struct ksz_device *dev, int port, u16 addr, u64 *cnt);
+void ksz8_r_mib_pkt(struct ksz_device *dev, int port, u16 addr,
+ u64 *dropped, u64 *cnt);
+void ksz8_freeze_mib(struct ksz_device *dev, int port, bool freeze);
+void ksz8_port_init_cnt(struct ksz_device *dev, int port);
+int ksz8_fdb_dump(struct ksz_device *dev, int port,
+ dsa_fdb_dump_cb_t *cb, void *data);
+int ksz8_mdb_add(struct ksz_device *dev, int port,
+ const struct switchdev_obj_port_mdb *mdb, struct dsa_db db);
+int ksz8_mdb_del(struct ksz_device *dev, int port,
+ const struct switchdev_obj_port_mdb *mdb, struct dsa_db db);
+int ksz8_port_vlan_filtering(struct ksz_device *dev, int port, bool flag,
+ struct netlink_ext_ack *extack);
+int ksz8_port_vlan_add(struct ksz_device *dev, int port,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct netlink_ext_ack *extack);
+int ksz8_port_vlan_del(struct ksz_device *dev, int port,
+ const struct switchdev_obj_port_vlan *vlan);
+int ksz8_port_mirror_add(struct ksz_device *dev, int port,
+ struct dsa_mall_mirror_tc_entry *mirror,
+ bool ingress, struct netlink_ext_ack *extack);
+void ksz8_port_mirror_del(struct ksz_device *dev, int port,
+ struct dsa_mall_mirror_tc_entry *mirror);
+int ksz8_get_stp_reg(void);
+void ksz8_get_caps(struct ksz_device *dev, int port,
+ struct phylink_config *config);
+void ksz8_config_cpu_port(struct dsa_switch *ds);
+int ksz8_enable_stp_addr(struct ksz_device *dev);
+int ksz8_reset_switch(struct ksz_device *dev);
+int ksz8_switch_detect(struct ksz_device *dev);
+int ksz8_switch_init(struct ksz_device *dev);
+void ksz8_switch_exit(struct ksz_device *dev);
+
#endif
diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
index 12a599d5e61a..df7d782e3fcd 100644
--- a/drivers/net/dsa/microchip/ksz8795.c
+++ b/drivers/net/dsa/microchip/ksz8795.c
@@ -162,7 +162,7 @@ static int ksz8_ind_write8(struct ksz_device *dev, u8 table, u16 addr, u8 data)
return ret;
}
-static int ksz8_reset_switch(struct ksz_device *dev)
+int ksz8_reset_switch(struct ksz_device *dev)
{
if (ksz_is_ksz88x3(dev)) {
/* reset switch */
@@ -213,7 +213,7 @@ static void ksz8795_set_prio_queue(struct ksz_device *dev, int port, int queue)
true);
}
-static void ksz8_r_mib_cnt(struct ksz_device *dev, int port, u16 addr, u64 *cnt)
+void ksz8_r_mib_cnt(struct ksz_device *dev, int port, u16 addr, u64 *cnt)
{
struct ksz8 *ksz8 = dev->priv;
const u32 *masks;
@@ -334,8 +334,8 @@ static void ksz8863_r_mib_pkt(struct ksz_device *dev, int port, u16 addr,
}
}
-static void ksz8_r_mib_pkt(struct ksz_device *dev, int port, u16 addr,
- u64 *dropped, u64 *cnt)
+void ksz8_r_mib_pkt(struct ksz_device *dev, int port, u16 addr,
+ u64 *dropped, u64 *cnt)
{
if (ksz_is_ksz88x3(dev))
ksz8863_r_mib_pkt(dev, port, addr, dropped, cnt);
@@ -343,7 +343,7 @@ static void ksz8_r_mib_pkt(struct ksz_device *dev, int port, u16 addr,
ksz8795_r_mib_pkt(dev, port, addr, dropped, cnt);
}
-static void ksz8_freeze_mib(struct ksz_device *dev, int port, bool freeze)
+void ksz8_freeze_mib(struct ksz_device *dev, int port, bool freeze)
{
if (ksz_is_ksz88x3(dev))
return;
@@ -358,7 +358,7 @@ static void ksz8_freeze_mib(struct ksz_device *dev, int port, bool freeze)
ksz_cfg(dev, REG_SW_CTRL_6, BIT(port), false);
}
-static void ksz8_port_init_cnt(struct ksz_device *dev, int port)
+void ksz8_port_init_cnt(struct ksz_device *dev, int port)
{
struct ksz_port_mib *mib = &dev->ports[port].mib;
u64 *dropped;
@@ -447,9 +447,8 @@ static int ksz8_valid_dyn_entry(struct ksz_device *dev, u8 *data)
return 0;
}
-static int ksz8_r_dyn_mac_table(struct ksz_device *dev, u16 addr,
- u8 *mac_addr, u8 *fid, u8 *src_port,
- u8 *timestamp, u16 *entries)
+int ksz8_r_dyn_mac_table(struct ksz_device *dev, u16 addr, u8 *mac_addr,
+ u8 *fid, u8 *src_port, u8 *timestamp, u16 *entries)
{
struct ksz8 *ksz8 = dev->priv;
u32 data_hi, data_lo;
@@ -512,8 +511,8 @@ static int ksz8_r_dyn_mac_table(struct ksz_device *dev, u16 addr,
return rc;
}
-static int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr,
- struct alu_struct *alu)
+int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr,
+ struct alu_struct *alu)
{
struct ksz8 *ksz8 = dev->priv;
u32 data_hi, data_lo;
@@ -551,8 +550,8 @@ static int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr,
return -ENXIO;
}
-static void ksz8_w_sta_mac_table(struct ksz_device *dev, u16 addr,
- struct alu_struct *alu)
+void ksz8_w_sta_mac_table(struct ksz_device *dev, u16 addr,
+ struct alu_struct *alu)
{
struct ksz8 *ksz8 = dev->priv;
u32 data_hi, data_lo;
@@ -663,7 +662,7 @@ static void ksz8_w_vlan_table(struct ksz_device *dev, u16 vid, u16 vlan)
ksz8_w_table(dev, TABLE_VLAN, addr, buf);
}
-static void ksz8_r_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 *val)
+void ksz8_r_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 *val)
{
struct ksz8 *ksz8 = dev->priv;
u8 restart, speed, ctrl, link;
@@ -786,7 +785,7 @@ static void ksz8_r_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 *val)
*val = data;
}
-static void ksz8_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val)
+void ksz8_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val)
{
struct ksz8 *ksz8 = dev->priv;
u8 restart, speed, ctrl, data;
@@ -898,30 +897,7 @@ static void ksz8_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val)
}
}
-static enum dsa_tag_protocol ksz8_get_tag_protocol(struct dsa_switch *ds,
- int port,
- enum dsa_tag_protocol mp)
-{
- struct ksz_device *dev = ds->priv;
-
- /* ksz88x3 uses the same tag schema as KSZ9893 */
- return ksz_is_ksz88x3(dev) ?
- DSA_TAG_PROTO_KSZ9893 : DSA_TAG_PROTO_KSZ8795;
-}
-
-static u32 ksz8_sw_get_phy_flags(struct dsa_switch *ds, int port)
-{
- /* Silicon Errata Sheet (DS80000830A):
- * Port 1 does not work with LinkMD Cable-Testing.
- * Port 1 does not respond to received PAUSE control frames.
- */
- if (!port)
- return MICREL_KSZ8_P1_ERRATA;
-
- return 0;
-}
-
-static void ksz8_cfg_port_member(struct ksz_device *dev, int port, u8 member)
+void ksz8_cfg_port_member(struct ksz_device *dev, int port, u8 member)
{
u8 data;
@@ -931,12 +907,7 @@ static void ksz8_cfg_port_member(struct ksz_device *dev, int port, u8 member)
ksz_pwrite8(dev, port, P_MIRROR_CTRL, data);
}
-static void ksz8_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
-{
- ksz_port_stp_state_set(ds, port, state, P_STP_CTRL);
-}
-
-static void ksz8_flush_dyn_mac_table(struct ksz_device *dev, int port)
+void ksz8_flush_dyn_mac_table(struct ksz_device *dev, int port)
{
u8 learn[DSA_MAX_PORTS];
int first, index, cnt;
@@ -969,11 +940,109 @@ static void ksz8_flush_dyn_mac_table(struct ksz_device *dev, int port)
}
}
-static int ksz8_port_vlan_filtering(struct dsa_switch *ds, int port, bool flag,
- struct netlink_ext_ack *extack)
+int ksz8_fdb_dump(struct ksz_device *dev, int port,
+ dsa_fdb_dump_cb_t *cb, void *data)
{
- struct ksz_device *dev = ds->priv;
+ int ret = 0;
+ u16 i = 0;
+ u16 entries = 0;
+ u8 timestamp = 0;
+ u8 fid;
+ u8 member;
+ struct alu_struct alu;
+
+ do {
+ alu.is_static = false;
+ ret = ksz8_r_dyn_mac_table(dev, i, alu.mac, &fid, &member,
+ &timestamp, &entries);
+ if (!ret && (member & BIT(port))) {
+ ret = cb(alu.mac, alu.fid, alu.is_static, data);
+ if (ret)
+ break;
+ }
+ i++;
+ } while (i < entries);
+ if (i >= entries)
+ ret = 0;
+
+ return ret;
+}
+
+int ksz8_mdb_add(struct ksz_device *dev, int port,
+ const struct switchdev_obj_port_mdb *mdb, struct dsa_db db)
+{
+ struct alu_struct alu;
+ int index;
+ int empty = 0;
+
+ alu.port_forward = 0;
+ for (index = 0; index < dev->info->num_statics; index++) {
+ if (!ksz8_r_sta_mac_table(dev, index, &alu)) {
+ /* Found one already in static MAC table. */
+ if (!memcmp(alu.mac, mdb->addr, ETH_ALEN) &&
+ alu.fid == mdb->vid)
+ break;
+ /* Remember the first empty entry. */
+ } else if (!empty) {
+ empty = index + 1;
+ }
+ }
+
+ /* no available entry */
+ if (index == dev->info->num_statics && !empty)
+ return -ENOSPC;
+
+ /* add entry */
+ if (index == dev->info->num_statics) {
+ index = empty - 1;
+ memset(&alu, 0, sizeof(alu));
+ memcpy(alu.mac, mdb->addr, ETH_ALEN);
+ alu.is_static = true;
+ }
+ alu.port_forward |= BIT(port);
+ if (mdb->vid) {
+ alu.is_use_fid = true;
+ /* Need a way to map VID to FID. */
+ alu.fid = mdb->vid;
+ }
+ ksz8_w_sta_mac_table(dev, index, &alu);
+
+ return 0;
+}
+
+int ksz8_mdb_del(struct ksz_device *dev, int port,
+ const struct switchdev_obj_port_mdb *mdb, struct dsa_db db)
+{
+ struct alu_struct alu;
+ int index;
+
+ for (index = 0; index < dev->info->num_statics; index++) {
+ if (!ksz8_r_sta_mac_table(dev, index, &alu)) {
+ /* Found one already in static MAC table. */
+ if (!memcmp(alu.mac, mdb->addr, ETH_ALEN) &&
+ alu.fid == mdb->vid)
+ break;
+ }
+ }
+
+ /* no available entry */
+ if (index == dev->info->num_statics)
+ goto exit;
+
+ /* clear port */
+ alu.port_forward &= ~BIT(port);
+ if (!alu.port_forward)
+ alu.is_static = false;
+ ksz8_w_sta_mac_table(dev, index, &alu);
+
+exit:
+ return 0;
+}
+
+int ksz8_port_vlan_filtering(struct ksz_device *dev, int port, bool flag,
+ struct netlink_ext_ack *extack)
+{
if (ksz_is_ksz88x3(dev))
return -ENOTSUPP;
@@ -998,12 +1067,11 @@ static void ksz8_port_enable_pvid(struct ksz_device *dev, int port, bool state)
}
}
-static int ksz8_port_vlan_add(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan,
- struct netlink_ext_ack *extack)
+int ksz8_port_vlan_add(struct ksz_device *dev, int port,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct netlink_ext_ack *extack)
{
bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
- struct ksz_device *dev = ds->priv;
struct ksz_port *p = &dev->ports[port];
u16 data, new_pvid = 0;
u8 fid, member, valid;
@@ -1071,10 +1139,9 @@ static int ksz8_port_vlan_add(struct dsa_switch *ds, int port,
return 0;
}
-static int ksz8_port_vlan_del(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan)
+int ksz8_port_vlan_del(struct ksz_device *dev, int port,
+ const struct switchdev_obj_port_vlan *vlan)
{
- struct ksz_device *dev = ds->priv;
u16 data, pvid;
u8 fid, member, valid;
@@ -1104,12 +1171,10 @@ static int ksz8_port_vlan_del(struct dsa_switch *ds, int port,
return 0;
}
-static int ksz8_port_mirror_add(struct dsa_switch *ds, int port,
- struct dsa_mall_mirror_tc_entry *mirror,
- bool ingress, struct netlink_ext_ack *extack)
+int ksz8_port_mirror_add(struct ksz_device *dev, int port,
+ struct dsa_mall_mirror_tc_entry *mirror,
+ bool ingress, struct netlink_ext_ack *extack)
{
- struct ksz_device *dev = ds->priv;
-
if (ingress) {
ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_RX, true);
dev->mirror_rx |= BIT(port);
@@ -1128,10 +1193,9 @@ static int ksz8_port_mirror_add(struct dsa_switch *ds, int port,
return 0;
}
-static void ksz8_port_mirror_del(struct dsa_switch *ds, int port,
- struct dsa_mall_mirror_tc_entry *mirror)
+void ksz8_port_mirror_del(struct ksz_device *dev, int port,
+ struct dsa_mall_mirror_tc_entry *mirror)
{
- struct ksz_device *dev = ds->priv;
u8 data;
if (mirror->ingress) {
@@ -1197,7 +1261,7 @@ static void ksz8795_cpu_interface_select(struct ksz_device *dev, int port)
p->phydev.duplex = 1;
}
-static void ksz8_port_setup(struct ksz_device *dev, int port, bool cpu_port)
+void ksz8_port_setup(struct ksz_device *dev, int port, bool cpu_port)
{
struct dsa_switch *ds = dev->ds;
struct ksz8 *ksz8 = dev->priv;
@@ -1234,7 +1298,7 @@ static void ksz8_port_setup(struct ksz_device *dev, int port, bool cpu_port)
ksz8_cfg_port_member(dev, port, member);
}
-static void ksz8_config_cpu_port(struct dsa_switch *ds)
+void ksz8_config_cpu_port(struct dsa_switch *ds)
{
struct ksz_device *dev = ds->priv;
struct ksz8 *ksz8 = dev->priv;
@@ -1258,7 +1322,7 @@ static void ksz8_config_cpu_port(struct dsa_switch *ds)
for (i = 0; i < dev->phy_port_cnt; i++) {
p = &dev->ports[i];
- ksz8_port_stp_state_set(ds, i, BR_STATE_DISABLED);
+ ksz_port_stp_state_set(ds, i, BR_STATE_DISABLED);
/* Last port may be disabled. */
if (i == dev->phy_port_cnt)
@@ -1272,7 +1336,7 @@ static void ksz8_config_cpu_port(struct dsa_switch *ds)
continue;
if (!ksz_is_ksz88x3(dev)) {
ksz_pread8(dev, i, regs[P_REMOTE_STATUS], &remote);
- if (remote & PORT_FIBER_MODE)
+ if (remote & KSZ8_PORT_FIBER_MODE)
p->fiber = 1;
}
if (p->fiber)
@@ -1301,22 +1365,26 @@ static int ksz8_handle_global_errata(struct dsa_switch *ds)
return ret;
}
-static int ksz8_setup(struct dsa_switch *ds)
+int ksz8_enable_stp_addr(struct ksz_device *dev)
{
- struct ksz_device *dev = ds->priv;
struct alu_struct alu;
- int i, ret = 0;
- dev->vlan_cache = devm_kcalloc(dev->dev, sizeof(struct vlan_table),
- dev->info->num_vlans, GFP_KERNEL);
- if (!dev->vlan_cache)
- return -ENOMEM;
+ /* Setup STP address for STP operation. */
+ memset(&alu, 0, sizeof(alu));
+ ether_addr_copy(alu.mac, eth_stp_addr);
+ alu.is_static = true;
+ alu.is_override = true;
+ alu.port_forward = dev->info->cpu_ports;
- ret = ksz8_reset_switch(dev);
- if (ret) {
- dev_err(ds->dev, "failed to reset switch\n");
- return ret;
- }
+ ksz8_w_sta_mac_table(dev, 0, &alu);
+
+ return 0;
+}
+
+int ksz8_setup(struct dsa_switch *ds)
+{
+ struct ksz_device *dev = ds->priv;
+ int i;
ksz_cfg(dev, S_REPLACE_VID_CTRL, SW_FLOW_CTRL, true);
@@ -1335,10 +1403,6 @@ static int ksz8_setup(struct dsa_switch *ds)
UNICAST_VLAN_BOUNDARY | NO_EXC_COLLISION_DROP,
UNICAST_VLAN_BOUNDARY | NO_EXC_COLLISION_DROP);
- ksz8_config_cpu_port(ds);
-
- ksz_cfg(dev, REG_SW_CTRL_2, MULTICAST_STORM_DISABLE, true);
-
ksz_cfg(dev, S_REPLACE_VID_CTRL, SW_REPLACE_VID, false);
ksz_cfg(dev, S_MIRROR_CTRL, SW_MIRROR_RX_TX, false);
@@ -1346,38 +1410,15 @@ static int ksz8_setup(struct dsa_switch *ds)
if (!ksz_is_ksz88x3(dev))
ksz_cfg(dev, REG_SW_CTRL_19, SW_INS_TAG_ENABLE, true);
- /* set broadcast storm protection 10% rate */
- regmap_update_bits(dev->regmap[1], S_REPLACE_VID_CTRL,
- BROADCAST_STORM_RATE,
- (BROADCAST_STORM_VALUE *
- BROADCAST_STORM_PROT_RATE) / 100);
-
for (i = 0; i < (dev->info->num_vlans / 4); i++)
ksz8_r_vlan_entries(dev, i);
- /* Setup STP address for STP operation. */
- memset(&alu, 0, sizeof(alu));
- ether_addr_copy(alu.mac, eth_stp_addr);
- alu.is_static = true;
- alu.is_override = true;
- alu.port_forward = dev->info->cpu_ports;
-
- ksz8_w_sta_mac_table(dev, 0, &alu);
-
- ksz_init_mib_timer(dev);
-
- ds->configure_vlan_while_not_filtering = false;
-
return ksz8_handle_global_errata(ds);
}
-static void ksz8_get_caps(struct dsa_switch *ds, int port,
- struct phylink_config *config)
+void ksz8_get_caps(struct ksz_device *dev, int port,
+ struct phylink_config *config)
{
- struct ksz_device *dev = ds->priv;
-
- ksz_phylink_get_caps(ds, port, config);
-
config->mac_capabilities = MAC_10 | MAC_100;
/* Silicon Errata Sheet (DS80000830A):
@@ -1393,88 +1434,15 @@ static void ksz8_get_caps(struct dsa_switch *ds, int port,
config->mac_capabilities |= MAC_ASYM_PAUSE;
}
-static const struct dsa_switch_ops ksz8_switch_ops = {
- .get_tag_protocol = ksz8_get_tag_protocol,
- .get_phy_flags = ksz8_sw_get_phy_flags,
- .setup = ksz8_setup,
- .phy_read = ksz_phy_read16,
- .phy_write = ksz_phy_write16,
- .phylink_get_caps = ksz8_get_caps,
- .phylink_mac_link_down = ksz_mac_link_down,
- .port_enable = ksz_enable_port,
- .get_strings = ksz_get_strings,
- .get_ethtool_stats = ksz_get_ethtool_stats,
- .get_sset_count = ksz_sset_count,
- .port_bridge_join = ksz_port_bridge_join,
- .port_bridge_leave = ksz_port_bridge_leave,
- .port_stp_state_set = ksz8_port_stp_state_set,
- .port_fast_age = ksz_port_fast_age,
- .port_vlan_filtering = ksz8_port_vlan_filtering,
- .port_vlan_add = ksz8_port_vlan_add,
- .port_vlan_del = ksz8_port_vlan_del,
- .port_fdb_dump = ksz_port_fdb_dump,
- .port_mdb_add = ksz_port_mdb_add,
- .port_mdb_del = ksz_port_mdb_del,
- .port_mirror_add = ksz8_port_mirror_add,
- .port_mirror_del = ksz8_port_mirror_del,
-};
-
-static u32 ksz8_get_port_addr(int port, int offset)
+u32 ksz8_get_port_addr(int port, int offset)
{
return PORT_CTRL_ADDR(port, offset);
}
-static int ksz8_switch_detect(struct ksz_device *dev)
-{
- u8 id1, id2;
- u16 id16;
- int ret;
-
- /* read chip id */
- ret = ksz_read16(dev, REG_CHIP_ID0, &id16);
- if (ret)
- return ret;
-
- id1 = id16 >> 8;
- id2 = id16 & SW_CHIP_ID_M;
-
- switch (id1) {
- case KSZ87_FAMILY_ID:
- if ((id2 != CHIP_ID_94 && id2 != CHIP_ID_95))
- return -ENODEV;
-
- if (id2 == CHIP_ID_95) {
- u8 val;
-
- id2 = 0x95;
- ksz_read8(dev, REG_PORT_STATUS_0, &val);
- if (val & PORT_FIBER_MODE)
- id2 = 0x65;
- } else if (id2 == CHIP_ID_94) {
- id2 = 0x94;
- }
- break;
- case KSZ88_FAMILY_ID:
- if (id2 != CHIP_ID_63)
- return -ENODEV;
- break;
- default:
- dev_err(dev->dev, "invalid family id: %d\n", id1);
- return -ENODEV;
- }
- id16 &= ~0xff;
- id16 |= id2;
- dev->chip_id = id16;
-
- return 0;
-}
-
-static int ksz8_switch_init(struct ksz_device *dev)
+int ksz8_switch_init(struct ksz_device *dev)
{
struct ksz8 *ksz8 = dev->priv;
- dev->ds->ops = &ksz8_switch_ops;
-
dev->cpu_port = fls(dev->info->cpu_ports) - 1;
dev->phy_port_cnt = dev->info->port_cnt - 1;
dev->port_mask = (BIT(dev->phy_port_cnt) - 1) | dev->info->cpu_ports;
@@ -1502,37 +1470,11 @@ static int ksz8_switch_init(struct ksz_device *dev)
return 0;
}
-static void ksz8_switch_exit(struct ksz_device *dev)
+void ksz8_switch_exit(struct ksz_device *dev)
{
ksz8_reset_switch(dev);
}
-static const struct ksz_dev_ops ksz8_dev_ops = {
- .get_port_addr = ksz8_get_port_addr,
- .cfg_port_member = ksz8_cfg_port_member,
- .flush_dyn_mac_table = ksz8_flush_dyn_mac_table,
- .port_setup = ksz8_port_setup,
- .r_phy = ksz8_r_phy,
- .w_phy = ksz8_w_phy,
- .r_dyn_mac_table = ksz8_r_dyn_mac_table,
- .r_sta_mac_table = ksz8_r_sta_mac_table,
- .w_sta_mac_table = ksz8_w_sta_mac_table,
- .r_mib_cnt = ksz8_r_mib_cnt,
- .r_mib_pkt = ksz8_r_mib_pkt,
- .freeze_mib = ksz8_freeze_mib,
- .port_init_cnt = ksz8_port_init_cnt,
- .shutdown = ksz8_reset_switch,
- .detect = ksz8_switch_detect,
- .init = ksz8_switch_init,
- .exit = ksz8_switch_exit,
-};
-
-int ksz8_switch_register(struct ksz_device *dev)
-{
- return ksz_switch_register(dev, &ksz8_dev_ops);
-}
-EXPORT_SYMBOL(ksz8_switch_register);
-
MODULE_AUTHOR("Tristram Ha <Tristram.Ha@microchip.com>");
MODULE_DESCRIPTION("Microchip KSZ8795 Series Switch DSA Driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/dsa/microchip/ksz8795_reg.h b/drivers/net/dsa/microchip/ksz8795_reg.h
index 4109433b6b6c..32d985296520 100644
--- a/drivers/net/dsa/microchip/ksz8795_reg.h
+++ b/drivers/net/dsa/microchip/ksz8795_reg.h
@@ -14,22 +14,8 @@
#define KS_PRIO_M 0x3
#define KS_PRIO_S 2
-#define REG_CHIP_ID0 0x00
-
-#define KSZ87_FAMILY_ID 0x87
-#define KSZ88_FAMILY_ID 0x88
-
-#define REG_CHIP_ID1 0x01
-
-#define SW_CHIP_ID_M 0xF0
-#define SW_CHIP_ID_S 4
#define SW_REVISION_M 0x0E
#define SW_REVISION_S 1
-#define SW_START 0x01
-
-#define CHIP_ID_94 0x60
-#define CHIP_ID_95 0x90
-#define CHIP_ID_63 0x30
#define KSZ8863_REG_SW_RESET 0x43
@@ -57,7 +43,6 @@
#define REG_SW_CTRL_2 0x04
#define UNICAST_VLAN_BOUNDARY BIT(7)
-#define MULTICAST_STORM_DISABLE BIT(6)
#define SW_BACK_PRESSURE BIT(5)
#define FAIR_FLOW_CTRL BIT(4)
#define NO_EXC_COLLISION_DROP BIT(3)
@@ -77,13 +62,9 @@
#define SW_FLOW_CTRL BIT(5)
#define SW_10_MBIT BIT(4)
#define SW_REPLACE_VID BIT(3)
-#define BROADCAST_STORM_RATE_HI 0x07
#define REG_SW_CTRL_5 0x07
-#define BROADCAST_STORM_RATE_LO 0xFF
-#define BROADCAST_STORM_RATE 0x07FF
-
#define REG_SW_CTRL_6 0x08
#define SW_MIB_COUNTER_FLUSH BIT(7)
@@ -217,8 +198,6 @@
#define REG_PORT_4_STATUS_0 0x48
/* For KSZ8765. */
-#define PORT_FIBER_MODE BIT(7)
-
#define PORT_REMOTE_ASYM_PAUSE BIT(5)
#define PORT_REMOTE_SYM_PAUSE BIT(4)
#define PORT_REMOTE_100BTX_FD BIT(3)
@@ -322,7 +301,6 @@
#define REG_PORT_CTRL_5 0x05
-#define REG_PORT_STATUS_0 0x08
#define REG_PORT_STATUS_1 0x09
#define REG_PORT_LINK_MD_CTRL 0x0A
#define REG_PORT_LINK_MD_RESULT 0x0B
@@ -813,12 +791,6 @@
#define REG_IND_EEE_GLOB2_LO 0x34
#define REG_IND_EEE_GLOB2_HI 0x35
-/* Driver set switch broadcast storm protection at 10% rate. */
-#define BROADCAST_STORM_PROT_RATE 10
-
-/* 148,800 frames * 67 ms / 100 */
-#define BROADCAST_STORM_VALUE 9969
-
/**
* MIB_COUNTER_VALUE 00-00000000-3FFFFFFF
* MIB_TOTAL_BYTES 00-0000000F-FFFFFFFF
diff --git a/drivers/net/dsa/microchip/ksz8863_smi.c b/drivers/net/dsa/microchip/ksz8863_smi.c
index b6f99e641dca..d71df05b8b7b 100644
--- a/drivers/net/dsa/microchip/ksz8863_smi.c
+++ b/drivers/net/dsa/microchip/ksz8863_smi.c
@@ -174,7 +174,7 @@ static int ksz8863_smi_probe(struct mdio_device *mdiodev)
if (mdiodev->dev.platform_data)
dev->pdata = mdiodev->dev.platform_data;
- ret = ksz8_switch_register(dev);
+ ret = ksz_switch_register(dev);
/* Main DSA driver may not be started yet. */
if (ret)
diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c
index ab40b700cf1a..fa498ad8ca40 100644
--- a/drivers/net/dsa/microchip/ksz9477.c
+++ b/drivers/net/dsa/microchip/ksz9477.c
@@ -17,6 +17,7 @@
#include "ksz9477_reg.h"
#include "ksz_common.h"
+#include "ksz9477.h"
/* Used with variable features to indicate capabilities. */
#define GBIT_SUPPORT BIT(0)
@@ -47,9 +48,8 @@ static void ksz9477_port_cfg32(struct ksz_device *dev, int port, int offset,
bits, set ? bits : 0);
}
-static int ksz9477_change_mtu(struct dsa_switch *ds, int port, int mtu)
+int ksz9477_change_mtu(struct ksz_device *dev, int port, int mtu)
{
- struct ksz_device *dev = ds->priv;
u16 frame_size, max_frame = 0;
int i;
@@ -65,7 +65,7 @@ static int ksz9477_change_mtu(struct dsa_switch *ds, int port, int mtu)
REG_SW_MTU_MASK, max_frame);
}
-static int ksz9477_max_mtu(struct dsa_switch *ds, int port)
+int ksz9477_max_mtu(struct ksz_device *dev, int port)
{
return KSZ9477_MAX_FRAME_SIZE - VLAN_ETH_HLEN - ETH_FCS_LEN;
}
@@ -175,7 +175,7 @@ static int ksz9477_wait_alu_sta_ready(struct ksz_device *dev)
10, 1000);
}
-static int ksz9477_reset_switch(struct ksz_device *dev)
+int ksz9477_reset_switch(struct ksz_device *dev)
{
u8 data8;
u32 data32;
@@ -198,12 +198,6 @@ static int ksz9477_reset_switch(struct ksz_device *dev)
ksz_write32(dev, REG_SW_PORT_INT_MASK__4, 0x7F);
ksz_read32(dev, REG_SW_PORT_INT_STATUS__4, &data32);
- /* set broadcast storm protection 10% rate */
- regmap_update_bits(dev->regmap[1], REG_SW_MAC_CTRL_2,
- BROADCAST_STORM_RATE,
- (BROADCAST_STORM_VALUE *
- BROADCAST_STORM_PROT_RATE) / 100);
-
data8 = SW_ENABLE_REFCLKO;
if (dev->synclko_disable)
data8 = 0;
@@ -214,8 +208,7 @@ static int ksz9477_reset_switch(struct ksz_device *dev)
return 0;
}
-static void ksz9477_r_mib_cnt(struct ksz_device *dev, int port, u16 addr,
- u64 *cnt)
+void ksz9477_r_mib_cnt(struct ksz_device *dev, int port, u16 addr, u64 *cnt)
{
struct ksz_port *p = &dev->ports[port];
unsigned int val;
@@ -242,14 +235,14 @@ static void ksz9477_r_mib_cnt(struct ksz_device *dev, int port, u16 addr,
*cnt += data;
}
-static void ksz9477_r_mib_pkt(struct ksz_device *dev, int port, u16 addr,
- u64 *dropped, u64 *cnt)
+void ksz9477_r_mib_pkt(struct ksz_device *dev, int port, u16 addr,
+ u64 *dropped, u64 *cnt)
{
addr = dev->info->mib_names[addr].index;
ksz9477_r_mib_cnt(dev, port, addr, cnt);
}
-static void ksz9477_freeze_mib(struct ksz_device *dev, int port, bool freeze)
+void ksz9477_freeze_mib(struct ksz_device *dev, int port, bool freeze)
{
u32 val = freeze ? MIB_COUNTER_FLUSH_FREEZE : 0;
struct ksz_port *p = &dev->ports[port];
@@ -263,7 +256,7 @@ static void ksz9477_freeze_mib(struct ksz_device *dev, int port, bool freeze)
mutex_unlock(&p->mib.cnt_mutex);
}
-static void ksz9477_port_init_cnt(struct ksz_device *dev, int port)
+void ksz9477_port_init_cnt(struct ksz_device *dev, int port)
{
struct ksz_port_mib *mib = &dev->ports[port].mib;
@@ -276,21 +269,8 @@ static void ksz9477_port_init_cnt(struct ksz_device *dev, int port)
mutex_unlock(&mib->cnt_mutex);
}
-static enum dsa_tag_protocol ksz9477_get_tag_protocol(struct dsa_switch *ds,
- int port,
- enum dsa_tag_protocol mp)
-{
- enum dsa_tag_protocol proto = DSA_TAG_PROTO_KSZ9477;
- struct ksz_device *dev = ds->priv;
-
- if (dev->features & IS_9893)
- proto = DSA_TAG_PROTO_KSZ9893;
- return proto;
-}
-
-static int ksz9477_phy_read16(struct dsa_switch *ds, int addr, int reg)
+void ksz9477_r_phy(struct ksz_device *dev, u16 addr, u16 reg, u16 *data)
{
- struct ksz_device *dev = ds->priv;
u16 val = 0xffff;
/* No real PHY after this. Simulate the PHY.
@@ -335,39 +315,28 @@ static int ksz9477_phy_read16(struct dsa_switch *ds, int addr, int reg)
ksz_pread16(dev, addr, 0x100 + (reg << 1), &val);
}
- return val;
+ *data = val;
}
-static int ksz9477_phy_write16(struct dsa_switch *ds, int addr, int reg,
- u16 val)
+void ksz9477_w_phy(struct ksz_device *dev, u16 addr, u16 reg, u16 val)
{
- struct ksz_device *dev = ds->priv;
-
/* No real PHY after this. */
if (addr >= dev->phy_port_cnt)
- return 0;
+ return;
/* No gigabit support. Do not write to this register. */
if (!(dev->features & GBIT_SUPPORT) && reg == MII_CTRL1000)
- return 0;
- ksz_pwrite16(dev, addr, 0x100 + (reg << 1), val);
+ return;
- return 0;
+ ksz_pwrite16(dev, addr, 0x100 + (reg << 1), val);
}
-static void ksz9477_cfg_port_member(struct ksz_device *dev, int port,
- u8 member)
+void ksz9477_cfg_port_member(struct ksz_device *dev, int port, u8 member)
{
ksz_pwrite32(dev, port, REG_PORT_VLAN_MEMBERSHIP__4, member);
}
-static void ksz9477_port_stp_state_set(struct dsa_switch *ds, int port,
- u8 state)
-{
- ksz_port_stp_state_set(ds, port, state, P_STP_CTRL);
-}
-
-static void ksz9477_flush_dyn_mac_table(struct ksz_device *dev, int port)
+void ksz9477_flush_dyn_mac_table(struct ksz_device *dev, int port)
{
u8 data;
@@ -389,12 +358,9 @@ static void ksz9477_flush_dyn_mac_table(struct ksz_device *dev, int port)
}
}
-static int ksz9477_port_vlan_filtering(struct dsa_switch *ds, int port,
- bool flag,
- struct netlink_ext_ack *extack)
+int ksz9477_port_vlan_filtering(struct ksz_device *dev, int port,
+ bool flag, struct netlink_ext_ack *extack)
{
- struct ksz_device *dev = ds->priv;
-
if (flag) {
ksz_port_cfg(dev, port, REG_PORT_LUE_CTRL,
PORT_VLAN_LOOKUP_VID_0, true);
@@ -408,11 +374,10 @@ static int ksz9477_port_vlan_filtering(struct dsa_switch *ds, int port,
return 0;
}
-static int ksz9477_port_vlan_add(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan,
- struct netlink_ext_ack *extack)
+int ksz9477_port_vlan_add(struct ksz_device *dev, int port,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct netlink_ext_ack *extack)
{
- struct ksz_device *dev = ds->priv;
u32 vlan_table[3];
bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
int err;
@@ -445,10 +410,9 @@ static int ksz9477_port_vlan_add(struct dsa_switch *ds, int port,
return 0;
}
-static int ksz9477_port_vlan_del(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan)
+int ksz9477_port_vlan_del(struct ksz_device *dev, int port,
+ const struct switchdev_obj_port_vlan *vlan)
{
- struct ksz_device *dev = ds->priv;
bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
u32 vlan_table[3];
u16 pvid;
@@ -479,11 +443,9 @@ static int ksz9477_port_vlan_del(struct dsa_switch *ds, int port,
return 0;
}
-static int ksz9477_port_fdb_add(struct dsa_switch *ds, int port,
- const unsigned char *addr, u16 vid,
- struct dsa_db db)
+int ksz9477_fdb_add(struct ksz_device *dev, int port,
+ const unsigned char *addr, u16 vid, struct dsa_db db)
{
- struct ksz_device *dev = ds->priv;
u32 alu_table[4];
u32 data;
int ret = 0;
@@ -537,11 +499,9 @@ exit:
return ret;
}
-static int ksz9477_port_fdb_del(struct dsa_switch *ds, int port,
- const unsigned char *addr, u16 vid,
- struct dsa_db db)
+int ksz9477_fdb_del(struct ksz_device *dev, int port,
+ const unsigned char *addr, u16 vid, struct dsa_db db)
{
- struct ksz_device *dev = ds->priv;
u32 alu_table[4];
u32 data;
int ret = 0;
@@ -628,10 +588,9 @@ static void ksz9477_convert_alu(struct alu_struct *alu, u32 *alu_table)
alu->mac[5] = alu_table[3] & 0xFF;
}
-static int ksz9477_port_fdb_dump(struct dsa_switch *ds, int port,
- dsa_fdb_dump_cb_t *cb, void *data)
+int ksz9477_fdb_dump(struct ksz_device *dev, int port,
+ dsa_fdb_dump_cb_t *cb, void *data)
{
- struct ksz_device *dev = ds->priv;
int ret = 0;
u32 ksz_data;
u32 alu_table[4];
@@ -680,11 +639,9 @@ exit:
return ret;
}
-static int ksz9477_port_mdb_add(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_mdb *mdb,
- struct dsa_db db)
+int ksz9477_mdb_add(struct ksz_device *dev, int port,
+ const struct switchdev_obj_port_mdb *mdb, struct dsa_db db)
{
- struct ksz_device *dev = ds->priv;
u32 static_table[4];
u32 data;
int index;
@@ -756,11 +713,9 @@ exit:
return err;
}
-static int ksz9477_port_mdb_del(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_mdb *mdb,
- struct dsa_db db)
+int ksz9477_mdb_del(struct ksz_device *dev, int port,
+ const struct switchdev_obj_port_mdb *mdb, struct dsa_db db)
{
- struct ksz_device *dev = ds->priv;
u32 static_table[4];
u32 data;
int index;
@@ -832,11 +787,10 @@ exit:
return ret;
}
-static int ksz9477_port_mirror_add(struct dsa_switch *ds, int port,
- struct dsa_mall_mirror_tc_entry *mirror,
- bool ingress, struct netlink_ext_ack *extack)
+int ksz9477_port_mirror_add(struct ksz_device *dev, int port,
+ struct dsa_mall_mirror_tc_entry *mirror,
+ bool ingress, struct netlink_ext_ack *extack)
{
- struct ksz_device *dev = ds->priv;
u8 data;
int p;
@@ -872,10 +826,9 @@ static int ksz9477_port_mirror_add(struct dsa_switch *ds, int port,
return 0;
}
-static void ksz9477_port_mirror_del(struct dsa_switch *ds, int port,
- struct dsa_mall_mirror_tc_entry *mirror)
+void ksz9477_port_mirror_del(struct ksz_device *dev, int port,
+ struct dsa_mall_mirror_tc_entry *mirror)
{
- struct ksz_device *dev = ds->priv;
bool in_use = false;
u8 data;
int p;
@@ -1097,16 +1050,17 @@ static void ksz9477_phy_errata_setup(struct ksz_device *dev, int port)
ksz9477_port_mmd_write(dev, port, 0x1c, 0x20, 0xeeee);
}
-static void ksz9477_get_caps(struct dsa_switch *ds, int port,
- struct phylink_config *config)
+void ksz9477_get_caps(struct ksz_device *dev, int port,
+ struct phylink_config *config)
{
- ksz_phylink_get_caps(ds, port, config);
+ config->mac_capabilities = MAC_10 | MAC_100 | MAC_ASYM_PAUSE |
+ MAC_SYM_PAUSE;
- config->mac_capabilities = MAC_10 | MAC_100 | MAC_1000FD |
- MAC_ASYM_PAUSE | MAC_SYM_PAUSE;
+ if (dev->features & GBIT_SUPPORT)
+ config->mac_capabilities |= MAC_1000FD;
}
-static void ksz9477_port_setup(struct ksz_device *dev, int port, bool cpu_port)
+void ksz9477_port_setup(struct ksz_device *dev, int port, bool cpu_port)
{
struct ksz_port *p = &dev->ports[port];
struct dsa_switch *ds = dev->ds;
@@ -1203,7 +1157,7 @@ static void ksz9477_port_setup(struct ksz_device *dev, int port, bool cpu_port)
ksz_pread16(dev, port, REG_PORT_PHY_INT_ENABLE, &data16);
}
-static void ksz9477_config_cpu_port(struct dsa_switch *ds)
+void ksz9477_config_cpu_port(struct dsa_switch *ds)
{
struct ksz_device *dev = ds->priv;
struct ksz_port *p;
@@ -1260,7 +1214,7 @@ static void ksz9477_config_cpu_port(struct dsa_switch *ds)
continue;
p = &dev->ports[i];
- ksz9477_port_stp_state_set(ds, i, BR_STATE_DISABLED);
+ ksz_port_stp_state_set(ds, i, BR_STATE_DISABLED);
p->on = 1;
if (i < dev->phy_port_cnt)
p->phy = 1;
@@ -1273,22 +1227,41 @@ static void ksz9477_config_cpu_port(struct dsa_switch *ds)
}
}
-static int ksz9477_setup(struct dsa_switch *ds)
+int ksz9477_enable_stp_addr(struct ksz_device *dev)
{
- struct ksz_device *dev = ds->priv;
- int ret = 0;
+ u32 data;
+ int ret;
- dev->vlan_cache = devm_kcalloc(dev->dev, sizeof(struct vlan_table),
- dev->info->num_vlans, GFP_KERNEL);
- if (!dev->vlan_cache)
- return -ENOMEM;
+ /* Enable the Reserved Multicast table */
+ ksz_cfg(dev, REG_SW_LUE_CTRL_0, SW_RESV_MCAST_ENABLE, true);
- ret = ksz9477_reset_switch(dev);
- if (ret) {
- dev_err(ds->dev, "failed to reset switch\n");
+ /* Set the Override bit to forward BPDU packets to the CPU */
+ ret = ksz_write32(dev, REG_SW_ALU_VAL_B,
+ ALU_V_OVERRIDE | BIT(dev->cpu_port));
+ if (ret < 0)
+ return ret;
+
+ data = ALU_STAT_START | ALU_RESV_MCAST_ADDR;
+
+ ret = ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data);
+ if (ret < 0)
+ return ret;
+
+ /* Wait for the ALU operation to complete. */
+ ret = ksz9477_wait_alu_sta_ready(dev);
+ if (ret < 0) {
+ dev_err(dev->dev, "Failed to update Reserved Multicast table\n");
return ret;
}
+ return 0;
+}
+
+int ksz9477_setup(struct dsa_switch *ds)
+{
+ struct ksz_device *dev = ds->priv;
+ int ret = 0;
+
/* Required for port partitioning. */
ksz9477_cfg32(dev, REG_SW_QM_CTRL__4, UNICAST_VLAN_BOUNDARY,
true);
@@ -1305,69 +1278,27 @@ static int ksz9477_setup(struct dsa_switch *ds)
if (ret)
return ret;
- ksz9477_config_cpu_port(ds);
-
- ksz_cfg(dev, REG_SW_MAC_CTRL_1, MULTICAST_STORM_DISABLE, true);
-
/* queue based egress rate limit */
ksz_cfg(dev, REG_SW_MAC_CTRL_5, SW_OUT_RATE_LIMIT_QUEUE_BASED, true);
/* enable global MIB counter freeze function */
ksz_cfg(dev, REG_SW_MAC_CTRL_6, SW_MIB_COUNTER_FREEZE, true);
- /* start switch */
- ksz_cfg(dev, REG_SW_OPERATION, SW_START, true);
-
- ksz_init_mib_timer(dev);
-
- ds->configure_vlan_while_not_filtering = false;
-
return 0;
}
-static const struct dsa_switch_ops ksz9477_switch_ops = {
- .get_tag_protocol = ksz9477_get_tag_protocol,
- .setup = ksz9477_setup,
- .phy_read = ksz9477_phy_read16,
- .phy_write = ksz9477_phy_write16,
- .phylink_mac_link_down = ksz_mac_link_down,
- .phylink_get_caps = ksz9477_get_caps,
- .port_enable = ksz_enable_port,
- .get_strings = ksz_get_strings,
- .get_ethtool_stats = ksz_get_ethtool_stats,
- .get_sset_count = ksz_sset_count,
- .port_bridge_join = ksz_port_bridge_join,
- .port_bridge_leave = ksz_port_bridge_leave,
- .port_stp_state_set = ksz9477_port_stp_state_set,
- .port_fast_age = ksz_port_fast_age,
- .port_vlan_filtering = ksz9477_port_vlan_filtering,
- .port_vlan_add = ksz9477_port_vlan_add,
- .port_vlan_del = ksz9477_port_vlan_del,
- .port_fdb_dump = ksz9477_port_fdb_dump,
- .port_fdb_add = ksz9477_port_fdb_add,
- .port_fdb_del = ksz9477_port_fdb_del,
- .port_mdb_add = ksz9477_port_mdb_add,
- .port_mdb_del = ksz9477_port_mdb_del,
- .port_mirror_add = ksz9477_port_mirror_add,
- .port_mirror_del = ksz9477_port_mirror_del,
- .get_stats64 = ksz_get_stats64,
- .port_change_mtu = ksz9477_change_mtu,
- .port_max_mtu = ksz9477_max_mtu,
-};
-
-static u32 ksz9477_get_port_addr(int port, int offset)
+u32 ksz9477_get_port_addr(int port, int offset)
{
return PORT_CTRL_ADDR(port, offset);
}
-static int ksz9477_switch_detect(struct ksz_device *dev)
+int ksz9477_switch_init(struct ksz_device *dev)
{
u8 data8;
- u8 id_hi;
- u8 id_lo;
- u32 id32;
int ret;
+ dev->port_mask = (1 << dev->info->port_cnt) - 1;
+
/* turn off SPI DO Edge select */
ret = ksz_read8(dev, REG_SW_GLOBAL_SERIAL_CTRL_0, &data8);
if (ret)
@@ -1378,10 +1309,6 @@ static int ksz9477_switch_detect(struct ksz_device *dev)
if (ret)
return ret;
- /* read chip id */
- ret = ksz_read32(dev, REG_CHIP_ID0__1, &id32);
- if (ret)
- return ret;
ret = ksz_read8(dev, REG_GLOBAL_OPTIONS, &data8);
if (ret)
return ret;
@@ -1392,12 +1319,7 @@ static int ksz9477_switch_detect(struct ksz_device *dev)
/* Default capability is gigabit support. */
dev->features = GBIT_SUPPORT;
- dev_dbg(dev->dev, "Switch detect: ID=%08x%02x\n", id32, data8);
- id_hi = (u8)(id32 >> 16);
- id_lo = (u8)(id32 >> 8);
- if ((id_lo & 0xf) == 3) {
- /* Chip is from KSZ9893 design. */
- dev_info(dev->dev, "Found KSZ9893\n");
+ if (dev->chip_id == KSZ9893_CHIP_ID) {
dev->features |= IS_9893;
/* Chip does not support gigabit. */
@@ -1405,7 +1327,6 @@ static int ksz9477_switch_detect(struct ksz_device *dev)
dev->features &= ~GBIT_SUPPORT;
dev->phy_port_cnt = 2;
} else {
- dev_info(dev->dev, "Found KSZ9477 or compatible\n");
/* Chip uses new XMII register definitions. */
dev->features |= NEW_XMII;
@@ -1414,72 +1335,14 @@ static int ksz9477_switch_detect(struct ksz_device *dev)
dev->features &= ~GBIT_SUPPORT;
}
- /* Change chip id to known ones so it can be matched against them. */
- id32 = (id_hi << 16) | (id_lo << 8);
-
- dev->chip_id = id32;
-
return 0;
}
-static int ksz9477_switch_init(struct ksz_device *dev)
-{
- dev->ds->ops = &ksz9477_switch_ops;
-
- dev->port_mask = (1 << dev->info->port_cnt) - 1;
-
- return 0;
-}
-
-static void ksz9477_switch_exit(struct ksz_device *dev)
+void ksz9477_switch_exit(struct ksz_device *dev)
{
ksz9477_reset_switch(dev);
}
-static const struct ksz_dev_ops ksz9477_dev_ops = {
- .get_port_addr = ksz9477_get_port_addr,
- .cfg_port_member = ksz9477_cfg_port_member,
- .flush_dyn_mac_table = ksz9477_flush_dyn_mac_table,
- .port_setup = ksz9477_port_setup,
- .r_mib_cnt = ksz9477_r_mib_cnt,
- .r_mib_pkt = ksz9477_r_mib_pkt,
- .r_mib_stat64 = ksz_r_mib_stats64,
- .freeze_mib = ksz9477_freeze_mib,
- .port_init_cnt = ksz9477_port_init_cnt,
- .shutdown = ksz9477_reset_switch,
- .detect = ksz9477_switch_detect,
- .init = ksz9477_switch_init,
- .exit = ksz9477_switch_exit,
-};
-
-int ksz9477_switch_register(struct ksz_device *dev)
-{
- int ret, i;
- struct phy_device *phydev;
-
- ret = ksz_switch_register(dev, &ksz9477_dev_ops);
- if (ret)
- return ret;
-
- for (i = 0; i < dev->phy_port_cnt; ++i) {
- if (!dsa_is_user_port(dev->ds, i))
- continue;
-
- phydev = dsa_to_port(dev->ds, i)->slave->phydev;
-
- /* The MAC actually cannot run in 1000 half-duplex mode. */
- phy_remove_link_mode(phydev,
- ETHTOOL_LINK_MODE_1000baseT_Half_BIT);
-
- /* PHY does not support gigabit. */
- if (!(dev->features & GBIT_SUPPORT))
- phy_remove_link_mode(phydev,
- ETHTOOL_LINK_MODE_1000baseT_Full_BIT);
- }
- return ret;
-}
-EXPORT_SYMBOL(ksz9477_switch_register);
-
MODULE_AUTHOR("Woojung Huh <Woojung.Huh@microchip.com>");
MODULE_DESCRIPTION("Microchip KSZ9477 Series Switch DSA Driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/dsa/microchip/ksz9477.h b/drivers/net/dsa/microchip/ksz9477.h
new file mode 100644
index 000000000000..cd278b307b3c
--- /dev/null
+++ b/drivers/net/dsa/microchip/ksz9477.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Microchip KSZ9477 series header file
+ *
+ * Copyright (C) 2017-2022 Microchip Technology Inc.
+ */
+
+#ifndef __KSZ9477_H
+#define __KSZ9477_H
+
+#include <net/dsa.h>
+#include "ksz_common.h"
+
+int ksz9477_setup(struct dsa_switch *ds);
+u32 ksz9477_get_port_addr(int port, int offset);
+void ksz9477_cfg_port_member(struct ksz_device *dev, int port, u8 member);
+void ksz9477_flush_dyn_mac_table(struct ksz_device *dev, int port);
+void ksz9477_port_setup(struct ksz_device *dev, int port, bool cpu_port);
+void ksz9477_r_phy(struct ksz_device *dev, u16 addr, u16 reg, u16 *data);
+void ksz9477_w_phy(struct ksz_device *dev, u16 addr, u16 reg, u16 val);
+void ksz9477_r_mib_cnt(struct ksz_device *dev, int port, u16 addr, u64 *cnt);
+void ksz9477_r_mib_pkt(struct ksz_device *dev, int port, u16 addr,
+ u64 *dropped, u64 *cnt);
+void ksz9477_freeze_mib(struct ksz_device *dev, int port, bool freeze);
+void ksz9477_port_init_cnt(struct ksz_device *dev, int port);
+int ksz9477_port_vlan_filtering(struct ksz_device *dev, int port,
+ bool flag, struct netlink_ext_ack *extack);
+int ksz9477_port_vlan_add(struct ksz_device *dev, int port,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct netlink_ext_ack *extack);
+int ksz9477_port_vlan_del(struct ksz_device *dev, int port,
+ const struct switchdev_obj_port_vlan *vlan);
+int ksz9477_port_mirror_add(struct ksz_device *dev, int port,
+ struct dsa_mall_mirror_tc_entry *mirror,
+ bool ingress, struct netlink_ext_ack *extack);
+void ksz9477_port_mirror_del(struct ksz_device *dev, int port,
+ struct dsa_mall_mirror_tc_entry *mirror);
+int ksz9477_get_stp_reg(void);
+void ksz9477_get_caps(struct ksz_device *dev, int port,
+ struct phylink_config *config);
+int ksz9477_fdb_dump(struct ksz_device *dev, int port,
+ dsa_fdb_dump_cb_t *cb, void *data);
+int ksz9477_fdb_add(struct ksz_device *dev, int port,
+ const unsigned char *addr, u16 vid, struct dsa_db db);
+int ksz9477_fdb_del(struct ksz_device *dev, int port,
+ const unsigned char *addr, u16 vid, struct dsa_db db);
+int ksz9477_mdb_add(struct ksz_device *dev, int port,
+ const struct switchdev_obj_port_mdb *mdb, struct dsa_db db);
+int ksz9477_mdb_del(struct ksz_device *dev, int port,
+ const struct switchdev_obj_port_mdb *mdb, struct dsa_db db);
+int ksz9477_change_mtu(struct ksz_device *dev, int port, int mtu);
+int ksz9477_max_mtu(struct ksz_device *dev, int port);
+void ksz9477_config_cpu_port(struct dsa_switch *ds);
+int ksz9477_enable_stp_addr(struct ksz_device *dev);
+int ksz9477_reset_switch(struct ksz_device *dev);
+int ksz9477_dsa_init(struct ksz_device *dev);
+int ksz9477_switch_init(struct ksz_device *dev);
+void ksz9477_switch_exit(struct ksz_device *dev);
+
+#endif
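
With the chip entry points exported this way, bus glue no longer supplies an ops table at registration. A minimal sketch of the new flow, under the signatures in this patch (example_probe is a hypothetical name):

static int example_probe(struct spi_device *spi)
{
	struct ksz_device *dev;

	dev = ksz_switch_alloc(&spi->dev, spi);
	if (!dev)
		return -ENOMEM;

	/* Chip detection runs inside ksz_switch_register(), which then
	 * takes dev_ops from the matched ksz_chip_data entry.
	 */
	return ksz_switch_register(dev);
}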
diff --git a/drivers/net/dsa/microchip/ksz9477_i2c.c b/drivers/net/dsa/microchip/ksz9477_i2c.c
index faa3163c86b0..99966514d444 100644
--- a/drivers/net/dsa/microchip/ksz9477_i2c.c
+++ b/drivers/net/dsa/microchip/ksz9477_i2c.c
@@ -41,7 +41,7 @@ static int ksz9477_i2c_probe(struct i2c_client *i2c,
if (i2c->dev.platform_data)
dev->pdata = i2c->dev.platform_data;
- ret = ksz9477_switch_register(dev);
+ ret = ksz_switch_register(dev);
/* Main DSA driver may not be started yet. */
if (ret)
@@ -71,8 +71,8 @@ static void ksz9477_i2c_shutdown(struct i2c_client *i2c)
if (!dev)
return;
- if (dev->dev_ops->shutdown)
- dev->dev_ops->shutdown(dev);
+ if (dev->dev_ops->reset)
+ dev->dev_ops->reset(dev);
dsa_switch_shutdown(dev->ds);
diff --git a/drivers/net/dsa/microchip/ksz9477_reg.h b/drivers/net/dsa/microchip/ksz9477_reg.h
index 7a2c8d4767af..c0ad83753b13 100644
--- a/drivers/net/dsa/microchip/ksz9477_reg.h
+++ b/drivers/net/dsa/microchip/ksz9477_reg.h
@@ -25,7 +25,6 @@
#define REG_CHIP_ID2__1 0x0002
-#define CHIP_ID_63 0x63
#define CHIP_ID_66 0x66
#define CHIP_ID_67 0x67
#define CHIP_ID_77 0x77
@@ -166,7 +165,6 @@
#define SW_DOUBLE_TAG BIT(7)
#define SW_RESET BIT(1)
-#define SW_START BIT(0)
#define REG_SW_MAC_ADDR_0 0x0302
#define REG_SW_MAC_ADDR_1 0x0303
@@ -266,7 +264,6 @@
#define REG_SW_MAC_CTRL_1 0x0331
-#define MULTICAST_STORM_DISABLE BIT(6)
#define SW_BACK_PRESSURE BIT(5)
#define FAIR_FLOW_CTRL BIT(4)
#define NO_EXC_COLLISION_DROP BIT(3)
@@ -277,13 +274,9 @@
#define REG_SW_MAC_CTRL_2 0x0332
#define SW_REPLACE_VID BIT(3)
-#define BROADCAST_STORM_RATE_HI 0x07
#define REG_SW_MAC_CTRL_3 0x0333
-#define BROADCAST_STORM_RATE_LO 0xFF
-#define BROADCAST_STORM_RATE 0x07FF
-
#define REG_SW_MAC_CTRL_4 0x0334
#define SW_PASS_PAUSE BIT(3)
@@ -1653,12 +1646,6 @@
#define PTP_TRIG_UNIT_M (BIT(MAX_TRIG_UNIT) - 1)
#define PTP_TS_UNIT_M (BIT(MAX_TIMESTAMP_UNIT) - 1)
-/* Driver set switch broadcast storm protection at 10% rate. */
-#define BROADCAST_STORM_PROT_RATE 10
-
-/* 148,800 frames * 67 ms / 100 */
-#define BROADCAST_STORM_VALUE 9969
-
#define KSZ9477_MAX_FRAME_SIZE 9000
#endif /* KSZ9477_REGS_H */
diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c
deleted file mode 100644
index 1bc8b0cbe458..000000000000
--- a/drivers/net/dsa/microchip/ksz9477_spi.c
+++ /dev/null
@@ -1,150 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Microchip KSZ9477 series register access through SPI
- *
- * Copyright (C) 2017-2019 Microchip Technology Inc.
- */
-
-#include <asm/unaligned.h>
-
-#include <linux/delay.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/regmap.h>
-#include <linux/spi/spi.h>
-
-#include "ksz_common.h"
-
-#define SPI_ADDR_SHIFT 24
-#define SPI_ADDR_ALIGN 3
-#define SPI_TURNAROUND_SHIFT 5
-
-KSZ_REGMAP_TABLE(ksz9477, 32, SPI_ADDR_SHIFT,
- SPI_TURNAROUND_SHIFT, SPI_ADDR_ALIGN);
-
-static int ksz9477_spi_probe(struct spi_device *spi)
-{
- struct regmap_config rc;
- struct ksz_device *dev;
- int i, ret;
-
- dev = ksz_switch_alloc(&spi->dev, spi);
- if (!dev)
- return -ENOMEM;
-
- for (i = 0; i < ARRAY_SIZE(ksz9477_regmap_config); i++) {
- rc = ksz9477_regmap_config[i];
- rc.lock_arg = &dev->regmap_mutex;
- dev->regmap[i] = devm_regmap_init_spi(spi, &rc);
- if (IS_ERR(dev->regmap[i])) {
- ret = PTR_ERR(dev->regmap[i]);
- dev_err(&spi->dev,
- "Failed to initialize regmap%i: %d\n",
- ksz9477_regmap_config[i].val_bits, ret);
- return ret;
- }
- }
-
- if (spi->dev.platform_data)
- dev->pdata = spi->dev.platform_data;
-
- /* setup spi */
- spi->mode = SPI_MODE_3;
- ret = spi_setup(spi);
- if (ret)
- return ret;
-
- ret = ksz9477_switch_register(dev);
-
- /* Main DSA driver may not be started yet. */
- if (ret)
- return ret;
-
- spi_set_drvdata(spi, dev);
-
- return 0;
-}
-
-static void ksz9477_spi_remove(struct spi_device *spi)
-{
- struct ksz_device *dev = spi_get_drvdata(spi);
-
- if (dev)
- ksz_switch_remove(dev);
-
- spi_set_drvdata(spi, NULL);
-}
-
-static void ksz9477_spi_shutdown(struct spi_device *spi)
-{
- struct ksz_device *dev = spi_get_drvdata(spi);
-
- if (dev)
- dsa_switch_shutdown(dev->ds);
-
- spi_set_drvdata(spi, NULL);
-}
-
-static const struct of_device_id ksz9477_dt_ids[] = {
- {
- .compatible = "microchip,ksz9477",
- .data = &ksz_switch_chips[KSZ9477]
- },
- {
- .compatible = "microchip,ksz9897",
- .data = &ksz_switch_chips[KSZ9897]
- },
- {
- .compatible = "microchip,ksz9893",
- .data = &ksz_switch_chips[KSZ9893]
- },
- {
- .compatible = "microchip,ksz9563",
- .data = &ksz_switch_chips[KSZ9893]
- },
- {
- .compatible = "microchip,ksz8563",
- .data = &ksz_switch_chips[KSZ9893]
- },
- {
- .compatible = "microchip,ksz9567",
- .data = &ksz_switch_chips[KSZ9567]
- },
- {},
-};
-MODULE_DEVICE_TABLE(of, ksz9477_dt_ids);
-
-static const struct spi_device_id ksz9477_spi_ids[] = {
- { "ksz9477" },
- { "ksz9897" },
- { "ksz9893" },
- { "ksz9563" },
- { "ksz8563" },
- { "ksz9567" },
- { },
-};
-MODULE_DEVICE_TABLE(spi, ksz9477_spi_ids);
-
-static struct spi_driver ksz9477_spi_driver = {
- .driver = {
- .name = "ksz9477-switch",
- .owner = THIS_MODULE,
- .of_match_table = of_match_ptr(ksz9477_dt_ids),
- },
- .id_table = ksz9477_spi_ids,
- .probe = ksz9477_spi_probe,
- .remove = ksz9477_spi_remove,
- .shutdown = ksz9477_spi_shutdown,
-};
-
-module_spi_driver(ksz9477_spi_driver);
-
-MODULE_ALIAS("spi:ksz9477");
-MODULE_ALIAS("spi:ksz9897");
-MODULE_ALIAS("spi:ksz9893");
-MODULE_ALIAS("spi:ksz9563");
-MODULE_ALIAS("spi:ksz8563");
-MODULE_ALIAS("spi:ksz9567");
-MODULE_AUTHOR("Woojung Huh <Woojung.Huh@microchip.com>");
-MODULE_DESCRIPTION("Microchip KSZ9477 Series Switch SPI access Driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 9ca8c8d7740f..59582eb3bcaf 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -16,10 +16,13 @@
#include <linux/if_bridge.h>
#include <linux/of_device.h>
#include <linux/of_net.h>
+#include <linux/micrel_phy.h>
#include <net/dsa.h>
#include <net/switchdev.h>
#include "ksz_common.h"
+#include "ksz8.h"
+#include "ksz9477.h"
#define MIB_COUNTER_NUM 0x20
@@ -138,6 +141,66 @@ static const struct ksz_mib_names ksz9477_mib_names[] = {
{ 0x83, "tx_discards" },
};
+static const struct ksz_dev_ops ksz8_dev_ops = {
+ .setup = ksz8_setup,
+ .get_port_addr = ksz8_get_port_addr,
+ .cfg_port_member = ksz8_cfg_port_member,
+ .flush_dyn_mac_table = ksz8_flush_dyn_mac_table,
+ .port_setup = ksz8_port_setup,
+ .r_phy = ksz8_r_phy,
+ .w_phy = ksz8_w_phy,
+ .r_mib_pkt = ksz8_r_mib_pkt,
+ .freeze_mib = ksz8_freeze_mib,
+ .port_init_cnt = ksz8_port_init_cnt,
+ .fdb_dump = ksz8_fdb_dump,
+ .mdb_add = ksz8_mdb_add,
+ .mdb_del = ksz8_mdb_del,
+ .vlan_filtering = ksz8_port_vlan_filtering,
+ .vlan_add = ksz8_port_vlan_add,
+ .vlan_del = ksz8_port_vlan_del,
+ .mirror_add = ksz8_port_mirror_add,
+ .mirror_del = ksz8_port_mirror_del,
+ .get_caps = ksz8_get_caps,
+ .config_cpu_port = ksz8_config_cpu_port,
+ .enable_stp_addr = ksz8_enable_stp_addr,
+ .reset = ksz8_reset_switch,
+ .init = ksz8_switch_init,
+ .exit = ksz8_switch_exit,
+};
+
+static const struct ksz_dev_ops ksz9477_dev_ops = {
+ .setup = ksz9477_setup,
+ .get_port_addr = ksz9477_get_port_addr,
+ .cfg_port_member = ksz9477_cfg_port_member,
+ .flush_dyn_mac_table = ksz9477_flush_dyn_mac_table,
+ .port_setup = ksz9477_port_setup,
+ .r_phy = ksz9477_r_phy,
+ .w_phy = ksz9477_w_phy,
+ .r_mib_cnt = ksz9477_r_mib_cnt,
+ .r_mib_pkt = ksz9477_r_mib_pkt,
+ .r_mib_stat64 = ksz_r_mib_stats64,
+ .freeze_mib = ksz9477_freeze_mib,
+ .port_init_cnt = ksz9477_port_init_cnt,
+ .vlan_filtering = ksz9477_port_vlan_filtering,
+ .vlan_add = ksz9477_port_vlan_add,
+ .vlan_del = ksz9477_port_vlan_del,
+ .mirror_add = ksz9477_port_mirror_add,
+ .mirror_del = ksz9477_port_mirror_del,
+ .get_caps = ksz9477_get_caps,
+ .fdb_dump = ksz9477_fdb_dump,
+ .fdb_add = ksz9477_fdb_add,
+ .fdb_del = ksz9477_fdb_del,
+ .mdb_add = ksz9477_mdb_add,
+ .mdb_del = ksz9477_mdb_del,
+ .change_mtu = ksz9477_change_mtu,
+ .max_mtu = ksz9477_max_mtu,
+ .config_cpu_port = ksz9477_config_cpu_port,
+ .enable_stp_addr = ksz9477_enable_stp_addr,
+ .reset = ksz9477_reset_switch,
+ .init = ksz9477_switch_init,
+ .exit = ksz9477_switch_exit,
+};
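
Note the asymmetry between the two tables: ksz8_dev_ops leaves fdb_add, fdb_del, change_mtu and max_mtu unset, so the common wrappers introduced further down return -EOPNOTSUPP for those operations on KSZ8 parts. The guard pattern, sketched with the fdb_add hook:

	if (!dev->dev_ops->fdb_add)	/* NULL in ksz8_dev_ops */
		return -EOPNOTSUPP;

	return dev->dev_ops->fdb_add(dev, port, addr, vid, db);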
+
const struct ksz_chip_data ksz_switch_chips[] = {
[KSZ8795] = {
.chip_id = KSZ8795_CHIP_ID,
@@ -147,10 +210,15 @@ const struct ksz_chip_data ksz_switch_chips[] = {
.num_statics = 8,
.cpu_ports = 0x10, /* can be configured as cpu port */
.port_cnt = 5, /* total cpu and user ports */
+ .ops = &ksz8_dev_ops,
.ksz87xx_eee_link_erratum = true,
.mib_names = ksz9477_mib_names,
.mib_cnt = ARRAY_SIZE(ksz9477_mib_names),
.reg_mib_cnt = MIB_COUNTER_NUM,
+ .stp_ctrl_reg = 0x02,
+ .broadcast_ctrl_reg = 0x06,
+ .multicast_ctrl_reg = 0x04,
+ .start_ctrl_reg = 0x01,
.supports_mii = {false, false, false, false, true},
.supports_rmii = {false, false, false, false, true},
.supports_rgmii = {false, false, false, false, true},
@@ -179,10 +247,15 @@ const struct ksz_chip_data ksz_switch_chips[] = {
.num_statics = 8,
.cpu_ports = 0x10, /* can be configured as cpu port */
.port_cnt = 5, /* total cpu and user ports */
+ .ops = &ksz8_dev_ops,
.ksz87xx_eee_link_erratum = true,
.mib_names = ksz9477_mib_names,
.mib_cnt = ARRAY_SIZE(ksz9477_mib_names),
.reg_mib_cnt = MIB_COUNTER_NUM,
+ .stp_ctrl_reg = 0x02,
+ .broadcast_ctrl_reg = 0x06,
+ .multicast_ctrl_reg = 0x04,
+ .start_ctrl_reg = 0x01,
.supports_mii = {false, false, false, false, true},
.supports_rmii = {false, false, false, false, true},
.supports_rgmii = {false, false, false, false, true},
@@ -197,10 +270,15 @@ const struct ksz_chip_data ksz_switch_chips[] = {
.num_statics = 8,
.cpu_ports = 0x10, /* can be configured as cpu port */
.port_cnt = 5, /* total cpu and user ports */
+ .ops = &ksz8_dev_ops,
.ksz87xx_eee_link_erratum = true,
.mib_names = ksz9477_mib_names,
.mib_cnt = ARRAY_SIZE(ksz9477_mib_names),
.reg_mib_cnt = MIB_COUNTER_NUM,
+ .stp_ctrl_reg = 0x02,
+ .broadcast_ctrl_reg = 0x06,
+ .multicast_ctrl_reg = 0x04,
+ .start_ctrl_reg = 0x01,
.supports_mii = {false, false, false, false, true},
.supports_rmii = {false, false, false, false, true},
.supports_rgmii = {false, false, false, false, true},
@@ -215,9 +293,14 @@ const struct ksz_chip_data ksz_switch_chips[] = {
.num_statics = 8,
.cpu_ports = 0x4, /* can be configured as cpu port */
.port_cnt = 3,
+ .ops = &ksz8_dev_ops,
.mib_names = ksz88xx_mib_names,
.mib_cnt = ARRAY_SIZE(ksz88xx_mib_names),
.reg_mib_cnt = MIB_COUNTER_NUM,
+ .stp_ctrl_reg = 0x02,
+ .broadcast_ctrl_reg = 0x06,
+ .multicast_ctrl_reg = 0x04,
+ .start_ctrl_reg = 0x01,
.supports_mii = {false, false, true},
.supports_rmii = {false, false, true},
.internal_phy = {true, true, false},
@@ -231,10 +314,15 @@ const struct ksz_chip_data ksz_switch_chips[] = {
.num_statics = 16,
.cpu_ports = 0x7F, /* can be configured as cpu port */
.port_cnt = 7, /* total physical port count */
+ .ops = &ksz9477_dev_ops,
.phy_errata_9477 = true,
.mib_names = ksz9477_mib_names,
.mib_cnt = ARRAY_SIZE(ksz9477_mib_names),
.reg_mib_cnt = MIB_COUNTER_NUM,
+ .stp_ctrl_reg = 0x0B04,
+ .broadcast_ctrl_reg = 0x0332,
+ .multicast_ctrl_reg = 0x0331,
+ .start_ctrl_reg = 0x0300,
.supports_mii = {false, false, false, false,
false, true, false},
.supports_rmii = {false, false, false, false,
@@ -253,10 +341,15 @@ const struct ksz_chip_data ksz_switch_chips[] = {
.num_statics = 16,
.cpu_ports = 0x7F, /* can be configured as cpu port */
.port_cnt = 7, /* total physical port count */
+ .ops = &ksz9477_dev_ops,
.phy_errata_9477 = true,
.mib_names = ksz9477_mib_names,
.mib_cnt = ARRAY_SIZE(ksz9477_mib_names),
.reg_mib_cnt = MIB_COUNTER_NUM,
+ .stp_ctrl_reg = 0x0B04,
+ .broadcast_ctrl_reg = 0x0332,
+ .multicast_ctrl_reg = 0x0331,
+ .start_ctrl_reg = 0x0300,
.supports_mii = {false, false, false, false,
false, true, true},
.supports_rmii = {false, false, false, false,
@@ -275,9 +368,14 @@ const struct ksz_chip_data ksz_switch_chips[] = {
.num_statics = 16,
.cpu_ports = 0x07, /* can be configured as cpu port */
.port_cnt = 3, /* total port count */
+ .ops = &ksz9477_dev_ops,
.mib_names = ksz9477_mib_names,
.mib_cnt = ARRAY_SIZE(ksz9477_mib_names),
.reg_mib_cnt = MIB_COUNTER_NUM,
+ .stp_ctrl_reg = 0x0B04,
+ .broadcast_ctrl_reg = 0x0332,
+ .multicast_ctrl_reg = 0x0331,
+ .start_ctrl_reg = 0x0300,
.supports_mii = {false, false, true},
.supports_rmii = {false, false, true},
.supports_rgmii = {false, false, true},
@@ -292,10 +390,15 @@ const struct ksz_chip_data ksz_switch_chips[] = {
.num_statics = 16,
.cpu_ports = 0x7F, /* can be configured as cpu port */
.port_cnt = 7, /* total physical port count */
+ .ops = &ksz9477_dev_ops,
.phy_errata_9477 = true,
.mib_names = ksz9477_mib_names,
.mib_cnt = ARRAY_SIZE(ksz9477_mib_names),
.reg_mib_cnt = MIB_COUNTER_NUM,
+ .stp_ctrl_reg = 0x0B04,
+ .broadcast_ctrl_reg = 0x0332,
+ .multicast_ctrl_reg = 0x0331,
+ .start_ctrl_reg = 0x0300,
.supports_mii = {false, false, false, false,
false, true, true},
.supports_rmii = {false, false, false, false,
@@ -317,6 +420,10 @@ const struct ksz_chip_data ksz_switch_chips[] = {
.mib_names = ksz9477_mib_names,
.mib_cnt = ARRAY_SIZE(ksz9477_mib_names),
.reg_mib_cnt = MIB_COUNTER_NUM,
+ .stp_ctrl_reg = 0x0B04,
+ .broadcast_ctrl_reg = 0x0332,
+ .multicast_ctrl_reg = 0x0331,
+ .start_ctrl_reg = 0x0300,
.supports_mii = {false, false, false, false, true},
.supports_rmii = {false, false, false, false, true},
.supports_rgmii = {false, false, false, false, true},
@@ -334,6 +441,10 @@ const struct ksz_chip_data ksz_switch_chips[] = {
.mib_names = ksz9477_mib_names,
.mib_cnt = ARRAY_SIZE(ksz9477_mib_names),
.reg_mib_cnt = MIB_COUNTER_NUM,
+ .stp_ctrl_reg = 0x0B04,
+ .broadcast_ctrl_reg = 0x0332,
+ .multicast_ctrl_reg = 0x0331,
+ .start_ctrl_reg = 0x0300,
.supports_mii = {false, false, false, false, true, true},
.supports_rmii = {false, false, false, false, true, true},
.supports_rgmii = {false, false, false, false, true, true},
@@ -351,6 +462,10 @@ const struct ksz_chip_data ksz_switch_chips[] = {
.mib_names = ksz9477_mib_names,
.mib_cnt = ARRAY_SIZE(ksz9477_mib_names),
.reg_mib_cnt = MIB_COUNTER_NUM,
+ .stp_ctrl_reg = 0x0B04,
+ .broadcast_ctrl_reg = 0x0332,
+ .multicast_ctrl_reg = 0x0331,
+ .start_ctrl_reg = 0x0300,
.supports_mii = {false, false, false, false,
true, true, false, false},
.supports_rmii = {false, false, false, false,
@@ -372,6 +487,10 @@ const struct ksz_chip_data ksz_switch_chips[] = {
.mib_names = ksz9477_mib_names,
.mib_cnt = ARRAY_SIZE(ksz9477_mib_names),
.reg_mib_cnt = MIB_COUNTER_NUM,
+ .stp_ctrl_reg = 0x0B04,
+ .broadcast_ctrl_reg = 0x0332,
+ .multicast_ctrl_reg = 0x0331,
+ .start_ctrl_reg = 0x0300,
.supports_mii = {false, false, false, false,
true, true, false, false},
.supports_rmii = {false, false, false, false,
@@ -393,6 +512,10 @@ const struct ksz_chip_data ksz_switch_chips[] = {
.mib_names = ksz9477_mib_names,
.mib_cnt = ARRAY_SIZE(ksz9477_mib_names),
.reg_mib_cnt = MIB_COUNTER_NUM,
+ .stp_ctrl_reg = 0x0B04,
+ .broadcast_ctrl_reg = 0x0332,
+ .multicast_ctrl_reg = 0x0331,
+ .start_ctrl_reg = 0x0300,
.supports_mii = {false, false, false, false,
true, true, false, false},
.supports_rmii = {false, false, false, false,
@@ -436,8 +559,8 @@ static int ksz_check_device_id(struct ksz_device *dev)
return 0;
}
-void ksz_phylink_get_caps(struct dsa_switch *ds, int port,
- struct phylink_config *config)
+static void ksz_phylink_get_caps(struct dsa_switch *ds, int port,
+ struct phylink_config *config)
{
struct ksz_device *dev = ds->priv;
@@ -456,8 +579,10 @@ void ksz_phylink_get_caps(struct dsa_switch *ds, int port,
if (dev->info->internal_phy[port])
__set_bit(PHY_INTERFACE_MODE_INTERNAL,
config->supported_interfaces);
+
+ if (dev->dev_ops->get_caps)
+ dev->dev_ops->get_caps(dev, port, config);
}
-EXPORT_SYMBOL_GPL(ksz_phylink_get_caps);
void ksz_r_mib_stats64(struct ksz_device *dev, int port)
{
@@ -500,10 +625,9 @@ void ksz_r_mib_stats64(struct ksz_device *dev, int port)
spin_unlock(&mib->stats64_lock);
}
-EXPORT_SYMBOL_GPL(ksz_r_mib_stats64);
-void ksz_get_stats64(struct dsa_switch *ds, int port,
- struct rtnl_link_stats64 *s)
+static void ksz_get_stats64(struct dsa_switch *ds, int port,
+ struct rtnl_link_stats64 *s)
{
struct ksz_device *dev = ds->priv;
struct ksz_port_mib *mib;
@@ -514,10 +638,9 @@ void ksz_get_stats64(struct dsa_switch *ds, int port,
memcpy(s, &mib->stats64, sizeof(*s));
spin_unlock(&mib->stats64_lock);
}
-EXPORT_SYMBOL_GPL(ksz_get_stats64);
-void ksz_get_strings(struct dsa_switch *ds, int port,
- u32 stringset, uint8_t *buf)
+static void ksz_get_strings(struct dsa_switch *ds, int port,
+ u32 stringset, uint8_t *buf)
{
struct ksz_device *dev = ds->priv;
int i;
@@ -530,9 +653,8 @@ void ksz_get_strings(struct dsa_switch *ds, int port,
dev->info->mib_names[i].string, ETH_GSTRING_LEN);
}
}
-EXPORT_SYMBOL_GPL(ksz_get_strings);
-void ksz_update_port_member(struct ksz_device *dev, int port)
+static void ksz_update_port_member(struct ksz_device *dev, int port)
{
struct ksz_port *p = &dev->ports[port];
struct dsa_switch *ds = dev->ds;
@@ -589,7 +711,52 @@ void ksz_update_port_member(struct ksz_device *dev, int port)
dev->dev_ops->cfg_port_member(dev, port, port_member | cpu_port);
}
-EXPORT_SYMBOL_GPL(ksz_update_port_member);
+
+static int ksz_setup(struct dsa_switch *ds)
+{
+ struct ksz_device *dev = ds->priv;
+ int ret;
+
+ dev->vlan_cache = devm_kcalloc(dev->dev, sizeof(struct vlan_table),
+ dev->info->num_vlans, GFP_KERNEL);
+ if (!dev->vlan_cache)
+ return -ENOMEM;
+
+ ret = dev->dev_ops->reset(dev);
+ if (ret) {
+ dev_err(ds->dev, "failed to reset switch\n");
+ return ret;
+ }
+
+ /* Set broadcast storm protection to a 10% rate. */
+ regmap_update_bits(dev->regmap[1], dev->info->broadcast_ctrl_reg,
+ BROADCAST_STORM_RATE,
+ (BROADCAST_STORM_VALUE *
+ BROADCAST_STORM_PROT_RATE) / 100);
+
+ dev->dev_ops->config_cpu_port(ds);
+
+ dev->dev_ops->enable_stp_addr(dev);
+
+ regmap_update_bits(dev->regmap[0], dev->info->multicast_ctrl_reg,
+ MULTICAST_STORM_DISABLE, MULTICAST_STORM_DISABLE);
+
+ ksz_init_mib_timer(dev);
+
+ ds->configure_vlan_while_not_filtering = false;
+
+ if (dev->dev_ops->setup) {
+ ret = dev->dev_ops->setup(ds);
+ if (ret)
+ return ret;
+ }
+
+ /* start switch */
+ regmap_update_bits(dev->regmap[0], dev->info->start_ctrl_reg,
+ SW_START, SW_START);
+
+ return 0;
+}
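
Worked numbers for the storm-protection write above, assuming 148,800 frames/s is the maximum 64-byte frame rate at 100 Mbit/s and 67 ms is the chip's measurement interval:

/* BROADCAST_STORM_VALUE: 148800 * 67 / 1000 ~= 9969 frames per interval.
 * The 10% cap then programs:
 *   (BROADCAST_STORM_VALUE * BROADCAST_STORM_PROT_RATE) / 100 == 996
 * which fits the 11-bit BROADCAST_STORM_RATE field (0x07FF).
 */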
static void port_r_cnt(struct ksz_device *dev, int port)
{
@@ -667,9 +834,8 @@ void ksz_init_mib_timer(struct ksz_device *dev)
memset(mib->counters, 0, dev->info->mib_cnt * sizeof(u64));
}
}
-EXPORT_SYMBOL_GPL(ksz_init_mib_timer);
-int ksz_phy_read16(struct dsa_switch *ds, int addr, int reg)
+static int ksz_phy_read16(struct dsa_switch *ds, int addr, int reg)
{
struct ksz_device *dev = ds->priv;
u16 val = 0xffff;
@@ -678,9 +844,8 @@ int ksz_phy_read16(struct dsa_switch *ds, int addr, int reg)
return val;
}
-EXPORT_SYMBOL_GPL(ksz_phy_read16);
-int ksz_phy_write16(struct dsa_switch *ds, int addr, int reg, u16 val)
+static int ksz_phy_write16(struct dsa_switch *ds, int addr, int reg, u16 val)
{
struct ksz_device *dev = ds->priv;
@@ -688,10 +853,25 @@ int ksz_phy_write16(struct dsa_switch *ds, int addr, int reg, u16 val)
return 0;
}
-EXPORT_SYMBOL_GPL(ksz_phy_write16);
-void ksz_mac_link_down(struct dsa_switch *ds, int port, unsigned int mode,
- phy_interface_t interface)
+static u32 ksz_get_phy_flags(struct dsa_switch *ds, int port)
+{
+ struct ksz_device *dev = ds->priv;
+
+ if (dev->chip_id == KSZ8830_CHIP_ID) {
+ /* Silicon Errata Sheet (DS80000830A):
+ * Port 1 does not work with LinkMD Cable-Testing.
+ * Port 1 does not respond to received PAUSE control frames.
+ */
+ if (!port)
+ return MICREL_KSZ8_P1_ERRATA;
+ }
+
+ return 0;
+}
+
+static void ksz_mac_link_down(struct dsa_switch *ds, int port,
+ unsigned int mode, phy_interface_t interface)
{
struct ksz_device *dev = ds->priv;
struct ksz_port *p = &dev->ports[port];
@@ -702,9 +882,8 @@ void ksz_mac_link_down(struct dsa_switch *ds, int port, unsigned int mode,
if (dev->mib_read_interval)
schedule_delayed_work(&dev->mib_read, 0);
}
-EXPORT_SYMBOL_GPL(ksz_mac_link_down);
-int ksz_sset_count(struct dsa_switch *ds, int port, int sset)
+static int ksz_sset_count(struct dsa_switch *ds, int port, int sset)
{
struct ksz_device *dev = ds->priv;
@@ -713,9 +892,9 @@ int ksz_sset_count(struct dsa_switch *ds, int port, int sset)
return dev->info->mib_cnt;
}
-EXPORT_SYMBOL_GPL(ksz_sset_count);
-void ksz_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *buf)
+static void ksz_get_ethtool_stats(struct dsa_switch *ds, int port,
+ uint64_t *buf)
{
const struct dsa_port *dp = dsa_to_port(ds, port);
struct ksz_device *dev = ds->priv;
@@ -731,12 +910,11 @@ void ksz_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *buf)
memcpy(buf, mib->counters, dev->info->mib_cnt * sizeof(u64));
mutex_unlock(&mib->cnt_mutex);
}
-EXPORT_SYMBOL_GPL(ksz_get_ethtool_stats);
-int ksz_port_bridge_join(struct dsa_switch *ds, int port,
- struct dsa_bridge bridge,
- bool *tx_fwd_offload,
- struct netlink_ext_ack *extack)
+static int ksz_port_bridge_join(struct dsa_switch *ds, int port,
+ struct dsa_bridge bridge,
+ bool *tx_fwd_offload,
+ struct netlink_ext_ack *extack)
{
/* port_stp_state_set() will be called afterwards to put the port in
 * the appropriate state, so there is no need to do anything.
@@ -744,135 +922,83 @@ int ksz_port_bridge_join(struct dsa_switch *ds, int port,
return 0;
}
-EXPORT_SYMBOL_GPL(ksz_port_bridge_join);
-void ksz_port_bridge_leave(struct dsa_switch *ds, int port,
- struct dsa_bridge bridge)
+static void ksz_port_bridge_leave(struct dsa_switch *ds, int port,
+ struct dsa_bridge bridge)
{
/* port_stp_state_set() will be called afterwards to put the port in
 * the forwarding state, so there is no need to do anything.
*/
}
-EXPORT_SYMBOL_GPL(ksz_port_bridge_leave);
-void ksz_port_fast_age(struct dsa_switch *ds, int port)
+static void ksz_port_fast_age(struct dsa_switch *ds, int port)
{
struct ksz_device *dev = ds->priv;
dev->dev_ops->flush_dyn_mac_table(dev, port);
}
-EXPORT_SYMBOL_GPL(ksz_port_fast_age);
-int ksz_port_fdb_dump(struct dsa_switch *ds, int port, dsa_fdb_dump_cb_t *cb,
- void *data)
+static int ksz_port_fdb_add(struct dsa_switch *ds, int port,
+ const unsigned char *addr, u16 vid,
+ struct dsa_db db)
{
struct ksz_device *dev = ds->priv;
- int ret = 0;
- u16 i = 0;
- u16 entries = 0;
- u8 timestamp = 0;
- u8 fid;
- u8 member;
- struct alu_struct alu;
-
- do {
- alu.is_static = false;
- ret = dev->dev_ops->r_dyn_mac_table(dev, i, alu.mac, &fid,
- &member, &timestamp,
- &entries);
- if (!ret && (member & BIT(port))) {
- ret = cb(alu.mac, alu.fid, alu.is_static, data);
- if (ret)
- break;
- }
- i++;
- } while (i < entries);
- if (i >= entries)
- ret = 0;
- return ret;
+ if (!dev->dev_ops->fdb_add)
+ return -EOPNOTSUPP;
+
+ return dev->dev_ops->fdb_add(dev, port, addr, vid, db);
}
-EXPORT_SYMBOL_GPL(ksz_port_fdb_dump);
-int ksz_port_mdb_add(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_mdb *mdb,
- struct dsa_db db)
+static int ksz_port_fdb_del(struct dsa_switch *ds, int port,
+ const unsigned char *addr,
+ u16 vid, struct dsa_db db)
{
struct ksz_device *dev = ds->priv;
- struct alu_struct alu;
- int index;
- int empty = 0;
-
- alu.port_forward = 0;
- for (index = 0; index < dev->info->num_statics; index++) {
- if (!dev->dev_ops->r_sta_mac_table(dev, index, &alu)) {
- /* Found one already in static MAC table. */
- if (!memcmp(alu.mac, mdb->addr, ETH_ALEN) &&
- alu.fid == mdb->vid)
- break;
- /* Remember the first empty entry. */
- } else if (!empty) {
- empty = index + 1;
- }
- }
- /* no available entry */
- if (index == dev->info->num_statics && !empty)
- return -ENOSPC;
+ if (!dev->dev_ops->fdb_del)
+ return -EOPNOTSUPP;
- /* add entry */
- if (index == dev->info->num_statics) {
- index = empty - 1;
- memset(&alu, 0, sizeof(alu));
- memcpy(alu.mac, mdb->addr, ETH_ALEN);
- alu.is_static = true;
- }
- alu.port_forward |= BIT(port);
- if (mdb->vid) {
- alu.is_use_fid = true;
+ return dev->dev_ops->fdb_del(dev, port, addr, vid, db);
+}
- /* Need a way to map VID to FID. */
- alu.fid = mdb->vid;
- }
- dev->dev_ops->w_sta_mac_table(dev, index, &alu);
+static int ksz_port_fdb_dump(struct dsa_switch *ds, int port,
+ dsa_fdb_dump_cb_t *cb, void *data)
+{
+ struct ksz_device *dev = ds->priv;
- return 0;
+ if (!dev->dev_ops->fdb_dump)
+ return -EOPNOTSUPP;
+
+ return dev->dev_ops->fdb_dump(dev, port, cb, data);
}
-EXPORT_SYMBOL_GPL(ksz_port_mdb_add);
-int ksz_port_mdb_del(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_mdb *mdb,
- struct dsa_db db)
+static int ksz_port_mdb_add(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_mdb *mdb,
+ struct dsa_db db)
{
struct ksz_device *dev = ds->priv;
- struct alu_struct alu;
- int index;
-
- for (index = 0; index < dev->info->num_statics; index++) {
- if (!dev->dev_ops->r_sta_mac_table(dev, index, &alu)) {
- /* Found one already in static MAC table. */
- if (!memcmp(alu.mac, mdb->addr, ETH_ALEN) &&
- alu.fid == mdb->vid)
- break;
- }
- }
- /* no available entry */
- if (index == dev->info->num_statics)
- goto exit;
+ if (!dev->dev_ops->mdb_add)
+ return -EOPNOTSUPP;
- /* clear port */
- alu.port_forward &= ~BIT(port);
- if (!alu.port_forward)
- alu.is_static = false;
- dev->dev_ops->w_sta_mac_table(dev, index, &alu);
+ return dev->dev_ops->mdb_add(dev, port, mdb, db);
+}
-exit:
- return 0;
+static int ksz_port_mdb_del(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_mdb *mdb,
+ struct dsa_db db)
+{
+ struct ksz_device *dev = ds->priv;
+
+ if (!dev->dev_ops->mdb_del)
+ return -EOPNOTSUPP;
+
+ return dev->dev_ops->mdb_del(dev, port, mdb, db);
}
-EXPORT_SYMBOL_GPL(ksz_port_mdb_del);
-int ksz_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy)
+static int ksz_enable_port(struct dsa_switch *ds, int port,
+ struct phy_device *phy)
{
struct ksz_device *dev = ds->priv;
@@ -888,14 +1014,15 @@ int ksz_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy)
return 0;
}
-EXPORT_SYMBOL_GPL(ksz_enable_port);
-void ksz_port_stp_state_set(struct dsa_switch *ds, int port,
- u8 state, int reg)
+void ksz_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
{
struct ksz_device *dev = ds->priv;
struct ksz_port *p;
u8 data;
+ int reg;
+
+ reg = dev->info->stp_ctrl_reg;
ksz_pread8(dev, port, reg, &data);
data &= ~(PORT_TX_ENABLE | PORT_RX_ENABLE | PORT_LEARN_DISABLE);
@@ -928,7 +1055,202 @@ void ksz_port_stp_state_set(struct dsa_switch *ds, int port,
ksz_update_port_member(dev, port);
}
-EXPORT_SYMBOL_GPL(ksz_port_stp_state_set);
+
+static enum dsa_tag_protocol ksz_get_tag_protocol(struct dsa_switch *ds,
+ int port,
+ enum dsa_tag_protocol mp)
+{
+ struct ksz_device *dev = ds->priv;
+ enum dsa_tag_protocol proto = DSA_TAG_PROTO_NONE;
+
+ if (dev->chip_id == KSZ8795_CHIP_ID ||
+ dev->chip_id == KSZ8794_CHIP_ID ||
+ dev->chip_id == KSZ8765_CHIP_ID)
+ proto = DSA_TAG_PROTO_KSZ8795;
+
+ if (dev->chip_id == KSZ8830_CHIP_ID ||
+ dev->chip_id == KSZ9893_CHIP_ID)
+ proto = DSA_TAG_PROTO_KSZ9893;
+
+ if (dev->chip_id == KSZ9477_CHIP_ID ||
+ dev->chip_id == KSZ9897_CHIP_ID ||
+ dev->chip_id == KSZ9567_CHIP_ID)
+ proto = DSA_TAG_PROTO_KSZ9477;
+
+ return proto;
+}
+
+static int ksz_port_vlan_filtering(struct dsa_switch *ds, int port,
+ bool flag, struct netlink_ext_ack *extack)
+{
+ struct ksz_device *dev = ds->priv;
+
+ if (!dev->dev_ops->vlan_filtering)
+ return -EOPNOTSUPP;
+
+ return dev->dev_ops->vlan_filtering(dev, port, flag, extack);
+}
+
+static int ksz_port_vlan_add(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct netlink_ext_ack *extack)
+{
+ struct ksz_device *dev = ds->priv;
+
+ if (!dev->dev_ops->vlan_add)
+ return -EOPNOTSUPP;
+
+ return dev->dev_ops->vlan_add(dev, port, vlan, extack);
+}
+
+static int ksz_port_vlan_del(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_vlan *vlan)
+{
+ struct ksz_device *dev = ds->priv;
+
+ if (!dev->dev_ops->vlan_del)
+ return -EOPNOTSUPP;
+
+ return dev->dev_ops->vlan_del(dev, port, vlan);
+}
+
+static int ksz_port_mirror_add(struct dsa_switch *ds, int port,
+ struct dsa_mall_mirror_tc_entry *mirror,
+ bool ingress, struct netlink_ext_ack *extack)
+{
+ struct ksz_device *dev = ds->priv;
+
+ if (!dev->dev_ops->mirror_add)
+ return -EOPNOTSUPP;
+
+ return dev->dev_ops->mirror_add(dev, port, mirror, ingress, extack);
+}
+
+static void ksz_port_mirror_del(struct dsa_switch *ds, int port,
+ struct dsa_mall_mirror_tc_entry *mirror)
+{
+ struct ksz_device *dev = ds->priv;
+
+ if (dev->dev_ops->mirror_del)
+ dev->dev_ops->mirror_del(dev, port, mirror);
+}
+
+static int ksz_change_mtu(struct dsa_switch *ds, int port, int mtu)
+{
+ struct ksz_device *dev = ds->priv;
+
+ if (!dev->dev_ops->change_mtu)
+ return -EOPNOTSUPP;
+
+ return dev->dev_ops->change_mtu(dev, port, mtu);
+}
+
+static int ksz_max_mtu(struct dsa_switch *ds, int port)
+{
+ struct ksz_device *dev = ds->priv;
+
+ if (!dev->dev_ops->max_mtu)
+ return -EOPNOTSUPP;
+
+ return dev->dev_ops->max_mtu(dev, port);
+}
+
+static int ksz_switch_detect(struct ksz_device *dev)
+{
+ u8 id1, id2;
+ u16 id16;
+ u32 id32;
+ int ret;
+
+ /* read chip id */
+ ret = ksz_read16(dev, REG_CHIP_ID0, &id16);
+ if (ret)
+ return ret;
+
+ id1 = FIELD_GET(SW_FAMILY_ID_M, id16);
+ id2 = FIELD_GET(SW_CHIP_ID_M, id16);
+
+ switch (id1) {
+ case KSZ87_FAMILY_ID:
+ if (id2 == KSZ87_CHIP_ID_95) {
+ u8 val;
+
+ dev->chip_id = KSZ8795_CHIP_ID;
+
+ ksz_read8(dev, KSZ8_PORT_STATUS_0, &val);
+ if (val & KSZ8_PORT_FIBER_MODE)
+ dev->chip_id = KSZ8765_CHIP_ID;
+ } else if (id2 == KSZ87_CHIP_ID_94) {
+ dev->chip_id = KSZ8794_CHIP_ID;
+ } else {
+ return -ENODEV;
+ }
+ break;
+ case KSZ88_FAMILY_ID:
+ if (id2 == KSZ88_CHIP_ID_63)
+ dev->chip_id = KSZ8830_CHIP_ID;
+ else
+ return -ENODEV;
+ break;
+ default:
+ ret = ksz_read32(dev, REG_CHIP_ID0, &id32);
+ if (ret)
+ return ret;
+
+ dev->chip_rev = FIELD_GET(SW_REV_ID_M, id32);
+ id32 &= ~0xFF;
+
+ switch (id32) {
+ case KSZ9477_CHIP_ID:
+ case KSZ9897_CHIP_ID:
+ case KSZ9893_CHIP_ID:
+ case KSZ9567_CHIP_ID:
+ case LAN9370_CHIP_ID:
+ case LAN9371_CHIP_ID:
+ case LAN9372_CHIP_ID:
+ case LAN9373_CHIP_ID:
+ case LAN9374_CHIP_ID:
+ dev->chip_id = id32;
+ break;
+ default:
+ dev_err(dev->dev,
+ "unsupported switch detected %x)\n", id32);
+ return -ENODEV;
+ }
+ }
+ return 0;
+}
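
A worked decode for the 8-bit-ID path above, using the masks added to ksz_common.h (the register value is a hypothetical example):

/* Suppose the 16-bit read of REG_CHIP_ID0 returns 0x8795:
 *   FIELD_GET(SW_FAMILY_ID_M, 0x8795) == 0x87 -> KSZ87_FAMILY_ID
 *   FIELD_GET(SW_CHIP_ID_M,  0x8795) == 0x9  -> KSZ87_CHIP_ID_95
 * dev->chip_id becomes KSZ8795_CHIP_ID, unless the port-status fiber
 * bit reclassifies the part as KSZ8765_CHIP_ID.
 */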
+
+static const struct dsa_switch_ops ksz_switch_ops = {
+ .get_tag_protocol = ksz_get_tag_protocol,
+ .get_phy_flags = ksz_get_phy_flags,
+ .setup = ksz_setup,
+ .phy_read = ksz_phy_read16,
+ .phy_write = ksz_phy_write16,
+ .phylink_get_caps = ksz_phylink_get_caps,
+ .phylink_mac_link_down = ksz_mac_link_down,
+ .port_enable = ksz_enable_port,
+ .get_strings = ksz_get_strings,
+ .get_ethtool_stats = ksz_get_ethtool_stats,
+ .get_sset_count = ksz_sset_count,
+ .port_bridge_join = ksz_port_bridge_join,
+ .port_bridge_leave = ksz_port_bridge_leave,
+ .port_stp_state_set = ksz_port_stp_state_set,
+ .port_fast_age = ksz_port_fast_age,
+ .port_vlan_filtering = ksz_port_vlan_filtering,
+ .port_vlan_add = ksz_port_vlan_add,
+ .port_vlan_del = ksz_port_vlan_del,
+ .port_fdb_dump = ksz_port_fdb_dump,
+ .port_fdb_add = ksz_port_fdb_add,
+ .port_fdb_del = ksz_port_fdb_del,
+ .port_mdb_add = ksz_port_mdb_add,
+ .port_mdb_del = ksz_port_mdb_del,
+ .port_mirror_add = ksz_port_mirror_add,
+ .port_mirror_del = ksz_port_mirror_del,
+ .get_stats64 = ksz_get_stats64,
+ .port_change_mtu = ksz_change_mtu,
+ .port_max_mtu = ksz_max_mtu,
+};
struct ksz_device *ksz_switch_alloc(struct device *base, void *priv)
{
@@ -941,6 +1263,7 @@ struct ksz_device *ksz_switch_alloc(struct device *base, void *priv)
ds->dev = base;
ds->num_ports = DSA_MAX_PORTS;
+ ds->ops = &ksz_switch_ops;
swdev = devm_kzalloc(base, sizeof(*swdev), GFP_KERNEL);
if (!swdev)
@@ -956,8 +1279,7 @@ struct ksz_device *ksz_switch_alloc(struct device *base, void *priv)
}
EXPORT_SYMBOL(ksz_switch_alloc);
-int ksz_switch_register(struct ksz_device *dev,
- const struct ksz_dev_ops *ops)
+int ksz_switch_register(struct ksz_device *dev)
{
const struct ksz_chip_data *info;
struct device_node *port, *ports;
@@ -986,10 +1308,9 @@ int ksz_switch_register(struct ksz_device *dev,
mutex_init(&dev->alu_mutex);
mutex_init(&dev->vlan_mutex);
- dev->dev_ops = ops;
-
- if (dev->dev_ops->detect(dev))
- return -EINVAL;
+ ret = ksz_switch_detect(dev);
+ if (ret)
+ return ret;
info = ksz_lookup_info(dev->chip_id);
if (!info)
@@ -998,10 +1319,15 @@ int ksz_switch_register(struct ksz_device *dev,
/* Update the compatible info with the probed one */
dev->info = info;
+ dev_info(dev->dev, "found switch: %s, rev %i\n",
+ dev->info->dev_name, dev->chip_rev);
+
ret = ksz_check_device_id(dev);
if (ret)
return ret;
+ dev->dev_ops = dev->info->ops;
+
ret = dev->dev_ops->init(dev);
if (ret)
return ret;
@@ -1072,7 +1398,7 @@ int ksz_switch_register(struct ksz_device *dev,
/* Start the MIB timer. */
schedule_delayed_work(&dev->mib_read, 0);
- return 0;
+ return ret;
}
EXPORT_SYMBOL(ksz_switch_register);
diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h
index 8500eaedad67..0e7f15efbb79 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -41,11 +41,16 @@ struct ksz_chip_data {
int num_statics;
int cpu_ports;
int port_cnt;
+ const struct ksz_dev_ops *ops;
bool phy_errata_9477;
bool ksz87xx_eee_link_erratum;
const struct ksz_mib_names *mib_names;
int mib_cnt;
u8 reg_mib_cnt;
+ int stp_ctrl_reg;
+ int broadcast_ctrl_reg;
+ int multicast_ctrl_reg;
+ int start_ctrl_reg;
bool supports_mii[KSZ_MAX_NUM_PORTS];
bool supports_rmii[KSZ_MAX_NUM_PORTS];
bool supports_rgmii[KSZ_MAX_NUM_PORTS];
@@ -90,6 +95,7 @@ struct ksz_device {
/* chip specific data */
u32 chip_id;
+ u8 chip_rev;
int cpu_port; /* port connected to CPU */
int phy_port_cnt;
phy_interface_t compat_interface;
@@ -160,6 +166,7 @@ struct alu_struct {
};
struct ksz_dev_ops {
+ int (*setup)(struct dsa_switch *ds);
u32 (*get_port_addr)(int port, int offset);
void (*cfg_port_member)(struct ksz_device *dev, int port, u8 member);
void (*flush_dyn_mac_table)(struct ksz_device *dev, int port);
@@ -167,71 +174,57 @@ struct ksz_dev_ops {
void (*port_setup)(struct ksz_device *dev, int port, bool cpu_port);
void (*r_phy)(struct ksz_device *dev, u16 phy, u16 reg, u16 *val);
void (*w_phy)(struct ksz_device *dev, u16 phy, u16 reg, u16 val);
- int (*r_dyn_mac_table)(struct ksz_device *dev, u16 addr, u8 *mac_addr,
- u8 *fid, u8 *src_port, u8 *timestamp,
- u16 *entries);
- int (*r_sta_mac_table)(struct ksz_device *dev, u16 addr,
- struct alu_struct *alu);
- void (*w_sta_mac_table)(struct ksz_device *dev, u16 addr,
- struct alu_struct *alu);
void (*r_mib_cnt)(struct ksz_device *dev, int port, u16 addr,
u64 *cnt);
void (*r_mib_pkt)(struct ksz_device *dev, int port, u16 addr,
u64 *dropped, u64 *cnt);
void (*r_mib_stat64)(struct ksz_device *dev, int port);
+ int (*vlan_filtering)(struct ksz_device *dev, int port,
+ bool flag, struct netlink_ext_ack *extack);
+ int (*vlan_add)(struct ksz_device *dev, int port,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct netlink_ext_ack *extack);
+ int (*vlan_del)(struct ksz_device *dev, int port,
+ const struct switchdev_obj_port_vlan *vlan);
+ int (*mirror_add)(struct ksz_device *dev, int port,
+ struct dsa_mall_mirror_tc_entry *mirror,
+ bool ingress, struct netlink_ext_ack *extack);
+ void (*mirror_del)(struct ksz_device *dev, int port,
+ struct dsa_mall_mirror_tc_entry *mirror);
+ int (*fdb_add)(struct ksz_device *dev, int port,
+ const unsigned char *addr, u16 vid, struct dsa_db db);
+ int (*fdb_del)(struct ksz_device *dev, int port,
+ const unsigned char *addr, u16 vid, struct dsa_db db);
+ int (*fdb_dump)(struct ksz_device *dev, int port,
+ dsa_fdb_dump_cb_t *cb, void *data);
+ int (*mdb_add)(struct ksz_device *dev, int port,
+ const struct switchdev_obj_port_mdb *mdb,
+ struct dsa_db db);
+ int (*mdb_del)(struct ksz_device *dev, int port,
+ const struct switchdev_obj_port_mdb *mdb,
+ struct dsa_db db);
+ void (*get_caps)(struct ksz_device *dev, int port,
+ struct phylink_config *config);
+ int (*change_mtu)(struct ksz_device *dev, int port, int mtu);
+ int (*max_mtu)(struct ksz_device *dev, int port);
void (*freeze_mib)(struct ksz_device *dev, int port, bool freeze);
void (*port_init_cnt)(struct ksz_device *dev, int port);
- int (*shutdown)(struct ksz_device *dev);
- int (*detect)(struct ksz_device *dev);
+ void (*config_cpu_port)(struct dsa_switch *ds);
+ int (*enable_stp_addr)(struct ksz_device *dev);
+ int (*reset)(struct ksz_device *dev);
int (*init)(struct ksz_device *dev);
void (*exit)(struct ksz_device *dev);
};
struct ksz_device *ksz_switch_alloc(struct device *base, void *priv);
-int ksz_switch_register(struct ksz_device *dev,
- const struct ksz_dev_ops *ops);
+int ksz_switch_register(struct ksz_device *dev);
void ksz_switch_remove(struct ksz_device *dev);
-int ksz8_switch_register(struct ksz_device *dev);
-int ksz9477_switch_register(struct ksz_device *dev);
-
-void ksz_update_port_member(struct ksz_device *dev, int port);
void ksz_init_mib_timer(struct ksz_device *dev);
void ksz_r_mib_stats64(struct ksz_device *dev, int port);
-void ksz_get_stats64(struct dsa_switch *ds, int port,
- struct rtnl_link_stats64 *s);
-void ksz_phylink_get_caps(struct dsa_switch *ds, int port,
- struct phylink_config *config);
+void ksz_port_stp_state_set(struct dsa_switch *ds, int port, u8 state);
extern const struct ksz_chip_data ksz_switch_chips[];
-/* Common DSA access functions */
-
-int ksz_phy_read16(struct dsa_switch *ds, int addr, int reg);
-int ksz_phy_write16(struct dsa_switch *ds, int addr, int reg, u16 val);
-void ksz_mac_link_down(struct dsa_switch *ds, int port, unsigned int mode,
- phy_interface_t interface);
-int ksz_sset_count(struct dsa_switch *ds, int port, int sset);
-void ksz_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *buf);
-int ksz_port_bridge_join(struct dsa_switch *ds, int port,
- struct dsa_bridge bridge, bool *tx_fwd_offload,
- struct netlink_ext_ack *extack);
-void ksz_port_bridge_leave(struct dsa_switch *ds, int port,
- struct dsa_bridge bridge);
-void ksz_port_stp_state_set(struct dsa_switch *ds, int port,
- u8 state, int reg);
-void ksz_port_fast_age(struct dsa_switch *ds, int port);
-int ksz_port_fdb_dump(struct dsa_switch *ds, int port, dsa_fdb_dump_cb_t *cb,
- void *data);
-int ksz_port_mdb_add(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_mdb *mdb,
- struct dsa_db db);
-int ksz_port_mdb_del(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_mdb *mdb,
- struct dsa_db db);
-int ksz_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy);
-void ksz_get_strings(struct dsa_switch *ds, int port,
- u32 stringset, uint8_t *buf);
-
/* Common register access functions */
static inline int ksz_read8(struct ksz_device *dev, u32 reg, u8 *val)
@@ -353,6 +346,37 @@ static inline void ksz_regmap_unlock(void *__mtx)
#define PORT_RX_ENABLE BIT(1)
#define PORT_LEARN_DISABLE BIT(0)
+/* Switch ID Defines */
+#define REG_CHIP_ID0 0x00
+
+#define SW_FAMILY_ID_M GENMASK(15, 8)
+#define KSZ87_FAMILY_ID 0x87
+#define KSZ88_FAMILY_ID 0x88
+
+#define KSZ8_PORT_STATUS_0 0x08
+#define KSZ8_PORT_FIBER_MODE BIT(7)
+
+#define SW_CHIP_ID_M GENMASK(7, 4)
+#define KSZ87_CHIP_ID_94 0x6
+#define KSZ87_CHIP_ID_95 0x9
+#define KSZ88_CHIP_ID_63 0x3
+
+#define SW_REV_ID_M GENMASK(7, 4)
+
+/* Driver set switch broadcast storm protection at 10% rate. */
+#define BROADCAST_STORM_PROT_RATE 10
+
+/* 148,800 frames/s * 67 ms / 1000 ~= 9969 */
+#define BROADCAST_STORM_VALUE 9969
+
+#define BROADCAST_STORM_RATE_HI 0x07
+#define BROADCAST_STORM_RATE_LO 0xFF
+#define BROADCAST_STORM_RATE 0x07FF
+
+#define MULTICAST_STORM_DISABLE BIT(6)
+
+#define SW_START 0x01
+
/* Regmap tables generation */
#define KSZ_SPI_OP_RD 3
#define KSZ_SPI_OP_WR 2
diff --git a/drivers/net/dsa/microchip/ksz8795_spi.c b/drivers/net/dsa/microchip/ksz_spi.c
index 961a74c359a8..344ff92db099 100644
--- a/drivers/net/dsa/microchip/ksz8795_spi.c
+++ b/drivers/net/dsa/microchip/ksz_spi.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * Microchip KSZ8795 series register access through SPI
+ * Microchip KSZ series register access through SPI
*
* Copyright (C) 2017 Microchip Technology Inc.
* Tristram Ha <Tristram.Ha@microchip.com>
@@ -25,13 +25,20 @@
#define KSZ8863_SPI_ADDR_ALIGN 8
#define KSZ8863_SPI_TURNAROUND_SHIFT 0
+#define KSZ9477_SPI_ADDR_SHIFT 24
+#define KSZ9477_SPI_ADDR_ALIGN 3
+#define KSZ9477_SPI_TURNAROUND_SHIFT 5
+
KSZ_REGMAP_TABLE(ksz8795, 16, KSZ8795_SPI_ADDR_SHIFT,
KSZ8795_SPI_TURNAROUND_SHIFT, KSZ8795_SPI_ADDR_ALIGN);
KSZ_REGMAP_TABLE(ksz8863, 16, KSZ8863_SPI_ADDR_SHIFT,
KSZ8863_SPI_TURNAROUND_SHIFT, KSZ8863_SPI_ADDR_ALIGN);
-static int ksz8795_spi_probe(struct spi_device *spi)
+KSZ_REGMAP_TABLE(ksz9477, 32, KSZ9477_SPI_ADDR_SHIFT,
+ KSZ9477_SPI_TURNAROUND_SHIFT, KSZ9477_SPI_ADDR_ALIGN);
+
+static int ksz_spi_probe(struct spi_device *spi)
{
const struct regmap_config *regmap_config;
const struct ksz_chip_data *chip;
@@ -57,8 +64,12 @@ static int ksz8795_spi_probe(struct spi_device *spi)
if (chip->chip_id == KSZ8830_CHIP_ID)
regmap_config = ksz8863_regmap_config;
- else
+ else if (chip->chip_id == KSZ8795_CHIP_ID ||
+ chip->chip_id == KSZ8794_CHIP_ID ||
+ chip->chip_id == KSZ8765_CHIP_ID)
regmap_config = ksz8795_regmap_config;
+ else
+ regmap_config = ksz9477_regmap_config;
for (i = 0; i < ARRAY_SIZE(ksz8795_regmap_config); i++) {
rc = regmap_config[i];
@@ -82,7 +93,7 @@ static int ksz8795_spi_probe(struct spi_device *spi)
if (ret)
return ret;
- ret = ksz8_switch_register(dev);
+ ret = ksz_switch_register(dev);
/* Main DSA driver may not be started yet. */
if (ret)
@@ -93,7 +104,7 @@ static int ksz8795_spi_probe(struct spi_device *spi)
return 0;
}
-static void ksz8795_spi_remove(struct spi_device *spi)
+static void ksz_spi_remove(struct spi_device *spi)
{
struct ksz_device *dev = spi_get_drvdata(spi);
@@ -103,22 +114,22 @@ static void ksz8795_spi_remove(struct spi_device *spi)
spi_set_drvdata(spi, NULL);
}
-static void ksz8795_spi_shutdown(struct spi_device *spi)
+static void ksz_spi_shutdown(struct spi_device *spi)
{
struct ksz_device *dev = spi_get_drvdata(spi);
if (!dev)
return;
- if (dev->dev_ops->shutdown)
- dev->dev_ops->shutdown(dev);
+ if (dev->dev_ops->reset)
+ dev->dev_ops->reset(dev);
dsa_switch_shutdown(dev->ds);
spi_set_drvdata(spi, NULL);
}
-static const struct of_device_id ksz8795_dt_ids[] = {
+static const struct of_device_id ksz_dt_ids[] = {
{
.compatible = "microchip,ksz8765",
.data = &ksz_switch_chips[KSZ8765]
@@ -139,34 +150,70 @@ static const struct of_device_id ksz8795_dt_ids[] = {
.compatible = "microchip,ksz8873",
.data = &ksz_switch_chips[KSZ8830]
},
+ {
+ .compatible = "microchip,ksz9477",
+ .data = &ksz_switch_chips[KSZ9477]
+ },
+ {
+ .compatible = "microchip,ksz9897",
+ .data = &ksz_switch_chips[KSZ9897]
+ },
+ {
+ .compatible = "microchip,ksz9893",
+ .data = &ksz_switch_chips[KSZ9893]
+ },
+ {
+ .compatible = "microchip,ksz9563",
+ .data = &ksz_switch_chips[KSZ9893]
+ },
+ {
+ .compatible = "microchip,ksz8563",
+ .data = &ksz_switch_chips[KSZ9893]
+ },
+ {
+ .compatible = "microchip,ksz9567",
+ .data = &ksz_switch_chips[KSZ9567]
+ },
{},
};
-MODULE_DEVICE_TABLE(of, ksz8795_dt_ids);
+MODULE_DEVICE_TABLE(of, ksz_dt_ids);
-static const struct spi_device_id ksz8795_spi_ids[] = {
+static const struct spi_device_id ksz_spi_ids[] = {
{ "ksz8765" },
{ "ksz8794" },
{ "ksz8795" },
{ "ksz8863" },
{ "ksz8873" },
+ { "ksz9477" },
+ { "ksz9897" },
+ { "ksz9893" },
+ { "ksz9563" },
+ { "ksz8563" },
+ { "ksz9567" },
{ },
};
-MODULE_DEVICE_TABLE(spi, ksz8795_spi_ids);
+MODULE_DEVICE_TABLE(spi, ksz_spi_ids);
-static struct spi_driver ksz8795_spi_driver = {
+static struct spi_driver ksz_spi_driver = {
.driver = {
- .name = "ksz8795-switch",
+ .name = "ksz-switch",
.owner = THIS_MODULE,
- .of_match_table = of_match_ptr(ksz8795_dt_ids),
+ .of_match_table = of_match_ptr(ksz_dt_ids),
},
- .id_table = ksz8795_spi_ids,
- .probe = ksz8795_spi_probe,
- .remove = ksz8795_spi_remove,
- .shutdown = ksz8795_spi_shutdown,
+ .id_table = ksz_spi_ids,
+ .probe = ksz_spi_probe,
+ .remove = ksz_spi_remove,
+ .shutdown = ksz_spi_shutdown,
};
-module_spi_driver(ksz8795_spi_driver);
+module_spi_driver(ksz_spi_driver);
+MODULE_ALIAS("spi:ksz9477");
+MODULE_ALIAS("spi:ksz9897");
+MODULE_ALIAS("spi:ksz9893");
+MODULE_ALIAS("spi:ksz9563");
+MODULE_ALIAS("spi:ksz8563");
+MODULE_ALIAS("spi:ksz9567");
MODULE_AUTHOR("Tristram Ha <Tristram.Ha@microchip.com>");
-MODULE_DESCRIPTION("Microchip KSZ8795 Series Switch SPI Driver");
+MODULE_DESCRIPTION("Microchip ksz Series Switch SPI Driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 2b02d823d497..835807911be0 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -1038,6 +1038,7 @@ static int
mt7530_port_enable(struct dsa_switch *ds, int port,
struct phy_device *phy)
{
+ struct dsa_port *dp = dsa_to_port(ds, port);
struct mt7530_priv *priv = ds->priv;
mutex_lock(&priv->reg_mutex);
@@ -1046,7 +1047,11 @@ mt7530_port_enable(struct dsa_switch *ds, int port,
* restore the port matrix if the port is a member of a certain
* bridge.
*/
- priv->ports[port].pm |= PCR_MATRIX(BIT(MT7530_CPU_PORT));
+ if (dsa_port_is_user(dp)) {
+ struct dsa_port *cpu_dp = dp->cpu_dp;
+
+ priv->ports[port].pm |= PCR_MATRIX(BIT(cpu_dp->index));
+ }
priv->ports[port].enable = true;
mt7530_rmw(priv, MT7530_PCR_P(port), PCR_MATRIX_MASK,
priv->ports[port].pm);
@@ -1195,7 +1200,8 @@ mt7530_port_bridge_join(struct dsa_switch *ds, int port,
struct netlink_ext_ack *extack)
{
struct dsa_port *dp = dsa_to_port(ds, port), *other_dp;
- u32 port_bitmap = BIT(MT7530_CPU_PORT);
+ struct dsa_port *cpu_dp = dp->cpu_dp;
+ u32 port_bitmap = BIT(cpu_dp->index);
struct mt7530_priv *priv = ds->priv;
mutex_lock(&priv->reg_mutex);
@@ -1272,9 +1278,12 @@ mt7530_port_set_vlan_unaware(struct dsa_switch *ds, int port)
* the CPU port get out of VLAN filtering mode.
*/
if (all_user_ports_removed) {
- mt7530_write(priv, MT7530_PCR_P(MT7530_CPU_PORT),
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ struct dsa_port *cpu_dp = dp->cpu_dp;
+
+ mt7530_write(priv, MT7530_PCR_P(cpu_dp->index),
PCR_MATRIX(dsa_user_ports(priv->ds)));
- mt7530_write(priv, MT7530_PVC_P(MT7530_CPU_PORT), PORT_SPEC_TAG
+ mt7530_write(priv, MT7530_PVC_P(cpu_dp->index), PORT_SPEC_TAG
| PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
}
}
@@ -1312,6 +1321,7 @@ mt7530_port_bridge_leave(struct dsa_switch *ds, int port,
struct dsa_bridge bridge)
{
struct dsa_port *dp = dsa_to_port(ds, port), *other_dp;
+ struct dsa_port *cpu_dp = dp->cpu_dp;
struct mt7530_priv *priv = ds->priv;
mutex_lock(&priv->reg_mutex);
@@ -1340,8 +1350,8 @@ mt7530_port_bridge_leave(struct dsa_switch *ds, int port,
*/
if (priv->ports[port].enable)
mt7530_rmw(priv, MT7530_PCR_P(port), PCR_MATRIX_MASK,
- PCR_MATRIX(BIT(MT7530_CPU_PORT)));
- priv->ports[port].pm = PCR_MATRIX(BIT(MT7530_CPU_PORT));
+ PCR_MATRIX(BIT(cpu_dp->index)));
+ priv->ports[port].pm = PCR_MATRIX(BIT(cpu_dp->index));
/* When a port is removed from the bridge, the port would be set up
* back to the default as is at initial boot which is a VLAN-unaware
@@ -1508,6 +1518,9 @@ static int
mt7530_port_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering,
struct netlink_ext_ack *extack)
{
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ struct dsa_port *cpu_dp = dp->cpu_dp;
+
if (vlan_filtering) {
/* The port is being kept as VLAN-unaware port when bridge is
* set up with vlan_filtering not being set, Otherwise, the
@@ -1515,7 +1528,7 @@ mt7530_port_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering,
* for becoming a VLAN-aware port.
*/
mt7530_port_set_vlan_aware(ds, port);
- mt7530_port_set_vlan_aware(ds, MT7530_CPU_PORT);
+ mt7530_port_set_vlan_aware(ds, cpu_dp->index);
} else {
mt7530_port_set_vlan_unaware(ds, port);
}
@@ -1527,11 +1540,11 @@ static void
mt7530_hw_vlan_add(struct mt7530_priv *priv,
struct mt7530_hw_vlan_entry *entry)
{
+ struct dsa_port *dp = dsa_to_port(priv->ds, entry->port);
u8 new_members;
u32 val;
- new_members = entry->old_members | BIT(entry->port) |
- BIT(MT7530_CPU_PORT);
+ new_members = entry->old_members | BIT(entry->port);
/* Validate the entry with independent learning, create egress tag per
* VLAN and join the port as one of the port members.
@@ -1542,22 +1555,20 @@ mt7530_hw_vlan_add(struct mt7530_priv *priv,
/* Decide whether or not to tag outgoing packets from the
* port inside the VLAN.
- */
- val = entry->untagged ? MT7530_VLAN_EGRESS_UNTAG :
- MT7530_VLAN_EGRESS_TAG;
- mt7530_rmw(priv, MT7530_VAWD2,
- ETAG_CTRL_P_MASK(entry->port),
- ETAG_CTRL_P(entry->port, val));
-
- /* CPU port is always taken as a tagged port for serving more than one
+ * The CPU port is always taken as a tagged port, serving more than one
* VLAN, and uses the egress stack mode so
* that VLAN tags are appended after the hardware special tag used as
* the DSA tag.
*/
+ if (dsa_port_is_cpu(dp))
+ val = MT7530_VLAN_EGRESS_STACK;
+ else if (entry->untagged)
+ val = MT7530_VLAN_EGRESS_UNTAG;
+ else
+ val = MT7530_VLAN_EGRESS_TAG;
mt7530_rmw(priv, MT7530_VAWD2,
- ETAG_CTRL_P_MASK(MT7530_CPU_PORT),
- ETAG_CTRL_P(MT7530_CPU_PORT,
- MT7530_VLAN_EGRESS_STACK));
+ ETAG_CTRL_P_MASK(entry->port),
+ ETAG_CTRL_P(entry->port, val));
}
static void
@@ -1576,11 +1587,7 @@ mt7530_hw_vlan_del(struct mt7530_priv *priv,
return;
}
- /* If certain member apart from CPU port is still alive in the VLAN,
- * the entry would be kept valid. Otherwise, the entry is got to be
- * disabled.
- */
- if (new_members && new_members != BIT(MT7530_CPU_PORT)) {
+ if (new_members) {
val = IVL_MAC | VTAG_EN | PORT_MEM(new_members) |
VLAN_VALID;
mt7530_write(priv, MT7530_VAWD1, val);
@@ -2098,11 +2105,12 @@ static int
mt7530_setup(struct dsa_switch *ds)
{
struct mt7530_priv *priv = ds->priv;
+ struct device_node *dn = NULL;
struct device_node *phy_node;
struct device_node *mac_np;
struct mt7530_dummy_poll p;
phy_interface_t interface;
- struct device_node *dn;
+ struct dsa_port *cpu_dp;
u32 id, val;
int ret, i;
@@ -2110,7 +2118,19 @@ mt7530_setup(struct dsa_switch *ds)
* controller is also the container for two GMAC nodes, represented
* as two netdev instances.
*/
- dn = dsa_to_port(ds, MT7530_CPU_PORT)->master->dev.of_node->parent;
+ dsa_switch_for_each_cpu_port(cpu_dp, ds) {
+ dn = cpu_dp->master->dev.of_node->parent;
+ /* It doesn't matter which CPU port is found first,
+ * their masters should share the same parent OF node
+ */
+ break;
+ }
+
+ if (!dn) {
+ dev_err(ds->dev, "parent OF node of DSA master not found\n");
+ return -EINVAL;
+ }
+
ds->assisted_learning_on_cpu_port = true;
ds->mtu_enforcement_ingress = true;
@@ -2272,6 +2292,7 @@ mt7531_setup(struct dsa_switch *ds)
{
struct mt7530_priv *priv = ds->priv;
struct mt7530_dummy_poll p;
+ struct dsa_port *cpu_dp;
u32 val, id;
int ret, i;
@@ -2344,8 +2365,11 @@ mt7531_setup(struct dsa_switch *ds)
CORE_PLL_GROUP4, val);
/* BPDU to CPU port */
- mt7530_rmw(priv, MT7531_CFC, MT7531_CPU_PMAP_MASK,
- BIT(MT7530_CPU_PORT));
+ dsa_switch_for_each_cpu_port(cpu_dp, ds) {
+ mt7530_rmw(priv, MT7531_CFC, MT7531_CPU_PMAP_MASK,
+ BIT(cpu_dp->index));
+ break;
+ }
mt7530_rmw(priv, MT753X_BPC, MT753X_BPDU_PORT_FW_MASK,
MT753X_BPDU_CPU_ONLY);
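
The mt7530 hunks above replace every hard-coded use of port 6 with a lookup through the port's assigned CPU port, dp->cpu_dp->index, in preparation for multiple CPU ports. A condensed model of that lookup, with toy struct definitions standing in for the real struct dsa_port:

#include <stdio.h>

/* Toy stand-ins for the relevant fields of struct dsa_port */
struct dsa_port {
	int index;
	struct dsa_port *cpu_dp;	/* CPU port this port egresses through */
};

/* was: BIT(MT7530_CPU_PORT); now derived from the port's own CPU port */
static unsigned int cpu_port_bit(const struct dsa_port *dp)
{
	return 1u << dp->cpu_dp->index;
}

int main(void)
{
	struct dsa_port cpu = { .index = 6 };
	struct dsa_port user = { .index = 1, .cpu_dp = &cpu };

	printf("port matrix bit for CPU port: 0x%02x\n", cpu_port_bit(&user));
	return 0;
}
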
diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h
index 71e36b69b96d..e509af95c354 100644
--- a/drivers/net/dsa/mt7530.h
+++ b/drivers/net/dsa/mt7530.h
@@ -8,7 +8,6 @@
#define MT7530_NUM_PORTS 7
#define MT7530_NUM_PHYS 5
-#define MT7530_CPU_PORT 6
#define MT7530_NUM_FDB_RECORDS 2048
#define MT7530_ALL_MEMBERS 0xff
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 0b49d243e00b..37b649501500 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -449,9 +449,6 @@ static int mv88e6xxx_port_setup_mac(struct mv88e6xxx_chip *chip, int port,
goto restore_link;
}
- if (speed == SPEED_MAX && chip->info->ops->port_max_speed_mode)
- mode = chip->info->ops->port_max_speed_mode(port);
-
if (chip->info->ops->port_set_pause) {
err = chip->info->ops->port_set_pause(chip, port, pause);
if (err)
@@ -3280,28 +3277,51 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
{
struct device_node *phy_handle = NULL;
struct dsa_switch *ds = chip->ds;
+ phy_interface_t mode;
struct dsa_port *dp;
- int tx_amp;
+ int tx_amp, speed;
int err;
u16 reg;
chip->ports[port].chip = chip;
chip->ports[port].port = port;
+ dp = dsa_to_port(ds, port);
+
/* MAC Forcing register: don't force link, speed, duplex or flow control
* state to any particular values on physical ports, but force the CPU
* port and all DSA ports to their maximum bandwidth and full duplex.
*/
- if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
+ if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) {
+ unsigned long caps = dp->pl_config.mac_capabilities;
+
+ if (chip->info->ops->port_max_speed_mode)
+ mode = chip->info->ops->port_max_speed_mode(port);
+ else
+ mode = PHY_INTERFACE_MODE_NA;
+
+ if (caps & MAC_10000FD)
+ speed = SPEED_10000;
+ else if (caps & MAC_5000FD)
+ speed = SPEED_5000;
+ else if (caps & MAC_2500FD)
+ speed = SPEED_2500;
+ else if (caps & MAC_1000)
+ speed = SPEED_1000;
+ else if (caps & MAC_100)
+ speed = SPEED_100;
+ else
+ speed = SPEED_10;
+
err = mv88e6xxx_port_setup_mac(chip, port, LINK_FORCED_UP,
- SPEED_MAX, DUPLEX_FULL,
- PAUSE_OFF,
- PHY_INTERFACE_MODE_NA);
- else
+ speed, DUPLEX_FULL,
+ PAUSE_OFF, mode);
+ } else {
err = mv88e6xxx_port_setup_mac(chip, port, LINK_UNFORCED,
SPEED_UNFORCED, DUPLEX_UNFORCED,
PAUSE_ON,
PHY_INTERFACE_MODE_NA);
+ }
if (err)
return err;
@@ -3473,7 +3493,6 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
}
if (chip->info->ops->serdes_set_tx_amplitude) {
- dp = dsa_to_port(ds, port);
if (dp)
phy_handle = of_parse_phandle(dp->dn, "phy-handle", 0);
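
With SPEED_MAX removed, the forced speed for CPU and DSA ports is now derived from phylink's mac_capabilities mask, picking the fastest mode the port supports. The ladder above, extracted into a standalone, compilable form (the MAC_* bit values here are illustrative placeholders, not the real phylink encodings):

#include <stdio.h>

#define MAC_100		(1u << 0)	/* illustrative bit positions only */
#define MAC_1000	(1u << 1)
#define MAC_2500FD	(1u << 2)
#define MAC_5000FD	(1u << 3)
#define MAC_10000FD	(1u << 4)

/* Pick the highest speed advertised in the capabilities mask */
static int max_forced_speed(unsigned long caps)
{
	if (caps & MAC_10000FD)
		return 10000;
	if (caps & MAC_5000FD)
		return 5000;
	if (caps & MAC_2500FD)
		return 2500;
	if (caps & MAC_1000)
		return 1000;
	if (caps & MAC_100)
		return 100;
	return 10;
}

int main(void)
{
	printf("%d\n", max_forced_speed(MAC_100 | MAC_1000 | MAC_2500FD));
	return 0;
}
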
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 5e03cfe50156..e693154cf803 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -488,14 +488,13 @@ struct mv88e6xxx_ops {
int (*port_set_pause)(struct mv88e6xxx_chip *chip, int port,
int pause);
-#define SPEED_MAX INT_MAX
#define SPEED_UNFORCED -2
#define DUPLEX_UNFORCED -2
/* Port's MAC speed (in Mbps) and MAC duplex mode
*
* Depending on the chip, 10, 100, 200, 1000, 2500, 10000 are valid.
- * Use SPEED_UNFORCED for normal detection, SPEED_MAX for max value.
+ * Use SPEED_UNFORCED for normal detection.
*
* Use DUPLEX_HALF or DUPLEX_FULL to force half or full duplex,
* or DUPLEX_UNFORCED for normal duplex detection.
diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c
index 795b3128768f..90c55f23b7c9 100644
--- a/drivers/net/dsa/mv88e6xxx/port.c
+++ b/drivers/net/dsa/mv88e6xxx/port.c
@@ -294,28 +294,10 @@ static int mv88e6xxx_port_set_speed_duplex(struct mv88e6xxx_chip *chip,
return 0;
}
-/* Support 10, 100, 200 Mbps (e.g. 88E6065 family) */
-int mv88e6065_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port,
- int speed, int duplex)
-{
- if (speed == SPEED_MAX)
- speed = 200;
-
- if (speed > 200)
- return -EOPNOTSUPP;
-
- /* Setting 200 Mbps on port 0 to 3 selects 100 Mbps */
- return mv88e6xxx_port_set_speed_duplex(chip, port, speed, false, false,
- duplex);
-}
-
/* Support 10, 100, 1000 Mbps (e.g. 88E6185 family) */
int mv88e6185_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port,
int speed, int duplex)
{
- if (speed == SPEED_MAX)
- speed = 1000;
-
if (speed == 200 || speed > 1000)
return -EOPNOTSUPP;
@@ -327,9 +309,6 @@ int mv88e6185_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port,
int mv88e6250_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port,
int speed, int duplex)
{
- if (speed == SPEED_MAX)
- speed = 100;
-
if (speed > 100)
return -EOPNOTSUPP;
@@ -341,9 +320,6 @@ int mv88e6250_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port,
int mv88e6341_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port,
int speed, int duplex)
{
- if (speed == SPEED_MAX)
- speed = port < 5 ? 1000 : 2500;
-
if (speed > 2500)
return -EOPNOTSUPP;
@@ -369,9 +345,6 @@ phy_interface_t mv88e6341_port_max_speed_mode(int port)
int mv88e6352_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port,
int speed, int duplex)
{
- if (speed == SPEED_MAX)
- speed = 1000;
-
if (speed > 1000)
return -EOPNOTSUPP;
@@ -386,9 +359,6 @@ int mv88e6352_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port,
int mv88e6390_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port,
int speed, int duplex)
{
- if (speed == SPEED_MAX)
- speed = port < 9 ? 1000 : 2500;
-
if (speed > 2500)
return -EOPNOTSUPP;
@@ -414,9 +384,6 @@ phy_interface_t mv88e6390_port_max_speed_mode(int port)
int mv88e6390x_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port,
int speed, int duplex)
{
- if (speed == SPEED_MAX)
- speed = port < 9 ? 1000 : 10000;
-
if (speed == 200 && port != 0)
return -EOPNOTSUPP;
@@ -445,9 +412,6 @@ int mv88e6393x_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port,
u16 reg, ctrl;
int err;
- if (speed == SPEED_MAX)
- speed = (port > 0 && port < 9) ? 1000 : 10000;
-
if (speed == 200 && port != 0)
return -EOPNOTSUPP;
diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h
index e0a705d82019..cb04243f37c1 100644
--- a/drivers/net/dsa/mv88e6xxx/port.h
+++ b/drivers/net/dsa/mv88e6xxx/port.h
@@ -342,8 +342,6 @@ int mv88e6xxx_port_set_link(struct mv88e6xxx_chip *chip, int port, int link);
int mv88e6xxx_port_sync_link(struct mv88e6xxx_chip *chip, int port, unsigned int mode, bool isup);
int mv88e6185_port_sync_link(struct mv88e6xxx_chip *chip, int port, unsigned int mode, bool isup);
-int mv88e6065_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port,
- int speed, int duplex);
int mv88e6185_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port,
int speed, int duplex);
int mv88e6250_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port,
diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
index 570d0204b7be..dd9085ae0922 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -1196,10 +1196,13 @@ static void vsc9959_tas_gcl_set(struct ocelot *ocelot, const u32 gcl_ix,
static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
struct tc_taprio_qopt_offload *taprio)
{
+ struct ocelot_port *ocelot_port = ocelot->ports[port];
struct timespec64 base_ts;
int ret, i;
u32 val;
+ mutex_lock(&ocelot->tas_lock);
+
if (!taprio->enable) {
ocelot_rmw_rix(ocelot,
QSYS_TAG_CONFIG_INIT_GATE_STATE(0xFF),
@@ -1207,15 +1210,20 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
QSYS_TAG_CONFIG_INIT_GATE_STATE_M,
QSYS_TAG_CONFIG, port);
+ mutex_unlock(&ocelot->tas_lock);
return 0;
}
if (taprio->cycle_time > NSEC_PER_SEC ||
- taprio->cycle_time_extension >= NSEC_PER_SEC)
- return -EINVAL;
+ taprio->cycle_time_extension >= NSEC_PER_SEC) {
+ ret = -EINVAL;
+ goto err;
+ }
- if (taprio->num_entries > VSC9959_TAS_GCL_ENTRY_MAX)
- return -ERANGE;
+ if (taprio->num_entries > VSC9959_TAS_GCL_ENTRY_MAX) {
+ ret = -ERANGE;
+ goto err;
+ }
/* Enable guard band. The switch will schedule frames without taking
* their length into account. Thus we'll always need to enable the
@@ -1236,8 +1244,10 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
* config is pending, need to reset the TAS module
*/
val = ocelot_read(ocelot, QSYS_PARAM_STATUS_REG_8);
- if (val & QSYS_PARAM_STATUS_REG_8_CONFIG_PENDING)
- return -EBUSY;
+ if (val & QSYS_PARAM_STATUS_REG_8_CONFIG_PENDING) {
+ ret = -EBUSY;
+ goto err;
+ }
ocelot_rmw_rix(ocelot,
QSYS_TAG_CONFIG_ENABLE |
@@ -1248,6 +1258,8 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
QSYS_TAG_CONFIG_SCH_TRAFFIC_QUEUES_M,
QSYS_TAG_CONFIG, port);
+ ocelot_port->base_time = taprio->base_time;
+
vsc9959_new_base_time(ocelot, taprio->base_time,
taprio->cycle_time, &base_ts);
ocelot_write(ocelot, base_ts.tv_nsec, QSYS_PARAM_CFG_REG_1);
@@ -1271,9 +1283,67 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
!(val & QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE),
10, 100000);
+err:
+ mutex_unlock(&ocelot->tas_lock);
+
return ret;
}
+static void vsc9959_tas_clock_adjust(struct ocelot *ocelot)
+{
+ struct ocelot_port *ocelot_port;
+ struct timespec64 base_ts;
+ u64 cycletime;
+ int port;
+ u32 val;
+
+ mutex_lock(&ocelot->tas_lock);
+
+ for (port = 0; port < ocelot->num_phys_ports; port++) {
+ val = ocelot_read_rix(ocelot, QSYS_TAG_CONFIG, port);
+ if (!(val & QSYS_TAG_CONFIG_ENABLE))
+ continue;
+
+ ocelot_rmw(ocelot,
+ QSYS_TAS_PARAM_CFG_CTRL_PORT_NUM(port),
+ QSYS_TAS_PARAM_CFG_CTRL_PORT_NUM_M,
+ QSYS_TAS_PARAM_CFG_CTRL);
+
+ ocelot_rmw_rix(ocelot,
+ QSYS_TAG_CONFIG_INIT_GATE_STATE(0xFF),
+ QSYS_TAG_CONFIG_ENABLE |
+ QSYS_TAG_CONFIG_INIT_GATE_STATE_M,
+ QSYS_TAG_CONFIG, port);
+
+ cycletime = ocelot_read(ocelot, QSYS_PARAM_CFG_REG_4);
+ ocelot_port = ocelot->ports[port];
+
+ vsc9959_new_base_time(ocelot, ocelot_port->base_time,
+ cycletime, &base_ts);
+
+ ocelot_write(ocelot, base_ts.tv_nsec, QSYS_PARAM_CFG_REG_1);
+ ocelot_write(ocelot, lower_32_bits(base_ts.tv_sec),
+ QSYS_PARAM_CFG_REG_2);
+ val = upper_32_bits(base_ts.tv_sec);
+ ocelot_rmw(ocelot,
+ QSYS_PARAM_CFG_REG_3_BASE_TIME_SEC_MSB(val),
+ QSYS_PARAM_CFG_REG_3_BASE_TIME_SEC_MSB_M,
+ QSYS_PARAM_CFG_REG_3);
+
+ ocelot_rmw(ocelot, QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE,
+ QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE,
+ QSYS_TAS_PARAM_CFG_CTRL);
+
+ ocelot_rmw_rix(ocelot,
+ QSYS_TAG_CONFIG_INIT_GATE_STATE(0xFF) |
+ QSYS_TAG_CONFIG_ENABLE,
+ QSYS_TAG_CONFIG_ENABLE |
+ QSYS_TAG_CONFIG_INIT_GATE_STATE_M,
+ QSYS_TAG_CONFIG, port);
+ }
+ mutex_unlock(&ocelot->tas_lock);
+}
+
static int vsc9959_qos_port_cbs_set(struct dsa_switch *ds, int port,
struct tc_cbs_qopt_offload *cbs_qopt)
{
@@ -2210,6 +2280,7 @@ static const struct ocelot_ops vsc9959_ops = {
.psfp_filter_del = vsc9959_psfp_filter_del,
.psfp_stats_get = vsc9959_psfp_stats_get,
.cut_through_fwd = vsc9959_cut_through_fwd,
+ .tas_clock_adjust = vsc9959_tas_clock_adjust,
};
static const struct felix_info felix_info_vsc9959 = {
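
vsc9959_tas_clock_adjust() re-programs each active port's schedule after a PTP clock step, using the base_time cached in vsc9959_qos_port_tas_set() under the new tas_lock. A helper like vsc9959_new_base_time() (used above, but defined outside this hunk) has to push that base time forward to the first cycle boundary still in the future; a plain-C sketch of the arithmetic, assuming nanosecond u64 timestamps rather than the real timespec64:

#include <stdint.h>
#include <stdio.h>

/* Advance base_time by whole cycles until it lies in the future. The
 * real helper works on timespec64 and reads the current PTP time from
 * hardware; this only shows the rounding. */
static uint64_t next_base_time(uint64_t base_time, uint64_t cycle_time,
			       uint64_t now)
{
	uint64_t n;

	if (base_time >= now)
		return base_time;

	n = (now - base_time + cycle_time - 1) / cycle_time;
	return base_time + n * cycle_time;
}

int main(void)
{
	/* base 1000 ns, 500 ns cycles, clock stepped to 2300 ns -> 2500 */
	printf("%llu\n",
	       (unsigned long long)next_base_time(1000, 500, 2300));
	return 0;
}
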
diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c
index e5098cfe44bc..f23ce56fa591 100644
--- a/drivers/net/dsa/qca/ar9331.c
+++ b/drivers/net/dsa/qca/ar9331.c
@@ -818,7 +818,7 @@ static int __ar9331_mdio_write(struct mii_bus *sbus, u8 mode, u16 reg, u16 val)
FIELD_GET(AR9331_SW_LOW_ADDR_PHY, reg);
r = FIELD_GET(AR9331_SW_LOW_ADDR_REG, reg);
- return mdiobus_write(sbus, p, r, val);
+ return __mdiobus_write(sbus, p, r, val);
}
static int __ar9331_mdio_read(struct mii_bus *sbus, u16 reg)
@@ -829,7 +829,7 @@ static int __ar9331_mdio_read(struct mii_bus *sbus, u16 reg)
FIELD_GET(AR9331_SW_LOW_ADDR_PHY, reg);
r = FIELD_GET(AR9331_SW_LOW_ADDR_REG, reg);
- return mdiobus_read(sbus, p, r);
+ return __mdiobus_read(sbus, p, r);
}
static int ar9331_mdio_read(void *ctx, const void *reg_buf, size_t reg_len,
@@ -849,6 +849,8 @@ static int ar9331_mdio_read(void *ctx, const void *reg_buf, size_t reg_len,
return 0;
}
+ mutex_lock_nested(&sbus->mdio_lock, MDIO_MUTEX_NESTED);
+
ret = __ar9331_mdio_read(sbus, reg);
if (ret < 0)
goto error;
@@ -860,9 +862,13 @@ static int ar9331_mdio_read(void *ctx, const void *reg_buf, size_t reg_len,
*(u32 *)val_buf |= ret << 16;
+ mutex_unlock(&sbus->mdio_lock);
+
return 0;
error:
+ mutex_unlock(&sbus->mdio_lock);
dev_err_ratelimited(&sbus->dev, "Bus error. Failed to read register.\n");
+
return ret;
}
@@ -872,12 +878,15 @@ static int ar9331_mdio_write(void *ctx, u32 reg, u32 val)
struct mii_bus *sbus = priv->sbus;
int ret;
+ mutex_lock_nested(&sbus->mdio_lock, MDIO_MUTEX_NESTED);
if (reg == AR9331_SW_REG_PAGE) {
ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_PAGE,
0, val);
if (ret < 0)
goto error;
+ mutex_unlock(&sbus->mdio_lock);
+
return 0;
}
@@ -897,10 +906,14 @@ static int ar9331_mdio_write(void *ctx, u32 reg, u32 val)
if (ret < 0)
goto error;
+ mutex_unlock(&sbus->mdio_lock);
+
return 0;
error:
+ mutex_unlock(&sbus->mdio_lock);
dev_err_ratelimited(&sbus->dev, "Bus error. Failed to write register.\n");
+
return ret;
}
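
The ar9331 hunks fix a race: a single 32-bit switch register access is really a page-select write plus two 16-bit MDIO transactions, so the driver now holds the bus's mdio_lock (with the MDIO_MUTEX_NESTED class, since an outer MDIO operation may already hold a bus lock) across the whole sequence, switching to the unlocked __mdiobus_read()/__mdiobus_write() accessors inside it. A condensed sketch of the pattern; the PHY addresses and page-select placement are invented for illustration, and intermediate errors are not checked:

#include <linux/errno.h>
#include <linux/mdio.h>
#include <linux/phy.h>

static int example_paged_read32(struct mii_bus *sbus, u16 page, u16 reg,
				u32 *val)
{
	int lo, hi;

	mutex_lock_nested(&sbus->mdio_lock, MDIO_MUTEX_NESTED);
	__mdiobus_write(sbus, 0x18, 0, page);		/* select page */
	lo = __mdiobus_read(sbus, 0x10, reg);		/* low 16 bits */
	hi = __mdiobus_read(sbus, 0x10, reg + 1);	/* high 16 bits */
	mutex_unlock(&sbus->mdio_lock);

	if (lo < 0 || hi < 0)
		return -EIO;

	*val = (u32)hi << 16 | (u32)lo;
	return 0;
}
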
diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 2727d3169c25..1cbb05b0323f 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -2334,6 +2334,7 @@ static int
qca8k_port_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
{
struct qca8k_priv *priv = ds->priv;
+ int ret;
/* We only have a general MTU setting.
* DSA always sets the CPU port's MTU to the largest MTU of the slave
@@ -2344,8 +2345,27 @@ qca8k_port_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
if (!dsa_is_cpu_port(ds, port))
return 0;
+ /* To change the MAX_FRAME_SIZE the CPU ports must be off, or
+ * the switch panics.
+ * Turn off both CPU ports before applying the new value to prevent
+ * this.
+ */
+ if (priv->port_enabled_map & BIT(0))
+ qca8k_port_set_status(priv, 0, 0);
+
+ if (priv->port_enabled_map & BIT(6))
+ qca8k_port_set_status(priv, 6, 0);
+
/* Include L2 header / FCS length */
- return qca8k_write(priv, QCA8K_MAX_FRAME_SIZE, new_mtu + ETH_HLEN + ETH_FCS_LEN);
+ ret = qca8k_write(priv, QCA8K_MAX_FRAME_SIZE, new_mtu + ETH_HLEN + ETH_FCS_LEN);
+
+ if (priv->port_enabled_map & BIT(0))
+ qca8k_port_set_status(priv, 0, 1);
+
+ if (priv->port_enabled_map & BIT(6))
+ qca8k_port_set_status(priv, 6, 1);
+
+ return ret;
}
static int
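
The qca8k change brackets the MAX_FRAME_SIZE write with a disable/re-enable of the two CPU ports (0 and 6), touching only those recorded as enabled in port_enabled_map and preserving the write's return code. The bracket pattern in a standalone, compilable form, with stub functions standing in for the real register accessors:

#include <stdio.h>

#define BIT(n) (1u << (n))

static unsigned int port_enabled_map = BIT(0) | BIT(6);

static void port_set_status(int port, int up)
{
	printf("port %d -> %s\n", port, up ? "up" : "down");
}

static int write_max_frame_size(int len)
{
	printf("MAX_FRAME_SIZE = %d\n", len);
	return 0;	/* pretend the register write succeeded */
}

/* Bracket a register write the hardware only tolerates while the CPU
 * ports are disabled, re-enabling only what was enabled before. */
static int change_mtu(int new_mtu)
{
	int ret;

	if (port_enabled_map & BIT(0))
		port_set_status(0, 0);
	if (port_enabled_map & BIT(6))
		port_set_status(6, 0);

	ret = write_max_frame_size(new_mtu + 18);	/* + L2 header/FCS */

	if (port_enabled_map & BIT(0))
		port_set_status(0, 1);
	if (port_enabled_map & BIT(6))
		port_set_status(6, 1);

	return ret;
}

int main(void)
{
	return change_mtu(1500);
}
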
diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h
index 04408e11402a..ec58d0e80a70 100644
--- a/drivers/net/dsa/qca8k.h
+++ b/drivers/net/dsa/qca8k.h
@@ -15,7 +15,7 @@
#define QCA8K_ETHERNET_MDIO_PRIORITY 7
#define QCA8K_ETHERNET_PHY_PRIORITY 6
-#define QCA8K_ETHERNET_TIMEOUT 100
+#define QCA8K_ETHERNET_TIMEOUT 5
#define QCA8K_NUM_PORTS 7
#define QCA8K_NUM_CPU_PORTS 2
diff --git a/drivers/net/dsa/realtek/rtl8365mb.c b/drivers/net/dsa/realtek/rtl8365mb.c
index 769f672e9128..da31d8b839ac 100644
--- a/drivers/net/dsa/realtek/rtl8365mb.c
+++ b/drivers/net/dsa/realtek/rtl8365mb.c
@@ -101,27 +101,14 @@
#include "realtek.h"
-/* Chip-specific data and limits */
-#define RTL8365MB_CHIP_ID_8365MB_VC 0x6367
-#define RTL8365MB_CHIP_VER_8365MB_VC 0x0040
-
-#define RTL8365MB_CHIP_ID_8367S 0x6367
-#define RTL8365MB_CHIP_VER_8367S 0x00A0
-
-#define RTL8365MB_CHIP_ID_8367RB 0x6367
-#define RTL8365MB_CHIP_VER_8367RB 0x0020
-
/* Family-specific data and limits */
#define RTL8365MB_PHYADDRMAX 7
#define RTL8365MB_NUM_PHYREGS 32
#define RTL8365MB_PHYREGMAX (RTL8365MB_NUM_PHYREGS - 1)
-/* RTL8370MB and RTL8310SR, possibly suportable by this driver, have 10 ports */
-#define RTL8365MB_MAX_NUM_PORTS 10
+#define RTL8365MB_MAX_NUM_PORTS 11
+#define RTL8365MB_MAX_NUM_EXTINTS 3
#define RTL8365MB_LEARN_LIMIT_MAX 2112
-/* valid for all 6-port or less variants */
-static const int rtl8365mb_extint_port_map[] = { -1, -1, -1, -1, -1, -1, 1, 2, -1, -1};
-
/* Chip identification registers */
#define RTL8365MB_CHIP_ID_REG 0x1300
@@ -201,7 +188,7 @@ static const int rtl8365mb_extint_port_map[] = { -1, -1, -1, -1, -1, -1, 1, 2,
/* The PHY OCP addresses of PHY registers 0~31 start here */
#define RTL8365MB_PHY_OCP_ADDR_PHYREG_BASE 0xA400
-/* EXT interface port mode values - used in DIGITAL_INTERFACE_SELECT */
+/* External interface port mode values - used in DIGITAL_INTERFACE_SELECT */
#define RTL8365MB_EXT_PORT_MODE_DISABLE 0
#define RTL8365MB_EXT_PORT_MODE_RGMII 1
#define RTL8365MB_EXT_PORT_MODE_MII_MAC 2
@@ -217,19 +204,7 @@ static const int rtl8365mb_extint_port_map[] = { -1, -1, -1, -1, -1, -1, 1, 2,
#define RTL8365MB_EXT_PORT_MODE_1000X 12
#define RTL8365MB_EXT_PORT_MODE_100FX 13
-/* Realtek docs and driver uses logic number as EXT_PORT0=16, EXT_PORT1=17,
- * EXT_PORT2=18, to interact with switch ports. That logic number is internally
- * converted to either a physical port number (0..9) or an external interface id (0..2),
- * depending on which function was called. The external interface id is calculated as
- * (ext_id=logic_port-15), while the logical to physical map depends on the chip id/version.
- *
- * EXT_PORT0 mentioned in datasheets and rtl8367c driver is used in this driver
- * as extid==1, EXT_PORT2, mentioned in Realtek rtl8367c driver for 10-port switches,
- * would have an ext_id of 3 (out of range for most extint macros) and ext_id 0 does
- * not seem to be used as well for this family.
- */
-
-/* EXT interface mode configuration registers 0~1 */
+/* External interface mode configuration registers 0~1 */
#define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG0 0x1305 /* EXT1 */
#define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG1 0x13C3 /* EXT2 */
#define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG(_extint) \
@@ -241,7 +216,7 @@ static const int rtl8365mb_extint_port_map[] = { -1, -1, -1, -1, -1, -1, 1, 2,
#define RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_OFFSET(_extint) \
(((_extint) % 2) * 4)
-/* EXT interface RGMII TX/RX delay configuration registers 0~2 */
+/* External interface RGMII TX/RX delay configuration registers 0~2 */
#define RTL8365MB_EXT_RGMXF_REG0 0x1306 /* EXT0 */
#define RTL8365MB_EXT_RGMXF_REG1 0x1307 /* EXT1 */
#define RTL8365MB_EXT_RGMXF_REG2 0x13C5 /* EXT2 */
@@ -258,7 +233,7 @@ static const int rtl8365mb_extint_port_map[] = { -1, -1, -1, -1, -1, -1, 1, 2,
#define RTL8365MB_PORT_SPEED_100M 1
#define RTL8365MB_PORT_SPEED_1000M 2
-/* EXT interface force configuration registers 0~2 */
+/* External interface force configuration registers 0~2 */
#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG0 0x1310 /* EXT0 */
#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG1 0x1311 /* EXT1 */
#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG2 0x13C4 /* EXT2 */
@@ -490,6 +465,95 @@ static const struct rtl8365mb_jam_tbl_entry rtl8365mb_init_jam_common[] = {
{ 0x1D32, 0x0002 },
};
+enum rtl8365mb_phy_interface_mode {
+ RTL8365MB_PHY_INTERFACE_MODE_INVAL = 0,
+ RTL8365MB_PHY_INTERFACE_MODE_INTERNAL = BIT(0),
+ RTL8365MB_PHY_INTERFACE_MODE_MII = BIT(1),
+ RTL8365MB_PHY_INTERFACE_MODE_TMII = BIT(2),
+ RTL8365MB_PHY_INTERFACE_MODE_RMII = BIT(3),
+ RTL8365MB_PHY_INTERFACE_MODE_RGMII = BIT(4),
+ RTL8365MB_PHY_INTERFACE_MODE_SGMII = BIT(5),
+ RTL8365MB_PHY_INTERFACE_MODE_HSGMII = BIT(6),
+};
+
+/**
+ * struct rtl8365mb_extint - external interface info
+ * @port: the port with an external interface
+ * @id: the external interface ID, which is either 0, 1, or 2
+ * @supported_interfaces: a bitmask of supported PHY interface modes
+ *
+ * Represents a mapping: port -> { id, supported_interfaces }. To be embedded
+ * in &struct rtl8365mb_chip_info for every port with an external interface.
+ */
+struct rtl8365mb_extint {
+ int port;
+ int id;
+ unsigned int supported_interfaces;
+};
+
+/**
+ * struct rtl8365mb_chip_info - static chip-specific info
+ * @name: human-readable chip name
+ * @chip_id: chip identifier
+ * @chip_ver: chip silicon revision
+ * @extints: available external interfaces
+ * @jam_table: chip-specific initialization jam table
+ * @jam_size: size of the chip's jam table
+ *
+ * These data are specific to a given chip in the family of switches supported
+ * by this driver. When adding support for another chip in the family, a new
+ * chip info should be added to the rtl8365mb_chip_infos array.
+ */
+struct rtl8365mb_chip_info {
+ const char *name;
+ u32 chip_id;
+ u32 chip_ver;
+ const struct rtl8365mb_extint extints[RTL8365MB_MAX_NUM_EXTINTS];
+ const struct rtl8365mb_jam_tbl_entry *jam_table;
+ size_t jam_size;
+};
+
+/* Chip info for each supported switch in the family */
+#define PHY_INTF(_mode) (RTL8365MB_PHY_INTERFACE_MODE_ ## _mode)
+static const struct rtl8365mb_chip_info rtl8365mb_chip_infos[] = {
+ {
+ .name = "RTL8365MB-VC",
+ .chip_id = 0x6367,
+ .chip_ver = 0x0040,
+ .extints = {
+ { 6, 1, PHY_INTF(MII) | PHY_INTF(TMII) |
+ PHY_INTF(RMII) | PHY_INTF(RGMII) },
+ },
+ .jam_table = rtl8365mb_init_jam_8365mb_vc,
+ .jam_size = ARRAY_SIZE(rtl8365mb_init_jam_8365mb_vc),
+ },
+ {
+ .name = "RTL8367S",
+ .chip_id = 0x6367,
+ .chip_ver = 0x00A0,
+ .extints = {
+ { 6, 1, PHY_INTF(SGMII) | PHY_INTF(HSGMII) },
+ { 7, 2, PHY_INTF(MII) | PHY_INTF(TMII) |
+ PHY_INTF(RMII) | PHY_INTF(RGMII) },
+ },
+ .jam_table = rtl8365mb_init_jam_8365mb_vc,
+ .jam_size = ARRAY_SIZE(rtl8365mb_init_jam_8365mb_vc),
+ },
+ {
+ .name = "RTL8367RB-VB",
+ .chip_id = 0x6367,
+ .chip_ver = 0x0020,
+ .extints = {
+ { 6, 1, PHY_INTF(MII) | PHY_INTF(TMII) |
+ PHY_INTF(RMII) | PHY_INTF(RGMII) },
+ { 7, 2, PHY_INTF(MII) | PHY_INTF(TMII) |
+ PHY_INTF(RMII) | PHY_INTF(RGMII) },
+ },
+ .jam_table = rtl8365mb_init_jam_8365mb_vc,
+ .jam_size = ARRAY_SIZE(rtl8365mb_init_jam_8365mb_vc),
+ },
+};
+
enum rtl8365mb_stp_state {
RTL8365MB_STP_STATE_DISABLED = 0,
RTL8365MB_STP_STATE_BLOCKING = 1,
@@ -559,33 +623,23 @@ struct rtl8365mb_port {
};
/**
- * struct rtl8365mb - private chip-specific driver data
+ * struct rtl8365mb - driver private data
* @priv: pointer to parent realtek_priv data
* @irq: registered IRQ or zero
- * @chip_id: chip identifier
- * @chip_ver: chip silicon revision
- * @port_mask: mask of all ports
- * @learn_limit_max: maximum number of L2 addresses the chip can learn
+ * @chip_info: chip-specific info about the attached switch
* @cpu: CPU tagging and CPU port configuration for this chip
* @mib_lock: prevent concurrent reads of MIB counters
* @ports: per-port data
- * @jam_table: chip-specific initialization jam table
- * @jam_size: size of the chip's jam table
*
* Private data for this driver.
*/
struct rtl8365mb {
struct realtek_priv *priv;
int irq;
- u32 chip_id;
- u32 chip_ver;
- u32 port_mask;
- u32 learn_limit_max;
+ const struct rtl8365mb_chip_info *chip_info;
struct rtl8365mb_cpu cpu;
struct mutex mib_lock;
struct rtl8365mb_port ports[RTL8365MB_MAX_NUM_PORTS];
- const struct rtl8365mb_jam_tbl_entry *jam_table;
- size_t jam_size;
};
static int rtl8365mb_phy_poll_busy(struct realtek_priv *priv)
@@ -780,6 +834,26 @@ static int rtl8365mb_dsa_phy_write(struct dsa_switch *ds, int phy, int regnum,
return rtl8365mb_phy_write(ds->priv, phy, regnum, val);
}
+static const struct rtl8365mb_extint *
+rtl8365mb_get_port_extint(struct realtek_priv *priv, int port)
+{
+ struct rtl8365mb *mb = priv->chip_data;
+ int i;
+
+ for (i = 0; i < RTL8365MB_MAX_NUM_EXTINTS; i++) {
+ const struct rtl8365mb_extint *extint =
+ &mb->chip_info->extints[i];
+
+ if (!extint->supported_interfaces)
+ continue;
+
+ if (extint->port == port)
+ return extint;
+ }
+
+ return NULL;
+}
+
static enum dsa_tag_protocol
rtl8365mb_get_tag_protocol(struct dsa_switch *ds, int port,
enum dsa_tag_protocol mp)
@@ -800,20 +874,17 @@ rtl8365mb_get_tag_protocol(struct dsa_switch *ds, int port,
static int rtl8365mb_ext_config_rgmii(struct realtek_priv *priv, int port,
phy_interface_t interface)
{
+ const struct rtl8365mb_extint *extint =
+ rtl8365mb_get_port_extint(priv, port);
struct device_node *dn;
struct dsa_port *dp;
int tx_delay = 0;
int rx_delay = 0;
- int ext_int;
u32 val;
int ret;
- ext_int = rtl8365mb_extint_port_map[port];
-
- if (ext_int <= 0) {
- dev_err(priv->dev, "Port %d is not an external interface port\n", port);
- return -EINVAL;
- }
+ if (!extint)
+ return -ENODEV;
dp = dsa_to_port(priv->ds, port);
dn = dp->dn;
@@ -847,7 +918,7 @@ static int rtl8365mb_ext_config_rgmii(struct realtek_priv *priv, int port,
tx_delay = val / 2;
else
dev_warn(priv->dev,
- "EXT interface TX delay must be 0 or 2 ns\n");
+ "RGMII TX delay must be 0 or 2 ns\n");
}
if (!of_property_read_u32(dn, "rx-internal-delay-ps", &val)) {
@@ -857,11 +928,11 @@ static int rtl8365mb_ext_config_rgmii(struct realtek_priv *priv, int port,
rx_delay = val;
else
dev_warn(priv->dev,
- "EXT interface RX delay must be 0 to 2.1 ns\n");
+ "RGMII RX delay must be 0 to 2.1 ns\n");
}
ret = regmap_update_bits(
- priv->map, RTL8365MB_EXT_RGMXF_REG(ext_int),
+ priv->map, RTL8365MB_EXT_RGMXF_REG(extint->id),
RTL8365MB_EXT_RGMXF_TXDELAY_MASK |
RTL8365MB_EXT_RGMXF_RXDELAY_MASK,
FIELD_PREP(RTL8365MB_EXT_RGMXF_TXDELAY_MASK, tx_delay) |
@@ -870,11 +941,11 @@ static int rtl8365mb_ext_config_rgmii(struct realtek_priv *priv, int port,
return ret;
ret = regmap_update_bits(
- priv->map, RTL8365MB_DIGITAL_INTERFACE_SELECT_REG(ext_int),
- RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_MASK(ext_int),
+ priv->map, RTL8365MB_DIGITAL_INTERFACE_SELECT_REG(extint->id),
+ RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_MASK(extint->id),
RTL8365MB_EXT_PORT_MODE_RGMII
<< RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_OFFSET(
- ext_int));
+ extint->id));
if (ret)
return ret;
@@ -885,21 +956,18 @@ static int rtl8365mb_ext_config_forcemode(struct realtek_priv *priv, int port,
bool link, int speed, int duplex,
bool tx_pause, bool rx_pause)
{
+ const struct rtl8365mb_extint *extint =
+ rtl8365mb_get_port_extint(priv, port);
u32 r_tx_pause;
u32 r_rx_pause;
u32 r_duplex;
u32 r_speed;
u32 r_link;
- int ext_int;
int val;
int ret;
- ext_int = rtl8365mb_extint_port_map[port];
-
- if (ext_int <= 0) {
- dev_err(priv->dev, "Port %d is not an external interface port\n", port);
- return -EINVAL;
- }
+ if (!extint)
+ return -ENODEV;
if (link) {
/* Force the link up with the desired configuration */
@@ -947,7 +1015,7 @@ static int rtl8365mb_ext_config_forcemode(struct realtek_priv *priv, int port,
r_duplex) |
FIELD_PREP(RTL8365MB_DIGITAL_INTERFACE_FORCE_SPEED_MASK, r_speed);
ret = regmap_write(priv->map,
- RTL8365MB_DIGITAL_INTERFACE_FORCE_REG(ext_int),
+ RTL8365MB_DIGITAL_INTERFACE_FORCE_REG(extint->id),
val);
if (ret)
return ret;
@@ -958,7 +1026,13 @@ static int rtl8365mb_ext_config_forcemode(struct realtek_priv *priv, int port,
static void rtl8365mb_phylink_get_caps(struct dsa_switch *ds, int port,
struct phylink_config *config)
{
- if (dsa_is_user_port(ds, port)) {
+ const struct rtl8365mb_extint *extint =
+ rtl8365mb_get_port_extint(ds->priv, port);
+
+ config->mac_capabilities = MAC_SYM_PAUSE | MAC_ASYM_PAUSE |
+ MAC_10 | MAC_100 | MAC_1000FD;
+
+ if (!extint) {
__set_bit(PHY_INTERFACE_MODE_INTERNAL,
config->supported_interfaces);
@@ -967,12 +1041,16 @@ static void rtl8365mb_phylink_get_caps(struct dsa_switch *ds, int port,
*/
__set_bit(PHY_INTERFACE_MODE_GMII,
config->supported_interfaces);
- } else if (dsa_is_cpu_port(ds, port)) {
- phy_interface_set_rgmii(config->supported_interfaces);
+ return;
}
- config->mac_capabilities = MAC_SYM_PAUSE | MAC_ASYM_PAUSE |
- MAC_10 | MAC_100 | MAC_1000FD;
+ /* Populate according to the modes supported by _this driver_,
+ * not necessarily the modes supported by the hardware, some of
+ * which remain unimplemented.
+ */
+
+ if (extint->supported_interfaces & RTL8365MB_PHY_INTERFACE_MODE_RGMII)
+ phy_interface_set_rgmii(config->supported_interfaces);
}
static void rtl8365mb_phylink_mac_config(struct dsa_switch *ds, int port,
@@ -1091,15 +1169,13 @@ static void rtl8365mb_port_stp_state_set(struct dsa_switch *ds, int port,
static int rtl8365mb_port_set_learning(struct realtek_priv *priv, int port,
bool enable)
{
- struct rtl8365mb *mb = priv->chip_data;
-
/* Enable/disable learning by limiting the number of L2 addresses the
* port can learn. Realtek documentation states that a limit of zero
* disables learning. When enabling learning, set it to the chip's
* maximum.
*/
return regmap_write(priv->map, RTL8365MB_LUT_PORT_LEARN_LIMIT_REG(port),
- enable ? mb->learn_limit_max : 0);
+ enable ? RTL8365MB_LEARN_LIMIT_MAX : 0);
}
static int rtl8365mb_port_set_isolation(struct realtek_priv *priv, int port,
@@ -1489,13 +1565,10 @@ static irqreturn_t rtl8365mb_irq(int irq, void *data)
{
struct realtek_priv *priv = data;
unsigned long line_changes = 0;
- struct rtl8365mb *mb;
u32 stat;
int line;
int ret;
- mb = priv->chip_data;
-
ret = rtl8365mb_get_and_clear_status_reg(priv, RTL8365MB_INTR_STATUS_REG,
&stat);
if (ret)
@@ -1520,7 +1593,7 @@ static irqreturn_t rtl8365mb_irq(int irq, void *data)
linkdown_ind = FIELD_GET(RTL8365MB_PORT_LINKDOWN_IND_MASK, val);
- line_changes = (linkup_ind | linkdown_ind) & mb->port_mask;
+ line_changes = linkup_ind | linkdown_ind;
}
if (!line_changes)
@@ -1792,14 +1865,17 @@ static int rtl8365mb_change_tag_protocol(struct dsa_switch *ds,
static int rtl8365mb_switch_init(struct realtek_priv *priv)
{
struct rtl8365mb *mb = priv->chip_data;
+ const struct rtl8365mb_chip_info *ci;
int ret;
int i;
+ ci = mb->chip_info;
+
/* Do any chip-specific init jam before getting to the common stuff */
- if (mb->jam_table) {
- for (i = 0; i < mb->jam_size; i++) {
- ret = regmap_write(priv->map, mb->jam_table[i].reg,
- mb->jam_table[i].val);
+ if (ci->jam_table) {
+ for (i = 0; i < ci->jam_size; i++) {
+ ret = regmap_write(priv->map, ci->jam_table[i].reg,
+ ci->jam_table[i].val);
if (ret)
return ret;
}
@@ -1972,6 +2048,7 @@ static int rtl8365mb_detect(struct realtek_priv *priv)
u32 chip_id;
u32 chip_ver;
int ret;
+ int i;
ret = rtl8365mb_get_chip_id_and_ver(priv->map, &chip_id, &chip_ver);
if (ret) {
@@ -1980,54 +2057,32 @@ static int rtl8365mb_detect(struct realtek_priv *priv)
return ret;
}
- switch (chip_id) {
- case RTL8365MB_CHIP_ID_8365MB_VC:
- switch (chip_ver) {
- case RTL8365MB_CHIP_VER_8365MB_VC:
- dev_info(priv->dev,
- "found an RTL8365MB-VC switch (ver=0x%04x)\n",
- chip_ver);
- break;
- case RTL8365MB_CHIP_VER_8367RB:
- dev_info(priv->dev,
- "found an RTL8367RB-VB switch (ver=0x%04x)\n",
- chip_ver);
- break;
- case RTL8365MB_CHIP_VER_8367S:
- dev_info(priv->dev,
- "found an RTL8367S switch (ver=0x%04x)\n",
- chip_ver);
+ for (i = 0; i < ARRAY_SIZE(rtl8365mb_chip_infos); i++) {
+ const struct rtl8365mb_chip_info *ci = &rtl8365mb_chip_infos[i];
+
+ if (ci->chip_id == chip_id && ci->chip_ver == chip_ver) {
+ mb->chip_info = ci;
break;
- default:
- dev_err(priv->dev, "unrecognized switch version (ver=0x%04x)",
- chip_ver);
- return -ENODEV;
}
+ }
- priv->num_ports = RTL8365MB_MAX_NUM_PORTS;
-
- mb->priv = priv;
- mb->chip_id = chip_id;
- mb->chip_ver = chip_ver;
- mb->port_mask = GENMASK(priv->num_ports - 1, 0);
- mb->learn_limit_max = RTL8365MB_LEARN_LIMIT_MAX;
- mb->jam_table = rtl8365mb_init_jam_8365mb_vc;
- mb->jam_size = ARRAY_SIZE(rtl8365mb_init_jam_8365mb_vc);
-
- mb->cpu.trap_port = RTL8365MB_MAX_NUM_PORTS;
- mb->cpu.insert = RTL8365MB_CPU_INSERT_TO_ALL;
- mb->cpu.position = RTL8365MB_CPU_POS_AFTER_SA;
- mb->cpu.rx_length = RTL8365MB_CPU_RXLEN_64BYTES;
- mb->cpu.format = RTL8365MB_CPU_FORMAT_8BYTES;
-
- break;
- default:
+ if (!mb->chip_info) {
dev_err(priv->dev,
- "found an unknown Realtek switch (id=0x%04x, ver=0x%04x)\n",
- chip_id, chip_ver);
+ "unrecognized switch (id=0x%04x, ver=0x%04x)", chip_id,
+ chip_ver);
return -ENODEV;
}
+ dev_info(priv->dev, "found an %s switch\n", mb->chip_info->name);
+
+ priv->num_ports = RTL8365MB_MAX_NUM_PORTS;
+ mb->priv = priv;
+ mb->cpu.trap_port = RTL8365MB_MAX_NUM_PORTS;
+ mb->cpu.insert = RTL8365MB_CPU_INSERT_TO_ALL;
+ mb->cpu.position = RTL8365MB_CPU_POS_AFTER_SA;
+ mb->cpu.rx_length = RTL8365MB_CPU_RXLEN_64BYTES;
+ mb->cpu.format = RTL8365MB_CPU_FORMAT_8BYTES;
+
return 0;
}
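
rtl8365mb_detect() now scans a chip-info table instead of a nested switch over (chip_id, chip_ver), so supporting a new family member becomes a data addition rather than new control flow. The lookup in a standalone, compilable form, using the three entries from the table above:

#include <stddef.h>
#include <stdio.h>

struct chip_info {
	const char *name;
	unsigned int chip_id;
	unsigned int chip_ver;
};

static const struct chip_info chip_infos[] = {
	{ "RTL8365MB-VC", 0x6367, 0x0040 },
	{ "RTL8367S",     0x6367, 0x00A0 },
	{ "RTL8367RB-VB", 0x6367, 0x0020 },
};

static const struct chip_info *detect(unsigned int id, unsigned int ver)
{
	size_t i;

	for (i = 0; i < sizeof(chip_infos) / sizeof(chip_infos[0]); i++)
		if (chip_infos[i].chip_id == id &&
		    chip_infos[i].chip_ver == ver)
			return &chip_infos[i];
	return NULL;	/* the driver returns -ENODEV here */
}

int main(void)
{
	const struct chip_info *ci = detect(0x6367, 0x00A0);

	printf("%s\n", ci ? ci->name : "unknown");
	return 0;
}
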
diff --git a/drivers/net/dsa/rzn1_a5psw.c b/drivers/net/dsa/rzn1_a5psw.c
new file mode 100644
index 000000000000..3e910da98ae2
--- /dev/null
+++ b/drivers/net/dsa/rzn1_a5psw.c
@@ -0,0 +1,1062 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 Schneider-Electric
+ *
+ * Clément Léger <clement.leger@bootlin.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/etherdevice.h>
+#include <linux/if_bridge.h>
+#include <linux/if_ether.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_mdio.h>
+#include <net/dsa.h>
+
+#include "rzn1_a5psw.h"
+
+struct a5psw_stats {
+ u16 offset;
+ const char name[ETH_GSTRING_LEN];
+};
+
+#define STAT_DESC(_offset) { \
+ .offset = A5PSW_##_offset, \
+ .name = __stringify(_offset), \
+}
+
+static const struct a5psw_stats a5psw_stats[] = {
+ STAT_DESC(aFramesTransmittedOK),
+ STAT_DESC(aFramesReceivedOK),
+ STAT_DESC(aFrameCheckSequenceErrors),
+ STAT_DESC(aAlignmentErrors),
+ STAT_DESC(aOctetsTransmittedOK),
+ STAT_DESC(aOctetsReceivedOK),
+ STAT_DESC(aTxPAUSEMACCtrlFrames),
+ STAT_DESC(aRxPAUSEMACCtrlFrames),
+ STAT_DESC(ifInErrors),
+ STAT_DESC(ifOutErrors),
+ STAT_DESC(ifInUcastPkts),
+ STAT_DESC(ifInMulticastPkts),
+ STAT_DESC(ifInBroadcastPkts),
+ STAT_DESC(ifOutDiscards),
+ STAT_DESC(ifOutUcastPkts),
+ STAT_DESC(ifOutMulticastPkts),
+ STAT_DESC(ifOutBroadcastPkts),
+ STAT_DESC(etherStatsDropEvents),
+ STAT_DESC(etherStatsOctets),
+ STAT_DESC(etherStatsPkts),
+ STAT_DESC(etherStatsUndersizePkts),
+ STAT_DESC(etherStatsOversizePkts),
+ STAT_DESC(etherStatsPkts64Octets),
+ STAT_DESC(etherStatsPkts65to127Octets),
+ STAT_DESC(etherStatsPkts128to255Octets),
+ STAT_DESC(etherStatsPkts256to511Octets),
+ STAT_DESC(etherStatsPkts1024to1518Octets),
+ STAT_DESC(etherStatsPkts1519toXOctets),
+ STAT_DESC(etherStatsJabbers),
+ STAT_DESC(etherStatsFragments),
+ STAT_DESC(VLANReceived),
+ STAT_DESC(VLANTransmitted),
+ STAT_DESC(aDeferred),
+ STAT_DESC(aMultipleCollisions),
+ STAT_DESC(aSingleCollisions),
+ STAT_DESC(aLateCollisions),
+ STAT_DESC(aExcessiveCollisions),
+ STAT_DESC(aCarrierSenseErrors),
+};
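
STAT_DESC() derives both the ethtool string and the register offset from a single token via __stringify(), so a stat's name and its A5PSW_* offset cannot drift apart. A compilable userspace version of the same trick (the offsets below are stand-ins, not the real A5PSW register map):

#include <stddef.h>
#include <stdio.h>

#define __stringify_1(x)	#x
#define __stringify(x)		__stringify_1(x)

/* Stand-in register offsets; in the driver these are A5PSW_* defines */
#define A5PSW_aFramesTransmittedOK	0x868
#define A5PSW_aFramesReceivedOK		0x86C

struct stat_desc {
	unsigned int offset;
	const char *name;
};

#define STAT_DESC(_name) \
	{ .offset = A5PSW_##_name, .name = __stringify(_name) }

static const struct stat_desc stats[] = {
	STAT_DESC(aFramesTransmittedOK),
	STAT_DESC(aFramesReceivedOK),
};

int main(void)
{
	size_t i;

	for (i = 0; i < sizeof(stats) / sizeof(stats[0]); i++)
		printf("%-24s @ 0x%03x\n", stats[i].name, stats[i].offset);
	return 0;
}
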
+
+static void a5psw_reg_writel(struct a5psw *a5psw, int offset, u32 value)
+{
+ writel(value, a5psw->base + offset);
+}
+
+static u32 a5psw_reg_readl(struct a5psw *a5psw, int offset)
+{
+ return readl(a5psw->base + offset);
+}
+
+static void a5psw_reg_rmw(struct a5psw *a5psw, int offset, u32 mask, u32 val)
+{
+ u32 reg;
+
+ spin_lock(&a5psw->reg_lock);
+
+ reg = a5psw_reg_readl(a5psw, offset);
+ reg &= ~mask;
+ reg |= val;
+ a5psw_reg_writel(a5psw, offset, reg);
+
+ spin_unlock(&a5psw->reg_lock);
+}
+
+static enum dsa_tag_protocol a5psw_get_tag_protocol(struct dsa_switch *ds,
+ int port,
+ enum dsa_tag_protocol mp)
+{
+ return DSA_TAG_PROTO_RZN1_A5PSW;
+}
+
+static void a5psw_port_pattern_set(struct a5psw *a5psw, int port, int pattern,
+ bool enable)
+{
+ u32 rx_match = 0;
+
+ if (enable)
+ rx_match |= A5PSW_RXMATCH_CONFIG_PATTERN(pattern);
+
+ a5psw_reg_rmw(a5psw, A5PSW_RXMATCH_CONFIG(port),
+ A5PSW_RXMATCH_CONFIG_PATTERN(pattern), rx_match);
+}
+
+static void a5psw_port_mgmtfwd_set(struct a5psw *a5psw, int port, bool enable)
+{
+ /* Enable "management forward" pattern matching, this will forward
+ * packets from this port only towards the management port and thus
+ * isolate the port.
+ */
+ a5psw_port_pattern_set(a5psw, port, A5PSW_PATTERN_MGMTFWD, enable);
+}
+
+static void a5psw_port_enable_set(struct a5psw *a5psw, int port, bool enable)
+{
+ u32 port_ena = 0;
+
+ if (enable)
+ port_ena |= A5PSW_PORT_ENA_TX_RX(port);
+
+ a5psw_reg_rmw(a5psw, A5PSW_PORT_ENA, A5PSW_PORT_ENA_TX_RX(port),
+ port_ena);
+}
+
+static int a5psw_lk_execute_ctrl(struct a5psw *a5psw, u32 *ctrl)
+{
+ int ret;
+
+ a5psw_reg_writel(a5psw, A5PSW_LK_ADDR_CTRL, *ctrl);
+
+ ret = readl_poll_timeout(a5psw->base + A5PSW_LK_ADDR_CTRL, *ctrl,
+ !(*ctrl & A5PSW_LK_ADDR_CTRL_BUSY),
+ A5PSW_LK_BUSY_USEC_POLL, A5PSW_CTRL_TIMEOUT);
+ if (ret)
+ dev_err(a5psw->dev, "LK_CTRL timeout waiting for BUSY bit\n");
+
+ return ret;
+}
+
+static void a5psw_port_fdb_flush(struct a5psw *a5psw, int port)
+{
+ u32 ctrl = A5PSW_LK_ADDR_CTRL_DELETE_PORT | BIT(port);
+
+ mutex_lock(&a5psw->lk_lock);
+ a5psw_lk_execute_ctrl(a5psw, &ctrl);
+ mutex_unlock(&a5psw->lk_lock);
+}
+
+static void a5psw_port_authorize_set(struct a5psw *a5psw, int port,
+ bool authorize)
+{
+ u32 reg = a5psw_reg_readl(a5psw, A5PSW_AUTH_PORT(port));
+
+ if (authorize)
+ reg |= A5PSW_AUTH_PORT_AUTHORIZED;
+ else
+ reg &= ~A5PSW_AUTH_PORT_AUTHORIZED;
+
+ a5psw_reg_writel(a5psw, A5PSW_AUTH_PORT(port), reg);
+}
+
+static void a5psw_port_disable(struct dsa_switch *ds, int port)
+{
+ struct a5psw *a5psw = ds->priv;
+
+ a5psw_port_authorize_set(a5psw, port, false);
+ a5psw_port_enable_set(a5psw, port, false);
+}
+
+static int a5psw_port_enable(struct dsa_switch *ds, int port,
+ struct phy_device *phy)
+{
+ struct a5psw *a5psw = ds->priv;
+
+ a5psw_port_authorize_set(a5psw, port, true);
+ a5psw_port_enable_set(a5psw, port, true);
+
+ return 0;
+}
+
+static int a5psw_port_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
+{
+ struct a5psw *a5psw = ds->priv;
+
+ new_mtu += ETH_HLEN + A5PSW_EXTRA_MTU_LEN + ETH_FCS_LEN;
+ a5psw_reg_writel(a5psw, A5PSW_FRM_LENGTH(port), new_mtu);
+
+ return 0;
+}
+
+static int a5psw_port_max_mtu(struct dsa_switch *ds, int port)
+{
+ return A5PSW_MAX_MTU;
+}
+
+static void a5psw_phylink_get_caps(struct dsa_switch *ds, int port,
+ struct phylink_config *config)
+{
+ unsigned long *intf = config->supported_interfaces;
+
+ config->mac_capabilities = MAC_1000FD;
+
+ if (dsa_is_cpu_port(ds, port)) {
+ /* GMII is used internally and GMAC2 is connected to the switch
+ * using 1000 Mbps full-duplex mode only (cf. the ethernet manual)
+ */
+ __set_bit(PHY_INTERFACE_MODE_GMII, intf);
+ } else {
+ config->mac_capabilities |= MAC_100 | MAC_10;
+ phy_interface_set_rgmii(intf);
+ __set_bit(PHY_INTERFACE_MODE_RMII, intf);
+ __set_bit(PHY_INTERFACE_MODE_MII, intf);
+ }
+}
+
+static struct phylink_pcs *
+a5psw_phylink_mac_select_pcs(struct dsa_switch *ds, int port,
+ phy_interface_t interface)
+{
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ struct a5psw *a5psw = ds->priv;
+
+ if (!dsa_port_is_cpu(dp) && a5psw->pcs[port])
+ return a5psw->pcs[port];
+
+ return NULL;
+}
+
+static void a5psw_phylink_mac_link_down(struct dsa_switch *ds, int port,
+ unsigned int mode,
+ phy_interface_t interface)
+{
+ struct a5psw *a5psw = ds->priv;
+ u32 cmd_cfg;
+
+ cmd_cfg = a5psw_reg_readl(a5psw, A5PSW_CMD_CFG(port));
+ cmd_cfg &= ~(A5PSW_CMD_CFG_RX_ENA | A5PSW_CMD_CFG_TX_ENA);
+ a5psw_reg_writel(a5psw, A5PSW_CMD_CFG(port), cmd_cfg);
+}
+
+static void a5psw_phylink_mac_link_up(struct dsa_switch *ds, int port,
+ unsigned int mode,
+ phy_interface_t interface,
+ struct phy_device *phydev, int speed,
+ int duplex, bool tx_pause, bool rx_pause)
+{
+ u32 cmd_cfg = A5PSW_CMD_CFG_RX_ENA | A5PSW_CMD_CFG_TX_ENA |
+ A5PSW_CMD_CFG_TX_CRC_APPEND;
+ struct a5psw *a5psw = ds->priv;
+
+ if (speed == SPEED_1000)
+ cmd_cfg |= A5PSW_CMD_CFG_ETH_SPEED;
+
+ if (duplex == DUPLEX_HALF)
+ cmd_cfg |= A5PSW_CMD_CFG_HD_ENA;
+
+ cmd_cfg |= A5PSW_CMD_CFG_CNTL_FRM_ENA;
+
+ if (!rx_pause)
+ cmd_cfg &= ~A5PSW_CMD_CFG_PAUSE_IGNORE;
+
+ a5psw_reg_writel(a5psw, A5PSW_CMD_CFG(port), cmd_cfg);
+}
+
+static int a5psw_set_ageing_time(struct dsa_switch *ds, unsigned int msecs)
+{
+ struct a5psw *a5psw = ds->priv;
+ unsigned long rate;
+ u64 max, tmp;
+ u32 agetime;
+
+ rate = clk_get_rate(a5psw->clk);
+ max = div64_ul(((u64)A5PSW_LK_AGETIME_MASK * A5PSW_TABLE_ENTRIES * 1024),
+ rate) * 1000;
+ if (msecs > max)
+ return -EINVAL;
+
+ tmp = div_u64(rate, MSEC_PER_SEC);
+ agetime = div_u64(msecs * tmp, 1024 * A5PSW_TABLE_ENTRIES);
+
+ a5psw_reg_writel(a5psw, A5PSW_LK_AGETIME, agetime);
+
+ return 0;
+}
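
a5psw_set_ageing_time() converts the requested ageing time in milliseconds into the A5PSW_LK_AGETIME register value, whose unit is 1024 * A5PSW_TABLE_ENTRIES switch-clock cycles; the range check simply runs the same conversion backwards from the register's maximum. The forward arithmetic in a standalone, compilable form (the table size and clock rate below are placeholders):

#include <stdint.h>
#include <stdio.h>

#define TABLE_ENTRIES	8192U	/* placeholder for A5PSW_TABLE_ENTRIES */

/* agetime register units are (1024 * TABLE_ENTRIES) clock cycles:
 * agetime = msecs * (rate / 1000) / (1024 * TABLE_ENTRIES) */
static uint32_t msecs_to_agetime(uint64_t msecs, unsigned long rate_hz)
{
	uint64_t cycles_per_msec = rate_hz / 1000;

	return (uint32_t)((msecs * cycles_per_msec) /
			  (1024ULL * TABLE_ENTRIES));
}

int main(void)
{
	/* 300 s ageing at a 125 MHz switch clock */
	printf("%u\n", (unsigned)msecs_to_agetime(300000, 125000000UL));
	return 0;
}
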
+
+static void a5psw_flooding_set_resolution(struct a5psw *a5psw, int port,
+ bool set)
+{
+ u8 offsets[] = {A5PSW_UCAST_DEF_MASK, A5PSW_BCAST_DEF_MASK,
+ A5PSW_MCAST_DEF_MASK};
+ int i;
+
+ if (set)
+ a5psw->bridged_ports |= BIT(port);
+ else
+ a5psw->bridged_ports &= ~BIT(port);
+
+ for (i = 0; i < ARRAY_SIZE(offsets); i++)
+ a5psw_reg_writel(a5psw, offsets[i], a5psw->bridged_ports);
+}
+
+static int a5psw_port_bridge_join(struct dsa_switch *ds, int port,
+ struct dsa_bridge bridge,
+ bool *tx_fwd_offload,
+ struct netlink_ext_ack *extack)
+{
+ struct a5psw *a5psw = ds->priv;
+
+ /* We only support 1 bridge device */
+ if (a5psw->br_dev && bridge.dev != a5psw->br_dev) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Forwarding offload supported for a single bridge");
+ return -EOPNOTSUPP;
+ }
+
+ a5psw->br_dev = bridge.dev;
+ a5psw_flooding_set_resolution(a5psw, port, true);
+ a5psw_port_mgmtfwd_set(a5psw, port, false);
+
+ return 0;
+}
+
+static void a5psw_port_bridge_leave(struct dsa_switch *ds, int port,
+ struct dsa_bridge bridge)
+{
+ struct a5psw *a5psw = ds->priv;
+
+ a5psw_flooding_set_resolution(a5psw, port, false);
+ a5psw_port_mgmtfwd_set(a5psw, port, true);
+
+ /* No more ports bridged */
+ if (a5psw->bridged_ports == BIT(A5PSW_CPU_PORT))
+ a5psw->br_dev = NULL;
+}
+
+static void a5psw_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
+{
+ u32 mask = A5PSW_INPUT_LEARN_DIS(port) | A5PSW_INPUT_LEARN_BLOCK(port);
+ struct a5psw *a5psw = ds->priv;
+ u32 reg = 0;
+
+ switch (state) {
+ case BR_STATE_DISABLED:
+ case BR_STATE_BLOCKING:
+ reg |= A5PSW_INPUT_LEARN_DIS(port);
+ reg |= A5PSW_INPUT_LEARN_BLOCK(port);
+ break;
+ case BR_STATE_LISTENING:
+ reg |= A5PSW_INPUT_LEARN_DIS(port);
+ break;
+ case BR_STATE_LEARNING:
+ reg |= A5PSW_INPUT_LEARN_BLOCK(port);
+ break;
+ case BR_STATE_FORWARDING:
+ default:
+ break;
+ }
+
+ a5psw_reg_rmw(a5psw, A5PSW_INPUT_LEARN, mask, reg);
+}
+
+static void a5psw_port_fast_age(struct dsa_switch *ds, int port)
+{
+ struct a5psw *a5psw = ds->priv;
+
+ a5psw_port_fdb_flush(a5psw, port);
+}
+
+static int a5psw_lk_execute_lookup(struct a5psw *a5psw, union lk_data *lk_data,
+ u16 *entry)
+{
+ u32 ctrl;
+ int ret;
+
+ a5psw_reg_writel(a5psw, A5PSW_LK_DATA_LO, lk_data->lo);
+ a5psw_reg_writel(a5psw, A5PSW_LK_DATA_HI, lk_data->hi);
+
+ ctrl = A5PSW_LK_ADDR_CTRL_LOOKUP;
+ ret = a5psw_lk_execute_ctrl(a5psw, &ctrl);
+ if (ret)
+ return ret;
+
+ *entry = ctrl & A5PSW_LK_ADDR_CTRL_ADDRESS;
+
+ return 0;
+}
+
+static int a5psw_port_fdb_add(struct dsa_switch *ds, int port,
+ const unsigned char *addr, u16 vid,
+ struct dsa_db db)
+{
+ struct a5psw *a5psw = ds->priv;
+ union lk_data lk_data = {0};
+ bool inc_learncount = false;
+ int ret = 0;
+ u16 entry;
+ u32 reg;
+
+ ether_addr_copy(lk_data.entry.mac, addr);
+ lk_data.entry.port_mask = BIT(port);
+
+ mutex_lock(&a5psw->lk_lock);
+
+ /* Set the value to be written in the lookup table */
+ ret = a5psw_lk_execute_lookup(a5psw, &lk_data, &entry);
+ if (ret)
+ goto lk_unlock;
+
+ lk_data.hi = a5psw_reg_readl(a5psw, A5PSW_LK_DATA_HI);
+ if (!lk_data.entry.valid) {
+ inc_learncount = true;
+ /* port_mask set to 0x1f when entry is not valid, clear it */
+ lk_data.entry.port_mask = 0;
+ lk_data.entry.prio = 0;
+ }
+
+ lk_data.entry.port_mask |= BIT(port);
+ lk_data.entry.is_static = 1;
+ lk_data.entry.valid = 1;
+
+ a5psw_reg_writel(a5psw, A5PSW_LK_DATA_HI, lk_data.hi);
+
+ reg = A5PSW_LK_ADDR_CTRL_WRITE | entry;
+ ret = a5psw_lk_execute_ctrl(a5psw, &reg);
+ if (ret)
+ goto lk_unlock;
+
+ if (inc_learncount) {
+ reg = A5PSW_LK_LEARNCOUNT_MODE_INC;
+ a5psw_reg_writel(a5psw, A5PSW_LK_LEARNCOUNT, reg);
+ }
+
+lk_unlock:
+ mutex_unlock(&a5psw->lk_lock);
+
+ return ret;
+}
+
+static int a5psw_port_fdb_del(struct dsa_switch *ds, int port,
+ const unsigned char *addr, u16 vid,
+ struct dsa_db db)
+{
+ struct a5psw *a5psw = ds->priv;
+ union lk_data lk_data = {0};
+ bool clear = false;
+ u16 entry;
+ u32 reg;
+ int ret;
+
+ ether_addr_copy(lk_data.entry.mac, addr);
+
+ mutex_lock(&a5psw->lk_lock);
+
+ ret = a5psw_lk_execute_lookup(a5psw, &lk_data, &entry);
+ if (ret)
+ goto lk_unlock;
+
+ lk_data.hi = a5psw_reg_readl(a5psw, A5PSW_LK_DATA_HI);
+
+ /* Our hardware does not associate any VID with the FDB entries, so if
+ * two entries were added for the same MAC but with different VIDs,
+ * deleting the first one would also delete the second one. Since there
+ * is unfortunately nothing we can do about that, do not return an
+ * error...
+ */
+ if (!lk_data.entry.valid)
+ goto lk_unlock;
+
+ lk_data.entry.port_mask &= ~BIT(port);
+ /* If there is no more port in the mask, clear the entry */
+ if (lk_data.entry.port_mask == 0)
+ clear = true;
+
+ a5psw_reg_writel(a5psw, A5PSW_LK_DATA_HI, lk_data.hi);
+
+ reg = entry;
+ if (clear)
+ reg |= A5PSW_LK_ADDR_CTRL_CLEAR;
+ else
+ reg |= A5PSW_LK_ADDR_CTRL_WRITE;
+
+ ret = a5psw_lk_execute_ctrl(a5psw, &reg);
+ if (ret)
+ goto lk_unlock;
+
+ /* Decrement LEARNCOUNT */
+ if (clear) {
+ reg = A5PSW_LK_LEARNCOUNT_MODE_DEC;
+ a5psw_reg_writel(a5psw, A5PSW_LK_LEARNCOUNT, reg);
+ }
+
+lk_unlock:
+ mutex_unlock(&a5psw->lk_lock);
+
+ return ret;
+}
+
+static int a5psw_port_fdb_dump(struct dsa_switch *ds, int port,
+ dsa_fdb_dump_cb_t *cb, void *data)
+{
+ struct a5psw *a5psw = ds->priv;
+ union lk_data lk_data;
+ int i = 0, ret = 0;
+ u32 reg;
+
+ mutex_lock(&a5psw->lk_lock);
+
+ for (i = 0; i < A5PSW_TABLE_ENTRIES; i++) {
+ reg = A5PSW_LK_ADDR_CTRL_READ | A5PSW_LK_ADDR_CTRL_WAIT | i;
+
+ ret = a5psw_lk_execute_ctrl(a5psw, &reg);
+ if (ret)
+ goto out_unlock;
+
+ lk_data.hi = a5psw_reg_readl(a5psw, A5PSW_LK_DATA_HI);
+ /* If entry is not valid or does not contain the port, skip */
+ if (!lk_data.entry.valid ||
+ !(lk_data.entry.port_mask & BIT(port)))
+ continue;
+
+ lk_data.lo = a5psw_reg_readl(a5psw, A5PSW_LK_DATA_LO);
+
+ ret = cb(lk_data.entry.mac, 0, lk_data.entry.is_static, data);
+ if (ret)
+ goto out_unlock;
+ }
+
+out_unlock:
+ mutex_unlock(&a5psw->lk_lock);
+
+ return ret;
+}
+
+static u64 a5psw_read_stat(struct a5psw *a5psw, u32 offset, int port)
+{
+ u32 reg_lo, reg_hi;
+
+ reg_lo = a5psw_reg_readl(a5psw, offset + A5PSW_PORT_OFFSET(port));
+ /* A5PSW_STATS_HIWORD is latched on stat read */
+ reg_hi = a5psw_reg_readl(a5psw, A5PSW_STATS_HIWORD);
+
+ return ((u64)reg_hi << 32) | reg_lo;
+}
+
+static void a5psw_get_strings(struct dsa_switch *ds, int port, u32 stringset,
+ uint8_t *data)
+{
+ unsigned int u;
+
+ if (stringset != ETH_SS_STATS)
+ return;
+
+ for (u = 0; u < ARRAY_SIZE(a5psw_stats); u++) {
+ memcpy(data + u * ETH_GSTRING_LEN, a5psw_stats[u].name,
+ ETH_GSTRING_LEN);
+ }
+}
+
+static void a5psw_get_ethtool_stats(struct dsa_switch *ds, int port,
+ uint64_t *data)
+{
+ struct a5psw *a5psw = ds->priv;
+ unsigned int u;
+
+ for (u = 0; u < ARRAY_SIZE(a5psw_stats); u++)
+ data[u] = a5psw_read_stat(a5psw, a5psw_stats[u].offset, port);
+}
+
+static int a5psw_get_sset_count(struct dsa_switch *ds, int port, int sset)
+{
+ if (sset != ETH_SS_STATS)
+ return 0;
+
+ return ARRAY_SIZE(a5psw_stats);
+}
+
+static void a5psw_get_eth_mac_stats(struct dsa_switch *ds, int port,
+ struct ethtool_eth_mac_stats *mac_stats)
+{
+ struct a5psw *a5psw = ds->priv;
+
+#define RD(name) a5psw_read_stat(a5psw, A5PSW_##name, port)
+ mac_stats->FramesTransmittedOK = RD(aFramesTransmittedOK);
+ mac_stats->SingleCollisionFrames = RD(aSingleCollisions);
+ mac_stats->MultipleCollisionFrames = RD(aMultipleCollisions);
+ mac_stats->FramesReceivedOK = RD(aFramesReceivedOK);
+ mac_stats->FrameCheckSequenceErrors = RD(aFrameCheckSequenceErrors);
+ mac_stats->AlignmentErrors = RD(aAlignmentErrors);
+ mac_stats->OctetsTransmittedOK = RD(aOctetsTransmittedOK);
+ mac_stats->FramesWithDeferredXmissions = RD(aDeferred);
+ mac_stats->LateCollisions = RD(aLateCollisions);
+ mac_stats->FramesAbortedDueToXSColls = RD(aExcessiveCollisions);
+ mac_stats->FramesLostDueToIntMACXmitError = RD(ifOutErrors);
+ mac_stats->CarrierSenseErrors = RD(aCarrierSenseErrors);
+ mac_stats->OctetsReceivedOK = RD(aOctetsReceivedOK);
+ mac_stats->FramesLostDueToIntMACRcvError = RD(ifInErrors);
+ mac_stats->MulticastFramesXmittedOK = RD(ifOutMulticastPkts);
+ mac_stats->BroadcastFramesXmittedOK = RD(ifOutBroadcastPkts);
+ mac_stats->FramesWithExcessiveDeferral = RD(aDeferred);
+ mac_stats->MulticastFramesReceivedOK = RD(ifInMulticastPkts);
+ mac_stats->BroadcastFramesReceivedOK = RD(ifInBroadcastPkts);
+#undef RD
+}
+
+static const struct ethtool_rmon_hist_range a5psw_rmon_ranges[] = {
+ { 0, 64 },
+ { 65, 127 },
+ { 128, 255 },
+ { 256, 511 },
+ { 512, 1023 },
+ { 1024, 1518 },
+ { 1519, A5PSW_MAX_MTU },
+ {}
+};
+
+static void a5psw_get_rmon_stats(struct dsa_switch *ds, int port,
+ struct ethtool_rmon_stats *rmon_stats,
+ const struct ethtool_rmon_hist_range **ranges)
+{
+ struct a5psw *a5psw = ds->priv;
+
+#define RD(name) a5psw_read_stat(a5psw, A5PSW_##name, port)
+ rmon_stats->undersize_pkts = RD(etherStatsUndersizePkts);
+ rmon_stats->oversize_pkts = RD(etherStatsOversizePkts);
+ rmon_stats->fragments = RD(etherStatsFragments);
+ rmon_stats->jabbers = RD(etherStatsJabbers);
+ rmon_stats->hist[0] = RD(etherStatsPkts64Octets);
+ rmon_stats->hist[1] = RD(etherStatsPkts65to127Octets);
+ rmon_stats->hist[2] = RD(etherStatsPkts128to255Octets);
+ rmon_stats->hist[3] = RD(etherStatsPkts256to511Octets);
+ rmon_stats->hist[4] = RD(etherStatsPkts512to1023Octets);
+ rmon_stats->hist[5] = RD(etherStatsPkts1024to1518Octets);
+ rmon_stats->hist[6] = RD(etherStatsPkts1519toXOctets);
+#undef RD
+
+ *ranges = a5psw_rmon_ranges;
+}
+
+static void a5psw_get_eth_ctrl_stats(struct dsa_switch *ds, int port,
+ struct ethtool_eth_ctrl_stats *ctrl_stats)
+{
+ struct a5psw *a5psw = ds->priv;
+ u64 stat;
+
+ stat = a5psw_read_stat(a5psw, A5PSW_aTxPAUSEMACCtrlFrames, port);
+ ctrl_stats->MACControlFramesTransmitted = stat;
+ stat = a5psw_read_stat(a5psw, A5PSW_aRxPAUSEMACCtrlFrames, port);
+ ctrl_stats->MACControlFramesReceived = stat;
+}
+
+static int a5psw_setup(struct dsa_switch *ds)
+{
+ struct a5psw *a5psw = ds->priv;
+ int port, vlan, ret;
+ struct dsa_port *dp;
+ u32 reg;
+
+ /* Validate that there is only 1 CPU port with index A5PSW_CPU_PORT */
+ dsa_switch_for_each_cpu_port(dp, ds) {
+ if (dp->index != A5PSW_CPU_PORT) {
+ dev_err(a5psw->dev, "Invalid CPU port\n");
+ return -EINVAL;
+ }
+ }
+
+ /* Configure management port */
+ reg = A5PSW_CPU_PORT | A5PSW_MGMT_CFG_DISCARD;
+ a5psw_reg_writel(a5psw, A5PSW_MGMT_CFG, reg);
+
+ /* Set pattern 0 to forward all frames to the mgmt port */
+ a5psw_reg_writel(a5psw, A5PSW_PATTERN_CTRL(A5PSW_PATTERN_MGMTFWD),
+ A5PSW_PATTERN_CTRL_MGMTFWD);
+
+ /* Enable port tagging */
+ reg = FIELD_PREP(A5PSW_MGMT_TAG_CFG_TAGFIELD, ETH_P_DSA_A5PSW);
+ reg |= A5PSW_MGMT_TAG_CFG_ENABLE | A5PSW_MGMT_TAG_CFG_ALL_FRAMES;
+ a5psw_reg_writel(a5psw, A5PSW_MGMT_TAG_CFG, reg);
+
+ /* Enable normal switch operation */
+ reg = A5PSW_LK_ADDR_CTRL_BLOCKING | A5PSW_LK_ADDR_CTRL_LEARNING |
+ A5PSW_LK_ADDR_CTRL_AGEING | A5PSW_LK_ADDR_CTRL_ALLOW_MIGR |
+ A5PSW_LK_ADDR_CTRL_CLEAR_TABLE;
+ a5psw_reg_writel(a5psw, A5PSW_LK_CTRL, reg);
+
+ ret = readl_poll_timeout(a5psw->base + A5PSW_LK_CTRL, reg,
+ !(reg & A5PSW_LK_ADDR_CTRL_CLEAR_TABLE),
+ A5PSW_LK_BUSY_USEC_POLL, A5PSW_CTRL_TIMEOUT);
+ if (ret) {
+ dev_err(a5psw->dev, "Failed to clear lookup table\n");
+ return ret;
+ }
+
+ /* Reset learn count to 0 */
+ reg = A5PSW_LK_LEARNCOUNT_MODE_SET;
+ a5psw_reg_writel(a5psw, A5PSW_LK_LEARNCOUNT, reg);
+
+ /* Clear VLAN resource table */
+ reg = A5PSW_VLAN_RES_WR_PORTMASK | A5PSW_VLAN_RES_WR_TAGMASK;
+ for (vlan = 0; vlan < A5PSW_VLAN_COUNT; vlan++)
+ a5psw_reg_writel(a5psw, A5PSW_VLAN_RES(vlan), reg);
+
+ /* Reset all ports */
+ dsa_switch_for_each_port(dp, ds) {
+ port = dp->index;
+
+ /* Reset the port */
+ a5psw_reg_writel(a5psw, A5PSW_CMD_CFG(port),
+ A5PSW_CMD_CFG_SW_RESET);
+
+ /* Enable only CPU port */
+ a5psw_port_enable_set(a5psw, port, dsa_port_is_cpu(dp));
+
+ if (dsa_port_is_unused(dp))
+ continue;
+
+ /* Enable egress flooding for CPU port */
+ if (dsa_port_is_cpu(dp))
+ a5psw_flooding_set_resolution(a5psw, port, true);
+
+ /* Enable management forwarding only for user ports */
+ if (dsa_port_is_user(dp))
+ a5psw_port_mgmtfwd_set(a5psw, port, true);
+ }
+
+ return 0;
+}
+
+static const struct dsa_switch_ops a5psw_switch_ops = {
+ .get_tag_protocol = a5psw_get_tag_protocol,
+ .setup = a5psw_setup,
+ .port_disable = a5psw_port_disable,
+ .port_enable = a5psw_port_enable,
+ .phylink_get_caps = a5psw_phylink_get_caps,
+ .phylink_mac_select_pcs = a5psw_phylink_mac_select_pcs,
+ .phylink_mac_link_down = a5psw_phylink_mac_link_down,
+ .phylink_mac_link_up = a5psw_phylink_mac_link_up,
+ .port_change_mtu = a5psw_port_change_mtu,
+ .port_max_mtu = a5psw_port_max_mtu,
+ .get_sset_count = a5psw_get_sset_count,
+ .get_strings = a5psw_get_strings,
+ .get_ethtool_stats = a5psw_get_ethtool_stats,
+ .get_eth_mac_stats = a5psw_get_eth_mac_stats,
+ .get_eth_ctrl_stats = a5psw_get_eth_ctrl_stats,
+ .get_rmon_stats = a5psw_get_rmon_stats,
+ .set_ageing_time = a5psw_set_ageing_time,
+ .port_bridge_join = a5psw_port_bridge_join,
+ .port_bridge_leave = a5psw_port_bridge_leave,
+ .port_stp_state_set = a5psw_port_stp_state_set,
+ .port_fast_age = a5psw_port_fast_age,
+ .port_fdb_add = a5psw_port_fdb_add,
+ .port_fdb_del = a5psw_port_fdb_del,
+ .port_fdb_dump = a5psw_port_fdb_dump,
+};
+
+static int a5psw_mdio_wait_busy(struct a5psw *a5psw)
+{
+ u32 status;
+ int err;
+
+ err = readl_poll_timeout(a5psw->base + A5PSW_MDIO_CFG_STATUS, status,
+ !(status & A5PSW_MDIO_CFG_STATUS_BUSY), 10,
+ 1000 * USEC_PER_MSEC);
+ if (err)
+ dev_err(a5psw->dev, "MDIO command timeout\n");
+
+ return err;
+}
+
+static int a5psw_mdio_read(struct mii_bus *bus, int phy_id, int phy_reg)
+{
+ struct a5psw *a5psw = bus->priv;
+ u32 cmd, status;
+ int ret;
+
+ if (phy_reg & MII_ADDR_C45)
+ return -EOPNOTSUPP;
+
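+ /* Build a Clause 22 read command and wait for the bus to go idle */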
+ cmd = A5PSW_MDIO_COMMAND_READ;
+ cmd |= FIELD_PREP(A5PSW_MDIO_COMMAND_REG_ADDR, phy_reg);
+ cmd |= FIELD_PREP(A5PSW_MDIO_COMMAND_PHY_ADDR, phy_id);
+
+ a5psw_reg_writel(a5psw, A5PSW_MDIO_COMMAND, cmd);
+
+ ret = a5psw_mdio_wait_busy(a5psw);
+ if (ret)
+ return ret;
+
+ ret = a5psw_reg_readl(a5psw, A5PSW_MDIO_DATA) & A5PSW_MDIO_DATA_MASK;
+
+ status = a5psw_reg_readl(a5psw, A5PSW_MDIO_CFG_STATUS);
+ if (status & A5PSW_MDIO_CFG_STATUS_READERR)
+ return -EIO;
+
+ return ret;
+}
+
+static int a5psw_mdio_write(struct mii_bus *bus, int phy_id, int phy_reg,
+ u16 phy_data)
+{
+ struct a5psw *a5psw = bus->priv;
+ u32 cmd;
+
+ if (phy_reg & MII_ADDR_C45)
+ return -EOPNOTSUPP;
+
+ cmd = FIELD_PREP(A5PSW_MDIO_COMMAND_REG_ADDR, phy_reg);
+ cmd |= FIELD_PREP(A5PSW_MDIO_COMMAND_PHY_ADDR, phy_id);
+
+ a5psw_reg_writel(a5psw, A5PSW_MDIO_COMMAND, cmd);
+ a5psw_reg_writel(a5psw, A5PSW_MDIO_DATA, phy_data);
+
+ return a5psw_mdio_wait_busy(a5psw);
+}
+
+static int a5psw_mdio_config(struct a5psw *a5psw, u32 mdio_freq)
+{
+ unsigned long rate;
+ unsigned long div;
+ u32 cfgstatus;
+
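+ /* MDC is derived from hclk; compute the CLKDIV field for the requested rate */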
+ rate = clk_get_rate(a5psw->hclk);
+ div = ((rate / mdio_freq) / 2);
+ if (div > FIELD_MAX(A5PSW_MDIO_CFG_STATUS_CLKDIV) ||
+ div < A5PSW_MDIO_CLK_DIV_MIN) {
+ dev_err(a5psw->dev, "MDIO clock div %ld out of range\n", div);
+ return -ERANGE;
+ }
+
+ cfgstatus = FIELD_PREP(A5PSW_MDIO_CFG_STATUS_CLKDIV, div);
+
+ a5psw_reg_writel(a5psw, A5PSW_MDIO_CFG_STATUS, cfgstatus);
+
+ return 0;
+}
+
+static int a5psw_probe_mdio(struct a5psw *a5psw, struct device_node *node)
+{
+ struct device *dev = a5psw->dev;
+ struct mii_bus *bus;
+ u32 mdio_freq;
+ int ret;
+
+ if (of_property_read_u32(node, "clock-frequency", &mdio_freq))
+ mdio_freq = A5PSW_MDIO_DEF_FREQ;
+
+ ret = a5psw_mdio_config(a5psw, mdio_freq);
+ if (ret)
+ return ret;
+
+ bus = devm_mdiobus_alloc(dev);
+ if (!bus)
+ return -ENOMEM;
+
+ bus->name = "a5psw_mdio";
+ bus->read = a5psw_mdio_read;
+ bus->write = a5psw_mdio_write;
+ bus->priv = a5psw;
+ bus->parent = dev;
+ snprintf(bus->id, MII_BUS_ID_SIZE, "%s", dev_name(dev));
+
+ a5psw->mii_bus = bus;
+
+ return devm_of_mdiobus_register(dev, bus, node);
+}
+
+static void a5psw_pcs_free(struct a5psw *a5psw)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(a5psw->pcs); i++) {
+ if (a5psw->pcs[i])
+ miic_destroy(a5psw->pcs[i]);
+ }
+}
+
+static int a5psw_pcs_get(struct a5psw *a5psw)
+{
+ struct device_node *ports, *port, *pcs_node;
+ struct phylink_pcs *pcs;
+ int ret;
+ u32 reg;
+
+ ports = of_get_child_by_name(a5psw->dev->of_node, "ethernet-ports");
+ if (!ports)
+ return -EINVAL;
+
+ for_each_available_child_of_node(ports, port) {
+ pcs_node = of_parse_phandle(port, "pcs-handle", 0);
+ if (!pcs_node)
+ continue;
+
+ if (of_property_read_u32(port, "reg", &reg)) {
+ of_node_put(pcs_node);
+ ret = -EINVAL;
+ goto free_pcs;
+ }
+
+ if (reg >= ARRAY_SIZE(a5psw->pcs)) {
+ of_node_put(pcs_node);
+ ret = -ENODEV;
+ goto free_pcs;
+ }
+
+ pcs = miic_create(a5psw->dev, pcs_node);
+ of_node_put(pcs_node);
+ if (IS_ERR(pcs)) {
+ dev_err(a5psw->dev, "Failed to create PCS for port %d\n",
+ reg);
+ ret = PTR_ERR(pcs);
+ goto free_pcs;
+ }
+
+ a5psw->pcs[reg] = pcs;
+ }
+ of_node_put(ports);
+
+ return 0;
+
+free_pcs:
+ of_node_put(port);
+ of_node_put(ports);
+ a5psw_pcs_free(a5psw);
+
+ return ret;
+}
+
+static int a5psw_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct device_node *mdio;
+ struct dsa_switch *ds;
+ struct a5psw *a5psw;
+ int ret;
+
+ a5psw = devm_kzalloc(dev, sizeof(*a5psw), GFP_KERNEL);
+ if (!a5psw)
+ return -ENOMEM;
+
+ a5psw->dev = dev;
+ mutex_init(&a5psw->lk_lock);
+ spin_lock_init(&a5psw->reg_lock);
+ a5psw->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(a5psw->base))
+ return PTR_ERR(a5psw->base);
+
+ ret = a5psw_pcs_get(a5psw);
+ if (ret)
+ return ret;
+
+ a5psw->hclk = devm_clk_get(dev, "hclk");
+ if (IS_ERR(a5psw->hclk)) {
+ dev_err(dev, "failed get hclk clock\n");
+ ret = PTR_ERR(a5psw->hclk);
+ goto free_pcs;
+ }
+
+ a5psw->clk = devm_clk_get(dev, "clk");
+ if (IS_ERR(a5psw->clk)) {
+ dev_err(dev, "failed get clk_switch clock\n");
+ ret = PTR_ERR(a5psw->clk);
+ goto free_pcs;
+ }
+
+ ret = clk_prepare_enable(a5psw->clk);
+ if (ret)
+ goto free_pcs;
+
+ ret = clk_prepare_enable(a5psw->hclk);
+ if (ret)
+ goto clk_disable;
+
+ mdio = of_get_child_by_name(dev->of_node, "mdio");
+ if (of_device_is_available(mdio)) {
+ ret = a5psw_probe_mdio(a5psw, mdio);
+ if (ret) {
+ of_node_put(mdio);
+ dev_err(dev, "Failed to register MDIO: %d\n", ret);
+ goto hclk_disable;
+ }
+ }
+
+ of_node_put(mdio);
+
+ platform_set_drvdata(pdev, a5psw);
+
+ ds = &a5psw->ds;
+ ds->dev = dev;
+ ds->num_ports = A5PSW_PORTS_NUM;
+ ds->ops = &a5psw_switch_ops;
+ ds->priv = a5psw;
+
+ ret = dsa_register_switch(ds);
+ if (ret) {
+ dev_err(dev, "Failed to register DSA switch: %d\n", ret);
+ goto hclk_disable;
+ }
+
+ return 0;
+
+hclk_disable:
+ clk_disable_unprepare(a5psw->hclk);
+clk_disable:
+ clk_disable_unprepare(a5psw->clk);
+free_pcs:
+ a5psw_pcs_free(a5psw);
+
+ return ret;
+}
+
+static int a5psw_remove(struct platform_device *pdev)
+{
+ struct a5psw *a5psw = platform_get_drvdata(pdev);
+
+ if (!a5psw)
+ return 0;
+
+ dsa_unregister_switch(&a5psw->ds);
+ a5psw_pcs_free(a5psw);
+ clk_disable_unprepare(a5psw->hclk);
+ clk_disable_unprepare(a5psw->clk);
+
+ platform_set_drvdata(pdev, NULL);
+
+ return 0;
+}
+
+static void a5psw_shutdown(struct platform_device *pdev)
+{
+ struct a5psw *a5psw = platform_get_drvdata(pdev);
+
+ if (!a5psw)
+ return;
+
+ dsa_switch_shutdown(&a5psw->ds);
+
+ platform_set_drvdata(pdev, NULL);
+}
+
+static const struct of_device_id a5psw_of_mtable[] = {
+ { .compatible = "renesas,rzn1-a5psw", },
+ { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, a5psw_of_mtable);
+
+static struct platform_driver a5psw_driver = {
+ .driver = {
+ .name = "rzn1_a5psw",
+ .of_match_table = of_match_ptr(a5psw_of_mtable),
+ },
+ .probe = a5psw_probe,
+ .remove = a5psw_remove,
+ .shutdown = a5psw_shutdown,
+};
+module_platform_driver(a5psw_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Renesas RZ/N1 Advanced 5-port Switch driver");
+MODULE_AUTHOR("Clément Léger <clement.leger@bootlin.com>");
diff --git a/drivers/net/dsa/rzn1_a5psw.h b/drivers/net/dsa/rzn1_a5psw.h
new file mode 100644
index 000000000000..c67abd49c013
--- /dev/null
+++ b/drivers/net/dsa/rzn1_a5psw.h
@@ -0,0 +1,259 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2022 Schneider Electric
+ *
+ * Clément Léger <clement.leger@bootlin.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/debugfs.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_mdio.h>
+#include <linux/platform_device.h>
+#include <linux/pcs-rzn1-miic.h>
+#include <net/dsa.h>
+
+#define A5PSW_REVISION 0x0
+#define A5PSW_PORT_OFFSET(port) (0x400 * (port))
+
+#define A5PSW_PORT_ENA 0x8
+#define A5PSW_PORT_ENA_RX_SHIFT 16
+#define A5PSW_PORT_ENA_TX_RX(port) (BIT((port) + A5PSW_PORT_ENA_RX_SHIFT) | \
+ BIT(port))
+#define A5PSW_UCAST_DEF_MASK 0xC
+
+#define A5PSW_VLAN_VERIFY 0x10
+#define A5PSW_VLAN_VERI_SHIFT 0
+#define A5PSW_VLAN_DISC_SHIFT 16
+
+#define A5PSW_BCAST_DEF_MASK 0x14
+#define A5PSW_MCAST_DEF_MASK 0x18
+
+#define A5PSW_INPUT_LEARN 0x1C
+#define A5PSW_INPUT_LEARN_DIS(p) BIT((p) + 16)
+#define A5PSW_INPUT_LEARN_BLOCK(p) BIT(p)
+
+#define A5PSW_MGMT_CFG 0x20
+#define A5PSW_MGMT_CFG_DISCARD BIT(7)
+
+#define A5PSW_MODE_CFG 0x24
+#define A5PSW_MODE_STATS_RESET BIT(31)
+
+#define A5PSW_VLAN_IN_MODE 0x28
+#define A5PSW_VLAN_IN_MODE_PORT_SHIFT(port) ((port) * 2)
+#define A5PSW_VLAN_IN_MODE_PORT(port) (GENMASK(1, 0) << \
+ A5PSW_VLAN_IN_MODE_PORT_SHIFT(port))
+#define A5PSW_VLAN_IN_MODE_SINGLE_PASSTHROUGH 0x0
+#define A5PSW_VLAN_IN_MODE_SINGLE_REPLACE 0x1
+#define A5PSW_VLAN_IN_MODE_TAG_ALWAYS 0x2
+
+#define A5PSW_VLAN_OUT_MODE 0x2C
+#define A5PSW_VLAN_OUT_MODE_PORT(port) (GENMASK(1, 0) << ((port) * 2))
+#define A5PSW_VLAN_OUT_MODE_DIS 0x0
+#define A5PSW_VLAN_OUT_MODE_STRIP 0x1
+#define A5PSW_VLAN_OUT_MODE_TAG_THROUGH 0x2
+#define A5PSW_VLAN_OUT_MODE_TRANSPARENT 0x3
+
+#define A5PSW_VLAN_IN_MODE_ENA 0x30
+#define A5PSW_VLAN_TAG_ID 0x34
+
+#define A5PSW_SYSTEM_TAGINFO(port) (0x200 + A5PSW_PORT_OFFSET(port))
+
+#define A5PSW_AUTH_PORT(port) (0x240 + 4 * (port))
+#define A5PSW_AUTH_PORT_AUTHORIZED BIT(0)
+
+#define A5PSW_VLAN_RES(entry) (0x280 + 4 * (entry))
+#define A5PSW_VLAN_RES_WR_PORTMASK BIT(30)
+#define A5PSW_VLAN_RES_WR_TAGMASK BIT(29)
+#define A5PSW_VLAN_RES_RD_TAGMASK BIT(28)
+#define A5PSW_VLAN_RES_ID GENMASK(16, 5)
+#define A5PSW_VLAN_RES_PORTMASK GENMASK(4, 0)
+
+#define A5PSW_RXMATCH_CONFIG(port) (0x3e80 + 4 * (port))
+#define A5PSW_RXMATCH_CONFIG_PATTERN(p) BIT(p)
+
+#define A5PSW_PATTERN_CTRL(p) (0x3eb0 + 4 * (p))
+#define A5PSW_PATTERN_CTRL_MGMTFWD BIT(1)
+
+#define A5PSW_LK_CTRL 0x400
+#define A5PSW_LK_ADDR_CTRL_BLOCKING BIT(0)
+#define A5PSW_LK_ADDR_CTRL_LEARNING BIT(1)
+#define A5PSW_LK_ADDR_CTRL_AGEING BIT(2)
+#define A5PSW_LK_ADDR_CTRL_ALLOW_MIGR BIT(3)
+#define A5PSW_LK_ADDR_CTRL_CLEAR_TABLE BIT(6)
+
+#define A5PSW_LK_ADDR_CTRL 0x408
+#define A5PSW_LK_ADDR_CTRL_BUSY BIT(31)
+#define A5PSW_LK_ADDR_CTRL_DELETE_PORT BIT(30)
+#define A5PSW_LK_ADDR_CTRL_CLEAR BIT(29)
+#define A5PSW_LK_ADDR_CTRL_LOOKUP BIT(28)
+#define A5PSW_LK_ADDR_CTRL_WAIT BIT(27)
+#define A5PSW_LK_ADDR_CTRL_READ BIT(26)
+#define A5PSW_LK_ADDR_CTRL_WRITE BIT(25)
+#define A5PSW_LK_ADDR_CTRL_ADDRESS GENMASK(12, 0)
+
+#define A5PSW_LK_DATA_LO 0x40C
+#define A5PSW_LK_DATA_HI 0x410
+#define A5PSW_LK_DATA_HI_VALID BIT(16)
+#define A5PSW_LK_DATA_HI_PORT BIT(16)
+
+#define A5PSW_LK_LEARNCOUNT 0x418
+#define A5PSW_LK_LEARNCOUNT_COUNT GENMASK(13, 0)
+#define A5PSW_LK_LEARNCOUNT_MODE GENMASK(31, 30)
+#define A5PSW_LK_LEARNCOUNT_MODE_SET 0x0
+#define A5PSW_LK_LEARNCOUNT_MODE_INC 0x1
+#define A5PSW_LK_LEARNCOUNT_MODE_DEC 0x2
+
+#define A5PSW_MGMT_TAG_CFG 0x480
+#define A5PSW_MGMT_TAG_CFG_TAGFIELD GENMASK(31, 16)
+#define A5PSW_MGMT_TAG_CFG_ALL_FRAMES BIT(1)
+#define A5PSW_MGMT_TAG_CFG_ENABLE BIT(0)
+
+#define A5PSW_LK_AGETIME 0x41C
+#define A5PSW_LK_AGETIME_MASK GENMASK(23, 0)
+
+#define A5PSW_MDIO_CFG_STATUS 0x700
+#define A5PSW_MDIO_CFG_STATUS_CLKDIV GENMASK(15, 7)
+#define A5PSW_MDIO_CFG_STATUS_READERR BIT(1)
+#define A5PSW_MDIO_CFG_STATUS_BUSY BIT(0)
+
+#define A5PSW_MDIO_COMMAND 0x704
+/* Register is named TRAININIT in datasheet and should be set when reading */
+#define A5PSW_MDIO_COMMAND_READ BIT(15)
+#define A5PSW_MDIO_COMMAND_PHY_ADDR GENMASK(9, 5)
+#define A5PSW_MDIO_COMMAND_REG_ADDR GENMASK(4, 0)
+
+#define A5PSW_MDIO_DATA 0x708
+#define A5PSW_MDIO_DATA_MASK GENMASK(15, 0)
+
+#define A5PSW_CMD_CFG(port) (0x808 + A5PSW_PORT_OFFSET(port))
+#define A5PSW_CMD_CFG_CNTL_FRM_ENA BIT(23)
+#define A5PSW_CMD_CFG_SW_RESET BIT(13)
+#define A5PSW_CMD_CFG_TX_CRC_APPEND BIT(11)
+#define A5PSW_CMD_CFG_HD_ENA BIT(10)
+#define A5PSW_CMD_CFG_PAUSE_IGNORE BIT(8)
+#define A5PSW_CMD_CFG_CRC_FWD BIT(6)
+#define A5PSW_CMD_CFG_ETH_SPEED BIT(3)
+#define A5PSW_CMD_CFG_RX_ENA BIT(1)
+#define A5PSW_CMD_CFG_TX_ENA BIT(0)
+
+#define A5PSW_FRM_LENGTH(port) (0x814 + A5PSW_PORT_OFFSET(port))
+#define A5PSW_FRM_LENGTH_MASK GENMASK(13, 0)
+
+#define A5PSW_STATUS(port) (0x840 + A5PSW_PORT_OFFSET(port))
+
+#define A5PSW_STATS_HIWORD 0x900
+
+/* Stats */
+#define A5PSW_aFramesTransmittedOK 0x868
+#define A5PSW_aFramesReceivedOK 0x86C
+#define A5PSW_aFrameCheckSequenceErrors 0x870
+#define A5PSW_aAlignmentErrors 0x874
+#define A5PSW_aOctetsTransmittedOK 0x878
+#define A5PSW_aOctetsReceivedOK 0x87C
+#define A5PSW_aTxPAUSEMACCtrlFrames 0x880
+#define A5PSW_aRxPAUSEMACCtrlFrames 0x884
+/* If */
+#define A5PSW_ifInErrors 0x888
+#define A5PSW_ifOutErrors 0x88C
+#define A5PSW_ifInUcastPkts 0x890
+#define A5PSW_ifInMulticastPkts 0x894
+#define A5PSW_ifInBroadcastPkts 0x898
+#define A5PSW_ifOutDiscards 0x89C
+#define A5PSW_ifOutUcastPkts 0x8A0
+#define A5PSW_ifOutMulticastPkts 0x8A4
+#define A5PSW_ifOutBroadcastPkts 0x8A8
+/* Ether */
+#define A5PSW_etherStatsDropEvents 0x8AC
+#define A5PSW_etherStatsOctets 0x8B0
+#define A5PSW_etherStatsPkts 0x8B4
+#define A5PSW_etherStatsUndersizePkts 0x8B8
+#define A5PSW_etherStatsOversizePkts 0x8BC
+#define A5PSW_etherStatsPkts64Octets 0x8C0
+#define A5PSW_etherStatsPkts65to127Octets 0x8C4
+#define A5PSW_etherStatsPkts128to255Octets 0x8C8
+#define A5PSW_etherStatsPkts256to511Octets 0x8CC
+#define A5PSW_etherStatsPkts512to1023Octets 0x8D0
+#define A5PSW_etherStatsPkts1024to1518Octets 0x8D4
+#define A5PSW_etherStatsPkts1519toXOctets 0x8D8
+#define A5PSW_etherStatsJabbers 0x8DC
+#define A5PSW_etherStatsFragments 0x8E0
+
+#define A5PSW_VLANReceived 0x8E8
+#define A5PSW_VLANTransmitted 0x8EC
+
+#define A5PSW_aDeferred 0x910
+#define A5PSW_aMultipleCollisions 0x914
+#define A5PSW_aSingleCollisions 0x918
+#define A5PSW_aLateCollisions 0x91C
+#define A5PSW_aExcessiveCollisions 0x920
+#define A5PSW_aCarrierSenseErrors 0x924
+
+#define A5PSW_VLAN_TAG(prio, id) (((prio) << 12) | (id))
+#define A5PSW_PORTS_NUM 5
+#define A5PSW_CPU_PORT (A5PSW_PORTS_NUM - 1)
+#define A5PSW_MDIO_DEF_FREQ 2500000
+#define A5PSW_MDIO_TIMEOUT 100
+#define A5PSW_JUMBO_LEN (10 * SZ_1K)
+#define A5PSW_MDIO_CLK_DIV_MIN 5
+#define A5PSW_TAG_LEN 8
+#define A5PSW_VLAN_COUNT 32
+
+/* Ensure enough space for 2 VLAN tags */
+#define A5PSW_EXTRA_MTU_LEN (A5PSW_TAG_LEN + 8)
+#define A5PSW_MAX_MTU (A5PSW_JUMBO_LEN - A5PSW_EXTRA_MTU_LEN)
+
+#define A5PSW_PATTERN_MGMTFWD 0
+
+#define A5PSW_LK_BUSY_USEC_POLL 10
+#define A5PSW_CTRL_TIMEOUT 1000
+#define A5PSW_TABLE_ENTRIES 8192
+
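+/* Layout of a 64-bit FDB entry as stored in the hardware lookup table */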
+struct fdb_entry {
+ u8 mac[ETH_ALEN];
+ u16 valid:1;
+ u16 is_static:1;
+ u16 prio:3;
+ u16 port_mask:5;
+ u16 reserved:6;
+} __packed;
+
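+/* Overlay of the LK_DATA_LO/LK_DATA_HI register pair on a packed FDB entry */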
+union lk_data {
+ struct {
+ u32 lo;
+ u32 hi;
+ };
+ struct fdb_entry entry;
+};
+
+/**
+ * struct a5psw - switch struct
+ * @base: Base address of the switch
+ * @hclk: hclk_switch clock
+ * @clk: clk_switch clock
+ * @dev: Device associated to the switch
+ * @mii_bus: MDIO bus struct
+ * @pcs: Array of PCS connected to the switch ports (not for the CPU)
+ * @ds: DSA switch struct
+ * @lk_lock: Lock for the lookup table
+ * @reg_lock: Lock for register read-modify-write operation
+ * @bridged_ports: Mask of ports that are bridged and should be flooded
+ * @br_dev: Bridge net device
+ */
+struct a5psw {
+ void __iomem *base;
+ struct clk *hclk;
+ struct clk *clk;
+ struct device *dev;
+ struct mii_bus *mii_bus;
+ struct phylink_pcs *pcs[A5PSW_PORTS_NUM - 1];
+ struct dsa_switch ds;
+ struct mutex lk_lock;
+ spinlock_t reg_lock;
+ u32 bridged_ports;
+ struct net_device *br_dev;
+};
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 72b6fc1932b5..b253e27bcfb4 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -2330,7 +2330,7 @@ int sja1105_static_config_reload(struct sja1105_private *priv,
else
mode = MLO_AN_PHY;
- rc = xpcs_do_config(xpcs, priv->phy_mode[i], mode);
+ rc = xpcs_do_config(xpcs, priv->phy_mode[i], mode, NULL);
if (rc < 0)
goto out;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
index 4ebd2410185a..4d790a89fe77 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
@@ -338,7 +338,7 @@ static int xgbe_platform_probe(struct platform_device *pdev)
* the PHY resources listed last
*/
phy_memnum = xgbe_resource_count(pdev, IORESOURCE_MEM) - 3;
- phy_irqnum = xgbe_resource_count(pdev, IORESOURCE_IRQ) - 1;
+ phy_irqnum = platform_irq_count(pdev) - 1;
dma_irqnum = 1;
dma_irqend = phy_irqnum;
} else {
@@ -348,7 +348,7 @@ static int xgbe_platform_probe(struct platform_device *pdev)
phy_memnum = 0;
phy_irqnum = 0;
dma_irqnum = 1;
- dma_irqend = xgbe_resource_count(pdev, IORESOURCE_IRQ);
+ dma_irqend = platform_irq_count(pdev);
}
/* Obtain the mmio areas for the device */
diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c
index cac509708e9d..1c6ea6766aa1 100644
--- a/drivers/net/ethernet/atheros/ag71xx.c
+++ b/drivers/net/ethernet/atheros/ag71xx.c
@@ -946,7 +946,7 @@ static unsigned int ag71xx_max_frame_len(unsigned int mtu)
return ETH_HLEN + VLAN_HLEN + mtu + ETH_FCS_LEN;
}
-static void ag71xx_hw_set_macaddr(struct ag71xx *ag, unsigned char *mac)
+static void ag71xx_hw_set_macaddr(struct ag71xx *ag, const unsigned char *mac)
{
u32 t;
diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
index 698438a2ee0f..514d61dd91c7 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
@@ -388,7 +388,7 @@ static int bcm_enet_receive_queue(struct net_device *dev, int budget)
priv->rx_buf_size, DMA_FROM_DEVICE);
priv->rx_buf[desc_idx] = NULL;
- skb = build_skb(buf, priv->rx_frag_size);
+ skb = napi_build_skb(buf, priv->rx_frag_size);
if (unlikely(!skb)) {
skb_free_frag(buf);
dev->stats.rx_dropped++;
@@ -468,7 +468,7 @@ static int bcm_enet_tx_reclaim(struct net_device *dev, int force)
dev->stats.tx_errors++;
bytes += skb->len;
- dev_kfree_skb(skb);
+ napi_consume_skb(skb, !force);
released++;
}
diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c
index e6f48786949c..02bd3cf9a260 100644
--- a/drivers/net/ethernet/broadcom/bgmac-bcma.c
+++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c
@@ -332,7 +332,6 @@ static void bgmac_remove(struct bcma_device *core)
bcma_mdio_mii_unregister(bgmac->mii_bus);
bgmac_enet_remove(bgmac);
bcma_set_drvdata(core, NULL);
- kfree(bgmac);
}
static struct bcma_driver bgmac_bcma_driver = {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 56b46b8206a7..b474a4fe4039 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -10658,7 +10658,7 @@ static void __bnxt_close_nic(struct bnxt *bp, bool irq_re_init,
while (bnxt_drv_busy(bp))
msleep(20);
- /* Flush rings and and disable interrupts */
+ /* Flush rings and disable interrupts */
bnxt_shutdown_nic(bp, irq_re_init);
/* TODO CHIMP_FW: Link/PHY related cleanup if (link_re_init) */
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index d89098f4ede8..d0ea8dbfa213 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -4913,8 +4913,8 @@ static int macb_probe(struct platform_device *pdev)
/* MTU range: 68 - 1500 or 10240 */
dev->min_mtu = GEM_MTU_MIN_SIZE;
- if (bp->caps & MACB_CAPS_JUMBO)
- dev->max_mtu = gem_readl(bp, JML) - ETH_HLEN - ETH_FCS_LEN;
+ if ((bp->caps & MACB_CAPS_JUMBO) && bp->jumbo_max_len)
+ dev->max_mtu = bp->jumbo_max_len - ETH_HLEN - ETH_FCS_LEN;
else
dev->max_mtu = ETH_DATA_LEN;
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index 7de3800437c9..c2822e635f89 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -2859,7 +2859,7 @@ static const struct net_device_ops cxgb4vf_netdev_ops = {
* address stored on the adapter
* @adapter: The adapter
*
- * Find the the port mask for the VF based on the index of mac
+ * Find the port mask for the VF based on the index of mac
* address stored in the adapter. If no mac address is stored on
* the adapter for the VF, use the port mask received from the
* firmware.
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
index d546993bda09..1c52592d3b65 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
@@ -877,7 +877,7 @@ int t4vf_get_sge_params(struct adapter *adapter)
/* T4 uses a single control field to specify both the PCIe Padding and
* Packing Boundary. T5 introduced the ability to specify these
- * separately with the Padding Boundary in SGE_CONTROL and and Packing
+ * separately with the Padding Boundary in SGE_CONTROL and Packing
* Boundary in SGE_CONTROL2. So for T5 and later we need to grab
* SGE_CONTROL in order to determine how ingress packet data will be
* laid out in Packed Buffer Mode. Unfortunately, older versions of
diff --git a/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c b/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c
index d081168c95fa..da42dd53a87c 100644
--- a/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c
+++ b/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c
@@ -78,6 +78,7 @@ static const char * const txq_stat_names[] = {
"tx_cso",
"tx_tso",
"tx_encapsulated_tso",
+ "tx_uso",
"tx_more",
"tx_queue_stops",
"tx_queue_restarts",
@@ -778,6 +779,7 @@ static void fun_get_ethtool_stats(struct net_device *netdev,
ADD_STAT(txs.tx_cso);
ADD_STAT(txs.tx_tso);
ADD_STAT(txs.tx_encap_tso);
+ ADD_STAT(txs.tx_uso);
ADD_STAT(txs.tx_more);
ADD_STAT(txs.tx_nstops);
ADD_STAT(txs.tx_nrestarts);
diff --git a/drivers/net/ethernet/fungible/funeth/funeth_main.c b/drivers/net/ethernet/fungible/funeth/funeth_main.c
index 9485cf699c5d..f247b7ad3a88 100644
--- a/drivers/net/ethernet/fungible/funeth/funeth_main.c
+++ b/drivers/net/ethernet/fungible/funeth/funeth_main.c
@@ -1357,7 +1357,8 @@ static const struct net_device_ops fun_netdev_ops = {
#define GSO_ENCAP_FLAGS (NETIF_F_GSO_GRE | NETIF_F_GSO_IPXIP4 | \
NETIF_F_GSO_IPXIP6 | NETIF_F_GSO_UDP_TUNNEL | \
NETIF_F_GSO_UDP_TUNNEL_CSUM)
-#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
+#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN | \
+ NETIF_F_GSO_UDP_L4)
#define VLAN_FEAT (NETIF_F_SG | NETIF_F_HW_CSUM | TSO_FLAGS | \
GSO_ENCAP_FLAGS | NETIF_F_HIGHDMA)
diff --git a/drivers/net/ethernet/fungible/funeth/funeth_tx.c b/drivers/net/ethernet/fungible/funeth/funeth_tx.c
index ff6e29237253..0a4a590218ba 100644
--- a/drivers/net/ethernet/fungible/funeth/funeth_tx.c
+++ b/drivers/net/ethernet/fungible/funeth/funeth_tx.c
@@ -130,6 +130,7 @@ static unsigned int write_pkt_desc(struct sk_buff *skb, struct funeth_txq *q,
struct fun_dataop_gl *gle;
const struct tcphdr *th;
unsigned int ngle, i;
+ unsigned int l4_hlen;
u16 flags;
if (unlikely(map_skb(skb, q->dma_dev, addrs, lens))) {
@@ -178,6 +179,7 @@ static unsigned int write_pkt_desc(struct sk_buff *skb, struct funeth_txq *q,
FUN_ETH_UPDATE_INNER_L3_LEN;
}
th = inner_tcp_hdr(skb);
+ l4_hlen = __tcp_hdrlen(th);
fun_eth_offload_init(&req->offload, flags,
shinfo->gso_size,
tcp_hdr_doff_flags(th), 0,
@@ -185,6 +187,24 @@ static unsigned int write_pkt_desc(struct sk_buff *skb, struct funeth_txq *q,
skb_inner_transport_offset(skb),
skb_network_offset(skb), ol4_ofst);
FUN_QSTAT_INC(q, tx_encap_tso);
+ } else if (shinfo->gso_type & SKB_GSO_UDP_L4) {
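+ /* UDP segmentation offload: the L4 header is a fixed-size UDP header */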
+ flags = FUN_ETH_INNER_LSO | FUN_ETH_INNER_UDP |
+ FUN_ETH_UPDATE_INNER_L4_CKSUM |
+ FUN_ETH_UPDATE_INNER_L4_LEN |
+ FUN_ETH_UPDATE_INNER_L3_LEN;
+
+ if (ip_hdr(skb)->version == 4)
+ flags |= FUN_ETH_UPDATE_INNER_L3_CKSUM;
+ else
+ flags |= FUN_ETH_INNER_IPV6;
+
+ l4_hlen = sizeof(struct udphdr);
+ fun_eth_offload_init(&req->offload, flags,
+ shinfo->gso_size,
+ cpu_to_be16(l4_hlen << 10), 0,
+ skb_network_offset(skb),
+ skb_transport_offset(skb), 0, 0);
+ FUN_QSTAT_INC(q, tx_uso);
} else {
/* HW considers one set of headers as inner */
flags = FUN_ETH_INNER_LSO |
@@ -195,6 +215,7 @@ static unsigned int write_pkt_desc(struct sk_buff *skb, struct funeth_txq *q,
else
flags |= FUN_ETH_UPDATE_INNER_L3_CKSUM;
th = tcp_hdr(skb);
+ l4_hlen = __tcp_hdrlen(th);
fun_eth_offload_init(&req->offload, flags,
shinfo->gso_size,
tcp_hdr_doff_flags(th), 0,
@@ -209,7 +230,7 @@ static unsigned int write_pkt_desc(struct sk_buff *skb, struct funeth_txq *q,
extra_pkts = shinfo->gso_segs - 1;
extra_bytes = (be16_to_cpu(req->offload.inner_l4_off) +
- __tcp_hdrlen(th)) * extra_pkts;
+ l4_hlen) * extra_pkts;
} else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
flags = FUN_ETH_UPDATE_INNER_L4_CKSUM;
if (skb->csum_offset == offsetof(struct udphdr, check))
diff --git a/drivers/net/ethernet/fungible/funeth/funeth_txrx.h b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h
index 04c9f91b7489..1711f82cad71 100644
--- a/drivers/net/ethernet/fungible/funeth/funeth_txrx.h
+++ b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h
@@ -82,6 +82,7 @@ struct funeth_txq_stats { /* per Tx queue SW counters */
u64 tx_cso; /* # of packets with checksum offload */
u64 tx_tso; /* # of non-encapsulated TSO super-packets */
u64 tx_encap_tso; /* # of encapsulated TSO super-packets */
+ u64 tx_uso; /* # of non-encapsulated UDP LSO super-packets */
u64 tx_more; /* # of DBs elided due to xmit_more */
u64 tx_nstops; /* # of times the queue has stopped */
u64 tx_nrestarts; /* # of times the queue has restarted */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 8a3a446219f7..94f80e1c4020 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -769,6 +769,7 @@ struct hnae3_tc_info {
u8 prio_tc[HNAE3_MAX_USER_PRIO]; /* TC indexed by prio */
u16 tqp_count[HNAE3_MAX_TC];
u16 tqp_offset[HNAE3_MAX_TC];
+ u8 max_tc; /* Total number of TCs */
u8 num_tc; /* Total number of enabled TCs */
bool mqprio_active;
};
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 6d20974519fe..4c7988e308a2 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -1129,7 +1129,7 @@ hns3_is_ringparam_changed(struct net_device *ndev,
if (old_ringparam->tx_desc_num == new_ringparam->tx_desc_num &&
old_ringparam->rx_desc_num == new_ringparam->rx_desc_num &&
old_ringparam->rx_buf_len == new_ringparam->rx_buf_len) {
- netdev_info(ndev, "ringparam not changed\n");
+ netdev_info(ndev, "descriptor number and rx buffer length not changed\n");
return false;
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 1ebad0e50e6a..fae79764dc44 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -3268,7 +3268,7 @@ static int hclge_tp_port_init(struct hclge_dev *hdev)
static int hclge_update_port_info(struct hclge_dev *hdev)
{
struct hclge_mac *mac = &hdev->hw.mac;
- int speed = HCLGE_MAC_SPEED_UNKNOWN;
+ int speed;
int ret;
/* get the port info from SFP cmd if not copper port */
@@ -3279,10 +3279,13 @@ static int hclge_update_port_info(struct hclge_dev *hdev)
if (!hdev->support_sfp_query)
return 0;
- if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2)
+ if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) {
+ speed = mac->speed;
ret = hclge_get_sfp_info(hdev, mac);
- else
+ } else {
+ speed = HCLGE_MAC_SPEED_UNKNOWN;
ret = hclge_get_sfp_speed(hdev, &speed);
+ }
if (ret == -EOPNOTSUPP) {
hdev->support_sfp_query = false;
@@ -3294,6 +3297,8 @@ static int hclge_update_port_info(struct hclge_dev *hdev)
if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) {
if (mac->speed_type == QUERY_ACTIVE_SPEED) {
hclge_update_port_capability(hdev, mac);
+ if (mac->speed != speed)
+ (void)hclge_tm_port_shaper_cfg(hdev);
return 0;
}
return hclge_cfg_mac_speed_dup(hdev, mac->speed,
@@ -3376,6 +3381,12 @@ static int hclge_set_vf_link_state(struct hnae3_handle *handle, int vf,
link_state_old = vport->vf_info.link_state;
vport->vf_info.link_state = link_state;
+ /* return success directly if the VF is not alive; the VF will
+ * query the link state itself when it starts work.
+ */
+ if (!test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state))
+ return 0;
+
ret = hclge_push_vf_link_status(vport);
if (ret) {
vport->vf_info.link_state = link_state_old;
@@ -10117,6 +10128,7 @@ static int hclge_modify_port_base_vlan_tag(struct hclge_vport *vport,
if (ret)
return ret;
+ vport->port_base_vlan_cfg.tbl_sta = false;
/* remove old VLAN tag */
if (old_info->vlan_tag == 0)
ret = hclge_set_vf_vlan_common(hdev, vport->vport_id,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 1f87a8a3fe32..2f33b036a47a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -282,8 +282,8 @@ static int hclge_tm_pg_to_pri_map_cfg(struct hclge_dev *hdev,
return hclge_cmd_send(&hdev->hw, &desc, 1);
}
-static int hclge_tm_qs_to_pri_map_cfg(struct hclge_dev *hdev,
- u16 qs_id, u8 pri)
+static int hclge_tm_qs_to_pri_map_cfg(struct hclge_dev *hdev, u16 qs_id, u8 pri,
+ bool link_vld)
{
struct hclge_qs_to_pri_link_cmd *map;
struct hclge_desc desc;
@@ -294,7 +294,7 @@ static int hclge_tm_qs_to_pri_map_cfg(struct hclge_dev *hdev,
map->qs_id = cpu_to_le16(qs_id);
map->priority = pri;
- map->link_vld = HCLGE_TM_QS_PRI_LINK_VLD_MSK;
+ map->link_vld = link_vld ? HCLGE_TM_QS_PRI_LINK_VLD_MSK : 0;
return hclge_cmd_send(&hdev->hw, &desc, 1);
}
@@ -420,7 +420,7 @@ static int hclge_tm_pg_shapping_cfg(struct hclge_dev *hdev,
return hclge_cmd_send(&hdev->hw, &desc, 1);
}
-static int hclge_tm_port_shaper_cfg(struct hclge_dev *hdev)
+int hclge_tm_port_shaper_cfg(struct hclge_dev *hdev)
{
struct hclge_port_shapping_cmd *shap_cfg_cmd;
struct hclge_shaper_ir_para ir_para;
@@ -642,11 +642,13 @@ static void hclge_tm_update_kinfo_rss_size(struct hclge_vport *vport)
* one tc for VF for simplicity. VF's vport_id is non zero.
*/
if (vport->vport_id) {
+ kinfo->tc_info.max_tc = 1;
kinfo->tc_info.num_tc = 1;
vport->qs_offset = HNAE3_MAX_TC +
vport->vport_id - HCLGE_VF_VPORT_START_NUM;
vport_max_rss_size = hdev->vf_rss_size_max;
} else {
+ kinfo->tc_info.max_tc = hdev->tc_max;
kinfo->tc_info.num_tc =
min_t(u16, vport->alloc_tqps, hdev->tm_info.num_tc);
vport->qs_offset = 0;
@@ -679,7 +681,9 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport)
kinfo->num_tqps = hclge_vport_get_tqp_num(vport);
vport->dwrr = 100; /* 100 percent as init */
vport->bw_limit = hdev->tm_info.pg_info[0].bw_limit;
- hdev->rss_cfg.rss_size = kinfo->rss_size;
+
+ if (vport->vport_id == PF_VPORT_ID)
+ hdev->rss_cfg.rss_size = kinfo->rss_size;
/* when enable mqprio, the tc_info has been updated. */
if (kinfo->tc_info.mqprio_active)
@@ -714,14 +718,22 @@ static void hclge_tm_vport_info_update(struct hclge_dev *hdev)
static void hclge_tm_tc_info_init(struct hclge_dev *hdev)
{
- u8 i;
+ u8 i, tc_sch_mode;
+ u32 bw_limit;
+
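+ /* Enabled TCs get DWRR with the PG bandwidth; the remaining TCs fall back to strict priority with a zero rate */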
+ for (i = 0; i < hdev->tc_max; i++) {
+ if (i < hdev->tm_info.num_tc) {
+ tc_sch_mode = HCLGE_SCH_MODE_DWRR;
+ bw_limit = hdev->tm_info.pg_info[0].bw_limit;
+ } else {
+ tc_sch_mode = HCLGE_SCH_MODE_SP;
+ bw_limit = 0;
+ }
- for (i = 0; i < hdev->tm_info.num_tc; i++) {
hdev->tm_info.tc_info[i].tc_id = i;
- hdev->tm_info.tc_info[i].tc_sch_mode = HCLGE_SCH_MODE_DWRR;
+ hdev->tm_info.tc_info[i].tc_sch_mode = tc_sch_mode;
hdev->tm_info.tc_info[i].pgid = 0;
- hdev->tm_info.tc_info[i].bw_limit =
- hdev->tm_info.pg_info[0].bw_limit;
+ hdev->tm_info.tc_info[i].bw_limit = bw_limit;
}
for (i = 0; i < HNAE3_MAX_USER_PRIO; i++)
@@ -926,10 +938,13 @@ static int hclge_tm_pri_q_qs_cfg_tc_base(struct hclge_dev *hdev)
for (k = 0; k < hdev->num_alloc_vport; k++) {
struct hnae3_knic_private_info *kinfo = &vport[k].nic.kinfo;
- for (i = 0; i < kinfo->tc_info.num_tc; i++) {
+ for (i = 0; i < kinfo->tc_info.max_tc; i++) {
+ u8 pri = i < kinfo->tc_info.num_tc ? i : 0;
+ bool link_vld = i < kinfo->tc_info.num_tc;
+
ret = hclge_tm_qs_to_pri_map_cfg(hdev,
vport[k].qs_offset + i,
- i);
+ pri, link_vld);
if (ret)
return ret;
}
@@ -949,7 +964,7 @@ static int hclge_tm_pri_q_qs_cfg_vnet_base(struct hclge_dev *hdev)
for (i = 0; i < HNAE3_MAX_TC; i++) {
ret = hclge_tm_qs_to_pri_map_cfg(hdev,
vport[k].qs_offset + i,
- k);
+ k, true);
if (ret)
return ret;
}
@@ -989,33 +1004,39 @@ static int hclge_tm_pri_tc_base_shaper_cfg(struct hclge_dev *hdev)
{
u32 max_tm_rate = hdev->ae_dev->dev_specs.max_tm_rate;
struct hclge_shaper_ir_para ir_para;
- u32 shaper_para;
+ u32 shaper_para_c, shaper_para_p;
int ret;
u32 i;
- for (i = 0; i < hdev->tm_info.num_tc; i++) {
+ for (i = 0; i < hdev->tc_max; i++) {
u32 rate = hdev->tm_info.tc_info[i].bw_limit;
- ret = hclge_shaper_para_calc(rate, HCLGE_SHAPER_LVL_PRI,
- &ir_para, max_tm_rate);
- if (ret)
- return ret;
+ if (rate) {
+ ret = hclge_shaper_para_calc(rate, HCLGE_SHAPER_LVL_PRI,
+ &ir_para, max_tm_rate);
+ if (ret)
+ return ret;
+
+ shaper_para_c = hclge_tm_get_shapping_para(0, 0, 0,
+ HCLGE_SHAPER_BS_U_DEF,
+ HCLGE_SHAPER_BS_S_DEF);
+ shaper_para_p = hclge_tm_get_shapping_para(ir_para.ir_b,
+ ir_para.ir_u,
+ ir_para.ir_s,
+ HCLGE_SHAPER_BS_U_DEF,
+ HCLGE_SHAPER_BS_S_DEF);
+ } else {
+ shaper_para_c = 0;
+ shaper_para_p = 0;
+ }
- shaper_para = hclge_tm_get_shapping_para(0, 0, 0,
- HCLGE_SHAPER_BS_U_DEF,
- HCLGE_SHAPER_BS_S_DEF);
ret = hclge_tm_pri_shapping_cfg(hdev, HCLGE_TM_SHAP_C_BUCKET, i,
- shaper_para, rate);
+ shaper_para_c, rate);
if (ret)
return ret;
- shaper_para = hclge_tm_get_shapping_para(ir_para.ir_b,
- ir_para.ir_u,
- ir_para.ir_s,
- HCLGE_SHAPER_BS_U_DEF,
- HCLGE_SHAPER_BS_S_DEF);
ret = hclge_tm_pri_shapping_cfg(hdev, HCLGE_TM_SHAP_P_BUCKET, i,
- shaper_para, rate);
+ shaper_para_p, rate);
if (ret)
return ret;
}
@@ -1125,7 +1146,7 @@ static int hclge_tm_pri_tc_base_dwrr_cfg(struct hclge_dev *hdev)
int ret;
u32 i, k;
- for (i = 0; i < hdev->tm_info.num_tc; i++) {
+ for (i = 0; i < hdev->tc_max; i++) {
pg_info =
&hdev->tm_info.pg_info[hdev->tm_info.tc_info[i].pgid];
dwrr = pg_info->tc_dwrr[i];
@@ -1135,9 +1156,15 @@ static int hclge_tm_pri_tc_base_dwrr_cfg(struct hclge_dev *hdev)
return ret;
for (k = 0; k < hdev->num_alloc_vport; k++) {
+ struct hnae3_knic_private_info *kinfo = &vport[k].nic.kinfo;
+
+ if (i >= kinfo->tc_info.max_tc)
+ continue;
+
+ dwrr = i < kinfo->tc_info.num_tc ? vport[k].dwrr : 0;
ret = hclge_tm_qs_weight_cfg(
hdev, vport[k].qs_offset + i,
- vport[k].dwrr);
+ dwrr);
if (ret)
return ret;
}
@@ -1303,6 +1330,7 @@ static int hclge_tm_schd_mode_tc_base_cfg(struct hclge_dev *hdev, u8 pri_id)
{
struct hclge_vport *vport = hdev->vport;
int ret;
+ u8 mode;
u16 i;
ret = hclge_tm_pri_schd_mode_cfg(hdev, pri_id);
@@ -1310,9 +1338,16 @@ static int hclge_tm_schd_mode_tc_base_cfg(struct hclge_dev *hdev, u8 pri_id)
return ret;
for (i = 0; i < hdev->num_alloc_vport; i++) {
+ struct hnae3_knic_private_info *kinfo = &vport[i].nic.kinfo;
+
+ if (pri_id >= kinfo->tc_info.max_tc)
+ continue;
+
+ mode = pri_id < kinfo->tc_info.num_tc ? HCLGE_SCH_MODE_DWRR :
+ HCLGE_SCH_MODE_SP;
ret = hclge_tm_qs_schd_mode_cfg(hdev,
vport[i].qs_offset + pri_id,
- HCLGE_SCH_MODE_DWRR);
+ mode);
if (ret)
return ret;
}
@@ -1353,7 +1388,7 @@ static int hclge_tm_lvl34_schd_mode_cfg(struct hclge_dev *hdev)
u8 i;
if (hdev->tx_sch_mode == HCLGE_FLAG_TC_BASE_SCH_MODE) {
- for (i = 0; i < hdev->tm_info.num_tc; i++) {
+ for (i = 0; i < hdev->tc_max; i++) {
ret = hclge_tm_schd_mode_tc_base_cfg(hdev, i);
if (ret)
return ret;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index 619cc30a2dfc..d943943912f7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -237,6 +237,7 @@ int hclge_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr);
void hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats);
void hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats);
int hclge_tm_qs_shaper_cfg(struct hclge_vport *vport, int max_tx_rate);
+int hclge_tm_port_shaper_cfg(struct hclge_dev *hdev);
int hclge_tm_get_qset_num(struct hclge_dev *hdev, u16 *qset_num);
int hclge_tm_get_pri_num(struct hclge_dev *hdev, u8 *pri_num);
int hclge_tm_get_qset_map_pri(struct hclge_dev *hdev, u16 qset_id, u8 *priority,
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_devlink.c b/drivers/net/ethernet/huawei/hinic/hinic_devlink.c
index 60ae8bfc5f69..1749d26f4bef 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_devlink.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_devlink.c
@@ -43,9 +43,7 @@ static bool check_image_valid(struct hinic_devlink_priv *priv, const u8 *buf,
for (i = 0; i < fw_image->fw_info.fw_section_cnt; i++) {
len += fw_image->fw_section_info[i].fw_section_len;
- memcpy(&host_image->image_section_info[i],
- &fw_image->fw_section_info[i],
- sizeof(struct fw_section_info_st));
+ host_image->image_section_info[i] = fw_image->fw_section_info[i];
}
if (len != fw_image->fw_len ||
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index c65e9e2dcb42..55841816272b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -1143,6 +1143,71 @@ static int i40e_get_link_ksettings(struct net_device *netdev,
return 0;
}
+#define I40E_LBIT_SIZE 8
+/**
+ * i40e_speed_to_link_speed - Translate decimal speed to i40e_aq_link_speed
+ * @speed: speed in decimal
+ * @ks: ethtool ksettings
+ *
+ * Return i40e_aq_link_speed based on speed
+ **/
+static enum i40e_aq_link_speed
+i40e_speed_to_link_speed(__u32 speed, const struct ethtool_link_ksettings *ks)
+{
+ enum i40e_aq_link_speed link_speed = I40E_LINK_SPEED_UNKNOWN;
+ bool speed_changed = false;
+ int i, j;
+
+ static const struct {
+ __u32 speed;
+ enum i40e_aq_link_speed link_speed;
+ __u8 bit[I40E_LBIT_SIZE];
+ } i40e_speed_lut[] = {
+#define I40E_LBIT(mode) ETHTOOL_LINK_MODE_ ## mode ##_Full_BIT
+ {SPEED_100, I40E_LINK_SPEED_100MB, {I40E_LBIT(100baseT)} },
+ {SPEED_1000, I40E_LINK_SPEED_1GB,
+ {I40E_LBIT(1000baseT), I40E_LBIT(1000baseX),
+ I40E_LBIT(1000baseKX)} },
+ {SPEED_10000, I40E_LINK_SPEED_10GB,
+ {I40E_LBIT(10000baseT), I40E_LBIT(10000baseKR),
+ I40E_LBIT(10000baseLR), I40E_LBIT(10000baseCR),
+ I40E_LBIT(10000baseSR), I40E_LBIT(10000baseKX4)} },
+
+ {SPEED_25000, I40E_LINK_SPEED_25GB,
+ {I40E_LBIT(25000baseCR), I40E_LBIT(25000baseKR),
+ I40E_LBIT(25000baseSR)} },
+ {SPEED_40000, I40E_LINK_SPEED_40GB,
+ {I40E_LBIT(40000baseKR4), I40E_LBIT(40000baseCR4),
+ I40E_LBIT(40000baseSR4), I40E_LBIT(40000baseLR4)} },
+ {SPEED_20000, I40E_LINK_SPEED_20GB,
+ {I40E_LBIT(20000baseKR2)} },
+ {SPEED_2500, I40E_LINK_SPEED_2_5GB, {I40E_LBIT(2500baseT)} },
+ {SPEED_5000, I40E_LINK_SPEED_5GB, {I40E_LBIT(5000baseT)} }
+#undef I40E_LBIT
+};
+
+ for (i = 0; i < ARRAY_SIZE(i40e_speed_lut); i++) {
+ if (i40e_speed_lut[i].speed == speed) {
+ for (j = 0; j < I40E_LBIT_SIZE; j++) {
+ if (test_bit(i40e_speed_lut[i].bit[j],
+ ks->link_modes.supported)) {
+ speed_changed = true;
+ break;
+ }
+ if (!i40e_speed_lut[i].bit[j])
+ break;
+ }
+ if (speed_changed) {
+ link_speed = i40e_speed_lut[i].link_speed;
+ break;
+ }
+ }
+ }
+ return link_speed;
+}
+
+#undef I40E_LBIT_SIZE
+
/**
* i40e_set_link_ksettings - Set Speed and Duplex
* @netdev: network interface device structure
@@ -1159,12 +1224,14 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
struct ethtool_link_ksettings copy_ks;
struct i40e_aq_set_phy_config config;
struct i40e_pf *pf = np->vsi->back;
+ enum i40e_aq_link_speed link_speed;
struct i40e_vsi *vsi = np->vsi;
struct i40e_hw *hw = &pf->hw;
bool autoneg_changed = false;
i40e_status status = 0;
int timeout = 50;
int err = 0;
+ __u32 speed;
u8 autoneg;
/* Changing port settings is not supported if this isn't the
@@ -1197,6 +1264,7 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
/* save autoneg out of ksettings */
autoneg = copy_ks.base.autoneg;
+ speed = copy_ks.base.speed;
/* get our own copy of the bits to check against */
memset(&safe_ks, 0, sizeof(struct ethtool_link_ksettings));
@@ -1215,6 +1283,7 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
/* set autoneg back to what it currently is */
copy_ks.base.autoneg = safe_ks.base.autoneg;
+ copy_ks.base.speed = safe_ks.base.speed;
/* If copy_ks.base and safe_ks.base are not the same now, then they are
* trying to set something that we do not support.
@@ -1331,6 +1400,27 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
40000baseLR4_Full))
config.link_speed |= I40E_LINK_SPEED_40GB;
+ /* Autonegotiation must be disabled to change speed */
+ if ((speed != SPEED_UNKNOWN && safe_ks.base.speed != speed) &&
+ (autoneg == AUTONEG_DISABLE ||
+ (safe_ks.base.autoneg == AUTONEG_DISABLE && !autoneg_changed))) {
+ link_speed = i40e_speed_to_link_speed(speed, ks);
+ if (link_speed == I40E_LINK_SPEED_UNKNOWN) {
+ netdev_info(netdev, "Given speed is not supported\n");
+ err = -EOPNOTSUPP;
+ goto done;
+ } else {
+ config.link_speed = link_speed;
+ }
+ } else {
+ if (safe_ks.base.speed != speed) {
+ netdev_info(netdev,
+ "Unable to set speed, disable autoneg\n");
+ err = -EOPNOTSUPP;
+ goto done;
+ }
+ }
+
/* If speed didn't get set, set it to what it currently is.
* This is needed because if advertise is 0 (as it is when autoneg
* is disabled) then speed won't get set.
@@ -2588,15 +2678,16 @@ static void i40e_diag_test(struct net_device *netdev,
set_bit(__I40E_TESTING, pf->state);
+ if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
+ test_bit(__I40E_RESET_INTR_RECEIVED, pf->state)) {
+ dev_warn(&pf->pdev->dev,
+ "Cannot start offline testing when PF is in reset state.\n");
+ goto skip_ol_tests;
+ }
+
if (i40e_active_vfs(pf) || i40e_active_vmdqs(pf)) {
dev_warn(&pf->pdev->dev,
"Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n");
- data[I40E_ETH_TEST_REG] = 1;
- data[I40E_ETH_TEST_EEPROM] = 1;
- data[I40E_ETH_TEST_INTR] = 1;
- data[I40E_ETH_TEST_LINK] = 1;
- eth_test->flags |= ETH_TEST_FL_FAILED;
- clear_bit(__I40E_TESTING, pf->state);
goto skip_ol_tests;
}
@@ -2643,9 +2734,17 @@ static void i40e_diag_test(struct net_device *netdev,
data[I40E_ETH_TEST_INTR] = 0;
}
-skip_ol_tests:
-
netif_info(pf, drv, netdev, "testing finished\n");
+ return;
+
+skip_ol_tests:
+ data[I40E_ETH_TEST_REG] = 1;
+ data[I40E_ETH_TEST_EEPROM] = 1;
+ data[I40E_ETH_TEST_INTR] = 1;
+ data[I40E_ETH_TEST_LINK] = 1;
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+ clear_bit(__I40E_TESTING, pf->state);
+ netif_info(pf, drv, netdev, "testing failed\n");
}
static void i40e_get_wol(struct net_device *netdev,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 1599ac538e7f..797f61b2cd96 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -4162,7 +4162,6 @@ static void i40e_free_misc_vector(struct i40e_pf *pf)
i40e_flush(&pf->hw);
if (pf->flags & I40E_FLAG_MSIX_ENABLED && pf->msix_entries) {
- synchronize_irq(pf->msix_entries[0].vector);
free_irq(pf->msix_entries[0].vector, pf);
clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state);
}
@@ -4901,7 +4900,6 @@ static void i40e_vsi_free_irq(struct i40e_vsi *vsi)
irq_set_affinity_notifier(irq_num, NULL);
/* remove our suggested affinity mask for this IRQ */
irq_update_affinity_hint(irq_num, NULL);
- synchronize_irq(irq_num);
free_irq(irq_num, vsi->q_vectors[i]);
/* Tear down the interrupt queue link list
@@ -8667,6 +8665,11 @@ static int i40e_configure_clsflower(struct i40e_vsi *vsi,
return -EOPNOTSUPP;
}
+ if (!tc) {
+ dev_err(&pf->pdev->dev, "Unable to add filter because of invalid destination");
+ return -EINVAL;
+ }
+
if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
test_bit(__I40E_RESET_INTR_RECEIVED, pf->state))
return -EBUSY;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 7bc1174edf6b..a327189deda0 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1483,10 +1483,8 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
if (!rx_ring->rx_bi)
return;
- if (rx_ring->skb) {
- dev_kfree_skb(rx_ring->skb);
- rx_ring->skb = NULL;
- }
+ dev_kfree_skb(rx_ring->skb);
+ rx_ring->skb = NULL;
if (rx_ring->xsk_pool) {
i40e_xsk_clean_rx_ring(rx_ring);
@@ -2291,16 +2289,14 @@ int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp, struct i40e_ring *xdp_ring)
* i40e_run_xdp - run an XDP program
* @rx_ring: Rx ring being processed
* @xdp: XDP buffer containing the frame
+ * @xdp_prog: XDP program to run
**/
-static int i40e_run_xdp(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
+static int i40e_run_xdp(struct i40e_ring *rx_ring, struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
{
int err, result = I40E_XDP_PASS;
struct i40e_ring *xdp_ring;
- struct bpf_prog *xdp_prog;
u32 act;
- xdp_prog = READ_ONCE(rx_ring->xdp_prog);
-
if (!xdp_prog)
goto xdp_out;
@@ -2445,6 +2441,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
unsigned int offset = rx_ring->rx_offset;
struct sk_buff *skb = rx_ring->skb;
unsigned int xdp_xmit = 0;
+ struct bpf_prog *xdp_prog;
bool failure = false;
struct xdp_buff xdp;
int xdp_res = 0;
@@ -2454,6 +2451,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
#endif
xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq);
+ xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+
while (likely(total_rx_packets < (unsigned int)budget)) {
struct i40e_rx_buffer *rx_buffer;
union i40e_rx_desc *rx_desc;
@@ -2509,11 +2508,12 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
hard_start = page_address(rx_buffer->page) +
rx_buffer->page_offset - offset;
xdp_prepare_buff(&xdp, hard_start, offset, size, true);
+ xdp_buff_clear_frags_flag(&xdp);
#if (PAGE_SIZE > 4096)
/* At larger PAGE_SIZE, frame_sz depend on len size */
xdp.frame_sz = i40e_rx_frame_truesize(rx_ring, size);
#endif
- xdp_res = i40e_run_xdp(rx_ring, &xdp);
+ xdp_res = i40e_run_xdp(rx_ring, &xdp, xdp_prog);
}
if (xdp_res) {
@@ -3713,35 +3713,55 @@ u16 i40e_lan_select_queue(struct net_device *netdev,
static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf,
struct i40e_ring *xdp_ring)
{
- u16 i = xdp_ring->next_to_use;
- struct i40e_tx_buffer *tx_bi;
- struct i40e_tx_desc *tx_desc;
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
+ u8 nr_frags = unlikely(xdp_frame_has_frags(xdpf)) ? sinfo->nr_frags : 0;
+ u16 i = 0, index = xdp_ring->next_to_use;
+ struct i40e_tx_buffer *tx_head = &xdp_ring->tx_bi[index];
+ struct i40e_tx_buffer *tx_bi = tx_head;
+ struct i40e_tx_desc *tx_desc = I40E_TX_DESC(xdp_ring, index);
void *data = xdpf->data;
u32 size = xdpf->len;
- dma_addr_t dma;
- if (!unlikely(I40E_DESC_UNUSED(xdp_ring))) {
+ if (unlikely(I40E_DESC_UNUSED(xdp_ring) < 1 + nr_frags)) {
xdp_ring->tx_stats.tx_busy++;
return I40E_XDP_CONSUMED;
}
- dma = dma_map_single(xdp_ring->dev, data, size, DMA_TO_DEVICE);
- if (dma_mapping_error(xdp_ring->dev, dma))
- return I40E_XDP_CONSUMED;
- tx_bi = &xdp_ring->tx_bi[i];
- tx_bi->bytecount = size;
- tx_bi->gso_segs = 1;
- tx_bi->xdpf = xdpf;
+ tx_head->bytecount = xdp_get_frame_len(xdpf);
+ tx_head->gso_segs = 1;
+ tx_head->xdpf = xdpf;
- /* record length, and DMA address */
- dma_unmap_len_set(tx_bi, len, size);
- dma_unmap_addr_set(tx_bi, dma, dma);
+ for (;;) {
+ dma_addr_t dma;
- tx_desc = I40E_TX_DESC(xdp_ring, i);
- tx_desc->buffer_addr = cpu_to_le64(dma);
- tx_desc->cmd_type_offset_bsz = build_ctob(I40E_TX_DESC_CMD_ICRC
- | I40E_TXD_CMD,
- 0, size, 0);
+ dma = dma_map_single(xdp_ring->dev, data, size, DMA_TO_DEVICE);
+ if (dma_mapping_error(xdp_ring->dev, dma))
+ goto unmap;
+
+ /* record length, and DMA address */
+ dma_unmap_len_set(tx_bi, len, size);
+ dma_unmap_addr_set(tx_bi, dma, dma);
+
+ tx_desc->buffer_addr = cpu_to_le64(dma);
+ tx_desc->cmd_type_offset_bsz =
+ build_ctob(I40E_TX_DESC_CMD_ICRC, 0, size, 0);
+
+ if (++index == xdp_ring->count)
+ index = 0;
+
+ if (i == nr_frags)
+ break;
+
+ tx_bi = &xdp_ring->tx_bi[index];
+ tx_desc = I40E_TX_DESC(xdp_ring, index);
+
+ data = skb_frag_address(&sinfo->frags[i]);
+ size = skb_frag_size(&sinfo->frags[i]);
+ i++;
+ }
+
+ tx_desc->cmd_type_offset_bsz |=
+ cpu_to_le64(I40E_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT);
/* Make certain all of the status bits have been updated
* before next_to_watch is written.
@@ -3749,14 +3769,30 @@ static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf,
smp_wmb();
xdp_ring->xdp_tx_active++;
- i++;
- if (i == xdp_ring->count)
- i = 0;
- tx_bi->next_to_watch = tx_desc;
- xdp_ring->next_to_use = i;
+ tx_head->next_to_watch = tx_desc;
+ xdp_ring->next_to_use = index;
return I40E_XDP_TX;
+
+unmap:
+ for (;;) {
+ tx_bi = &xdp_ring->tx_bi[index];
+ if (dma_unmap_len(tx_bi, len))
+ dma_unmap_page(xdp_ring->dev,
+ dma_unmap_addr(tx_bi, dma),
+ dma_unmap_len(tx_bi, len),
+ DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_bi, len, 0);
+ if (tx_bi == tx_head)
+ break;
+
+ if (!index)
+ index += xdp_ring->count;
+ index--;
+ }
+
+ return I40E_XDP_CONSUMED;
}
/**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 9949469333d5..d01fb592778c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -2282,7 +2282,7 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg)
}
if (vf->adq_enabled) {
- for (i = 0; i < I40E_MAX_VF_VSI; i++)
+ for (i = 0; i < vf->num_tc; i++)
num_qps_all += vf->ch[i].num_qps;
if (num_qps_all != qci->num_queue_pairs) {
aq_ret = I40E_ERR_PARAM;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index af3e7e6afc85..6d4009e0cbd6 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -143,20 +143,17 @@ int i40e_xsk_pool_setup(struct i40e_vsi *vsi, struct xsk_buff_pool *pool,
* i40e_run_xdp_zc - Executes an XDP program on an xdp_buff
* @rx_ring: Rx ring
* @xdp: xdp_buff used as input to the XDP program
+ * @xdp_prog: XDP program to run
*
* Returns any of I40E_XDP_{PASS, CONSUMED, TX, REDIR}
**/
-static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
+static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog)
{
int err, result = I40E_XDP_PASS;
struct i40e_ring *xdp_ring;
- struct bpf_prog *xdp_prog;
u32 act;
- /* NB! xdp_prog will always be !NULL, due to the fact that
- * this path is enabled by setting an XDP program.
- */
- xdp_prog = READ_ONCE(rx_ring->xdp_prog);
act = bpf_prog_run_xdp(xdp_prog, xdp);
if (likely(act == XDP_REDIRECT)) {
@@ -339,9 +336,15 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
u16 next_to_clean = rx_ring->next_to_clean;
u16 count_mask = rx_ring->count - 1;
unsigned int xdp_res, xdp_xmit = 0;
+ struct bpf_prog *xdp_prog;
bool failure = false;
u16 cleaned_count;
+ /* NB! xdp_prog will always be !NULL, due to the fact that
+ * this path is enabled by setting an XDP program.
+ */
+ xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+
while (likely(total_rx_packets < (unsigned int)budget)) {
union i40e_rx_desc *rx_desc;
unsigned int rx_packets;
@@ -378,7 +381,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
xsk_buff_set_size(bi, size);
xsk_buff_dma_sync_for_cpu(bi, rx_ring->xsk_pool);
- xdp_res = i40e_run_xdp_zc(rx_ring, bi);
+ xdp_res = i40e_run_xdp_zc(rx_ring, bi, xdp_prog);
i40e_handle_xdp_result_zc(rx_ring, bi, rx_desc, &rx_packets,
&rx_bytes, size, xdp_res, &failure);
if (failure)
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 95772e17e5be..541103909ef4 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -985,7 +985,7 @@ struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter,
f->add = true;
f->add_handled = false;
f->is_new_mac = true;
- f->is_primary = false;
+ f->is_primary = ether_addr_equal(macaddr, adapter->hw.mac.addr);
adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER;
} else {
f->remove = false;
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 1e71b70f0e52..70335f6e8524 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -2190,6 +2190,42 @@ ice_setup_autoneg(struct ice_port_info *p, struct ethtool_link_ksettings *ks,
}
/**
+ * ice_set_phy_type_from_speed - set phy_types based on speeds
+ * and advertised modes
+ * @ks: ethtool link ksettings struct
+ * @phy_type_low: pointer to the lower part of phy_type
+ * @phy_type_high: pointer to the higher part of phy_type
+ * @adv_link_speed: targeted link speeds bitmap
+ */
+static void
+ice_set_phy_type_from_speed(const struct ethtool_link_ksettings *ks,
+ u64 *phy_type_low, u64 *phy_type_high,
+ u16 adv_link_speed)
+{
+ /* Handle 1000M speed in a special way because ice_update_phy_type
+ * enables all link modes, but having mixed copper and optical
+ * standards is not supported.
+ */
+ adv_link_speed &= ~ICE_AQ_LINK_SPEED_1000MB;
+
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 1000baseT_Full))
+ *phy_type_low |= ICE_PHY_TYPE_LOW_1000BASE_T |
+ ICE_PHY_TYPE_LOW_1G_SGMII;
+
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 1000baseKX_Full))
+ *phy_type_low |= ICE_PHY_TYPE_LOW_1000BASE_KX;
+
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 1000baseX_Full))
+ *phy_type_low |= ICE_PHY_TYPE_LOW_1000BASE_SX |
+ ICE_PHY_TYPE_LOW_1000BASE_LX;
+
+ ice_update_phy_type(phy_type_low, phy_type_high, adv_link_speed);
+}
+
+/**
* ice_set_link_ksettings - Set Speed and Duplex
* @netdev: network interface device structure
* @ks: ethtool ksettings
@@ -2320,7 +2356,8 @@ ice_set_link_ksettings(struct net_device *netdev,
adv_link_speed = curr_link_speed;
/* Convert the advertise link speeds to their corresponded PHY_TYPE */
- ice_update_phy_type(&phy_type_low, &phy_type_high, adv_link_speed);
+ ice_set_phy_type_from_speed(ks, &phy_type_low, &phy_type_high,
+ adv_link_speed);
if (!autoneg_changed && adv_link_speed == curr_link_speed) {
netdev_info(netdev, "Nothing changed, exiting without setting anything.\n");
@@ -3470,6 +3507,16 @@ static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch)
new_rx = ch->combined_count + ch->rx_count;
new_tx = ch->combined_count + ch->tx_count;
+ if (new_rx < vsi->tc_cfg.numtc) {
+ netdev_err(dev, "Cannot set less Rx channels, than Traffic Classes you have (%u)\n",
+ vsi->tc_cfg.numtc);
+ return -EINVAL;
+ }
+ if (new_tx < vsi->tc_cfg.numtc) {
+ netdev_err(dev, "Cannot set less Tx channels, than Traffic Classes you have (%u)\n",
+ vsi->tc_cfg.numtc);
+ return -EINVAL;
+ }
if (new_rx > ice_get_max_rxq(pf)) {
netdev_err(dev, "Maximum allowed Rx channels is %d\n",
ice_get_max_rxq(pf));
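
The new checks reject channel counts below the number of configured Traffic Classes before the existing maximum-queue checks run, since each TC needs at least one queue. A small sketch of the resulting validation order; the limits are illustrative parameters here, not driver fields:

```c
#include <stdio.h>

static int validate_channels(unsigned int new_rx, unsigned int new_tx,
			     unsigned int numtc,
			     unsigned int max_rxq, unsigned int max_txq)
{
	/* lower bound first: one queue per Traffic Class */
	if (new_rx < numtc || new_tx < numtc) {
		fprintf(stderr, "need at least %u channels, one per TC\n",
			numtc);
		return -1;
	}
	/* then the hardware upper bounds */
	if (new_rx > max_rxq || new_tx > max_txq) {
		fprintf(stderr, "channel count exceeds device maximum\n");
		return -1;
	}
	return 0;
}
```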
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index b28fb8eacffb..a6c4be5e5566 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -912,7 +912,7 @@ static void ice_set_dflt_vsi_ctx(struct ice_hw *hw, struct ice_vsi_ctx *ctxt)
* @vsi: the VSI being configured
* @ctxt: VSI context structure
*/
-static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
+static int ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
{
u16 offset = 0, qmap = 0, tx_count = 0, pow = 0;
u16 num_txq_per_tc, num_rxq_per_tc;
@@ -985,7 +985,18 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
else
vsi->num_rxq = num_rxq_per_tc;
+ if (vsi->num_rxq > vsi->alloc_rxq) {
+ dev_err(ice_pf_to_dev(vsi->back), "Trying to use more Rx queues (%u), than were allocated (%u)!\n",
+ vsi->num_rxq, vsi->alloc_rxq);
+ return -EINVAL;
+ }
+
vsi->num_txq = tx_count;
+ if (vsi->num_txq > vsi->alloc_txq) {
+ dev_err(ice_pf_to_dev(vsi->back), "Trying to use more Tx queues (%u), than were allocated (%u)!\n",
+ vsi->num_txq, vsi->alloc_txq);
+ return -EINVAL;
+ }
if (vsi->type == ICE_VSI_VF && vsi->num_txq != vsi->num_rxq) {
dev_dbg(ice_pf_to_dev(vsi->back), "VF VSI should have same number of Tx and Rx queues. Hence making them equal\n");
@@ -1003,6 +1014,8 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
*/
ctxt->info.q_mapping[0] = cpu_to_le16(vsi->rxq_map[0]);
ctxt->info.q_mapping[1] = cpu_to_le16(vsi->num_rxq);
+
+ return 0;
}
/**
@@ -1190,7 +1203,10 @@ static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi)
if (vsi->type == ICE_VSI_CHNL) {
ice_chnl_vsi_setup_q_map(vsi, ctxt);
} else {
- ice_vsi_setup_q_map(vsi, ctxt);
+ ret = ice_vsi_setup_q_map(vsi, ctxt);
+ if (ret)
+ goto out;
+
if (!init_vsi) /* means VSI being updated */
/* must indicate which sections of the VSI context are
* being modified
@@ -3467,7 +3483,7 @@ void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc)
*
* Prepares VSI tc_config to have queue configurations based on MQPRIO options.
*/
-static void
+static int
ice_vsi_setup_q_map_mqprio(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt,
u8 ena_tc)
{
@@ -3516,7 +3532,18 @@ ice_vsi_setup_q_map_mqprio(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt,
/* Set actual Tx/Rx queue pairs */
vsi->num_txq = offset + qcount_tx;
+ if (vsi->num_txq > vsi->alloc_txq) {
+ dev_err(ice_pf_to_dev(vsi->back), "Trying to use more Tx queues (%u), than were allocated (%u)!\n",
+ vsi->num_txq, vsi->alloc_txq);
+ return -EINVAL;
+ }
+
vsi->num_rxq = offset + qcount_rx;
+ if (vsi->num_rxq > vsi->alloc_rxq) {
+ dev_err(ice_pf_to_dev(vsi->back), "Trying to use more Rx queues (%u), than were allocated (%u)!\n",
+ vsi->num_rxq, vsi->alloc_rxq);
+ return -EINVAL;
+ }
/* Setup queue TC[0].qmap for given VSI context */
ctxt->info.tc_mapping[0] = cpu_to_le16(qmap);
@@ -3534,6 +3561,8 @@ ice_vsi_setup_q_map_mqprio(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt,
dev_dbg(ice_pf_to_dev(vsi->back), "vsi->num_rxq = %d\n", vsi->num_rxq);
dev_dbg(ice_pf_to_dev(vsi->back), "all_numtc %u, all_enatc: 0x%04x, tc_cfg.numtc %u\n",
vsi->all_numtc, vsi->all_enatc, vsi->tc_cfg.numtc);
+
+ return 0;
}
/**
@@ -3583,9 +3612,12 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc)
if (vsi->type == ICE_VSI_PF &&
test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
- ice_vsi_setup_q_map_mqprio(vsi, ctx, ena_tc);
+ ret = ice_vsi_setup_q_map_mqprio(vsi, ctx, ena_tc);
else
- ice_vsi_setup_q_map(vsi, ctx);
+ ret = ice_vsi_setup_q_map(vsi, ctx);
+
+ if (ret)
+ goto out;
/* must indicate which sections of the VSI context are being modified */
ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID);
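
ice_vsi_setup_q_map() and its mqprio variant now return an error instead of silently programming a queue map larger than the allocation, and every caller propagates the failure. A compact sketch of the pattern, with illustrative names:

```c
#include <errno.h>

/* Queue-map setup fails instead of overcommitting the allocation. */
static int setup_q_map(unsigned int num_rxq, unsigned int alloc_rxq,
		       unsigned int num_txq, unsigned int alloc_txq)
{
	if (num_rxq > alloc_rxq || num_txq > alloc_txq)
		return -EINVAL;
	/* ... program the Rx/Tx queue mappings into the context ... */
	return 0;
}

static int vsi_init(void)
{
	int ret = setup_q_map(4, 8, 4, 8);

	if (ret)
		return ret; /* abort VSI init on overcommit */
	/* ... continue VSI initialization ... */
	return 0;
}
```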
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index e1cae253412c..c1ac2f746714 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -5763,25 +5763,38 @@ static netdev_features_t
ice_fix_features(struct net_device *netdev, netdev_features_t features)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
- netdev_features_t supported_vlan_filtering;
- netdev_features_t requested_vlan_filtering;
- struct ice_vsi *vsi = np->vsi;
-
- requested_vlan_filtering = features & NETIF_VLAN_FILTERING_FEATURES;
-
- /* make sure supported_vlan_filtering works for both SVM and DVM */
- supported_vlan_filtering = NETIF_F_HW_VLAN_CTAG_FILTER;
- if (ice_is_dvm_ena(&vsi->back->hw))
- supported_vlan_filtering |= NETIF_F_HW_VLAN_STAG_FILTER;
-
- if (requested_vlan_filtering &&
- requested_vlan_filtering != supported_vlan_filtering) {
- if (requested_vlan_filtering & NETIF_F_HW_VLAN_CTAG_FILTER) {
- netdev_warn(netdev, "cannot support requested VLAN filtering settings, enabling all supported VLAN filtering settings\n");
- features |= supported_vlan_filtering;
+ netdev_features_t req_vlan_fltr, cur_vlan_fltr;
+ bool cur_ctag, cur_stag, req_ctag, req_stag;
+
+ cur_vlan_fltr = netdev->features & NETIF_VLAN_FILTERING_FEATURES;
+ cur_ctag = cur_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER;
+ cur_stag = cur_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER;
+
+ req_vlan_fltr = features & NETIF_VLAN_FILTERING_FEATURES;
+ req_ctag = req_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER;
+ req_stag = req_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER;
+
+ if (req_vlan_fltr != cur_vlan_fltr) {
+ if (ice_is_dvm_ena(&np->vsi->back->hw)) {
+ if (req_ctag && req_stag) {
+ features |= NETIF_VLAN_FILTERING_FEATURES;
+ } else if (!req_ctag && !req_stag) {
+ features &= ~NETIF_VLAN_FILTERING_FEATURES;
+ } else if ((!cur_ctag && req_ctag && !cur_stag) ||
+ (!cur_stag && req_stag && !cur_ctag)) {
+ features |= NETIF_VLAN_FILTERING_FEATURES;
+ netdev_warn(netdev, "802.1Q and 802.1ad VLAN filtering must be either both on or both off. VLAN filtering has been enabled for both types.\n");
+ } else if ((cur_ctag && !req_ctag && cur_stag) ||
+ (cur_stag && !req_stag && cur_ctag)) {
+ features &= ~NETIF_VLAN_FILTERING_FEATURES;
+ netdev_warn(netdev, "802.1Q and 802.1ad VLAN filtering must be either both on or both off. VLAN filtering has been disabled for both types.\n");
+ }
} else {
- netdev_warn(netdev, "cannot support requested VLAN filtering settings, clearing all supported VLAN filtering settings\n");
- features &= ~supported_vlan_filtering;
+ if (req_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER)
+ netdev_warn(netdev, "cannot support requested 802.1ad filtering setting in SVM mode\n");
+
+ if (req_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER)
+ features |= NETIF_F_HW_VLAN_CTAG_FILTER;
}
}
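
In DVM the device can only run 802.1Q (CTAG) and 802.1ad (STAG) filtering together, so the rewritten fixup compares the requested bits against the current ones and moves both in the direction of the change, warning when it overrides half of a mixed request. A minimal sketch of that decision, assuming the current state is already a legal equal pair:

```c
#include <stdbool.h>

/* CTAG and STAG filtering must end up equal in DVM mode. Mixed
 * requests are resolved in the direction of the change; the driver
 * additionally emits a netdev_warn() in those cases.
 */
static void fix_vlan_filtering(bool cur_on, bool req_ctag, bool req_stag,
			       bool *ctag, bool *stag)
{
	bool both;

	if (req_ctag == req_stag)	/* legal request, take it as-is */
		both = req_ctag;
	else if (!cur_on)		/* enabling one: enable both */
		both = true;
	else				/* disabling one: disable both */
		both = false;

	*ctag = both;
	*stag = both;
}
```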
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index 662947c882e8..ef9344ef0d8e 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -2271,7 +2271,7 @@ static int
ice_ptp_init_tx_e822(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port)
{
tx->quad = port / ICE_PORTS_PER_QUAD;
- tx->quad_offset = tx->quad * INDEX_PER_PORT;
+ tx->quad_offset = (port % ICE_PORTS_PER_QUAD) * INDEX_PER_PORT;
tx->len = INDEX_PER_PORT;
return ice_ptp_alloc_tx_tracker(tx);
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h
index afd048d69959..10e396abf130 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.h
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.h
@@ -49,6 +49,37 @@ struct ice_perout_channel {
* To allow multiple ports to access the shared register block independently,
* the blocks are split up so that indexes are assigned to each port based on
* hardware logical port number.
+ *
+ * The timestamp blocks are handled differently on E810- and E822-based
+ * devices. On E810, each port has its own block of timestamps, while on
+ * E822 the shared register block must be logically split into smaller
+ * per-port chunks within each quad to avoid collisions.
+ *
+ * Example for port 5 in E810:
+ * +--------+--------+--------+--------+--------+--------+--------+--------+
+ * |register|register|register|register|register|register|register|register|
+ * | block | block | block | block | block | block | block | block |
+ * | for | for | for | for | for | for | for | for |
+ * | port 0 | port 1 | port 2 | port 3 | port 4 | port 5 | port 6 | port 7 |
+ * +--------+--------+--------+--------+--------+--------+--------+--------+
+ * ^^
+ * ||
+ * |--- quad offset is always 0
+ * ---- quad number
+ *
+ * Example for port 5 in E822:
+ * +-----------------------------+-----------------------------+
+ * | register block for quad 0 | register block for quad 1 |
+ * |+------+------+------+------+|+------+------+------+------+|
+ * ||port 0|port 1|port 2|port 3|||port 0|port 1|port 2|port 3||
+ * |+------+------+------+------+|+------+------+------+------+|
+ * +-----------------------------+-------^---------------------+
+ * ^ |
+ * | --- quad offset*
+ * ---- quad number
+ *
+ * * PHY port 5 is port 1 in quad 1
+ *
*/
/**
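
The one-line ice_ptp.c fix follows directly from the diagram: on E822 the timestamp index block inside a quad is selected by the port's position within the quad, not by the quad number, which the old tx->quad * INDEX_PER_PORT computation got wrong. A standalone sketch of the corrected arithmetic (the INDEX_PER_PORT value is illustrative):

```c
#include <assert.h>

#define PORTS_PER_QUAD 4
#define INDEX_PER_PORT 16 /* illustrative; the real value is in ice_ptp.h */

struct tx_tracker { int quad, quad_offset, len; };

static void init_tx_e822(struct tx_tracker *tx, int port)
{
	tx->quad = port / PORTS_PER_QUAD;
	/* The old code used tx->quad here, which made every port in a
	 * quad claim the same index block; the offset must come from
	 * the port's position within its quad.
	 */
	tx->quad_offset = (port % PORTS_PER_QUAD) * INDEX_PER_PORT;
	tx->len = INDEX_PER_PORT;
}

int main(void)
{
	struct tx_tracker tx;

	init_tx_e822(&tx, 5); /* PHY port 5 is port 1 in quad 1 */
	assert(tx.quad == 1 && tx.quad_offset == 1 * INDEX_PER_PORT);
	return 0;
}
```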
diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c
index 0a0c55fb8699..b803f2ab3cc7 100644
--- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c
@@ -524,6 +524,7 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
*/
fltr->rid = rule_added.rid;
fltr->rule_id = rule_added.rule_id;
+ fltr->dest_id = rule_added.vsi_handle;
exit:
kfree(list);
@@ -993,7 +994,9 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi,
n_proto_key = ntohs(match.key->n_proto);
n_proto_mask = ntohs(match.mask->n_proto);
- if (n_proto_key == ETH_P_ALL || n_proto_key == 0) {
+ if (n_proto_key == ETH_P_ALL || n_proto_key == 0 ||
+ fltr->tunnel_type == TNL_GTPU ||
+ fltr->tunnel_type == TNL_GTPC) {
n_proto_key = 0;
n_proto_mask = 0;
} else {
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
index cd8e6b50968c..7adf9ddf129e 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
@@ -504,6 +504,11 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags)
}
if (ice_is_vf_disabled(vf)) {
+ vsi = ice_get_vf_vsi(vf);
+ if (WARN_ON(!vsi))
+ return -EINVAL;
+ ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id);
+ ice_vsi_stop_all_rx_rings(vsi);
dev_dbg(dev, "VF is already disabled, there is no need for resetting it, telling VM, all is fine %d\n",
vf->vf_id);
return 0;
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
index 99cb382e71fe..b2b5d2ee83a5 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
@@ -1592,35 +1592,27 @@ error_param:
*/
static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
{
- enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
struct virtchnl_vsi_queue_config_info *qci =
(struct virtchnl_vsi_queue_config_info *)msg;
struct virtchnl_queue_pair_info *qpi;
struct ice_pf *pf = vf->pf;
struct ice_vsi *vsi;
- int i, q_idx;
+ int i = -1, q_idx;
- if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
goto error_param;
- }
- if (!ice_vc_isvalid_vsi_id(vf, qci->vsi_id)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ if (!ice_vc_isvalid_vsi_id(vf, qci->vsi_id))
goto error_param;
- }
vsi = ice_get_vf_vsi(vf);
- if (!vsi) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ if (!vsi)
goto error_param;
- }
if (qci->num_queue_pairs > ICE_MAX_RSS_QS_PER_VF ||
qci->num_queue_pairs > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) {
dev_err(ice_pf_to_dev(pf), "VF-%d requesting more than supported number of queues: %d\n",
vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq));
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
@@ -1633,7 +1625,6 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
!ice_vc_isvalid_ring_len(qpi->txq.ring_len) ||
!ice_vc_isvalid_ring_len(qpi->rxq.ring_len) ||
!ice_vc_isvalid_q_id(vf, qci->vsi_id, qpi->txq.queue_id)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
@@ -1643,7 +1634,6 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
* for selected "vsi"
*/
if (q_idx >= vsi->alloc_txq || q_idx >= vsi->alloc_rxq) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
@@ -1653,14 +1643,13 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
vsi->tx_rings[i]->count = qpi->txq.ring_len;
/* Disable any existing queue first */
- if (ice_vf_vsi_dis_single_txq(vf, vsi, q_idx)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ if (ice_vf_vsi_dis_single_txq(vf, vsi, q_idx))
goto error_param;
- }
/* Configure a queue with the requested settings */
if (ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure TX queue %d\n",
+ vf->vf_id, i);
goto error_param;
}
}
@@ -1674,17 +1663,13 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
if (qpi->rxq.databuffer_size != 0 &&
(qpi->rxq.databuffer_size > ((16 * 1024) - 128) ||
- qpi->rxq.databuffer_size < 1024)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ qpi->rxq.databuffer_size < 1024))
goto error_param;
- }
vsi->rx_buf_len = qpi->rxq.databuffer_size;
vsi->rx_rings[i]->rx_buf_len = vsi->rx_buf_len;
if (qpi->rxq.max_pkt_size > max_frame_size ||
- qpi->rxq.max_pkt_size < 64) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ qpi->rxq.max_pkt_size < 64)
goto error_param;
- }
vsi->max_frame = qpi->rxq.max_pkt_size;
/* add space for the port VLAN since the VF driver is
@@ -1695,16 +1680,30 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
vsi->max_frame += VLAN_HLEN;
if (ice_vsi_cfg_single_rxq(vsi, q_idx)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure RX queue %d\n",
+ vf->vf_id, i);
goto error_param;
}
}
}
+ /* send the response to the VF */
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES,
+ VIRTCHNL_STATUS_SUCCESS, NULL, 0);
error_param:
+ /* disable whatever we can */
+ for (; i >= 0; i--) {
+ if (ice_vsi_ctrl_one_rx_ring(vsi, false, i, true))
+ dev_err(ice_pf_to_dev(pf), "VF-%d could not disable RX queue %d\n",
+ vf->vf_id, i);
+ if (ice_vf_vsi_dis_single_txq(vf, vsi, i))
+ dev_err(ice_pf_to_dev(pf), "VF-%d could not disable TX queue %d\n",
+ vf->vf_id, i);
+ }
+
/* send the response to the VF */
- return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES, v_ret,
- NULL, 0);
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES,
+ VIRTCHNL_STATUS_ERR_PARAM, NULL, 0);
}
/**
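
The reworked ice_vc_cfg_qs_msg() drops the per-branch status variable: success returns directly, and every failure jumps to one error path that walks queue indexes i..0 back down, disabling whatever was already configured. Initializing i to -1 makes that rollback loop a no-op when nothing has been touched yet. A generic, self-contained sketch of the idiom:

```c
#include <stdio.h>

static int configure(int q) { return q == 3 ? -1 : 0; } /* fail on q 3 */
static void unconfigure(int q) { printf("rolled back queue %d\n", q); }

static int configure_all(int count)
{
	int i = -1; /* nothing configured yet: the error loop is a no-op */
	int q;

	for (q = 0; q < count; q++) {
		i = q;
		if (configure(q))
			goto err;
	}
	return 0;

err:
	/* disable whatever we can, newest first, including the queue
	 * that failed part-way through its own setup
	 */
	for (; i >= 0; i--)
		unconfigure(i);
	return -1;
}

int main(void) { return configure_all(8) ? 1 : 0; }
```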
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 68be2976f539..c5f04c40284b 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -4819,8 +4819,11 @@ static void igb_clean_tx_ring(struct igb_ring *tx_ring)
while (i != tx_ring->next_to_use) {
union e1000_adv_tx_desc *eop_desc, *tx_desc;
- /* Free all the Tx ring sk_buffs */
- dev_kfree_skb_any(tx_buffer->skb);
+ /* Free all the Tx ring sk_buffs or xdp frames */
+ if (tx_buffer->type == IGB_TYPE_SKB)
+ dev_kfree_skb_any(tx_buffer->skb);
+ else
+ xdp_return_frame(tx_buffer->xdpf);
/* unmap skb header data */
dma_unmap_single(tx_ring->dev,
@@ -9898,11 +9901,10 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
struct e1000_hw *hw = &adapter->hw;
u32 dmac_thr;
u16 hwm;
+ u32 reg;
if (hw->mac.type > e1000_82580) {
if (adapter->flags & IGB_FLAG_DMAC) {
- u32 reg;
-
/* force threshold to 0. */
wr32(E1000_DMCTXTH, 0);
@@ -9935,7 +9937,6 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
/* Disable BMC-to-OS Watchdog Enable */
if (hw->mac.type != e1000_i354)
reg &= ~E1000_DMACR_DC_BMC2OSW_EN;
-
wr32(E1000_DMACR, reg);
/* no lower threshold to disable
@@ -9952,12 +9953,12 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
*/
wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
(IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
+ }
- /* make low power state decision controlled
- * by DMA coal
- */
+ if (hw->mac.type >= e1000_i210 ||
+ (adapter->flags & IGB_FLAG_DMAC)) {
reg = rd32(E1000_PCIEMISC);
- reg &= ~E1000_PCIEMISC_LX_DECISION;
+ reg |= E1000_PCIEMISC_LX_DECISION;
wr32(E1000_PCIEMISC, reg);
} /* endif adapter->dmac is not disabled */
} else if (hw->mac.type == e1000_82580) {
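
Since a Tx slot can now hold either an sk_buff or an XDP frame, cleanup must dispatch on the buffer type; handing an xdp_frame to dev_kfree_skb_any() would free it through the wrong destructor. A hedged sketch of the tagged-union cleanup, with free() standing in for both kernel destructors:

```c
#include <stdlib.h>

enum buf_type { BUF_SKB, BUF_XDPF }; /* IGB_TYPE_SKB et al. in the driver */

struct tx_buffer {
	enum buf_type type;
	union {
		void *skb;
		void *xdpf;
	};
};

static void free_tx_buffer(struct tx_buffer *b)
{
	/* each kind has its own destructor in the kernel:
	 * dev_kfree_skb_any() for skbs, xdp_return_frame() for frames
	 */
	if (b->type == BUF_SKB)
		free(b->skb);
	else
		free(b->xdpf);
}
```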
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
index bc614a4def9e..3f60a80e34c8 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
@@ -1390,7 +1390,8 @@ static int otx2vf_get_link_ksettings(struct net_device *netdev,
static const struct ethtool_ops otx2vf_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
- ETHTOOL_COALESCE_MAX_FRAMES,
+ ETHTOOL_COALESCE_MAX_FRAMES |
+ ETHTOOL_COALESCE_USE_ADAPTIVE,
.supported_ring_params = ETHTOOL_RING_USE_RX_BUF_LEN |
ETHTOOL_RING_USE_CQE_SIZE,
.get_link = otx2_get_link,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 2ccf7bef9b05..735dc805dad7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -479,6 +479,30 @@ static int mlx5_set_extended_dest(struct mlx5_core_dev *dev,
return 0;
}
+
+static void
+mlx5_cmd_set_fte_flow_meter(struct fs_fte *fte, void *in_flow_context)
+{
+ void *exe_aso_ctrl;
+ void *execute_aso;
+
+ execute_aso = MLX5_ADDR_OF(flow_context, in_flow_context,
+ execute_aso[0]);
+ MLX5_SET(execute_aso, execute_aso, valid, 1);
+ MLX5_SET(execute_aso, execute_aso, aso_object_id,
+ fte->action.exe_aso.object_id);
+
+ exe_aso_ctrl = MLX5_ADDR_OF(execute_aso, execute_aso, exe_aso_ctrl);
+ MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, return_reg_id,
+ fte->action.exe_aso.return_reg_id);
+ MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, aso_type,
+ fte->action.exe_aso.type);
+ MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, init_color,
+ fte->action.exe_aso.flow_meter.init_color);
+ MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, meter_id,
+ fte->action.exe_aso.flow_meter.meter_idx);
+}
+
static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
int opmod, int modify_mask,
struct mlx5_flow_table *ft,
@@ -663,6 +687,15 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
list_size);
}
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) {
+ if (fte->action.exe_aso.type == MLX5_EXE_ASO_FLOW_METER) {
+ mlx5_cmd_set_fte_flow_meter(fte, in_flow_context);
+ } else {
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+ }
+
err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
err_out:
kvfree(in);
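
The new FTE path only materializes the execute_aso section when the action carries MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO with a flow-meter ASO; any other ASO type is rejected with -EOPNOTSUPP before the firmware command is built. A condensed sketch of that dispatch, with the MLX5_SET() plumbing elided and all struct names illustrative:

```c
#include <errno.h>

enum aso_type { EXE_ASO_FLOW_METER }; /* encoding is illustrative */

struct exe_aso {
	enum aso_type type;
	/* plus object_id, return_reg_id, init_color, meter_idx ... */
};

/* In the driver this is a series of MLX5_SET() calls copying the
 * fields above into the flow context's execute_aso[0] block.
 */
static void set_fte_flow_meter(const struct exe_aso *aso, void *in_ctx)
{
	(void)aso;
	(void)in_ctx;
}

static int set_fte_aso(const struct exe_aso *aso, void *in_ctx)
{
	if (aso->type != EXE_ASO_FLOW_METER)
		return -EOPNOTSUPP; /* only flow meters are wired up */
	set_fte_flow_meter(aso, in_ctx);
	return 0;
}
```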
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 21e5c709b2d3..f1b908d40fa6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -2895,6 +2895,14 @@ static int create_fdb_bypass(struct mlx5_flow_steering *steering)
return 0;
}
+static void cleanup_fdb_root_ns(struct mlx5_flow_steering *steering)
+{
+ cleanup_root_ns(steering->fdb_root_ns);
+ steering->fdb_root_ns = NULL;
+ kfree(steering->fdb_sub_ns);
+ steering->fdb_sub_ns = NULL;
+}
+
static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
{
struct fs_prio *maj_prio;
@@ -2945,10 +2953,7 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
return 0;
out_err:
- cleanup_root_ns(steering->fdb_root_ns);
- kfree(steering->fdb_sub_ns);
- steering->fdb_sub_ns = NULL;
- steering->fdb_root_ns = NULL;
+ cleanup_fdb_root_ns(steering);
return err;
}
@@ -3108,10 +3113,7 @@ void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev)
struct mlx5_flow_steering *steering = dev->priv.steering;
cleanup_root_ns(steering->root_ns);
- cleanup_root_ns(steering->fdb_root_ns);
- steering->fdb_root_ns = NULL;
- kfree(steering->fdb_sub_ns);
- steering->fdb_sub_ns = NULL;
+ cleanup_fdb_root_ns(steering);
cleanup_root_ns(steering->port_sel_root_ns);
cleanup_root_ns(steering->sniffer_rx_root_ns);
cleanup_root_ns(steering->sniffer_tx_root_ns);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c
index 3d5e57ff558c..7e02cbe8c3b9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c
@@ -12,13 +12,16 @@ struct mlx5_dm {
spinlock_t lock;
unsigned long *steering_sw_icm_alloc_blocks;
unsigned long *header_modify_sw_icm_alloc_blocks;
+ unsigned long *header_modify_pattern_sw_icm_alloc_blocks;
};
struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev)
{
+ u64 header_modify_pattern_icm_blocks = 0;
u64 header_modify_icm_blocks = 0;
u64 steering_icm_blocks = 0;
struct mlx5_dm *dm;
+ bool support_v2;
if (!(MLX5_CAP_GEN_64(dev, general_obj_types) & MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM))
return NULL;
@@ -53,8 +56,27 @@ struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev)
goto err_modify_hdr;
}
+ support_v2 = MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) &&
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner_v2) &&
+ MLX5_CAP64_DEV_MEM(dev, header_modify_pattern_sw_icm_start_address);
+
+ if (support_v2) {
+ header_modify_pattern_icm_blocks =
+ BIT(MLX5_CAP_DEV_MEM(dev, log_header_modify_pattern_sw_icm_size) -
+ MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
+
+ dm->header_modify_pattern_sw_icm_alloc_blocks =
+ kcalloc(BITS_TO_LONGS(header_modify_pattern_icm_blocks),
+ sizeof(unsigned long), GFP_KERNEL);
+ if (!dm->header_modify_pattern_sw_icm_alloc_blocks)
+ goto err_pattern;
+ }
+
return dm;
+err_pattern:
+ kfree(dm->header_modify_sw_icm_alloc_blocks);
+
err_modify_hdr:
kfree(dm->steering_sw_icm_alloc_blocks);
@@ -86,6 +108,14 @@ void mlx5_dm_cleanup(struct mlx5_core_dev *dev)
kfree(dm->header_modify_sw_icm_alloc_blocks);
}
+ if (dm->header_modify_pattern_sw_icm_alloc_blocks) {
+ WARN_ON(!bitmap_empty(dm->header_modify_pattern_sw_icm_alloc_blocks,
+ BIT(MLX5_CAP_DEV_MEM(dev,
+ log_header_modify_pattern_sw_icm_size) -
+ MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))));
+ kfree(dm->header_modify_pattern_sw_icm_alloc_blocks);
+ }
+
kfree(dm);
}
@@ -130,6 +160,13 @@ int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
log_header_modify_sw_icm_size);
block_map = dm->header_modify_sw_icm_alloc_blocks;
break;
+ case MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN:
+ icm_start_addr = MLX5_CAP64_DEV_MEM(dev,
+ header_modify_pattern_sw_icm_start_address);
+ log_icm_size = MLX5_CAP_DEV_MEM(dev,
+ log_header_modify_pattern_sw_icm_size);
+ block_map = dm->header_modify_pattern_sw_icm_alloc_blocks;
+ break;
default:
return -EINVAL;
}
@@ -203,6 +240,11 @@ int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type
icm_start_addr = MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address);
block_map = dm->header_modify_sw_icm_alloc_blocks;
break;
+ case MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN:
+ icm_start_addr = MLX5_CAP64_DEV_MEM(dev,
+ header_modify_pattern_sw_icm_start_address);
+ block_map = dm->header_modify_pattern_sw_icm_alloc_blocks;
+ break;
default:
return -EINVAL;
}
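
Each supported ICM region gets a bitmap with one bit per allocatable block, sized as BIT(log_region_size - log_block_size) bits; the new header-modify-pattern region simply adds a third such bitmap behind a capability check. A userspace sketch of the sizing math, with BITS_TO_LONGS reimplemented here for illustration:

```c
#include <limits.h>
#include <stdlib.h>

#define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)
#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

/* One bit per ICM block; the logs come from device capabilities
 * (log_header_modify_pattern_sw_icm_size and the block-size log).
 */
static unsigned long *alloc_block_map(unsigned int log_region_size,
				      unsigned int log_block_size)
{
	unsigned long long nblocks = 1ULL << (log_region_size - log_block_size);

	/* the driver uses kcalloc(BITS_TO_LONGS(nblocks), ...) */
	return calloc(BITS_TO_LONGS(nblocks), sizeof(unsigned long));
}
```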
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index c9b4e50a593e..2078d9f03a5f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -314,13 +314,6 @@ struct mlx5_reg_host_endianness {
u8 rsvd[15];
};
-#define CAP_MASK(pos, size) ((u64)((1 << (size)) - 1) << (pos))
-
-enum {
- MLX5_CAP_BITS_RW_MASK = CAP_MASK(MLX5_CAP_OFF_CMDIF_CSUM, 2) |
- MLX5_DEV_CAP_FLAG_DCT,
-};
-
static u16 to_fw_pkey_sz(struct mlx5_core_dev *dev, u32 size)
{
switch (size) {
diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
index 84621b4cb15b..b03e1c66bac0 100644
--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
@@ -19,8 +19,6 @@
#include "mlxbf_gige.h"
#include "mlxbf_gige_regs.h"
-#define DRV_NAME "mlxbf_gige"
-
/* Allocate SKB whose payload pointer aligns with the Bluefield
* hardware DMA limitation, i.e. DMA operation can't cross
* a 4KB boundary. A maximum packet size of 2KB is assumed in the
@@ -427,7 +425,7 @@ static struct platform_driver mlxbf_gige_driver = {
.remove = mlxbf_gige_remove,
.shutdown = mlxbf_gige_shutdown,
.driver = {
- .name = DRV_NAME,
+ .name = KBUILD_MODNAME,
.acpi_match_table = ACPI_PTR(mlxbf_gige_acpi_match),
},
};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile
index 1a465fd5d8b3..c57e293cca25 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
+++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
@@ -12,7 +12,6 @@ mlxsw_i2c-objs := i2c.o
obj-$(CONFIG_MLXSW_SPECTRUM) += mlxsw_spectrum.o
mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \
spectrum_switchdev.o spectrum_router.o \
- spectrum_router_xm.o \
spectrum1_kvdl.o spectrum2_kvdl.o \
spectrum_kvdl.o \
spectrum_acl_tcam.o spectrum_acl_ctcam.o \
diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h b/drivers/net/ethernet/mellanox/mlxsw/cmd.h
index 51b260d54237..8a89c2773294 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/cmd.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/cmd.h
@@ -343,23 +343,6 @@ static inline int mlxsw_cmd_boardinfo(struct mlxsw_core *mlxsw_core,
0, 0, false, out_mbox, MLXSW_CMD_MBOX_SIZE);
}
-/* cmd_mbox_xm_num_local_ports
- * Number of local_ports connected to the xm.
- * Each local port is a 4x
- * Spectrum-2/3: 25G
- * Spectrum-4: 50G
- */
-MLXSW_ITEM32(cmd_mbox, boardinfo, xm_num_local_ports, 0x00, 4, 3);
-
-/* cmd_mbox_xm_exists
- * An XM (eXtanded Mezanine, e.g. used for the XLT) is connected on the board.
- */
-MLXSW_ITEM32(cmd_mbox, boardinfo, xm_exists, 0x00, 0, 1);
-
-/* cmd_mbox_xm_local_port_entry
- */
-MLXSW_ITEM_BIT_ARRAY(cmd_mbox, boardinfo, xm_local_port_entry, 0x04, 4, 8);
-
/* cmd_mbox_boardinfo_intapin
* When PCIe interrupt messages are being used, this value is used for clearing
* an interrupt. When using MSI-X, this register is not used.
@@ -674,12 +657,6 @@ MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_hash_double_size, 0x0C, 26, 1);
*/
MLXSW_ITEM32(cmd_mbox, config_profile, set_cqe_version, 0x08, 0, 1);
-/* cmd_mbox_config_set_kvh_xlt_cache_mode
- * Capability bit. Setting a bit to 1 configures the profile
- * according to the mailbox contents.
- */
-MLXSW_ITEM32(cmd_mbox, config_profile, set_kvh_xlt_cache_mode, 0x08, 3, 1);
-
/* cmd_mbox_config_profile_max_vepa_channels
* Maximum number of VEPA channels per port (0 through 16)
* 0 - multi-channel VEPA is disabled
@@ -736,16 +713,25 @@ MLXSW_ITEM32(cmd_mbox, config_profile, max_flood_tables, 0x30, 16, 4);
*/
MLXSW_ITEM32(cmd_mbox, config_profile, max_vid_flood_tables, 0x30, 8, 4);
+enum mlxsw_cmd_mbox_config_profile_flood_mode {
+ /* Mixed mode, where:
+ * max_flood_tables indicates the number of single-entry tables.
+ * max_vid_flood_tables indicates the number of per-VID tables.
+ * max_fid_offset_flood_tables indicates the number of FID-offset
+ * tables. max_fid_flood_tables indicates the number of per-FID tables.
+ * Reserved when unified bridge model is used.
+ */
+ MLXSW_CMD_MBOX_CONFIG_PROFILE_FLOOD_MODE_MIXED = 3,
+ /* Controlled flood tables. Reserved when legacy bridge model is
+ * used.
+ */
+ MLXSW_CMD_MBOX_CONFIG_PROFILE_FLOOD_MODE_CONTROLLED = 4,
+};
+
/* cmd_mbox_config_profile_flood_mode
* Flooding mode to use.
- * 0-2 - Backward compatible modes for SwitchX devices.
- * 3 - Mixed mode, where:
- * max_flood_tables indicates the number of single-entry tables.
- * max_vid_flood_tables indicates the number of per-VID tables.
- * max_fid_offset_flood_tables indicates the number of FID-offset tables.
- * max_fid_flood_tables indicates the number of per-FID tables.
*/
-MLXSW_ITEM32(cmd_mbox, config_profile, flood_mode, 0x30, 0, 2);
+MLXSW_ITEM32(cmd_mbox, config_profile, flood_mode, 0x30, 0, 3);
/* cmd_mbox_config_profile_max_fid_offset_flood_tables
* Maximum number of FID-offset flooding tables.
@@ -806,13 +792,6 @@ MLXSW_ITEM32(cmd_mbox, config_profile, adaptive_routing_group_cap, 0x4C, 0, 16);
*/
MLXSW_ITEM32(cmd_mbox, config_profile, arn, 0x50, 31, 1);
-/* cmd_mbox_config_profile_kvh_xlt_cache_mode
- * KVH XLT cache mode:
- * 0 - XLT can use all KVH as best-effort
- * 1 - XLT cache uses 1/2 KVH
- */
-MLXSW_ITEM32(cmd_mbox, config_profile, kvh_xlt_cache_mode, 0x50, 8, 4);
-
/* cmd_mbox_config_kvd_linear_size
* KVD Linear Size
* Valid for Spectrum only
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index fc52832241b3..ab1cebf227fb 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -3151,18 +3151,6 @@ mlxsw_core_port_linecard_get(struct mlxsw_core *mlxsw_core,
return mlxsw_core_port->linecard;
}
-bool mlxsw_core_port_is_xm(const struct mlxsw_core *mlxsw_core, u16 local_port)
-{
- const struct mlxsw_bus_info *bus_info = mlxsw_core->bus_info;
- int i;
-
- for (i = 0; i < bus_info->xm_local_ports_count; i++)
- if (bus_info->xm_local_ports[i] == local_port)
- return true;
- return false;
-}
-EXPORT_SYMBOL(mlxsw_core_port_is_xm);
-
void mlxsw_core_ports_remove_selected(struct mlxsw_core *mlxsw_core,
bool (*selector)(void *priv, u16 local_port),
void *priv)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index c2a891287047..d1e8b8b8d0c1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -261,7 +261,6 @@ mlxsw_core_port_devlink_port_get(struct mlxsw_core *mlxsw_core,
struct mlxsw_linecard *
mlxsw_core_port_linecard_get(struct mlxsw_core *mlxsw_core,
u16 local_port);
-bool mlxsw_core_port_is_xm(const struct mlxsw_core *mlxsw_core, u16 local_port);
void mlxsw_core_ports_remove_selected(struct mlxsw_core *mlxsw_core,
bool (*selector)(void *priv,
u16 local_port),
@@ -296,8 +295,7 @@ struct mlxsw_config_profile {
used_max_pkey:1,
used_ar_sec:1,
used_adaptive_routing_group_cap:1,
- used_kvd_sizes:1,
- used_kvh_xlt_cache_mode:1;
+ used_kvd_sizes:1;
u8 max_vepa_channels;
u16 max_mid;
u16 max_pgt;
@@ -319,7 +317,6 @@ struct mlxsw_config_profile {
u32 kvd_linear_size;
u8 kvd_hash_single_parts;
u8 kvd_hash_double_parts;
- u8 kvh_xlt_cache_mode;
struct mlxsw_swid_config swid_config[MLXSW_CONFIG_PROFILE_SWID_COUNT];
};
@@ -478,8 +475,6 @@ struct mlxsw_fw_rev {
u16 can_reset_minor;
};
-#define MLXSW_BUS_INFO_XM_LOCAL_PORTS_MAX 4
-
struct mlxsw_bus_info {
const char *device_kind;
const char *device_name;
@@ -488,10 +483,7 @@ struct mlxsw_bus_info {
u8 vsd[MLXSW_CMD_BOARDINFO_VSD_LEN];
u8 psid[MLXSW_CMD_BOARDINFO_PSID_LEN];
u8 low_frequency:1,
- read_frc_capable:1,
- xm_exists:1;
- u8 xm_local_ports_count;
- u8 xm_local_ports[MLXSW_BUS_INFO_XM_LOCAL_PORTS_MAX];
+ read_frc_capable:1;
};
struct mlxsw_hwmon;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
index d9660d4cce96..d9bf584234a6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
@@ -359,8 +359,7 @@ static int mlxsw_m_ports_create(struct mlxsw_m *mlxsw_m)
/* Create port objects for each valid entry */
devl_lock(devlink);
for (i = 0; i < mlxsw_m->max_ports; i++) {
- if (mlxsw_m->module_to_port[i] > 0 &&
- !mlxsw_core_port_is_xm(mlxsw_m->core, i)) {
+ if (mlxsw_m->module_to_port[i] > 0) {
err = mlxsw_m_port_create(mlxsw_m,
mlxsw_m->module_to_port[i],
i);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index f91dde4df152..4687dabaaf09 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1252,12 +1252,6 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox,
mlxsw_cmd_mbox_config_profile_kvd_hash_double_size_set(mbox,
MLXSW_RES_GET(res, KVD_DOUBLE_SIZE));
}
- if (profile->used_kvh_xlt_cache_mode) {
- mlxsw_cmd_mbox_config_profile_set_kvh_xlt_cache_mode_set(
- mbox, 1);
- mlxsw_cmd_mbox_config_profile_kvh_xlt_cache_mode_set(
- mbox, profile->kvh_xlt_cache_mode);
- }
for (i = 0; i < MLXSW_CONFIG_PROFILE_SWID_COUNT; i++)
mlxsw_pci_config_profile_swid_config(mlxsw_pci, mbox, i,
@@ -1271,30 +1265,6 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox,
return mlxsw_cmd_config_profile_set(mlxsw_pci->core, mbox);
}
-static int mlxsw_pci_boardinfo_xm_process(struct mlxsw_pci *mlxsw_pci,
- struct mlxsw_bus_info *bus_info,
- char *mbox)
-{
- int count = mlxsw_cmd_mbox_boardinfo_xm_num_local_ports_get(mbox);
- int i;
-
- if (!mlxsw_cmd_mbox_boardinfo_xm_exists_get(mbox))
- return 0;
-
- bus_info->xm_exists = true;
-
- if (count > MLXSW_BUS_INFO_XM_LOCAL_PORTS_MAX) {
- dev_err(&mlxsw_pci->pdev->dev, "Invalid number of XM local ports\n");
- return -EINVAL;
- }
- bus_info->xm_local_ports_count = count;
- for (i = 0; i < count; i++)
- bus_info->xm_local_ports[i] =
- mlxsw_cmd_mbox_boardinfo_xm_local_port_entry_get(mbox,
- i);
- return 0;
-}
-
static int mlxsw_pci_boardinfo(struct mlxsw_pci *mlxsw_pci, char *mbox)
{
struct mlxsw_bus_info *bus_info = &mlxsw_pci->bus_info;
@@ -1306,8 +1276,7 @@ static int mlxsw_pci_boardinfo(struct mlxsw_pci *mlxsw_pci, char *mbox)
return err;
mlxsw_cmd_mbox_boardinfo_vsd_memcpy_from(mbox, bus_info->vsd);
mlxsw_cmd_mbox_boardinfo_psid_memcpy_from(mbox, bus_info->psid);
-
- return mlxsw_pci_boardinfo_xm_process(mlxsw_pci, bus_info, mbox);
+ return 0;
}
static int mlxsw_pci_fw_area_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
@@ -1582,6 +1551,14 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
if (err)
goto err_config_profile;
+ /* Some resources depend on unified bridge model, which is configured
+ * as part of config_profile. Query the resources again to get correct
+ * values.
+ */
+ err = mlxsw_core_resources_query(mlxsw_core, mbox, res);
+ if (err)
+ goto err_requery_resources;
+
err = mlxsw_pci_aqs_init(mlxsw_pci, mbox);
if (err)
goto err_aqs_init;
@@ -1599,6 +1576,7 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
err_request_eq_irq:
mlxsw_pci_aqs_fini(mlxsw_pci);
err_aqs_init:
+err_requery_resources:
err_config_profile:
err_cqe_v_check:
err_query_resources:
diff --git a/drivers/net/ethernet/mellanox/mlxsw/port.h b/drivers/net/ethernet/mellanox/mlxsw/port.h
index 741fd2989d12..ac4d4ea51597 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/port.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/port.h
@@ -15,8 +15,6 @@
#define MLXSW_PORT_SWID_TYPE_IB 1
#define MLXSW_PORT_SWID_TYPE_ETH 2
-#define MLXSW_PORT_MID 0xd000
-
#define MLXSW_PORT_MAX_IB_PHY_PORTS 36
#define MLXSW_PORT_MAX_IB_PORTS (MLXSW_PORT_MAX_IB_PHY_PORTS + 1)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 93af6c974ece..7961f0c55fa6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -322,6 +322,18 @@ MLXSW_ITEM32_INDEXED(reg, sfd, rec_action, MLXSW_REG_SFD_BASE_LEN, 28, 4,
MLXSW_ITEM32_INDEXED(reg, sfd, uc_sub_port, MLXSW_REG_SFD_BASE_LEN, 16, 8,
MLXSW_REG_SFD_REC_LEN, 0x08, false);
+/* reg_sfd_uc_set_vid
+ * Set VID.
+ * 0 - Do not update VID.
+ * 1 - Set VID.
+ * For Spectrum-2 when set_vid=0 and smpe_valid=1, the smpe will modify the vid.
+ * Access: RW
+ *
+ * Note: Reserved when legacy bridge model is used.
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, uc_set_vid, MLXSW_REG_SFD_BASE_LEN, 31, 1,
+ MLXSW_REG_SFD_REC_LEN, 0x08, false);
+
/* reg_sfd_uc_fid_vid
* Filtering ID or VLAN ID
* For SwitchX and SwitchX-2:
@@ -335,6 +347,15 @@ MLXSW_ITEM32_INDEXED(reg, sfd, uc_sub_port, MLXSW_REG_SFD_BASE_LEN, 16, 8,
MLXSW_ITEM32_INDEXED(reg, sfd, uc_fid_vid, MLXSW_REG_SFD_BASE_LEN, 0, 16,
MLXSW_REG_SFD_REC_LEN, 0x08, false);
+/* reg_sfd_uc_vid
+ * New VID when set_vid=1.
+ * Access: RW
+ *
+ * Note: Reserved when legacy bridge model is used and when set_vid=0.
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, uc_vid, MLXSW_REG_SFD_BASE_LEN, 16, 12,
+ MLXSW_REG_SFD_REC_LEN, 0x0C, false);
+
/* reg_sfd_uc_system_port
* Unique port identifier for the final destination of the packet.
* Access: RW
@@ -379,6 +400,18 @@ static inline void mlxsw_reg_sfd_uc_pack(char *payload, int rec_index,
MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_sub_port, MLXSW_REG_SFD_BASE_LEN, 16, 8,
MLXSW_REG_SFD_REC_LEN, 0x08, false);
+/* reg_sfd_uc_lag_set_vid
+ * Set VID.
+ * 0 - Do not update VID.
+ * 1 - Set VID.
+ * For Spectrum-2 when set_vid=0 and smpe_valid=1, the smpe will modify the vid.
+ * Access: RW
+ *
+ * Note: Reserved when legacy bridge model is used.
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_set_vid, MLXSW_REG_SFD_BASE_LEN, 31, 1,
+ MLXSW_REG_SFD_REC_LEN, 0x08, false);
+
/* reg_sfd_uc_lag_fid_vid
* Filtering ID or VLAN ID
* For SwitchX and SwitchX-2:
@@ -393,8 +426,10 @@ MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_fid_vid, MLXSW_REG_SFD_BASE_LEN, 0, 16,
MLXSW_REG_SFD_REC_LEN, 0x08, false);
/* reg_sfd_uc_lag_lag_vid
- * Indicates VID in case of vFIDs. Reserved for FIDs.
+ * New VLAN ID.
* Access: RW
+ *
+ * Note: Reserved when legacy bridge model is used and set_vid=0.
*/
MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_lag_vid, MLXSW_REG_SFD_BASE_LEN, 16, 12,
MLXSW_REG_SFD_REC_LEN, 0x0C, false);
@@ -997,7 +1032,7 @@ static inline void mlxsw_reg_spaft_pack(char *payload, u16 local_port,
* to packet types used for flooding.
*/
#define MLXSW_REG_SFGC_ID 0x2011
-#define MLXSW_REG_SFGC_LEN 0x10
+#define MLXSW_REG_SFGC_LEN 0x14
MLXSW_REG_DEFINE(sfgc, MLXSW_REG_SFGC_ID, MLXSW_REG_SFGC_LEN);
@@ -1054,12 +1089,6 @@ MLXSW_ITEM32(reg, sfgc, table_type, 0x04, 16, 3);
*/
MLXSW_ITEM32(reg, sfgc, flood_table, 0x04, 0, 6);
-/* reg_sfgc_mid
- * The multicast ID for the swid. Not supported for Spectrum
- * Access: RW
- */
-MLXSW_ITEM32(reg, sfgc, mid, 0x08, 0, 16);
-
/* reg_sfgc_counter_set_type
* Counter Set Type for flow counters.
* Access: RW
@@ -1072,6 +1101,14 @@ MLXSW_ITEM32(reg, sfgc, counter_set_type, 0x0C, 24, 8);
*/
MLXSW_ITEM32(reg, sfgc, counter_index, 0x0C, 0, 24);
+/* reg_sfgc_mid_base
+ * MID Base.
+ * Access: RW
+ *
+ * Note: Reserved when legacy bridge model is used.
+ */
+MLXSW_ITEM32(reg, sfgc, mid_base, 0x10, 0, 16);
+
static inline void
mlxsw_reg_sfgc_pack(char *payload, enum mlxsw_reg_sfgc_type type,
enum mlxsw_reg_sfgc_bridge_type bridge_type,
@@ -1083,7 +1120,6 @@ mlxsw_reg_sfgc_pack(char *payload, enum mlxsw_reg_sfgc_type type,
mlxsw_reg_sfgc_bridge_type_set(payload, bridge_type);
mlxsw_reg_sfgc_table_type_set(payload, table_type);
mlxsw_reg_sfgc_flood_table_set(payload, flood_table);
- mlxsw_reg_sfgc_mid_set(payload, MLXSW_PORT_MID);
}
/* SFDF - Switch Filtering DB Flush
@@ -1516,7 +1552,7 @@ static inline void mlxsw_reg_spmlr_pack(char *payload, u16 local_port,
* virtualized ports.
*/
#define MLXSW_REG_SVFA_ID 0x201C
-#define MLXSW_REG_SVFA_LEN 0x10
+#define MLXSW_REG_SVFA_LEN 0x18
MLXSW_REG_DEFINE(svfa, MLXSW_REG_SVFA_ID, MLXSW_REG_SVFA_LEN);
@@ -1537,6 +1573,7 @@ MLXSW_ITEM32_LP(reg, svfa, 0x00, 16, 0x00, 12);
enum mlxsw_reg_svfa_mt {
MLXSW_REG_SVFA_MT_VID_TO_FID,
MLXSW_REG_SVFA_MT_PORT_VID_TO_FID,
+ MLXSW_REG_SVFA_MT_VNI_TO_FID,
};
/* reg_svfa_mapping_table
@@ -1586,20 +1623,73 @@ MLXSW_ITEM32(reg, svfa, counter_set_type, 0x08, 24, 8);
*/
MLXSW_ITEM32(reg, svfa, counter_index, 0x08, 0, 24);
-static inline void mlxsw_reg_svfa_pack(char *payload, u16 local_port,
- enum mlxsw_reg_svfa_mt mt, bool valid,
- u16 fid, u16 vid)
+/* reg_svfa_vni
+ * Virtual Network Identifier.
+ * Access: Index
+ *
+ * Note: Reserved when mapping_table is not 2 (VNI mapping table).
+ */
+MLXSW_ITEM32(reg, svfa, vni, 0x10, 0, 24);
+
+/* reg_svfa_irif_v
+ * Ingress RIF valid.
+ * 0 - Ingress RIF is not valid, no ingress RIF assigned.
+ * 1 - Ingress RIF valid.
+ * Must not be set for a RIF that is not enabled.
+ * Access: RW
+ *
+ * Note: Reserved when legacy bridge model is used.
+ */
+MLXSW_ITEM32(reg, svfa, irif_v, 0x14, 24, 1);
+
+/* reg_svfa_irif
+ * Ingress RIF (Router Interface).
+ * Range is 0..cap_max_router_interfaces-1.
+ * Access: RW
+ *
+ * Note: Reserved when legacy bridge model is used and when irif_v=0.
+ */
+MLXSW_ITEM32(reg, svfa, irif, 0x14, 0, 16);
+
+static inline void __mlxsw_reg_svfa_pack(char *payload,
+ enum mlxsw_reg_svfa_mt mt, bool valid,
+ u16 fid)
{
MLXSW_REG_ZERO(svfa, payload);
- local_port = mt == MLXSW_REG_SVFA_MT_VID_TO_FID ? 0 : local_port;
mlxsw_reg_svfa_swid_set(payload, 0);
- mlxsw_reg_svfa_local_port_set(payload, local_port);
mlxsw_reg_svfa_mapping_table_set(payload, mt);
mlxsw_reg_svfa_v_set(payload, valid);
mlxsw_reg_svfa_fid_set(payload, fid);
+}
+
+static inline void mlxsw_reg_svfa_port_vid_pack(char *payload, u16 local_port,
+ bool valid, u16 fid, u16 vid)
+{
+ enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID;
+
+ __mlxsw_reg_svfa_pack(payload, mt, valid, fid);
+ mlxsw_reg_svfa_local_port_set(payload, local_port);
mlxsw_reg_svfa_vid_set(payload, vid);
}
+static inline void mlxsw_reg_svfa_vid_pack(char *payload, bool valid, u16 fid,
+ u16 vid)
+{
+ enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_VID_TO_FID;
+
+ __mlxsw_reg_svfa_pack(payload, mt, valid, fid);
+ mlxsw_reg_svfa_vid_set(payload, vid);
+}
+
+static inline void mlxsw_reg_svfa_vni_pack(char *payload, bool valid, u16 fid,
+ u32 vni)
+{
+ enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_VNI_TO_FID;
+
+ __mlxsw_reg_svfa_pack(payload, mt, valid, fid);
+ mlxsw_reg_svfa_vni_set(payload, vni);
+}
+
/* SPVTR - Switch Port VLAN Stacking Register
* ------------------------------------------
* The Switch Port VLAN Stacking register configures the VLAN mode of the port
@@ -1741,7 +1831,7 @@ static inline void mlxsw_reg_svpe_pack(char *payload, u16 local_port,
* Creates and configures FIDs.
*/
#define MLXSW_REG_SFMR_ID 0x201F
-#define MLXSW_REG_SFMR_LEN 0x18
+#define MLXSW_REG_SFMR_LEN 0x30
MLXSW_REG_DEFINE(sfmr, MLXSW_REG_SFMR_ID, MLXSW_REG_SFMR_LEN);
@@ -1764,6 +1854,28 @@ MLXSW_ITEM32(reg, sfmr, op, 0x00, 24, 4);
*/
MLXSW_ITEM32(reg, sfmr, fid, 0x00, 0, 16);
+/* reg_sfmr_flood_rsp
+ * Router sub-port flooding table.
+ * 0 - Regular flooding table.
+ * 1 - Router sub-port flooding table. For this FID the flooding is per
+ * router-sub-port local_port. Must not be set for a FID which is not a
+ * router-sub-port and must be set prior to enabling the relevant RIF.
+ * Access: RW
+ *
+ * Note: Reserved when legacy bridge model is used.
+ */
+MLXSW_ITEM32(reg, sfmr, flood_rsp, 0x08, 31, 1);
+
+/* reg_sfmr_flood_bridge_type
+ * Flood bridge type (see SFGC.bridge_type).
+ * 0 - type_0.
+ * 1 - type_1.
+ * Access: RW
+ *
+ * Note: Reserved when legacy bridge model is used and when flood_rsp=1.
+ */
+MLXSW_ITEM32(reg, sfmr, flood_bridge_type, 0x08, 28, 1);
+
/* reg_sfmr_fid_offset
* FID offset.
* Used to point into the flooding table selected by SFGC register if
@@ -1800,12 +1912,52 @@ MLXSW_ITEM32(reg, sfmr, vv, 0x10, 31, 1);
/* reg_sfmr_vni
* Virtual Network Identifier.
+ * When the legacy bridge model is used, a given VNI can only be assigned to
+ * one FID. When the unified bridge model is used, this register configures
+ * only the FID->VNI mapping; the VNI->FID mapping is done by SVFA.
* Access: RW
- *
- * Note: A given VNI can only be assigned to one FID.
*/
MLXSW_ITEM32(reg, sfmr, vni, 0x10, 0, 24);
+/* reg_sfmr_irif_v
+ * Ingress RIF valid.
+ * 0 - Ingress RIF is not valid, no ingress RIF assigned.
+ * 1 - Ingress RIF valid.
+ * Must not be set for an invalid RIF.
+ * Access: RW
+ *
+ * Note: Reserved when legacy bridge model is used.
+ */
+MLXSW_ITEM32(reg, sfmr, irif_v, 0x14, 24, 1);
+
+/* reg_sfmr_irif
+ * Ingress RIF (Router Interface).
+ * Range is 0..cap_max_router_interfaces-1.
+ * Access: RW
+ *
+ * Note: Reserved when legacy bridge model is used and when irif_v=0.
+ */
+MLXSW_ITEM32(reg, sfmr, irif, 0x14, 0, 16);
+
+/* reg_sfmr_smpe_valid
+ * SMPE is valid.
+ * Access: RW
+ *
+ * Note: Reserved when legacy bridge model is used, when flood_rsp=1 and on
+ * Spectrum-1.
+ */
+MLXSW_ITEM32(reg, sfmr, smpe_valid, 0x28, 20, 1);
+
+/* reg_sfmr_smpe
+ * Switch multicast port to egress VID.
+ * Range is 0..cap_max_rmpe-1
+ * Access: RW
+ *
+ * Note: Reserved when legacy bridge model is used, when flood_rsp=1 and on
+ * Spectrum-1.
+ */
+MLXSW_ITEM32(reg, sfmr, smpe, 0x28, 0, 16);
+
static inline void mlxsw_reg_sfmr_pack(char *payload,
enum mlxsw_reg_sfmr_op op, u16 fid,
u16 fid_offset)
@@ -2013,6 +2165,45 @@ static inline void mlxsw_reg_spevet_pack(char *payload, u16 local_port,
mlxsw_reg_spevet_et_vlan_set(payload, et_vlan);
}
+/* SMPE - Switch Multicast Port to Egress VID
+ * ------------------------------------------
+ * The switch multicast port to egress VID maps
+ * {egress_port, SMPE index} -> {VID}.
+ */
+#define MLXSW_REG_SMPE_ID 0x202B
+#define MLXSW_REG_SMPE_LEN 0x0C
+
+MLXSW_REG_DEFINE(smpe, MLXSW_REG_SMPE_ID, MLXSW_REG_SMPE_LEN);
+
+/* reg_smpe_local_port
+ * Local port number.
+ * CPU port is not supported.
+ * Access: Index
+ */
+MLXSW_ITEM32_LP(reg, smpe, 0x00, 16, 0x00, 12);
+
+/* reg_smpe_smpe_index
+ * Switch multicast port to egress VID.
+ * Range is 0..cap_max_rmpe-1.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, smpe, smpe_index, 0x04, 0, 16);
+
+/* reg_smpe_evid
+ * Egress VID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, smpe, evid, 0x08, 0, 12);
+
+static inline void mlxsw_reg_smpe_pack(char *payload, u16 local_port,
+ u16 smpe_index, u16 evid)
+{
+ MLXSW_REG_ZERO(smpe, payload);
+ mlxsw_reg_smpe_local_port_set(payload, local_port);
+ mlxsw_reg_smpe_smpe_index_set(payload, smpe_index);
+ mlxsw_reg_smpe_evid_set(payload, evid);
+}
+
/* SFTR-V2 - Switch Flooding Table Version 2 Register
* --------------------------------------------------
* The switch flooding table is used for flooding packet replication. The table
@@ -2107,6 +2298,23 @@ MLXSW_ITEM32(reg, smid2, swid, 0x00, 24, 8);
*/
MLXSW_ITEM32(reg, smid2, mid, 0x00, 0, 16);
+/* reg_smid2_smpe_valid
+ * SMPE is valid.
+ * When not valid, the egress VID will not be modified by the SMPE table.
+ * Access: RW
+ *
+ * Note: Reserved when legacy bridge model is used and on Spectrum-2.
+ */
+MLXSW_ITEM32(reg, smid2, smpe_valid, 0x08, 20, 1);
+
+/* reg_smid2_smpe
+ * Switch multicast port to egress VID.
+ * Access: RW
+ *
+ * Note: Reserved when legacy bridge model is used and on Spectrum-2.
+ */
+MLXSW_ITEM32(reg, smid2, smpe, 0x08, 0, 16);
+
/* reg_smid2_port
* Local port membership (1 bit per port).
* Access: RW
@@ -2120,13 +2328,15 @@ MLXSW_ITEM_BIT_ARRAY(reg, smid2, port, 0x20, 0x80, 1);
MLXSW_ITEM_BIT_ARRAY(reg, smid2, port_mask, 0xA0, 0x80, 1);
static inline void mlxsw_reg_smid2_pack(char *payload, u16 mid, u16 port,
- bool set)
+ bool set, bool smpe_valid, u16 smpe)
{
MLXSW_REG_ZERO(smid2, payload);
mlxsw_reg_smid2_swid_set(payload, 0);
mlxsw_reg_smid2_mid_set(payload, mid);
mlxsw_reg_smid2_port_set(payload, port, set);
mlxsw_reg_smid2_port_mask_set(payload, port, 1);
+ mlxsw_reg_smid2_smpe_valid_set(payload, smpe_valid);
+ mlxsw_reg_smid2_smpe_set(payload, smpe_valid ? smpe : 0);
}
/* CWTP - Congetion WRED ECN TClass Profile
@@ -6701,31 +6911,32 @@ MLXSW_ITEM32(reg, ritr, if_vrrp_id_ipv4, 0x1C, 0, 8);
/* VLAN Interface */
-/* reg_ritr_vlan_if_vid
+/* reg_ritr_vlan_if_vlan_id
* VLAN ID.
* Access: RW
*/
-MLXSW_ITEM32(reg, ritr, vlan_if_vid, 0x08, 0, 12);
+MLXSW_ITEM32(reg, ritr, vlan_if_vlan_id, 0x08, 0, 12);
+
+/* reg_ritr_vlan_if_efid
+ * Egress FID.
+ * Used to connect the RIF to a bridge.
+ * Access: RW
+ *
+ * Note: Reserved when legacy bridge model is used and on Spectrum-1.
+ */
+MLXSW_ITEM32(reg, ritr, vlan_if_efid, 0x0C, 0, 16);
/* FID Interface */
/* reg_ritr_fid_if_fid
- * Filtering ID. Used to connect a bridge to the router. Only FIDs from
- * the vFID range are supported.
+ * Filtering ID. Used to connect a bridge to the router.
+ * When the legacy bridge model is used, only FIDs from the vFID range are
+ * supported. When the unified bridge model is used, this is the egress FID
+ * for router-to-bridge traffic.
* Access: RW
*/
MLXSW_ITEM32(reg, ritr, fid_if_fid, 0x08, 0, 16);
-static inline void mlxsw_reg_ritr_fid_set(char *payload,
- enum mlxsw_reg_ritr_if_type rif_type,
- u16 fid)
-{
- if (rif_type == MLXSW_REG_RITR_FID_IF)
- mlxsw_reg_ritr_fid_if_fid_set(payload, fid);
- else
- mlxsw_reg_ritr_vlan_if_vid_set(payload, fid);
-}
-
/* Sub-port Interface */
/* reg_ritr_sp_if_lag
@@ -6742,6 +6953,16 @@ MLXSW_ITEM32(reg, ritr, sp_if_lag, 0x08, 24, 1);
*/
MLXSW_ITEM32(reg, ritr, sp_if_system_port, 0x08, 0, 16);
+/* reg_ritr_sp_if_efid
+ * Egress filtering ID.
+ * Used to connect the eRIF to a bridge if eRIF-ACL has modified the DMAC or
+ * the VID.
+ * Access: RW
+ *
+ * Note: Reserved when legacy bridge model is used.
+ */
+MLXSW_ITEM32(reg, ritr, sp_if_efid, 0x0C, 0, 16);
+
/* reg_ritr_sp_if_vid
* VLAN ID.
* Access: RW
@@ -6918,6 +7139,20 @@ static inline void mlxsw_reg_ritr_mac_pack(char *payload, const char *mac)
}
static inline void
+mlxsw_reg_ritr_vlan_if_pack(char *payload, bool enable, u16 rif, u16 vr_id,
+ u16 mtu, const char *mac, u8 mac_profile_id,
+ u16 vlan_id, u16 efid)
+{
+ enum mlxsw_reg_ritr_if_type type = MLXSW_REG_RITR_VLAN_IF;
+
+ mlxsw_reg_ritr_pack(payload, enable, type, rif, vr_id, mtu);
+ mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac);
+ mlxsw_reg_ritr_if_mac_profile_id_set(payload, mac_profile_id);
+ mlxsw_reg_ritr_vlan_if_vlan_id_set(payload, vlan_id);
+ mlxsw_reg_ritr_vlan_if_efid_set(payload, efid);
+}
+
+static inline void
mlxsw_reg_ritr_loopback_ipip_common_pack(char *payload,
enum mlxsw_reg_ritr_loopback_ipip_type ipip_type,
enum mlxsw_reg_ritr_loopback_ipip_options options,
@@ -7848,11 +8083,10 @@ static inline void mlxsw_reg_ralue_pack4(char *payload,
enum mlxsw_reg_ralxx_protocol protocol,
enum mlxsw_reg_ralue_op op,
u16 virtual_router, u8 prefix_len,
- u32 *dip)
+ u32 dip)
{
mlxsw_reg_ralue_pack(payload, protocol, op, virtual_router, prefix_len);
- if (dip)
- mlxsw_reg_ralue_dip4_set(payload, *dip);
+ mlxsw_reg_ralue_dip4_set(payload, dip);
}
static inline void mlxsw_reg_ralue_pack6(char *payload,
@@ -7862,8 +8096,7 @@ static inline void mlxsw_reg_ralue_pack6(char *payload,
const void *dip)
{
mlxsw_reg_ralue_pack(payload, protocol, op, virtual_router, prefix_len);
- if (dip)
- mlxsw_reg_ralue_dip6_memcpy_to(payload, dip);
+ mlxsw_reg_ralue_dip6_memcpy_to(payload, dip);
}
static inline void
@@ -8926,656 +9159,62 @@ mlxsw_reg_rmft2_ipv6_pack(char *payload, bool v, u16 offset, u16 virtual_router,
mlxsw_reg_rmft2_sip6_mask_memcpy_to(payload, (void *)&sip6_mask);
}
-/* RXLTE - Router XLT Enable Register
- * ----------------------------------
- * The RXLTE enables XLT (eXtended Lookup Table) LPM lookups if a capable
- * XM is present on the system.
- */
-
-#define MLXSW_REG_RXLTE_ID 0x8050
-#define MLXSW_REG_RXLTE_LEN 0x0C
-
-MLXSW_REG_DEFINE(rxlte, MLXSW_REG_RXLTE_ID, MLXSW_REG_RXLTE_LEN);
-
-/* reg_rxlte_virtual_router
- * Virtual router ID associated with the router interface.
- * Range is 0..cap_max_virtual_routers-1
- * Access: Index
- */
-MLXSW_ITEM32(reg, rxlte, virtual_router, 0x00, 0, 16);
-
-enum mlxsw_reg_rxlte_protocol {
- MLXSW_REG_RXLTE_PROTOCOL_IPV4,
- MLXSW_REG_RXLTE_PROTOCOL_IPV6,
-};
-
-/* reg_rxlte_protocol
- * Access: Index
- */
-MLXSW_ITEM32(reg, rxlte, protocol, 0x04, 0, 4);
-
-/* reg_rxlte_lpm_xlt_en
- * Access: RW
- */
-MLXSW_ITEM32(reg, rxlte, lpm_xlt_en, 0x08, 0, 1);
-
-static inline void mlxsw_reg_rxlte_pack(char *payload, u16 virtual_router,
- enum mlxsw_reg_rxlte_protocol protocol,
- bool lpm_xlt_en)
-{
- MLXSW_REG_ZERO(rxlte, payload);
- mlxsw_reg_rxlte_virtual_router_set(payload, virtual_router);
- mlxsw_reg_rxlte_protocol_set(payload, protocol);
- mlxsw_reg_rxlte_lpm_xlt_en_set(payload, lpm_xlt_en);
-}
-
-/* RXLTM - Router XLT M select Register
- * ------------------------------------
- * The RXLTM configures and selects the M for the XM lookups.
- */
-
-#define MLXSW_REG_RXLTM_ID 0x8051
-#define MLXSW_REG_RXLTM_LEN 0x14
-
-MLXSW_REG_DEFINE(rxltm, MLXSW_REG_RXLTM_ID, MLXSW_REG_RXLTM_LEN);
-
-/* reg_rxltm_m0_val_v6
- * Global M0 value For IPv6.
- * Range 0..128
- * Access: RW
- */
-MLXSW_ITEM32(reg, rxltm, m0_val_v6, 0x10, 16, 8);
-
-/* reg_rxltm_m0_val_v4
- * Global M0 value For IPv4.
- * Range 0..32
- * Access: RW
- */
-MLXSW_ITEM32(reg, rxltm, m0_val_v4, 0x10, 0, 6);
-
-static inline void mlxsw_reg_rxltm_pack(char *payload, u8 m0_val_v4, u8 m0_val_v6)
-{
- MLXSW_REG_ZERO(rxltm, payload);
- mlxsw_reg_rxltm_m0_val_v6_set(payload, m0_val_v6);
- mlxsw_reg_rxltm_m0_val_v4_set(payload, m0_val_v4);
-}
-
-/* RLCMLD - Router LPM Cache ML Delete Register
- * --------------------------------------------
- * The RLCMLD register is used to bulk delete the XLT-LPM cache ML entries.
- * This can be used by SW when L is increased or decreased, thus need to
- * remove entries with old ML values.
- */
-
-#define MLXSW_REG_RLCMLD_ID 0x8055
-#define MLXSW_REG_RLCMLD_LEN 0x30
-
-MLXSW_REG_DEFINE(rlcmld, MLXSW_REG_RLCMLD_ID, MLXSW_REG_RLCMLD_LEN);
-
-enum mlxsw_reg_rlcmld_select {
- MLXSW_REG_RLCMLD_SELECT_ML_ENTRIES,
- MLXSW_REG_RLCMLD_SELECT_M_ENTRIES,
- MLXSW_REG_RLCMLD_SELECT_M_AND_ML_ENTRIES,
-};
-
-/* reg_rlcmld_select
- * Which entries to delete.
- * Access: Index
- */
-MLXSW_ITEM32(reg, rlcmld, select, 0x00, 16, 2);
-
-enum mlxsw_reg_rlcmld_filter_fields {
- MLXSW_REG_RLCMLD_FILTER_FIELDS_BY_PROTOCOL = 0x04,
- MLXSW_REG_RLCMLD_FILTER_FIELDS_BY_VIRTUAL_ROUTER = 0x08,
- MLXSW_REG_RLCMLD_FILTER_FIELDS_BY_DIP = 0x10,
-};
-
-/* reg_rlcmld_filter_fields
- * If a bit is '0' then the relevant field is ignored.
- * Access: Index
+/* REIV - Router Egress Interface to VID Register
+ * ----------------------------------------------
+ * The REIV register maps {eRIF, egress_port} -> VID.
+ * This mapping is done at the egress, after the ACLs.
+ * This mapping always takes effect after the router, regardless of cast
+ * (unicast/multicast/port-based multicast), regardless of eRIF type and
+ * regardless of bridge decisions (e.g. SFD for unicast or SMPE).
+ * Reserved when the RIF is a loopback RIF.
+ *
+ * Note: Reserved when legacy bridge model is used.
*/
-MLXSW_ITEM32(reg, rlcmld, filter_fields, 0x00, 0, 8);
+#define MLXSW_REG_REIV_ID 0x8034
+#define MLXSW_REG_REIV_BASE_LEN 0x20 /* base length, without records */
+#define MLXSW_REG_REIV_REC_LEN 0x04 /* record length */
+#define MLXSW_REG_REIV_REC_MAX_COUNT 256 /* firmware limitation */
+#define MLXSW_REG_REIV_LEN (MLXSW_REG_REIV_BASE_LEN + \
+ MLXSW_REG_REIV_REC_LEN * \
+ MLXSW_REG_REIV_REC_MAX_COUNT)
-enum mlxsw_reg_rlcmld_protocol {
- MLXSW_REG_RLCMLD_PROTOCOL_UC_IPV4,
- MLXSW_REG_RLCMLD_PROTOCOL_UC_IPV6,
-};
-
-/* reg_rlcmld_protocol
- * Access: Index
- */
-MLXSW_ITEM32(reg, rlcmld, protocol, 0x08, 0, 4);
+MLXSW_REG_DEFINE(reiv, MLXSW_REG_REIV_ID, MLXSW_REG_REIV_LEN);
-/* reg_rlcmld_virtual_router
- * Virtual router ID.
- * Range is 0..cap_max_virtual_routers-1
+/* reg_reiv_port_page
+ * Port page - elport_record[0] is 256*port_page.
* Access: Index
*/
-MLXSW_ITEM32(reg, rlcmld, virtual_router, 0x0C, 0, 16);
+MLXSW_ITEM32(reg, reiv, port_page, 0x00, 0, 4);
-/* reg_rlcmld_dip
- * The prefix of the route or of the marker that the object of the LPM
- * is compared with. The most significant bits of the dip are the prefix.
+/* reg_reiv_erif
+ * Egress RIF.
+ * Range is 0..cap_max_router_interfaces-1.
* Access: Index
*/
-MLXSW_ITEM32(reg, rlcmld, dip4, 0x1C, 0, 32);
-MLXSW_ITEM_BUF(reg, rlcmld, dip6, 0x10, 16);
-
-/* reg_rlcmld_dip_mask
- * per bit:
- * 0: no match
- * 1: match
- * Access: Index
- */
-MLXSW_ITEM32(reg, rlcmld, dip_mask4, 0x2C, 0, 32);
-MLXSW_ITEM_BUF(reg, rlcmld, dip_mask6, 0x20, 16);
-
-static inline void __mlxsw_reg_rlcmld_pack(char *payload,
- enum mlxsw_reg_rlcmld_select select,
- enum mlxsw_reg_rlcmld_protocol protocol,
- u16 virtual_router)
-{
- u8 filter_fields = MLXSW_REG_RLCMLD_FILTER_FIELDS_BY_PROTOCOL |
- MLXSW_REG_RLCMLD_FILTER_FIELDS_BY_VIRTUAL_ROUTER |
- MLXSW_REG_RLCMLD_FILTER_FIELDS_BY_DIP;
-
- MLXSW_REG_ZERO(rlcmld, payload);
- mlxsw_reg_rlcmld_select_set(payload, select);
- mlxsw_reg_rlcmld_filter_fields_set(payload, filter_fields);
- mlxsw_reg_rlcmld_protocol_set(payload, protocol);
- mlxsw_reg_rlcmld_virtual_router_set(payload, virtual_router);
-}
-
-static inline void mlxsw_reg_rlcmld_pack4(char *payload,
- enum mlxsw_reg_rlcmld_select select,
- u16 virtual_router,
- u32 dip, u32 dip_mask)
-{
- __mlxsw_reg_rlcmld_pack(payload, select,
- MLXSW_REG_RLCMLD_PROTOCOL_UC_IPV4,
- virtual_router);
- mlxsw_reg_rlcmld_dip4_set(payload, dip);
- mlxsw_reg_rlcmld_dip_mask4_set(payload, dip_mask);
-}
-
-static inline void mlxsw_reg_rlcmld_pack6(char *payload,
- enum mlxsw_reg_rlcmld_select select,
- u16 virtual_router,
- const void *dip, const void *dip_mask)
-{
- __mlxsw_reg_rlcmld_pack(payload, select,
- MLXSW_REG_RLCMLD_PROTOCOL_UC_IPV6,
- virtual_router);
- mlxsw_reg_rlcmld_dip6_memcpy_to(payload, dip);
- mlxsw_reg_rlcmld_dip_mask6_memcpy_to(payload, dip_mask);
-}
+MLXSW_ITEM32(reg, reiv, erif, 0x04, 0, 16);
-/* RLPMCE - Router LPM Cache Enable Register
- * -----------------------------------------
- * Allows disabling the LPM cache. Can be changed on the fly.
- */
-
-#define MLXSW_REG_RLPMCE_ID 0x8056
-#define MLXSW_REG_RLPMCE_LEN 0x4
-
-MLXSW_REG_DEFINE(rlpmce, MLXSW_REG_RLPMCE_ID, MLXSW_REG_RLPMCE_LEN);
-
-/* reg_rlpmce_flush
- * Flush:
- * 0: do not flush the cache (default)
- * 1: flush (clear) the cache
- * Access: WO
- */
-MLXSW_ITEM32(reg, rlpmce, flush, 0x00, 4, 1);
-
-/* reg_rlpmce_disable
- * LPM cache:
- * 0: enabled (default)
- * 1: disabled
- * Access: RW
- */
-MLXSW_ITEM32(reg, rlpmce, disable, 0x00, 0, 1);
-
-static inline void mlxsw_reg_rlpmce_pack(char *payload, bool flush,
- bool disable)
-{
- MLXSW_REG_ZERO(rlpmce, payload);
- mlxsw_reg_rlpmce_flush_set(payload, flush);
- mlxsw_reg_rlpmce_disable_set(payload, disable);
-}
-
-/* Note that XLTQ, XMDR, XRMT and XRALXX register positions violate the rule
- * of ordering register definitions by the ID. However, XRALXX pack helpers are
- * using RALXX pack helpers, RALXX registers have higher IDs.
- * Also XMDR is using RALUE enums. XLRQ and XRMT are just put alongside with the
- * related registers.
- */
-
-/* XLTQ - XM Lookup Table Query Register
- * -------------------------------------
- */
-#define MLXSW_REG_XLTQ_ID 0x7802
-#define MLXSW_REG_XLTQ_LEN 0x2C
-
-MLXSW_REG_DEFINE(xltq, MLXSW_REG_XLTQ_ID, MLXSW_REG_XLTQ_LEN);
-
-enum mlxsw_reg_xltq_xm_device_id {
- MLXSW_REG_XLTQ_XM_DEVICE_ID_UNKNOWN,
- MLXSW_REG_XLTQ_XM_DEVICE_ID_XLT = 0xCF71,
-};
-
-/* reg_xltq_xm_device_id
- * XM device ID.
- * Access: RO
- */
-MLXSW_ITEM32(reg, xltq, xm_device_id, 0x04, 0, 16);
-
-/* reg_xltq_xlt_cap_ipv4_lpm
- * Access: RO
- */
-MLXSW_ITEM32(reg, xltq, xlt_cap_ipv4_lpm, 0x10, 0, 1);
-
-/* reg_xltq_xlt_cap_ipv6_lpm
- * Access: RO
- */
-MLXSW_ITEM32(reg, xltq, xlt_cap_ipv6_lpm, 0x10, 1, 1);
-
-/* reg_xltq_cap_xlt_entries
- * Number of XLT entries
- * Note: SW must not fill more than 80% in order to avoid overflow
- * Access: RO
- */
-MLXSW_ITEM32(reg, xltq, cap_xlt_entries, 0x20, 0, 32);
-
-/* reg_xltq_cap_xlt_mtable
- * XLT M-Table max size
- * Access: RO
- */
-MLXSW_ITEM32(reg, xltq, cap_xlt_mtable, 0x24, 0, 32);
-
-static inline void mlxsw_reg_xltq_pack(char *payload)
-{
- MLXSW_REG_ZERO(xltq, payload);
-}
-
-static inline void mlxsw_reg_xltq_unpack(char *payload, u16 *xm_device_id, bool *xlt_cap_ipv4_lpm,
- bool *xlt_cap_ipv6_lpm, u32 *cap_xlt_entries,
- u32 *cap_xlt_mtable)
-{
- *xm_device_id = mlxsw_reg_xltq_xm_device_id_get(payload);
- *xlt_cap_ipv4_lpm = mlxsw_reg_xltq_xlt_cap_ipv4_lpm_get(payload);
- *xlt_cap_ipv6_lpm = mlxsw_reg_xltq_xlt_cap_ipv6_lpm_get(payload);
- *cap_xlt_entries = mlxsw_reg_xltq_cap_xlt_entries_get(payload);
- *cap_xlt_mtable = mlxsw_reg_xltq_cap_xlt_mtable_get(payload);
-}
-
-/* XMDR - XM Direct Register
- * -------------------------
- * The XMDR allows direct access to the XM device via the switch.
- * Working in synchronous mode. FW waits for response from the XLT
- * for each command. FW acks the XMDR accordingly.
- */
-#define MLXSW_REG_XMDR_ID 0x7803
-#define MLXSW_REG_XMDR_BASE_LEN 0x20
-#define MLXSW_REG_XMDR_TRANS_LEN 0x80
-#define MLXSW_REG_XMDR_LEN (MLXSW_REG_XMDR_BASE_LEN + \
- MLXSW_REG_XMDR_TRANS_LEN)
-
-MLXSW_REG_DEFINE(xmdr, MLXSW_REG_XMDR_ID, MLXSW_REG_XMDR_LEN);
-
-/* reg_xmdr_bulk_entry
- * Bulk_entry
- * 0: Last entry - immediate flush of XRT-cache
- * 1: Bulk entry - do not flush the XRT-cache
- * Access: OP
- */
-MLXSW_ITEM32(reg, xmdr, bulk_entry, 0x04, 8, 1);
-
-/* reg_xmdr_num_rec
- * Number of records for Direct access to XM
- * Supported: 0..4 commands (except NOP which is a filler)
- * 0 commands is reserved when bulk_entry = 1.
- * 0 commands is allowed when bulk_entry = 0 for immediate XRT-cache flush.
+/* reg_reiv_rec_update
+ * Update enable (when write):
+ * 0 - Do not update the entry.
+ * 1 - Update the entry.
* Access: OP
*/
-MLXSW_ITEM32(reg, xmdr, num_rec, 0x04, 0, 4);
-
-/* reg_xmdr_reply_vect
- * Reply Vector
- * Bit i for command index i+1
- * values per bit:
- * 0: failed
- * 1: succeeded
- * e.g. if commands 1, 2, 4 succeeded and command 3 failed then binary
- * value will be 0b1011
- * Access: RO
- */
-MLXSW_ITEM_BIT_ARRAY(reg, xmdr, reply_vect, 0x08, 4, 1);
-
-static inline void mlxsw_reg_xmdr_pack(char *payload, bool bulk_entry)
-{
- MLXSW_REG_ZERO(xmdr, payload);
- mlxsw_reg_xmdr_bulk_entry_set(payload, bulk_entry);
-}
-
-enum mlxsw_reg_xmdr_c_cmd_id {
- MLXSW_REG_XMDR_C_CMD_ID_LT_ROUTE_V4 = 0x30,
- MLXSW_REG_XMDR_C_CMD_ID_LT_ROUTE_V6 = 0x31,
-};
-
-#define MLXSW_REG_XMDR_C_LT_ROUTE_V4_LEN 32
-#define MLXSW_REG_XMDR_C_LT_ROUTE_V6_LEN 48
-
-/* reg_xmdr_c_cmd_id
- */
-MLXSW_ITEM32(reg, xmdr_c, cmd_id, 0x00, 24, 8);
-
-/* reg_xmdr_c_seq_number
- */
-MLXSW_ITEM32(reg, xmdr_c, seq_number, 0x00, 12, 12);
-
-enum mlxsw_reg_xmdr_c_ltr_op {
- /* Activity is set */
- MLXSW_REG_XMDR_C_LTR_OP_WRITE = 0,
- /* There is no update mask. All fields are updated. */
- MLXSW_REG_XMDR_C_LTR_OP_UPDATE = 1,
- MLXSW_REG_XMDR_C_LTR_OP_DELETE = 2,
-};
-
-/* reg_xmdr_c_ltr_op
- * Operation.
- */
-MLXSW_ITEM32(reg, xmdr_c, ltr_op, 0x04, 24, 8);
-
-/* reg_xmdr_c_ltr_trap_action
- * Trap action.
- * Values are defined in enum mlxsw_reg_ralue_trap_action.
- */
-MLXSW_ITEM32(reg, xmdr_c, ltr_trap_action, 0x04, 20, 4);
-
-enum mlxsw_reg_xmdr_c_ltr_trap_id_num {
- MLXSW_REG_XMDR_C_LTR_TRAP_ID_NUM_RTR_INGRESS0,
- MLXSW_REG_XMDR_C_LTR_TRAP_ID_NUM_RTR_INGRESS1,
- MLXSW_REG_XMDR_C_LTR_TRAP_ID_NUM_RTR_INGRESS2,
- MLXSW_REG_XMDR_C_LTR_TRAP_ID_NUM_RTR_INGRESS3,
-};
-
-/* reg_xmdr_c_ltr_trap_id_num
- * Trap-ID number.
- */
-MLXSW_ITEM32(reg, xmdr_c, ltr_trap_id_num, 0x04, 16, 4);
-
-/* reg_xmdr_c_ltr_virtual_router
- * Virtual Router ID.
- * Range is 0..cap_max_virtual_routers-1
- */
-MLXSW_ITEM32(reg, xmdr_c, ltr_virtual_router, 0x04, 0, 16);
-
-/* reg_xmdr_c_ltr_prefix_len
- * Number of bits in the prefix of the LPM route.
- */
-MLXSW_ITEM32(reg, xmdr_c, ltr_prefix_len, 0x08, 24, 8);
-
-/* reg_xmdr_c_ltr_bmp_len
- * The best match prefix length in the case that there is no match for
- * longer prefixes.
- * If (entry_type != MARKER_ENTRY), bmp_len must be equal to prefix_len
- */
-MLXSW_ITEM32(reg, xmdr_c, ltr_bmp_len, 0x08, 16, 8);
-
-/* reg_xmdr_c_ltr_entry_type
- * Entry type.
- * Values are defined in enum mlxsw_reg_ralue_entry_type.
- */
-MLXSW_ITEM32(reg, xmdr_c, ltr_entry_type, 0x08, 4, 4);
-
-enum mlxsw_reg_xmdr_c_ltr_action_type {
- MLXSW_REG_XMDR_C_LTR_ACTION_TYPE_LOCAL,
- MLXSW_REG_XMDR_C_LTR_ACTION_TYPE_REMOTE,
- MLXSW_REG_XMDR_C_LTR_ACTION_TYPE_IP2ME,
-};
-
-/* reg_xmdr_c_ltr_action_type
- * Action Type.
- */
-MLXSW_ITEM32(reg, xmdr_c, ltr_action_type, 0x08, 0, 4);
-
-/* reg_xmdr_c_ltr_erif
- * Egress Router Interface.
- * Only relevant in case of LOCAL action.
- */
-MLXSW_ITEM32(reg, xmdr_c, ltr_erif, 0x10, 0, 16);
+MLXSW_ITEM32_INDEXED(reg, reiv, rec_update, MLXSW_REG_REIV_BASE_LEN, 31, 1,
+ MLXSW_REG_REIV_REC_LEN, 0x00, false);
-/* reg_xmdr_c_ltr_adjacency_index
- * Points to the first entry of the group-based ECMP.
- * Only relevant in case of REMOTE action.
- */
-MLXSW_ITEM32(reg, xmdr_c, ltr_adjacency_index, 0x10, 0, 24);
-
-#define MLXSW_REG_XMDR_C_LTR_POINTER_TO_TUNNEL_DISABLED_MAGIC 0xFFFFFF
-
-/* reg_xmdr_c_ltr_pointer_to_tunnel
- * Only relevant in case of IP2ME action.
- */
-MLXSW_ITEM32(reg, xmdr_c, ltr_pointer_to_tunnel, 0x10, 0, 24);
-
-/* reg_xmdr_c_ltr_ecmp_size
- * Amount of sequential entries starting
- * from the adjacency_index (the number of ECMPs).
- * The valid range is 1-64, 512, 1024, 2048 and 4096.
- * Only relevant in case of REMOTE action.
- */
-MLXSW_ITEM32(reg, xmdr_c, ltr_ecmp_size, 0x14, 0, 32);
-
-/* reg_xmdr_c_ltr_dip*
- * The prefix of the route or of the marker that the object of the LPM
- * is compared with. The most significant bits of the dip are the prefix.
- * The least significant bits must be '0' if the prefix_len is smaller
- * than 128 for IPv6 or smaller than 32 for IPv4.
- */
-MLXSW_ITEM32(reg, xmdr_c, ltr_dip4, 0x1C, 0, 32);
-MLXSW_ITEM_BUF(reg, xmdr_c, ltr_dip6, 0x1C, 16);
-
-static inline void
-mlxsw_reg_xmdr_c_ltr_pack(char *xmdr_payload, unsigned int trans_offset,
- enum mlxsw_reg_xmdr_c_cmd_id cmd_id, u16 seq_number,
- enum mlxsw_reg_xmdr_c_ltr_op op, u16 virtual_router,
- u8 prefix_len)
-{
- char *payload = xmdr_payload + MLXSW_REG_XMDR_BASE_LEN + trans_offset;
- u8 num_rec = mlxsw_reg_xmdr_num_rec_get(xmdr_payload);
-
- mlxsw_reg_xmdr_num_rec_set(xmdr_payload, num_rec + 1);
-
- mlxsw_reg_xmdr_c_cmd_id_set(payload, cmd_id);
- mlxsw_reg_xmdr_c_seq_number_set(payload, seq_number);
- mlxsw_reg_xmdr_c_ltr_op_set(payload, op);
- mlxsw_reg_xmdr_c_ltr_virtual_router_set(payload, virtual_router);
- mlxsw_reg_xmdr_c_ltr_prefix_len_set(payload, prefix_len);
- mlxsw_reg_xmdr_c_ltr_entry_type_set(payload,
- MLXSW_REG_RALUE_ENTRY_TYPE_ROUTE_ENTRY);
- mlxsw_reg_xmdr_c_ltr_bmp_len_set(payload, prefix_len);
-}
-
-static inline unsigned int
-mlxsw_reg_xmdr_c_ltr_pack4(char *xmdr_payload, unsigned int trans_offset,
- u16 seq_number, enum mlxsw_reg_xmdr_c_ltr_op op,
- u16 virtual_router, u8 prefix_len, u32 *dip)
-{
- char *payload = xmdr_payload + MLXSW_REG_XMDR_BASE_LEN + trans_offset;
-
- mlxsw_reg_xmdr_c_ltr_pack(xmdr_payload, trans_offset,
- MLXSW_REG_XMDR_C_CMD_ID_LT_ROUTE_V4,
- seq_number, op, virtual_router, prefix_len);
- if (dip)
- mlxsw_reg_xmdr_c_ltr_dip4_set(payload, *dip);
- return MLXSW_REG_XMDR_C_LT_ROUTE_V4_LEN;
-}
-
-static inline unsigned int
-mlxsw_reg_xmdr_c_ltr_pack6(char *xmdr_payload, unsigned int trans_offset,
- u16 seq_number, enum mlxsw_reg_xmdr_c_ltr_op op,
- u16 virtual_router, u8 prefix_len, const void *dip)
-{
- char *payload = xmdr_payload + MLXSW_REG_XMDR_BASE_LEN + trans_offset;
-
- mlxsw_reg_xmdr_c_ltr_pack(xmdr_payload, trans_offset,
- MLXSW_REG_XMDR_C_CMD_ID_LT_ROUTE_V6,
- seq_number, op, virtual_router, prefix_len);
- if (dip)
- mlxsw_reg_xmdr_c_ltr_dip6_memcpy_to(payload, dip);
- return MLXSW_REG_XMDR_C_LT_ROUTE_V6_LEN;
-}
-
-static inline void
-mlxsw_reg_xmdr_c_ltr_act_remote_pack(char *xmdr_payload, unsigned int trans_offset,
- enum mlxsw_reg_ralue_trap_action trap_action,
- enum mlxsw_reg_xmdr_c_ltr_trap_id_num trap_id_num,
- u32 adjacency_index, u16 ecmp_size)
-{
- char *payload = xmdr_payload + MLXSW_REG_XMDR_BASE_LEN + trans_offset;
-
- mlxsw_reg_xmdr_c_ltr_action_type_set(payload, MLXSW_REG_XMDR_C_LTR_ACTION_TYPE_REMOTE);
- mlxsw_reg_xmdr_c_ltr_trap_action_set(payload, trap_action);
- mlxsw_reg_xmdr_c_ltr_trap_id_num_set(payload, trap_id_num);
- mlxsw_reg_xmdr_c_ltr_adjacency_index_set(payload, adjacency_index);
- mlxsw_reg_xmdr_c_ltr_ecmp_size_set(payload, ecmp_size);
-}
-
-static inline void
-mlxsw_reg_xmdr_c_ltr_act_local_pack(char *xmdr_payload, unsigned int trans_offset,
- enum mlxsw_reg_ralue_trap_action trap_action,
- enum mlxsw_reg_xmdr_c_ltr_trap_id_num trap_id_num, u16 erif)
-{
- char *payload = xmdr_payload + MLXSW_REG_XMDR_BASE_LEN + trans_offset;
-
- mlxsw_reg_xmdr_c_ltr_action_type_set(payload, MLXSW_REG_XMDR_C_LTR_ACTION_TYPE_LOCAL);
- mlxsw_reg_xmdr_c_ltr_trap_action_set(payload, trap_action);
- mlxsw_reg_xmdr_c_ltr_trap_id_num_set(payload, trap_id_num);
- mlxsw_reg_xmdr_c_ltr_erif_set(payload, erif);
-}
-
-static inline void mlxsw_reg_xmdr_c_ltr_act_ip2me_pack(char *xmdr_payload,
- unsigned int trans_offset)
-{
- char *payload = xmdr_payload + MLXSW_REG_XMDR_BASE_LEN + trans_offset;
-
- mlxsw_reg_xmdr_c_ltr_action_type_set(payload, MLXSW_REG_XMDR_C_LTR_ACTION_TYPE_IP2ME);
- mlxsw_reg_xmdr_c_ltr_pointer_to_tunnel_set(payload,
- MLXSW_REG_XMDR_C_LTR_POINTER_TO_TUNNEL_DISABLED_MAGIC);
-}
-
-static inline void mlxsw_reg_xmdr_c_ltr_act_ip2me_tun_pack(char *xmdr_payload,
- unsigned int trans_offset,
- u32 pointer_to_tunnel)
-{
- char *payload = xmdr_payload + MLXSW_REG_XMDR_BASE_LEN + trans_offset;
-
- mlxsw_reg_xmdr_c_ltr_action_type_set(payload, MLXSW_REG_XMDR_C_LTR_ACTION_TYPE_IP2ME);
- mlxsw_reg_xmdr_c_ltr_pointer_to_tunnel_set(payload, pointer_to_tunnel);
-}
-
-/* XRMT - XM Router M Table Register
- * ---------------------------------
- * The XRMT configures the M-Table for the XLT-LPM.
- */
-#define MLXSW_REG_XRMT_ID 0x7810
-#define MLXSW_REG_XRMT_LEN 0x14
-
-MLXSW_REG_DEFINE(xrmt, MLXSW_REG_XRMT_ID, MLXSW_REG_XRMT_LEN);
-
-/* reg_xrmt_index
- * Index in M-Table.
- * Range 0..cap_xlt_mtable-1
- * Access: Index
- */
-MLXSW_ITEM32(reg, xrmt, index, 0x04, 0, 20);
-
-/* reg_xrmt_l0_val
+/* reg_reiv_rec_evid
+ * Egress VID.
+ * Range is 0..4095.
* Access: RW
*/
-MLXSW_ITEM32(reg, xrmt, l0_val, 0x10, 24, 8);
-
-static inline void mlxsw_reg_xrmt_pack(char *payload, u32 index, u8 l0_val)
-{
- MLXSW_REG_ZERO(xrmt, payload);
- mlxsw_reg_xrmt_index_set(payload, index);
- mlxsw_reg_xrmt_l0_val_set(payload, l0_val);
-}
-
-/* XRALTA - XM Router Algorithmic LPM Tree Allocation Register
- * -----------------------------------------------------------
- * The XRALTA is used to allocate the XLT LPM trees.
- *
- * This register embeds original RALTA register.
- */
-#define MLXSW_REG_XRALTA_ID 0x7811
-#define MLXSW_REG_XRALTA_LEN 0x08
-#define MLXSW_REG_XRALTA_RALTA_OFFSET 0x04
+MLXSW_ITEM32_INDEXED(reg, reiv, rec_evid, MLXSW_REG_REIV_BASE_LEN, 0, 12,
+ MLXSW_REG_REIV_REC_LEN, 0x00, false);
-MLXSW_REG_DEFINE(xralta, MLXSW_REG_XRALTA_ID, MLXSW_REG_XRALTA_LEN);
-
-static inline void mlxsw_reg_xralta_pack(char *payload, bool alloc,
- enum mlxsw_reg_ralxx_protocol protocol,
- u8 tree_id)
+static inline void mlxsw_reg_reiv_pack(char *payload, u8 port_page, u16 erif)
{
- char *ralta_payload = payload + MLXSW_REG_XRALTA_RALTA_OFFSET;
-
- MLXSW_REG_ZERO(xralta, payload);
- mlxsw_reg_ralta_pack(ralta_payload, alloc, protocol, tree_id);
-}
-
-/* XRALST - XM Router Algorithmic LPM Structure Tree Register
- * ----------------------------------------------------------
- * The XRALST is used to set and query the structure of an XLT LPM tree.
- *
- * This register embeds original RALST register.
- */
-#define MLXSW_REG_XRALST_ID 0x7812
-#define MLXSW_REG_XRALST_LEN 0x108
-#define MLXSW_REG_XRALST_RALST_OFFSET 0x04
-
-MLXSW_REG_DEFINE(xralst, MLXSW_REG_XRALST_ID, MLXSW_REG_XRALST_LEN);
-
-static inline void mlxsw_reg_xralst_pack(char *payload, u8 root_bin, u8 tree_id)
-{
- char *ralst_payload = payload + MLXSW_REG_XRALST_RALST_OFFSET;
-
- MLXSW_REG_ZERO(xralst, payload);
- mlxsw_reg_ralst_pack(ralst_payload, root_bin, tree_id);
-}
-
-static inline void mlxsw_reg_xralst_bin_pack(char *payload, u8 bin_number,
- u8 left_child_bin,
- u8 right_child_bin)
-{
- char *ralst_payload = payload + MLXSW_REG_XRALST_RALST_OFFSET;
-
- mlxsw_reg_ralst_bin_pack(ralst_payload, bin_number, left_child_bin,
- right_child_bin);
-}
-
-/* XRALTB - XM Router Algorithmic LPM Tree Binding Register
- * --------------------------------------------------------
- * The XRALTB register is used to bind virtual router and protocol
- * to an allocated LPM tree.
- *
- * This register embeds original RALTB register.
- */
-#define MLXSW_REG_XRALTB_ID 0x7813
-#define MLXSW_REG_XRALTB_LEN 0x08
-#define MLXSW_REG_XRALTB_RALTB_OFFSET 0x04
-
-MLXSW_REG_DEFINE(xraltb, MLXSW_REG_XRALTB_ID, MLXSW_REG_XRALTB_LEN);
-
-static inline void mlxsw_reg_xraltb_pack(char *payload, u16 virtual_router,
- enum mlxsw_reg_ralxx_protocol protocol,
- u8 tree_id)
-{
- char *raltb_payload = payload + MLXSW_REG_XRALTB_RALTB_OFFSET;
-
- MLXSW_REG_ZERO(xraltb, payload);
- mlxsw_reg_raltb_pack(raltb_payload, virtual_router, protocol, tree_id);
+ MLXSW_REG_ZERO(reiv, payload);
+ mlxsw_reg_reiv_port_page_set(payload, port_page);
+ mlxsw_reg_reiv_erif_set(payload, erif);
}
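/* Illustrative only (not part of the patch): a minimal sketch of programming
 * one REIV record, mapping {eRIF 5, local port 260} to egress VID 100.
 * Local port 260 lives in port page 1 (260 / 256) at record index 4
 * (260 % 256). The function name is hypothetical; only the accessors defined
 * above are real.
 */
static int mlxsw_reiv_example_write(struct mlxsw_core *core)
{
	char reiv_pl[MLXSW_REG_REIV_LEN];

	mlxsw_reg_reiv_pack(reiv_pl, 1, 5);		 /* port page 1, eRIF 5 */
	mlxsw_reg_reiv_rec_update_set(reiv_pl, 4, true); /* touch record 4 only */
	mlxsw_reg_reiv_rec_evid_set(reiv_pl, 4, 100);	 /* egress VID 100 */
	return mlxsw_reg_write(core, MLXSW_REG(reiv), reiv_pl);
}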
/* MFCR - Management Fan Control Register
@@ -13011,6 +12650,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
MLXSW_REG(spvmlr),
MLXSW_REG(spvc),
MLXSW_REG(spevet),
+ MLXSW_REG(smpe),
MLXSW_REG(sftr2),
MLXSW_REG(smid2),
MLXSW_REG(cwtp),
@@ -13084,16 +12724,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
MLXSW_REG(rigr2),
MLXSW_REG(recr2),
MLXSW_REG(rmft2),
- MLXSW_REG(rxlte),
- MLXSW_REG(rxltm),
- MLXSW_REG(rlcmld),
- MLXSW_REG(rlpmce),
- MLXSW_REG(xltq),
- MLXSW_REG(xmdr),
- MLXSW_REG(xrmt),
- MLXSW_REG(xralta),
- MLXSW_REG(xralst),
- MLXSW_REG(xraltb),
+ MLXSW_REG(reiv),
MLXSW_REG(mfcr),
MLXSW_REG(mfsc),
MLXSW_REG(mfsm),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index cafd206e8d7e..e58acd397edf 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2105,9 +2105,6 @@ static int mlxsw_sp_port_module_info_init(struct mlxsw_sp *mlxsw_sp)
return -ENOMEM;
for (i = 1; i < max_ports; i++) {
- if (mlxsw_core_port_is_xm(mlxsw_sp->core, i))
- continue;
-
port_mapping = &mlxsw_sp->port_mapping[i];
err = mlxsw_sp_port_module_info_get(mlxsw_sp, i, port_mapping);
if (err)
@@ -3235,6 +3232,7 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core,
mlxsw_sp->router_ops = &mlxsw_sp1_router_ops;
mlxsw_sp->listeners = mlxsw_sp1_listener;
mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp1_listener);
+ mlxsw_sp->fid_family_arr = mlxsw_sp1_fid_family_arr;
mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP1;
return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack);
@@ -3267,6 +3265,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core,
mlxsw_sp->router_ops = &mlxsw_sp2_router_ops;
mlxsw_sp->listeners = mlxsw_sp2_listener;
mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp2_listener);
+ mlxsw_sp->fid_family_arr = mlxsw_sp2_fid_family_arr;
mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP2;
return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack);
@@ -3299,6 +3298,7 @@ static int mlxsw_sp3_init(struct mlxsw_core *mlxsw_core,
mlxsw_sp->router_ops = &mlxsw_sp2_router_ops;
mlxsw_sp->listeners = mlxsw_sp2_listener;
mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp2_listener);
+ mlxsw_sp->fid_family_arr = mlxsw_sp2_fid_family_arr;
mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3;
return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack);
@@ -3331,6 +3331,7 @@ static int mlxsw_sp4_init(struct mlxsw_core *mlxsw_core,
mlxsw_sp->router_ops = &mlxsw_sp2_router_ops;
mlxsw_sp->listeners = mlxsw_sp2_listener;
mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp2_listener);
+ mlxsw_sp->fid_family_arr = mlxsw_sp2_fid_family_arr;
mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP4;
return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack);
@@ -3379,7 +3380,7 @@ static const struct mlxsw_config_profile mlxsw_sp1_config_profile = {
.max_mid = MLXSW_SP_MID_MAX,
.used_flood_tables = 1,
.used_flood_mode = 1,
- .flood_mode = 3,
+ .flood_mode = MLXSW_CMD_MBOX_CONFIG_PROFILE_FLOOD_MODE_MIXED,
.max_fid_flood_tables = 3,
.fid_flood_table_size = MLXSW_SP_FID_FLOOD_TABLE_SIZE,
.used_max_ib_mc = 1,
@@ -3403,15 +3404,13 @@ static const struct mlxsw_config_profile mlxsw_sp2_config_profile = {
.max_mid = MLXSW_SP_MID_MAX,
.used_flood_tables = 1,
.used_flood_mode = 1,
- .flood_mode = 3,
+ .flood_mode = MLXSW_CMD_MBOX_CONFIG_PROFILE_FLOOD_MODE_MIXED,
.max_fid_flood_tables = 3,
.fid_flood_table_size = MLXSW_SP_FID_FLOOD_TABLE_SIZE,
.used_max_ib_mc = 1,
.max_ib_mc = 0,
.used_max_pkey = 1,
.max_pkey = 0,
- .used_kvh_xlt_cache_mode = 1,
- .kvh_xlt_cache_mode = 1,
.swid_config = {
{
.used_type = 1,
@@ -3585,6 +3584,25 @@ mlxsw_sp_resources_rif_mac_profile_register(struct mlxsw_core *mlxsw_core)
&size_params);
}
+static int mlxsw_sp_resources_rifs_register(struct mlxsw_core *mlxsw_core)
+{
+ struct devlink *devlink = priv_to_devlink(mlxsw_core);
+ struct devlink_resource_size_params size_params;
+ u64 max_rifs;
+
+ if (!MLXSW_CORE_RES_VALID(mlxsw_core, MAX_RIFS))
+ return -EIO;
+
+ max_rifs = MLXSW_CORE_RES_GET(mlxsw_core, MAX_RIFS);
+ devlink_resource_size_params_init(&size_params, max_rifs, max_rifs,
+ 1, DEVLINK_RESOURCE_UNIT_ENTRY);
+
+ return devlink_resource_register(devlink, "rifs", max_rifs,
+ MLXSW_SP_RESOURCE_RIFS,
+ DEVLINK_RESOURCE_ID_PARENT_TOP,
+ &size_params);
+}
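/* Once registered, the "rifs" resource is visible next to the existing
 * rif_mac_profiles entry via the devlink userspace tool, e.g.
 * "devlink resource show <bus>/<device>" (device address omitted here);
 * its size is the MAX_RIFS value queried from the device above.
 */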
+
static int mlxsw_sp1_resources_register(struct mlxsw_core *mlxsw_core)
{
int err;
@@ -3609,8 +3627,13 @@ static int mlxsw_sp1_resources_register(struct mlxsw_core *mlxsw_core)
if (err)
goto err_resources_rif_mac_profile_register;
+ err = mlxsw_sp_resources_rifs_register(mlxsw_core);
+ if (err)
+ goto err_resources_rifs_register;
+
return 0;
+err_resources_rifs_register:
err_resources_rif_mac_profile_register:
err_policer_resources_register:
err_resources_counter_register:
@@ -3643,8 +3666,13 @@ static int mlxsw_sp2_resources_register(struct mlxsw_core *mlxsw_core)
if (err)
goto err_resources_rif_mac_profile_register;
+ err = mlxsw_sp_resources_rifs_register(mlxsw_core);
+ if (err)
+ goto err_resources_rifs_register;
+
return 0;
+err_resources_rifs_register:
err_resources_rif_mac_profile_register:
err_policer_resources_register:
err_resources_counter_register:
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index a60d2bbd3aa6..80006a631333 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -68,6 +68,7 @@ enum mlxsw_sp_resource_id {
MLXSW_SP_RESOURCE_GLOBAL_POLICERS,
MLXSW_SP_RESOURCE_SINGLE_RATE_POLICERS,
MLXSW_SP_RESOURCE_RIF_MAC_PROFILES,
+ MLXSW_SP_RESOURCE_RIFS,
};
struct mlxsw_sp_port;
@@ -83,7 +84,7 @@ struct mlxsw_sp_upper {
enum mlxsw_sp_rif_type {
MLXSW_SP_RIF_TYPE_SUBPORT,
- MLXSW_SP_RIF_TYPE_VLAN,
+ MLXSW_SP_RIF_TYPE_VLAN_EMU,
MLXSW_SP_RIF_TYPE_FID,
MLXSW_SP_RIF_TYPE_IPIP_LB, /* IP-in-IP loopback. */
MLXSW_SP_RIF_TYPE_MAX,
@@ -210,6 +211,7 @@ struct mlxsw_sp {
const struct mlxsw_sp_mall_ops *mall_ops;
const struct mlxsw_sp_router_ops *router_ops;
const struct mlxsw_listener *listeners;
+ const struct mlxsw_sp_fid_family **fid_family_arr;
size_t listeners_count;
u32 lowest_shaper_bs;
struct rhashtable ipv6_addr_ht;
@@ -1236,7 +1238,6 @@ int mlxsw_sp_setup_tc_block_qevent_mark(struct mlxsw_sp_port *mlxsw_sp_port,
/* spectrum_fid.c */
bool mlxsw_sp_fid_is_dummy(struct mlxsw_sp *mlxsw_sp, u16 fid_index);
-bool mlxsw_sp_fid_lag_vid_valid(const struct mlxsw_sp_fid *fid);
struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_index(struct mlxsw_sp *mlxsw_sp,
u16 fid_index);
int mlxsw_sp_fid_nve_ifindex(const struct mlxsw_sp_fid *fid, int *nve_ifindex);
@@ -1286,6 +1287,9 @@ void mlxsw_sp_port_fids_fini(struct mlxsw_sp_port *mlxsw_sp_port);
int mlxsw_sp_fids_init(struct mlxsw_sp *mlxsw_sp);
void mlxsw_sp_fids_fini(struct mlxsw_sp *mlxsw_sp);
+extern const struct mlxsw_sp_fid_family *mlxsw_sp1_fid_family_arr[];
+extern const struct mlxsw_sp_fid_family *mlxsw_sp2_fid_family_arr[];
+
/* spectrum_mr.c */
enum mlxsw_sp_mr_route_prio {
MLXSW_SP_MR_ROUTE_PRIO_SG,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h
index a68d931090dd..15c8d4de8350 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h
@@ -8,8 +8,8 @@
#include "spectrum.h"
enum mlxsw_sp_counter_sub_pool_id {
- MLXSW_SP_COUNTER_SUB_POOL_FLOW,
MLXSW_SP_COUNTER_SUB_POOL_RIF,
+ MLXSW_SP_COUNTER_SUB_POOL_FLOW,
};
int mlxsw_sp_counter_alloc(struct mlxsw_sp *mlxsw_sp,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
index ce80931f0402..fb04fbec7c82 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
@@ -22,11 +22,18 @@ struct mlxsw_sp_fid_core {
unsigned int *port_fid_mappings;
};
+struct mlxsw_sp_fid_port_vid {
+ struct list_head list;
+ u16 local_port;
+ u16 vid;
+};
+
struct mlxsw_sp_fid {
struct list_head list;
struct mlxsw_sp_rif *rif;
refcount_t ref_count;
u16 fid_index;
+ u16 fid_offset;
struct mlxsw_sp_fid_family *fid_family;
struct rhash_head ht_node;
@@ -37,6 +44,7 @@ struct mlxsw_sp_fid {
int nve_ifindex;
u8 vni_valid:1,
nve_flood_index_valid:1;
+ struct list_head port_vid_list; /* Ordered by local port. */
};
struct mlxsw_sp_fid_8021q {
@@ -81,10 +89,9 @@ struct mlxsw_sp_fid_ops {
struct mlxsw_sp_port *port, u16 vid);
void (*port_vid_unmap)(struct mlxsw_sp_fid *fid,
struct mlxsw_sp_port *port, u16 vid);
- int (*vni_set)(struct mlxsw_sp_fid *fid, __be32 vni);
+ int (*vni_set)(struct mlxsw_sp_fid *fid);
void (*vni_clear)(struct mlxsw_sp_fid *fid);
- int (*nve_flood_index_set)(struct mlxsw_sp_fid *fid,
- u32 nve_flood_index);
+ int (*nve_flood_index_set)(struct mlxsw_sp_fid *fid);
void (*nve_flood_index_clear)(struct mlxsw_sp_fid *fid);
void (*fdb_clear_offload)(const struct mlxsw_sp_fid *fid,
const struct net_device *nve_dev);
@@ -102,7 +109,6 @@ struct mlxsw_sp_fid_family {
enum mlxsw_sp_rif_type rif_type;
const struct mlxsw_sp_fid_ops *ops;
struct mlxsw_sp *mlxsw_sp;
- u8 lag_vid_valid:1;
};
static const int mlxsw_sp_sfgc_uc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = {
@@ -137,11 +143,6 @@ bool mlxsw_sp_fid_is_dummy(struct mlxsw_sp *mlxsw_sp, u16 fid_index)
return fid_family->start_index == fid_index;
}
-bool mlxsw_sp_fid_lag_vid_valid(const struct mlxsw_sp_fid *fid)
-{
- return fid->fid_family->lag_vid_valid;
-}
-
struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_index(struct mlxsw_sp *mlxsw_sp,
u16 fid_index)
{
@@ -206,17 +207,20 @@ int mlxsw_sp_fid_nve_flood_index_set(struct mlxsw_sp_fid *fid,
const struct mlxsw_sp_fid_ops *ops = fid_family->ops;
int err;
- if (WARN_ON(!ops->nve_flood_index_set || fid->nve_flood_index_valid))
+ if (WARN_ON(fid->nve_flood_index_valid))
return -EINVAL;
- err = ops->nve_flood_index_set(fid, nve_flood_index);
- if (err)
- return err;
-
fid->nve_flood_index = nve_flood_index;
fid->nve_flood_index_valid = true;
+ err = ops->nve_flood_index_set(fid);
+ if (err)
+ goto err_nve_flood_index_set;
return 0;
+
+err_nve_flood_index_set:
+ fid->nve_flood_index_valid = false;
+ return err;
}
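/* Note the ordering above: nve_flood_index and nve_flood_index_valid are
 * committed to the FID before ops->nve_flood_index_set() runs, because the
 * callback now reads that state from the FID itself (see
 * mlxsw_sp_fid_edit_op() below) rather than taking it as arguments; the
 * valid flag is rolled back on failure. The vni_set() path below follows
 * the same pattern.
 */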
void mlxsw_sp_fid_nve_flood_index_clear(struct mlxsw_sp_fid *fid)
@@ -224,7 +228,7 @@ void mlxsw_sp_fid_nve_flood_index_clear(struct mlxsw_sp_fid *fid)
struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
const struct mlxsw_sp_fid_ops *ops = fid_family->ops;
- if (WARN_ON(!ops->nve_flood_index_clear || !fid->nve_flood_index_valid))
+ if (WARN_ON(!fid->nve_flood_index_valid))
return;
fid->nve_flood_index_valid = false;
@@ -244,7 +248,7 @@ int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, enum mlxsw_sp_nve_type type,
struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp;
int err;
- if (WARN_ON(!ops->vni_set || fid->vni_valid))
+ if (WARN_ON(fid->vni_valid))
return -EINVAL;
fid->nve_type = type;
@@ -256,15 +260,15 @@ int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, enum mlxsw_sp_nve_type type,
if (err)
return err;
- err = ops->vni_set(fid, vni);
+ fid->vni_valid = true;
+ err = ops->vni_set(fid);
if (err)
goto err_vni_set;
- fid->vni_valid = true;
-
return 0;
err_vni_set:
+ fid->vni_valid = false;
rhashtable_remove_fast(&mlxsw_sp->fid_core->vni_ht, &fid->vni_ht_node,
mlxsw_sp_fid_vni_ht_params);
return err;
@@ -276,7 +280,7 @@ void mlxsw_sp_fid_vni_clear(struct mlxsw_sp_fid *fid)
const struct mlxsw_sp_fid_ops *ops = fid_family->ops;
struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp;
- if (WARN_ON(!ops->vni_clear || !fid->vni_valid))
+ if (WARN_ON(!fid->vni_valid))
return;
fid->vni_valid = false;
@@ -405,6 +409,7 @@ static void mlxsw_sp_fid_8021q_setup(struct mlxsw_sp_fid *fid, const void *arg)
u16 vid = *(u16 *) arg;
mlxsw_sp_fid_8021q_fid(fid)->vid = vid;
+ fid->fid_offset = 0;
}
static enum mlxsw_reg_sfmr_op mlxsw_sp_sfmr_op(bool valid)
@@ -413,38 +418,38 @@ static enum mlxsw_reg_sfmr_op mlxsw_sp_sfmr_op(bool valid)
MLXSW_REG_SFMR_OP_DESTROY_FID;
}
-static int mlxsw_sp_fid_op(struct mlxsw_sp *mlxsw_sp, u16 fid_index,
- u16 fid_offset, bool valid)
+static int mlxsw_sp_fid_op(const struct mlxsw_sp_fid *fid, bool valid)
{
+ struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp;
char sfmr_pl[MLXSW_REG_SFMR_LEN];
- mlxsw_reg_sfmr_pack(sfmr_pl, mlxsw_sp_sfmr_op(valid), fid_index,
- fid_offset);
+ mlxsw_reg_sfmr_pack(sfmr_pl, mlxsw_sp_sfmr_op(valid), fid->fid_index,
+ fid->fid_offset);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl);
}
-static int mlxsw_sp_fid_vni_op(struct mlxsw_sp *mlxsw_sp, u16 fid_index,
- __be32 vni, bool vni_valid, u32 nve_flood_index,
- bool nve_flood_index_valid)
+static int mlxsw_sp_fid_edit_op(const struct mlxsw_sp_fid *fid)
{
+ struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp;
char sfmr_pl[MLXSW_REG_SFMR_LEN];
- mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_CREATE_FID, fid_index,
- 0);
- mlxsw_reg_sfmr_vv_set(sfmr_pl, vni_valid);
- mlxsw_reg_sfmr_vni_set(sfmr_pl, be32_to_cpu(vni));
- mlxsw_reg_sfmr_vtfp_set(sfmr_pl, nve_flood_index_valid);
- mlxsw_reg_sfmr_nve_tunnel_flood_ptr_set(sfmr_pl, nve_flood_index);
+ mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_CREATE_FID,
+ fid->fid_index, fid->fid_offset);
+ mlxsw_reg_sfmr_vv_set(sfmr_pl, fid->vni_valid);
+ mlxsw_reg_sfmr_vni_set(sfmr_pl, be32_to_cpu(fid->vni));
+ mlxsw_reg_sfmr_vtfp_set(sfmr_pl, fid->nve_flood_index_valid);
+ mlxsw_reg_sfmr_nve_tunnel_flood_ptr_set(sfmr_pl, fid->nve_flood_index);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl);
}
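/* mlxsw_sp_fid_edit_op() simply re-issues SFMR CREATE_FID with the FID's
 * current vni/nve_flood_index state, which is why one helper can now back
 * vni_set/vni_clear and both nve_flood_index callbacks in place of the
 * parameter-heavy mlxsw_sp_fid_vni_op() it replaces.
 */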
-static int __mlxsw_sp_fid_port_vid_map(struct mlxsw_sp *mlxsw_sp, u16 fid_index,
+static int __mlxsw_sp_fid_port_vid_map(const struct mlxsw_sp_fid *fid,
u16 local_port, u16 vid, bool valid)
{
- enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID;
+ struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp;
char svfa_pl[MLXSW_REG_SVFA_LEN];
- mlxsw_reg_svfa_pack(svfa_pl, local_port, mt, valid, fid_index, vid);
+ mlxsw_reg_svfa_port_vid_pack(svfa_pl, local_port, valid, fid->fid_index,
+ vid);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(svfa), svfa_pl);
}
@@ -459,20 +464,19 @@ static void mlxsw_sp_fid_8021d_setup(struct mlxsw_sp_fid *fid, const void *arg)
int br_ifindex = *(int *) arg;
mlxsw_sp_fid_8021d_fid(fid)->br_ifindex = br_ifindex;
+ fid->fid_offset = 0;
}
static int mlxsw_sp_fid_8021d_configure(struct mlxsw_sp_fid *fid)
{
- struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
-
- return mlxsw_sp_fid_op(fid_family->mlxsw_sp, fid->fid_index, 0, true);
+ return mlxsw_sp_fid_op(fid, true);
}
static void mlxsw_sp_fid_8021d_deconfigure(struct mlxsw_sp_fid *fid)
{
if (fid->vni_valid)
mlxsw_sp_nve_fid_disable(fid->fid_family->mlxsw_sp, fid);
- mlxsw_sp_fid_op(fid->fid_family->mlxsw_sp, fid->fid_index, 0, false);
+ mlxsw_sp_fid_op(fid, false);
}
static int mlxsw_sp_fid_8021d_index_alloc(struct mlxsw_sp_fid *fid,
@@ -505,7 +509,6 @@ static u16 mlxsw_sp_fid_8021d_flood_index(const struct mlxsw_sp_fid *fid)
static int mlxsw_sp_port_vp_mode_trans(struct mlxsw_sp_port *mlxsw_sp_port)
{
- struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
int err;
@@ -517,7 +520,7 @@ static int mlxsw_sp_port_vp_mode_trans(struct mlxsw_sp_port *mlxsw_sp_port)
if (!fid)
continue;
- err = __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index,
+ err = __mlxsw_sp_fid_port_vid_map(fid,
mlxsw_sp_port->local_port,
vid, true);
if (err)
@@ -540,8 +543,7 @@ err_fid_port_vid_map:
if (!fid)
continue;
- __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index,
- mlxsw_sp_port->local_port, vid,
+ __mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port->local_port, vid,
false);
}
return err;
@@ -549,7 +551,6 @@ err_fid_port_vid_map:
static void mlxsw_sp_port_vlan_mode_trans(struct mlxsw_sp_port *mlxsw_sp_port)
{
- struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, false);
@@ -562,12 +563,49 @@ static void mlxsw_sp_port_vlan_mode_trans(struct mlxsw_sp_port *mlxsw_sp_port)
if (!fid)
continue;
- __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index,
- mlxsw_sp_port->local_port, vid,
+ __mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port->local_port, vid,
false);
}
}
+static int
+mlxsw_sp_fid_port_vid_list_add(struct mlxsw_sp_fid *fid, u16 local_port,
+ u16 vid)
+{
+ struct mlxsw_sp_fid_port_vid *port_vid, *tmp_port_vid;
+
+ port_vid = kzalloc(sizeof(*port_vid), GFP_KERNEL);
+ if (!port_vid)
+ return -ENOMEM;
+
+ port_vid->local_port = local_port;
+ port_vid->vid = vid;
+
+ list_for_each_entry(tmp_port_vid, &fid->port_vid_list, list) {
+ if (tmp_port_vid->local_port > local_port)
+ break;
+ }
+
+ list_add_tail(&port_vid->list, &tmp_port_vid->list);
+ return 0;
+}
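/* If the walk above falls off the end of the list, &tmp_port_vid->list is
 * the list head itself and list_add_tail() appends the new record at the
 * tail; otherwise it links the record just before the first entry with a
 * higher local_port, preserving the sort order documented on
 * port_vid_list.
 */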
+
+static void
+mlxsw_sp_fid_port_vid_list_del(struct mlxsw_sp_fid *fid, u16 local_port,
+ u16 vid)
+{
+ struct mlxsw_sp_fid_port_vid *port_vid, *tmp;
+
+ list_for_each_entry_safe(port_vid, tmp, &fid->port_vid_list, list) {
+ if (port_vid->local_port != local_port || port_vid->vid != vid)
+ continue;
+
+ list_del(&port_vid->list);
+ kfree(port_vid);
+ return;
+ }
+}
+
static int mlxsw_sp_fid_8021d_port_vid_map(struct mlxsw_sp_fid *fid,
struct mlxsw_sp_port *mlxsw_sp_port,
u16 vid)
@@ -576,11 +614,16 @@ static int mlxsw_sp_fid_8021d_port_vid_map(struct mlxsw_sp_fid *fid,
u16 local_port = mlxsw_sp_port->local_port;
int err;
- err = __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index,
- mlxsw_sp_port->local_port, vid, true);
+ err = __mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port->local_port, vid,
+ true);
if (err)
return err;
+ err = mlxsw_sp_fid_port_vid_list_add(fid, mlxsw_sp_port->local_port,
+ vid);
+ if (err)
+ goto err_port_vid_list_add;
+
if (mlxsw_sp->fid_core->port_fid_mappings[local_port]++ == 0) {
err = mlxsw_sp_port_vp_mode_trans(mlxsw_sp_port);
if (err)
@@ -591,8 +634,9 @@ static int mlxsw_sp_fid_8021d_port_vid_map(struct mlxsw_sp_fid *fid,
err_port_vp_mode_trans:
mlxsw_sp->fid_core->port_fid_mappings[local_port]--;
- __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index,
- mlxsw_sp_port->local_port, vid, false);
+ mlxsw_sp_fid_port_vid_list_del(fid, mlxsw_sp_port->local_port, vid);
+err_port_vid_list_add:
+ __mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port->local_port, vid, false);
return err;
}
@@ -606,43 +650,28 @@ mlxsw_sp_fid_8021d_port_vid_unmap(struct mlxsw_sp_fid *fid,
if (mlxsw_sp->fid_core->port_fid_mappings[local_port] == 1)
mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port);
mlxsw_sp->fid_core->port_fid_mappings[local_port]--;
- __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index,
- mlxsw_sp_port->local_port, vid, false);
+ mlxsw_sp_fid_port_vid_list_del(fid, mlxsw_sp_port->local_port, vid);
+ __mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port->local_port, vid, false);
}
-static int mlxsw_sp_fid_8021d_vni_set(struct mlxsw_sp_fid *fid, __be32 vni)
+static int mlxsw_sp_fid_8021d_vni_set(struct mlxsw_sp_fid *fid)
{
- struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
-
- return mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, vni,
- true, fid->nve_flood_index,
- fid->nve_flood_index_valid);
+ return mlxsw_sp_fid_edit_op(fid);
}
static void mlxsw_sp_fid_8021d_vni_clear(struct mlxsw_sp_fid *fid)
{
- struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
-
- mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, 0, false,
- fid->nve_flood_index, fid->nve_flood_index_valid);
+ mlxsw_sp_fid_edit_op(fid);
}
-static int mlxsw_sp_fid_8021d_nve_flood_index_set(struct mlxsw_sp_fid *fid,
- u32 nve_flood_index)
+static int mlxsw_sp_fid_8021d_nve_flood_index_set(struct mlxsw_sp_fid *fid)
{
- struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
-
- return mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index,
- fid->vni, fid->vni_valid, nve_flood_index,
- true);
+ return mlxsw_sp_fid_edit_op(fid);
}
static void mlxsw_sp_fid_8021d_nve_flood_index_clear(struct mlxsw_sp_fid *fid)
{
- struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
-
- mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, fid->vni,
- fid->vni_valid, 0, false);
+ mlxsw_sp_fid_edit_op(fid);
}
static void
@@ -699,7 +728,6 @@ static const struct mlxsw_sp_fid_family mlxsw_sp_fid_8021d_family = {
.nr_flood_tables = ARRAY_SIZE(mlxsw_sp_fid_8021d_flood_tables),
.rif_type = MLXSW_SP_RIF_TYPE_FID,
.ops = &mlxsw_sp_fid_8021d_ops,
- .lag_vid_valid = 1,
};
static bool
@@ -746,11 +774,15 @@ static const struct mlxsw_sp_fid_family mlxsw_sp_fid_8021q_emu_family = {
.end_index = MLXSW_SP_FID_8021Q_EMU_END,
.flood_tables = mlxsw_sp_fid_8021d_flood_tables,
.nr_flood_tables = ARRAY_SIZE(mlxsw_sp_fid_8021d_flood_tables),
- .rif_type = MLXSW_SP_RIF_TYPE_VLAN,
+ .rif_type = MLXSW_SP_RIF_TYPE_VLAN_EMU,
.ops = &mlxsw_sp_fid_8021q_emu_ops,
- .lag_vid_valid = 1,
};
+static void mlxsw_sp_fid_rfid_setup(struct mlxsw_sp_fid *fid, const void *arg)
+{
+ fid->fid_offset = 0;
+}
+
static int mlxsw_sp_fid_rfid_configure(struct mlxsw_sp_fid *fid)
{
/* rFIDs are allocated by the device during init */
@@ -787,6 +819,11 @@ static int mlxsw_sp_fid_rfid_port_vid_map(struct mlxsw_sp_fid *fid,
u16 local_port = mlxsw_sp_port->local_port;
int err;
+ err = mlxsw_sp_fid_port_vid_list_add(fid, mlxsw_sp_port->local_port,
+ vid);
+ if (err)
+ return err;
+
/* We only need to transition the port to virtual mode since
* {Port, VID} => FID is done by the firmware upon RIF creation.
*/
@@ -800,6 +837,7 @@ static int mlxsw_sp_fid_rfid_port_vid_map(struct mlxsw_sp_fid *fid,
err_port_vp_mode_trans:
mlxsw_sp->fid_core->port_fid_mappings[local_port]--;
+ mlxsw_sp_fid_port_vid_list_del(fid, mlxsw_sp_port->local_port, vid);
return err;
}
@@ -813,15 +851,41 @@ mlxsw_sp_fid_rfid_port_vid_unmap(struct mlxsw_sp_fid *fid,
if (mlxsw_sp->fid_core->port_fid_mappings[local_port] == 1)
mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port);
mlxsw_sp->fid_core->port_fid_mappings[local_port]--;
+ mlxsw_sp_fid_port_vid_list_del(fid, mlxsw_sp_port->local_port, vid);
+}
+
+static int mlxsw_sp_fid_rfid_vni_set(struct mlxsw_sp_fid *fid)
+{
+ return -EOPNOTSUPP;
+}
+
+static void mlxsw_sp_fid_rfid_vni_clear(struct mlxsw_sp_fid *fid)
+{
+ WARN_ON_ONCE(1);
+}
+
+static int mlxsw_sp_fid_rfid_nve_flood_index_set(struct mlxsw_sp_fid *fid)
+{
+ return -EOPNOTSUPP;
+}
+
+static void mlxsw_sp_fid_rfid_nve_flood_index_clear(struct mlxsw_sp_fid *fid)
+{
+ WARN_ON_ONCE(1);
}
static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_rfid_ops = {
+ .setup = mlxsw_sp_fid_rfid_setup,
.configure = mlxsw_sp_fid_rfid_configure,
.deconfigure = mlxsw_sp_fid_rfid_deconfigure,
.index_alloc = mlxsw_sp_fid_rfid_index_alloc,
.compare = mlxsw_sp_fid_rfid_compare,
.port_vid_map = mlxsw_sp_fid_rfid_port_vid_map,
.port_vid_unmap = mlxsw_sp_fid_rfid_port_vid_unmap,
+ .vni_set = mlxsw_sp_fid_rfid_vni_set,
+ .vni_clear = mlxsw_sp_fid_rfid_vni_clear,
+ .nve_flood_index_set = mlxsw_sp_fid_rfid_nve_flood_index_set,
+ .nve_flood_index_clear = mlxsw_sp_fid_rfid_nve_flood_index_clear,
};
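/* The stub callbacks above (-EOPNOTSUPP and WARN_ON_ONCE) exist because the
 * core no longer NULL-checks ops->vni_set and friends before calling them;
 * the WARN_ON(!ops->...) guards were dropped earlier in this patch, so every
 * FID family must now provide the full ops set. The dummy family below grows
 * matching stubs for the same reason.
 */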
#define MLXSW_SP_RFID_BASE (15 * 1024)
@@ -836,16 +900,19 @@ static const struct mlxsw_sp_fid_family mlxsw_sp_fid_rfid_family = {
.ops = &mlxsw_sp_fid_rfid_ops,
};
-static int mlxsw_sp_fid_dummy_configure(struct mlxsw_sp_fid *fid)
+static void mlxsw_sp_fid_dummy_setup(struct mlxsw_sp_fid *fid, const void *arg)
{
- struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp;
+ fid->fid_offset = 0;
+}
- return mlxsw_sp_fid_op(mlxsw_sp, fid->fid_index, 0, true);
+static int mlxsw_sp_fid_dummy_configure(struct mlxsw_sp_fid *fid)
+{
+ return mlxsw_sp_fid_op(fid, true);
}
static void mlxsw_sp_fid_dummy_deconfigure(struct mlxsw_sp_fid *fid)
{
- mlxsw_sp_fid_op(fid->fid_family->mlxsw_sp, fid->fid_index, 0, false);
+ mlxsw_sp_fid_op(fid, false);
}
static int mlxsw_sp_fid_dummy_index_alloc(struct mlxsw_sp_fid *fid,
@@ -862,11 +929,36 @@ static bool mlxsw_sp_fid_dummy_compare(const struct mlxsw_sp_fid *fid,
return true;
}
+static int mlxsw_sp_fid_dummy_vni_set(struct mlxsw_sp_fid *fid)
+{
+ return -EOPNOTSUPP;
+}
+
+static void mlxsw_sp_fid_dummy_vni_clear(struct mlxsw_sp_fid *fid)
+{
+ WARN_ON_ONCE(1);
+}
+
+static int mlxsw_sp_fid_dummy_nve_flood_index_set(struct mlxsw_sp_fid *fid)
+{
+ return -EOPNOTSUPP;
+}
+
+static void mlxsw_sp_fid_dummy_nve_flood_index_clear(struct mlxsw_sp_fid *fid)
+{
+ WARN_ON_ONCE(1);
+}
+
static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_dummy_ops = {
+ .setup = mlxsw_sp_fid_dummy_setup,
.configure = mlxsw_sp_fid_dummy_configure,
.deconfigure = mlxsw_sp_fid_dummy_deconfigure,
.index_alloc = mlxsw_sp_fid_dummy_index_alloc,
.compare = mlxsw_sp_fid_dummy_compare,
+ .vni_set = mlxsw_sp_fid_dummy_vni_set,
+ .vni_clear = mlxsw_sp_fid_dummy_vni_clear,
+ .nve_flood_index_set = mlxsw_sp_fid_dummy_nve_flood_index_set,
+ .nve_flood_index_clear = mlxsw_sp_fid_dummy_nve_flood_index_clear,
};
static const struct mlxsw_sp_fid_family mlxsw_sp_fid_dummy_family = {
@@ -877,7 +969,14 @@ static const struct mlxsw_sp_fid_family mlxsw_sp_fid_dummy_family = {
.ops = &mlxsw_sp_fid_dummy_ops,
};
-static const struct mlxsw_sp_fid_family *mlxsw_sp_fid_family_arr[] = {
+const struct mlxsw_sp_fid_family *mlxsw_sp1_fid_family_arr[] = {
+ [MLXSW_SP_FID_TYPE_8021Q] = &mlxsw_sp_fid_8021q_emu_family,
+ [MLXSW_SP_FID_TYPE_8021D] = &mlxsw_sp_fid_8021d_family,
+ [MLXSW_SP_FID_TYPE_RFID] = &mlxsw_sp_fid_rfid_family,
+ [MLXSW_SP_FID_TYPE_DUMMY] = &mlxsw_sp_fid_dummy_family,
+};
+
+const struct mlxsw_sp_fid_family *mlxsw_sp2_fid_family_arr[] = {
[MLXSW_SP_FID_TYPE_8021Q] = &mlxsw_sp_fid_8021q_emu_family,
[MLXSW_SP_FID_TYPE_8021D] = &mlxsw_sp_fid_8021d_family,
[MLXSW_SP_FID_TYPE_RFID] = &mlxsw_sp_fid_rfid_family,
@@ -919,6 +1018,8 @@ static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp,
fid = kzalloc(fid_family->fid_size, GFP_KERNEL);
if (!fid)
return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&fid->port_vid_list);
fid->fid_family = fid_family;
err = fid->fid_family->ops->index_alloc(fid, arg, &fid_index);
@@ -927,8 +1028,7 @@ static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp,
fid->fid_index = fid_index;
__set_bit(fid_index - fid_family->start_index, fid_family->fids_bitmap);
- if (fid->fid_family->ops->setup)
- fid->fid_family->ops->setup(fid, arg);
+ fid->fid_family->ops->setup(fid, arg);
err = fid->fid_family->ops->configure(fid);
if (err)
@@ -967,6 +1067,7 @@ void mlxsw_sp_fid_put(struct mlxsw_sp_fid *fid)
fid->fid_family->ops->deconfigure(fid);
__clear_bit(fid->fid_index - fid_family->start_index,
fid_family->fids_bitmap);
+ WARN_ON_ONCE(!list_empty(&fid->port_vid_list));
kfree(fid);
}
@@ -1144,7 +1245,7 @@ int mlxsw_sp_fids_init(struct mlxsw_sp *mlxsw_sp)
for (i = 0; i < MLXSW_SP_FID_TYPE_MAX; i++) {
err = mlxsw_sp_fid_family_register(mlxsw_sp,
- mlxsw_sp_fid_family_arr[i]);
+ mlxsw_sp->fid_family_arr[i]);
if (err)
goto err_fid_ops_register;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 9dbb573d53ea..0b103fc68a1a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -443,65 +443,12 @@ struct mlxsw_sp_fib_entry_decap {
u32 tunnel_index;
};
-static struct mlxsw_sp_fib_entry_priv *
-mlxsw_sp_fib_entry_priv_create(const struct mlxsw_sp_router_ll_ops *ll_ops)
-{
- struct mlxsw_sp_fib_entry_priv *priv;
-
- if (!ll_ops->fib_entry_priv_size)
- /* No need to have priv */
- return NULL;
-
- priv = kzalloc(sizeof(*priv) + ll_ops->fib_entry_priv_size, GFP_KERNEL);
- if (!priv)
- return ERR_PTR(-ENOMEM);
- refcount_set(&priv->refcnt, 1);
- return priv;
-}
-
-static void
-mlxsw_sp_fib_entry_priv_destroy(struct mlxsw_sp_fib_entry_priv *priv)
-{
- kfree(priv);
-}
-
-static void mlxsw_sp_fib_entry_priv_hold(struct mlxsw_sp_fib_entry_priv *priv)
-{
- refcount_inc(&priv->refcnt);
-}
-
-static void mlxsw_sp_fib_entry_priv_put(struct mlxsw_sp_fib_entry_priv *priv)
-{
- if (!priv || !refcount_dec_and_test(&priv->refcnt))
- return;
- mlxsw_sp_fib_entry_priv_destroy(priv);
-}
-
-static void mlxsw_sp_fib_entry_op_ctx_priv_hold(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
- struct mlxsw_sp_fib_entry_priv *priv)
-{
- if (!priv)
- return;
- mlxsw_sp_fib_entry_priv_hold(priv);
- list_add(&priv->list, &op_ctx->fib_entry_priv_list);
-}
-
-static void mlxsw_sp_fib_entry_op_ctx_priv_put_all(struct mlxsw_sp_fib_entry_op_ctx *op_ctx)
-{
- struct mlxsw_sp_fib_entry_priv *priv, *tmp;
-
- list_for_each_entry_safe(priv, tmp, &op_ctx->fib_entry_priv_list, list)
- mlxsw_sp_fib_entry_priv_put(priv);
- INIT_LIST_HEAD(&op_ctx->fib_entry_priv_list);
-}
-
struct mlxsw_sp_fib_entry {
struct mlxsw_sp_fib_node *fib_node;
enum mlxsw_sp_fib_entry_type type;
struct list_head nexthop_group_node;
struct mlxsw_sp_nexthop_group *nh_group;
struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
- struct mlxsw_sp_fib_entry_priv *priv;
};
struct mlxsw_sp_fib4_entry {
@@ -537,7 +484,6 @@ struct mlxsw_sp_fib {
struct mlxsw_sp_vr *vr;
struct mlxsw_sp_lpm_tree *lpm_tree;
enum mlxsw_sp_l3proto proto;
- const struct mlxsw_sp_router_ll_ops *ll_ops;
};
struct mlxsw_sp_vr {
@@ -551,45 +497,16 @@ struct mlxsw_sp_vr {
refcount_t ul_rif_refcnt;
};
-static int mlxsw_sp_router_ll_basic_init(struct mlxsw_sp *mlxsw_sp, u16 vr_id,
- enum mlxsw_sp_l3proto proto)
-{
- return 0;
-}
-
-static int mlxsw_sp_router_ll_basic_ralta_write(struct mlxsw_sp *mlxsw_sp, char *xralta_pl)
-{
- return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta),
- xralta_pl + MLXSW_REG_XRALTA_RALTA_OFFSET);
-}
-
-static int mlxsw_sp_router_ll_basic_ralst_write(struct mlxsw_sp *mlxsw_sp, char *xralst_pl)
-{
- return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst),
- xralst_pl + MLXSW_REG_XRALST_RALST_OFFSET);
-}
-
-static int mlxsw_sp_router_ll_basic_raltb_write(struct mlxsw_sp *mlxsw_sp, char *xraltb_pl)
-{
- return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
- xraltb_pl + MLXSW_REG_XRALTB_RALTB_OFFSET);
-}
-
static const struct rhashtable_params mlxsw_sp_fib_ht_params;
static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_vr *vr,
enum mlxsw_sp_l3proto proto)
{
- const struct mlxsw_sp_router_ll_ops *ll_ops = mlxsw_sp->router->proto_ll_ops[proto];
struct mlxsw_sp_lpm_tree *lpm_tree;
struct mlxsw_sp_fib *fib;
int err;
- err = ll_ops->init(mlxsw_sp, vr->id, proto);
- if (err)
- return ERR_PTR(err);
-
lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
fib = kzalloc(sizeof(*fib), GFP_KERNEL);
if (!fib)
@@ -601,7 +518,6 @@ static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
fib->proto = proto;
fib->vr = vr;
fib->lpm_tree = lpm_tree;
- fib->ll_ops = ll_ops;
mlxsw_sp_lpm_tree_hold(lpm_tree);
err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
if (err)
@@ -640,36 +556,33 @@ mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
}
static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
- const struct mlxsw_sp_router_ll_ops *ll_ops,
struct mlxsw_sp_lpm_tree *lpm_tree)
{
- char xralta_pl[MLXSW_REG_XRALTA_LEN];
+ char ralta_pl[MLXSW_REG_RALTA_LEN];
- mlxsw_reg_xralta_pack(xralta_pl, true,
- (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
- lpm_tree->id);
- return ll_ops->ralta_write(mlxsw_sp, xralta_pl);
+ mlxsw_reg_ralta_pack(ralta_pl, true,
+ (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
+ lpm_tree->id);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}
static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
- const struct mlxsw_sp_router_ll_ops *ll_ops,
struct mlxsw_sp_lpm_tree *lpm_tree)
{
- char xralta_pl[MLXSW_REG_XRALTA_LEN];
+ char ralta_pl[MLXSW_REG_RALTA_LEN];
- mlxsw_reg_xralta_pack(xralta_pl, false,
- (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
- lpm_tree->id);
- ll_ops->ralta_write(mlxsw_sp, xralta_pl);
+ mlxsw_reg_ralta_pack(ralta_pl, false,
+ (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
+ lpm_tree->id);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}
static int
mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
- const struct mlxsw_sp_router_ll_ops *ll_ops,
struct mlxsw_sp_prefix_usage *prefix_usage,
struct mlxsw_sp_lpm_tree *lpm_tree)
{
- char xralst_pl[MLXSW_REG_XRALST_LEN];
+ char ralst_pl[MLXSW_REG_RALST_LEN];
u8 root_bin = 0;
u8 prefix;
u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
@@ -677,20 +590,19 @@ mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
root_bin = prefix;
- mlxsw_reg_xralst_pack(xralst_pl, root_bin, lpm_tree->id);
+ mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
if (prefix == 0)
continue;
- mlxsw_reg_xralst_bin_pack(xralst_pl, prefix, last_prefix,
- MLXSW_REG_RALST_BIN_NO_CHILD);
+ mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
+ MLXSW_REG_RALST_BIN_NO_CHILD);
last_prefix = prefix;
}
- return ll_ops->ralst_write(mlxsw_sp, xralst_pl);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
}
static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
- const struct mlxsw_sp_router_ll_ops *ll_ops,
struct mlxsw_sp_prefix_usage *prefix_usage,
enum mlxsw_sp_l3proto proto)
{
@@ -701,11 +613,12 @@ mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
if (!lpm_tree)
return ERR_PTR(-EBUSY);
lpm_tree->proto = proto;
- err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, ll_ops, lpm_tree);
+ err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
if (err)
return ERR_PTR(err);
- err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, ll_ops, prefix_usage, lpm_tree);
+ err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
+ lpm_tree);
if (err)
goto err_left_struct_set;
memcpy(&lpm_tree->prefix_usage, prefix_usage,
@@ -716,15 +629,14 @@ mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
return lpm_tree;
err_left_struct_set:
- mlxsw_sp_lpm_tree_free(mlxsw_sp, ll_ops, lpm_tree);
+ mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
return ERR_PTR(err);
}
static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
- const struct mlxsw_sp_router_ll_ops *ll_ops,
struct mlxsw_sp_lpm_tree *lpm_tree)
{
- mlxsw_sp_lpm_tree_free(mlxsw_sp, ll_ops, lpm_tree);
+ mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}
static struct mlxsw_sp_lpm_tree *
@@ -732,7 +644,6 @@ mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_prefix_usage *prefix_usage,
enum mlxsw_sp_l3proto proto)
{
- const struct mlxsw_sp_router_ll_ops *ll_ops = mlxsw_sp->router->proto_ll_ops[proto];
struct mlxsw_sp_lpm_tree *lpm_tree;
int i;
@@ -746,7 +657,7 @@ mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
return lpm_tree;
}
}
- return mlxsw_sp_lpm_tree_create(mlxsw_sp, ll_ops, prefix_usage, proto);
+ return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
}
static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
@@ -757,11 +668,8 @@ static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_lpm_tree *lpm_tree)
{
- const struct mlxsw_sp_router_ll_ops *ll_ops =
- mlxsw_sp->router->proto_ll_ops[lpm_tree->proto];
-
if (--lpm_tree->ref_count == 0)
- mlxsw_sp_lpm_tree_destroy(mlxsw_sp, ll_ops, lpm_tree);
+ mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
}
#define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
@@ -851,23 +759,23 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
const struct mlxsw_sp_fib *fib, u8 tree_id)
{
- char xraltb_pl[MLXSW_REG_XRALTB_LEN];
+ char raltb_pl[MLXSW_REG_RALTB_LEN];
- mlxsw_reg_xraltb_pack(xraltb_pl, fib->vr->id,
- (enum mlxsw_reg_ralxx_protocol) fib->proto,
- tree_id);
- return fib->ll_ops->raltb_write(mlxsw_sp, xraltb_pl);
+ mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
+ (enum mlxsw_reg_ralxx_protocol) fib->proto,
+ tree_id);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}
static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
const struct mlxsw_sp_fib *fib)
{
- char xraltb_pl[MLXSW_REG_XRALTB_LEN];
+ char raltb_pl[MLXSW_REG_RALTB_LEN];
/* Bind to tree 0 which is default */
- mlxsw_reg_xraltb_pack(xraltb_pl, fib->vr->id,
- (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
- return fib->ll_ops->raltb_write(mlxsw_sp, xraltb_pl);
+ mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
+ (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}
static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
@@ -5778,14 +5686,13 @@ mlxsw_sp_fib_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
static void
mlxsw_sp_fib_entry_hw_flags_refresh(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry *fib_entry,
- enum mlxsw_sp_fib_entry_op op)
+ enum mlxsw_reg_ralue_op op)
{
switch (op) {
- case MLXSW_SP_FIB_ENTRY_OP_WRITE:
- case MLXSW_SP_FIB_ENTRY_OP_UPDATE:
+ case MLXSW_REG_RALUE_OP_WRITE_WRITE:
mlxsw_sp_fib_entry_hw_flags_set(mlxsw_sp, fib_entry);
break;
- case MLXSW_SP_FIB_ENTRY_OP_DELETE:
+ case MLXSW_REG_RALUE_OP_WRITE_DELETE:
mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, fib_entry);
break;
default:
@@ -5793,140 +5700,39 @@ mlxsw_sp_fib_entry_hw_flags_refresh(struct mlxsw_sp *mlxsw_sp,
}
}
-struct mlxsw_sp_fib_entry_op_ctx_basic {
- char ralue_pl[MLXSW_REG_RALUE_LEN];
-};
-
static void
-mlxsw_sp_router_ll_basic_fib_entry_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
- enum mlxsw_sp_l3proto proto,
- enum mlxsw_sp_fib_entry_op op,
- u16 virtual_router, u8 prefix_len,
- unsigned char *addr,
- struct mlxsw_sp_fib_entry_priv *priv)
+mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
+ const struct mlxsw_sp_fib_entry *fib_entry,
+ enum mlxsw_reg_ralue_op op)
{
- struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv;
- enum mlxsw_reg_ralxx_protocol ralxx_proto;
- char *ralue_pl = op_ctx_basic->ralue_pl;
- enum mlxsw_reg_ralue_op ralue_op;
+ struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
+ enum mlxsw_reg_ralxx_protocol proto;
+ u32 *p_dip;
- ralxx_proto = (enum mlxsw_reg_ralxx_protocol) proto;
+ proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
- switch (op) {
- case MLXSW_SP_FIB_ENTRY_OP_WRITE:
- case MLXSW_SP_FIB_ENTRY_OP_UPDATE:
- ralue_op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
- break;
- case MLXSW_SP_FIB_ENTRY_OP_DELETE:
- ralue_op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
- break;
- default:
- WARN_ON_ONCE(1);
- return;
- }
-
- switch (proto) {
+ switch (fib->proto) {
case MLXSW_SP_L3_PROTO_IPV4:
- mlxsw_reg_ralue_pack4(ralue_pl, ralxx_proto, ralue_op,
- virtual_router, prefix_len, (u32 *) addr);
+ p_dip = (u32 *) fib_entry->fib_node->key.addr;
+ mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
+ fib_entry->fib_node->key.prefix_len,
+ *p_dip);
break;
case MLXSW_SP_L3_PROTO_IPV6:
- mlxsw_reg_ralue_pack6(ralue_pl, ralxx_proto, ralue_op,
- virtual_router, prefix_len, addr);
+ mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
+ fib_entry->fib_node->key.prefix_len,
+ fib_entry->fib_node->key.addr);
break;
}
}
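/* With the XMDR bulking layer removed, a RALUE record is again packed into a
 * stack buffer and written synchronously via mlxsw_reg_write(). The
 * (enum mlxsw_reg_ralxx_protocol) cast assumes, as the pre-XM code did, that
 * the mlxsw_sp_l3proto values match the register encoding.
 */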
-static void
-mlxsw_sp_router_ll_basic_fib_entry_act_remote_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
- enum mlxsw_reg_ralue_trap_action trap_action,
- u16 trap_id, u32 adjacency_index, u16 ecmp_size)
-{
- struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv;
-
- mlxsw_reg_ralue_act_remote_pack(op_ctx_basic->ralue_pl, trap_action,
- trap_id, adjacency_index, ecmp_size);
-}
-
-static void
-mlxsw_sp_router_ll_basic_fib_entry_act_local_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
- enum mlxsw_reg_ralue_trap_action trap_action,
- u16 trap_id, u16 local_erif)
-{
- struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv;
-
- mlxsw_reg_ralue_act_local_pack(op_ctx_basic->ralue_pl, trap_action,
- trap_id, local_erif);
-}
-
-static void
-mlxsw_sp_router_ll_basic_fib_entry_act_ip2me_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx)
-{
- struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv;
-
- mlxsw_reg_ralue_act_ip2me_pack(op_ctx_basic->ralue_pl);
-}
-
-static void
-mlxsw_sp_router_ll_basic_fib_entry_act_ip2me_tun_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
- u32 tunnel_ptr)
-{
- struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv;
-
- mlxsw_reg_ralue_act_ip2me_tun_pack(op_ctx_basic->ralue_pl, tunnel_ptr);
-}
-
-static int
-mlxsw_sp_router_ll_basic_fib_entry_commit(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
- bool *postponed_for_bulk)
-{
- struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv;
-
- return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
- op_ctx_basic->ralue_pl);
-}
-
-static bool
-mlxsw_sp_router_ll_basic_fib_entry_is_committed(struct mlxsw_sp_fib_entry_priv *priv)
-{
- return true;
-}
-
-static void mlxsw_sp_fib_entry_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
- struct mlxsw_sp_fib_entry *fib_entry,
- enum mlxsw_sp_fib_entry_op op)
-{
- struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
-
- mlxsw_sp_fib_entry_op_ctx_priv_hold(op_ctx, fib_entry->priv);
- fib->ll_ops->fib_entry_pack(op_ctx, fib->proto, op, fib->vr->id,
- fib_entry->fib_node->key.prefix_len,
- fib_entry->fib_node->key.addr,
- fib_entry->priv);
-}
-
-static int mlxsw_sp_fib_entry_commit(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
- const struct mlxsw_sp_router_ll_ops *ll_ops)
-{
- bool postponed_for_bulk = false;
- int err;
-
- err = ll_ops->fib_entry_commit(mlxsw_sp, op_ctx, &postponed_for_bulk);
- if (!postponed_for_bulk)
- mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
- return err;
-}
-
static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
struct mlxsw_sp_fib_entry *fib_entry,
- enum mlxsw_sp_fib_entry_op op)
+ enum mlxsw_reg_ralue_op op)
{
- const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;
struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
struct mlxsw_sp_nexthop_group_info *nhgi = nh_group->nhgi;
+ char ralue_pl[MLXSW_REG_RALUE_LEN];
enum mlxsw_reg_ralue_trap_action trap_action;
u16 trap_id = 0;
u32 adjacency_index = 0;
@@ -5949,20 +5755,19 @@ static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
}
- mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op);
- ll_ops->fib_entry_act_remote_pack(op_ctx, trap_action, trap_id,
- adjacency_index, ecmp_size);
- return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops);
+ mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
+ mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
+ adjacency_index, ecmp_size);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
struct mlxsw_sp_fib_entry *fib_entry,
- enum mlxsw_sp_fib_entry_op op)
+ enum mlxsw_reg_ralue_op op)
{
- const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;
struct mlxsw_sp_rif *rif = fib_entry->nh_group->nhgi->nh_rif;
enum mlxsw_reg_ralue_trap_action trap_action;
+ char ralue_pl[MLXSW_REG_RALUE_LEN];
u16 trap_id = 0;
u16 rif_index = 0;
@@ -5974,64 +5779,61 @@ static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
}
- mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op);
- ll_ops->fib_entry_act_local_pack(op_ctx, trap_action, trap_id, rif_index);
- return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops);
+ mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
+ mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
+ rif_index);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
struct mlxsw_sp_fib_entry *fib_entry,
- enum mlxsw_sp_fib_entry_op op)
+ enum mlxsw_reg_ralue_op op)
{
- const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;
+ char ralue_pl[MLXSW_REG_RALUE_LEN];
- mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op);
- ll_ops->fib_entry_act_ip2me_pack(op_ctx);
- return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops);
+ mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
+ mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
static int mlxsw_sp_fib_entry_op_blackhole(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
struct mlxsw_sp_fib_entry *fib_entry,
- enum mlxsw_sp_fib_entry_op op)
+ enum mlxsw_reg_ralue_op op)
{
- const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;
enum mlxsw_reg_ralue_trap_action trap_action;
+ char ralue_pl[MLXSW_REG_RALUE_LEN];
trap_action = MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR;
- mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op);
- ll_ops->fib_entry_act_local_pack(op_ctx, trap_action, 0, 0);
- return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops);
+ mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
+ mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, 0, 0);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
static int
mlxsw_sp_fib_entry_op_unreachable(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
struct mlxsw_sp_fib_entry *fib_entry,
- enum mlxsw_sp_fib_entry_op op)
+ enum mlxsw_reg_ralue_op op)
{
- const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;
enum mlxsw_reg_ralue_trap_action trap_action;
+ char ralue_pl[MLXSW_REG_RALUE_LEN];
u16 trap_id;
trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
trap_id = MLXSW_TRAP_ID_RTR_INGRESS1;
- mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op);
- ll_ops->fib_entry_act_local_pack(op_ctx, trap_action, trap_id, 0);
- return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops);
+ mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
+ mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, 0);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
static int
mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
struct mlxsw_sp_fib_entry *fib_entry,
- enum mlxsw_sp_fib_entry_op op)
+ enum mlxsw_reg_ralue_op op)
{
- const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;
struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
const struct mlxsw_sp_ipip_ops *ipip_ops;
+ char ralue_pl[MLXSW_REG_RALUE_LEN];
int err;
if (WARN_ON(!ipip_entry))
@@ -6043,55 +5845,54 @@ mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
if (err)
return err;
- mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op);
- ll_ops->fib_entry_act_ip2me_tun_pack(op_ctx,
- fib_entry->decap.tunnel_index);
- return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops);
+ mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
+ mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
+ fib_entry->decap.tunnel_index);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
struct mlxsw_sp_fib_entry *fib_entry,
- enum mlxsw_sp_fib_entry_op op)
+ enum mlxsw_reg_ralue_op op)
{
- const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;
+ char ralue_pl[MLXSW_REG_RALUE_LEN];
- mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op);
- ll_ops->fib_entry_act_ip2me_tun_pack(op_ctx,
- fib_entry->decap.tunnel_index);
- return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops);
+ mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
+ mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
+ fib_entry->decap.tunnel_index);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
struct mlxsw_sp_fib_entry *fib_entry,
- enum mlxsw_sp_fib_entry_op op)
+ enum mlxsw_reg_ralue_op op)
{
switch (fib_entry->type) {
case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
- return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, op_ctx, fib_entry, op);
+ return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
- return mlxsw_sp_fib_entry_op_local(mlxsw_sp, op_ctx, fib_entry, op);
+ return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
- return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, op_ctx, fib_entry, op);
+ return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
- return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, op_ctx, fib_entry, op);
+ return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, fib_entry, op);
case MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE:
- return mlxsw_sp_fib_entry_op_unreachable(mlxsw_sp, op_ctx, fib_entry, op);
+ return mlxsw_sp_fib_entry_op_unreachable(mlxsw_sp, fib_entry,
+ op);
case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
- return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp, op_ctx, fib_entry, op);
+ return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
+ fib_entry, op);
case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
- return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, op_ctx, fib_entry, op);
+ return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op);
}
return -EINVAL;
}
static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
struct mlxsw_sp_fib_entry *fib_entry,
- enum mlxsw_sp_fib_entry_op op)
+ enum mlxsw_reg_ralue_op op)
{
- int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, op_ctx, fib_entry, op);
+ int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
if (err)
return err;
@@ -6101,35 +5902,18 @@ static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
return err;
}
-static int __mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
- struct mlxsw_sp_fib_entry *fib_entry,
- bool is_new)
-{
- return mlxsw_sp_fib_entry_op(mlxsw_sp, op_ctx, fib_entry,
- is_new ? MLXSW_SP_FIB_ENTRY_OP_WRITE :
- MLXSW_SP_FIB_ENTRY_OP_UPDATE);
-}
-
static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry *fib_entry)
{
- struct mlxsw_sp_fib_entry_op_ctx *op_ctx = mlxsw_sp->router->ll_op_ctx;
-
- mlxsw_sp_fib_entry_op_ctx_clear(op_ctx);
- return __mlxsw_sp_fib_entry_update(mlxsw_sp, op_ctx, fib_entry, false);
+ return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
+ MLXSW_REG_RALUE_OP_WRITE_WRITE);
}
static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
struct mlxsw_sp_fib_entry *fib_entry)
{
- const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;
-
- if (!ll_ops->fib_entry_is_committed(fib_entry->priv))
- return 0;
- return mlxsw_sp_fib_entry_op(mlxsw_sp, op_ctx, fib_entry,
- MLXSW_SP_FIB_ENTRY_OP_DELETE);
+ return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
+ MLXSW_REG_RALUE_OP_WRITE_DELETE);
}
static int
@@ -6224,12 +6008,6 @@ mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
return ERR_PTR(-ENOMEM);
fib_entry = &fib4_entry->common;
- fib_entry->priv = mlxsw_sp_fib_entry_priv_create(fib_node->fib->ll_ops);
- if (IS_ERR(fib_entry->priv)) {
- err = PTR_ERR(fib_entry->priv);
- goto err_fib_entry_priv_create;
- }
-
err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
if (err)
goto err_nexthop4_group_get;
@@ -6258,8 +6036,6 @@ err_fib4_entry_type_set:
err_nexthop_group_vr_link:
mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
err_nexthop4_group_get:
- mlxsw_sp_fib_entry_priv_put(fib_entry->priv);
-err_fib_entry_priv_create:
kfree(fib4_entry);
return ERR_PTR(err);
}
@@ -6274,7 +6050,6 @@ static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_nexthop_group_vr_unlink(fib4_entry->common.nh_group,
fib_node->fib);
mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
- mlxsw_sp_fib_entry_priv_put(fib4_entry->common.priv);
kfree(fib4_entry);
}
@@ -6512,16 +6287,14 @@ static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
}
static int mlxsw_sp_fib_node_entry_link(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
struct mlxsw_sp_fib_entry *fib_entry)
{
struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
- bool is_new = !fib_node->fib_entry;
int err;
fib_node->fib_entry = fib_entry;
- err = __mlxsw_sp_fib_entry_update(mlxsw_sp, op_ctx, fib_entry, is_new);
+ err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
if (err)
goto err_fib_entry_update;
@@ -6532,25 +6305,14 @@ err_fib_entry_update:
return err;
}
-static int __mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
- struct mlxsw_sp_fib_entry *fib_entry)
+static void
+mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry)
{
struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
- int err;
- err = mlxsw_sp_fib_entry_del(mlxsw_sp, op_ctx, fib_entry);
+ mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
fib_node->fib_entry = NULL;
- return err;
-}
-
-static void mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry *fib_entry)
-{
- struct mlxsw_sp_fib_entry_op_ctx *op_ctx = mlxsw_sp->router->ll_op_ctx;
-
- mlxsw_sp_fib_entry_op_ctx_clear(op_ctx);
- __mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, op_ctx, fib_entry);
}
static bool mlxsw_sp_fib4_allow_replace(struct mlxsw_sp_fib4_entry *fib4_entry)
@@ -6572,7 +6334,6 @@ static bool mlxsw_sp_fib4_allow_replace(struct mlxsw_sp_fib4_entry *fib4_entry)
static int
mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
const struct fib_entry_notifier_info *fen_info)
{
struct mlxsw_sp_fib4_entry *fib4_entry, *fib4_replaced;
@@ -6607,7 +6368,7 @@ mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp,
}
replaced = fib_node->fib_entry;
- err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, op_ctx, &fib4_entry->common);
+ err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib4_entry->common);
if (err) {
dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
goto err_fib_node_entry_link;
@@ -6632,23 +6393,20 @@ err_fib4_entry_create:
return err;
}
-static int mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
- struct fib_entry_notifier_info *fen_info)
+static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
+ struct fib_entry_notifier_info *fen_info)
{
struct mlxsw_sp_fib4_entry *fib4_entry;
struct mlxsw_sp_fib_node *fib_node;
- int err;
fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
if (!fib4_entry)
- return 0;
+ return;
fib_node = fib4_entry->common.fib_node;
- err = __mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, op_ctx, &fib4_entry->common);
+ mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib4_entry->common);
mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
- return err;
}
static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
@@ -6946,9 +6704,9 @@ static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
}
-static int mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
- struct mlxsw_sp_fib6_entry *fib6_entry)
+static int
+mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry)
{
struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
@@ -6971,8 +6729,7 @@ static int mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
* currently associated with it in the device's table is that
* of the old group. Start using the new one instead.
*/
- err = __mlxsw_sp_fib_entry_update(mlxsw_sp, op_ctx,
- &fib6_entry->common, false);
+ err = mlxsw_sp_fib_entry_update(mlxsw_sp, &fib6_entry->common);
if (err)
goto err_fib_entry_update;
@@ -6996,7 +6753,6 @@ err_nexthop6_group_get:
static int
mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
struct mlxsw_sp_fib6_entry *fib6_entry,
struct fib6_info **rt_arr, unsigned int nrt6)
{
@@ -7014,7 +6770,7 @@ mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
fib6_entry->nrt6++;
}
- err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, op_ctx, fib6_entry);
+ err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
if (err)
goto err_rt6_unwind;
@@ -7033,7 +6789,6 @@ err_rt6_unwind:
static void
mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
struct mlxsw_sp_fib6_entry *fib6_entry,
struct fib6_info **rt_arr, unsigned int nrt6)
{
@@ -7051,7 +6806,7 @@ mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
}
- mlxsw_sp_nexthop6_group_update(mlxsw_sp, op_ctx, fib6_entry);
+ mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
}
static int
@@ -7137,12 +6892,6 @@ mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
return ERR_PTR(-ENOMEM);
fib_entry = &fib6_entry->common;
- fib_entry->priv = mlxsw_sp_fib_entry_priv_create(fib_node->fib->ll_ops);
- if (IS_ERR(fib_entry->priv)) {
- err = PTR_ERR(fib_entry->priv);
- goto err_fib_entry_priv_create;
- }
-
INIT_LIST_HEAD(&fib6_entry->rt6_list);
for (i = 0; i < nrt6; i++) {
@@ -7184,8 +6933,6 @@ err_rt6_unwind:
list_del(&mlxsw_sp_rt6->list);
mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
}
- mlxsw_sp_fib_entry_priv_put(fib_entry->priv);
-err_fib_entry_priv_create:
kfree(fib6_entry);
return ERR_PTR(err);
}
@@ -7208,7 +6955,6 @@ static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
WARN_ON(fib6_entry->nrt6);
- mlxsw_sp_fib_entry_priv_put(fib6_entry->common.priv);
kfree(fib6_entry);
}
@@ -7266,8 +7012,8 @@ static bool mlxsw_sp_fib6_allow_replace(struct mlxsw_sp_fib6_entry *fib6_entry)
}
static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
- struct fib6_info **rt_arr, unsigned int nrt6)
+ struct fib6_info **rt_arr,
+ unsigned int nrt6)
{
struct mlxsw_sp_fib6_entry *fib6_entry, *fib6_replaced;
struct mlxsw_sp_fib_entry *replaced;
@@ -7306,7 +7052,7 @@ static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp,
}
replaced = fib_node->fib_entry;
- err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, op_ctx, &fib6_entry->common);
+ err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib6_entry->common);
if (err)
goto err_fib_node_entry_link;
@@ -7330,8 +7076,8 @@ err_fib6_entry_create:
}
static int mlxsw_sp_router_fib6_append(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
- struct fib6_info **rt_arr, unsigned int nrt6)
+ struct fib6_info **rt_arr,
+ unsigned int nrt6)
{
struct mlxsw_sp_fib6_entry *fib6_entry;
struct mlxsw_sp_fib_node *fib_node;
@@ -7359,7 +7105,8 @@ static int mlxsw_sp_router_fib6_append(struct mlxsw_sp *mlxsw_sp,
fib6_entry = container_of(fib_node->fib_entry,
struct mlxsw_sp_fib6_entry, common);
- err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, op_ctx, fib6_entry, rt_arr, nrt6);
+ err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt_arr,
+ nrt6);
if (err)
goto err_fib6_entry_nexthop_add;
@@ -7370,17 +7117,16 @@ err_fib6_entry_nexthop_add:
return err;
}
-static int mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
- struct fib6_info **rt_arr, unsigned int nrt6)
+static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
+ struct fib6_info **rt_arr,
+ unsigned int nrt6)
{
struct mlxsw_sp_fib6_entry *fib6_entry;
struct mlxsw_sp_fib_node *fib_node;
struct fib6_info *rt = rt_arr[0];
- int err;
if (mlxsw_sp_fib6_rt_should_ignore(rt))
- return 0;
+ return;
/* Multipath routes are first added to the FIB trie and only then
* notified. If we vetoed the addition, we will get a delete
@@ -7389,22 +7135,22 @@ static int mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
*/
fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
if (!fib6_entry)
- return 0;
+ return;
/* If not all the nexthops are deleted, then only reduce the nexthop
* group.
*/
if (nrt6 != fib6_entry->nrt6) {
- mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, op_ctx, fib6_entry, rt_arr, nrt6);
- return 0;
+ mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt_arr,
+ nrt6);
+ return;
}
fib_node = fib6_entry->common.fib_node;
- err = __mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, op_ctx, &fib6_entry->common);
+ mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib6_entry->common);
mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
- return err;
}
static struct mlxsw_sp_mr_table *
@@ -7557,15 +7303,15 @@ static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
}
}
-struct mlxsw_sp_fib6_event {
+struct mlxsw_sp_fib6_event_work {
struct fib6_info **rt_arr;
unsigned int nrt6;
};
-struct mlxsw_sp_fib_event {
- struct list_head list; /* node in fib queue */
+struct mlxsw_sp_fib_event_work {
+ struct work_struct work;
union {
- struct mlxsw_sp_fib6_event fib6_event;
+ struct mlxsw_sp_fib6_event_work fib6_work;
struct fib_entry_notifier_info fen_info;
struct fib_rule_notifier_info fr_info;
struct fib_nh_notifier_info fnh_info;
@@ -7574,12 +7320,11 @@ struct mlxsw_sp_fib_event {
};
struct mlxsw_sp *mlxsw_sp;
unsigned long event;
- int family;
};
static int
-mlxsw_sp_router_fib6_event_init(struct mlxsw_sp_fib6_event *fib6_event,
- struct fib6_entry_notifier_info *fen6_info)
+mlxsw_sp_router_fib6_work_init(struct mlxsw_sp_fib6_event_work *fib6_work,
+ struct fib6_entry_notifier_info *fen6_info)
{
struct fib6_info *rt = fen6_info->rt;
struct fib6_info **rt_arr;
@@ -7593,8 +7338,8 @@ mlxsw_sp_router_fib6_event_init(struct mlxsw_sp_fib6_event *fib6_event,
if (!rt_arr)
return -ENOMEM;
- fib6_event->rt_arr = rt_arr;
- fib6_event->nrt6 = nrt6;
+ fib6_work->rt_arr = rt_arr;
+ fib6_work->nrt6 = nrt6;
rt_arr[0] = rt;
fib6_info_hold(rt);
@@ -7616,242 +7361,182 @@ mlxsw_sp_router_fib6_event_init(struct mlxsw_sp_fib6_event *fib6_event,
}
static void
-mlxsw_sp_router_fib6_event_fini(struct mlxsw_sp_fib6_event *fib6_event)
+mlxsw_sp_router_fib6_work_fini(struct mlxsw_sp_fib6_event_work *fib6_work)
{
int i;
- for (i = 0; i < fib6_event->nrt6; i++)
- mlxsw_sp_rt6_release(fib6_event->rt_arr[i]);
- kfree(fib6_event->rt_arr);
+ for (i = 0; i < fib6_work->nrt6; i++)
+ mlxsw_sp_rt6_release(fib6_work->rt_arr[i]);
+ kfree(fib6_work->rt_arr);
}
-static void mlxsw_sp_router_fib4_event_process(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
- struct mlxsw_sp_fib_event *fib_event)
+static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
{
+ struct mlxsw_sp_fib_event_work *fib_work =
+ container_of(work, struct mlxsw_sp_fib_event_work, work);
+ struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
int err;
+ mutex_lock(&mlxsw_sp->router->lock);
mlxsw_sp_span_respin(mlxsw_sp);
- switch (fib_event->event) {
+ switch (fib_work->event) {
case FIB_EVENT_ENTRY_REPLACE:
- err = mlxsw_sp_router_fib4_replace(mlxsw_sp, op_ctx, &fib_event->fen_info);
+ err = mlxsw_sp_router_fib4_replace(mlxsw_sp,
+ &fib_work->fen_info);
if (err) {
- mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
dev_warn(mlxsw_sp->bus_info->dev, "FIB replace failed.\n");
mlxsw_sp_fib4_offload_failed_flag_set(mlxsw_sp,
- &fib_event->fen_info);
+ &fib_work->fen_info);
}
- fib_info_put(fib_event->fen_info.fi);
+ fib_info_put(fib_work->fen_info.fi);
break;
case FIB_EVENT_ENTRY_DEL:
- err = mlxsw_sp_router_fib4_del(mlxsw_sp, op_ctx, &fib_event->fen_info);
- if (err)
- mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
- fib_info_put(fib_event->fen_info.fi);
+ mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
+ fib_info_put(fib_work->fen_info.fi);
break;
case FIB_EVENT_NH_ADD:
case FIB_EVENT_NH_DEL:
- mlxsw_sp_nexthop4_event(mlxsw_sp, fib_event->event, fib_event->fnh_info.fib_nh);
- fib_info_put(fib_event->fnh_info.fib_nh->nh_parent);
+ mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
+ fib_work->fnh_info.fib_nh);
+ fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
break;
}
+ mutex_unlock(&mlxsw_sp->router->lock);
+ kfree(fib_work);
}
-static void mlxsw_sp_router_fib6_event_process(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
- struct mlxsw_sp_fib_event *fib_event)
+static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
{
- struct mlxsw_sp_fib6_event *fib6_event = &fib_event->fib6_event;
+ struct mlxsw_sp_fib_event_work *fib_work =
+ container_of(work, struct mlxsw_sp_fib_event_work, work);
+ struct mlxsw_sp_fib6_event_work *fib6_work = &fib_work->fib6_work;
+ struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
int err;
+ mutex_lock(&mlxsw_sp->router->lock);
mlxsw_sp_span_respin(mlxsw_sp);
- switch (fib_event->event) {
+ switch (fib_work->event) {
case FIB_EVENT_ENTRY_REPLACE:
- err = mlxsw_sp_router_fib6_replace(mlxsw_sp, op_ctx, fib_event->fib6_event.rt_arr,
- fib_event->fib6_event.nrt6);
+ err = mlxsw_sp_router_fib6_replace(mlxsw_sp,
+ fib6_work->rt_arr,
+ fib6_work->nrt6);
if (err) {
- mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
dev_warn(mlxsw_sp->bus_info->dev, "FIB replace failed.\n");
mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp,
- fib6_event->rt_arr,
- fib6_event->nrt6);
+ fib6_work->rt_arr,
+ fib6_work->nrt6);
}
- mlxsw_sp_router_fib6_event_fini(&fib_event->fib6_event);
+ mlxsw_sp_router_fib6_work_fini(fib6_work);
break;
case FIB_EVENT_ENTRY_APPEND:
- err = mlxsw_sp_router_fib6_append(mlxsw_sp, op_ctx, fib_event->fib6_event.rt_arr,
- fib_event->fib6_event.nrt6);
+ err = mlxsw_sp_router_fib6_append(mlxsw_sp,
+ fib6_work->rt_arr,
+ fib6_work->nrt6);
if (err) {
- mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
dev_warn(mlxsw_sp->bus_info->dev, "FIB append failed.\n");
mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp,
- fib6_event->rt_arr,
- fib6_event->nrt6);
+ fib6_work->rt_arr,
+ fib6_work->nrt6);
}
- mlxsw_sp_router_fib6_event_fini(&fib_event->fib6_event);
+ mlxsw_sp_router_fib6_work_fini(fib6_work);
break;
case FIB_EVENT_ENTRY_DEL:
- err = mlxsw_sp_router_fib6_del(mlxsw_sp, op_ctx, fib_event->fib6_event.rt_arr,
- fib_event->fib6_event.nrt6);
- if (err)
- mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
- mlxsw_sp_router_fib6_event_fini(&fib_event->fib6_event);
+ mlxsw_sp_router_fib6_del(mlxsw_sp,
+ fib6_work->rt_arr,
+ fib6_work->nrt6);
+ mlxsw_sp_router_fib6_work_fini(fib6_work);
break;
}
+ mutex_unlock(&mlxsw_sp->router->lock);
+ kfree(fib_work);
}
-static void mlxsw_sp_router_fibmr_event_process(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_event *fib_event)
+static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
{
+ struct mlxsw_sp_fib_event_work *fib_work =
+ container_of(work, struct mlxsw_sp_fib_event_work, work);
+ struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
bool replace;
int err;
rtnl_lock();
mutex_lock(&mlxsw_sp->router->lock);
- switch (fib_event->event) {
+ switch (fib_work->event) {
case FIB_EVENT_ENTRY_REPLACE:
case FIB_EVENT_ENTRY_ADD:
- replace = fib_event->event == FIB_EVENT_ENTRY_REPLACE;
+ replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
- err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_event->men_info, replace);
+ err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
+ replace);
if (err)
dev_warn(mlxsw_sp->bus_info->dev, "MR entry add failed.\n");
- mr_cache_put(fib_event->men_info.mfc);
+ mr_cache_put(fib_work->men_info.mfc);
break;
case FIB_EVENT_ENTRY_DEL:
- mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_event->men_info);
- mr_cache_put(fib_event->men_info.mfc);
+ mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
+ mr_cache_put(fib_work->men_info.mfc);
break;
case FIB_EVENT_VIF_ADD:
err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
- &fib_event->ven_info);
+ &fib_work->ven_info);
if (err)
dev_warn(mlxsw_sp->bus_info->dev, "MR VIF add failed.\n");
- dev_put(fib_event->ven_info.dev);
+ dev_put(fib_work->ven_info.dev);
break;
case FIB_EVENT_VIF_DEL:
- mlxsw_sp_router_fibmr_vif_del(mlxsw_sp, &fib_event->ven_info);
- dev_put(fib_event->ven_info.dev);
+ mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
+ &fib_work->ven_info);
+ dev_put(fib_work->ven_info.dev);
break;
}
mutex_unlock(&mlxsw_sp->router->lock);
rtnl_unlock();
+ kfree(fib_work);
}
-static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
-{
- struct mlxsw_sp_router *router = container_of(work, struct mlxsw_sp_router, fib_event_work);
- struct mlxsw_sp_fib_entry_op_ctx *op_ctx = router->ll_op_ctx;
- struct mlxsw_sp *mlxsw_sp = router->mlxsw_sp;
- struct mlxsw_sp_fib_event *next_fib_event;
- struct mlxsw_sp_fib_event *fib_event;
- int last_family = AF_UNSPEC;
- LIST_HEAD(fib_event_queue);
-
- spin_lock_bh(&router->fib_event_queue_lock);
- list_splice_init(&router->fib_event_queue, &fib_event_queue);
- spin_unlock_bh(&router->fib_event_queue_lock);
-
- /* Router lock is held here to make sure per-instance
- * operation context is not reused across FIB4/6 event
- * processing.
- */
- mutex_lock(&router->lock);
- mlxsw_sp_fib_entry_op_ctx_clear(op_ctx);
- list_for_each_entry_safe(fib_event, next_fib_event,
- &fib_event_queue, list) {
- /* Check if the next entry in the queue exists and is
- * of the same type (family and event) as the current one.
- * In that case it is permitted to bulk multiple FIB
- * entries into a single register write.
- */
- op_ctx->bulk_ok = !list_is_last(&fib_event->list, &fib_event_queue) &&
- fib_event->family == next_fib_event->family &&
- fib_event->event == next_fib_event->event;
- op_ctx->event = fib_event->event;
-
- /* If the family of this entry differs from the previous one, the
- * context will need to be reinitialized now; indicate that.
- * Note that since last_family is initialized to AF_UNSPEC, this is always
- * going to happen for the first entry processed in the work.
- */
- if (fib_event->family != last_family)
- op_ctx->initialized = false;
-
- switch (fib_event->family) {
- case AF_INET:
- mlxsw_sp_router_fib4_event_process(mlxsw_sp, op_ctx,
- fib_event);
- break;
- case AF_INET6:
- mlxsw_sp_router_fib6_event_process(mlxsw_sp, op_ctx,
- fib_event);
- break;
- case RTNL_FAMILY_IP6MR:
- case RTNL_FAMILY_IPMR:
- /* Unlock here as inside FIBMR the lock is taken again
- * under RTNL. The per-instance operation context
- * is not used by FIBMR.
- */
- mutex_unlock(&router->lock);
- mlxsw_sp_router_fibmr_event_process(mlxsw_sp,
- fib_event);
- mutex_lock(&router->lock);
- break;
- default:
- WARN_ON_ONCE(1);
- }
- last_family = fib_event->family;
- kfree(fib_event);
- cond_resched();
- }
- WARN_ON_ONCE(!list_empty(&router->ll_op_ctx->fib_entry_priv_list));
- mutex_unlock(&router->lock);
-}
-
-static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event *fib_event,
+static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
struct fib_notifier_info *info)
{
struct fib_entry_notifier_info *fen_info;
struct fib_nh_notifier_info *fnh_info;
- switch (fib_event->event) {
+ switch (fib_work->event) {
case FIB_EVENT_ENTRY_REPLACE:
case FIB_EVENT_ENTRY_DEL:
fen_info = container_of(info, struct fib_entry_notifier_info,
info);
- fib_event->fen_info = *fen_info;
+ fib_work->fen_info = *fen_info;
/* Take reference on fib_info to prevent it from being
- * freed while event is queued. Release it afterwards.
+ * freed while work is queued. Release it afterwards.
*/
- fib_info_hold(fib_event->fen_info.fi);
+ fib_info_hold(fib_work->fen_info.fi);
break;
case FIB_EVENT_NH_ADD:
case FIB_EVENT_NH_DEL:
fnh_info = container_of(info, struct fib_nh_notifier_info,
info);
- fib_event->fnh_info = *fnh_info;
- fib_info_hold(fib_event->fnh_info.fib_nh->nh_parent);
+ fib_work->fnh_info = *fnh_info;
+ fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
break;
}
}
-static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event *fib_event,
+static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
struct fib_notifier_info *info)
{
struct fib6_entry_notifier_info *fen6_info;
int err;
- switch (fib_event->event) {
+ switch (fib_work->event) {
case FIB_EVENT_ENTRY_REPLACE:
case FIB_EVENT_ENTRY_APPEND:
case FIB_EVENT_ENTRY_DEL:
fen6_info = container_of(info, struct fib6_entry_notifier_info,
info);
- err = mlxsw_sp_router_fib6_event_init(&fib_event->fib6_event,
- fen6_info);
+ err = mlxsw_sp_router_fib6_work_init(&fib_work->fib6_work,
+ fen6_info);
if (err)
return err;
break;
@@ -7861,20 +7546,20 @@ static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event *fib_event,
}
static void
-mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event *fib_event,
+mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
struct fib_notifier_info *info)
{
- switch (fib_event->event) {
+ switch (fib_work->event) {
case FIB_EVENT_ENTRY_REPLACE:
case FIB_EVENT_ENTRY_ADD:
case FIB_EVENT_ENTRY_DEL:
- memcpy(&fib_event->men_info, info, sizeof(fib_event->men_info));
- mr_cache_hold(fib_event->men_info.mfc);
+ memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
+ mr_cache_hold(fib_work->men_info.mfc);
break;
case FIB_EVENT_VIF_ADD:
case FIB_EVENT_VIF_DEL:
- memcpy(&fib_event->ven_info, info, sizeof(fib_event->ven_info));
- dev_hold(fib_event->ven_info.dev);
+ memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
+ dev_hold(fib_work->ven_info.dev);
break;
}
}
@@ -7928,7 +7613,7 @@ static int mlxsw_sp_router_fib_rule_event(unsigned long event,
static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
- struct mlxsw_sp_fib_event *fib_event;
+ struct mlxsw_sp_fib_event_work *fib_work;
struct fib_notifier_info *info = ptr;
struct mlxsw_sp_router *router;
int err;
@@ -7960,39 +7645,37 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
break;
}
- fib_event = kzalloc(sizeof(*fib_event), GFP_ATOMIC);
- if (!fib_event)
+ fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
+ if (!fib_work)
return NOTIFY_BAD;
- fib_event->mlxsw_sp = router->mlxsw_sp;
- fib_event->event = event;
- fib_event->family = info->family;
+ fib_work->mlxsw_sp = router->mlxsw_sp;
+ fib_work->event = event;
switch (info->family) {
case AF_INET:
- mlxsw_sp_router_fib4_event(fib_event, info);
+ INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
+ mlxsw_sp_router_fib4_event(fib_work, info);
break;
case AF_INET6:
- err = mlxsw_sp_router_fib6_event(fib_event, info);
+ INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
+ err = mlxsw_sp_router_fib6_event(fib_work, info);
if (err)
goto err_fib_event;
break;
case RTNL_FAMILY_IP6MR:
case RTNL_FAMILY_IPMR:
- mlxsw_sp_router_fibmr_event(fib_event, info);
+ INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
+ mlxsw_sp_router_fibmr_event(fib_work, info);
break;
}
- /* Enqueue the event and trigger the work */
- spin_lock_bh(&router->fib_event_queue_lock);
- list_add_tail(&fib_event->list, &router->fib_event_queue);
- spin_unlock_bh(&router->fib_event_queue_lock);
- mlxsw_core_schedule_work(&router->fib_event_work);
+ mlxsw_core_schedule_work(&fib_work->work);
return NOTIFY_DONE;
err_fib_event:
- kfree(fib_event);
+ kfree(fib_work);
return NOTIFY_BAD;
}
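The notifier side restored above follows the standard kernel pattern for deferring work out of atomic context: the notifier allocates a work item with GFP_ATOMIC, and the worker recovers it with container_of() and frees it when done. A minimal sketch under hypothetical names (the driver itself schedules onto its ordered workqueue via mlxsw_core_schedule_work() rather than schedule_work()):

#include <linux/notifier.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct example_event_work {
	struct work_struct work;
	unsigned long event;
};

static void example_event_work_fn(struct work_struct *work)
{
	struct example_event_work *ew =
		container_of(work, struct example_event_work, work);

	/* Process ew->event in sleepable context, then free the item. */
	kfree(ew);
}

static int example_notifier_cb(struct notifier_block *nb,
			       unsigned long event, void *ptr)
{
	struct example_event_work *ew;

	ew = kzalloc(sizeof(*ew), GFP_ATOMIC);	/* may run in atomic context */
	if (!ew)
		return NOTIFY_BAD;

	ew->event = event;
	INIT_WORK(&ew->work, example_event_work_fn);
	schedule_work(&ew->work);
	return NOTIFY_DONE;
}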
@@ -8035,7 +7718,7 @@ u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev)
/* We only return the VID for VLAN RIFs. Otherwise we return an
* invalid value (0).
*/
- if (rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN)
+ if (rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN_EMU)
goto out;
vid = mlxsw_sp_fid_8021q_vid(rif->fid);
@@ -8451,6 +8134,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_rif_counters_alloc(rif);
}
+ atomic_inc(&mlxsw_sp->router->rifs_count);
return rif;
err_stats_enable:
@@ -8480,6 +8164,7 @@ static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
struct mlxsw_sp_vr *vr;
int i;
+ atomic_dec(&mlxsw_sp->router->rifs_count);
mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
vr = &mlxsw_sp->router->vrs[rif->vr_id];
@@ -8638,6 +8323,13 @@ static u64 mlxsw_sp_rif_mac_profiles_occ_get(void *priv)
return atomic_read(&mlxsw_sp->router->rif_mac_profiles_count);
}
+static u64 mlxsw_sp_rifs_occ_get(void *priv)
+{
+ const struct mlxsw_sp *mlxsw_sp = priv;
+
+ return atomic_read(&mlxsw_sp->router->rifs_count);
+}
+
static struct mlxsw_sp_rif_mac_profile *
mlxsw_sp_rif_mac_profile_create(struct mlxsw_sp *mlxsw_sp, const char *mac,
struct netlink_ext_ack *extack)
@@ -9685,10 +9377,9 @@ static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
.fid_get = mlxsw_sp_rif_subport_fid_get,
};
-static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
- enum mlxsw_reg_ritr_if_type type,
- u16 vid_fid, bool enable)
+static int mlxsw_sp_rif_fid_op(struct mlxsw_sp_rif *rif, u16 fid, bool enable)
{
+ enum mlxsw_reg_ritr_if_type type = MLXSW_REG_RITR_FID_IF;
struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
char ritr_pl[MLXSW_REG_RITR_LEN];
@@ -9696,7 +9387,7 @@ static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
rif->dev->mtu);
mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, rif->mac_profile_id);
- mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
+ mlxsw_reg_ritr_fid_if_fid_set(ritr_pl, fid);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
@@ -9720,10 +9411,9 @@ static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif,
return err;
rif->mac_profile_id = mac_profile;
- err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
- true);
+ err = mlxsw_sp_rif_fid_op(rif, fid_index, true);
if (err)
- goto err_rif_vlan_fid_op;
+ goto err_rif_fid_op;
err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
mlxsw_sp_router_port(mlxsw_sp), true);
@@ -9750,8 +9440,8 @@ err_fid_bc_flood_set:
mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_mc_flood_set:
- mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
-err_rif_vlan_fid_op:
+ mlxsw_sp_rif_fid_op(rif, fid_index, false);
+err_rif_fid_op:
mlxsw_sp_rif_mac_profile_put(mlxsw_sp, mac_profile);
return err;
}
@@ -9770,7 +9460,7 @@ static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
mlxsw_sp_router_port(mlxsw_sp), false);
mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
mlxsw_sp_router_port(mlxsw_sp), false);
- mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
+ mlxsw_sp_rif_fid_op(rif, fid_index, false);
mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, rif->mac_profile_id);
}
@@ -9848,7 +9538,7 @@ static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
}
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_emu_ops = {
- .type = MLXSW_SP_RIF_TYPE_VLAN,
+ .type = MLXSW_SP_RIF_TYPE_VLAN_EMU,
.rif_size = sizeof(struct mlxsw_sp_rif),
.configure = mlxsw_sp_rif_fid_configure,
.deconfigure = mlxsw_sp_rif_fid_deconfigure,
@@ -9926,7 +9616,7 @@ static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = {
static const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = {
[MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops,
- [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_emu_ops,
+ [MLXSW_SP_RIF_TYPE_VLAN_EMU] = &mlxsw_sp_rif_vlan_emu_ops,
[MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops,
[MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp1_rif_ipip_lb_ops,
};
@@ -9969,6 +9659,7 @@ mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
if (err)
goto ul_rif_op_err;
+ atomic_inc(&mlxsw_sp->router->rifs_count);
return ul_rif;
ul_rif_op_err:
@@ -9981,6 +9672,7 @@ static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif)
{
struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
+ atomic_dec(&mlxsw_sp->router->rifs_count);
mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false);
mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL;
kfree(ul_rif);
@@ -10112,7 +9804,7 @@ static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = {
static const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = {
[MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops,
- [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_emu_ops,
+ [MLXSW_SP_RIF_TYPE_VLAN_EMU] = &mlxsw_sp_rif_vlan_emu_ops,
[MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops,
[MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp2_rif_ipip_lb_ops,
};
@@ -10136,10 +9828,15 @@ static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
idr_init(&mlxsw_sp->router->rif_mac_profiles_idr);
atomic_set(&mlxsw_sp->router->rif_mac_profiles_count, 0);
+ atomic_set(&mlxsw_sp->router->rifs_count, 0);
devlink_resource_occ_get_register(devlink,
MLXSW_SP_RESOURCE_RIF_MAC_PROFILES,
mlxsw_sp_rif_mac_profiles_occ_get,
mlxsw_sp);
+ devlink_resource_occ_get_register(devlink,
+ MLXSW_SP_RESOURCE_RIFS,
+ mlxsw_sp_rifs_occ_get,
+ mlxsw_sp);
return 0;
}
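The RIF occupancy counter added here plugs into the generic devlink resource API: the driver registers a getter per resource ID and devlink calls it on `devlink resource show`. A minimal sketch of the registration pattern (MLXSW_SP_RESOURCE_RIFS is the driver's real ID; the EXAMPLE_* names below are made up):

#include <linux/atomic.h>
#include <net/devlink.h>

#define EXAMPLE_RESOURCE_ID 42	/* hypothetical resource id */

static atomic_t example_obj_count = ATOMIC_INIT(0);

/* Matches devlink_resource_occ_get_t: return the current occupancy. */
static u64 example_occ_get(void *priv)
{
	return atomic_read(&example_obj_count);
}

static void example_occ_register(struct devlink *devlink)
{
	devlink_resource_occ_get_register(devlink, EXAMPLE_RESOURCE_ID,
					  example_occ_get, NULL);
}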
@@ -10149,9 +9846,11 @@ static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
int i;
+ WARN_ON_ONCE(atomic_read(&mlxsw_sp->router->rifs_count));
for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
+ devlink_resource_occ_get_unregister(devlink, MLXSW_SP_RESOURCE_RIFS);
devlink_resource_occ_get_unregister(devlink,
MLXSW_SP_RESOURCE_RIF_MAC_PROFILES);
WARN_ON(!idr_is_empty(&mlxsw_sp->router->rif_mac_profiles_idr));
@@ -10533,46 +10232,6 @@ static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
}
-static const struct mlxsw_sp_router_ll_ops mlxsw_sp_router_ll_basic_ops = {
- .init = mlxsw_sp_router_ll_basic_init,
- .ralta_write = mlxsw_sp_router_ll_basic_ralta_write,
- .ralst_write = mlxsw_sp_router_ll_basic_ralst_write,
- .raltb_write = mlxsw_sp_router_ll_basic_raltb_write,
- .fib_entry_op_ctx_size = sizeof(struct mlxsw_sp_fib_entry_op_ctx_basic),
- .fib_entry_pack = mlxsw_sp_router_ll_basic_fib_entry_pack,
- .fib_entry_act_remote_pack = mlxsw_sp_router_ll_basic_fib_entry_act_remote_pack,
- .fib_entry_act_local_pack = mlxsw_sp_router_ll_basic_fib_entry_act_local_pack,
- .fib_entry_act_ip2me_pack = mlxsw_sp_router_ll_basic_fib_entry_act_ip2me_pack,
- .fib_entry_act_ip2me_tun_pack = mlxsw_sp_router_ll_basic_fib_entry_act_ip2me_tun_pack,
- .fib_entry_commit = mlxsw_sp_router_ll_basic_fib_entry_commit,
- .fib_entry_is_committed = mlxsw_sp_router_ll_basic_fib_entry_is_committed,
-};
-
-static int mlxsw_sp_router_ll_op_ctx_init(struct mlxsw_sp_router *router)
-{
- size_t max_size = 0;
- int i;
-
- for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
- size_t size = router->proto_ll_ops[i]->fib_entry_op_ctx_size;
-
- if (size > max_size)
- max_size = size;
- }
- router->ll_op_ctx = kzalloc(sizeof(*router->ll_op_ctx) + max_size,
- GFP_KERNEL);
- if (!router->ll_op_ctx)
- return -ENOMEM;
- INIT_LIST_HEAD(&router->ll_op_ctx->fib_entry_priv_list);
- return 0;
-}
-
-static void mlxsw_sp_router_ll_op_ctx_fini(struct mlxsw_sp_router *router)
-{
- WARN_ON(!list_empty(&router->ll_op_ctx->fib_entry_priv_list));
- kfree(router->ll_op_ctx);
-}
-
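For context, the ll_op_ctx removed here was sized with a common kernel idiom: a header struct ending in a flexible array member, allocated once with the largest per-protocol private area appended. A minimal sketch of that idiom with hypothetical names:

struct example_op_ctx {
	unsigned int initialized:1;
	unsigned long priv[];	/* flexible array member, sized at alloc */
};

static struct example_op_ctx *example_op_ctx_alloc(size_t max_priv_size)
{
	/* One allocation covers the header plus the private area. */
	return kzalloc(sizeof(struct example_op_ctx) + max_priv_size,
		       GFP_KERNEL);
}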
static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp)
{
u16 lb_rif_index;
@@ -10646,23 +10305,9 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
if (err)
goto err_router_ops_init;
- err = mlxsw_sp_router_xm_init(mlxsw_sp);
- if (err)
- goto err_xm_init;
-
- router->proto_ll_ops[MLXSW_SP_L3_PROTO_IPV4] = mlxsw_sp_router_xm_ipv4_is_supported(mlxsw_sp) ?
- &mlxsw_sp_router_ll_xm_ops :
- &mlxsw_sp_router_ll_basic_ops;
- router->proto_ll_ops[MLXSW_SP_L3_PROTO_IPV6] = &mlxsw_sp_router_ll_basic_ops;
-
- err = mlxsw_sp_router_ll_op_ctx_init(router);
- if (err)
- goto err_ll_op_ctx_init;
-
INIT_LIST_HEAD(&mlxsw_sp->router->nh_res_grp_list);
INIT_DELAYED_WORK(&mlxsw_sp->router->nh_grp_activity_dw,
mlxsw_sp_nh_grp_activity_work);
-
INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
err = __mlxsw_sp_router_init(mlxsw_sp);
if (err)
@@ -10715,10 +10360,6 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
if (err)
goto err_dscp_init;
- INIT_WORK(&router->fib_event_work, mlxsw_sp_router_fib_event_work);
- INIT_LIST_HEAD(&router->fib_event_queue);
- spin_lock_init(&router->fib_event_queue_lock);
-
router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event;
err = register_inetaddr_notifier(&router->inetaddr_nb);
if (err)
@@ -10773,7 +10414,6 @@ err_register_inet6addr_notifier:
unregister_inetaddr_notifier(&router->inetaddr_nb);
err_register_inetaddr_notifier:
mlxsw_core_flush_owq();
- WARN_ON(!list_empty(&router->fib_event_queue));
err_dscp_init:
err_mp_hash_init:
mlxsw_sp_neigh_fini(mlxsw_sp);
@@ -10797,10 +10437,6 @@ err_rifs_init:
__mlxsw_sp_router_fini(mlxsw_sp);
err_router_init:
cancel_delayed_work_sync(&mlxsw_sp->router->nh_grp_activity_dw);
- mlxsw_sp_router_ll_op_ctx_fini(router);
-err_ll_op_ctx_init:
- mlxsw_sp_router_xm_fini(mlxsw_sp);
-err_xm_init:
err_router_ops_init:
mutex_destroy(&mlxsw_sp->router->lock);
kfree(mlxsw_sp->router);
@@ -10819,7 +10455,6 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb);
unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb);
mlxsw_core_flush_owq();
- WARN_ON(!list_empty(&mlxsw_sp->router->fib_event_queue));
mlxsw_sp_neigh_fini(mlxsw_sp);
mlxsw_sp_lb_rif_fini(mlxsw_sp);
mlxsw_sp_vrs_fini(mlxsw_sp);
@@ -10831,8 +10466,6 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
mlxsw_sp_rifs_fini(mlxsw_sp);
__mlxsw_sp_router_fini(mlxsw_sp);
cancel_delayed_work_sync(&mlxsw_sp->router->nh_grp_activity_dw);
- mlxsw_sp_router_ll_op_ctx_fini(mlxsw_sp->router);
- mlxsw_sp_router_xm_fini(mlxsw_sp);
mutex_destroy(&mlxsw_sp->router->lock);
kfree(mlxsw_sp->router);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index 37411b74c3e6..b5c83ec7a87f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -15,32 +15,12 @@ struct mlxsw_sp_router_nve_decap {
u8 valid:1;
};
-struct mlxsw_sp_fib_entry_op_ctx {
- u8 bulk_ok:1, /* Indicate to the low-level op it is ok to bulk
- * the actual entry with the one that is the next
- * in queue.
- */
- initialized:1; /* Bit that the low-level op sets in case
- * the context priv is initialized.
- */
- struct list_head fib_entry_priv_list;
- unsigned long event;
- unsigned long ll_priv[];
-};
-
-static inline void
-mlxsw_sp_fib_entry_op_ctx_clear(struct mlxsw_sp_fib_entry_op_ctx *op_ctx)
-{
- WARN_ON_ONCE(!list_empty(&op_ctx->fib_entry_priv_list));
- memset(op_ctx, 0, sizeof(*op_ctx));
- INIT_LIST_HEAD(&op_ctx->fib_entry_priv_list);
-}
-
struct mlxsw_sp_router {
struct mlxsw_sp *mlxsw_sp;
struct mlxsw_sp_rif **rifs;
struct idr rif_mac_profiles_idr;
atomic_t rif_mac_profiles_count;
+ atomic_t rifs_count;
u8 max_rif_mac_profile;
struct mlxsw_sp_vr *vrs;
struct rhashtable neigh_ht;
@@ -72,14 +52,8 @@ struct mlxsw_sp_router {
const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
struct mlxsw_sp_router_nve_decap nve_decap_config;
struct mutex lock; /* Protects shared router resources */
- struct work_struct fib_event_work;
- struct list_head fib_event_queue;
- spinlock_t fib_event_queue_lock; /* Protects fib event queue list */
- /* One set of ops for each protocol: IPv4 and IPv6 */
- const struct mlxsw_sp_router_ll_ops *proto_ll_ops[MLXSW_SP_L3_PROTO_MAX];
- struct mlxsw_sp_fib_entry_op_ctx *ll_op_ctx;
u16 lb_rif_index;
- struct mlxsw_sp_router_xm *xm;
const struct mlxsw_sp_adj_grp_size_range *adj_grp_size_ranges;
size_t adj_grp_size_ranges_count;
struct delayed_work nh_grp_activity_dw;
@@ -89,48 +63,6 @@ struct mlxsw_sp_router {
u32 adj_trap_index;
};
-struct mlxsw_sp_fib_entry_priv {
- refcount_t refcnt;
- struct list_head list; /* Member in op_ctx->fib_entry_priv_list */
- unsigned long priv[];
-};
-
-enum mlxsw_sp_fib_entry_op {
- MLXSW_SP_FIB_ENTRY_OP_WRITE,
- MLXSW_SP_FIB_ENTRY_OP_UPDATE,
- MLXSW_SP_FIB_ENTRY_OP_DELETE,
-};
-
-/* Low-level router ops. Basically this is to handle the different
- * register sets to work with ordinary and XM trees and FIB entries.
- */
-struct mlxsw_sp_router_ll_ops {
- int (*init)(struct mlxsw_sp *mlxsw_sp, u16 vr_id,
- enum mlxsw_sp_l3proto proto);
- int (*ralta_write)(struct mlxsw_sp *mlxsw_sp, char *xralta_pl);
- int (*ralst_write)(struct mlxsw_sp *mlxsw_sp, char *xralst_pl);
- int (*raltb_write)(struct mlxsw_sp *mlxsw_sp, char *xraltb_pl);
- size_t fib_entry_op_ctx_size;
- size_t fib_entry_priv_size;
- void (*fib_entry_pack)(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
- enum mlxsw_sp_l3proto proto, enum mlxsw_sp_fib_entry_op op,
- u16 virtual_router, u8 prefix_len, unsigned char *addr,
- struct mlxsw_sp_fib_entry_priv *priv);
- void (*fib_entry_act_remote_pack)(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
- enum mlxsw_reg_ralue_trap_action trap_action,
- u16 trap_id, u32 adjacency_index, u16 ecmp_size);
- void (*fib_entry_act_local_pack)(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
- enum mlxsw_reg_ralue_trap_action trap_action,
- u16 trap_id, u16 local_erif);
- void (*fib_entry_act_ip2me_pack)(struct mlxsw_sp_fib_entry_op_ctx *op_ctx);
- void (*fib_entry_act_ip2me_tun_pack)(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
- u32 tunnel_ptr);
- int (*fib_entry_commit)(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
- bool *postponed_for_bulk);
- bool (*fib_entry_is_committed)(struct mlxsw_sp_fib_entry_priv *priv);
-};
-
struct mlxsw_sp_rif_ipip_lb;
struct mlxsw_sp_rif_ipip_lb_config {
enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt;
@@ -232,10 +164,4 @@ int mlxsw_sp_ipip_ecn_decap_init(struct mlxsw_sp *mlxsw_sp);
struct net_device *
mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev);
-extern const struct mlxsw_sp_router_ll_ops mlxsw_sp_router_ll_xm_ops;
-
-int mlxsw_sp_router_xm_init(struct mlxsw_sp *mlxsw_sp);
-void mlxsw_sp_router_xm_fini(struct mlxsw_sp *mlxsw_sp);
-bool mlxsw_sp_router_xm_ipv4_is_supported(const struct mlxsw_sp *mlxsw_sp);
-
#endif /* _MLXSW_ROUTER_H_*/
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router_xm.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router_xm.c
deleted file mode 100644
index d213af723a2a..000000000000
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router_xm.c
+++ /dev/null
@@ -1,812 +0,0 @@
-// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
-/* Copyright (c) 2020 Mellanox Technologies. All rights reserved */
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/rhashtable.h>
-
-#include "spectrum.h"
-#include "core.h"
-#include "reg.h"
-#include "spectrum_router.h"
-
-#define MLXSW_SP_ROUTER_XM_M_VAL 16
-
-static const u8 mlxsw_sp_router_xm_m_val[] = {
- [MLXSW_SP_L3_PROTO_IPV4] = MLXSW_SP_ROUTER_XM_M_VAL,
- [MLXSW_SP_L3_PROTO_IPV6] = 0, /* Currently unused. */
-};
-
-#define MLXSW_SP_ROUTER_XM_L_VAL_MAX 16
-
-struct mlxsw_sp_router_xm {
- bool ipv4_supported;
- bool ipv6_supported;
- unsigned int entries_size;
- struct rhashtable ltable_ht;
- struct rhashtable flush_ht; /* Stores items about to be flushed from cache */
- unsigned int flush_count;
- bool flush_all_mode;
-};
-
-struct mlxsw_sp_router_xm_ltable_node {
- struct rhash_head ht_node; /* Member of router_xm->ltable_ht */
- u16 mindex;
- u8 current_lvalue;
- refcount_t refcnt;
- unsigned int lvalue_ref[MLXSW_SP_ROUTER_XM_L_VAL_MAX + 1];
-};
-
-static const struct rhashtable_params mlxsw_sp_router_xm_ltable_ht_params = {
- .key_offset = offsetof(struct mlxsw_sp_router_xm_ltable_node, mindex),
- .head_offset = offsetof(struct mlxsw_sp_router_xm_ltable_node, ht_node),
- .key_len = sizeof(u16),
- .automatic_shrinking = true,
-};
-
-struct mlxsw_sp_router_xm_flush_info {
- bool all;
- enum mlxsw_sp_l3proto proto;
- u16 virtual_router;
- u8 prefix_len;
- unsigned char addr[sizeof(struct in6_addr)];
-};
-
-struct mlxsw_sp_router_xm_fib_entry {
- bool committed;
- struct mlxsw_sp_router_xm_ltable_node *ltable_node; /* Parent node */
- u16 mindex; /* Store for processing from commit op */
- u8 lvalue;
- struct mlxsw_sp_router_xm_flush_info flush_info;
-};
-
-#define MLXSW_SP_ROUTE_LL_XM_ENTRIES_MAX \
- (MLXSW_REG_XMDR_TRANS_LEN / MLXSW_REG_XMDR_C_LT_ROUTE_V4_LEN)
-
-struct mlxsw_sp_fib_entry_op_ctx_xm {
- bool initialized;
- char xmdr_pl[MLXSW_REG_XMDR_LEN];
- unsigned int trans_offset; /* Offset of the current command within one
- * transaction of XMDR register.
- */
- unsigned int trans_item_len; /* The current command length. This is used
- * to advance 'trans_offset' when the next
- * command is appended.
- */
- unsigned int entries_count;
- struct mlxsw_sp_router_xm_fib_entry *entries[MLXSW_SP_ROUTE_LL_XM_ENTRIES_MAX];
-};
-
-static int mlxsw_sp_router_ll_xm_init(struct mlxsw_sp *mlxsw_sp, u16 vr_id,
- enum mlxsw_sp_l3proto proto)
-{
- char rxlte_pl[MLXSW_REG_RXLTE_LEN];
-
- mlxsw_reg_rxlte_pack(rxlte_pl, vr_id,
- (enum mlxsw_reg_rxlte_protocol) proto, true);
- return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rxlte), rxlte_pl);
-}
-
-static int mlxsw_sp_router_ll_xm_ralta_write(struct mlxsw_sp *mlxsw_sp, char *xralta_pl)
-{
- return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(xralta), xralta_pl);
-}
-
-static int mlxsw_sp_router_ll_xm_ralst_write(struct mlxsw_sp *mlxsw_sp, char *xralst_pl)
-{
- return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(xralst), xralst_pl);
-}
-
-static int mlxsw_sp_router_ll_xm_raltb_write(struct mlxsw_sp *mlxsw_sp, char *xraltb_pl)
-{
- return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(xraltb), xraltb_pl);
-}
-
-static u16 mlxsw_sp_router_ll_xm_mindex_get4(const u32 addr)
-{
- /* Currently the M-index is set to linear mode. That means it is defined
- * as the 16 MSB of the IP address.
- */
- return addr >> MLXSW_SP_ROUTER_XM_L_VAL_MAX;
-}
-
-static u16 mlxsw_sp_router_ll_xm_mindex_get6(const unsigned char *addr)
-{
- WARN_ON_ONCE(1);
- return 0; /* currently unused */
-}
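(Worked example, not from the source: with MLXSW_SP_ROUTER_XM_L_VAL_MAX = 16, the linear-mode M-index of the IPv4 address 192.168.1.0 = 0xC0A80100 is 0xC0A80100 >> 16 = 0xC0A8.)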
-
-static void mlxsw_sp_router_ll_xm_op_ctx_check_init(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
- struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm)
-{
- if (op_ctx->initialized)
- return;
- op_ctx->initialized = true;
-
- mlxsw_reg_xmdr_pack(op_ctx_xm->xmdr_pl, true);
- op_ctx_xm->trans_offset = 0;
- op_ctx_xm->entries_count = 0;
-}
-
-static void mlxsw_sp_router_ll_xm_fib_entry_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
- enum mlxsw_sp_l3proto proto,
- enum mlxsw_sp_fib_entry_op op,
- u16 virtual_router, u8 prefix_len,
- unsigned char *addr,
- struct mlxsw_sp_fib_entry_priv *priv)
-{
- struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm = (void *) op_ctx->ll_priv;
- struct mlxsw_sp_router_xm_fib_entry *fib_entry = (void *) priv->priv;
- struct mlxsw_sp_router_xm_flush_info *flush_info;
- enum mlxsw_reg_xmdr_c_ltr_op xmdr_c_ltr_op;
- unsigned int len;
-
- mlxsw_sp_router_ll_xm_op_ctx_check_init(op_ctx, op_ctx_xm);
-
- switch (op) {
- case MLXSW_SP_FIB_ENTRY_OP_WRITE:
- xmdr_c_ltr_op = MLXSW_REG_XMDR_C_LTR_OP_WRITE;
- break;
- case MLXSW_SP_FIB_ENTRY_OP_UPDATE:
- xmdr_c_ltr_op = MLXSW_REG_XMDR_C_LTR_OP_UPDATE;
- break;
- case MLXSW_SP_FIB_ENTRY_OP_DELETE:
- xmdr_c_ltr_op = MLXSW_REG_XMDR_C_LTR_OP_DELETE;
- break;
- default:
- WARN_ON_ONCE(1);
- return;
- }
-
- switch (proto) {
- case MLXSW_SP_L3_PROTO_IPV4:
- len = mlxsw_reg_xmdr_c_ltr_pack4(op_ctx_xm->xmdr_pl, op_ctx_xm->trans_offset,
- op_ctx_xm->entries_count, xmdr_c_ltr_op,
- virtual_router, prefix_len, (u32 *) addr);
- fib_entry->mindex = mlxsw_sp_router_ll_xm_mindex_get4(*((u32 *) addr));
- break;
- case MLXSW_SP_L3_PROTO_IPV6:
- len = mlxsw_reg_xmdr_c_ltr_pack6(op_ctx_xm->xmdr_pl, op_ctx_xm->trans_offset,
- op_ctx_xm->entries_count, xmdr_c_ltr_op,
- virtual_router, prefix_len, addr);
- fib_entry->mindex = mlxsw_sp_router_ll_xm_mindex_get6(addr);
- break;
- default:
- WARN_ON_ONCE(1);
- return;
- }
- if (!op_ctx_xm->trans_offset)
- op_ctx_xm->trans_item_len = len;
- else
- WARN_ON_ONCE(op_ctx_xm->trans_item_len != len);
-
- op_ctx_xm->entries[op_ctx_xm->entries_count] = fib_entry;
-
- fib_entry->lvalue = prefix_len > mlxsw_sp_router_xm_m_val[proto] ?
- prefix_len - mlxsw_sp_router_xm_m_val[proto] : 0;
-
- flush_info = &fib_entry->flush_info;
- flush_info->proto = proto;
- flush_info->virtual_router = virtual_router;
- flush_info->prefix_len = prefix_len;
- if (addr)
- memcpy(flush_info->addr, addr, sizeof(flush_info->addr));
- else
- memset(flush_info->addr, 0, sizeof(flush_info->addr));
-}
-
-static void
-mlxsw_sp_router_ll_xm_fib_entry_act_remote_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
- enum mlxsw_reg_ralue_trap_action trap_action,
- u16 trap_id, u32 adjacency_index, u16 ecmp_size)
-{
- struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm = (void *) op_ctx->ll_priv;
-
- mlxsw_reg_xmdr_c_ltr_act_remote_pack(op_ctx_xm->xmdr_pl, op_ctx_xm->trans_offset,
- trap_action, trap_id, adjacency_index, ecmp_size);
-}
-
-static void
-mlxsw_sp_router_ll_xm_fib_entry_act_local_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
- enum mlxsw_reg_ralue_trap_action trap_action,
- u16 trap_id, u16 local_erif)
-{
- struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm = (void *) op_ctx->ll_priv;
-
- mlxsw_reg_xmdr_c_ltr_act_local_pack(op_ctx_xm->xmdr_pl, op_ctx_xm->trans_offset,
- trap_action, trap_id, local_erif);
-}
-
-static void
-mlxsw_sp_router_ll_xm_fib_entry_act_ip2me_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx)
-{
- struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm = (void *) op_ctx->ll_priv;
-
- mlxsw_reg_xmdr_c_ltr_act_ip2me_pack(op_ctx_xm->xmdr_pl, op_ctx_xm->trans_offset);
-}
-
-static void
-mlxsw_sp_router_ll_xm_fib_entry_act_ip2me_tun_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
- u32 tunnel_ptr)
-{
- struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm = (void *) op_ctx->ll_priv;
-
- mlxsw_reg_xmdr_c_ltr_act_ip2me_tun_pack(op_ctx_xm->xmdr_pl, op_ctx_xm->trans_offset,
- tunnel_ptr);
-}
-
-static struct mlxsw_sp_router_xm_ltable_node *
-mlxsw_sp_router_xm_ltable_node_get(struct mlxsw_sp_router_xm *router_xm, u16 mindex)
-{
- struct mlxsw_sp_router_xm_ltable_node *ltable_node;
- int err;
-
- ltable_node = rhashtable_lookup_fast(&router_xm->ltable_ht, &mindex,
- mlxsw_sp_router_xm_ltable_ht_params);
- if (ltable_node) {
- refcount_inc(&ltable_node->refcnt);
- return ltable_node;
- }
- ltable_node = kzalloc(sizeof(*ltable_node), GFP_KERNEL);
- if (!ltable_node)
- return ERR_PTR(-ENOMEM);
- ltable_node->mindex = mindex;
- refcount_set(&ltable_node->refcnt, 1);
-
- err = rhashtable_insert_fast(&router_xm->ltable_ht, &ltable_node->ht_node,
- mlxsw_sp_router_xm_ltable_ht_params);
- if (err)
- goto err_insert;
-
- return ltable_node;
-
-err_insert:
- kfree(ltable_node);
- return ERR_PTR(err);
-}
-
-static void mlxsw_sp_router_xm_ltable_node_put(struct mlxsw_sp_router_xm *router_xm,
- struct mlxsw_sp_router_xm_ltable_node *ltable_node)
-{
- if (!refcount_dec_and_test(&ltable_node->refcnt))
- return;
- rhashtable_remove_fast(&router_xm->ltable_ht, &ltable_node->ht_node,
- mlxsw_sp_router_xm_ltable_ht_params);
- kfree(ltable_node);
-}
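
The get/put pair above is the common lookup-or-create idiom over a refcounted node. A simplified userspace sketch of the same shape, with the rhashtable lookup stubbed out (all names here are hypothetical):

#include <stdlib.h>

struct node {
	unsigned int refcnt;
	/* key and payload would live here */
};

/* 'existing' stands in for a hash-table lookup result. */
static struct node *node_get(struct node *existing)
{
	struct node *n = existing;

	if (n) {
		n->refcnt++;			/* found: take a reference */
		return n;
	}
	n = calloc(1, sizeof(*n));
	if (!n)
		return NULL;
	n->refcnt = 1;				/* created: caller holds first ref */
	return n;
}

static void node_put(struct node *n)
{
	if (--n->refcnt)
		return;
	free(n);				/* last reference dropped */
}

int main(void)
{
	struct node *a = node_get(NULL);	/* create, refcnt == 1 */
	struct node *b = node_get(a);		/* reuse, refcnt == 2 */

	node_put(b);				/* refcnt back to 1 */
	node_put(a);				/* freed here */
	return 0;
}
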
-
-static int mlxsw_sp_router_xm_ltable_lvalue_set(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_router_xm_ltable_node *ltable_node)
-{
- char xrmt_pl[MLXSW_REG_XRMT_LEN];
-
- mlxsw_reg_xrmt_pack(xrmt_pl, ltable_node->mindex, ltable_node->current_lvalue);
- return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(xrmt), xrmt_pl);
-}
-
-struct mlxsw_sp_router_xm_flush_node {
- struct rhash_head ht_node; /* Member of router_xm->flush_ht */
- struct list_head list;
- struct mlxsw_sp_router_xm_flush_info flush_info;
- struct delayed_work dw;
- struct mlxsw_sp *mlxsw_sp;
- unsigned long start_jiffies;
- unsigned int reuses; /* By how many flush calls this was reused. */
- refcount_t refcnt;
-};
-
-static const struct rhashtable_params mlxsw_sp_router_xm_flush_ht_params = {
- .key_offset = offsetof(struct mlxsw_sp_router_xm_flush_node, flush_info),
- .head_offset = offsetof(struct mlxsw_sp_router_xm_flush_node, ht_node),
- .key_len = sizeof(struct mlxsw_sp_router_xm_flush_info),
- .automatic_shrinking = true,
-};
-
-static struct mlxsw_sp_router_xm_flush_node *
-mlxsw_sp_router_xm_cache_flush_node_create(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_router_xm_flush_info *flush_info)
-{
- struct mlxsw_sp_router_xm *router_xm = mlxsw_sp->router->xm;
- struct mlxsw_sp_router_xm_flush_node *flush_node;
- int err;
-
- flush_node = kzalloc(sizeof(*flush_node), GFP_KERNEL);
- if (!flush_node)
- return ERR_PTR(-ENOMEM);
-
- flush_node->flush_info = *flush_info;
- err = rhashtable_insert_fast(&router_xm->flush_ht, &flush_node->ht_node,
- mlxsw_sp_router_xm_flush_ht_params);
- if (err) {
- kfree(flush_node);
- return ERR_PTR(err);
- }
- router_xm->flush_count++;
- flush_node->mlxsw_sp = mlxsw_sp;
- flush_node->start_jiffies = jiffies;
- refcount_set(&flush_node->refcnt, 1);
- return flush_node;
-}
-
-static void
-mlxsw_sp_router_xm_cache_flush_node_hold(struct mlxsw_sp_router_xm_flush_node *flush_node)
-{
- if (!flush_node)
- return;
- refcount_inc(&flush_node->refcnt);
-}
-
-static void
-mlxsw_sp_router_xm_cache_flush_node_put(struct mlxsw_sp_router_xm_flush_node *flush_node)
-{
- if (!flush_node || !refcount_dec_and_test(&flush_node->refcnt))
- return;
- kfree(flush_node);
-}
-
-static void
-mlxsw_sp_router_xm_cache_flush_node_destroy(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_router_xm_flush_node *flush_node)
-{
- struct mlxsw_sp_router_xm *router_xm = mlxsw_sp->router->xm;
-
- router_xm->flush_count--;
- rhashtable_remove_fast(&router_xm->flush_ht, &flush_node->ht_node,
- mlxsw_sp_router_xm_flush_ht_params);
- mlxsw_sp_router_xm_cache_flush_node_put(flush_node);
-}
-
-static u32 mlxsw_sp_router_xm_flush_mask4(u8 prefix_len)
-{
- return GENMASK(31, 32 - prefix_len);
-}
-
-static unsigned char *mlxsw_sp_router_xm_flush_mask6(u8 prefix_len)
-{
- static unsigned char mask[sizeof(struct in6_addr)];
-
- memset(mask, 0, sizeof(mask));
- memset(mask, 0xff, prefix_len / 8);
- mask[prefix_len / 8] = GENMASK(8, 8 - prefix_len % 8);
- return mask;
-}
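
A hedged illustration of the IPv4 mask construction above: GENMASK(31, 32 - prefix_len) keeps the top prefix_len bits set. The userspace equivalent (valid for prefix lengths 1..32):

#include <stdint.h>
#include <stdio.h>

static uint32_t flush_mask4(unsigned int prefix_len)
{
	/* Keep the top prefix_len bits; prefix_len must be 1..32. */
	return ~UINT32_C(0) << (32 - prefix_len);
}

int main(void)
{
	printf("/24 -> 0x%08X\n", flush_mask4(24));	/* 0xFFFFFF00 */
	printf("/20 -> 0x%08X\n", flush_mask4(20));	/* 0xFFFFF000 */
	return 0;
}
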
-
-#define MLXSW_SP_ROUTER_XM_CACHE_PARALLEL_FLUSHES_LIMIT 15
-#define MLXSW_SP_ROUTER_XM_CACHE_FLUSH_ALL_MIN_REUSES 15
-#define MLXSW_SP_ROUTER_XM_CACHE_DELAY 50 /* usecs */
-#define MLXSW_SP_ROUTER_XM_CACHE_MAX_WAIT (MLXSW_SP_ROUTER_XM_CACHE_DELAY * 10)
-
-static void mlxsw_sp_router_xm_cache_flush_work(struct work_struct *work)
-{
- struct mlxsw_sp_router_xm_flush_info *flush_info;
- struct mlxsw_sp_router_xm_flush_node *flush_node;
- char rlcmld_pl[MLXSW_REG_RLCMLD_LEN];
- enum mlxsw_reg_rlcmld_select select;
- struct mlxsw_sp *mlxsw_sp;
- u32 addr4;
- int err;
-
- flush_node = container_of(work, struct mlxsw_sp_router_xm_flush_node,
- dw.work);
- mlxsw_sp = flush_node->mlxsw_sp;
- flush_info = &flush_node->flush_info;
-
- if (flush_info->all) {
- char rlpmce_pl[MLXSW_REG_RLPMCE_LEN];
-
- mlxsw_reg_rlpmce_pack(rlpmce_pl, true, false);
- err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rlpmce),
- rlpmce_pl);
- if (err)
- dev_err(mlxsw_sp->bus_info->dev, "Failed to flush XM cache\n");
-
- if (flush_node->reuses <
- MLXSW_SP_ROUTER_XM_CACHE_FLUSH_ALL_MIN_REUSES)
- /* Leaving flush-all mode. */
- mlxsw_sp->router->xm->flush_all_mode = false;
- goto out;
- }
-
- select = MLXSW_REG_RLCMLD_SELECT_M_AND_ML_ENTRIES;
-
- switch (flush_info->proto) {
- case MLXSW_SP_L3_PROTO_IPV4:
- addr4 = *((u32 *) flush_info->addr);
- addr4 &= mlxsw_sp_router_xm_flush_mask4(flush_info->prefix_len);
-
- /* In case the flush prefix length is bigger than the M-value,
- * it makes no sense to flush M entries. So just flush
- * the ML entries.
- */
- if (flush_info->prefix_len > MLXSW_SP_ROUTER_XM_M_VAL)
- select = MLXSW_REG_RLCMLD_SELECT_ML_ENTRIES;
-
- mlxsw_reg_rlcmld_pack4(rlcmld_pl, select,
- flush_info->virtual_router, addr4,
- mlxsw_sp_router_xm_flush_mask4(flush_info->prefix_len));
- break;
- case MLXSW_SP_L3_PROTO_IPV6:
- mlxsw_reg_rlcmld_pack6(rlcmld_pl, select,
- flush_info->virtual_router, flush_info->addr,
- mlxsw_sp_router_xm_flush_mask6(flush_info->prefix_len));
- break;
- default:
- WARN_ON(true);
- goto out;
- }
- err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rlcmld), rlcmld_pl);
- if (err)
- dev_err(mlxsw_sp->bus_info->dev, "Failed to flush XM cache\n");
-
-out:
- mlxsw_sp_router_xm_cache_flush_node_destroy(mlxsw_sp, flush_node);
-}
-
-static bool
-mlxsw_sp_router_xm_cache_flush_may_cancel(struct mlxsw_sp_router_xm_flush_node *flush_node)
-{
- unsigned long max_wait = usecs_to_jiffies(MLXSW_SP_ROUTER_XM_CACHE_MAX_WAIT);
- unsigned long delay = usecs_to_jiffies(MLXSW_SP_ROUTER_XM_CACHE_DELAY);
-
- /* In case the same flushing work is already pending, check
- * if we can consolidate with it. Consolidation is allowed up to
- * MAX_WAIT. Cancel the delayed work; if it was still pending, the
- * cancel succeeded and the work can be rescheduled with a fresh
- * delay.
- */
- if (time_is_before_jiffies(flush_node->start_jiffies + max_wait - delay) &&
- cancel_delayed_work_sync(&flush_node->dw))
- return true;
- return false;
-}
-
-static int
-mlxsw_sp_router_xm_cache_flush_schedule(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_router_xm_flush_info *flush_info)
-{
- unsigned long delay = usecs_to_jiffies(MLXSW_SP_ROUTER_XM_CACHE_DELAY);
- struct mlxsw_sp_router_xm_flush_info flush_all_info = {.all = true};
- struct mlxsw_sp_router_xm *router_xm = mlxsw_sp->router->xm;
- struct mlxsw_sp_router_xm_flush_node *flush_node;
-
- /* Check if the queued number of flushes reached a critical amount
- * after which it is better to just flush the whole cache.
- */
- if (router_xm->flush_count == MLXSW_SP_ROUTER_XM_CACHE_PARALLEL_FLUSHES_LIMIT)
- /* Entering flush-all mode. */
- router_xm->flush_all_mode = true;
-
- if (router_xm->flush_all_mode)
- flush_info = &flush_all_info;
-
- rcu_read_lock();
- flush_node = rhashtable_lookup_fast(&router_xm->flush_ht, flush_info,
- mlxsw_sp_router_xm_flush_ht_params);
- /* Take a reference so the object is not freed before a possible
- * delayed work cancel can be done.
- */
- mlxsw_sp_router_xm_cache_flush_node_hold(flush_node);
- rcu_read_unlock();
-
- if (flush_node && mlxsw_sp_router_xm_cache_flush_may_cancel(flush_node)) {
- flush_node->reuses++;
- mlxsw_sp_router_xm_cache_flush_node_put(flush_node);
- /* The original work was within the wait period and was canceled.
- * That means the reference is still held and the
- * flush_node_put() call above did not free the flush_node.
- * Reschedule it with a fresh delay.
- */
- goto schedule_work;
- } else {
- mlxsw_sp_router_xm_cache_flush_node_put(flush_node);
- }
-
- flush_node = mlxsw_sp_router_xm_cache_flush_node_create(mlxsw_sp, flush_info);
- if (IS_ERR(flush_node))
- return PTR_ERR(flush_node);
- INIT_DELAYED_WORK(&flush_node->dw, mlxsw_sp_router_xm_cache_flush_work);
-
-schedule_work:
- mlxsw_core_schedule_dw(&flush_node->dw, delay);
- return 0;
-}
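
Conceptually, the scheduling logic above merges bursts of flush requests for the same key: a request arriving while an identical one is still queued only bumps a reuse counter and restarts the timer, and consolidation stops once the original request has waited close to MAX_WAIT. A loose userspace model of that intent (the jiffies arithmetic of the real code is simplified away):

#include <stdbool.h>
#include <stdio.h>

#define CACHE_DELAY	50			/* usecs */
#define CACHE_MAX_WAIT	(CACHE_DELAY * 10)

struct flush_req {
	unsigned long start;			/* time the request was queued */
	unsigned int reuses;
	bool queued;
};

/* Returns true if the new request was consolidated into 'f'. */
static bool flush_request(struct flush_req *f, unsigned long now)
{
	if (f->queued && now + CACHE_DELAY <= f->start + CACHE_MAX_WAIT) {
		f->reuses++;			/* merge: restart the timer */
		return true;
	}
	f->start = now;				/* fresh request */
	f->reuses = 0;
	f->queued = true;
	return false;
}

int main(void)
{
	struct flush_req f = { 0 };
	unsigned long t;

	for (t = 0; t < 1000; t += 100)
		flush_request(&f, t);
	printf("reuses = %u\n", f.reuses);	/* 4 with these numbers */
	return 0;
}
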
-
-static int
-mlxsw_sp_router_xm_ml_entry_add(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_router_xm_fib_entry *fib_entry)
-{
- struct mlxsw_sp_router_xm *router_xm = mlxsw_sp->router->xm;
- struct mlxsw_sp_router_xm_ltable_node *ltable_node;
- u8 lvalue = fib_entry->lvalue;
- int err;
-
- ltable_node = mlxsw_sp_router_xm_ltable_node_get(router_xm,
- fib_entry->mindex);
- if (IS_ERR(ltable_node))
- return PTR_ERR(ltable_node);
- if (lvalue > ltable_node->current_lvalue) {
- /* The L-value is bigger than the one currently set, update. */
- ltable_node->current_lvalue = lvalue;
- err = mlxsw_sp_router_xm_ltable_lvalue_set(mlxsw_sp,
- ltable_node);
- if (err)
- goto err_lvalue_set;
-
- /* The L value for prefix/M is increased.
- * Therefore, all entries in M and ML caches matching
- * {prefix/M, proto, VR} need to be flushed. Set the flush
- * prefix length to M to achieve that.
- */
- fib_entry->flush_info.prefix_len = MLXSW_SP_ROUTER_XM_M_VAL;
- }
-
- ltable_node->lvalue_ref[lvalue]++;
- fib_entry->ltable_node = ltable_node;
-
- return 0;
-
-err_lvalue_set:
- mlxsw_sp_router_xm_ltable_node_put(router_xm, ltable_node);
- return err;
-}
-
-static void
-mlxsw_sp_router_xm_ml_entry_del(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_router_xm_fib_entry *fib_entry)
-{
- struct mlxsw_sp_router_xm_ltable_node *ltable_node =
- fib_entry->ltable_node;
- struct mlxsw_sp_router_xm *router_xm = mlxsw_sp->router->xm;
- u8 lvalue = fib_entry->lvalue;
-
- ltable_node->lvalue_ref[lvalue]--;
- if (lvalue == ltable_node->current_lvalue && lvalue &&
- !ltable_node->lvalue_ref[lvalue]) {
- u8 new_lvalue = lvalue - 1;
-
- /* Find the biggest L-value left out there. */
- while (new_lvalue > 0 && !ltable_node->lvalue_ref[new_lvalue])
- new_lvalue--;
-
- ltable_node->current_lvalue = new_lvalue;
- mlxsw_sp_router_xm_ltable_lvalue_set(mlxsw_sp, ltable_node);
-
- /* The L value for prefix/M is decreased.
- * Therefore, all entries in M and ML caches matching
- * {prefix/M, proto, VR} need to be flushed. Set the flush
- * prefix length to M to achieve that.
- */
- fib_entry->flush_info.prefix_len = MLXSW_SP_ROUTER_XM_M_VAL;
- }
- mlxsw_sp_router_xm_ltable_node_put(router_xm, ltable_node);
-}
-
-static int
-mlxsw_sp_router_xm_ml_entries_add(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm)
-{
- struct mlxsw_sp_router_xm_fib_entry *fib_entry;
- int err;
- int i;
-
- for (i = 0; i < op_ctx_xm->entries_count; i++) {
- fib_entry = op_ctx_xm->entries[i];
- err = mlxsw_sp_router_xm_ml_entry_add(mlxsw_sp, fib_entry);
- if (err)
- goto rollback;
- }
- return 0;
-
-rollback:
- for (i--; i >= 0; i--) {
- fib_entry = op_ctx_xm->entries[i];
- mlxsw_sp_router_xm_ml_entry_del(mlxsw_sp, fib_entry);
- }
- return err;
-}
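
The add loop above uses the standard unwind-on-failure shape: if entry i fails, entries [0, i) are removed in reverse order before the error is returned. Stripped to its skeleton (stubs in place of the real operations):

#include <stddef.h>

static int entry_add(size_t idx) { (void)idx; return 0; }	/* stub */
static void entry_del(size_t idx) { (void)idx; }		/* stub */

static int entries_add(size_t count)
{
	size_t i;
	int err;

	for (i = 0; i < count; i++) {
		err = entry_add(i);
		if (err)
			goto rollback;
	}
	return 0;

rollback:
	while (i--)			/* undo entries [0, i) in reverse */
		entry_del(i);
	return err;
}

int main(void)
{
	return entries_add(4);
}
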
-
-static void
-mlxsw_sp_router_xm_ml_entries_del(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm)
-{
- struct mlxsw_sp_router_xm_fib_entry *fib_entry;
- int i;
-
- for (i = 0; i < op_ctx_xm->entries_count; i++) {
- fib_entry = op_ctx_xm->entries[i];
- mlxsw_sp_router_xm_ml_entry_del(mlxsw_sp, fib_entry);
- }
-}
-
-static void
-mlxsw_sp_router_xm_ml_entries_cache_flush(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm)
-{
- struct mlxsw_sp_router_xm_fib_entry *fib_entry;
- int err;
- int i;
-
- for (i = 0; i < op_ctx_xm->entries_count; i++) {
- fib_entry = op_ctx_xm->entries[i];
- err = mlxsw_sp_router_xm_cache_flush_schedule(mlxsw_sp,
- &fib_entry->flush_info);
- if (err)
- dev_err(mlxsw_sp->bus_info->dev, "Failed to flush XM cache\n");
- }
-}
-
-static int mlxsw_sp_router_ll_xm_fib_entry_commit(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
- bool *postponed_for_bulk)
-{
- struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm = (void *) op_ctx->ll_priv;
- struct mlxsw_sp_router_xm_fib_entry *fib_entry;
- u8 num_rec;
- int err;
- int i;
-
- op_ctx_xm->trans_offset += op_ctx_xm->trans_item_len;
- op_ctx_xm->entries_count++;
-
- /* Check if bulking is possible and there is still room for another
- * FIB entry record. The size of 'trans_item_len' is either the size
- * of an IPv4 command or the size of an IPv6 command. It is not
- * possible to mix the two in a single XMDR write.
- */
- if (op_ctx->bulk_ok &&
- op_ctx_xm->trans_offset + op_ctx_xm->trans_item_len <= MLXSW_REG_XMDR_TRANS_LEN) {
- if (postponed_for_bulk)
- *postponed_for_bulk = true;
- return 0;
- }
-
- if (op_ctx->event == FIB_EVENT_ENTRY_REPLACE) {
- /* The L-table is updated inside. It has to be done before
- * the prefix is inserted.
- */
- err = mlxsw_sp_router_xm_ml_entries_add(mlxsw_sp, op_ctx_xm);
- if (err)
- goto out;
- }
-
- err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(xmdr), op_ctx_xm->xmdr_pl);
- if (err)
- goto out;
- num_rec = mlxsw_reg_xmdr_num_rec_get(op_ctx_xm->xmdr_pl);
- if (num_rec > op_ctx_xm->entries_count) {
- dev_err(mlxsw_sp->bus_info->dev, "Invalid XMDR number of records\n");
- err = -EIO;
- goto out;
- }
- for (i = 0; i < num_rec; i++) {
- if (!mlxsw_reg_xmdr_reply_vect_get(op_ctx_xm->xmdr_pl, i)) {
- dev_err(mlxsw_sp->bus_info->dev, "Command send over XMDR failed\n");
- err = -EIO;
- goto out;
- } else {
- fib_entry = op_ctx_xm->entries[i];
- fib_entry->committed = true;
- }
- }
-
- if (op_ctx->event == FIB_EVENT_ENTRY_DEL)
- /* The L-table is updated inside. It has to be done after
- * the prefix has been removed.
- */
- mlxsw_sp_router_xm_ml_entries_del(mlxsw_sp, op_ctx_xm);
-
- /* At the very end, do the XLT cache flushing to evict stale
- * M and ML cache entries after prefixes were inserted/removed.
- */
- mlxsw_sp_router_xm_ml_entries_cache_flush(mlxsw_sp, op_ctx_xm);
-
-out:
- /* Next pack call is going to do reinitialization */
- op_ctx->initialized = false;
- return err;
-}
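
The bulking check in the commit path above defers the XMDR write while another record of the same size still fits in the transaction buffer. A toy model of that cut-off (buffer and record sizes are made up):

#include <stdbool.h>
#include <stdio.h>

#define TRANS_LEN 256				/* hypothetical buffer size */

static bool can_bulk_more(unsigned int offset, unsigned int item_len)
{
	return offset + item_len <= TRANS_LEN;
}

int main(void)
{
	unsigned int item = 48;			/* one record already packed */
	unsigned int off = item;

	while (can_bulk_more(off, item))
		off += item;			/* keep postponing the write */
	printf("flush issued at offset %u\n", off);	/* 240 here */
	return 0;
}
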
-
-static bool mlxsw_sp_router_ll_xm_fib_entry_is_committed(struct mlxsw_sp_fib_entry_priv *priv)
-{
- struct mlxsw_sp_router_xm_fib_entry *fib_entry = (void *) priv->priv;
-
- return fib_entry->committed;
-}
-
-const struct mlxsw_sp_router_ll_ops mlxsw_sp_router_ll_xm_ops = {
- .init = mlxsw_sp_router_ll_xm_init,
- .ralta_write = mlxsw_sp_router_ll_xm_ralta_write,
- .ralst_write = mlxsw_sp_router_ll_xm_ralst_write,
- .raltb_write = mlxsw_sp_router_ll_xm_raltb_write,
- .fib_entry_op_ctx_size = sizeof(struct mlxsw_sp_fib_entry_op_ctx_xm),
- .fib_entry_priv_size = sizeof(struct mlxsw_sp_router_xm_fib_entry),
- .fib_entry_pack = mlxsw_sp_router_ll_xm_fib_entry_pack,
- .fib_entry_act_remote_pack = mlxsw_sp_router_ll_xm_fib_entry_act_remote_pack,
- .fib_entry_act_local_pack = mlxsw_sp_router_ll_xm_fib_entry_act_local_pack,
- .fib_entry_act_ip2me_pack = mlxsw_sp_router_ll_xm_fib_entry_act_ip2me_pack,
- .fib_entry_act_ip2me_tun_pack = mlxsw_sp_router_ll_xm_fib_entry_act_ip2me_tun_pack,
- .fib_entry_commit = mlxsw_sp_router_ll_xm_fib_entry_commit,
- .fib_entry_is_committed = mlxsw_sp_router_ll_xm_fib_entry_is_committed,
-};
-
-#define MLXSW_SP_ROUTER_XM_MINDEX_SIZE (64 * 1024)
-
-int mlxsw_sp_router_xm_init(struct mlxsw_sp *mlxsw_sp)
-{
- struct mlxsw_sp_router_xm *router_xm;
- char rxltm_pl[MLXSW_REG_RXLTM_LEN];
- char xltq_pl[MLXSW_REG_XLTQ_LEN];
- u32 mindex_size;
- u16 device_id;
- int err;
-
- if (!mlxsw_sp->bus_info->xm_exists)
- return 0;
-
- router_xm = kzalloc(sizeof(*router_xm), GFP_KERNEL);
- if (!router_xm)
- return -ENOMEM;
-
- mlxsw_reg_xltq_pack(xltq_pl);
- err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(xltq), xltq_pl);
- if (err)
- goto err_xltq_query;
- mlxsw_reg_xltq_unpack(xltq_pl, &device_id, &router_xm->ipv4_supported,
- &router_xm->ipv6_supported, &router_xm->entries_size, &mindex_size);
-
- if (device_id != MLXSW_REG_XLTQ_XM_DEVICE_ID_XLT) {
- dev_err(mlxsw_sp->bus_info->dev, "Invalid XM device id\n");
- err = -EINVAL;
- goto err_device_id_check;
- }
-
- if (mindex_size != MLXSW_SP_ROUTER_XM_MINDEX_SIZE) {
- dev_err(mlxsw_sp->bus_info->dev, "Unexpected M-index size\n");
- err = -EINVAL;
- goto err_mindex_size_check;
- }
-
- mlxsw_reg_rxltm_pack(rxltm_pl, mlxsw_sp_router_xm_m_val[MLXSW_SP_L3_PROTO_IPV4],
- mlxsw_sp_router_xm_m_val[MLXSW_SP_L3_PROTO_IPV6]);
- err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rxltm), rxltm_pl);
- if (err)
- goto err_rxltm_write;
-
- err = rhashtable_init(&router_xm->ltable_ht, &mlxsw_sp_router_xm_ltable_ht_params);
- if (err)
- goto err_ltable_ht_init;
-
- err = rhashtable_init(&router_xm->flush_ht, &mlxsw_sp_router_xm_flush_ht_params);
- if (err)
- goto err_flush_ht_init;
-
- mlxsw_sp->router->xm = router_xm;
- return 0;
-
-err_flush_ht_init:
- rhashtable_destroy(&router_xm->ltable_ht);
-err_ltable_ht_init:
-err_rxltm_write:
-err_mindex_size_check:
-err_device_id_check:
-err_xltq_query:
- kfree(router_xm);
- return err;
-}
-
-void mlxsw_sp_router_xm_fini(struct mlxsw_sp *mlxsw_sp)
-{
- struct mlxsw_sp_router_xm *router_xm = mlxsw_sp->router->xm;
-
- if (!mlxsw_sp->bus_info->xm_exists)
- return;
-
- rhashtable_destroy(&router_xm->flush_ht);
- rhashtable_destroy(&router_xm->ltable_ht);
- kfree(router_xm);
-}
-
-bool mlxsw_sp_router_xm_ipv4_is_supported(const struct mlxsw_sp *mlxsw_sp)
-{
- struct mlxsw_sp_router_xm *router_xm = mlxsw_sp->router->xm;
-
- return router_xm && router_xm->ipv4_supported;
-}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index a6d2e806cba9..863c8055746b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -111,10 +111,10 @@ static void
mlxsw_sp_bridge_port_mdb_flush(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_bridge_port *bridge_port);
-static void
-mlxsw_sp_bridge_mdb_mc_enable_sync(struct mlxsw_sp_port *mlxsw_sp_port,
+static int
+mlxsw_sp_bridge_mdb_mc_enable_sync(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_bridge_device
- *bridge_device);
+ *bridge_device, bool mc_enabled);
static void
mlxsw_sp_port_mrouter_update_mdb(struct mlxsw_sp_port *mlxsw_sp_port,
@@ -643,6 +643,64 @@ err_port_bridge_vlan_flood_set:
}
static int
+mlxsw_sp_bridge_vlans_flood_set(struct mlxsw_sp_bridge_vlan *bridge_vlan,
+ enum mlxsw_sp_flood_type packet_type,
+ bool member)
+{
+ struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+ int err;
+
+ list_for_each_entry(mlxsw_sp_port_vlan, &bridge_vlan->port_vlan_list,
+ bridge_vlan_node) {
+ u16 local_port = mlxsw_sp_port_vlan->mlxsw_sp_port->local_port;
+
+ err = mlxsw_sp_fid_flood_set(mlxsw_sp_port_vlan->fid,
+ packet_type, local_port, member);
+ if (err)
+ goto err_fid_flood_set;
+ }
+
+ return 0;
+
+err_fid_flood_set:
+ list_for_each_entry_continue_reverse(mlxsw_sp_port_vlan,
+ &bridge_vlan->port_vlan_list,
+ bridge_vlan_node) {
+ u16 local_port = mlxsw_sp_port_vlan->mlxsw_sp_port->local_port;
+
+ mlxsw_sp_fid_flood_set(mlxsw_sp_port_vlan->fid, packet_type,
+ local_port, !member);
+ }
+
+ return err;
+}
+
+static int
+mlxsw_sp_bridge_ports_flood_table_set(struct mlxsw_sp_bridge_port *bridge_port,
+ enum mlxsw_sp_flood_type packet_type,
+ bool member)
+{
+ struct mlxsw_sp_bridge_vlan *bridge_vlan;
+ int err;
+
+ list_for_each_entry(bridge_vlan, &bridge_port->vlans_list, list) {
+ err = mlxsw_sp_bridge_vlans_flood_set(bridge_vlan, packet_type,
+ member);
+ if (err)
+ goto err_bridge_vlans_flood_set;
+ }
+
+ return 0;
+
+err_bridge_vlans_flood_set:
+ list_for_each_entry_continue_reverse(bridge_vlan,
+ &bridge_port->vlans_list, list)
+ mlxsw_sp_bridge_vlans_flood_set(bridge_vlan, packet_type,
+ !member);
+ return err;
+}
+
+static int
mlxsw_sp_port_bridge_vlan_learning_set(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_bridge_vlan *bridge_vlan,
bool set)
@@ -842,6 +900,7 @@ static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port,
struct net_device *orig_dev,
bool mc_disabled)
{
+ enum mlxsw_sp_flood_type packet_type = MLXSW_SP_FLOOD_TYPE_MC;
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
struct mlxsw_sp_bridge_device *bridge_device;
struct mlxsw_sp_bridge_port *bridge_port;
@@ -854,26 +913,40 @@ static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port,
if (!bridge_device)
return 0;
- if (bridge_device->multicast_enabled != !mc_disabled) {
- bridge_device->multicast_enabled = !mc_disabled;
- mlxsw_sp_bridge_mdb_mc_enable_sync(mlxsw_sp_port,
- bridge_device);
- }
+ if (bridge_device->multicast_enabled == !mc_disabled)
+ return 0;
+
+ bridge_device->multicast_enabled = !mc_disabled;
+ err = mlxsw_sp_bridge_mdb_mc_enable_sync(mlxsw_sp, bridge_device,
+ !mc_disabled);
+ if (err)
+ goto err_mc_enable_sync;
list_for_each_entry(bridge_port, &bridge_device->ports_list, list) {
- enum mlxsw_sp_flood_type packet_type = MLXSW_SP_FLOOD_TYPE_MC;
bool member = mlxsw_sp_mc_flood(bridge_port);
- err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port,
- bridge_port,
- packet_type, member);
+ err = mlxsw_sp_bridge_ports_flood_table_set(bridge_port,
+ packet_type,
+ member);
if (err)
- return err;
+ goto err_flood_table_set;
}
- bridge_device->multicast_enabled = !mc_disabled;
-
return 0;
+
+err_flood_table_set:
+ list_for_each_entry_continue_reverse(bridge_port,
+ &bridge_device->ports_list, list) {
+ bool member = mlxsw_sp_mc_flood(bridge_port);
+
+ mlxsw_sp_bridge_ports_flood_table_set(bridge_port, packet_type,
+ !member);
+ }
+ mlxsw_sp_bridge_mdb_mc_enable_sync(mlxsw_sp, bridge_device,
+ mc_disabled);
+err_mc_enable_sync:
+ bridge_device->multicast_enabled = mc_disabled;
+ return err;
}
static int mlxsw_sp_smid_router_port_set(struct mlxsw_sp *mlxsw_sp,
@@ -887,7 +960,7 @@ static int mlxsw_sp_smid_router_port_set(struct mlxsw_sp *mlxsw_sp,
return -ENOMEM;
mlxsw_reg_smid2_pack(smid2_pl, mid_idx,
- mlxsw_sp_router_port(mlxsw_sp), add);
+ mlxsw_sp_router_port(mlxsw_sp), add, false, 0);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smid2), smid2_pl);
kfree(smid2_pl);
return err;
@@ -1584,7 +1657,7 @@ static int mlxsw_sp_port_smid_full_entry(struct mlxsw_sp *mlxsw_sp, u16 mid_idx,
if (!smid2_pl)
return -ENOMEM;
- mlxsw_reg_smid2_pack(smid2_pl, mid_idx, 0, false);
+ mlxsw_reg_smid2_pack(smid2_pl, mid_idx, 0, false, false, 0);
for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++) {
if (mlxsw_sp->ports[i])
mlxsw_reg_smid2_port_mask_set(smid2_pl, i, 1);
@@ -1615,7 +1688,8 @@ static int mlxsw_sp_port_smid_set(struct mlxsw_sp_port *mlxsw_sp_port,
if (!smid2_pl)
return -ENOMEM;
- mlxsw_reg_smid2_pack(smid2_pl, mid_idx, mlxsw_sp_port->local_port, add);
+ mlxsw_reg_smid2_pack(smid2_pl, mid_idx, mlxsw_sp_port->local_port, add,
+ false, 0);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smid2), smid2_pl);
kfree(smid2_pl);
return err;
@@ -1676,7 +1750,7 @@ mlxsw_sp_mc_get_mrouters_bitmap(unsigned long *flood_bitmap,
}
}
-static bool
+static int
mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_mid *mid,
struct mlxsw_sp_bridge_device *bridge_device)
@@ -1689,12 +1763,12 @@ mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp,
mid_idx = find_first_zero_bit(mlxsw_sp->bridge->mids_bitmap,
MLXSW_SP_MID_MAX);
if (mid_idx == MLXSW_SP_MID_MAX)
- return false;
+ return -ENOBUFS;
num_of_ports = mlxsw_core_max_ports(mlxsw_sp->core);
flood_bitmap = bitmap_alloc(num_of_ports, GFP_KERNEL);
if (!flood_bitmap)
- return false;
+ return -ENOMEM;
bitmap_copy(flood_bitmap, mid->ports_in_mid, num_of_ports);
mlxsw_sp_mc_get_mrouters_bitmap(flood_bitmap, bridge_device, mlxsw_sp);
@@ -1704,16 +1778,16 @@ mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp,
bridge_device->mrouter);
bitmap_free(flood_bitmap);
if (err)
- return false;
+ return err;
err = mlxsw_sp_port_mdb_op(mlxsw_sp, mid->addr, mid->fid, mid_idx,
true);
if (err)
- return false;
+ return err;
set_bit(mid_idx, mlxsw_sp->bridge->mids_bitmap);
mid->in_hw = true;
- return true;
+ return 0;
}
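
The hunk above replaces a bare bool with proper errno returns, so callers can distinguish a full MID table (-ENOBUFS) from an allocation failure (-ENOMEM). A minimal sketch of the same conversion (function and values are illustrative):

#include <errno.h>
#include <stdio.h>
#include <string.h>

static int mdb_entry_write(int free_idx, const void *bitmap)
{
	if (free_idx < 0)
		return -ENOBUFS;		/* MID table is full */
	if (!bitmap)
		return -ENOMEM;			/* flood bitmap allocation failed */
	return 0;				/* success instead of 'true' */
}

int main(void)
{
	int err = mdb_entry_write(-1, "");

	if (err)
		printf("mdb write failed: %s\n", strerror(-err));
	return 0;
}
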
static int mlxsw_sp_mc_remove_mdb_entry(struct mlxsw_sp *mlxsw_sp,
@@ -1735,6 +1809,7 @@ mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp,
u16 fid)
{
struct mlxsw_sp_mid *mid;
+ int err;
mid = kzalloc(sizeof(*mid), GFP_KERNEL);
if (!mid)
@@ -1752,7 +1827,8 @@ mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp,
if (!bridge_device->multicast_enabled)
goto out;
- if (!mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid, bridge_device))
+ err = mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid, bridge_device);
+ if (err)
goto err_write_mdb_entry;
out:
@@ -1839,24 +1915,37 @@ err_out:
return err;
}
-static void
-mlxsw_sp_bridge_mdb_mc_enable_sync(struct mlxsw_sp_port *mlxsw_sp_port,
- struct mlxsw_sp_bridge_device
- *bridge_device)
+static int
+mlxsw_sp_bridge_mdb_mc_enable_sync(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_bridge_device *bridge_device,
+ bool mc_enabled)
{
- struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
struct mlxsw_sp_mid *mid;
- bool mc_enabled;
-
- mc_enabled = bridge_device->multicast_enabled;
+ int err;
list_for_each_entry(mid, &bridge_device->mids_list, list) {
if (mc_enabled)
- mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid,
- bridge_device);
+ err = mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid,
+ bridge_device);
else
+ err = mlxsw_sp_mc_remove_mdb_entry(mlxsw_sp, mid);
+
+ if (err)
+ goto err_mdb_entry_update;
+ }
+
+ return 0;
+
+err_mdb_entry_update:
+ list_for_each_entry_continue_reverse(mid, &bridge_device->mids_list,
+ list) {
+ if (mc_enabled)
mlxsw_sp_mc_remove_mdb_entry(mlxsw_sp, mid);
+ else
+ mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid,
+ bridge_device);
}
+ return err;
}
static void
@@ -2729,8 +2818,7 @@ static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp,
bridge_device = bridge_port->bridge_device;
vid = bridge_device->vlan_enabled ? mlxsw_sp_port_vlan->vid : 0;
- lag_vid = mlxsw_sp_fid_lag_vid_valid(mlxsw_sp_port_vlan->fid) ?
- mlxsw_sp_port_vlan->vid : 0;
+ lag_vid = mlxsw_sp_port_vlan->vid;
do_fdb_op:
err = mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp, lag_id, mac, fid, lag_vid,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
index ed4d0d3448f3..d0baba38d2a3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
@@ -953,16 +953,16 @@ static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = {
.trap = MLXSW_SP_TRAP_CONTROL(ARP_REQUEST, NEIGH_DISCOVERY,
MIRROR),
.listeners_arr = {
- MLXSW_SP_RXL_MARK(ARPBC, NEIGH_DISCOVERY, MIRROR_TO_CPU,
- false),
+ MLXSW_SP_RXL_MARK(ROUTER_ARPBC, NEIGH_DISCOVERY,
+ TRAP_TO_CPU, false),
},
},
{
.trap = MLXSW_SP_TRAP_CONTROL(ARP_RESPONSE, NEIGH_DISCOVERY,
MIRROR),
.listeners_arr = {
- MLXSW_SP_RXL_MARK(ARPUC, NEIGH_DISCOVERY, MIRROR_TO_CPU,
- false),
+ MLXSW_SP_RXL_MARK(ROUTER_ARPUC, NEIGH_DISCOVERY,
+ TRAP_TO_CPU, false),
},
},
{
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index d888498aed33..8da169663bda 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -27,8 +27,6 @@ enum {
MLXSW_TRAP_ID_PKT_SAMPLE = 0x38,
MLXSW_TRAP_ID_FID_MISS = 0x3D,
MLXSW_TRAP_ID_DECAP_ECN0 = 0x40,
- MLXSW_TRAP_ID_ARPBC = 0x50,
- MLXSW_TRAP_ID_ARPUC = 0x51,
MLXSW_TRAP_ID_MTUERROR = 0x52,
MLXSW_TRAP_ID_TTLERROR = 0x53,
MLXSW_TRAP_ID_LBERROR = 0x54,
@@ -71,6 +69,8 @@ enum {
MLXSW_TRAP_ID_IPV6_BFD = 0xD1,
MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6,
MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7,
+ MLXSW_TRAP_ID_ROUTER_ARPBC = 0xE0,
+ MLXSW_TRAP_ID_ROUTER_ARPUC = 0xE1,
MLXSW_TRAP_ID_DISCARD_NON_ROUTABLE = 0x11A,
MLXSW_TRAP_ID_DISCARD_ROUTER2 = 0x130,
MLXSW_TRAP_ID_DISCARD_ROUTER3 = 0x131,
diff --git a/drivers/net/ethernet/microchip/lan743x_ethtool.c b/drivers/net/ethernet/microchip/lan743x_ethtool.c
index c8fe8b31f07b..b1c74e6cb012 100644
--- a/drivers/net/ethernet/microchip/lan743x_ethtool.c
+++ b/drivers/net/ethernet/microchip/lan743x_ethtool.c
@@ -155,8 +155,8 @@ static int lan743x_otp_write(struct lan743x_adapter *adapter, u32 offset,
return 0;
}
-static int lan743x_hs_syslock_acquire(struct lan743x_adapter *adapter,
- u16 timeout)
+int lan743x_hs_syslock_acquire(struct lan743x_adapter *adapter,
+ u16 timeout)
{
u16 timeout_cnt = 0;
u32 val;
@@ -192,7 +192,7 @@ static int lan743x_hs_syslock_acquire(struct lan743x_adapter *adapter,
return 0;
}
-static void lan743x_hs_syslock_release(struct lan743x_adapter *adapter)
+void lan743x_hs_syslock_release(struct lan743x_adapter *adapter)
{
u32 val;
@@ -1149,7 +1149,12 @@ static void lan743x_ethtool_get_wol(struct net_device *netdev,
wol->supported |= WAKE_BCAST | WAKE_UCAST | WAKE_MCAST |
WAKE_MAGIC | WAKE_PHY | WAKE_ARP;
+ if (adapter->is_pci11x1x)
+ wol->supported |= WAKE_MAGICSECURE;
+
wol->wolopts |= adapter->wolopts;
+ if (adapter->wolopts & WAKE_MAGICSECURE)
+ memcpy(wol->sopass, adapter->sopass, sizeof(wol->sopass));
}
static int lan743x_ethtool_set_wol(struct net_device *netdev,
@@ -1170,6 +1175,13 @@ static int lan743x_ethtool_set_wol(struct net_device *netdev,
adapter->wolopts |= WAKE_PHY;
if (wol->wolopts & WAKE_ARP)
adapter->wolopts |= WAKE_ARP;
+ if (wol->wolopts & WAKE_MAGICSECURE &&
+ wol->wolopts & WAKE_MAGIC) {
+ memcpy(adapter->sopass, wol->sopass, sizeof(wol->sopass));
+ adapter->wolopts |= WAKE_MAGICSECURE;
+ } else {
+ memset(adapter->sopass, 0, sizeof(u8) * SOPASS_MAX);
+ }
device_set_wakeup_enable(&adapter->pdev->dev, (bool)wol->wolopts);
@@ -1178,6 +1190,49 @@ static int lan743x_ethtool_set_wol(struct net_device *netdev,
}
#endif /* CONFIG_PM */
+static void lan743x_common_regs(struct net_device *dev,
+ struct ethtool_regs *regs, void *p)
+{
+ struct lan743x_adapter *adapter = netdev_priv(dev);
+ u32 *rb = p;
+
+ memset(p, 0, (MAX_LAN743X_ETH_REGS * sizeof(u32)));
+
+ rb[ETH_PRIV_FLAGS] = adapter->flags;
+ rb[ETH_ID_REV] = lan743x_csr_read(adapter, ID_REV);
+ rb[ETH_FPGA_REV] = lan743x_csr_read(adapter, FPGA_REV);
+ rb[ETH_STRAP_READ] = lan743x_csr_read(adapter, STRAP_READ);
+ rb[ETH_INT_STS] = lan743x_csr_read(adapter, INT_STS);
+ rb[ETH_HW_CFG] = lan743x_csr_read(adapter, HW_CFG);
+ rb[ETH_PMT_CTL] = lan743x_csr_read(adapter, PMT_CTL);
+ rb[ETH_E2P_CMD] = lan743x_csr_read(adapter, E2P_CMD);
+ rb[ETH_E2P_DATA] = lan743x_csr_read(adapter, E2P_DATA);
+ rb[ETH_MAC_CR] = lan743x_csr_read(adapter, MAC_CR);
+ rb[ETH_MAC_RX] = lan743x_csr_read(adapter, MAC_RX);
+ rb[ETH_MAC_TX] = lan743x_csr_read(adapter, MAC_TX);
+ rb[ETH_FLOW] = lan743x_csr_read(adapter, MAC_FLOW);
+ rb[ETH_MII_ACC] = lan743x_csr_read(adapter, MAC_MII_ACC);
+ rb[ETH_MII_DATA] = lan743x_csr_read(adapter, MAC_MII_DATA);
+ rb[ETH_EEE_TX_LPI_REQ_DLY] = lan743x_csr_read(adapter,
+ MAC_EEE_TX_LPI_REQ_DLY_CNT);
+ rb[ETH_WUCSR] = lan743x_csr_read(adapter, MAC_WUCSR);
+ rb[ETH_WK_SRC] = lan743x_csr_read(adapter, MAC_WK_SRC);
+}
+
+static int lan743x_get_regs_len(struct net_device *dev)
+{
+ return MAX_LAN743X_ETH_REGS * sizeof(u32);
+}
+
+static void lan743x_get_regs(struct net_device *dev,
+ struct ethtool_regs *regs, void *p)
+{
+ regs->version = LAN743X_ETH_REG_VERSION;
+
+ lan743x_common_regs(dev, regs, p);
+}
+
const struct ethtool_ops lan743x_ethtool_ops = {
.get_drvinfo = lan743x_ethtool_get_drvinfo,
.get_msglevel = lan743x_ethtool_get_msglevel,
@@ -1202,6 +1257,8 @@ const struct ethtool_ops lan743x_ethtool_ops = {
.set_eee = lan743x_ethtool_set_eee,
.get_link_ksettings = phy_ethtool_get_link_ksettings,
.set_link_ksettings = phy_ethtool_set_link_ksettings,
+ .get_regs_len = lan743x_get_regs_len,
+ .get_regs = lan743x_get_regs,
#ifdef CONFIG_PM
.get_wol = lan743x_ethtool_get_wol,
.set_wol = lan743x_ethtool_set_wol,
diff --git a/drivers/net/ethernet/microchip/lan743x_ethtool.h b/drivers/net/ethernet/microchip/lan743x_ethtool.h
index d0d11a777a58..7f5996a52488 100644
--- a/drivers/net/ethernet/microchip/lan743x_ethtool.h
+++ b/drivers/net/ethernet/microchip/lan743x_ethtool.h
@@ -6,6 +6,32 @@
#include "linux/ethtool.h"
+#define LAN743X_ETH_REG_VERSION 1
+
+enum {
+ ETH_PRIV_FLAGS,
+ ETH_ID_REV,
+ ETH_FPGA_REV,
+ ETH_STRAP_READ,
+ ETH_INT_STS,
+ ETH_HW_CFG,
+ ETH_PMT_CTL,
+ ETH_E2P_CMD,
+ ETH_E2P_DATA,
+ ETH_MAC_CR,
+ ETH_MAC_RX,
+ ETH_MAC_TX,
+ ETH_FLOW,
+ ETH_MII_ACC,
+ ETH_MII_DATA,
+ ETH_EEE_TX_LPI_REQ_DLY,
+ ETH_WUCSR,
+ ETH_WK_SRC,
+
+ /* Add new registers above */
+ MAX_LAN743X_ETH_REGS
+};
+
extern const struct ethtool_ops lan743x_ethtool_ops;
#endif /* _LAN743X_ETHTOOL_H */
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index af81236b4b4e..a9a1dea6d731 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -22,20 +22,36 @@
#define MMD_ACCESS_WRITE 1
#define MMD_ACCESS_READ 2
#define MMD_ACCESS_READ_INC 3
+#define PCS_POWER_STATE_DOWN 0x6
+#define PCS_POWER_STATE_UP 0x4
static void pci11x1x_strap_get_status(struct lan743x_adapter *adapter)
{
u32 chip_rev;
+ u32 cfg_load;
+ u32 hw_cfg;
u32 strap;
+ int ret;
+
+ /* Timeout = 100 (i.e. 1 sec (10 msec * 100)) */
+ ret = lan743x_hs_syslock_acquire(adapter, 100);
+ if (ret < 0) {
+ netif_err(adapter, drv, adapter->netdev,
+ "Sys Lock acquire failed ret:%d\n", ret);
+ return;
+ }
- strap = lan743x_csr_read(adapter, STRAP_READ);
- if (strap & STRAP_READ_USE_SGMII_EN_) {
+ cfg_load = lan743x_csr_read(adapter, ETH_SYS_CONFIG_LOAD_STARTED_REG);
+ lan743x_hs_syslock_release(adapter);
+ hw_cfg = lan743x_csr_read(adapter, HW_CFG);
+
+ if (cfg_load & GEN_SYS_LOAD_STARTED_REG_ETH_ ||
+ hw_cfg & HW_CFG_RST_PROTECT_) {
+ strap = lan743x_csr_read(adapter, STRAP_READ);
if (strap & STRAP_READ_SGMII_EN_)
adapter->is_sgmii_en = true;
else
adapter->is_sgmii_en = false;
- netif_dbg(adapter, drv, adapter->netdev,
- "STRAP_READ: 0x%08X\n", strap);
} else {
chip_rev = lan743x_csr_read(adapter, FPGA_REV);
if (chip_rev) {
@@ -43,12 +59,12 @@ static void pci11x1x_strap_get_status(struct lan743x_adapter *adapter)
adapter->is_sgmii_en = true;
else
adapter->is_sgmii_en = false;
- netif_dbg(adapter, drv, adapter->netdev,
- "FPGA_REV: 0x%08X\n", chip_rev);
} else {
adapter->is_sgmii_en = false;
}
}
+ netif_dbg(adapter, drv, adapter->netdev,
+ "SGMII I/F %sable\n", adapter->is_sgmii_en ? "En" : "Dis");
}
static bool is_pci11x1x_chip(struct lan743x_adapter *adapter)
@@ -909,6 +925,318 @@ static int lan743x_mdiobus_c45_write(struct mii_bus *bus,
return ret;
}
+static int lan743x_sgmii_wait_till_not_busy(struct lan743x_adapter *adapter)
+{
+ u32 data;
+ int ret;
+
+ ret = readx_poll_timeout(LAN743X_CSR_READ_OP, SGMII_ACC, data,
+ !(data & SGMII_ACC_SGMII_BZY_), 100, 1000000);
+ if (ret < 0)
+ netif_err(adapter, drv, adapter->netdev,
+ "%s: error %d sgmii wait timeout\n", __func__, ret);
+
+ return ret;
+}
+
+static int lan743x_sgmii_read(struct lan743x_adapter *adapter, u8 mmd, u16 addr)
+{
+ u32 mmd_access;
+ int ret;
+ u32 val;
+
+ if (mmd > 31) {
+ netif_err(adapter, probe, adapter->netdev,
+ "%s mmd should <= 31\n", __func__);
+ return -EINVAL;
+ }
+
+ mutex_lock(&adapter->sgmii_rw_lock);
+ /* Load Register Address */
+ mmd_access = mmd << SGMII_ACC_SGMII_MMD_SHIFT_;
+ mmd_access |= (addr | SGMII_ACC_SGMII_BZY_);
+ lan743x_csr_write(adapter, SGMII_ACC, mmd_access);
+ ret = lan743x_sgmii_wait_till_not_busy(adapter);
+ if (ret < 0)
+ goto sgmii_unlock;
+
+ val = lan743x_csr_read(adapter, SGMII_DATA);
+ ret = (int)(val & SGMII_DATA_MASK_);
+
+sgmii_unlock:
+ mutex_unlock(&adapter->sgmii_rw_lock);
+
+ return ret;
+}
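
The read path above follows the usual indirect-register protocol: write the MMD/address word with the busy bit set, poll until the device clears it, then read the data register. A toy userspace model (the fake device completes instantly; real hardware needs the driver's timeout poll):

#include <stdint.h>
#include <stdio.h>

#define ACC_BZY (1u << 31)			/* illustrative busy bit */

static uint32_t acc_reg;
static uint32_t data_reg = 0x1234;		/* fake device state */

static void csr_write(uint32_t val)
{
	acc_reg = val & ~ACC_BZY;		/* device "finishes" at once */
}

static int sgmii_read(uint8_t mmd, uint16_t addr, uint16_t *out)
{
	csr_write(((uint32_t)mmd << 16) | addr | ACC_BZY);
	if (acc_reg & ACC_BZY)			/* driver polls with a timeout */
		return -1;
	*out = (uint16_t)data_reg;
	return 0;
}

int main(void)
{
	uint16_t val;

	if (!sgmii_read(0x1F, 0x8000, &val))
		printf("read 0x%04X\n", val);	/* prints 0x1234 */
	return 0;
}
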
+
+static int lan743x_sgmii_write(struct lan743x_adapter *adapter,
+ u8 mmd, u16 addr, u16 val)
+{
+ u32 mmd_access;
+ int ret;
+
+ if (mmd > 31) {
+ netif_err(adapter, probe, adapter->netdev,
+ "%s mmd should <= 31\n", __func__);
+ return -EINVAL;
+ }
+ mutex_lock(&adapter->sgmii_rw_lock);
+ /* Load Register Data */
+ lan743x_csr_write(adapter, SGMII_DATA, (u32)(val & SGMII_DATA_MASK_));
+ /* Load Register Address */
+ mmd_access = mmd << SGMII_ACC_SGMII_MMD_SHIFT_;
+ mmd_access |= (addr | SGMII_ACC_SGMII_BZY_ | SGMII_ACC_SGMII_WR_);
+ lan743x_csr_write(adapter, SGMII_ACC, mmd_access);
+ ret = lan743x_sgmii_wait_till_not_busy(adapter);
+ mutex_unlock(&adapter->sgmii_rw_lock);
+
+ return ret;
+}
+
+static int lan743x_sgmii_mpll_set(struct lan743x_adapter *adapter,
+ u16 baud)
+{
+ int mpllctrl0;
+ int mpllctrl1;
+ int miscctrl1;
+ int ret;
+
+ mpllctrl0 = lan743x_sgmii_read(adapter, MDIO_MMD_VEND2,
+ VR_MII_GEN2_4_MPLL_CTRL0);
+ if (mpllctrl0 < 0)
+ return mpllctrl0;
+
+ mpllctrl0 &= ~VR_MII_MPLL_CTRL0_USE_REFCLK_PAD_;
+ if (baud == VR_MII_BAUD_RATE_1P25GBPS) {
+ mpllctrl1 = VR_MII_MPLL_MULTIPLIER_100;
+ /* mpll_baud_clk/4 */
+ miscctrl1 = 0xA;
+ } else {
+ mpllctrl1 = VR_MII_MPLL_MULTIPLIER_125;
+ /* mpll_baud_clk/2 */
+ miscctrl1 = 0x5;
+ }
+
+ ret = lan743x_sgmii_write(adapter, MDIO_MMD_VEND2,
+ VR_MII_GEN2_4_MPLL_CTRL0, mpllctrl0);
+ if (ret < 0)
+ return ret;
+
+ ret = lan743x_sgmii_write(adapter, MDIO_MMD_VEND2,
+ VR_MII_GEN2_4_MPLL_CTRL1, mpllctrl1);
+ if (ret < 0)
+ return ret;
+
+ return lan743x_sgmii_write(adapter, MDIO_MMD_VEND2,
+ VR_MII_GEN2_4_MISC_CTRL1, miscctrl1);
+}
+
+static int lan743x_sgmii_2_5G_mode_set(struct lan743x_adapter *adapter,
+ bool enable)
+{
+ if (enable)
+ return lan743x_sgmii_mpll_set(adapter,
+ VR_MII_BAUD_RATE_3P125GBPS);
+ else
+ return lan743x_sgmii_mpll_set(adapter,
+ VR_MII_BAUD_RATE_1P25GBPS);
+}
+
+static int lan743x_is_sgmii_2_5G_mode(struct lan743x_adapter *adapter,
+ bool *status)
+{
+ int ret;
+
+ ret = lan743x_sgmii_read(adapter, MDIO_MMD_VEND2,
+ VR_MII_GEN2_4_MPLL_CTRL1);
+ if (ret < 0)
+ return ret;
+
+ if (ret == VR_MII_MPLL_MULTIPLIER_125 ||
+ ret == VR_MII_MPLL_MULTIPLIER_50)
+ *status = true;
+ else
+ *status = false;
+
+ return 0;
+}
+
+static int lan743x_sgmii_aneg_update(struct lan743x_adapter *adapter)
+{
+ enum lan743x_sgmii_lsd lsd = adapter->sgmii_lsd;
+ int mii_ctrl;
+ int dgt_ctrl;
+ int an_ctrl;
+ int ret;
+
+ if (lsd == LINK_2500_MASTER || lsd == LINK_2500_SLAVE)
+ /* Switch to 2.5 Gbps */
+ ret = lan743x_sgmii_2_5G_mode_set(adapter, true);
+ else
+ /* Switch to 10/100/1000 Mbps clock */
+ ret = lan743x_sgmii_2_5G_mode_set(adapter, false);
+ if (ret < 0)
+ return ret;
+
+ /* Enable SGMII Auto NEG */
+ mii_ctrl = lan743x_sgmii_read(adapter, MDIO_MMD_VEND2, MII_BMCR);
+ if (mii_ctrl < 0)
+ return mii_ctrl;
+
+ an_ctrl = lan743x_sgmii_read(adapter, MDIO_MMD_VEND2, VR_MII_AN_CTRL);
+ if (an_ctrl < 0)
+ return an_ctrl;
+
+ dgt_ctrl = lan743x_sgmii_read(adapter, MDIO_MMD_VEND2,
+ VR_MII_DIG_CTRL1);
+ if (dgt_ctrl < 0)
+ return dgt_ctrl;
+
+ if (lsd == LINK_2500_MASTER || lsd == LINK_2500_SLAVE) {
+ mii_ctrl &= ~(BMCR_ANENABLE | BMCR_ANRESTART | BMCR_SPEED100);
+ mii_ctrl |= BMCR_SPEED1000;
+ dgt_ctrl |= VR_MII_DIG_CTRL1_CL37_TMR_OVR_RIDE_;
+ dgt_ctrl &= ~VR_MII_DIG_CTRL1_MAC_AUTO_SW_;
+ /* In order for auto-negotiation to operate properly at
+ * 2.5 Gbps, the 1.6 ms link timer values must be adjusted:
+ * the VR_MII_LINK_TIMER_CTRL register must be set to
+ * 16'h7A1 and the CL37_TMR_OVR_RIDE bit of the
+ * VR_MII_DIG_CTRL1 register set to 1.
+ */
+ ret = lan743x_sgmii_write(adapter, MDIO_MMD_VEND2,
+ VR_MII_LINK_TIMER_CTRL, 0x7A1);
+ if (ret < 0)
+ return ret;
+ } else {
+ mii_ctrl |= (BMCR_ANENABLE | BMCR_ANRESTART);
+ an_ctrl &= ~VR_MII_AN_CTRL_SGMII_LINK_STS_;
+ dgt_ctrl &= ~VR_MII_DIG_CTRL1_CL37_TMR_OVR_RIDE_;
+ dgt_ctrl |= VR_MII_DIG_CTRL1_MAC_AUTO_SW_;
+ }
+
+ ret = lan743x_sgmii_write(adapter, MDIO_MMD_VEND2, MII_BMCR,
+ mii_ctrl);
+ if (ret < 0)
+ return ret;
+
+ ret = lan743x_sgmii_write(adapter, MDIO_MMD_VEND2,
+ VR_MII_DIG_CTRL1, dgt_ctrl);
+ if (ret < 0)
+ return ret;
+
+ return lan743x_sgmii_write(adapter, MDIO_MMD_VEND2,
+ VR_MII_AN_CTRL, an_ctrl);
+}
+
+static int lan743x_pcs_seq_state(struct lan743x_adapter *adapter, u8 state)
+{
+ u8 wait_cnt = 0;
+ u32 dig_sts;
+
+ do {
+ dig_sts = lan743x_sgmii_read(adapter, MDIO_MMD_VEND2,
+ VR_MII_DIG_STS);
+ if (((dig_sts & VR_MII_DIG_STS_PSEQ_STATE_MASK_) >>
+ VR_MII_DIG_STS_PSEQ_STATE_POS_) == state)
+ break;
+ usleep_range(1000, 2000);
+ } while (wait_cnt++ < 10);
+
+ if (wait_cnt >= 10)
+ return -ETIMEDOUT;
+
+ return 0;
+}
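
The state wait above is a bounded poll: retry a fixed number of times with a short sleep, then give up with -ETIMEDOUT. The same skeleton in isolation (the condition callback is a stand-in for the PSEQ state read):

#include <errno.h>
#include <stdio.h>

static int poll_until(int (*cond)(void), unsigned int tries)
{
	while (tries--) {
		if (cond())
			return 0;
		/* the driver sleeps here: usleep_range(1000, 2000) */
	}
	return -ETIMEDOUT;
}

static int always_ready(void) { return 1; }	/* stand-in condition */

int main(void)
{
	printf("poll -> %d\n", poll_until(always_ready, 10));
	return 0;
}
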
+
+static int lan743x_sgmii_config(struct lan743x_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ struct phy_device *phydev = netdev->phydev;
+ enum lan743x_sgmii_lsd lsd = POWER_DOWN;
+ int mii_ctl;
+ bool status;
+ int ret;
+
+ switch (phydev->speed) {
+ case SPEED_2500:
+ if (phydev->master_slave_state == MASTER_SLAVE_STATE_MASTER)
+ lsd = LINK_2500_MASTER;
+ else
+ lsd = LINK_2500_SLAVE;
+ break;
+ case SPEED_1000:
+ if (phydev->master_slave_state == MASTER_SLAVE_STATE_MASTER)
+ lsd = LINK_1000_MASTER;
+ else
+ lsd = LINK_1000_SLAVE;
+ break;
+ case SPEED_100:
+ if (phydev->duplex)
+ lsd = LINK_100FD;
+ else
+ lsd = LINK_100HD;
+ break;
+ case SPEED_10:
+ if (phydev->duplex)
+ lsd = LINK_10FD;
+ else
+ lsd = LINK_10HD;
+ break;
+ default:
+ netif_err(adapter, drv, adapter->netdev,
+ "Invalid speed %d\n", phydev->speed);
+ return -EINVAL;
+ }
+
+ adapter->sgmii_lsd = lsd;
+ ret = lan743x_sgmii_aneg_update(adapter);
+ if (ret < 0) {
+ netif_err(adapter, drv, adapter->netdev,
+ "error %d SGMII cfg failed\n", ret);
+ return ret;
+ }
+
+ ret = lan743x_is_sgmii_2_5G_mode(adapter, &status);
+ if (ret < 0) {
+ netif_err(adapter, drv, adapter->netdev,
+ "erro %d SGMII get mode failed\n", ret);
+ return ret;
+ }
+
+ if (status)
+ netif_dbg(adapter, drv, adapter->netdev,
+ "SGMII 2.5G mode enable\n");
+ else
+ netif_dbg(adapter, drv, adapter->netdev,
+ "SGMII 1G mode enable\n");
+
+ /* SGMII/1000/2500BASE-X PCS power down */
+ mii_ctl = lan743x_sgmii_read(adapter, MDIO_MMD_VEND2, MII_BMCR);
+ if (mii_ctl < 0)
+ return mii_ctl;
+
+ mii_ctl |= BMCR_PDOWN;
+ ret = lan743x_sgmii_write(adapter, MDIO_MMD_VEND2, MII_BMCR, mii_ctl);
+ if (ret < 0)
+ return ret;
+
+ ret = lan743x_pcs_seq_state(adapter, PCS_POWER_STATE_DOWN);
+ if (ret < 0)
+ return ret;
+
+ /* SGMII/1000/2500BASE-X PCS power up */
+ mii_ctl &= ~BMCR_PDOWN;
+ ret = lan743x_sgmii_write(adapter, MDIO_MMD_VEND2, MII_BMCR, mii_ctl);
+ if (ret < 0)
+ return ret;
+
+ ret = lan743x_pcs_seq_state(adapter, PCS_POWER_STATE_UP);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
static void lan743x_mac_set_address(struct lan743x_adapter *adapter,
u8 *addr)
{
@@ -1124,6 +1452,10 @@ static void lan743x_phy_link_status_change(struct net_device *netdev)
data |= MAC_CR_CFG_H_;
data &= ~MAC_CR_CFG_L_;
break;
+ case SPEED_2500:
+ data |= MAC_CR_CFG_H_;
+ data |= MAC_CR_CFG_L_;
+ break;
}
lan743x_csr_write(adapter, MAC_CR, data);
@@ -1135,6 +1467,10 @@ static void lan743x_phy_link_status_change(struct net_device *netdev)
lan743x_phy_update_flowcontrol(adapter, local_advertisement,
remote_advertisement);
lan743x_ptp_update_latency(adapter, phydev->speed);
+ if (phydev->interface == PHY_INTERFACE_MODE_SGMII ||
+ phydev->interface == PHY_INTERFACE_MODE_1000BASEX ||
+ phydev->interface == PHY_INTERFACE_MODE_2500BASEX)
+ lan743x_sgmii_config(adapter);
}
}
@@ -2875,6 +3211,7 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter,
adapter->max_vector_count = PCI11X1X_MAX_VECTOR_COUNT;
pci11x1x_strap_get_status(adapter);
spin_lock_init(&adapter->eth_syslock_spinlock);
+ mutex_init(&adapter->sgmii_rw_lock);
} else {
adapter->max_tx_channels = LAN743X_MAX_TX_CHANNELS;
adapter->used_tx_channels = LAN743X_USED_TX_CHANNELS;
@@ -3124,6 +3461,7 @@ static void lan743x_pm_set_wol(struct lan743x_adapter *adapter)
const u8 ipv6_multicast[3] = { 0x33, 0x33 };
const u8 arp_type[2] = { 0x08, 0x06 };
int mask_index;
+ u32 sopass;
u32 pmtctl;
u32 wucsr;
u32 macrx;
@@ -3218,6 +3556,14 @@ static void lan743x_pm_set_wol(struct lan743x_adapter *adapter)
pmtctl |= PMT_CTL_RX_FCT_RFE_D3_CLK_OVR_;
}
+ if (adapter->wolopts & WAKE_MAGICSECURE) {
+ sopass = *(u32 *)adapter->sopass;
+ lan743x_csr_write(adapter, MAC_MP_SO_LO, sopass);
+ sopass = *(u16 *)&adapter->sopass[4];
+ lan743x_csr_write(adapter, MAC_MP_SO_HI, sopass);
+ wucsr |= MAC_MP_SO_EN_;
+ }
+
lan743x_csr_write(adapter, MAC_WUCSR, wucsr);
lan743x_csr_write(adapter, PMT_CTL, pmtctl);
lan743x_csr_write(adapter, MAC_RX, macrx);
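
The WAKE_MAGICSECURE block above splits the 6-byte SecureOn password into a 32-bit low word and a 16-bit high word for the two registers. A safe userspace equivalent of that split (memcpy instead of the driver's pointer casts; little-endian host assumed):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	uint8_t sopass[6] = { 0x11, 0x22, 0x33, 0x44, 0x55, 0x66 };
	uint32_t lo;
	uint16_t hi;

	memcpy(&lo, sopass, sizeof(lo));	/* bytes 0..3 -> SO_LO */
	memcpy(&hi, &sopass[4], sizeof(hi));	/* bytes 4..5 -> SO_HI */
	printf("SO_LO=0x%08X SO_HI=0x%04X\n", lo, hi);
	return 0;
}
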
@@ -3228,6 +3574,7 @@ static int lan743x_pm_suspend(struct device *dev)
struct pci_dev *pdev = to_pci_dev(dev);
struct net_device *netdev = pci_get_drvdata(pdev);
struct lan743x_adapter *adapter = netdev_priv(netdev);
+ u32 data;
lan743x_pcidev_shutdown(pdev);
@@ -3239,6 +3586,18 @@ static int lan743x_pm_suspend(struct device *dev)
if (adapter->wolopts)
lan743x_pm_set_wol(adapter);
+ if (adapter->is_pci11x1x) {
+ /* Save HW_CFG so it can be restored in PM resume */
+ data = lan743x_csr_read(adapter, HW_CFG);
+ adapter->hw_cfg = data;
+ data |= (HW_CFG_RST_PROTECT_PCIE_ |
+ HW_CFG_D3_RESET_DIS_ |
+ HW_CFG_D3_VAUX_OVR_ |
+ HW_CFG_HOT_RESET_DIS_ |
+ HW_CFG_RST_PROTECT_);
+ lan743x_csr_write(adapter, HW_CFG, data);
+ }
+
/* Host sets PME_En, put D3hot */
return pci_prepare_to_sleep(pdev);
}
@@ -3254,6 +3613,10 @@ static int lan743x_pm_resume(struct device *dev)
pci_restore_state(pdev);
pci_save_state(pdev);
+ /* Restore HW_CFG that was saved during pm suspend */
+ if (adapter->is_pci11x1x)
+ lan743x_csr_write(adapter, HW_CFG, adapter->hw_cfg);
+
ret = lan743x_hardware_init(adapter, pdev);
if (ret) {
netif_err(adapter, probe, adapter->netdev,
@@ -3270,6 +3633,9 @@ static int lan743x_pm_resume(struct device *dev)
lan743x_netdev_open(netdev);
netif_device_attach(netdev);
+ ret = lan743x_csr_read(adapter, MAC_WK_SRC);
+ netif_info(adapter, drv, adapter->netdev,
+ "Wakeup source : 0x%08X\n", ret);
return 0;
}
diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h
index 1ca5f3216403..72adae4f2aa0 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.h
+++ b/drivers/net/ethernet/microchip/lan743x_main.h
@@ -43,6 +43,11 @@
#define STRAP_READ_ADV_PM_DISABLE_ BIT(0)
#define HW_CFG (0x010)
+#define HW_CFG_RST_PROTECT_PCIE_ BIT(19)
+#define HW_CFG_HOT_RESET_DIS_ BIT(15)
+#define HW_CFG_D3_VAUX_OVR_ BIT(14)
+#define HW_CFG_D3_RESET_DIS_ BIT(13)
+#define HW_CFG_RST_PROTECT_ BIT(12)
#define HW_CFG_RELOAD_TYPE_ALL_ (0x00000FC0)
#define HW_CFG_EE_OTP_RELOAD_ BIT(4)
#define HW_CFG_LRST_ BIT(1)
@@ -92,6 +97,11 @@
#define CONFIG_REG_ADDR_BASE (0x0000)
#define ETH_EEPROM_REG_ADDR_BASE (0x0E00)
#define ETH_OTP_REG_ADDR_BASE (0x1000)
+#define GEN_SYS_CONFIG_LOAD_STARTED_REG (0x0078)
+#define ETH_SYS_CONFIG_LOAD_STARTED_REG (ETH_SYS_REG_ADDR_BASE + \
+ CONFIG_REG_ADDR_BASE + \
+ GEN_SYS_CONFIG_LOAD_STARTED_REG)
+#define GEN_SYS_LOAD_STARTED_REG_ETH_ BIT(4)
#define SYS_LOCK_REG (0x00A0)
#define SYS_LOCK_REG_MAIN_LOCK_ BIT(7)
#define SYS_LOCK_REG_GEN_PERI_LOCK_ BIT(5)
@@ -214,6 +224,7 @@
#define MAC_EEE_TX_LPI_REQ_DLY_CNT (0x130)
#define MAC_WUCSR (0x140)
+#define MAC_MP_SO_EN_ BIT(21)
#define MAC_WUCSR_RFE_WAKE_EN_ BIT(14)
#define MAC_WUCSR_PFDA_EN_ BIT(3)
#define MAC_WUCSR_WAKE_EN_ BIT(2)
@@ -221,6 +232,8 @@
#define MAC_WUCSR_BCST_EN_ BIT(0)
#define MAC_WK_SRC (0x144)
+#define MAC_MP_SO_HI (0x148)
+#define MAC_MP_SO_LO (0x14C)
#define MAC_WUF_CFG0 (0x150)
#define MAC_NUM_OF_WUF_CFG (32)
@@ -280,11 +293,82 @@
#define MAC_WUCSR2 (0x600)
+#define SGMII_ACC (0x720)
+#define SGMII_ACC_SGMII_BZY_ BIT(31)
+#define SGMII_ACC_SGMII_WR_ BIT(30)
+#define SGMII_ACC_SGMII_MMD_SHIFT_ (16)
+#define SGMII_ACC_SGMII_MMD_MASK_ GENMASK(20, 16)
+#define SGMII_ACC_SGMII_MMD_VSR_ BIT(15)
+#define SGMII_ACC_SGMII_ADDR_SHIFT_ (0)
+#define SGMII_ACC_SGMII_ADDR_MASK_ GENMASK(15, 0)
+#define SGMII_DATA (0x724)
+#define SGMII_DATA_SHIFT_ (0)
+#define SGMII_DATA_MASK_ GENMASK(15, 0)
#define SGMII_CTL (0x728)
#define SGMII_CTL_SGMII_ENABLE_ BIT(31)
#define SGMII_CTL_LINK_STATUS_SOURCE_ BIT(8)
#define SGMII_CTL_SGMII_POWER_DN_ BIT(1)
+/* Vendor Specific SGMII MMD details */
+#define SR_VSMMD_PCS_ID1 0x0004
+#define SR_VSMMD_PCS_ID2 0x0005
+#define SR_VSMMD_STS 0x0008
+#define SR_VSMMD_CTRL 0x0009
+
+#define VR_MII_DIG_CTRL1 0x8000
+#define VR_MII_DIG_CTRL1_VR_RST_ BIT(15)
+#define VR_MII_DIG_CTRL1_R2TLBE_ BIT(14)
+#define VR_MII_DIG_CTRL1_EN_VSMMD1_ BIT(13)
+#define VR_MII_DIG_CTRL1_CS_EN_ BIT(10)
+#define VR_MII_DIG_CTRL1_MAC_AUTO_SW_ BIT(9)
+#define VR_MII_DIG_CTRL1_INIT_ BIT(8)
+#define VR_MII_DIG_CTRL1_DTXLANED_0_ BIT(4)
+#define VR_MII_DIG_CTRL1_CL37_TMR_OVR_RIDE_ BIT(3)
+#define VR_MII_DIG_CTRL1_EN_2_5G_MODE_ BIT(2)
+#define VR_MII_DIG_CTRL1_BYP_PWRUP_ BIT(1)
+#define VR_MII_DIG_CTRL1_PHY_MODE_CTRL_ BIT(0)
+#define VR_MII_AN_CTRL 0x8001
+#define VR_MII_AN_CTRL_MII_CTRL_ BIT(8)
+#define VR_MII_AN_CTRL_SGMII_LINK_STS_ BIT(4)
+#define VR_MII_AN_CTRL_TX_CONFIG_ BIT(3)
+#define VR_MII_AN_CTRL_1000BASE_X_ (0)
+#define VR_MII_AN_CTRL_SGMII_MODE_ (2)
+#define VR_MII_AN_CTRL_QSGMII_MODE_ (3)
+#define VR_MII_AN_CTRL_PCS_MODE_SHIFT_ (1)
+#define VR_MII_AN_CTRL_PCS_MODE_MASK_ GENMASK(2, 1)
+#define VR_MII_AN_CTRL_MII_AN_INTR_EN_ BIT(0)
+#define VR_MII_AN_INTR_STS 0x8002
+#define VR_MII_AN_INTR_STS_LINK_UP_ BIT(4)
+#define VR_MII_AN_INTR_STS_SPEED_MASK_ GENMASK(3, 2)
+#define VR_MII_AN_INTR_STS_1000_MBPS_ BIT(3)
+#define VR_MII_AN_INTR_STS_100_MBPS_ BIT(2)
+#define VR_MII_AN_INTR_STS_10_MBPS_ (0)
+#define VR_MII_AN_INTR_STS_FDX_ BIT(1)
+#define VR_MII_AN_INTR_STS_CL37_ANCMPLT_INTR_ BIT(0)
+
+#define VR_MII_LINK_TIMER_CTRL 0x800A
+#define VR_MII_DIG_STS 0x8010
+#define VR_MII_DIG_STS_PSEQ_STATE_MASK_ GENMASK(4, 2)
+#define VR_MII_DIG_STS_PSEQ_STATE_POS_ (2)
+#define VR_MII_GEN2_4_MPLL_CTRL0 0x8078
+#define VR_MII_MPLL_CTRL0_REF_CLK_DIV2_ BIT(12)
+#define VR_MII_MPLL_CTRL0_USE_REFCLK_PAD_ BIT(4)
+#define VR_MII_GEN2_4_MPLL_CTRL1 0x8079
+#define VR_MII_MPLL_CTRL1_MPLL_MULTIPLIER_ GENMASK(6, 0)
+#define VR_MII_BAUD_RATE_3P125GBPS (3125)
+#define VR_MII_BAUD_RATE_1P25GBPS (1250)
+#define VR_MII_MPLL_MULTIPLIER_125 (125)
+#define VR_MII_MPLL_MULTIPLIER_100 (100)
+#define VR_MII_MPLL_MULTIPLIER_50 (50)
+#define VR_MII_MPLL_MULTIPLIER_40 (40)
+#define VR_MII_GEN2_4_MISC_CTRL1 0x809A
+#define VR_MII_CTRL1_RX_RATE_0_MASK_ GENMASK(3, 2)
+#define VR_MII_CTRL1_RX_RATE_0_SHIFT_ (2)
+#define VR_MII_CTRL1_TX_RATE_0_MASK_ GENMASK(1, 0)
+#define VR_MII_MPLL_BAUD_CLK (0)
+#define VR_MII_MPLL_BAUD_CLK_DIV_2 (1)
+#define VR_MII_MPLL_BAUD_CLK_DIV_4 (2)
+
#define INT_STS (0x780)
#define INT_BIT_DMA_RX_(channel) BIT(24 + (channel))
#define INT_BIT_ALL_RX_ (0x0F000000)
@@ -906,12 +990,28 @@ struct lan743x_rx {
struct sk_buff *skb_head, *skb_tail;
};
+/* SGMII Link Speed Duplex status */
+enum lan743x_sgmii_lsd {
+ POWER_DOWN = 0,
+ LINK_DOWN,
+ ANEG_BUSY,
+ LINK_10HD,
+ LINK_10FD,
+ LINK_100HD,
+ LINK_100FD,
+ LINK_1000_MASTER,
+ LINK_1000_SLAVE,
+ LINK_2500_MASTER,
+ LINK_2500_SLAVE
+};
+
struct lan743x_adapter {
struct net_device *netdev;
struct mii_bus *mdiobus;
int msg_enable;
#ifdef CONFIG_PM
u32 wolopts;
+ u8 sopass[SOPASS_MAX];
#endif
struct pci_dev *pdev;
struct lan743x_csr csr;
@@ -931,12 +1031,16 @@ struct lan743x_adapter {
spinlock_t eth_syslock_spinlock;
bool eth_syslock_en;
u32 eth_syslock_acquire_cnt;
+ struct mutex sgmii_rw_lock;
+ /* SGMII Link Speed & Duplex status */
+ enum lan743x_sgmii_lsd sgmii_lsd;
u8 max_tx_channels;
u8 used_tx_channels;
u8 max_vector_count;
#define LAN743X_ADAPTER_FLAG_OTP BIT(0)
u32 flags;
+ u32 hw_cfg;
};
#define LAN743X_COMPONENT_FLAG_RX(channel) BIT(20 + (channel))
@@ -1049,5 +1153,7 @@ struct lan743x_rx_buffer_info {
u32 lan743x_csr_read(struct lan743x_adapter *adapter, int offset);
void lan743x_csr_write(struct lan743x_adapter *adapter, int offset, u32 data);
+int lan743x_hs_syslock_acquire(struct lan743x_adapter *adapter, u16 timeout);
+void lan743x_hs_syslock_release(struct lan743x_adapter *adapter);
#endif /* _LAN743X_H */
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c
index 3429660cd2e5..40ef9fad3a77 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c
@@ -394,12 +394,10 @@ static int sparx5_handle_port_mdb_add(struct net_device *dev,
struct sparx5 *spx5 = port->sparx5;
u16 pgid_idx, vid;
u32 mact_entry;
+ bool is_host;
int res, err;
- if (netif_is_bridge_master(v->obj.orig_dev)) {
- sparx5_mact_learn(spx5, PGID_CPU, v->addr, v->vid);
- return 0;
- }
+ is_host = netif_is_bridge_master(v->obj.orig_dev);
/* When VLAN unaware the vlan value is not parsed and we receive vid 0.
* Fall back to bridge vid 1.
@@ -416,17 +414,33 @@ static int sparx5_handle_port_mdb_add(struct net_device *dev,
/* MC_IDX starts after the port masks in the PGID table */
pgid_idx += SPX5_PORTS;
- sparx5_pgid_update_mask(port, pgid_idx, true);
+
+ if (is_host)
+ spx5_rmw(ANA_AC_PGID_MISC_CFG_PGID_CPU_COPY_ENA_SET(1),
+ ANA_AC_PGID_MISC_CFG_PGID_CPU_COPY_ENA, spx5,
+ ANA_AC_PGID_MISC_CFG(pgid_idx));
+ else
+ sparx5_pgid_update_mask(port, pgid_idx, true);
+
} else {
err = sparx5_pgid_alloc_mcast(spx5, &pgid_idx);
if (err) {
netdev_warn(dev, "multicast pgid table full\n");
return err;
}
- sparx5_pgid_update_mask(port, pgid_idx, true);
+
+ if (is_host)
+ spx5_rmw(ANA_AC_PGID_MISC_CFG_PGID_CPU_COPY_ENA_SET(1),
+ ANA_AC_PGID_MISC_CFG_PGID_CPU_COPY_ENA, spx5,
+ ANA_AC_PGID_MISC_CFG(pgid_idx));
+ else
+ sparx5_pgid_update_mask(port, pgid_idx, true);
+
err = sparx5_mact_learn(spx5, pgid_idx, v->addr, vid);
+
if (err) {
netdev_warn(dev, "could not learn mac address %pM\n", v->addr);
+ sparx5_pgid_free(spx5, pgid_idx);
sparx5_pgid_update_mask(port, pgid_idx, false);
return err;
}
@@ -463,13 +477,8 @@ static int sparx5_handle_port_mdb_del(struct net_device *dev,
struct sparx5_port *port = netdev_priv(dev);
struct sparx5 *spx5 = port->sparx5;
u16 pgid_idx, vid;
- u32 mact_entry, res, pgid_entry[3];
- int err;
-
- if (netif_is_bridge_master(v->obj.orig_dev)) {
- sparx5_mact_forget(spx5, v->addr, v->vid);
- return 0;
- }
+ u32 mact_entry, res, pgid_entry[3], misc_cfg;
+ bool host_ena;
if (!br_vlan_enabled(spx5->hw_bridge_dev))
vid = 1;
@@ -483,15 +492,21 @@ static int sparx5_handle_port_mdb_del(struct net_device *dev,
/* MC_IDX starts after the port masks in the PGID table */
pgid_idx += SPX5_PORTS;
- sparx5_pgid_update_mask(port, pgid_idx, false);
+
+ if (netif_is_bridge_master(v->obj.orig_dev))
+ spx5_rmw(ANA_AC_PGID_MISC_CFG_PGID_CPU_COPY_ENA_SET(0),
+ ANA_AC_PGID_MISC_CFG_PGID_CPU_COPY_ENA, spx5,
+ ANA_AC_PGID_MISC_CFG(pgid_idx));
+ else
+ sparx5_pgid_update_mask(port, pgid_idx, false);
+
+ misc_cfg = spx5_rd(spx5, ANA_AC_PGID_MISC_CFG(pgid_idx));
+ host_ena = ANA_AC_PGID_MISC_CFG_PGID_CPU_COPY_ENA_GET(misc_cfg);
sparx5_pgid_read_mask(spx5, pgid_idx, pgid_entry);
- if (bitmap_empty((unsigned long *)pgid_entry, SPX5_PORTS)) {
- /* No ports are in MC group. Remove entry */
- err = sparx5_mdb_del_entry(dev, spx5, v->addr, vid, pgid_idx);
- if (err)
- return err;
- }
+ if (bitmap_empty((unsigned long *)pgid_entry, SPX5_PORTS) && !host_ena)
+ /* No ports or CPU are in MC group. Remove entry */
+ return sparx5_mdb_del_entry(dev, spx5, v->addr, vid, pgid_idx);
}
return 0;
diff --git a/drivers/net/ethernet/microsoft/mana/gdma.h b/drivers/net/ethernet/microsoft/mana/gdma.h
index 41ecd156e95f..4a6efe6ada08 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma.h
+++ b/drivers/net/ethernet/microsoft/mana/gdma.h
@@ -348,6 +348,7 @@ struct gdma_context {
struct completion eq_test_event;
u32 test_event_eq_id;
+ bool is_pf;
void __iomem *bar0_va;
void __iomem *shm_base;
void __iomem *db_page_base;
@@ -469,6 +470,15 @@ struct gdma_eqe {
#define GDMA_REG_DB_PAGE_SIZE 0x10
#define GDMA_REG_SHM_OFFSET 0x18
+#define GDMA_PF_REG_DB_PAGE_SIZE 0xD0
+#define GDMA_PF_REG_DB_PAGE_OFF 0xC8
+#define GDMA_PF_REG_SHM_OFF 0x70
+
+#define GDMA_SRIOV_REG_CFG_BASE_OFF 0x108
+
+#define MANA_PF_DEVICE_ID 0x00B9
+#define MANA_VF_DEVICE_ID 0x00BA
+
struct gdma_posted_wqe_info {
u32 wqe_size_in_bu;
};
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 49b85ca578b0..5f9240182351 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -18,7 +18,24 @@ static u64 mana_gd_r64(struct gdma_context *g, u64 offset)
return readq(g->bar0_va + offset);
}
-static void mana_gd_init_registers(struct pci_dev *pdev)
+static void mana_gd_init_pf_regs(struct pci_dev *pdev)
+{
+ struct gdma_context *gc = pci_get_drvdata(pdev);
+ void __iomem *sriov_base_va;
+ u64 sriov_base_off;
+
+ gc->db_page_size = mana_gd_r32(gc, GDMA_PF_REG_DB_PAGE_SIZE) & 0xFFFF;
+ gc->db_page_base = gc->bar0_va +
+ mana_gd_r64(gc, GDMA_PF_REG_DB_PAGE_OFF);
+
+ sriov_base_off = mana_gd_r64(gc, GDMA_SRIOV_REG_CFG_BASE_OFF);
+
+ sriov_base_va = gc->bar0_va + sriov_base_off;
+ gc->shm_base = sriov_base_va +
+ mana_gd_r64(gc, sriov_base_off + GDMA_PF_REG_SHM_OFF);
+}
+
+static void mana_gd_init_vf_regs(struct pci_dev *pdev)
{
struct gdma_context *gc = pci_get_drvdata(pdev);
@@ -30,6 +47,16 @@ static void mana_gd_init_registers(struct pci_dev *pdev)
gc->shm_base = gc->bar0_va + mana_gd_r64(gc, GDMA_REG_SHM_OFFSET);
}
+static void mana_gd_init_registers(struct pci_dev *pdev)
+{
+ struct gdma_context *gc = pci_get_drvdata(pdev);
+
+ if (gc->is_pf)
+ mana_gd_init_pf_regs(pdev);
+ else
+ mana_gd_init_vf_regs(pdev);
+}
+
static int mana_gd_query_max_resources(struct pci_dev *pdev)
{
struct gdma_context *gc = pci_get_drvdata(pdev);
@@ -1304,6 +1331,11 @@ static void mana_gd_cleanup(struct pci_dev *pdev)
mana_gd_remove_irqs(pdev);
}
+static bool mana_is_pf(unsigned short dev_id)
+{
+ return dev_id == MANA_PF_DEVICE_ID;
+}
+
static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
struct gdma_context *gc;
@@ -1340,10 +1372,10 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (!bar0_va)
goto free_gc;
+ gc->is_pf = mana_is_pf(pdev->device);
gc->bar0_va = bar0_va;
gc->dev = &pdev->dev;
-
err = mana_gd_setup(pdev);
if (err)
goto unmap_bar;
@@ -1438,7 +1470,8 @@ static void mana_gd_shutdown(struct pci_dev *pdev)
#endif
static const struct pci_device_id mana_id_table[] = {
- { PCI_DEVICE(PCI_VENDOR_ID_MICROSOFT, 0x00BA) },
+ { PCI_DEVICE(PCI_VENDOR_ID_MICROSOFT, MANA_PF_DEVICE_ID) },
+ { PCI_DEVICE(PCI_VENDOR_ID_MICROSOFT, MANA_VF_DEVICE_ID) },
{ }
};
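The gdma changes key everything off the PCI device ID: 0x00B9 enumerates as a physical function with its own register layout, while 0x00BA stays the virtual function. A compilable sketch of that dispatch, with the two initializers reduced to stubs:

#include <stdbool.h>
#include <stdio.h>

#define MANA_PF_DEVICE_ID 0x00B9
#define MANA_VF_DEVICE_ID 0x00BA

static bool mana_is_pf(unsigned short dev_id)
{
        return dev_id == MANA_PF_DEVICE_ID;
}

/* Stand-ins for the two register-layout initializers. */
static void init_pf_regs(void) { puts("PF register layout"); }
static void init_vf_regs(void) { puts("VF register layout"); }

int main(void)
{
        unsigned short ids[] = { MANA_PF_DEVICE_ID, MANA_VF_DEVICE_ID };

        for (int i = 0; i < 2; i++) {
                if (mana_is_pf(ids[i]))
                        init_pf_regs();
                else
                        init_vf_regs();
        }
        return 0;
}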
diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c
index 078d6a5a0768..543a5d5c304f 100644
--- a/drivers/net/ethernet/microsoft/mana/hw_channel.c
+++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c
@@ -158,6 +158,14 @@ static void mana_hwc_init_event_handler(void *ctx, struct gdma_queue *q_self,
hwc->rxq->msg_buf->gpa_mkey = val;
hwc->txq->msg_buf->gpa_mkey = val;
break;
+
+ case HWC_INIT_DATA_PF_DEST_RQ_ID:
+ hwc->pf_dest_vrq_id = val;
+ break;
+
+ case HWC_INIT_DATA_PF_DEST_CQ_ID:
+ hwc->pf_dest_vrcq_id = val;
+ break;
}
break;
@@ -773,10 +781,13 @@ void mana_hwc_destroy_channel(struct gdma_context *gc)
int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len,
const void *req, u32 resp_len, void *resp)
{
+ struct gdma_context *gc = hwc->gdma_dev->gdma_context;
struct hwc_work_request *tx_wr;
struct hwc_wq *txq = hwc->txq;
struct gdma_req_hdr *req_msg;
struct hwc_caller_ctx *ctx;
+ u32 dest_vrcq = 0;
+ u32 dest_vrq = 0;
u16 msg_id;
int err;
@@ -803,7 +814,12 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len,
tx_wr->msg_size = req_len;
- err = mana_hwc_post_tx_wqe(txq, tx_wr, 0, 0, false);
+ if (gc->is_pf) {
+ dest_vrq = hwc->pf_dest_vrq_id;
+ dest_vrcq = hwc->pf_dest_vrcq_id;
+ }
+
+ err = mana_hwc_post_tx_wqe(txq, tx_wr, dest_vrq, dest_vrcq, false);
if (err) {
dev_err(hwc->dev, "HWC: Failed to post send WQE: %d\n", err);
goto out;
diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.h b/drivers/net/ethernet/microsoft/mana/hw_channel.h
index 31c6e83c454a..6a757a6e2732 100644
--- a/drivers/net/ethernet/microsoft/mana/hw_channel.h
+++ b/drivers/net/ethernet/microsoft/mana/hw_channel.h
@@ -20,6 +20,8 @@
#define HWC_INIT_DATA_MAX_NUM_CQS 7
#define HWC_INIT_DATA_PDID 8
#define HWC_INIT_DATA_GPA_MKEY 9
+#define HWC_INIT_DATA_PF_DEST_RQ_ID 10
+#define HWC_INIT_DATA_PF_DEST_CQ_ID 11
/* Structures labeled with "HW DATA" are exchanged with the hardware. All of
* them are naturally aligned and hence don't need __packed.
@@ -178,6 +180,9 @@ struct hw_channel_context {
struct semaphore sema;
struct gdma_resource inflight_msg_res;
+ u32 pf_dest_vrq_id;
+ u32 pf_dest_vrcq_id;
+
struct hwc_caller_ctx *caller_ctx;
};
diff --git a/drivers/net/ethernet/microsoft/mana/mana.h b/drivers/net/ethernet/microsoft/mana/mana.h
index d36405af9432..d58be64374c8 100644
--- a/drivers/net/ethernet/microsoft/mana/mana.h
+++ b/drivers/net/ethernet/microsoft/mana/mana.h
@@ -53,12 +53,14 @@ struct mana_stats_rx {
u64 bytes;
u64 xdp_drop;
u64 xdp_tx;
+ u64 xdp_redirect;
struct u64_stats_sync syncp;
};
struct mana_stats_tx {
u64 packets;
u64 bytes;
+ u64 xdp_xmit;
struct u64_stats_sync syncp;
};
@@ -311,6 +313,8 @@ struct mana_rxq {
struct bpf_prog __rcu *bpf_prog;
struct xdp_rxq_info xdp_rxq;
struct page *xdp_save_page;
+ bool xdp_flush;
+ int xdp_rc; /* XDP redirect return code */
/* MUST BE THE LAST MEMBER:
* Each receive buffer has an associated mana_recv_buf_oob.
@@ -374,6 +378,7 @@ struct mana_port_context {
unsigned int num_queues;
mana_handle_t port_handle;
+ mana_handle_t pf_filter_handle;
u16 port_idx;
@@ -395,6 +400,8 @@ int mana_probe(struct gdma_dev *gd, bool resuming);
void mana_remove(struct gdma_dev *gd, bool suspending);
void mana_xdp_tx(struct sk_buff *skb, struct net_device *ndev);
+int mana_xdp_xmit(struct net_device *ndev, int n, struct xdp_frame **frames,
+ u32 flags);
u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq,
struct xdp_buff *xdp, void *buf_va, uint pkt_len);
struct bpf_prog *mana_xdp_get(struct mana_port_context *apc);
@@ -420,6 +427,12 @@ enum mana_command_code {
MANA_FENCE_RQ = 0x20006,
MANA_CONFIG_VPORT_RX = 0x20007,
MANA_QUERY_VPORT_CONFIG = 0x20008,
+
+ /* Privileged commands for the PF mode */
+ MANA_REGISTER_FILTER = 0x28000,
+ MANA_DEREGISTER_FILTER = 0x28001,
+ MANA_REGISTER_HW_PORT = 0x28003,
+ MANA_DEREGISTER_HW_PORT = 0x28004,
};
/* Query Device Configuration */
@@ -547,6 +560,63 @@ struct mana_cfg_rx_steer_resp {
struct gdma_resp_hdr hdr;
}; /* HW DATA */
+/* Register HW vPort */
+struct mana_register_hw_vport_req {
+ struct gdma_req_hdr hdr;
+ u16 attached_gfid;
+ u8 is_pf_default_vport;
+ u8 reserved1;
+ u8 allow_all_ether_types;
+ u8 reserved2;
+ u8 reserved3;
+ u8 reserved4;
+}; /* HW DATA */
+
+struct mana_register_hw_vport_resp {
+ struct gdma_resp_hdr hdr;
+ mana_handle_t hw_vport_handle;
+}; /* HW DATA */
+
+/* Deregister HW vPort */
+struct mana_deregister_hw_vport_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t hw_vport_handle;
+}; /* HW DATA */
+
+struct mana_deregister_hw_vport_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW DATA */
+
+/* Register filter */
+struct mana_register_filter_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t vport;
+ u8 mac_addr[6];
+ u8 reserved1;
+ u8 reserved2;
+ u8 reserved3;
+ u8 reserved4;
+ u16 reserved5;
+ u32 reserved6;
+ u32 reserved7;
+ u32 reserved8;
+}; /* HW DATA */
+
+struct mana_register_filter_resp {
+ struct gdma_resp_hdr hdr;
+ mana_handle_t filter_handle;
+}; /* HW DATA */
+
+/* Deregister filter */
+struct mana_deregister_filter_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t filter_handle;
+}; /* HW DATA */
+
+struct mana_deregister_filter_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW DATA */
+
#define MANA_MAX_NUM_QUEUES 64
#define MANA_SHORT_VPORT_OFFSET_MAX ((1U << 8) - 1)
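All of the new request/response structures follow the driver's "HW DATA" rule: members are laid out so every field is naturally aligned, with explicit reserved fields as padding, so no __packed attribute is needed and the wire size is stable. A standalone sketch mirroring mana_register_filter_req with standard types (the typedefs here are stand-ins for the kernel's) that checks the invariant:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef uint64_t mana_handle_t; /* illustrative stand-in */

struct gdma_req_hdr {
        uint32_t msg_type;
        uint32_t msg_size;
};

/* Reserved fields pad the 6-byte MAC address out so every member
 * stays naturally aligned; total size is a multiple of 8.
 */
struct mana_register_filter_req {
        struct gdma_req_hdr hdr;
        mana_handle_t vport;
        uint8_t mac_addr[6];
        uint8_t reserved1, reserved2, reserved3, reserved4;
        uint16_t reserved5;
        uint32_t reserved6, reserved7, reserved8;
};

int main(void)
{
        struct mana_register_filter_req req = { 0 }; /* zero reserved fields */
        const uint8_t mac[6] = { 0x00, 0x15, 0x5d, 0x00, 0x00, 0x01 };

        static_assert(sizeof(req) % 8 == 0, "HW DATA size must be 8-byte multiple");

        req.vport = 0x1234;
        memcpy(req.mac_addr, mac, sizeof(mac));
        printf("request size: %zu bytes\n", sizeof(req));
        return 0;
}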
diff --git a/drivers/net/ethernet/microsoft/mana/mana_bpf.c b/drivers/net/ethernet/microsoft/mana/mana_bpf.c
index 1d2f948b5c00..421fd39ff3a8 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_bpf.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_bpf.c
@@ -32,9 +32,55 @@ void mana_xdp_tx(struct sk_buff *skb, struct net_device *ndev)
ndev->stats.tx_dropped++;
}
+static int mana_xdp_xmit_fm(struct net_device *ndev, struct xdp_frame *frame,
+ u16 q_idx)
+{
+ struct sk_buff *skb;
+
+ skb = xdp_build_skb_from_frame(frame, ndev);
+ if (unlikely(!skb))
+ return -ENOMEM;
+
+ skb_set_queue_mapping(skb, q_idx);
+
+ mana_xdp_tx(skb, ndev);
+
+ return 0;
+}
+
+int mana_xdp_xmit(struct net_device *ndev, int n, struct xdp_frame **frames,
+ u32 flags)
+{
+ struct mana_port_context *apc = netdev_priv(ndev);
+ struct mana_stats_tx *tx_stats;
+ int i, count = 0;
+ u16 q_idx;
+
+ if (unlikely(!apc->port_is_up))
+ return 0;
+
+ q_idx = smp_processor_id() % ndev->real_num_tx_queues;
+
+ for (i = 0; i < n; i++) {
+ if (mana_xdp_xmit_fm(ndev, frames[i], q_idx))
+ break;
+
+ count++;
+ }
+
+ tx_stats = &apc->tx_qp[q_idx].txq.stats;
+
+ u64_stats_update_begin(&tx_stats->syncp);
+ tx_stats->xdp_xmit += count;
+ u64_stats_update_end(&tx_stats->syncp);
+
+ return count;
+}
+
u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq,
struct xdp_buff *xdp, void *buf_va, uint pkt_len)
{
+ struct mana_stats_rx *rx_stats;
struct bpf_prog *prog;
u32 act = XDP_PASS;
@@ -49,12 +95,30 @@ u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq,
act = bpf_prog_run_xdp(prog, xdp);
+ rx_stats = &rxq->stats;
+
switch (act) {
case XDP_PASS:
case XDP_TX:
case XDP_DROP:
break;
+ case XDP_REDIRECT:
+ rxq->xdp_rc = xdp_do_redirect(ndev, xdp, prog);
+ if (!rxq->xdp_rc) {
+ rxq->xdp_flush = true;
+
+ u64_stats_update_begin(&rx_stats->syncp);
+ rx_stats->packets++;
+ rx_stats->bytes += pkt_len;
+ rx_stats->xdp_redirect++;
+ u64_stats_update_end(&rx_stats->syncp);
+
+ break;
+ }
+
+ fallthrough;
+
case XDP_ABORTED:
trace_xdp_exception(ndev, prog, act);
break;
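The XDP_REDIRECT path follows the usual deferred-flush contract: xdp_do_redirect() only queues the frame, and the driver calls xdp_do_flush() once per NAPI poll if any redirect in the batch succeeded, which is what rxq->xdp_flush tracks. A small model of that batching (the *_model functions are stand-ins, not the kernel API):

#include <stdbool.h>
#include <stdio.h>

struct rxq {
        bool xdp_flush;
        int pending;
};

static int xdp_do_redirect_model(struct rxq *q)
{
        q->pending++; /* frame queued, not yet transmitted */
        return 0;     /* 0 == success, mirroring the kernel API */
}

static void xdp_do_flush_model(struct rxq *q)
{
        printf("flushed %d redirected frames\n", q->pending);
        q->pending = 0;
}

static void poll_rx_cq(struct rxq *q, int completions)
{
        q->xdp_flush = false;

        for (int i = 0; i < completions; i++)
                if (!xdp_do_redirect_model(q))
                        q->xdp_flush = true;

        if (q->xdp_flush) /* one flush per poll, not per packet */
                xdp_do_flush_model(q);
}

int main(void)
{
        struct rxq q = { 0 };

        poll_rx_cq(&q, 8);
        return 0;
}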
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index b1d773823232..9259a74eca40 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -6,6 +6,7 @@
#include <linux/inetdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
+#include <linux/filter.h>
#include <linux/mm.h>
#include <net/checksum.h>
@@ -382,6 +383,7 @@ static const struct net_device_ops mana_devops = {
.ndo_validate_addr = eth_validate_addr,
.ndo_get_stats64 = mana_get_stats64,
.ndo_bpf = mana_bpf,
+ .ndo_xdp_xmit = mana_xdp_xmit,
};
static void mana_cleanup_port_context(struct mana_port_context *apc)
@@ -446,6 +448,119 @@ static int mana_verify_resp_hdr(const struct gdma_resp_hdr *resp_hdr,
return 0;
}
+static int mana_pf_register_hw_vport(struct mana_port_context *apc)
+{
+ struct mana_register_hw_vport_resp resp = {};
+ struct mana_register_hw_vport_req req = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_REGISTER_HW_PORT,
+ sizeof(req), sizeof(resp));
+ req.attached_gfid = 1;
+ req.is_pf_default_vport = 1;
+ req.allow_all_ether_types = 1;
+
+ err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
+ sizeof(resp));
+ if (err) {
+ netdev_err(apc->ndev, "Failed to register hw vPort: %d\n", err);
+ return err;
+ }
+
+ err = mana_verify_resp_hdr(&resp.hdr, MANA_REGISTER_HW_PORT,
+ sizeof(resp));
+ if (err || resp.hdr.status) {
+ netdev_err(apc->ndev, "Failed to register hw vPort: %d, 0x%x\n",
+ err, resp.hdr.status);
+ return err ? err : -EPROTO;
+ }
+
+ apc->port_handle = resp.hw_vport_handle;
+ return 0;
+}
+
+static void mana_pf_deregister_hw_vport(struct mana_port_context *apc)
+{
+ struct mana_deregister_hw_vport_resp resp = {};
+ struct mana_deregister_hw_vport_req req = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_DEREGISTER_HW_PORT,
+ sizeof(req), sizeof(resp));
+ req.hw_vport_handle = apc->port_handle;
+
+ err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
+ sizeof(resp));
+ if (err) {
+ netdev_err(apc->ndev, "Failed to unregister hw vPort: %d\n",
+ err);
+ return;
+ }
+
+ err = mana_verify_resp_hdr(&resp.hdr, MANA_DEREGISTER_HW_PORT,
+ sizeof(resp));
+ if (err || resp.hdr.status)
+ netdev_err(apc->ndev,
+ "Failed to deregister hw vPort: %d, 0x%x\n",
+ err, resp.hdr.status);
+}
+
+static int mana_pf_register_filter(struct mana_port_context *apc)
+{
+ struct mana_register_filter_resp resp = {};
+ struct mana_register_filter_req req = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_REGISTER_FILTER,
+ sizeof(req), sizeof(resp));
+ req.vport = apc->port_handle;
+ memcpy(req.mac_addr, apc->mac_addr, ETH_ALEN);
+
+ err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
+ sizeof(resp));
+ if (err) {
+ netdev_err(apc->ndev, "Failed to register filter: %d\n", err);
+ return err;
+ }
+
+ err = mana_verify_resp_hdr(&resp.hdr, MANA_REGISTER_FILTER,
+ sizeof(resp));
+ if (err || resp.hdr.status) {
+ netdev_err(apc->ndev, "Failed to register filter: %d, 0x%x\n",
+ err, resp.hdr.status);
+ return err ? err : -EPROTO;
+ }
+
+ apc->pf_filter_handle = resp.filter_handle;
+ return 0;
+}
+
+static void mana_pf_deregister_filter(struct mana_port_context *apc)
+{
+ struct mana_deregister_filter_resp resp = {};
+ struct mana_deregister_filter_req req = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_DEREGISTER_FILTER,
+ sizeof(req), sizeof(resp));
+ req.filter_handle = apc->pf_filter_handle;
+
+ err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
+ sizeof(resp));
+ if (err) {
+ netdev_err(apc->ndev, "Failed to unregister filter: %d\n",
+ err);
+ return;
+ }
+
+ err = mana_verify_resp_hdr(&resp.hdr, MANA_DEREGISTER_FILTER,
+ sizeof(resp));
+ if (err || resp.hdr.status)
+ netdev_err(apc->ndev,
+ "Failed to deregister filter: %d, 0x%x\n",
+ err, resp.hdr.status);
+}
+
static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver,
u32 proto_minor_ver, u32 proto_micro_ver,
u16 *max_num_vports)
@@ -1007,6 +1122,9 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
act = mana_run_xdp(ndev, rxq, &xdp, buf_va, pkt_len);
+ if (act == XDP_REDIRECT && !rxq->xdp_rc)
+ return;
+
if (act != XDP_PASS && act != XDP_TX)
goto drop_xdp;
@@ -1162,11 +1280,14 @@ drop:
static void mana_poll_rx_cq(struct mana_cq *cq)
{
struct gdma_comp *comp = cq->gdma_comp_buf;
+ struct mana_rxq *rxq = cq->rxq;
int comp_read, i;
comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER);
WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER);
+ rxq->xdp_flush = false;
+
for (i = 0; i < comp_read; i++) {
if (WARN_ON_ONCE(comp[i].is_sq))
return;
@@ -1175,8 +1296,11 @@ static void mana_poll_rx_cq(struct mana_cq *cq)
if (WARN_ON_ONCE(comp[i].wq_num != cq->rxq->gdma_id))
return;
- mana_process_rx_cqe(cq->rxq, cq, &comp[i]);
+ mana_process_rx_cqe(rxq, cq, &comp[i]);
}
+
+ if (rxq->xdp_flush)
+ xdp_do_flush();
}
static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
@@ -1653,6 +1777,7 @@ out:
static void mana_destroy_vport(struct mana_port_context *apc)
{
+ struct gdma_dev *gd = apc->ac->gdma_dev;
struct mana_rxq *rxq;
u32 rxq_idx;
@@ -1666,6 +1791,9 @@ static void mana_destroy_vport(struct mana_port_context *apc)
}
mana_destroy_txq(apc);
+
+ if (gd->gdma_context->is_pf)
+ mana_pf_deregister_hw_vport(apc);
}
static int mana_create_vport(struct mana_port_context *apc,
@@ -1676,6 +1804,12 @@ static int mana_create_vport(struct mana_port_context *apc,
apc->default_rxobj = INVALID_MANA_HANDLE;
+ if (gd->gdma_context->is_pf) {
+ err = mana_pf_register_hw_vport(apc);
+ if (err)
+ return err;
+ }
+
err = mana_cfg_vport(apc, gd->pdid, gd->doorbell);
if (err)
return err;
@@ -1755,6 +1889,7 @@ reset_apc:
int mana_alloc_queues(struct net_device *ndev)
{
struct mana_port_context *apc = netdev_priv(ndev);
+ struct gdma_dev *gd = apc->ac->gdma_dev;
int err;
err = mana_create_vport(apc, ndev);
@@ -1781,6 +1916,12 @@ int mana_alloc_queues(struct net_device *ndev)
if (err)
goto destroy_vport;
+ if (gd->gdma_context->is_pf) {
+ err = mana_pf_register_filter(apc);
+ if (err)
+ goto destroy_vport;
+ }
+
mana_chn_setxdp(apc, mana_xdp_get(apc));
return 0;
@@ -1825,6 +1966,7 @@ int mana_attach(struct net_device *ndev)
static int mana_dealloc_queues(struct net_device *ndev)
{
struct mana_port_context *apc = netdev_priv(ndev);
+ struct gdma_dev *gd = apc->ac->gdma_dev;
struct mana_txq *txq;
int i, err;
@@ -1833,6 +1975,9 @@ static int mana_dealloc_queues(struct net_device *ndev)
mana_chn_setxdp(apc, NULL);
+ if (gd->gdma_context->is_pf)
+ mana_pf_deregister_filter(apc);
+
/* No packet can be transmitted now since apc->port_is_up is false.
* There is still a tiny chance that mana_poll_tx_cq() can re-enable
* a txq because it may not timely see apc->port_is_up being cleared
@@ -1915,6 +2060,7 @@ static int mana_probe_port(struct mana_context *ac, int port_idx,
apc->max_queues = gc->max_num_queues;
apc->num_queues = gc->max_num_queues;
apc->port_handle = INVALID_MANA_HANDLE;
+ apc->pf_filter_handle = INVALID_MANA_HANDLE;
apc->port_idx = port_idx;
ndev->netdev_ops = &mana_devops;
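The four PF helpers in mana_en.c share one shape: send the request, verify the response header, and fold a non-zero firmware status into -EPROTO when the transport itself succeeded. A compilable stand-in for that pattern (the checks are elided to stubs, and the errno value is spelled out since this runs outside the kernel):

#include <stdio.h>

struct resp_hdr {
        int status; /* firmware status word */
};

static int send_request(struct resp_hdr *resp)
{
        resp->status = 0; /* pretend the firmware accepted it */
        return 0;         /* transport result */
}

static int verify_resp_hdr(const struct resp_hdr *resp)
{
        (void)resp;
        return 0; /* size/type checks elided */
}

static int pf_do_request(void)
{
        struct resp_hdr resp;
        int err;

        err = send_request(&resp);
        if (err)
                return err; /* transport failure wins */

        err = verify_resp_hdr(&resp);
        if (err || resp.status)
                return err ? err : -71; /* -EPROTO: firmware said no */

        return 0;
}

int main(void)
{
        printf("request: %d\n", pf_do_request());
        return 0;
}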
diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
index e13f2453eabb..c530db76880f 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
@@ -23,7 +23,7 @@ static int mana_get_sset_count(struct net_device *ndev, int stringset)
if (stringset != ETH_SS_STATS)
return -EINVAL;
- return ARRAY_SIZE(mana_eth_stats) + num_queues * 6;
+ return ARRAY_SIZE(mana_eth_stats) + num_queues * 8;
}
static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
@@ -50,6 +50,8 @@ static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
p += ETH_GSTRING_LEN;
sprintf(p, "rx_%d_xdp_tx", i);
p += ETH_GSTRING_LEN;
+ sprintf(p, "rx_%d_xdp_redirect", i);
+ p += ETH_GSTRING_LEN;
}
for (i = 0; i < num_queues; i++) {
@@ -57,6 +59,8 @@ static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
p += ETH_GSTRING_LEN;
sprintf(p, "tx_%d_bytes", i);
p += ETH_GSTRING_LEN;
+ sprintf(p, "tx_%d_xdp_xmit", i);
+ p += ETH_GSTRING_LEN;
}
}
@@ -70,6 +74,8 @@ static void mana_get_ethtool_stats(struct net_device *ndev,
struct mana_stats_tx *tx_stats;
unsigned int start;
u64 packets, bytes;
+ u64 xdp_redirect;
+ u64 xdp_xmit;
u64 xdp_drop;
u64 xdp_tx;
int q, i = 0;
@@ -89,12 +95,14 @@ static void mana_get_ethtool_stats(struct net_device *ndev,
bytes = rx_stats->bytes;
xdp_drop = rx_stats->xdp_drop;
xdp_tx = rx_stats->xdp_tx;
+ xdp_redirect = rx_stats->xdp_redirect;
} while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));
data[i++] = packets;
data[i++] = bytes;
data[i++] = xdp_drop;
data[i++] = xdp_tx;
+ data[i++] = xdp_redirect;
}
for (q = 0; q < num_queues; q++) {
@@ -104,10 +112,12 @@ static void mana_get_ethtool_stats(struct net_device *ndev,
start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
packets = tx_stats->packets;
bytes = tx_stats->bytes;
+ xdp_xmit = tx_stats->xdp_xmit;
} while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));
data[i++] = packets;
data[i++] = bytes;
+ data[i++] = xdp_xmit;
}
}
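The ethtool changes must keep three callbacks in lock-step: get_sset_count() now reports eight per-queue stats (five rx, three tx), and get_strings()/get_ethtool_stats() must emit names and values in exactly that order. A sketch of the layout discipline, deriving the count from the same tables the strings come from (queue count and names are illustrative):

#include <stdio.h>

#define ETH_GSTRING_LEN 32
#define NUM_QUEUES 2

static const char *rx_stats[] = {
        "packets", "bytes", "xdp_drop", "xdp_tx", "xdp_redirect",
};
static const char *tx_stats[] = { "packets", "bytes", "xdp_xmit" };

/* Must agree with what get_strings() below emits. */
static int get_sset_count(void)
{
        return NUM_QUEUES *
               (int)(sizeof(rx_stats) / sizeof(rx_stats[0]) +
                     sizeof(tx_stats) / sizeof(tx_stats[0]));
}

static void get_strings(char *data)
{
        char *p = data;

        for (int i = 0; i < NUM_QUEUES; i++)
                for (size_t s = 0; s < sizeof(rx_stats) / sizeof(rx_stats[0]); s++) {
                        snprintf(p, ETH_GSTRING_LEN, "rx_%d_%s", i, rx_stats[s]);
                        p += ETH_GSTRING_LEN;
                }
        for (int i = 0; i < NUM_QUEUES; i++)
                for (size_t s = 0; s < sizeof(tx_stats) / sizeof(tx_stats[0]); s++) {
                        snprintf(p, ETH_GSTRING_LEN, "tx_%d_%s", i, tx_stats[s]);
                        p += ETH_GSTRING_LEN;
                }
}

int main(void)
{
        char buf[16 * ETH_GSTRING_LEN] = { 0 };

        get_strings(buf);
        printf("%d stats, first: %s\n", get_sset_count(), buf);
        return 0;
}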
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 8da7e25a47c9..d4649e4ee0e7 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -3367,6 +3367,7 @@ int ocelot_init(struct ocelot *ocelot)
mutex_init(&ocelot->ptp_lock);
mutex_init(&ocelot->mact_lock);
mutex_init(&ocelot->fwd_domain_lock);
+ mutex_init(&ocelot->tas_lock);
spin_lock_init(&ocelot->ptp_clock_lock);
spin_lock_init(&ocelot->ts_id_lock);
snprintf(queue_name, sizeof(queue_name), "%s-stats",
diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c
index 87ad2137ba06..09c703efe946 100644
--- a/drivers/net/ethernet/mscc/ocelot_ptp.c
+++ b/drivers/net/ethernet/mscc/ocelot_ptp.c
@@ -72,6 +72,10 @@ int ocelot_ptp_settime64(struct ptp_clock_info *ptp,
ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
+
+ if (ocelot->ops->tas_clock_adjust)
+ ocelot->ops->tas_clock_adjust(ocelot);
+
return 0;
}
EXPORT_SYMBOL(ocelot_ptp_settime64);
@@ -105,6 +109,9 @@ int ocelot_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
+
+ if (ocelot->ops->tas_clock_adjust)
+ ocelot->ops->tas_clock_adjust(ocelot);
} else {
/* Fall back using ocelot_ptp_settime64 which is not exact. */
struct timespec64 ts;
@@ -117,6 +124,7 @@ int ocelot_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
ocelot_ptp_settime64(ptp, &ts);
}
+
return 0;
}
EXPORT_SYMBOL(ocelot_ptp_adjtime);
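The ocelot PTP hooks use the optional-callback convention: tas_clock_adjust is invoked only when the switch's ops provide it, right after the clock has been stepped, so TAS-capable hardware can recompute its gate base times. A minimal sketch of the pattern, with the types trimmed to the two fields involved:

#include <stdio.h>

struct ocelot;

struct ocelot_ops {
        void (*tas_clock_adjust)(struct ocelot *ocelot);
};

struct ocelot {
        const struct ocelot_ops *ops;
};

static void tas_adjust(struct ocelot *ocelot)
{
        (void)ocelot;
        puts("TAS base times recomputed");
}

static void settime(struct ocelot *ocelot)
{
        puts("clock stepped");

        if (ocelot->ops->tas_clock_adjust) /* NULL on non-TAS hardware */
                ocelot->ops->tas_clock_adjust(ocelot);
}

int main(void)
{
        const struct ocelot_ops with_tas = { .tas_clock_adjust = tas_adjust };
        const struct ocelot_ops without = { 0 };
        struct ocelot a = { .ops = &with_tas };
        struct ocelot b = { .ops = &without };

        settime(&a);
        settime(&b);
        return 0;
}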
diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c
index 7db56abaa582..f9410d59146d 100644
--- a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c
+++ b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c
@@ -282,7 +282,7 @@ netdev_tx_t nfp_nfd3_tx(struct sk_buff *skb, struct net_device *netdev)
txd = &tx_ring->txds[wr_idx];
txd->offset_eop = (nr_frags ? 0 : NFD3_DESC_TX_EOP) | md_bytes;
txd->dma_len = cpu_to_le16(skb_headlen(skb));
- nfp_desc_set_dma_addr(txd, dma_addr);
+ nfp_desc_set_dma_addr_40b(txd, dma_addr);
txd->data_len = cpu_to_le16(skb->len);
txd->flags = 0;
@@ -320,7 +320,7 @@ netdev_tx_t nfp_nfd3_tx(struct sk_buff *skb, struct net_device *netdev)
txd = &tx_ring->txds[wr_idx];
txd->dma_len = cpu_to_le16(fsize);
- nfp_desc_set_dma_addr(txd, dma_addr);
+ nfp_desc_set_dma_addr_40b(txd, dma_addr);
txd->offset_eop = md_bytes |
((f == nr_frags - 1) ? NFD3_DESC_TX_EOP : 0);
txd->vals8[1] = second_half;
@@ -562,8 +562,12 @@ nfp_nfd3_rx_give_one(const struct nfp_net_dp *dp,
/* Fill freelist descriptor */
rx_ring->rxds[wr_idx].fld.reserved = 0;
rx_ring->rxds[wr_idx].fld.meta_len_dd = 0;
- nfp_desc_set_dma_addr(&rx_ring->rxds[wr_idx].fld,
- dma_addr + dp->rx_dma_off);
+ /* DMA address is expanded to 48-bit width in freelist for NFP3800,
+ * so the *_48b macro is used accordingly; it's also OK to fill in
+ * a 40-bit address since the top 8 bits are set to 0.
+ */
+ nfp_desc_set_dma_addr_48b(&rx_ring->rxds[wr_idx].fld,
+ dma_addr + dp->rx_dma_off);
rx_ring->wr_p++;
if (!(rx_ring->wr_p % NFP_NET_FL_BATCH)) {
@@ -817,7 +821,7 @@ nfp_nfd3_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
txd = &tx_ring->txds[wr_idx];
txd->offset_eop = NFD3_DESC_TX_EOP;
txd->dma_len = cpu_to_le16(pkt_len);
- nfp_desc_set_dma_addr(txd, rxbuf->dma_addr + dma_off);
+ nfp_desc_set_dma_addr_40b(txd, rxbuf->dma_addr + dma_off);
txd->data_len = cpu_to_le16(pkt_len);
txd->flags = 0;
@@ -1193,7 +1197,7 @@ nfp_nfd3_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
txd = &tx_ring->txds[wr_idx];
txd->offset_eop = meta_len | NFD3_DESC_TX_EOP;
txd->dma_len = cpu_to_le16(skb_headlen(skb));
- nfp_desc_set_dma_addr(txd, dma_addr);
+ nfp_desc_set_dma_addr_40b(txd, dma_addr);
txd->data_len = cpu_to_le16(skb->len);
txd->flags = 0;
diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/rings.c b/drivers/net/ethernet/netronome/nfp/nfd3/rings.c
index 47604d5e25eb..f31eabdc0631 100644
--- a/drivers/net/ethernet/netronome/nfp/nfd3/rings.c
+++ b/drivers/net/ethernet/netronome/nfp/nfd3/rings.c
@@ -260,6 +260,7 @@ const struct nfp_dp_ops nfp_nfd3_ops = {
.version = NFP_NFD_VER_NFD3,
.tx_min_desc_per_pkt = 1,
.cap_mask = NFP_NFD3_CFG_CTRL_SUPPORTED,
+ .dma_mask = DMA_BIT_MASK(40),
.poll = nfp_nfd3_poll,
.xsk_poll = nfp_nfd3_xsk_poll,
.ctrl_poll = nfp_nfd3_ctrl_poll,
diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/xsk.c b/drivers/net/ethernet/netronome/nfp/nfd3/xsk.c
index c16c4b42ecfd..454fea4c8be2 100644
--- a/drivers/net/ethernet/netronome/nfp/nfd3/xsk.c
+++ b/drivers/net/ethernet/netronome/nfp/nfd3/xsk.c
@@ -40,7 +40,7 @@ nfp_nfd3_xsk_tx_xdp(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
txd = &tx_ring->txds[wr_idx];
txd->offset_eop = NFD3_DESC_TX_EOP;
txd->dma_len = cpu_to_le16(pkt_len);
- nfp_desc_set_dma_addr(txd, xrxbuf->dma_addr + pkt_off);
+ nfp_desc_set_dma_addr_40b(txd, xrxbuf->dma_addr + pkt_off);
txd->data_len = cpu_to_le16(pkt_len);
txd->flags = 0;
@@ -361,10 +361,8 @@ static void nfp_nfd3_xsk_tx(struct nfp_net_tx_ring *tx_ring)
/* Build TX descriptor. */
txd = &tx_ring->txds[wr_idx];
- nfp_desc_set_dma_addr(txd,
- xsk_buff_raw_get_dma(xsk_pool,
- desc[i].addr
- ));
+ nfp_desc_set_dma_addr_40b(txd,
+ xsk_buff_raw_get_dma(xsk_pool, desc[i].addr));
txd->offset_eop = NFD3_DESC_TX_EOP;
txd->dma_len = cpu_to_le16(desc[i].len);
txd->data_len = cpu_to_le16(desc[i].len);
diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c
index e509d6dcba5c..300637e576a8 100644
--- a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c
+++ b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c
@@ -314,7 +314,7 @@ netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev)
FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);
txd->dma_len_type = cpu_to_le16(dlen_type);
- nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr);
+ nfp_desc_set_dma_addr_48b(txd, dma_addr);
/* starts at bit 0 */
BUILD_BUG_ON(!(NFDK_DESC_TX_DMA_LEN_HEAD & 1));
@@ -339,7 +339,7 @@ netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev)
dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len);
txd->dma_len_type = cpu_to_le16(dlen_type);
- nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr);
+ nfp_desc_set_dma_addr_48b(txd, dma_addr);
dma_len -= dlen_type;
dma_addr += dlen_type + 1;
@@ -595,8 +595,8 @@ nfp_nfdk_rx_give_one(const struct nfp_net_dp *dp,
/* Fill freelist descriptor */
rx_ring->rxds[wr_idx].fld.reserved = 0;
rx_ring->rxds[wr_idx].fld.meta_len_dd = 0;
- nfp_desc_set_dma_addr(&rx_ring->rxds[wr_idx].fld,
- dma_addr + dp->rx_dma_off);
+ nfp_desc_set_dma_addr_48b(&rx_ring->rxds[wr_idx].fld,
+ dma_addr + dp->rx_dma_off);
rx_ring->wr_p++;
if (!(rx_ring->wr_p % NFP_NET_FL_BATCH)) {
@@ -929,7 +929,7 @@ nfp_nfdk_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);
txd->dma_len_type = cpu_to_le16(dlen_type);
- nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr);
+ nfp_desc_set_dma_addr_48b(txd, dma_addr);
tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD;
dma_len -= tmp_dlen;
@@ -940,7 +940,7 @@ nfp_nfdk_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
dma_len -= 1;
dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len);
txd->dma_len_type = cpu_to_le16(dlen_type);
- nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr);
+ nfp_desc_set_dma_addr_48b(txd, dma_addr);
dlen_type &= NFDK_DESC_TX_DMA_LEN;
dma_len -= dlen_type;
@@ -1332,7 +1332,7 @@ nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);
txd->dma_len_type = cpu_to_le16(dlen_type);
- nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr);
+ nfp_desc_set_dma_addr_48b(txd, dma_addr);
tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD;
dma_len -= tmp_dlen;
@@ -1343,7 +1343,7 @@ nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
dma_len -= 1;
dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len);
txd->dma_len_type = cpu_to_le16(dlen_type);
- nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr);
+ nfp_desc_set_dma_addr_48b(txd, dma_addr);
dlen_type &= NFDK_DESC_TX_DMA_LEN;
dma_len -= dlen_type;
diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/rings.c b/drivers/net/ethernet/netronome/nfp/nfdk/rings.c
index 301f11108826..f4d94ae0a349 100644
--- a/drivers/net/ethernet/netronome/nfp/nfdk/rings.c
+++ b/drivers/net/ethernet/netronome/nfp/nfdk/rings.c
@@ -181,6 +181,7 @@ const struct nfp_dp_ops nfp_nfdk_ops = {
.version = NFP_NFD_VER_NFDK,
.tx_min_desc_per_pkt = NFDK_TX_DESC_PER_SIMPLE_PKT,
.cap_mask = NFP_NFDK_CFG_CTRL_SUPPORTED,
+ .dma_mask = DMA_BIT_MASK(48),
.poll = nfp_nfdk_poll,
.ctrl_poll = nfp_nfdk_ctrl_poll,
.xmit = nfp_nfdk_tx,
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c
index 4f88d17536c3..36b173039024 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c
@@ -410,7 +410,9 @@ nfp_net_fw_find(struct pci_dev *pdev, struct nfp_pf *pf)
return NULL;
}
- fw_model = nfp_hwinfo_lookup(pf->hwinfo, "assembly.partno");
+ fw_model = nfp_hwinfo_lookup(pf->hwinfo, "nffw.partno");
+ if (!fw_model)
+ fw_model = nfp_hwinfo_lookup(pf->hwinfo, "assembly.partno");
if (!fw_model) {
dev_err(&pdev->dev, "Error: can't read part number\n");
return NULL;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 3dd3a92d2e7f..b07cea8e354c 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -115,7 +115,7 @@ struct nfp_nfdk_tx_buf;
#define D_IDX(ring, idx) ((idx) & ((ring)->cnt - 1))
/* Convenience macro for writing dma address into RX/TX descriptors */
-#define nfp_desc_set_dma_addr(desc, dma_addr) \
+#define nfp_desc_set_dma_addr_40b(desc, dma_addr) \
do { \
__typeof__(desc) __d = (desc); \
dma_addr_t __addr = (dma_addr); \
@@ -124,13 +124,13 @@ struct nfp_nfdk_tx_buf;
__d->dma_addr_hi = upper_32_bits(__addr) & 0xff; \
} while (0)
-#define nfp_nfdk_tx_desc_set_dma_addr(desc, dma_addr) \
- do { \
- __typeof__(desc) __d = (desc); \
- dma_addr_t __addr = (dma_addr); \
- \
- __d->dma_addr_hi = cpu_to_le16(upper_32_bits(__addr) & 0xff); \
- __d->dma_addr_lo = cpu_to_le32(lower_32_bits(__addr)); \
+#define nfp_desc_set_dma_addr_48b(desc, dma_addr) \
+ do { \
+ __typeof__(desc) __d = (desc); \
+ dma_addr_t __addr = (dma_addr); \
+ \
+ __d->dma_addr_hi = cpu_to_le16(upper_32_bits(__addr)); \
+ __d->dma_addr_lo = cpu_to_le32(lower_32_bits(__addr)); \
} while (0)
/**
@@ -225,8 +225,8 @@ struct nfp_net_tx_ring {
struct nfp_net_rx_desc {
union {
struct {
- u8 dma_addr_hi; /* High bits of the buf address */
- __le16 reserved; /* Must be zero */
+ __le16 dma_addr_hi; /* High bits of the buf address */
+ u8 reserved; /* Must be zero */
u8 meta_len_dd; /* Must be zero */
__le32 dma_addr_lo; /* Low bits of the buffer address */
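The nfp macro rename tracks a real layout change: NFD3 descriptors carry an 8-bit high part (40-bit addresses), NFDK descriptors a 16-bit high part (48-bit addresses), and the RX descriptor's dma_addr_hi grows from u8 to __le16 accordingly. A standalone sketch of the two packings, leaving out the little-endian conversions the real descriptors need:

#include <stdint.h>
#include <stdio.h>

#define lower_32_bits(x) ((uint32_t)(x))
#define upper_32_bits(x) ((uint32_t)((uint64_t)(x) >> 32))

/* 40-bit descriptor keeps only 8 high bits; 48-bit keeps 16. */
struct desc40 { uint8_t  hi; uint32_t lo; };
struct desc48 { uint16_t hi; uint32_t lo; };

static void set_40b(struct desc40 *d, uint64_t addr)
{
        d->lo = lower_32_bits(addr);
        d->hi = upper_32_bits(addr) & 0xff;
}

static void set_48b(struct desc48 *d, uint64_t addr)
{
        d->lo = lower_32_bits(addr);
        d->hi = (uint16_t)upper_32_bits(addr);
}

int main(void)
{
        uint64_t addr = 0x0000A5A5DEADBEEFULL; /* needs 48 bits */
        struct desc40 d40;
        struct desc48 d48;

        set_40b(&d40, addr);
        set_48b(&d48, addr);
        printf("40b keeps hi=0x%02x, 48b keeps hi=0x%04x\n",
               (unsigned)d40.hi, (unsigned)d48.hi);
        return 0;
}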
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 4e56a99087fa..57f284eefeb3 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -2040,6 +2040,7 @@ nfp_net_alloc(struct pci_dev *pdev, const struct nfp_dev_info *dev_info,
void __iomem *ctrl_bar, bool needs_netdev,
unsigned int max_tx_rings, unsigned int max_rx_rings)
{
+ u64 dma_mask = dma_get_mask(&pdev->dev);
struct nfp_net *nn;
int err;
@@ -2085,6 +2086,14 @@ nfp_net_alloc(struct pci_dev *pdev, const struct nfp_dev_info *dev_info,
goto err_free_nn;
}
+ if ((dma_mask & nn->dp.ops->dma_mask) != dma_mask) {
+ dev_err(&pdev->dev,
+ "DMA mask of loaded firmware: %llx, required DMA mask: %llx\n",
+ nn->dp.ops->dma_mask, dma_mask);
+ err = -EINVAL;
+ goto err_free_nn;
+ }
+
nn->max_tx_rings = max_tx_rings;
nn->max_rx_rings = max_rx_rings;
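The new probe-time check enforces that the mask the device was configured with is a subset of what the loaded datapath can address: every set bit of dma_mask must also be set in ops->dma_mask. A compilable sketch of the subset test (the error value is spelled out since this runs outside the kernel):

#include <stdint.h>
#include <stdio.h>

#define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

static int check_dma_mask(uint64_t dma_mask, uint64_t ops_mask)
{
        /* Every addressable bit must be addressable by the datapath. */
        if ((dma_mask & ops_mask) != dma_mask)
                return -22; /* -EINVAL */
        return 0;
}

int main(void)
{
        /* NFD3 datapath: 40-bit; NFDK: 48-bit. */
        printf("48b dev on 40b dp: %d\n",
               check_dma_mask(DMA_BIT_MASK(48), DMA_BIT_MASK(40))); /* fails */
        printf("40b dev on 48b dp: %d\n",
               check_dma_mask(DMA_BIT_MASK(40), DMA_BIT_MASK(48))); /* ok */
        return 0;
}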
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_dp.h b/drivers/net/ethernet/netronome/nfp/nfp_net_dp.h
index c934cc2d3208..83becb338478 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_dp.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_dp.h
@@ -117,6 +117,7 @@ enum nfp_nfd_version {
* @version: Indicate dp type
* @tx_min_desc_per_pkt: Minimal TX descs needed for each packet
* @cap_mask: Mask of supported features
+ * @dma_mask: DMA addressing capability
* @poll: Napi poll for normal rx/tx
* @xsk_poll: Napi poll when xsk is enabled
* @ctrl_poll: Tasklet poll for ctrl rx/tx
@@ -134,6 +135,7 @@ struct nfp_dp_ops {
enum nfp_nfd_version version;
unsigned int tx_min_desc_per_pkt;
u32 cap_mask;
+ u64 dma_mask;
int (*poll)(struct napi_struct *napi, int budget);
int (*xsk_poll)(struct napi_struct *napi, int budget);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index df0afd271a21..7475b209353f 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -1460,6 +1460,55 @@ static int nfp_net_set_channels(struct net_device *netdev,
return nfp_net_set_num_rings(nn, total_rx, total_tx);
}
+static void nfp_port_get_pauseparam(struct net_device *netdev,
+ struct ethtool_pauseparam *pause)
+{
+ struct nfp_eth_table_port *eth_port;
+ struct nfp_port *port;
+
+ port = nfp_port_from_netdev(netdev);
+ eth_port = nfp_port_get_eth_port(port);
+ if (!eth_port)
+ return;
+
+ /* Currently pause frame support is fixed */
+ pause->autoneg = AUTONEG_DISABLE;
+ pause->rx_pause = 1;
+ pause->tx_pause = 1;
+}
+
+static int nfp_net_set_phys_id(struct net_device *netdev,
+ enum ethtool_phys_id_state state)
+{
+ struct nfp_eth_table_port *eth_port;
+ struct nfp_port *port;
+ int err;
+
+ port = nfp_port_from_netdev(netdev);
+ eth_port = __nfp_port_get_eth_port(port);
+ if (!eth_port)
+ return -EOPNOTSUPP;
+
+ switch (state) {
+ case ETHTOOL_ID_ACTIVE:
+ /* Control LED to blink */
+ err = nfp_eth_set_idmode(port->app->cpp, eth_port->index, 1);
+ break;
+
+ case ETHTOOL_ID_INACTIVE:
+ /* Control LED to normal mode */
+ err = nfp_eth_set_idmode(port->app->cpp, eth_port->index, 0);
+ break;
+
+ case ETHTOOL_ID_ON:
+ case ETHTOOL_ID_OFF:
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return err;
+}
+
static const struct ethtool_ops nfp_net_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
ETHTOOL_COALESCE_MAX_FRAMES |
@@ -1492,6 +1541,8 @@ static const struct ethtool_ops nfp_net_ethtool_ops = {
.set_link_ksettings = nfp_net_set_link_ksettings,
.get_fecparam = nfp_port_get_fecparam,
.set_fecparam = nfp_port_set_fecparam,
+ .get_pauseparam = nfp_port_get_pauseparam,
+ .set_phys_id = nfp_net_set_phys_id,
};
const struct ethtool_ops nfp_port_ethtool_ops = {
@@ -1509,6 +1560,8 @@ const struct ethtool_ops nfp_port_ethtool_ops = {
.set_link_ksettings = nfp_net_set_link_ksettings,
.get_fecparam = nfp_port_get_fecparam,
.set_fecparam = nfp_port_set_fecparam,
+ .get_pauseparam = nfp_port_get_pauseparam,
+ .set_phys_id = nfp_net_set_phys_id,
};
void nfp_net_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.c b/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.c
index 86829446c637..aea507aed49d 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.c
@@ -70,8 +70,12 @@ void nfp_net_xsk_rx_ring_fill_freelist(struct nfp_net_rx_ring *rx_ring)
nfp_net_xsk_rx_bufs_stash(rx_ring, wr_idx, xdp);
- nfp_desc_set_dma_addr(&rx_ring->rxds[wr_idx].fld,
- rx_ring->xsk_rxbufs[wr_idx].dma_addr);
+ /* DMA address is expanded to 48-bit width in freelist for NFP3800,
+ * so the *_48b macro is used accordingly; it's also OK to fill in
+ * a 40-bit address since the top 8 bits are set to 0.
+ */
+ nfp_desc_set_dma_addr_48b(&rx_ring->rxds[wr_idx].fld,
+ rx_ring->xsk_rxbufs[wr_idx].dma_addr);
rx_ring->wr_p++;
wr_ptr_add++;
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
index ddb34bfb9bef..3d379e937184 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
@@ -13,36 +13,22 @@
#include <linux/ctype.h>
#include <linux/types.h>
#include <linux/sizes.h>
-#include <linux/stringify.h>
#ifndef NFP_SUBSYS
#define NFP_SUBSYS "nfp"
#endif
-#define string_format(x) __FILE__ ":" __stringify(__LINE__) ": " x
-
-#define __nfp_err(cpp, fmt, args...) \
- dev_err(nfp_cpp_device(cpp)->parent, NFP_SUBSYS ": " fmt, ## args)
-#define __nfp_warn(cpp, fmt, args...) \
- dev_warn(nfp_cpp_device(cpp)->parent, NFP_SUBSYS ": " fmt, ## args)
-#define __nfp_info(cpp, fmt, args...) \
- dev_info(nfp_cpp_device(cpp)->parent, NFP_SUBSYS ": " fmt, ## args)
-#define __nfp_dbg(cpp, fmt, args...) \
- dev_dbg(nfp_cpp_device(cpp)->parent, NFP_SUBSYS ": " fmt, ## args)
-#define __nfp_printk(level, cpp, fmt, args...) \
- dev_printk(level, nfp_cpp_device(cpp)->parent, \
- NFP_SUBSYS ": " fmt, ## args)
-
#define nfp_err(cpp, fmt, args...) \
- __nfp_err(cpp, string_format(fmt), ## args)
+ dev_err(nfp_cpp_device(cpp)->parent, NFP_SUBSYS ": " fmt, ## args)
#define nfp_warn(cpp, fmt, args...) \
- __nfp_warn(cpp, string_format(fmt), ## args)
+ dev_warn(nfp_cpp_device(cpp)->parent, NFP_SUBSYS ": " fmt, ## args)
#define nfp_info(cpp, fmt, args...) \
- __nfp_info(cpp, string_format(fmt), ## args)
+ dev_info(nfp_cpp_device(cpp)->parent, NFP_SUBSYS ": " fmt, ## args)
#define nfp_dbg(cpp, fmt, args...) \
- __nfp_dbg(cpp, string_format(fmt), ## args)
+ dev_dbg(nfp_cpp_device(cpp)->parent, NFP_SUBSYS ": " fmt, ## args)
#define nfp_printk(level, cpp, fmt, args...) \
- __nfp_printk(level, cpp, string_format(fmt), ## args)
+ dev_printk(level, nfp_cpp_device(cpp)->parent, \
+ NFP_SUBSYS ": " fmt, ## args)
#define PCI_64BIT_BAR_COUNT 3
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_dev.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_dev.c
index 28384d6d1c6f..0725b51c2a95 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_dev.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_dev.c
@@ -9,7 +9,7 @@
const struct nfp_dev_info nfp_dev_info[NFP_DEV_CNT] = {
[NFP_DEV_NFP3800] = {
- .dma_mask = DMA_BIT_MASK(40),
+ .dma_mask = DMA_BIT_MASK(48),
.qc_idx_mask = GENMASK(8, 0),
.qc_addr_offset = 0x400000,
.min_qc_size = 512,
@@ -21,7 +21,7 @@ const struct nfp_dev_info nfp_dev_info[NFP_DEV_CNT] = {
.qc_area_sz = 0x100000,
},
[NFP_DEV_NFP3800_VF] = {
- .dma_mask = DMA_BIT_MASK(40),
+ .dma_mask = DMA_BIT_MASK(48),
.qc_idx_mask = GENMASK(8, 0),
.qc_addr_offset = 0,
.min_qc_size = 512,
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h
index f5360bae6f75..77d66855be42 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h
@@ -196,6 +196,8 @@ int nfp_eth_set_configured(struct nfp_cpp *cpp, unsigned int idx,
int
nfp_eth_set_fec(struct nfp_cpp *cpp, unsigned int idx, enum nfp_eth_fec mode);
+int nfp_eth_set_idmode(struct nfp_cpp *cpp, unsigned int idx, bool state);
+
static inline bool nfp_eth_can_support_fec(struct nfp_eth_table_port *eth_port)
{
return !!eth_port->fec_modes_supported;
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c
index 311a5be25acb..edd300033735 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c
@@ -49,6 +49,7 @@
#define NSP_ETH_CTRL_SET_LANES BIT_ULL(5)
#define NSP_ETH_CTRL_SET_ANEG BIT_ULL(6)
#define NSP_ETH_CTRL_SET_FEC BIT_ULL(7)
+#define NSP_ETH_CTRL_SET_IDMODE BIT_ULL(8)
enum nfp_eth_raw {
NSP_ETH_RAW_PORT = 0,
@@ -492,6 +493,35 @@ nfp_eth_set_bit_config(struct nfp_nsp *nsp, unsigned int raw_idx,
return 0;
}
+int nfp_eth_set_idmode(struct nfp_cpp *cpp, unsigned int idx, bool state)
+{
+ union eth_table_entry *entries;
+ struct nfp_nsp *nsp;
+ u64 reg;
+
+ nsp = nfp_eth_config_start(cpp, idx);
+ if (IS_ERR(nsp))
+ return PTR_ERR(nsp);
+
+ /* The SET_IDMODE control bit was added in ABI 0.32 */
+ if (nfp_nsp_get_abi_ver_minor(nsp) < 32) {
+ nfp_err(nfp_nsp_cpp(nsp),
+ "set id mode operation not supported, please update flash\n");
+ nfp_eth_config_cleanup_end(nsp);
+ return -EOPNOTSUPP;
+ }
+
+ entries = nfp_nsp_config_entries(nsp);
+
+ reg = le64_to_cpu(entries[idx].control);
+ reg &= ~NSP_ETH_CTRL_SET_IDMODE;
+ reg |= FIELD_PREP(NSP_ETH_CTRL_SET_IDMODE, state);
+ entries[idx].control = cpu_to_le64(reg);
+
+ nfp_nsp_config_set_modified(nsp, true);
+
+ return nfp_eth_config_commit_end(nsp);
+}
+
#define NFP_ETH_SET_BIT_CONFIG(nsp, raw_idx, mask, val, ctrl_bit) \
({ \
__BF_FIELD_CHECK(mask, 0ULL, val, "NFP_ETH_SET_BIT_CONFIG: "); \
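nfp_eth_set_idmode() performs a read-modify-write on the 64-bit control word: the SET_IDMODE field is cleared first and then re-populated with FIELD_PREP, so turning the LED off genuinely clears the bit rather than only ever ORing it in. A self-contained sketch of that sequence (FIELD_PREP1 is a one-bit stand-in for the kernel macro):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define BIT_ULL(n) (1ULL << (n))
#define NSP_ETH_CTRL_SET_IDMODE BIT_ULL(8)

/* Minimal FIELD_PREP for a single-bit field (illustrative). */
#define FIELD_PREP1(mask, val) ((uint64_t)(val) * (mask))

static uint64_t set_idmode(uint64_t control, int state)
{
        control &= ~NSP_ETH_CTRL_SET_IDMODE; /* clear, then set */
        control |= FIELD_PREP1(NSP_ETH_CTRL_SET_IDMODE, state);
        return control;
}

int main(void)
{
        uint64_t reg = 0xf0;

        reg = set_idmode(reg, 1);
        printf("on:  0x%" PRIx64 "\n", reg); /* 0x1f0 */
        reg = set_idmode(reg, 0);
        printf("off: 0x%" PRIx64 "\n", reg); /* 0xf0 */
        return 0;
}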
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
index 06104d2ff5b3..80c95c331c82 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
@@ -1465,7 +1465,7 @@ netdev_tx_t emac_mac_tx_buf_send(struct emac_adapter *adpt,
/* Make sure the are enough free descriptors to hold one
* maximum-sized SKB. We need one desc for each fragment,
* one for the checksum (emac_tso_csum), one for TSO, and
- * and one for the SKB header.
+ * one for the SKB header.
*/
if (emac_tpd_num_free_descs(tx_q) < (MAX_SKB_FRAGS + 3))
netif_stop_queue(adpt->netdev);
diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c
index 50baf62b2cbc..a3425b6be3f7 100644
--- a/drivers/net/ethernet/sfc/mcdi.c
+++ b/drivers/net/ethernet/sfc/mcdi.c
@@ -1261,7 +1261,7 @@ static void efx_mcdi_ev_death(struct efx_nic *efx, int rc)
}
/* The MC is going down in to BIST mode. set the BIST flag to block
- * new MCDI, cancel any outstanding MCDI and and schedule a BIST-type reset
+ * new MCDI, cancel any outstanding MCDI and schedule a BIST-type reset
* (which doesn't actually execute a reset, it waits for the controlling
* function to reset it).
*/
diff --git a/drivers/net/ethernet/sfc/siena/mcdi.c b/drivers/net/ethernet/sfc/siena/mcdi.c
index 3df0f0eca3b7..3f7899daa86a 100644
--- a/drivers/net/ethernet/sfc/siena/mcdi.c
+++ b/drivers/net/ethernet/sfc/siena/mcdi.c
@@ -1264,7 +1264,7 @@ static void efx_mcdi_ev_death(struct efx_nic *efx, int rc)
}
/* The MC is going down in to BIST mode. set the BIST flag to block
- * new MCDI, cancel any outstanding MCDI and and schedule a BIST-type reset
+ * new MCDI, cancel any outstanding MCDI and schedule a BIST-type reset
* (which doesn't actually execute a reset, it waits for the controlling
* function to reset it).
*/
diff --git a/drivers/net/ethernet/sfc/siena/mcdi_pcol.h b/drivers/net/ethernet/sfc/siena/mcdi_pcol.h
index 89a7fd47b057..a3cc8b7ec732 100644
--- a/drivers/net/ethernet/sfc/siena/mcdi_pcol.h
+++ b/drivers/net/ethernet/sfc/siena/mcdi_pcol.h
@@ -274,7 +274,7 @@
* MC_CMD_WORKAROUND_BUG26807.
* May also be returned for other operations such as sub-variant switching. */
#define MC_CMD_ERR_FILTERS_PRESENT 0x1014
-/* The clock whose frequency you've attempted to set set
+/* The clock whose frequency you've attempted to set
* doesn't exist on this NIC */
#define MC_CMD_ERR_NO_CLOCK 0x1015
/* Returned by MC_CMD_TESTASSERT if the action that should
@@ -7782,7 +7782,7 @@
* large number (253) it is not anticipated that this will be needed in the
* near future, so can currently be ignored.
*
- * On Riverhead this command is implemented as a a wrapper for `list` in the
+ * On Riverhead this command is implemented as a wrapper for `list` in the
* sensor_query SPHINX service.
*/
#define MC_CMD_DYNAMIC_SENSORS_LIST 0x66
@@ -7827,7 +7827,7 @@
* update is in progress, and effectively means the set of usable sensors is
* the intersection between the sets of sensors known to the driver and the MC.
*
- * On Riverhead this command is implemented as a a wrapper for
+ * On Riverhead this command is implemented as a wrapper for
* `get_descriptions` in the sensor_query SPHINX service.
*/
#define MC_CMD_DYNAMIC_SENSORS_GET_DESCRIPTIONS 0x67
@@ -7876,7 +7876,7 @@
* update is in progress, and effectively means the set of usable sensors is
* the intersection between the sets of sensors known to the driver and the MC.
*
- * On Riverhead this command is implemented as a a wrapper for `get_readings`
+ * On Riverhead this command is implemented as a wrapper for `get_readings`
* in the sensor_query SPHINX service.
*/
#define MC_CMD_DYNAMIC_SENSORS_GET_READINGS 0x68
@@ -16682,7 +16682,7 @@
* TLV_PORT_MODE_*). A superset of MC_CMD_GET_PORT_MODES_OUT/MODES that
* contains all modes implemented in firmware for a particular board. Modes
* listed in MODES are considered production modes and should be exposed in
- * userland tools. Modes listed in in ENGINEERING_MODES, but not in MODES
+ * userland tools. Modes listed in ENGINEERING_MODES, but not in MODES
* should be considered hidden (not to be exposed in userland tools) and for
* engineering use only. There are no other semantic differences and any mode
* listed in either MODES or ENGINEERING_MODES can be set on the board.
diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index 929cfc22cd0c..31ff35174034 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -91,6 +91,9 @@ config DWMAC_IPQ806X
acceleration features available on this SoC. Network devices
will behave like standard non-accelerated ethernet interfaces.
+ Select the QCOM_SOCINFO config flag to enable the dwmac fixup
+ specific to the ipq806x SoC revision.
+
config DWMAC_LPC18XX
tristate "NXP LPC18xx/43xx DWMAC support"
default ARCH_LPC18XX
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 38fe77d1035e..d0e82cb5ae03 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -251,7 +251,6 @@ static void intel_speed_mode_2500(struct net_device *ndev, void *intel_data)
priv->plat->mdio_bus_data->xpcs_an_inband = false;
} else {
priv->plat->max_speed = 1000;
- priv->plat->phy_interface = PHY_INTERFACE_MODE_SGMII;
priv->plat->mdio_bus_data->xpcs_an_inband = true;
}
}
@@ -443,6 +442,7 @@ static void common_default_data(struct plat_stmmacenet_data *plat)
static int intel_mgbe_common_data(struct pci_dev *pdev,
struct plat_stmmacenet_data *plat)
{
+ struct fwnode_handle *fwnode;
char clk_name[20];
int ret;
int i;
@@ -561,12 +561,42 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
/* Use the last Rx queue */
plat->vlan_fail_q = plat->rx_queues_to_use - 1;
+ /* For fixed-link setup, we allow phy-mode setting */
+ fwnode = dev_fwnode(&pdev->dev);
+ if (fwnode) {
+ int phy_mode;
+
+ /* "phy-mode" setting is optional. If it is set,
+ * we allow either sgmii or 1000base-x for now.
+ */
+ phy_mode = fwnode_get_phy_mode(fwnode);
+ if (phy_mode >= 0) {
+ if (phy_mode == PHY_INTERFACE_MODE_SGMII ||
+ phy_mode == PHY_INTERFACE_MODE_1000BASEX)
+ plat->phy_interface = phy_mode;
+ else
+ dev_warn(&pdev->dev, "Invalid phy-mode\n");
+ }
+ }
+
/* Intel mgbe SGMII interface uses pcs-xcps */
- if (plat->phy_interface == PHY_INTERFACE_MODE_SGMII) {
+ if (plat->phy_interface == PHY_INTERFACE_MODE_SGMII ||
+ plat->phy_interface == PHY_INTERFACE_MODE_1000BASEX) {
plat->mdio_bus_data->has_xpcs = true;
plat->mdio_bus_data->xpcs_an_inband = true;
}
+ /* For fixed-link setup, we clear xpcs_an_inband */
+ if (fwnode) {
+ struct fwnode_handle *fixed_node;
+
+ fixed_node = fwnode_get_named_child_node(fwnode, "fixed-link");
+ if (fixed_node)
+ plat->mdio_bus_data->xpcs_an_inband = false;
+
+ fwnode_handle_put(fixed_node);
+ }
+
/* Ensure mdio bus scan skips intel serdes and pcs-xpcs */
plat->mdio_bus_data->phy_mask = 1 << INTEL_MGBE_ADHOC_ADDR;
plat->mdio_bus_data->phy_mask |= 1 << INTEL_MGBE_XPCS_ADDR;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
index f7dc8458cde8..e888c8a9c830 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
@@ -27,6 +27,8 @@
#include <linux/stmmac.h>
#include <linux/of_mdio.h>
#include <linux/module.h>
+#include <linux/sys_soc.h>
+#include <linux/bitfield.h>
#include "stmmac_platform.h"
@@ -64,6 +66,17 @@
#define NSS_COMMON_CLK_DIV_SGMII_100 4
#define NSS_COMMON_CLK_DIV_SGMII_10 49
+#define QSGMII_PCS_ALL_CH_CTL 0x80
+#define QSGMII_PCS_CH_SPEED_FORCE BIT(1)
+#define QSGMII_PCS_CH_SPEED_10 0x0
+#define QSGMII_PCS_CH_SPEED_100 BIT(2)
+#define QSGMII_PCS_CH_SPEED_1000 BIT(3)
+#define QSGMII_PCS_CH_SPEED_MASK (QSGMII_PCS_CH_SPEED_FORCE | \
+ QSGMII_PCS_CH_SPEED_10 | \
+ QSGMII_PCS_CH_SPEED_100 | \
+ QSGMII_PCS_CH_SPEED_1000)
+#define QSGMII_PCS_CH_SPEED_SHIFT(x) ((x) * 4)
+
#define QSGMII_PCS_CAL_LCKDT_CTL 0x120
#define QSGMII_PCS_CAL_LCKDT_CTL_RST BIT(19)
@@ -75,11 +88,20 @@
#define QSGMII_PHY_RX_SIGNAL_DETECT_EN BIT(2)
#define QSGMII_PHY_TX_DRIVER_EN BIT(3)
#define QSGMII_PHY_QSGMII_EN BIT(7)
-#define QSGMII_PHY_PHASE_LOOP_GAIN_OFFSET 12
-#define QSGMII_PHY_RX_DC_BIAS_OFFSET 18
-#define QSGMII_PHY_RX_INPUT_EQU_OFFSET 20
-#define QSGMII_PHY_CDR_PI_SLEW_OFFSET 22
-#define QSGMII_PHY_TX_DRV_AMP_OFFSET 28
+#define QSGMII_PHY_DEEMPHASIS_LVL_MASK GENMASK(11, 10)
+#define QSGMII_PHY_DEEMPHASIS_LVL(x) FIELD_PREP(QSGMII_PHY_DEEMPHASIS_LVL_MASK, (x))
+#define QSGMII_PHY_PHASE_LOOP_GAIN_MASK GENMASK(14, 12)
+#define QSGMII_PHY_PHASE_LOOP_GAIN(x) FIELD_PREP(QSGMII_PHY_PHASE_LOOP_GAIN_MASK, (x))
+#define QSGMII_PHY_RX_DC_BIAS_MASK GENMASK(19, 18)
+#define QSGMII_PHY_RX_DC_BIAS(x) FIELD_PREP(QSGMII_PHY_RX_DC_BIAS_MASK, (x))
+#define QSGMII_PHY_RX_INPUT_EQU_MASK GENMASK(21, 20)
+#define QSGMII_PHY_RX_INPUT_EQU(x) FIELD_PREP(QSGMII_PHY_RX_INPUT_EQU_MASK, (x))
+#define QSGMII_PHY_CDR_PI_SLEW_MASK GENMASK(23, 22)
+#define QSGMII_PHY_CDR_PI_SLEW(x) FIELD_PREP(QSGMII_PHY_CDR_PI_SLEW_MASK, (x))
+#define QSGMII_PHY_TX_SLEW_MASK GENMASK(27, 26)
+#define QSGMII_PHY_TX_SLEW(x) FIELD_PREP(QSGMII_PHY_TX_SLEW_MASK, (x))
+#define QSGMII_PHY_TX_DRV_AMP_MASK GENMASK(31, 28)
+#define QSGMII_PHY_TX_DRV_AMP(x) FIELD_PREP(QSGMII_PHY_TX_DRV_AMP_MASK, (x))
struct ipq806x_gmac {
struct platform_device *pdev;
@@ -242,6 +264,113 @@ static void ipq806x_gmac_fix_mac_speed(void *priv, unsigned int speed)
ipq806x_gmac_set_speed(gmac, speed);
}
+static int
+ipq806x_gmac_configure_qsgmii_pcs_speed(struct ipq806x_gmac *gmac)
+{
+ struct platform_device *pdev = gmac->pdev;
+ struct device *dev = &pdev->dev;
+ struct device_node *dn;
+ int link_speed;
+ int val = 0;
+ int ret;
+
+ /* Some bootloaders may apply a wrong configuration and leave the
+ * port non-functional. If no fixed link is set, reset the force
+ * speed bit.
+ */
+ if (!of_phy_is_fixed_link(pdev->dev.of_node))
+ goto write;
+
+ dn = of_get_child_by_name(pdev->dev.of_node, "fixed-link");
+ ret = of_property_read_u32(dn, "speed", &link_speed);
+ of_node_put(dn);
+ if (ret) {
+ dev_err(dev, "found fixed-link node with no speed");
+ return ret;
+ }
+
+ val = QSGMII_PCS_CH_SPEED_FORCE;
+
+ switch (link_speed) {
+ case SPEED_1000:
+ val |= QSGMII_PCS_CH_SPEED_1000;
+ break;
+ case SPEED_100:
+ val |= QSGMII_PCS_CH_SPEED_100;
+ break;
+ case SPEED_10:
+ val |= QSGMII_PCS_CH_SPEED_10;
+ break;
+ }
+
+write:
+ regmap_update_bits(gmac->qsgmii_csr, QSGMII_PCS_ALL_CH_CTL,
+ QSGMII_PCS_CH_SPEED_MASK <<
+ QSGMII_PCS_CH_SPEED_SHIFT(gmac->id),
+ val <<
+ QSGMII_PCS_CH_SPEED_SHIFT(gmac->id));
+
+ return 0;
+}
+
+static const struct soc_device_attribute ipq806x_gmac_soc_v1[] = {
+ {
+ .revision = "1.*",
+ },
+ {
+ /* sentinel */
+ }
+};
+
+static int
+ipq806x_gmac_configure_qsgmii_params(struct ipq806x_gmac *gmac)
+{
+ struct platform_device *pdev = gmac->pdev;
+ const struct soc_device_attribute *soc;
+ struct device *dev = &pdev->dev;
+ u32 qsgmii_param;
+
+ switch (gmac->id) {
+ case 1:
+ soc = soc_device_match(ipq806x_gmac_soc_v1);
+
+ if (soc)
+ qsgmii_param = QSGMII_PHY_TX_DRV_AMP(0xc) |
+ QSGMII_PHY_TX_SLEW(0x2) |
+ QSGMII_PHY_DEEMPHASIS_LVL(0x2);
+ else
+ qsgmii_param = QSGMII_PHY_TX_DRV_AMP(0xd) |
+ QSGMII_PHY_TX_SLEW(0x0) |
+ QSGMII_PHY_DEEMPHASIS_LVL(0x0);
+
+ qsgmii_param |= QSGMII_PHY_RX_DC_BIAS(0x2);
+ break;
+ case 2:
+ case 3:
+ qsgmii_param = QSGMII_PHY_RX_DC_BIAS(0x3) |
+ QSGMII_PHY_TX_DRV_AMP(0xc);
+ break;
+ default: /* gmac 0 can't be set in SGMII mode */
+ dev_err(dev, "gmac id %d can't be in SGMII mode", gmac->id);
+ return -EINVAL;
+ }
+
+ /* Common params across all gmac id */
+ qsgmii_param |= QSGMII_PHY_CDR_EN |
+ QSGMII_PHY_RX_FRONT_EN |
+ QSGMII_PHY_RX_SIGNAL_DETECT_EN |
+ QSGMII_PHY_TX_DRIVER_EN |
+ QSGMII_PHY_QSGMII_EN |
+ QSGMII_PHY_PHASE_LOOP_GAIN(0x4) |
+ QSGMII_PHY_RX_INPUT_EQU(0x1) |
+ QSGMII_PHY_CDR_PI_SLEW(0x2);
+
+ regmap_write(gmac->qsgmii_csr, QSGMII_PHY_SGMII_CTL(gmac->id),
+ qsgmii_param);
+
+ return 0;
+}
+
static int ipq806x_gmac_probe(struct platform_device *pdev)
{
struct plat_stmmacenet_data *plat_dat;
@@ -328,17 +457,13 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
regmap_write(gmac->nss_common, NSS_COMMON_CLK_GATE, val);
if (gmac->phy_mode == PHY_INTERFACE_MODE_SGMII) {
- regmap_write(gmac->qsgmii_csr, QSGMII_PHY_SGMII_CTL(gmac->id),
- QSGMII_PHY_CDR_EN |
- QSGMII_PHY_RX_FRONT_EN |
- QSGMII_PHY_RX_SIGNAL_DETECT_EN |
- QSGMII_PHY_TX_DRIVER_EN |
- QSGMII_PHY_QSGMII_EN |
- 0x4ul << QSGMII_PHY_PHASE_LOOP_GAIN_OFFSET |
- 0x3ul << QSGMII_PHY_RX_DC_BIAS_OFFSET |
- 0x1ul << QSGMII_PHY_RX_INPUT_EQU_OFFSET |
- 0x2ul << QSGMII_PHY_CDR_PI_SLEW_OFFSET |
- 0xCul << QSGMII_PHY_TX_DRV_AMP_OFFSET);
+ err = ipq806x_gmac_configure_qsgmii_params(gmac);
+ if (err)
+ goto err_remove_config_dt;
+
+ err = ipq806x_gmac_configure_qsgmii_pcs_speed(gmac);
+ if (err)
+ goto err_remove_config_dt;
}
plat_dat->has_gmac = true;
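The QSGMII PCS register packs one 4-bit speed field per gmac channel, so both writes above go through a masked update shifted by QSGMII_PCS_CH_SPEED_SHIFT(id): a fixed link forces the configured speed, and without one the force bit is cleared in case the bootloader left it set. A sketch with a local stand-in for regmap_update_bits():

#include <stdint.h>
#include <stdio.h>

#define BIT(n) (1U << (n))
#define QSGMII_PCS_CH_SPEED_FORCE BIT(1)
#define QSGMII_PCS_CH_SPEED_1000  BIT(3)
#define QSGMII_PCS_CH_SPEED_MASK  (BIT(1) | BIT(2) | BIT(3))
#define QSGMII_PCS_CH_SPEED_SHIFT(x) ((x) * 4)

/* Model of regmap_update_bits(): touch only the masked field. */
static uint32_t update_bits(uint32_t reg, uint32_t mask, uint32_t val)
{
        return (reg & ~mask) | (val & mask);
}

int main(void)
{
        uint32_t reg = 0xffffffff; /* bootloader left everything forced */
        int id = 2;                /* gmac2's nibble in the shared register */

        /* Fixed link at 1000: force the speed for this channel only. */
        reg = update_bits(reg,
                          QSGMII_PCS_CH_SPEED_MASK << QSGMII_PCS_CH_SPEED_SHIFT(id),
                          (QSGMII_PCS_CH_SPEED_FORCE | QSGMII_PCS_CH_SPEED_1000)
                                  << QSGMII_PCS_CH_SPEED_SHIFT(id));

        /* No fixed link on gmac0: clear the stale force bit. */
        reg = update_bits(reg,
                          QSGMII_PCS_CH_SPEED_MASK << QSGMII_PCS_CH_SPEED_SHIFT(0),
                          0);

        printf("reg = 0x%08x\n", reg);
        return 0;
}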
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index d1a7cf4567bc..fe263cad8248 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1128,18 +1128,20 @@ static void stmmac_check_pcs_mode(struct stmmac_priv *priv)
static int stmmac_init_phy(struct net_device *dev)
{
struct stmmac_priv *priv = netdev_priv(dev);
- struct device_node *node;
+ struct fwnode_handle *fwnode;
int ret;
- node = priv->plat->phylink_node;
+ fwnode = of_fwnode_handle(priv->plat->phylink_node);
+ if (!fwnode)
+ fwnode = dev_fwnode(priv->device);
- if (node)
- ret = phylink_of_phy_connect(priv->phylink, node, 0);
+ if (fwnode)
+ ret = phylink_fwnode_phy_connect(priv->phylink, fwnode, 0);
/* Some DT bindings do not set-up the PHY handle. Let's try to
* manually parse it
*/
- if (!node || ret) {
+ if (!fwnode || ret) {
int addr = priv->plat->phy_addr;
struct phy_device *phydev;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index 03d3d1f7aa4b..5f177ea80725 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -434,9 +434,11 @@ int stmmac_mdio_register(struct net_device *ndev)
int err = 0;
struct mii_bus *new_bus;
struct stmmac_priv *priv = netdev_priv(ndev);
+ struct fwnode_handle *fwnode = of_fwnode_handle(priv->plat->phylink_node);
struct stmmac_mdio_bus_data *mdio_bus_data = priv->plat->mdio_bus_data;
struct device_node *mdio_node = priv->plat->mdio_node;
struct device *dev = ndev->dev.parent;
+ struct fwnode_handle *fixed_node;
int addr, found, max_addr;
if (!mdio_bus_data)
@@ -490,6 +492,18 @@ int stmmac_mdio_register(struct net_device *ndev)
if (priv->plat->has_xgmac)
stmmac_xgmac2_mdio_read(new_bus, 0, MII_ADDR_C45);
+ /* If fixed-link is set, skip PHY scanning */
+ if (!fwnode)
+ fwnode = dev_fwnode(priv->device);
+
+ if (fwnode) {
+ fixed_node = fwnode_get_named_child_node(fwnode, "fixed-link");
+ if (fixed_node) {
+ fwnode_handle_put(fixed_node);
+ goto bus_register_done;
+ }
+ }
+
if (priv->plat->phy_node || mdio_node)
goto bus_register_done;
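The MDIO hunk skips PHY scanning when the firmware node carries a fixed-link child, since nothing will answer on the bus in that case. A toy model of the decision (struct fwnode here is a one-field stand-in, not the kernel type):

#include <stdbool.h>
#include <stdio.h>

struct fwnode {
        bool has_fixed_link_child;
};

static bool should_scan_mdio(const struct fwnode *node)
{
        if (node && node->has_fixed_link_child)
                return false; /* fixed-link: skip PHY scanning */
        return true;
}

int main(void)
{
        struct fwnode fixed = { .has_fixed_link_child = true };
        struct fwnode phy = { .has_fixed_link_child = false };

        printf("fixed-link: scan=%d\n", should_scan_mdio(&fixed));
        printf("phy:        scan=%d\n", should_scan_mdio(&phy));
        return 0;
}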
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h
index 4225efbeda3d..f2e2261b4b7d 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet.h
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h
@@ -547,6 +547,57 @@ static inline void axienet_iow(struct axienet_local *lp, off_t offset,
iowrite32(value, lp->regs + offset);
}
+/**
+ * axienet_dma_out32 - Memory mapped Axi DMA register write.
+ * @lp: Pointer to axienet local structure
+ * @reg: Address offset from the base address of the Axi DMA core
+ * @value: Value to be written into the Axi DMA register
+ *
+ * This function writes the desired value into the corresponding Axi DMA
+ * register.
+ */
+static inline void axienet_dma_out32(struct axienet_local *lp,
+ off_t reg, u32 value)
+{
+ iowrite32(value, lp->dma_regs + reg);
+}
+
+#if defined(CONFIG_64BIT) && defined(iowrite64)
+/**
+ * axienet_dma_out64 - Memory mapped Axi DMA register write.
+ * @lp: Pointer to axienet local structure
+ * @reg: Address offset from the base address of the Axi DMA core
+ * @value: Value to be written into the Axi DMA register
+ *
+ * This function writes the desired value into the corresponding Axi DMA
+ * register.
+ */
+static inline void axienet_dma_out64(struct axienet_local *lp,
+ off_t reg, u64 value)
+{
+ iowrite64(value, lp->dma_regs + reg);
+}
+
+static inline void axienet_dma_out_addr(struct axienet_local *lp, off_t reg,
+ dma_addr_t addr)
+{
+ if (lp->features & XAE_FEATURE_DMA_64BIT)
+ axienet_dma_out64(lp, reg, addr);
+ else
+ axienet_dma_out32(lp, reg, lower_32_bits(addr));
+}
+
+#else /* CONFIG_64BIT */
+
+static inline void axienet_dma_out_addr(struct axienet_local *lp, off_t reg,
+ dma_addr_t addr)
+{
+ axienet_dma_out32(lp, reg, lower_32_bits(addr));
+}
+
+#endif /* CONFIG_64BIT */
+
/* Function prototypes visible in xilinx_axienet_mdio.c for other files */
int axienet_mdio_enable(struct axienet_local *lp);
void axienet_mdio_disable(struct axienet_local *lp);
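
The guard on both CONFIG_64BIT and iowrite64 matters because not every 64-bit platform provides an atomic 64-bit MMIO write. For contrast, a sketch of the non-atomic scheme the removed helper in xilinx_axienet_main.c relied on, programming the address as two 32-bit halves (illustrative function, not from the patch):

    #include <linux/io.h>
    #include <linux/kernel.h>

    /* Low word first, then high word. The device must tolerate the
     * window between the two writes, which is why a single atomic
     * iowrite64() is preferred whenever the architecture has one.
     */
    static void example_dma_addr_split(void __iomem *lo, void __iomem *hi,
                                       u64 addr)
    {
            iowrite32(lower_32_bits(addr), lo);
            iowrite32(upper_32_bits(addr), hi);
    }
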
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 93c9f305bba4..1760930ec0c4 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -133,30 +133,6 @@ static inline u32 axienet_dma_in32(struct axienet_local *lp, off_t reg)
return ioread32(lp->dma_regs + reg);
}
-/**
- * axienet_dma_out32 - Memory mapped Axi DMA register write.
- * @lp: Pointer to axienet local structure
- * @reg: Address offset from the base address of the Axi DMA core
- * @value: Value to be written into the Axi DMA register
- *
- * This function writes the desired value into the corresponding Axi DMA
- * register.
- */
-static inline void axienet_dma_out32(struct axienet_local *lp,
- off_t reg, u32 value)
-{
- iowrite32(value, lp->dma_regs + reg);
-}
-
-static void axienet_dma_out_addr(struct axienet_local *lp, off_t reg,
- dma_addr_t addr)
-{
- axienet_dma_out32(lp, reg, lower_32_bits(addr));
-
- if (lp->features & XAE_FEATURE_DMA_64BIT)
- axienet_dma_out32(lp, reg + 4, upper_32_bits(addr));
-}
-
static void desc_set_phys_addr(struct axienet_local *lp, dma_addr_t addr,
struct axidma_bd *desc)
{
@@ -2061,6 +2037,11 @@ static int axienet_probe(struct platform_device *pdev)
iowrite32(0x0, desc);
}
}
+ if (!IS_ENABLED(CONFIG_64BIT) && lp->features & XAE_FEATURE_DMA_64BIT) {
+ dev_err(&pdev->dev, "64-bit addressable DMA is not compatible with 32-bit archecture\n");
+ ret = -EINVAL;
+ goto cleanup_clk;
+ }
ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(addr_width));
if (ret) {
diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
index 45c3c4a1101b..9fb567524220 100644
--- a/drivers/net/hamradio/6pack.c
+++ b/drivers/net/hamradio/6pack.c
@@ -99,6 +99,7 @@ struct sixpack {
unsigned int rx_count;
unsigned int rx_count_cooked;
+ spinlock_t rxlock;
int mtu; /* Our mtu (to spot changes!) */
int buffsize; /* Max buffers sizes */
@@ -565,6 +566,7 @@ static int sixpack_open(struct tty_struct *tty)
sp->dev = dev;
spin_lock_init(&sp->lock);
+ spin_lock_init(&sp->rxlock);
refcount_set(&sp->refcnt, 1);
init_completion(&sp->dead);
@@ -913,6 +915,7 @@ static void decode_std_command(struct sixpack *sp, unsigned char cmd)
sp->led_state = 0x60;
/* fill trailing bytes with zeroes */
sp->tty->ops->write(sp->tty, &sp->led_state, 1);
+ spin_lock_bh(&sp->rxlock);
rest = sp->rx_count;
if (rest != 0)
for (i = rest; i <= 3; i++)
@@ -930,6 +933,7 @@ static void decode_std_command(struct sixpack *sp, unsigned char cmd)
sp_bump(sp, 0);
}
sp->rx_count_cooked = 0;
+ spin_unlock_bh(&sp->rxlock);
}
break;
case SIXP_TX_URUN: printk(KERN_DEBUG "6pack: TX underrun\n");
@@ -959,8 +963,11 @@ sixpack_decode(struct sixpack *sp, const unsigned char *pre_rbuff, int count)
decode_prio_command(sp, inbyte);
else if ((inbyte & SIXP_STD_CMD_MASK) != 0)
decode_std_command(sp, inbyte);
- else if ((sp->status & SIXP_RX_DCD_MASK) == SIXP_RX_DCD_MASK)
+ else if ((sp->status & SIXP_RX_DCD_MASK) == SIXP_RX_DCD_MASK) {
+ spin_lock_bh(&sp->rxlock);
decode_data(sp, inbyte);
+ spin_unlock_bh(&sp->rxlock);
+ }
}
}
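
The new rxlock serializes the receive-side state (rx_count, rx_count_cooked and the cooked buffer), which decode_data() and the command decoder previously updated without mutual exclusion. A minimal sketch of the pattern, assuming counters only ever touched from bottom-half context (illustrative types):

    #include <linux/spinlock.h>

    struct example_rx_state {
            spinlock_t lock;        /* protects the counters below */
            unsigned int count;
            unsigned int count_cooked;
    };

    /* All writers run in BH context, so the _bh lock variants are
     * sufficient; no IRQ-disabling flavour is required.
     */
    static void example_reset_rx(struct example_rx_state *st)
    {
            spin_lock_bh(&st->lock);
            st->count = 0;
            st->count_cooked = 0;
            spin_unlock_bh(&st->lock);
    }
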
diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c
index 9cfe84319ee4..4e46974a69ec 100644
--- a/drivers/net/ipa/gsi.c
+++ b/drivers/net/ipa/gsi.c
@@ -823,7 +823,7 @@ static void gsi_channel_program(struct gsi_channel *channel, bool doorbell)
/* Now update the scratch registers for GPI protocol */
gpi = &scr.gpi;
- gpi->max_outstanding_tre = gsi_channel_trans_tre_max(gsi, channel_id) *
+ gpi->max_outstanding_tre = channel->trans_tre_max *
GSI_RING_ELEMENT_SIZE;
gpi->outstanding_threshold = 2 * GSI_RING_ELEMENT_SIZE;
@@ -991,75 +991,66 @@ void gsi_resume(struct gsi *gsi)
enable_irq(gsi->irq);
}
-/**
- * gsi_channel_tx_queued() - Report queued TX transfers for a channel
- * @channel: Channel for which to report
- *
- * Report to the network stack the number of bytes and transactions that
- * have been queued to hardware since last call. This and the next function
- * supply information used by the network stack for throttling.
- *
- * For each channel we track the number of transactions used and bytes of
- * data those transactions represent. We also track what those values are
- * each time this function is called. Subtracting the two tells us
- * the number of bytes and transactions that have been added between
- * successive calls.
- *
- * Calling this each time we ring the channel doorbell allows us to
- * provide accurate information to the network stack about how much
- * work we've given the hardware at any point in time.
- */
-void gsi_channel_tx_queued(struct gsi_channel *channel)
+void gsi_trans_tx_committed(struct gsi_trans *trans)
{
+ struct gsi_channel *channel = &trans->gsi->channel[trans->channel_id];
+
+ channel->trans_count++;
+ channel->byte_count += trans->len;
+
+ trans->trans_count = channel->trans_count;
+ trans->byte_count = channel->byte_count;
+}
+
+void gsi_trans_tx_queued(struct gsi_trans *trans)
+{
+ u32 channel_id = trans->channel_id;
+ struct gsi *gsi = trans->gsi;
+ struct gsi_channel *channel;
u32 trans_count;
u32 byte_count;
+ channel = &gsi->channel[channel_id];
+
byte_count = channel->byte_count - channel->queued_byte_count;
trans_count = channel->trans_count - channel->queued_trans_count;
channel->queued_byte_count = channel->byte_count;
channel->queued_trans_count = channel->trans_count;
- ipa_gsi_channel_tx_queued(channel->gsi, gsi_channel_id(channel),
- trans_count, byte_count);
+ ipa_gsi_channel_tx_queued(gsi, channel_id, trans_count, byte_count);
}
/**
- * gsi_channel_tx_update() - Report completed TX transfers
- * @channel: Channel that has completed transmitting packets
- * @trans: Last transation known to be complete
+ * gsi_trans_tx_completed() - Report completed TX transactions
+ * @trans: TX channel transaction that has completed
*
- * Compute the number of transactions and bytes that have been transferred
- * over a TX channel since the given transaction was committed. Report this
- * information to the network stack.
+ * Report that a transaction on a TX channel has completed. At the time a
+ * transaction is committed, we record *in the transaction* its channel's
+ * committed transaction and byte counts. Transactions are completed in
+ * order, and the difference between the channel's byte/transaction count
+ * when the transaction was committed and when it completes tells us
+ * exactly how much data has been transferred while the transaction was
+ * pending.
*
- * At the time a transaction is committed, we record its channel's
- * committed transaction and byte counts *in the transaction*.
- * Completions are signaled by the hardware with an interrupt, and
- * we can determine the latest completed transaction at that time.
- *
- * The difference between the byte/transaction count recorded in
- * the transaction and the count last time we recorded a completion
- * tells us exactly how much data has been transferred between
- * completions.
- *
- * Calling this each time we learn of a newly-completed transaction
- * allows us to provide accurate information to the network stack
- * about how much work has been completed by the hardware at a given
- * point in time.
+ * We report this information to the network stack, which uses it to manage
+ * the rate at which data is sent to hardware.
*/
-static void
-gsi_channel_tx_update(struct gsi_channel *channel, struct gsi_trans *trans)
+static void gsi_trans_tx_completed(struct gsi_trans *trans)
{
- u64 byte_count = trans->byte_count + trans->len;
- u64 trans_count = trans->trans_count + 1;
+ u32 channel_id = trans->channel_id;
+ struct gsi *gsi = trans->gsi;
+ struct gsi_channel *channel;
+ u32 trans_count;
+ u32 byte_count;
+
+ channel = &gsi->channel[channel_id];
+ trans_count = trans->trans_count - channel->compl_trans_count;
+ byte_count = trans->byte_count - channel->compl_byte_count;
- byte_count -= channel->compl_byte_count;
- channel->compl_byte_count += byte_count;
- trans_count -= channel->compl_trans_count;
channel->compl_trans_count += trans_count;
+ channel->compl_byte_count += byte_count;
- ipa_gsi_channel_tx_completed(channel->gsi, gsi_channel_id(channel),
- trans_count, byte_count);
+ ipa_gsi_channel_tx_completed(gsi, channel_id, trans_count, byte_count);
}
/* Channel control interrupt handler */
@@ -1327,28 +1318,45 @@ static int gsi_irq_init(struct gsi *gsi, struct platform_device *pdev)
}
/* Return the transaction associated with a transfer completion event */
-static struct gsi_trans *gsi_event_trans(struct gsi_channel *channel,
- struct gsi_event *event)
+static struct gsi_trans *
+gsi_event_trans(struct gsi *gsi, struct gsi_event *event)
{
+ u32 channel_id = event->chid;
+ struct gsi_channel *channel;
+ struct gsi_trans *trans;
u32 tre_offset;
u32 tre_index;
+ channel = &gsi->channel[channel_id];
+ if (WARN(!channel->gsi, "event has bad channel %u\n", channel_id))
+ return NULL;
+
/* Event xfer_ptr records the TRE it's associated with */
tre_offset = lower_32_bits(le64_to_cpu(event->xfer_ptr));
tre_index = gsi_ring_index(&channel->tre_ring, tre_offset);
- return gsi_channel_trans_mapped(channel, tre_index);
+ trans = gsi_channel_trans_mapped(channel, tre_index);
+
+ if (WARN(!trans, "channel %u event with no transaction\n", channel_id))
+ return NULL;
+
+ return trans;
}
/**
- * gsi_evt_ring_rx_update() - Record lengths of received data
- * @evt_ring: Event ring associated with channel that received packets
- * @index: Event index in ring reported by hardware
+ * gsi_evt_ring_update() - Update transaction state from hardware
+ * @gsi: GSI pointer
+ * @evt_ring_id: Event ring ID
+ * @index: Event index in ring reported by hardware
*
* Events for RX channels contain the actual number of bytes received into
* the buffer. Every event has a transaction associated with it, and here
* we update transactions to record their actual received lengths.
*
+ * When an event for a TX channel arrives, we use information in the
+ * transaction to report the number of requests and bytes that have
+ * been transferred.
+ *
* This function is called whenever we learn that the GSI hardware has filled
* new events since the last time we checked. The ring's index field tells
* the first entry in need of processing. The index provided is the
@@ -1359,29 +1367,24 @@ static struct gsi_trans *gsi_event_trans(struct gsi_channel *channel,
*
* Note that @index always refers to an element *within* the event ring.
*/
-static void gsi_evt_ring_rx_update(struct gsi_evt_ring *evt_ring, u32 index)
+static void gsi_evt_ring_update(struct gsi *gsi, u32 evt_ring_id, u32 index)
{
- struct gsi_channel *channel = evt_ring->channel;
+ struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id];
struct gsi_ring *ring = &evt_ring->ring;
- struct gsi_trans_info *trans_info;
struct gsi_event *event_done;
struct gsi_event *event;
- struct gsi_trans *trans;
- u32 trans_count = 0;
- u32 byte_count = 0;
u32 event_avail;
u32 old_index;
- trans_info = &channel->trans_info;
-
- /* We'll start with the oldest un-processed event. RX channels
- * replenish receive buffers in single-TRE transactions, so we
- * can just map that event to its transaction. Transactions
- * associated with completion events are consecutive.
+ /* Starting with the oldest un-processed event, determine which
+ * transaction (and which channel) is associated with the event.
+ * For RX channels, update each completed transaction with the
+ * number of bytes that were actually received. For TX channels
+ * associated with a network device, report to the network stack
+ * the number of transfers and bytes this completion represents.
*/
old_index = ring->index;
event = gsi_ring_virt(ring, old_index);
- trans = gsi_event_trans(channel, event);
/* Compute the number of events to process before we wrap,
* and determine when we'll be done processing events.
@@ -1389,21 +1392,28 @@ static void gsi_evt_ring_rx_update(struct gsi_evt_ring *evt_ring, u32 index)
event_avail = ring->count - old_index % ring->count;
event_done = gsi_ring_virt(ring, index);
do {
- trans->len = __le16_to_cpu(event->len);
- byte_count += trans->len;
- trans_count++;
+ struct gsi_trans *trans;
+
+ trans = gsi_event_trans(gsi, event);
+ if (!trans)
+ return;
+
+ if (trans->direction == DMA_FROM_DEVICE)
+ trans->len = __le16_to_cpu(event->len);
+ else
+ gsi_trans_tx_completed(trans);
+
+ gsi_trans_move_complete(trans);
/* Move on to the next event and transaction */
if (--event_avail)
event++;
else
event = gsi_ring_virt(ring, 0);
- trans = gsi_trans_pool_next(&trans_info->pool, trans);
} while (event != event_done);
- /* We record RX bytes when they are received */
- channel->byte_count += byte_count;
- channel->trans_count += trans_count;
+ /* Tell the hardware we've handled these events */
+ gsi_evt_ring_doorbell(gsi, evt_ring_id, index);
}
/* Initialize a ring, including allocating DMA memory for its entries */
@@ -1493,22 +1503,16 @@ static struct gsi_trans *gsi_channel_update(struct gsi_channel *channel)
return NULL;
/* Get the transaction for the latest completed event. */
- trans = gsi_event_trans(channel, gsi_ring_virt(ring, index - 1));
+ trans = gsi_event_trans(gsi, gsi_ring_virt(ring, index - 1));
+ if (!trans)
+ return NULL;
/* For RX channels, update each completed transaction with the number
* of bytes that were actually received. For TX channels, report
* the number of transactions and bytes this completion represents
* up the network stack.
*/
- if (channel->toward_ipa)
- gsi_channel_tx_update(channel, trans);
- else
- gsi_evt_ring_rx_update(evt_ring, index);
-
- gsi_trans_move_complete(trans);
-
- /* Tell the hardware we've handled these events */
- gsi_evt_ring_doorbell(gsi, evt_ring_id, index);
+ gsi_evt_ring_update(gsi, evt_ring_id, index);
return gsi_channel_trans_complete(channel);
}
@@ -2001,9 +2005,10 @@ static void gsi_channel_evt_ring_exit(struct gsi_channel *channel)
gsi_evt_ring_id_free(gsi, evt_ring_id);
}
-static bool gsi_channel_data_valid(struct gsi *gsi,
+static bool gsi_channel_data_valid(struct gsi *gsi, bool command,
const struct ipa_gsi_endpoint_data *data)
{
+ const struct gsi_channel_data *channel_data;
u32 channel_id = data->channel_id;
struct device *dev = gsi->dev;
@@ -2019,10 +2024,24 @@ static bool gsi_channel_data_valid(struct gsi *gsi,
return false;
}
- if (!data->channel.tlv_count ||
- data->channel.tlv_count > GSI_TLV_MAX) {
+ if (command && !data->toward_ipa) {
+ dev_err(dev, "command channel %u is not TX\n", channel_id);
+ return false;
+ }
+
+ channel_data = &data->channel;
+
+ if (!channel_data->tlv_count ||
+ channel_data->tlv_count > GSI_TLV_MAX) {
dev_err(dev, "channel %u bad tlv_count %u; must be 1..%u\n",
- channel_id, data->channel.tlv_count, GSI_TLV_MAX);
+ channel_id, channel_data->tlv_count, GSI_TLV_MAX);
+ return false;
+ }
+
+ if (command && IPA_COMMAND_TRANS_TRE_MAX > channel_data->tlv_count) {
+ dev_err(dev, "command TRE max too big for channel %u (%u > %u)\n",
+ channel_id, IPA_COMMAND_TRANS_TRE_MAX,
+ channel_data->tlv_count);
return false;
}
@@ -2031,22 +2050,22 @@ static bool gsi_channel_data_valid(struct gsi *gsi,
* gsi_channel_tre_max() is computed, tre_count has to be almost
* twice the TLV FIFO size to satisfy this requirement.
*/
- if (data->channel.tre_count < 2 * data->channel.tlv_count - 1) {
+ if (channel_data->tre_count < 2 * channel_data->tlv_count - 1) {
dev_err(dev, "channel %u TLV count %u exceeds TRE count %u\n",
- channel_id, data->channel.tlv_count,
- data->channel.tre_count);
+ channel_id, channel_data->tlv_count,
+ channel_data->tre_count);
return false;
}
- if (!is_power_of_2(data->channel.tre_count)) {
+ if (!is_power_of_2(channel_data->tre_count)) {
dev_err(dev, "channel %u bad tre_count %u; not power of 2\n",
- channel_id, data->channel.tre_count);
+ channel_id, channel_data->tre_count);
return false;
}
- if (!is_power_of_2(data->channel.event_count)) {
+ if (!is_power_of_2(channel_data->event_count)) {
dev_err(dev, "channel %u bad event_count %u; not power of 2\n",
- channel_id, data->channel.event_count);
+ channel_id, channel_data->event_count);
return false;
}
@@ -2062,7 +2081,7 @@ static int gsi_channel_init_one(struct gsi *gsi,
u32 tre_count;
int ret;
- if (!gsi_channel_data_valid(gsi, data))
+ if (!gsi_channel_data_valid(gsi, command, data))
return -EINVAL;
/* Worst case we need an event for every outstanding TRE */
@@ -2080,7 +2099,7 @@ static int gsi_channel_init_one(struct gsi *gsi,
channel->gsi = gsi;
channel->toward_ipa = data->toward_ipa;
channel->command = command;
- channel->tlv_count = data->channel.tlv_count;
+ channel->trans_tre_max = data->channel.tlv_count;
channel->tre_count = tre_count;
channel->event_count = data->channel.event_count;
@@ -2295,13 +2314,5 @@ u32 gsi_channel_tre_max(struct gsi *gsi, u32 channel_id)
struct gsi_channel *channel = &gsi->channel[channel_id];
/* Hardware limit is channel->tre_count - 1 */
- return channel->tre_count - (channel->tlv_count - 1);
-}
-
-/* Returns the maximum number of TREs in a single transaction for a channel */
-u32 gsi_channel_trans_tre_max(struct gsi *gsi, u32 channel_id)
-{
- struct gsi_channel *channel = &gsi->channel[channel_id];
-
- return channel->tlv_count;
+ return channel->tre_count - (channel->trans_tre_max - 1);
}
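
The reworked accounting is easiest to follow in isolation: commit bumps the channel's running totals and snapshots them into the transaction; completion reports the delta between that snapshot and the totals at the previous completion. A hedged sketch with illustrative names (the real logic is gsi_trans_tx_committed() and gsi_trans_tx_completed() above):

    #include <linux/types.h>

    struct example_channel {
            u64 trans_count, byte_count;             /* committed totals */
            u64 compl_trans_count, compl_byte_count; /* reported totals */
    };

    struct example_trans {
            u64 trans_count, byte_count;             /* snapshot at commit */
            u32 len;
    };

    static void example_commit(struct example_channel *ch,
                               struct example_trans *t)
    {
            ch->trans_count++;
            ch->byte_count += t->len;
            t->trans_count = ch->trans_count;
            t->byte_count = ch->byte_count;
    }

    /* Transactions complete in order, so snapshot minus last-reported
     * totals is exactly the work finished while this one was pending.
     */
    static void example_complete(struct example_channel *ch,
                                 struct example_trans *t,
                                 u64 *trans_delta, u64 *byte_delta)
    {
            *trans_delta = t->trans_count - ch->compl_trans_count;
            *byte_delta = t->byte_count - ch->compl_byte_count;
            ch->compl_trans_count += *trans_delta;
            ch->compl_byte_count += *byte_delta;
    }
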
diff --git a/drivers/net/ipa/gsi.h b/drivers/net/ipa/gsi.h
index 5d66116b46b0..bad1a78a96ed 100644
--- a/drivers/net/ipa/gsi.h
+++ b/drivers/net/ipa/gsi.h
@@ -110,16 +110,16 @@ struct gsi_channel {
bool toward_ipa;
bool command; /* AP command TX channel or not */
- u8 tlv_count; /* # entries in TLV FIFO */
+ u8 trans_tre_max; /* max TREs in a transaction */
u16 tre_count;
u16 event_count;
struct gsi_ring tre_ring;
u32 evt_ring_id;
+ /* The following counts are used only for TX endpoints */
u64 byte_count; /* total # bytes transferred */
u64 trans_count; /* total # transactions */
- /* The following counts are used only for TX endpoints */
u64 queued_byte_count; /* last reported queued byte count */
u64 queued_trans_count; /* ...and queued trans count */
u64 compl_byte_count; /* last reported completed byte count */
@@ -189,15 +189,6 @@ void gsi_teardown(struct gsi *gsi);
u32 gsi_channel_tre_max(struct gsi *gsi, u32 channel_id);
/**
- * gsi_channel_trans_tre_max() - Maximum TREs in a single transaction
- * @gsi: GSI pointer
- * @channel_id: Channel whose limit is to be returned
- *
- * Return: The maximum TRE count per transaction on the channel
- */
-u32 gsi_channel_trans_tre_max(struct gsi *gsi, u32 channel_id);
-
-/**
* gsi_channel_start() - Start an allocated GSI channel
* @gsi: GSI pointer
* @channel_id: Channel to start
diff --git a/drivers/net/ipa/gsi_private.h b/drivers/net/ipa/gsi_private.h
index ea333a244cf5..0b2516fa21b5 100644
--- a/drivers/net/ipa/gsi_private.h
+++ b/drivers/net/ipa/gsi_private.h
@@ -16,9 +16,6 @@ struct gsi_channel;
#define GSI_RING_ELEMENT_SIZE 16 /* bytes; must be a power of 2 */
-/* Return the entry that follows one provided in a transaction pool */
-void *gsi_trans_pool_next(struct gsi_trans_pool *pool, void *element);
-
/**
* gsi_trans_move_complete() - Mark a GSI transaction completed
* @trans: Transaction to commit
@@ -105,14 +102,21 @@ void gsi_channel_doorbell(struct gsi_channel *channel);
void *gsi_ring_virt(struct gsi_ring *ring, u32 index);
/**
- * gsi_channel_tx_queued() - Report the number of bytes queued to hardware
- * @channel: Channel whose bytes have been queued
+ * gsi_trans_tx_committed() - Record bytes committed for transmit
+ * @trans: TX endpoint transaction being committed
+ *
+ * Report that a TX transaction has been committed. This updates
+ * statistics used to manage the transmit data rate.
+ */
+void gsi_trans_tx_committed(struct gsi_trans *trans);
+
+/**
+ * gsi_trans_tx_queued() - Report a queued TX channel transaction
+ * @trans: Transaction being passed to hardware
*
- * This arranges for the the number of transactions and bytes for
- * transfer that have been queued to hardware to be reported. It
- * passes this information up the network stack so it can be used to
- * throttle transmissions.
+ * Report to the network stack that a TX transaction is being supplied
+ * to the hardware.
*/
-void gsi_channel_tx_queued(struct gsi_channel *channel);
+void gsi_trans_tx_queued(struct gsi_trans *trans);
#endif /* _GSI_PRIVATE_H_ */
diff --git a/drivers/net/ipa/gsi_trans.c b/drivers/net/ipa/gsi_trans.c
index 55f8fe7d2668..29496ca15825 100644
--- a/drivers/net/ipa/gsi_trans.c
+++ b/drivers/net/ipa/gsi_trans.c
@@ -214,26 +214,14 @@ void *gsi_trans_pool_alloc_dma(struct gsi_trans_pool *pool, dma_addr_t *addr)
return pool->base + offset;
}
-/* Return the pool element that immediately follows the one given.
- * This only works done if elements are allocated one at a time.
- */
-void *gsi_trans_pool_next(struct gsi_trans_pool *pool, void *element)
+/* Map a TRE ring entry index to the transaction it is associated with */
+static void gsi_trans_map(struct gsi_trans *trans, u32 index)
{
- void *end = pool->base + pool->count * pool->size;
-
- WARN_ON(element < pool->base);
- WARN_ON(element >= end);
- WARN_ON(pool->max_alloc != 1);
-
- element += pool->size;
+ struct gsi_channel *channel = &trans->gsi->channel[trans->channel_id];
- return element < end ? element : pool->base;
-}
+ /* The completion event will indicate the last TRE used */
+ index += trans->used_count - 1;
-/* Map a given ring entry index to the transaction associated with it */
-static void gsi_channel_trans_map(struct gsi_channel *channel, u32 index,
- struct gsi_trans *trans)
-{
/* Note: index *must* be used modulo the ring count here */
channel->trans_info.map[index % channel->tre_ring.count] = trans;
}
@@ -340,7 +328,7 @@ struct gsi_trans *gsi_channel_trans_alloc(struct gsi *gsi, u32 channel_id,
struct gsi_trans_info *trans_info;
struct gsi_trans *trans;
- if (WARN_ON(tre_count > gsi_channel_trans_tre_max(gsi, channel_id)))
+ if (WARN_ON(tre_count > channel->trans_tre_max))
return NULL;
trans_info = &channel->trans_info;
@@ -351,11 +339,11 @@ struct gsi_trans *gsi_channel_trans_alloc(struct gsi *gsi, u32 channel_id,
if (!gsi_trans_tre_reserve(trans_info, tre_count))
return NULL;
- /* Allocate and initialize non-zero fields in the the transaction */
+ /* Allocate and initialize non-zero fields in the transaction */
trans = gsi_trans_pool_alloc(&trans_info->pool, 1);
trans->gsi = gsi;
trans->channel_id = channel_id;
- trans->tre_count = tre_count;
+ trans->rsvd_count = tre_count;
init_completion(&trans->completion);
/* Allocate the scatterlist and (if requested) info entries. */
@@ -405,17 +393,17 @@ void gsi_trans_free(struct gsi_trans *trans)
/* Releasing the reserved TREs implicitly frees the sgl[] and
* (if present) info[] arrays, plus the transaction itself.
*/
- gsi_trans_tre_release(trans_info, trans->tre_count);
+ gsi_trans_tre_release(trans_info, trans->rsvd_count);
}
/* Add an immediate command to a transaction */
void gsi_trans_cmd_add(struct gsi_trans *trans, void *buf, u32 size,
dma_addr_t addr, enum ipa_cmd_opcode opcode)
{
- u32 which = trans->used++;
+ u32 which = trans->used_count++;
struct scatterlist *sg;
- WARN_ON(which >= trans->tre_count);
+ WARN_ON(which >= trans->rsvd_count);
/* Commands are quite different from data transfer requests.
* Their payloads come from a pool whose memory is allocated
@@ -446,9 +434,9 @@ int gsi_trans_page_add(struct gsi_trans *trans, struct page *page, u32 size,
struct scatterlist *sg = &trans->sgl[0];
int ret;
- if (WARN_ON(trans->tre_count != 1))
+ if (WARN_ON(trans->rsvd_count != 1))
return -EINVAL;
- if (WARN_ON(trans->used))
+ if (WARN_ON(trans->used_count))
return -EINVAL;
sg_set_page(sg, page, size, offset);
@@ -456,7 +444,7 @@ int gsi_trans_page_add(struct gsi_trans *trans, struct page *page, u32 size,
if (!ret)
return -ENOMEM;
- trans->used++; /* Transaction now owns the (DMA mapped) page */
+ trans->used_count++; /* Transaction now owns the (DMA mapped) page */
return 0;
}
@@ -465,25 +453,26 @@ int gsi_trans_page_add(struct gsi_trans *trans, struct page *page, u32 size,
int gsi_trans_skb_add(struct gsi_trans *trans, struct sk_buff *skb)
{
struct scatterlist *sg = &trans->sgl[0];
- u32 used;
+ u32 used_count;
int ret;
- if (WARN_ON(trans->tre_count != 1))
+ if (WARN_ON(trans->rsvd_count != 1))
return -EINVAL;
- if (WARN_ON(trans->used))
+ if (WARN_ON(trans->used_count))
return -EINVAL;
/* skb->len will not be 0 (checked early) */
ret = skb_to_sgvec(skb, sg, 0, skb->len);
if (ret < 0)
return ret;
- used = ret;
+ used_count = ret;
- ret = dma_map_sg(trans->gsi->dev, sg, used, trans->direction);
+ ret = dma_map_sg(trans->gsi->dev, sg, used_count, trans->direction);
if (!ret)
return -ENOMEM;
- trans->used += used; /* Transaction now owns the (DMA mapped) skb */
+ /* Transaction now owns the (DMA mapped) skb */
+ trans->used_count += used_count;
return 0;
}
@@ -549,7 +538,7 @@ static void gsi_trans_tre_fill(struct gsi_tre *dest_tre, dma_addr_t addr,
static void __gsi_trans_commit(struct gsi_trans *trans, bool ring_db)
{
struct gsi_channel *channel = &trans->gsi->channel[trans->channel_id];
- struct gsi_ring *ring = &channel->tre_ring;
+ struct gsi_ring *tre_ring = &channel->tre_ring;
enum ipa_cmd_opcode opcode = IPA_CMD_NONE;
bool bei = channel->toward_ipa;
struct gsi_tre *dest_tre;
@@ -559,7 +548,7 @@ static void __gsi_trans_commit(struct gsi_trans *trans, bool ring_db)
u32 avail;
u32 i;
- WARN_ON(!trans->used);
+ WARN_ON(!trans->used_count);
/* Consume the entries. If we cross the end of the ring while
* filling them we'll switch to the beginning to finish.
@@ -567,35 +556,30 @@ static void __gsi_trans_commit(struct gsi_trans *trans, bool ring_db)
* transfer request, whose opcode is IPA_CMD_NONE.
*/
cmd_opcode = channel->command ? &trans->cmd_opcode[0] : NULL;
- avail = ring->count - ring->index % ring->count;
- dest_tre = gsi_ring_virt(ring, ring->index);
- for_each_sg(trans->sgl, sg, trans->used, i) {
- bool last_tre = i == trans->used - 1;
+ avail = tre_ring->count - tre_ring->index % tre_ring->count;
+ dest_tre = gsi_ring_virt(tre_ring, tre_ring->index);
+ for_each_sg(trans->sgl, sg, trans->used_count, i) {
+ bool last_tre = i == trans->used_count - 1;
dma_addr_t addr = sg_dma_address(sg);
u32 len = sg_dma_len(sg);
byte_count += len;
if (!avail--)
- dest_tre = gsi_ring_virt(ring, 0);
+ dest_tre = gsi_ring_virt(tre_ring, 0);
if (cmd_opcode)
opcode = *cmd_opcode++;
gsi_trans_tre_fill(dest_tre, addr, len, last_tre, bei, opcode);
dest_tre++;
}
- ring->index += trans->used;
-
- if (channel->toward_ipa) {
- /* We record TX bytes when they are sent */
- trans->len = byte_count;
- trans->trans_count = channel->trans_count;
- trans->byte_count = channel->byte_count;
- channel->trans_count++;
- channel->byte_count += byte_count;
- }
+ /* Associate the TRE with the transaction */
+ gsi_trans_map(trans, tre_ring->index);
- /* Associate the last TRE with the transaction */
- gsi_channel_trans_map(channel, ring->index - 1, trans);
+ tre_ring->index += trans->used_count;
+
+ trans->len = byte_count;
+ if (channel->toward_ipa)
+ gsi_trans_tx_committed(trans);
gsi_trans_move_pending(trans);
@@ -603,7 +587,7 @@ static void __gsi_trans_commit(struct gsi_trans *trans, bool ring_db)
if (ring_db || !atomic_read(&channel->trans_info.tre_avail)) {
/* Report what we're handing off to hardware for TX channels */
if (channel->toward_ipa)
- gsi_channel_tx_queued(channel);
+ gsi_trans_tx_queued(trans);
gsi_channel_doorbell(channel);
}
}
@@ -611,7 +595,7 @@ static void __gsi_trans_commit(struct gsi_trans *trans, bool ring_db)
/* Commit a GSI transaction */
void gsi_trans_commit(struct gsi_trans *trans, bool ring_db)
{
- if (trans->used)
+ if (trans->used_count)
__gsi_trans_commit(trans, ring_db);
else
gsi_trans_free(trans);
@@ -620,7 +604,7 @@ void gsi_trans_commit(struct gsi_trans *trans, bool ring_db)
/* Commit a GSI transaction and wait for it to complete */
void gsi_trans_commit_wait(struct gsi_trans *trans)
{
- if (!trans->used)
+ if (!trans->used_count)
goto out_trans_free;
refcount_inc(&trans->refcount);
@@ -638,7 +622,7 @@ void gsi_trans_complete(struct gsi_trans *trans)
{
/* If the entire SGL was mapped when added, unmap it now */
if (trans->direction != DMA_NONE)
- dma_unmap_sg(trans->gsi->dev, trans->sgl, trans->used,
+ dma_unmap_sg(trans->gsi->dev, trans->sgl, trans->used_count,
trans->direction);
ipa_gsi_trans_complete(trans);
@@ -675,7 +659,7 @@ void gsi_channel_trans_cancel_pending(struct gsi_channel *channel)
int gsi_trans_read_byte(struct gsi *gsi, u32 channel_id, dma_addr_t addr)
{
struct gsi_channel *channel = &gsi->channel[channel_id];
- struct gsi_ring *ring = &channel->tre_ring;
+ struct gsi_ring *tre_ring = &channel->tre_ring;
struct gsi_trans_info *trans_info;
struct gsi_tre *dest_tre;
@@ -685,12 +669,12 @@ int gsi_trans_read_byte(struct gsi *gsi, u32 channel_id, dma_addr_t addr)
if (!gsi_trans_tre_reserve(trans_info, 1))
return -EBUSY;
- /* Now fill the the reserved TRE and tell the hardware */
+ /* Now fill the reserved TRE and tell the hardware */
- dest_tre = gsi_ring_virt(ring, ring->index);
+ dest_tre = gsi_ring_virt(tre_ring, tre_ring->index);
gsi_trans_tre_fill(dest_tre, addr, 1, true, false, IPA_CMD_NONE);
- ring->index++;
+ tre_ring->index++;
gsi_channel_doorbell(channel);
return 0;
@@ -745,14 +729,10 @@ int gsi_channel_trans_init(struct gsi *gsi, u32 channel_id)
* element is used to fill a single TRE when the transaction is
* committed. So we need as many scatterlist elements as the
* maximum number of TREs that can be outstanding.
- *
- * All TREs in a transaction must fit within the channel's TLV FIFO.
- * A transaction on a channel can allocate as many TREs as that but
- * no more.
*/
ret = gsi_trans_pool_init(&trans_info->sg_pool,
sizeof(struct scatterlist),
- tre_max, channel->tlv_count);
+ tre_max, channel->trans_tre_max);
if (ret)
goto err_trans_pool_exit;
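
With gsi_trans_pool_next() removed, the per-channel map is the only route from a completion event back to its transaction, and only the last TRE of a transaction is recorded because that is the TRE the event's xfer_ptr names. A sketch of the indexing, with illustrative parameters:

    #include <linux/types.h>

    struct example_trans;

    /* Record a transaction at the slot of its *last* TRE; index math
     * is modulo the ring size, as in gsi_trans_map() above.
     */
    static void example_map_last_tre(struct example_trans **map,
                                     u32 ring_count, u32 first_index,
                                     u32 used_count,
                                     struct example_trans *trans)
    {
            u32 last = first_index + used_count - 1;

            map[last % ring_count] = trans;
    }
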
diff --git a/drivers/net/ipa/gsi_trans.h b/drivers/net/ipa/gsi_trans.h
index 020c3b32de1d..7084507830c2 100644
--- a/drivers/net/ipa/gsi_trans.h
+++ b/drivers/net/ipa/gsi_trans.h
@@ -33,9 +33,9 @@ struct gsi_trans_pool;
* @gsi: GSI pointer
* @channel_id: Channel number transaction is associated with
* @cancelled: If set by the core code, transaction was cancelled
- * @tre_count: Number of TREs reserved for this transaction
- * @used: Number of TREs *used* (could be less than tre_count)
- * @len: Total # of transfer bytes represented in sgl[] (set by core)
+ * @rsvd_count: Number of TREs reserved for this transaction
+ * @used_count: Number of TREs *used* (could be less than rsvd_count)
+ * @len: Number of bytes sent or received by the transaction
* @data: Preserved but not touched by the core transaction code
* @cmd_opcode: Array of command opcodes (command channel only)
* @sgl: An array of scatter/gather entries managed by core code
@@ -45,8 +45,9 @@ struct gsi_trans_pool;
* @byte_count: TX channel byte count recorded when transaction committed
* @trans_count: Channel transaction count when committed (for BQL accounting)
*
- * The size used for some fields in this structure were chosen to ensure
- * the full structure size is no larger than 128 bytes.
+ * The @len field is set when the transaction is committed. For RX
+ * transactions it is updated later to reflect the actual number of bytes
+ * received.
*/
struct gsi_trans {
struct list_head links; /* gsi_channel lists */
@@ -56,8 +57,8 @@ struct gsi_trans {
bool cancelled; /* true if transaction was cancelled */
- u8 tre_count; /* # TREs requested */
- u8 used; /* # entries used in sgl[] */
+ u8 rsvd_count; /* # TREs requested */
+ u8 used_count; /* # entries used in sgl[] */
u32 len; /* total # bytes across sgl[] */
union {
diff --git a/drivers/net/ipa/ipa_cmd.c b/drivers/net/ipa/ipa_cmd.c
index e58cd4478fd3..6dea40259b60 100644
--- a/drivers/net/ipa/ipa_cmd.c
+++ b/drivers/net/ipa/ipa_cmd.c
@@ -353,13 +353,13 @@ int ipa_cmd_pool_init(struct gsi_channel *channel, u32 tre_max)
/* This is as good a place as any to validate build constants */
ipa_cmd_validate_build();
- /* Even though command payloads are allocated one at a time,
- * a single transaction can require up to tlv_count of them,
- * so we treat them as if that many can be allocated at once.
+ /* Command payloads are allocated one at a time, but a single
+ * transaction can require up to the maximum supported by the
+ * channel; treat them as if they were allocated all at once.
*/
return gsi_trans_pool_init_dma(dev, &trans_info->cmd_pool,
sizeof(union ipa_cmd_payload),
- tre_max, channel->tlv_count);
+ tre_max, channel->trans_tre_max);
}
void ipa_cmd_pool_exit(struct gsi_channel *channel)
diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index d3b3255ac3d1..66d2bfdf9e42 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -1020,7 +1020,7 @@ int ipa_endpoint_skb_tx(struct ipa_endpoint *endpoint, struct sk_buff *skb)
* If not, see if we can linearize it before giving up.
*/
nr_frags = skb_shinfo(skb)->nr_frags;
- if (1 + nr_frags > endpoint->trans_tre_max) {
+ if (nr_frags > endpoint->skb_frag_max) {
if (skb_linearize(skb))
return -E2BIG;
nr_frags = 0;
@@ -1368,18 +1368,14 @@ static void ipa_endpoint_status_parse(struct ipa_endpoint *endpoint,
}
}
-/* Complete a TX transaction, command or from ipa_endpoint_skb_tx() */
-static void ipa_endpoint_tx_complete(struct ipa_endpoint *endpoint,
- struct gsi_trans *trans)
-{
-}
-
-/* Complete transaction initiated in ipa_endpoint_replenish_one() */
-static void ipa_endpoint_rx_complete(struct ipa_endpoint *endpoint,
- struct gsi_trans *trans)
+void ipa_endpoint_trans_complete(struct ipa_endpoint *endpoint,
+ struct gsi_trans *trans)
{
struct page *page;
+ if (endpoint->toward_ipa)
+ return;
+
if (trans->cancelled)
goto done;
@@ -1393,15 +1389,6 @@ done:
ipa_endpoint_replenish(endpoint);
}
-void ipa_endpoint_trans_complete(struct ipa_endpoint *endpoint,
- struct gsi_trans *trans)
-{
- if (endpoint->toward_ipa)
- ipa_endpoint_tx_complete(endpoint, trans);
- else
- ipa_endpoint_rx_complete(endpoint, trans);
-}
-
void ipa_endpoint_trans_release(struct ipa_endpoint *endpoint,
struct gsi_trans *trans)
{
@@ -1721,7 +1708,7 @@ static void ipa_endpoint_setup_one(struct ipa_endpoint *endpoint)
if (endpoint->ee_id != GSI_EE_AP)
return;
- endpoint->trans_tre_max = gsi_channel_trans_tre_max(gsi, channel_id);
+ endpoint->skb_frag_max = gsi->channel[channel_id].trans_tre_max - 1;
if (!endpoint->toward_ipa) {
/* RX transactions require a single TRE, so the maximum
* backlog is the same as the maximum outstanding TREs.
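
skb_frag_max is one less than the channel's per-transaction TRE limit because a transmitted skb always consumes one TRE for its linear data before any page fragments. A hedged sketch of the resulting admission check (illustrative helper; skb_linearize() collapses an oversized skb into a single contiguous buffer):

    #include <linux/skbuff.h>

    /* Returns 0 if the skb fits the TRE budget, possibly after
     * linearizing it; a negative errno if linearizing fails.
     */
    static int example_fit_skb(struct sk_buff *skb, u32 skb_frag_max)
    {
            if (skb_shinfo(skb)->nr_frags > skb_frag_max)
                    return skb_linearize(skb);

            return 0;
    }
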
diff --git a/drivers/net/ipa/ipa_endpoint.h b/drivers/net/ipa/ipa_endpoint.h
index 01790c60bee8..28e0a7386fd7 100644
--- a/drivers/net/ipa/ipa_endpoint.h
+++ b/drivers/net/ipa/ipa_endpoint.h
@@ -142,7 +142,7 @@ enum ipa_replenish_flag {
* @endpoint_id: IPA endpoint number
* @toward_ipa: Endpoint direction (true = TX, false = RX)
* @config: Default endpoint configuration
- * @trans_tre_max: Maximum number of TRE descriptors per transaction
+ * @skb_frag_max: Maximum allowed number of TX SKB fragments
* @evt_ring_id: GSI event ring used by the endpoint
* @netdev: Network device pointer, if endpoint uses one
* @replenish_flags: Replenishing state flags
@@ -157,7 +157,7 @@ struct ipa_endpoint {
bool toward_ipa;
struct ipa_endpoint_config config;
- u32 trans_tre_max;
+ u32 skb_frag_max; /* Used for netdev TX only */
u32 evt_ring_id;
/* Net device this endpoint is associated with, if any */
diff --git a/drivers/net/pcs/Kconfig b/drivers/net/pcs/Kconfig
index 22ba7b0b476d..6289b7c765f1 100644
--- a/drivers/net/pcs/Kconfig
+++ b/drivers/net/pcs/Kconfig
@@ -6,8 +6,8 @@
menu "PCS device drivers"
config PCS_XPCS
- tristate "Synopsys DesignWare XPCS controller"
- depends on MDIO_DEVICE && MDIO_BUS
+ tristate
+ select PHYLINK
help
This module provides helper functions for Synopsys DesignWare XPCS
controllers.
@@ -18,4 +18,12 @@ config PCS_LYNX
This module provides helpers to phylink for managing the Lynx PCS
which is part of the Layerscape and QorIQ Ethernet SERDES.
+config PCS_RZN1_MIIC
+ tristate "Renesas RZ/N1 MII converter"
+ depends on OF && (ARCH_RZN1 || COMPILE_TEST)
+ help
+ This module provides a driver for the MII converter that is available
+ on RZ/N1 SoCs. This PCS converts MII to RMII/RGMII or can be set in
+ pass-through mode for MII.
+
endmenu
diff --git a/drivers/net/pcs/Makefile b/drivers/net/pcs/Makefile
index 0603d469bd57..0ff5388fcdea 100644
--- a/drivers/net/pcs/Makefile
+++ b/drivers/net/pcs/Makefile
@@ -5,3 +5,4 @@ pcs_xpcs-$(CONFIG_PCS_XPCS) := pcs-xpcs.o pcs-xpcs-nxp.o
obj-$(CONFIG_PCS_XPCS) += pcs_xpcs.o
obj-$(CONFIG_PCS_LYNX) += pcs-lynx.o
+obj-$(CONFIG_PCS_RZN1_MIIC) += pcs-rzn1-miic.o
diff --git a/drivers/net/pcs/pcs-lynx.c b/drivers/net/pcs/pcs-lynx.c
index fd3445374955..7d5fc7f54b2f 100644
--- a/drivers/net/pcs/pcs-lynx.c
+++ b/drivers/net/pcs/pcs-lynx.c
@@ -71,12 +71,10 @@ static void lynx_pcs_get_state_usxgmii(struct mdio_device *pcs,
static void lynx_pcs_get_state_2500basex(struct mdio_device *pcs,
struct phylink_link_state *state)
{
- struct mii_bus *bus = pcs->bus;
- int addr = pcs->addr;
int bmsr, lpa;
- bmsr = mdiobus_read(bus, addr, MII_BMSR);
- lpa = mdiobus_read(bus, addr, MII_LPA);
+ bmsr = mdiodev_read(pcs, MII_BMSR);
+ lpa = mdiodev_read(pcs, MII_LPA);
if (bmsr < 0 || lpa < 0) {
state->link = false;
return;
@@ -124,57 +122,39 @@ static void lynx_pcs_get_state(struct phylink_pcs *pcs,
state->link, state->an_enabled, state->an_complete);
}
-static int lynx_pcs_config_1000basex(struct mdio_device *pcs,
- unsigned int mode,
- const unsigned long *advertising)
+static int lynx_pcs_config_giga(struct mdio_device *pcs, unsigned int mode,
+ phy_interface_t interface,
+ const unsigned long *advertising)
{
- struct mii_bus *bus = pcs->bus;
- int addr = pcs->addr;
u32 link_timer;
- int err;
-
- link_timer = LINK_TIMER_VAL(IEEE8023_LINK_TIMER_NS);
- mdiobus_write(bus, addr, LINK_TIMER_LO, link_timer & 0xffff);
- mdiobus_write(bus, addr, LINK_TIMER_HI, link_timer >> 16);
-
- err = mdiobus_modify(bus, addr, IF_MODE,
- IF_MODE_SGMII_EN | IF_MODE_USE_SGMII_AN,
- 0);
- if (err)
- return err;
-
- return phylink_mii_c22_pcs_config(pcs, mode,
- PHY_INTERFACE_MODE_1000BASEX,
- advertising);
-}
-
-static int lynx_pcs_config_sgmii(struct mdio_device *pcs, unsigned int mode,
- const unsigned long *advertising)
-{
- struct mii_bus *bus = pcs->bus;
- int addr = pcs->addr;
u16 if_mode;
int err;
- if_mode = IF_MODE_SGMII_EN;
- if (mode == MLO_AN_INBAND) {
- u32 link_timer;
-
- if_mode |= IF_MODE_USE_SGMII_AN;
-
- /* Adjust link timer for SGMII */
- link_timer = LINK_TIMER_VAL(SGMII_AN_LINK_TIMER_NS);
- mdiobus_write(bus, addr, LINK_TIMER_LO, link_timer & 0xffff);
- mdiobus_write(bus, addr, LINK_TIMER_HI, link_timer >> 16);
+ if (interface == PHY_INTERFACE_MODE_1000BASEX) {
+ link_timer = LINK_TIMER_VAL(IEEE8023_LINK_TIMER_NS);
+ mdiodev_write(pcs, LINK_TIMER_LO, link_timer & 0xffff);
+ mdiodev_write(pcs, LINK_TIMER_HI, link_timer >> 16);
+
+ if_mode = 0;
+ } else {
+ if_mode = IF_MODE_SGMII_EN;
+ if (mode == MLO_AN_INBAND) {
+ if_mode |= IF_MODE_USE_SGMII_AN;
+
+ /* Adjust link timer for SGMII */
+ link_timer = LINK_TIMER_VAL(SGMII_AN_LINK_TIMER_NS);
+ mdiodev_write(pcs, LINK_TIMER_LO, link_timer & 0xffff);
+ mdiodev_write(pcs, LINK_TIMER_HI, link_timer >> 16);
+ }
}
- err = mdiobus_modify(bus, addr, IF_MODE,
+
+ err = mdiodev_modify(pcs, IF_MODE,
IF_MODE_SGMII_EN | IF_MODE_USE_SGMII_AN,
if_mode);
if (err)
return err;
- return phylink_mii_c22_pcs_config(pcs, mode, PHY_INTERFACE_MODE_SGMII,
- advertising);
+ return phylink_mii_c22_pcs_config(pcs, mode, interface, advertising);
}
static int lynx_pcs_config_usxgmii(struct mdio_device *pcs, unsigned int mode,
@@ -204,10 +184,10 @@ static int lynx_pcs_config(struct phylink_pcs *pcs, unsigned int mode,
switch (ifmode) {
case PHY_INTERFACE_MODE_1000BASEX:
- return lynx_pcs_config_1000basex(lynx->mdio, mode, advertising);
case PHY_INTERFACE_MODE_SGMII:
case PHY_INTERFACE_MODE_QSGMII:
- return lynx_pcs_config_sgmii(lynx->mdio, mode, advertising);
+ return lynx_pcs_config_giga(lynx->mdio, mode, ifmode,
+ advertising);
case PHY_INTERFACE_MODE_2500BASEX:
if (phylink_autoneg_inband(mode)) {
dev_err(&lynx->mdio->dev,
@@ -237,9 +217,7 @@ static void lynx_pcs_an_restart(struct phylink_pcs *pcs)
static void lynx_pcs_link_up_sgmii(struct mdio_device *pcs, unsigned int mode,
int speed, int duplex)
{
- struct mii_bus *bus = pcs->bus;
u16 if_mode = 0, sgmii_speed;
- int addr = pcs->addr;
/* The PCS needs to be configured manually only
* when not operating on in-band mode
@@ -269,7 +247,7 @@ static void lynx_pcs_link_up_sgmii(struct mdio_device *pcs, unsigned int mode,
}
if_mode |= IF_MODE_SPEED(sgmii_speed);
- mdiobus_modify(bus, addr, IF_MODE,
+ mdiodev_modify(pcs, IF_MODE,
IF_MODE_HALF_DUPLEX | IF_MODE_SPEED_MSK,
if_mode);
}
@@ -294,8 +272,6 @@ static void lynx_pcs_link_up_2500basex(struct mdio_device *pcs,
unsigned int mode,
int speed, int duplex)
{
- struct mii_bus *bus = pcs->bus;
- int addr = pcs->addr;
u16 if_mode = 0;
if (mode == MLO_AN_INBAND) {
@@ -307,7 +283,7 @@ static void lynx_pcs_link_up_2500basex(struct mdio_device *pcs,
if_mode |= IF_MODE_HALF_DUPLEX;
if_mode |= IF_MODE_SPEED(SGMII_SPEED_2500);
- mdiobus_modify(bus, addr, IF_MODE,
+ mdiodev_modify(pcs, IF_MODE,
IF_MODE_HALF_DUPLEX | IF_MODE_SPEED_MSK,
if_mode);
}
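
The mdiodev_read()/mdiodev_write()/mdiodev_modify() conversions are mechanical: a struct mdio_device already carries its bus pointer and address, so the wrappers drop the repeated plumbing. A sketch of the equivalence for a C22 read (illustrative helper):

    #include <linux/mdio.h>
    #include <linux/mii.h>

    static int example_read_bmsr(struct mdio_device *mdiodev)
    {
            /* same as mdiobus_read(mdiodev->bus, mdiodev->addr, MII_BMSR) */
            return mdiodev_read(mdiodev, MII_BMSR);
    }
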
diff --git a/drivers/net/pcs/pcs-rzn1-miic.c b/drivers/net/pcs/pcs-rzn1-miic.c
new file mode 100644
index 000000000000..8f5e910f443d
--- /dev/null
+++ b/drivers/net/pcs/pcs-rzn1-miic.c
@@ -0,0 +1,520 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 Schneider Electric
+ *
+ * Clément Léger <clement.leger@bootlin.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/mdio.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/pcs-rzn1-miic.h>
+#include <linux/phylink.h>
+#include <linux/pm_runtime.h>
+#include <dt-bindings/net/pcs-rzn1-miic.h>
+
+#define MIIC_PRCMD 0x0
+#define MIIC_ESID_CODE 0x4
+
+#define MIIC_MODCTRL 0x20
+#define MIIC_MODCTRL_SW_MODE GENMASK(4, 0)
+
+#define MIIC_CONVCTRL(port) (0x100 + (port) * 4)
+
+#define MIIC_CONVCTRL_CONV_SPEED GENMASK(1, 0)
+#define CONV_MODE_10MBPS 0
+#define CONV_MODE_100MBPS 1
+#define CONV_MODE_1000MBPS 2
+
+#define MIIC_CONVCTRL_CONV_MODE GENMASK(3, 2)
+#define CONV_MODE_MII 0
+#define CONV_MODE_RMII 1
+#define CONV_MODE_RGMII 2
+
+#define MIIC_CONVCTRL_FULLD BIT(8)
+#define MIIC_CONVCTRL_RGMII_LINK BIT(12)
+#define MIIC_CONVCTRL_RGMII_DUPLEX BIT(13)
+#define MIIC_CONVCTRL_RGMII_SPEED GENMASK(15, 14)
+
+#define MIIC_CONVRST 0x114
+#define MIIC_CONVRST_PHYIF_RST(port) BIT(port)
+#define MIIC_CONVRST_PHYIF_RST_MASK GENMASK(4, 0)
+
+#define MIIC_SWCTRL 0x304
+#define MIIC_SWDUPC 0x308
+
+#define MIIC_MAX_NR_PORTS 5
+
+#define MIIC_MODCTRL_CONF_CONV_NUM 6
+#define MIIC_MODCTRL_CONF_NONE -1
+
+/**
+ * struct modctrl_match - Matching table entry for convctrl configuration
+ * See section 8.2.1 of the manual.
+ * @mode_cfg: Configuration value for convctrl
+ * @conv: Configuration of Ethernet port muxes. First index is SWITCH_PORTIN,
+ * then indexes 1-5 are CONV1-CONV5.
+ */
+struct modctrl_match {
+ u32 mode_cfg;
+ u8 conv[MIIC_MODCTRL_CONF_CONV_NUM];
+};
+
+static struct modctrl_match modctrl_match_table[] = {
+ {0x0, {MIIC_RTOS_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD,
+ MIIC_SWITCH_PORTC, MIIC_SERCOS_PORTB, MIIC_SERCOS_PORTA}},
+ {0x1, {MIIC_RTOS_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD,
+ MIIC_SWITCH_PORTC, MIIC_ETHERCAT_PORTB, MIIC_ETHERCAT_PORTA}},
+ {0x2, {MIIC_RTOS_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD,
+ MIIC_ETHERCAT_PORTC, MIIC_ETHERCAT_PORTB, MIIC_ETHERCAT_PORTA}},
+ {0x3, {MIIC_RTOS_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD,
+ MIIC_SWITCH_PORTC, MIIC_SWITCH_PORTB, MIIC_SWITCH_PORTA}},
+
+ {0x8, {MIIC_RTOS_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD,
+ MIIC_SWITCH_PORTC, MIIC_SERCOS_PORTB, MIIC_SERCOS_PORTA}},
+ {0x9, {MIIC_RTOS_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD,
+ MIIC_SWITCH_PORTC, MIIC_ETHERCAT_PORTB, MIIC_ETHERCAT_PORTA}},
+ {0xA, {MIIC_RTOS_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD,
+ MIIC_ETHERCAT_PORTC, MIIC_ETHERCAT_PORTB, MIIC_ETHERCAT_PORTA}},
+ {0xB, {MIIC_RTOS_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD,
+ MIIC_SWITCH_PORTC, MIIC_SWITCH_PORTB, MIIC_SWITCH_PORTA}},
+
+ {0x10, {MIIC_GMAC2_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD,
+ MIIC_SWITCH_PORTC, MIIC_SERCOS_PORTB, MIIC_SERCOS_PORTA}},
+ {0x11, {MIIC_GMAC2_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD,
+ MIIC_SWITCH_PORTC, MIIC_ETHERCAT_PORTB, MIIC_ETHERCAT_PORTA}},
+ {0x12, {MIIC_GMAC2_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD,
+ MIIC_ETHERCAT_PORTC, MIIC_ETHERCAT_PORTB, MIIC_ETHERCAT_PORTA}},
+ {0x13, {MIIC_GMAC2_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD,
+ MIIC_SWITCH_PORTC, MIIC_SWITCH_PORTB, MIIC_SWITCH_PORTA}}
+};
+
+static const char * const conf_to_string[] = {
+ [MIIC_GMAC1_PORT] = "GMAC1_PORT",
+ [MIIC_GMAC2_PORT] = "GMAC2_PORT",
+ [MIIC_RTOS_PORT] = "RTOS_PORT",
+ [MIIC_SERCOS_PORTA] = "SERCOS_PORTA",
+ [MIIC_SERCOS_PORTB] = "SERCOS_PORTB",
+ [MIIC_ETHERCAT_PORTA] = "ETHERCAT_PORTA",
+ [MIIC_ETHERCAT_PORTB] = "ETHERCAT_PORTB",
+ [MIIC_ETHERCAT_PORTC] = "ETHERCAT_PORTC",
+ [MIIC_SWITCH_PORTA] = "SWITCH_PORTA",
+ [MIIC_SWITCH_PORTB] = "SWITCH_PORTB",
+ [MIIC_SWITCH_PORTC] = "SWITCH_PORTC",
+ [MIIC_SWITCH_PORTD] = "SWITCH_PORTD",
+ [MIIC_HSR_PORTA] = "HSR_PORTA",
+ [MIIC_HSR_PORTB] = "HSR_PORTB",
+};
+
+static const char *index_to_string[MIIC_MODCTRL_CONF_CONV_NUM] = {
+ "SWITCH_PORTIN",
+ "CONV1",
+ "CONV2",
+ "CONV3",
+ "CONV4",
+ "CONV5",
+};
+
+/**
+ * struct miic - MII converter structure
+ * @base: base address of the MII converter
+ * @dev: Device associated to the MII converter
+ * @clks: Clocks used for this device
+ * @nclk: Number of clocks
+ * @lock: Lock used for read-modify-write access
+ */
+struct miic {
+ void __iomem *base;
+ struct device *dev;
+ struct clk_bulk_data *clks;
+ int nclk;
+ spinlock_t lock;
+};
+
+/**
+ * struct miic_port - Per port MII converter struct
+ * @miic: Backlink to the MII converter structure
+ * @pcs: PCS structure associated to the port
+ * @port: port number
+ */
+struct miic_port {
+ struct miic *miic;
+ struct phylink_pcs pcs;
+ int port;
+};
+
+static struct miic_port *phylink_pcs_to_miic_port(struct phylink_pcs *pcs)
+{
+ return container_of(pcs, struct miic_port, pcs);
+}
+
+static void miic_reg_writel(struct miic *miic, int offset, u32 value)
+{
+ writel(value, miic->base + offset);
+}
+
+static u32 miic_reg_readl(struct miic *miic, int offset)
+{
+ return readl(miic->base + offset);
+}
+
+static void miic_reg_rmw(struct miic *miic, int offset, u32 mask, u32 val)
+{
+ u32 reg;
+
+ spin_lock(&miic->lock);
+
+ reg = miic_reg_readl(miic, offset);
+ reg &= ~mask;
+ reg |= val;
+ miic_reg_writel(miic, offset, reg);
+
+ spin_unlock(&miic->lock);
+}
+
+static void miic_converter_enable(struct miic *miic, int port, int enable)
+{
+ u32 val = 0;
+
+ if (enable)
+ val = MIIC_CONVRST_PHYIF_RST(port);
+
+ miic_reg_rmw(miic, MIIC_CONVRST, MIIC_CONVRST_PHYIF_RST(port), val);
+}
+
+static int miic_config(struct phylink_pcs *pcs, unsigned int mode,
+ phy_interface_t interface,
+ const unsigned long *advertising, bool permit)
+{
+ struct miic_port *miic_port = phylink_pcs_to_miic_port(pcs);
+ struct miic *miic = miic_port->miic;
+ int port = miic_port->port;
+ u32 speed, conv_mode, val;
+
+ switch (interface) {
+ case PHY_INTERFACE_MODE_RMII:
+ conv_mode = CONV_MODE_RMII;
+ speed = CONV_MODE_100MBPS;
+ break;
+ case PHY_INTERFACE_MODE_RGMII:
+ case PHY_INTERFACE_MODE_RGMII_ID:
+ case PHY_INTERFACE_MODE_RGMII_TXID:
+ case PHY_INTERFACE_MODE_RGMII_RXID:
+ conv_mode = CONV_MODE_RGMII;
+ speed = CONV_MODE_1000MBPS;
+ break;
+ case PHY_INTERFACE_MODE_MII:
+ conv_mode = CONV_MODE_MII;
+ /* When in MII mode, speed should be set to 0 (which is actually
+ * CONV_MODE_10MBPS)
+ */
+ speed = CONV_MODE_10MBPS;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ val = FIELD_PREP(MIIC_CONVCTRL_CONV_MODE, conv_mode) |
+ FIELD_PREP(MIIC_CONVCTRL_CONV_SPEED, speed);
+
+ miic_reg_rmw(miic, MIIC_CONVCTRL(port),
+ MIIC_CONVCTRL_CONV_MODE | MIIC_CONVCTRL_CONV_SPEED, val);
+ miic_converter_enable(miic_port->miic, miic_port->port, 1);
+
+ return 0;
+}
+
+static void miic_link_up(struct phylink_pcs *pcs, unsigned int mode,
+ phy_interface_t interface, int speed, int duplex)
+{
+ struct miic_port *miic_port = phylink_pcs_to_miic_port(pcs);
+ struct miic *miic = miic_port->miic;
+ u32 conv_speed = 0, val = 0;
+ int port = miic_port->port;
+
+ if (duplex == DUPLEX_FULL)
+ val |= MIIC_CONVCTRL_FULLD;
+
+ /* No speed in MII through-mode */
+ if (interface != PHY_INTERFACE_MODE_MII) {
+ switch (speed) {
+ case SPEED_1000:
+ conv_speed = CONV_MODE_1000MBPS;
+ break;
+ case SPEED_100:
+ conv_speed = CONV_MODE_100MBPS;
+ break;
+ case SPEED_10:
+ conv_speed = CONV_MODE_10MBPS;
+ break;
+ default:
+ return;
+ }
+ }
+
+ val |= FIELD_PREP(MIIC_CONVCTRL_CONV_SPEED, conv_speed);
+
+ miic_reg_rmw(miic, MIIC_CONVCTRL(port),
+ (MIIC_CONVCTRL_CONV_SPEED | MIIC_CONVCTRL_FULLD), val);
+}
+
+static int miic_validate(struct phylink_pcs *pcs, unsigned long *supported,
+ const struct phylink_link_state *state)
+{
+ if (phy_interface_mode_is_rgmii(state->interface) ||
+ state->interface == PHY_INTERFACE_MODE_RMII ||
+ state->interface == PHY_INTERFACE_MODE_MII)
+ return 1;
+
+ return -EINVAL;
+}
+
+static const struct phylink_pcs_ops miic_phylink_ops = {
+ .pcs_validate = miic_validate,
+ .pcs_config = miic_config,
+ .pcs_link_up = miic_link_up,
+};
+
+struct phylink_pcs *miic_create(struct device *dev, struct device_node *np)
+{
+ struct platform_device *pdev;
+ struct miic_port *miic_port;
+ struct device_node *pcs_np;
+ struct miic *miic;
+ u32 port;
+
+ if (!of_device_is_available(np))
+ return ERR_PTR(-ENODEV);
+
+ if (of_property_read_u32(np, "reg", &port))
+ return ERR_PTR(-EINVAL);
+
+ if (port > MIIC_MAX_NR_PORTS || port < 1)
+ return ERR_PTR(-EINVAL);
+
+ /* The PCS pdev is attached to the parent node */
+ pcs_np = of_get_parent(np);
+ if (!pcs_np)
+ return ERR_PTR(-ENODEV);
+
+ if (!of_device_is_available(pcs_np)) {
+ of_node_put(pcs_np);
+ return ERR_PTR(-ENODEV);
+ }
+
+ pdev = of_find_device_by_node(pcs_np);
+ of_node_put(pcs_np);
+ if (!pdev || !platform_get_drvdata(pdev))
+ return ERR_PTR(-EPROBE_DEFER);
+
+ miic_port = kzalloc(sizeof(*miic_port), GFP_KERNEL);
+ if (!miic_port)
+ return ERR_PTR(-ENOMEM);
+
+ miic = platform_get_drvdata(pdev);
+ device_link_add(dev, miic->dev, DL_FLAG_AUTOREMOVE_CONSUMER);
+
+ miic_port->miic = miic;
+ miic_port->port = port - 1;
+ miic_port->pcs.ops = &miic_phylink_ops;
+
+ return &miic_port->pcs;
+}
+EXPORT_SYMBOL(miic_create);
+
+void miic_destroy(struct phylink_pcs *pcs)
+{
+ struct miic_port *miic_port = phylink_pcs_to_miic_port(pcs);
+
+ miic_converter_enable(miic_port->miic, miic_port->port, 0);
+ kfree(miic_port);
+}
+EXPORT_SYMBOL(miic_destroy);
+
+static int miic_init_hw(struct miic *miic, u32 cfg_mode)
+{
+ int port;
+
+ /* Unlock write access to accessory registers (see the datasheet). If
+ * this is going to be used in conjunction with the Cortex-M3, this
+ * sequence will have to be moved into the register write path.
+ */
+ miic_reg_writel(miic, MIIC_PRCMD, 0x00A5);
+ miic_reg_writel(miic, MIIC_PRCMD, 0x0001);
+ miic_reg_writel(miic, MIIC_PRCMD, 0xFFFE);
+ miic_reg_writel(miic, MIIC_PRCMD, 0x0001);
+
+ miic_reg_writel(miic, MIIC_MODCTRL,
+ FIELD_PREP(MIIC_MODCTRL_SW_MODE, cfg_mode));
+
+ for (port = 0; port < MIIC_MAX_NR_PORTS; port++) {
+ miic_converter_enable(miic, port, 0);
+ /* Disable speed/duplex control from these registers, datasheet
+ * says switch registers should be used to setup switch port
+ * speed and duplex.
+ */
+ miic_reg_writel(miic, MIIC_SWCTRL, 0x0);
+ miic_reg_writel(miic, MIIC_SWDUPC, 0x0);
+ }
+
+ return 0;
+}
+
+static bool miic_modctrl_match(s8 table_val[MIIC_MODCTRL_CONF_CONV_NUM],
+ s8 dt_val[MIIC_MODCTRL_CONF_CONV_NUM])
+{
+ int i;
+
+ for (i = 0; i < MIIC_MODCTRL_CONF_CONV_NUM; i++) {
+ if (dt_val[i] == MIIC_MODCTRL_CONF_NONE)
+ continue;
+
+ if (dt_val[i] != table_val[i])
+ return false;
+ }
+
+ return true;
+}
+
+static void miic_dump_conf(struct device *dev,
+ s8 conf[MIIC_MODCTRL_CONF_CONV_NUM])
+{
+ const char *conf_name;
+ int i;
+
+ for (i = 0; i < MIIC_MODCTRL_CONF_CONV_NUM; i++) {
+ if (conf[i] != MIIC_MODCTRL_CONF_NONE)
+ conf_name = conf_to_string[conf[i]];
+ else
+ conf_name = "NONE";
+
+ dev_err(dev, "%s: %s\n", index_to_string[i], conf_name);
+ }
+}
+
+static int miic_match_dt_conf(struct device *dev,
+ s8 dt_val[MIIC_MODCTRL_CONF_CONV_NUM],
+ u32 *mode_cfg)
+{
+ struct modctrl_match *table_entry;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(modctrl_match_table); i++) {
+ table_entry = &modctrl_match_table[i];
+
+ if (miic_modctrl_match(table_entry->conv, dt_val)) {
+ *mode_cfg = table_entry->mode_cfg;
+ return 0;
+ }
+ }
+
+ dev_err(dev, "Failed to apply requested configuration\n");
+ miic_dump_conf(dev, dt_val);
+
+ return -EINVAL;
+}
+
+static int miic_parse_dt(struct device *dev, u32 *mode_cfg)
+{
+ s8 dt_val[MIIC_MODCTRL_CONF_CONV_NUM];
+ struct device_node *np = dev->of_node;
+ struct device_node *conv;
+ u32 conf;
+ int port;
+
+ memset(dt_val, MIIC_MODCTRL_CONF_NONE, sizeof(dt_val));
+
+ if (of_property_read_u32(np, "renesas,miic-switch-portin", &conf) == 0)
+ dt_val[0] = conf;
+
+ for_each_child_of_node(np, conv) {
+ if (of_property_read_u32(conv, "reg", &port))
+ continue;
+
+ if (!of_device_is_available(conv))
+ continue;
+
+ if (of_property_read_u32(conv, "renesas,miic-input", &conf) == 0)
+ dt_val[port] = conf;
+ }
+
+ return miic_match_dt_conf(dev, dt_val, mode_cfg);
+}
+
+static int miic_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct miic *miic;
+ u32 mode_cfg;
+ int ret;
+
+ ret = miic_parse_dt(dev, &mode_cfg);
+ if (ret < 0)
+ return ret;
+
+ miic = devm_kzalloc(dev, sizeof(*miic), GFP_KERNEL);
+ if (!miic)
+ return -ENOMEM;
+
+ spin_lock_init(&miic->lock);
+ miic->dev = dev;
+ miic->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(miic->base))
+ return PTR_ERR(miic->base);
+
+ ret = devm_pm_runtime_enable(dev);
+ if (ret < 0)
+ return ret;
+
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret < 0)
+ return ret;
+
+ ret = miic_init_hw(miic, mode_cfg);
+ if (ret)
+ goto disable_runtime_pm;
+
+ /* miic_create() relies on the fact that data is attached to the
+ * platform device to determine if the driver is ready, so this needs
+ * to be the last thing done after everything is initialized properly.
+ */
+ platform_set_drvdata(pdev, miic);
+
+ return 0;
+
+disable_runtime_pm:
+ pm_runtime_put(dev);
+
+ return ret;
+}
+
+static int miic_remove(struct platform_device *pdev)
+{
+ pm_runtime_put(&pdev->dev);
+
+ return 0;
+}
+
+static const struct of_device_id miic_of_mtable[] = {
+ { .compatible = "renesas,rzn1-miic" },
+ { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, miic_of_mtable);
+
+static struct platform_driver miic_driver = {
+ .driver = {
+ .name = "rzn1_miic",
+ .suppress_bind_attrs = true,
+ .of_match_table = miic_of_mtable,
+ },
+ .probe = miic_probe,
+ .remove = miic_remove,
+};
+module_platform_driver(miic_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Renesas MII converter PCS driver");
+MODULE_AUTHOR("Clément Léger <clement.leger@bootlin.com>");
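
The driver never stores its miic_port pointers anywhere; phylink hands back the embedded struct phylink_pcs, and container_of() recovers the wrapper, as phylink_pcs_to_miic_port() does above. A minimal sketch of the embedding pattern with illustrative names:

    #include <linux/kernel.h>
    #include <linux/phylink.h>

    struct example_port {
            struct phylink_pcs pcs; /* embedded, never a pointer */
            int port;
    };

    /* Each pcs_* callback receives &example_port->pcs; container_of()
     * subtracts the member offset to recover the enclosing structure.
     */
    static struct example_port *to_example_port(struct phylink_pcs *pcs)
    {
            return container_of(pcs, struct example_port, pcs);
    }
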
diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index 4cfd05c15aee..ab0af1d2531f 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -77,6 +77,14 @@ static const int xpcs_sgmii_features[] = {
__ETHTOOL_LINK_MODE_MASK_NBITS,
};
+static const int xpcs_1000basex_features[] = {
+ ETHTOOL_LINK_MODE_Pause_BIT,
+ ETHTOOL_LINK_MODE_Asym_Pause_BIT,
+ ETHTOOL_LINK_MODE_Autoneg_BIT,
+ ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
+ __ETHTOOL_LINK_MODE_MASK_NBITS,
+};
+
static const int xpcs_2500basex_features[] = {
ETHTOOL_LINK_MODE_Pause_BIT,
ETHTOOL_LINK_MODE_Asym_Pause_BIT,
@@ -102,6 +110,10 @@ static const phy_interface_t xpcs_sgmii_interfaces[] = {
PHY_INTERFACE_MODE_SGMII,
};
+static const phy_interface_t xpcs_1000basex_interfaces[] = {
+ PHY_INTERFACE_MODE_1000BASEX,
+};
+
static const phy_interface_t xpcs_2500basex_interfaces[] = {
PHY_INTERFACE_MODE_2500BASEX,
PHY_INTERFACE_MODE_MAX,
@@ -112,6 +124,7 @@ enum {
DW_XPCS_10GKR,
DW_XPCS_XLGMII,
DW_XPCS_SGMII,
+ DW_XPCS_1000BASEX,
DW_XPCS_2500BASEX,
DW_XPCS_INTERFACE_MAX,
};
@@ -189,6 +202,14 @@ int xpcs_write(struct dw_xpcs *xpcs, int dev, u32 reg, u16 val)
return mdiobus_c45_write(bus, addr, dev, reg, val);
}
+static int xpcs_modify_changed(struct dw_xpcs *xpcs, int dev, u32 reg,
+ u16 mask, u16 set)
+{
+ u32 reg_addr = mdiobus_c45_addr(dev, reg);
+
+ return mdiodev_modify_changed(xpcs->mdiodev, reg_addr, mask, set);
+}
+
static int xpcs_read_vendor(struct dw_xpcs *xpcs, int dev, u32 reg)
{
return xpcs_read(xpcs, dev, DW_VENDOR | reg);
@@ -237,6 +258,7 @@ static int xpcs_soft_reset(struct dw_xpcs *xpcs,
break;
case DW_AN_C37_SGMII:
case DW_2500BASEX:
+ case DW_AN_C37_1000BASEX:
dev = MDIO_MMD_VEND2;
break;
default:
@@ -772,6 +794,68 @@ static int xpcs_config_aneg_c37_sgmii(struct dw_xpcs *xpcs, unsigned int mode)
return ret;
}
+static int xpcs_config_aneg_c37_1000basex(struct dw_xpcs *xpcs, unsigned int mode,
+ const unsigned long *advertising)
+{
+ phy_interface_t interface = PHY_INTERFACE_MODE_1000BASEX;
+ int ret, mdio_ctrl, adv;
+ bool changed = false;
+
+ /* According to Chap 7.12, to set up 1000BASE-X C37 AN, AN must
+ * be disabled first:
+ * 1) VR_MII_MMD_CTRL Bit(12)[AN_ENABLE] = 0b
+ * 2) VR_MII_AN_CTRL Bit(2:1)[PCS_MODE] = 00b (1000BASE-X C37)
+ */
+ mdio_ctrl = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_MMD_CTRL);
+ if (mdio_ctrl < 0)
+ return mdio_ctrl;
+
+ if (mdio_ctrl & AN_CL37_EN) {
+ ret = xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_MMD_CTRL,
+ mdio_ctrl & ~AN_CL37_EN);
+ if (ret < 0)
+ return ret;
+ }
+
+ ret = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_CTRL);
+ if (ret < 0)
+ return ret;
+
+ ret &= ~DW_VR_MII_PCS_MODE_MASK;
+ ret = xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_CTRL, ret);
+ if (ret < 0)
+ return ret;
+
+ /* Check for advertising changes and update the C45 MII ADV
+ * register accordingly.
+ */
+ adv = phylink_mii_c22_pcs_encode_advertisement(interface,
+ advertising);
+ if (adv >= 0) {
+ ret = xpcs_modify_changed(xpcs, MDIO_MMD_VEND2,
+ MII_ADVERTISE, 0xffff, adv);
+ if (ret < 0)
+ return ret;
+
+ changed = ret;
+ }
+
+ /* Clear CL37 AN complete status */
+ ret = xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_INTR_STS, 0);
+ if (ret < 0)
+ return ret;
+
+ if (phylink_autoneg_inband(mode) &&
+ linkmode_test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, advertising)) {
+ ret = xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_MMD_CTRL,
+ mdio_ctrl | AN_CL37_EN);
+ if (ret < 0)
+ return ret;
+ }
+
+ return changed;
+}
+
static int xpcs_config_2500basex(struct dw_xpcs *xpcs)
{
int ret;
@@ -795,7 +879,7 @@ static int xpcs_config_2500basex(struct dw_xpcs *xpcs)
}
int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface,
- unsigned int mode)
+ unsigned int mode, const unsigned long *advertising)
{
const struct xpcs_compat *compat;
int ret;
@@ -817,6 +901,12 @@ int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface,
if (ret)
return ret;
break;
+ case DW_AN_C37_1000BASEX:
+ ret = xpcs_config_aneg_c37_1000basex(xpcs, mode,
+ advertising);
+ if (ret)
+ return ret;
+ break;
case DW_2500BASEX:
ret = xpcs_config_2500basex(xpcs);
if (ret)
@@ -843,7 +933,7 @@ static int xpcs_config(struct phylink_pcs *pcs, unsigned int mode,
{
struct dw_xpcs *xpcs = phylink_pcs_to_xpcs(pcs);
- return xpcs_do_config(xpcs, interface, mode);
+ return xpcs_do_config(xpcs, interface, mode, advertising);
}
static int xpcs_get_state_c73(struct dw_xpcs *xpcs,
@@ -864,7 +954,7 @@ static int xpcs_get_state_c73(struct dw_xpcs *xpcs,
state->link = 0;
- return xpcs_do_config(xpcs, state->interface, MLO_AN_INBAND);
+ return xpcs_do_config(xpcs, state->interface, MLO_AN_INBAND, NULL);
}
if (state->an_enabled && xpcs_aneg_done_c73(xpcs, state, compat)) {
@@ -921,6 +1011,29 @@ static int xpcs_get_state_c37_sgmii(struct dw_xpcs *xpcs,
return 0;
}
+static int xpcs_get_state_c37_1000basex(struct dw_xpcs *xpcs,
+ struct phylink_link_state *state)
+{
+ int lpa, bmsr;
+
+ if (state->an_enabled) {
+ /* Reset link state */
+ state->link = false;
+
+ lpa = xpcs_read(xpcs, MDIO_MMD_VEND2, MII_LPA);
+ if (lpa < 0 || lpa & LPA_RFAULT)
+ return lpa;
+
+ bmsr = xpcs_read(xpcs, MDIO_MMD_VEND2, MII_BMSR);
+ if (bmsr < 0)
+ return bmsr;
+
+ phylink_mii_c22_pcs_decode_state(state, bmsr, lpa);
+ }
+
+ return 0;
+}
+
static void xpcs_get_state(struct phylink_pcs *pcs,
struct phylink_link_state *state)
{
@@ -948,6 +1061,13 @@ static void xpcs_get_state(struct phylink_pcs *pcs,
ERR_PTR(ret));
}
break;
+ case DW_AN_C37_1000BASEX:
+ ret = xpcs_get_state_c37_1000basex(xpcs, state);
+ if (ret) {
+ pr_err("xpcs_get_state_c37_1000basex returned %pe\n",
+ ERR_PTR(ret));
+ }
+ break;
default:
return;
}
@@ -961,22 +1081,35 @@ static void xpcs_link_up_sgmii(struct dw_xpcs *xpcs, unsigned int mode,
if (phylink_autoneg_inband(mode))
return;
+ val = mii_bmcr_encode_fixed(speed, duplex);
+ ret = xpcs_write(xpcs, MDIO_MMD_VEND2, MDIO_CTRL1, val);
+ if (ret)
+ pr_err("%s: xpcs_write returned %pe\n", __func__, ERR_PTR(ret));
+}
+
+static void xpcs_link_up_1000basex(struct dw_xpcs *xpcs, unsigned int mode,
+ int speed, int duplex)
+{
+ int val, ret;
+
+ if (phylink_autoneg_inband(mode))
+ return;
+
switch (speed) {
case SPEED_1000:
val = BMCR_SPEED1000;
break;
case SPEED_100:
- val = BMCR_SPEED100;
- break;
case SPEED_10:
- val = BMCR_SPEED10;
- break;
default:
+ pr_err("%s: speed = %d\n", __func__, speed);
return;
}
if (duplex == DUPLEX_FULL)
val |= BMCR_FULLDPLX;
+ else
+ pr_err("%s: half duplex not supported\n", __func__);
ret = xpcs_write(xpcs, MDIO_MMD_VEND2, MDIO_CTRL1, val);
if (ret)
@@ -992,9 +1125,23 @@ void xpcs_link_up(struct phylink_pcs *pcs, unsigned int mode,
return xpcs_config_usxgmii(xpcs, speed);
if (interface == PHY_INTERFACE_MODE_SGMII)
return xpcs_link_up_sgmii(xpcs, mode, speed, duplex);
+ if (interface == PHY_INTERFACE_MODE_1000BASEX)
+ return xpcs_link_up_1000basex(xpcs, mode, speed, duplex);
}
EXPORT_SYMBOL_GPL(xpcs_link_up);
+static void xpcs_an_restart(struct phylink_pcs *pcs)
+{
+ struct dw_xpcs *xpcs = phylink_pcs_to_xpcs(pcs);
+ int ret;
+
+ ret = xpcs_read(xpcs, MDIO_MMD_VEND2, MDIO_CTRL1);
+ if (ret >= 0) {
+ ret |= BMCR_ANRESTART;
+ xpcs_write(xpcs, MDIO_MMD_VEND2, MDIO_CTRL1, ret);
+ }
+}
+
static u32 xpcs_get_id(struct dw_xpcs *xpcs)
{
int ret;
@@ -1060,6 +1207,12 @@ static const struct xpcs_compat synopsys_xpcs_compat[DW_XPCS_INTERFACE_MAX] = {
.num_interfaces = ARRAY_SIZE(xpcs_sgmii_interfaces),
.an_mode = DW_AN_C37_SGMII,
},
+ [DW_XPCS_1000BASEX] = {
+ .supported = xpcs_1000basex_features,
+ .interface = xpcs_1000basex_interfaces,
+ .num_interfaces = ARRAY_SIZE(xpcs_1000basex_interfaces),
+ .an_mode = DW_AN_C37_1000BASEX,
+ },
[DW_XPCS_2500BASEX] = {
.supported = xpcs_2500basex_features,
.interface = xpcs_2500basex_interfaces,
@@ -1115,6 +1268,7 @@ static const struct phylink_pcs_ops xpcs_phylink_ops = {
.pcs_validate = xpcs_validate,
.pcs_config = xpcs_config,
.pcs_get_state = xpcs_get_state,
+ .pcs_an_restart = xpcs_an_restart,
.pcs_link_up = xpcs_link_up,
};
diff --git a/drivers/net/pcs/pcs-xpcs.h b/drivers/net/pcs/pcs-xpcs.h
index 35651d32a224..770df50323a0 100644
--- a/drivers/net/pcs/pcs-xpcs.h
+++ b/drivers/net/pcs/pcs-xpcs.h
@@ -109,7 +109,6 @@
int xpcs_read(struct dw_xpcs *xpcs, int dev, u32 reg);
int xpcs_write(struct dw_xpcs *xpcs, int dev, u32 reg, u16 val);
-
int nxp_sja1105_sgmii_pma_config(struct dw_xpcs *xpcs);
int nxp_sja1110_sgmii_pma_config(struct dw_xpcs *xpcs);
int nxp_sja1110_2500basex_pma_config(struct dw_xpcs *xpcs);
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 9fee639ee5c8..c57a0262fb64 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -104,6 +104,8 @@ config AX88796B_PHY
config BROADCOM_PHY
tristate "Broadcom 54XX PHYs"
select BCM_NET_PHYLIB
+ select BCM_NET_PHYPTP if NETWORK_PHY_TIMESTAMPING
+ depends on PTP_1588_CLOCK_OPTIONAL
help
Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464,
BCM5481, BCM54810 and BCM5482 PHYs.
@@ -160,6 +162,9 @@ config BCM_CYGNUS_PHY
config BCM_NET_PHYLIB
tristate
+config BCM_NET_PHYPTP
+ tristate
+
config CICADA_PHY
tristate "Cicada PHYs"
help
@@ -216,6 +221,8 @@ config MARVELL_88X2222_PHY
config MAXLINEAR_GPHY
tristate "Maxlinear Ethernet PHYs"
+ select POLYNOMIAL if HWMON
+ depends on HWMON || HWMON=n
help
Support for the Maxlinear GPY115, GPY211, GPY212, GPY215,
GPY241, GPY245 PHYs.
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index b12b1d86fc99..f7138d3c896b 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -47,6 +47,7 @@ obj-$(CONFIG_BCM84881_PHY) += bcm84881.o
obj-$(CONFIG_BCM87XX_PHY) += bcm87xx.o
obj-$(CONFIG_BCM_CYGNUS_PHY) += bcm-cygnus.o
obj-$(CONFIG_BCM_NET_PHYLIB) += bcm-phy-lib.o
+obj-$(CONFIG_BCM_NET_PHYPTP) += bcm-phy-ptp.o
obj-$(CONFIG_BROADCOM_PHY) += broadcom.o
obj-$(CONFIG_CICADA_PHY) += cicada.o
obj-$(CONFIG_CORTINA_PHY) += cortina.o
diff --git a/drivers/net/phy/aquantia_main.c b/drivers/net/phy/aquantia_main.c
index a8db1a19011b..8b7a46db30e0 100644
--- a/drivers/net/phy/aquantia_main.c
+++ b/drivers/net/phy/aquantia_main.c
@@ -22,6 +22,7 @@
#define PHY_ID_AQR107 0x03a1b4e0
#define PHY_ID_AQCS109 0x03a1b5c2
#define PHY_ID_AQR405 0x03a1b4b0
+#define PHY_ID_AQR113C 0x31c31c12
#define MDIO_PHYXS_VEND_IF_STATUS 0xe812
#define MDIO_PHYXS_VEND_IF_STATUS_TYPE_MASK GENMASK(7, 3)
@@ -34,6 +35,8 @@
#define MDIO_AN_VEND_PROV 0xc400
#define MDIO_AN_VEND_PROV_1000BASET_FULL BIT(15)
#define MDIO_AN_VEND_PROV_1000BASET_HALF BIT(14)
+#define MDIO_AN_VEND_PROV_5000BASET_FULL BIT(11)
+#define MDIO_AN_VEND_PROV_2500BASET_FULL BIT(10)
#define MDIO_AN_VEND_PROV_DOWNSHIFT_EN BIT(4)
#define MDIO_AN_VEND_PROV_DOWNSHIFT_MASK GENMASK(3, 0)
#define MDIO_AN_VEND_PROV_DOWNSHIFT_DFLT 4
@@ -231,9 +234,20 @@ static int aqr_config_aneg(struct phy_device *phydev)
phydev->advertising))
reg |= MDIO_AN_VEND_PROV_1000BASET_HALF;
+ /* Handle the case when the 2.5G and 5G speeds are not advertised */
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
+ phydev->advertising))
+ reg |= MDIO_AN_VEND_PROV_2500BASET_FULL;
+
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
+ phydev->advertising))
+ reg |= MDIO_AN_VEND_PROV_5000BASET_FULL;
+
ret = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MDIO_AN_VEND_PROV,
MDIO_AN_VEND_PROV_1000BASET_HALF |
- MDIO_AN_VEND_PROV_1000BASET_FULL, reg);
+ MDIO_AN_VEND_PROV_1000BASET_FULL |
+ MDIO_AN_VEND_PROV_2500BASET_FULL |
+ MDIO_AN_VEND_PROV_5000BASET_FULL, reg);
if (ret < 0)
return ret;
if (ret > 0)
@@ -684,6 +698,24 @@ static struct phy_driver aqr_driver[] = {
.handle_interrupt = aqr_handle_interrupt,
.read_status = aqr_read_status,
},
+{
+ PHY_ID_MATCH_MODEL(PHY_ID_AQR113C),
+ .name = "Aquantia AQR113C",
+ .probe = aqr107_probe,
+ .config_init = aqr107_config_init,
+ .config_aneg = aqr_config_aneg,
+ .config_intr = aqr_config_intr,
+ .handle_interrupt = aqr_handle_interrupt,
+ .read_status = aqr107_read_status,
+ .get_tunable = aqr107_get_tunable,
+ .set_tunable = aqr107_set_tunable,
+ .suspend = aqr107_suspend,
+ .resume = aqr107_resume,
+ .get_sset_count = aqr107_get_sset_count,
+ .get_strings = aqr107_get_strings,
+ .get_stats = aqr107_get_stats,
+ .link_change_notify = aqr107_link_change_notify,
+},
};
module_phy_driver(aqr_driver);
@@ -696,6 +728,7 @@ static struct mdio_device_id __maybe_unused aqr_tbl[] = {
{ PHY_ID_MATCH_MODEL(PHY_ID_AQR107) },
{ PHY_ID_MATCH_MODEL(PHY_ID_AQCS109) },
{ PHY_ID_MATCH_MODEL(PHY_ID_AQR405) },
+ { PHY_ID_MATCH_MODEL(PHY_ID_AQR113C) },
{ }
};
diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index 6a467e7817a6..59fe356942b5 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -2072,6 +2072,8 @@ static struct phy_driver at803x_driver[] = {
/* ATHEROS AR9331 */
PHY_ID_MATCH_EXACT(ATH9331_PHY_ID),
.name = "Qualcomm Atheros AR9331 built-in PHY",
+ .probe = at803x_probe,
+ .remove = at803x_remove,
.suspend = at803x_suspend,
.resume = at803x_resume,
.flags = PHY_POLL_CABLE_TEST,
@@ -2087,6 +2089,8 @@ static struct phy_driver at803x_driver[] = {
/* Qualcomm Atheros QCA9561 */
PHY_ID_MATCH_EXACT(QCA9561_PHY_ID),
.name = "Qualcomm Atheros QCA9561 built-in PHY",
+ .probe = at803x_probe,
+ .remove = at803x_remove,
.suspend = at803x_suspend,
.resume = at803x_resume,
.flags = PHY_POLL_CABLE_TEST,
@@ -2151,6 +2155,8 @@ static struct phy_driver at803x_driver[] = {
PHY_ID_MATCH_EXACT(QCA8081_PHY_ID),
.name = "Qualcomm QCA8081",
.flags = PHY_POLL_CABLE_TEST,
+ .probe = at803x_probe,
+ .remove = at803x_remove,
.config_intr = at803x_config_intr,
.handle_interrupt = at803x_handle_interrupt,
.get_tunable = at803x_get_tunable,
diff --git a/drivers/net/phy/bcm-phy-lib.h b/drivers/net/phy/bcm-phy-lib.h
index c3842f87c33b..9902fb182099 100644
--- a/drivers/net/phy/bcm-phy-lib.h
+++ b/drivers/net/phy/bcm-phy-lib.h
@@ -87,4 +87,23 @@ int bcm_phy_cable_test_start_rdb(struct phy_device *phydev);
int bcm_phy_cable_test_start(struct phy_device *phydev);
int bcm_phy_cable_test_get_status(struct phy_device *phydev, bool *finished);
+#if IS_ENABLED(CONFIG_BCM_NET_PHYPTP)
+struct bcm_ptp_private *bcm_ptp_probe(struct phy_device *phydev);
+void bcm_ptp_config_init(struct phy_device *phydev);
+void bcm_ptp_stop(struct bcm_ptp_private *priv);
+#else
+static inline struct bcm_ptp_private *bcm_ptp_probe(struct phy_device *phydev)
+{
+ return NULL;
+}
+
+static inline void bcm_ptp_config_init(struct phy_device *phydev)
+{
+}
+
+static inline void bcm_ptp_stop(struct bcm_ptp_private *priv)
+{
+}
+#endif
+
#endif /* _LINUX_BCM_PHY_LIB_H */
diff --git a/drivers/net/phy/bcm-phy-ptp.c b/drivers/net/phy/bcm-phy-ptp.c
new file mode 100644
index 000000000000..ef00d6163061
--- /dev/null
+++ b/drivers/net/phy/bcm-phy-ptp.c
@@ -0,0 +1,944 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 Meta Platforms Inc.
+ * Copyright (C) 2022 Jonathan Lemon <jonathan.lemon@gmail.com>
+ */
+
+#include <asm/unaligned.h>
+#include <linux/mii.h>
+#include <linux/phy.h>
+#include <linux/ptp_classify.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/net_tstamp.h>
+#include <linux/netdevice.h>
+#include <linux/workqueue.h>
+
+#include "bcm-phy-lib.h"
+
+/* IEEE 1588 Expansion registers */
+#define SLICE_CTRL 0x0810
+#define SLICE_TX_EN BIT(0)
+#define SLICE_RX_EN BIT(8)
+#define TX_EVENT_MODE 0x0811
+#define MODE_TX_UPDATE_CF BIT(0)
+#define MODE_TX_REPLACE_TS_CF BIT(1)
+#define MODE_TX_REPLACE_TS GENMASK(1, 0)
+#define RX_EVENT_MODE 0x0819
+#define MODE_RX_UPDATE_CF BIT(0)
+#define MODE_RX_INSERT_TS_48 BIT(1)
+#define MODE_RX_INSERT_TS_64 GENMASK(1, 0)
+
+#define MODE_EVT_SHIFT_SYNC 0
+#define MODE_EVT_SHIFT_DELAY_REQ 2
+#define MODE_EVT_SHIFT_PDELAY_REQ 4
+#define MODE_EVT_SHIFT_PDELAY_RESP 6
+
+#define MODE_SEL_SHIFT_PORT 0
+#define MODE_SEL_SHIFT_CPU 8
+
+#define RX_MODE_SEL(sel, evt, act) \
+ (((MODE_RX_##act) << (MODE_EVT_SHIFT_##evt)) << (MODE_SEL_SHIFT_##sel))
+
+#define TX_MODE_SEL(sel, evt, act) \
+ (((MODE_TX_##act) << (MODE_EVT_SHIFT_##evt)) << (MODE_SEL_SHIFT_##sel))
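+
+/* Worked example: RX_MODE_SEL(PORT, SYNC, INSERT_TS_64) expands to
+ * (MODE_RX_INSERT_TS_64 << MODE_EVT_SHIFT_SYNC) << MODE_SEL_SHIFT_PORT,
+ * i.e. (0x3 << 0) << 0 = 0x0003, and RX_MODE_SEL(CPU, PDELAY_RESP,
+ * UPDATE_CF) is (0x1 << 6) << 8 = 0x4000: a 2-bit action per event type,
+ * with the port and CPU selections packed into the low and high byte.
+ */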
+
+/* needs global TS capture first */
+#define TX_TS_CAPTURE 0x0821
+#define TX_TS_CAP_EN BIT(0)
+#define RX_TS_CAPTURE 0x0822
+#define RX_TS_CAP_EN BIT(0)
+
+#define TIME_CODE_0 0x0854
+#define TIME_CODE_1 0x0855
+#define TIME_CODE_2 0x0856
+#define TIME_CODE_3 0x0857
+#define TIME_CODE_4 0x0858
+
+#define DPLL_SELECT 0x085b
+#define DPLL_HB_MODE2 BIT(6)
+
+#define SHADOW_CTRL 0x085c
+#define SHADOW_LOAD 0x085d
+#define TIME_CODE_LOAD BIT(10)
+#define SYNC_OUT_LOAD BIT(9)
+#define NCO_TIME_LOAD BIT(7)
+#define FREQ_LOAD BIT(6)
+#define INTR_MASK 0x085e
+#define INTR_STATUS 0x085f
+#define INTC_FSYNC BIT(0)
+#define INTC_SOP BIT(1)
+
+#define NCO_FREQ_LSB 0x0873
+#define NCO_FREQ_MSB 0x0874
+
+#define NCO_TIME_0 0x0875
+#define NCO_TIME_1 0x0876
+#define NCO_TIME_2_CTRL 0x0877
+#define FREQ_MDIO_SEL BIT(14)
+
+#define SYNC_OUT_0 0x0878
+#define SYNC_OUT_1 0x0879
+#define SYNC_OUT_2 0x087a
+
+#define SYNC_IN_DIVIDER 0x087b
+
+#define SYNOUT_TS_0 0x087c
+#define SYNOUT_TS_1 0x087d
+#define SYNOUT_TS_2 0x087e
+
+#define NSE_CTRL 0x087f
+#define NSE_GMODE_EN GENMASK(15, 14)
+#define NSE_CAPTURE_EN BIT(13)
+#define NSE_INIT BIT(12)
+#define NSE_CPU_FRAMESYNC BIT(5)
+#define NSE_SYNC1_FRAMESYNC BIT(3)
+#define NSE_FRAMESYNC_MASK GENMASK(5, 2)
+#define NSE_PEROUT_EN BIT(1)
+#define NSE_ONESHOT_EN BIT(0)
+#define NSE_SYNC_OUT_MASK GENMASK(1, 0)
+
+#define TS_READ_CTRL 0x0885
+#define TS_READ_START BIT(0)
+#define TS_READ_END BIT(1)
+
+#define HB_REG_0 0x0886
+#define HB_REG_1 0x0887
+#define HB_REG_2 0x0888
+#define HB_REG_3 0x08ec
+#define HB_REG_4 0x08ed
+#define HB_STAT_CTRL 0x088e
+#define HB_READ_START BIT(10)
+#define HB_READ_END BIT(11)
+#define HB_READ_MASK GENMASK(11, 10)
+
+#define TS_REG_0 0x0889
+#define TS_REG_1 0x088a
+#define TS_REG_2 0x088b
+#define TS_REG_3 0x08c4
+
+#define TS_INFO_0 0x088c
+#define TS_INFO_1 0x088d
+
+#define TIMECODE_CTRL 0x08c3
+#define TX_TIMECODE_SEL GENMASK(7, 0)
+#define RX_TIMECODE_SEL GENMASK(15, 8)
+
+#define TIME_SYNC 0x0ff5
+#define TIME_SYNC_EN BIT(0)
+
+struct bcm_ptp_private {
+ struct phy_device *phydev;
+ struct mii_timestamper mii_ts;
+ struct ptp_clock *ptp_clock;
+ struct ptp_clock_info ptp_info;
+ struct ptp_pin_desc pin;
+ struct mutex mutex;
+ struct sk_buff_head tx_queue;
+ int tx_type;
+ bool hwts_rx;
+ u16 nse_ctrl;
+ bool pin_active;
+ struct delayed_work pin_work;
+};
+
+struct bcm_ptp_skb_cb {
+ unsigned long timeout;
+ u16 seq_id;
+ u8 msgtype;
+ bool discard;
+};
+
+struct bcm_ptp_capture {
+ ktime_t hwtstamp;
+ u16 seq_id;
+ u8 msgtype;
+ bool tx_dir;
+};
+
+#define BCM_SKB_CB(skb) ((struct bcm_ptp_skb_cb *)(skb)->cb)
+#define SKB_TS_TIMEOUT 10 /* jiffies */
+
+#define BCM_MAX_PULSE_8NS ((1U << 9) - 1)
+#define BCM_MAX_PERIOD_8NS ((1U << 30) - 1)
+
+#define BRCM_PHY_MODEL(phydev) \
+ ((phydev)->drv->phy_id & (phydev)->drv->phy_id_mask)
+
+static struct bcm_ptp_private *mii2priv(struct mii_timestamper *mii_ts)
+{
+ return container_of(mii_ts, struct bcm_ptp_private, mii_ts);
+}
+
+static struct bcm_ptp_private *ptp2priv(struct ptp_clock_info *info)
+{
+ return container_of(info, struct bcm_ptp_private, ptp_info);
+}
+
+static void bcm_ptp_get_framesync_ts(struct phy_device *phydev,
+ struct timespec64 *ts)
+{
+ u16 hb[4];
+
+ bcm_phy_write_exp(phydev, HB_STAT_CTRL, HB_READ_START);
+
+ hb[0] = bcm_phy_read_exp(phydev, HB_REG_0);
+ hb[1] = bcm_phy_read_exp(phydev, HB_REG_1);
+ hb[2] = bcm_phy_read_exp(phydev, HB_REG_2);
+ hb[3] = bcm_phy_read_exp(phydev, HB_REG_3);
+
+ bcm_phy_write_exp(phydev, HB_STAT_CTRL, HB_READ_END);
+ bcm_phy_write_exp(phydev, HB_STAT_CTRL, 0);
+
+ ts->tv_sec = (hb[3] << 16) | hb[2];
+ ts->tv_nsec = (hb[1] << 16) | hb[0];
+}
+
+static u16 bcm_ptp_framesync_disable(struct phy_device *phydev, u16 orig_ctrl)
+{
+ u16 ctrl = orig_ctrl & ~(NSE_FRAMESYNC_MASK | NSE_CAPTURE_EN);
+
+ bcm_phy_write_exp(phydev, NSE_CTRL, ctrl);
+
+ return ctrl;
+}
+
+static void bcm_ptp_framesync_restore(struct phy_device *phydev, u16 orig_ctrl)
+{
+ if (orig_ctrl & NSE_FRAMESYNC_MASK)
+ bcm_phy_write_exp(phydev, NSE_CTRL, orig_ctrl);
+}
+
+static void bcm_ptp_framesync(struct phy_device *phydev, u16 ctrl)
+{
+ /* trigger framesync - must have 0->1 transition. */
+ bcm_phy_write_exp(phydev, NSE_CTRL, ctrl | NSE_CPU_FRAMESYNC);
+}
+
+static int bcm_ptp_framesync_ts(struct phy_device *phydev,
+ struct ptp_system_timestamp *sts,
+ struct timespec64 *ts,
+ u16 orig_ctrl)
+{
+ u16 ctrl, reg;
+ int i;
+
+ ctrl = bcm_ptp_framesync_disable(phydev, orig_ctrl);
+
+ ptp_read_system_prets(sts);
+
+ /* trigger framesync + capture */
+ bcm_ptp_framesync(phydev, ctrl | NSE_CAPTURE_EN);
+
+ ptp_read_system_postts(sts);
+
+ /* poll for FSYNC interrupt from TS capture */
+ for (i = 0; i < 10; i++) {
+ reg = bcm_phy_read_exp(phydev, INTR_STATUS);
+ if (reg & INTC_FSYNC) {
+ bcm_ptp_get_framesync_ts(phydev, ts);
+ break;
+ }
+ }
+
+ bcm_ptp_framesync_restore(phydev, orig_ctrl);
+
+ return reg & INTC_FSYNC ? 0 : -ETIMEDOUT;
+}
+
+static int bcm_ptp_gettimex(struct ptp_clock_info *info,
+ struct timespec64 *ts,
+ struct ptp_system_timestamp *sts)
+{
+ struct bcm_ptp_private *priv = ptp2priv(info);
+ int err;
+
+ mutex_lock(&priv->mutex);
+ err = bcm_ptp_framesync_ts(priv->phydev, sts, ts, priv->nse_ctrl);
+ mutex_unlock(&priv->mutex);
+
+ return err;
+}
+
+static int bcm_ptp_settime_locked(struct bcm_ptp_private *priv,
+ const struct timespec64 *ts)
+{
+ struct phy_device *phydev = priv->phydev;
+ u16 ctrl;
+ u64 ns;
+
+ ctrl = bcm_ptp_framesync_disable(phydev, priv->nse_ctrl);
+
+ /* set up time code */
+ bcm_phy_write_exp(phydev, TIME_CODE_0, ts->tv_nsec);
+ bcm_phy_write_exp(phydev, TIME_CODE_1, ts->tv_nsec >> 16);
+ bcm_phy_write_exp(phydev, TIME_CODE_2, ts->tv_sec);
+ bcm_phy_write_exp(phydev, TIME_CODE_3, ts->tv_sec >> 16);
+ bcm_phy_write_exp(phydev, TIME_CODE_4, ts->tv_sec >> 32);
+
+ /* set NCO counter to match */
+ ns = timespec64_to_ns(ts);
+ bcm_phy_write_exp(phydev, NCO_TIME_0, ns >> 4);
+ bcm_phy_write_exp(phydev, NCO_TIME_1, ns >> 20);
+ bcm_phy_write_exp(phydev, NCO_TIME_2_CTRL, (ns >> 36) & 0xfff);
+
+ /* set up load on next frame sync (auto-clears due to NSE_INIT) */
+ bcm_phy_write_exp(phydev, SHADOW_LOAD, TIME_CODE_LOAD | NCO_TIME_LOAD);
+
+ /* must have NSE_INIT in order to write time code */
+ bcm_ptp_framesync(phydev, ctrl | NSE_INIT);
+
+ bcm_ptp_framesync_restore(phydev, priv->nse_ctrl);
+
+ return 0;
+}
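+
+/* Worked example: for ts->tv_sec = 0x123456789a and ts->tv_nsec =
+ * 0x0bcdef01, the writes above come out as TIME_CODE_0..4 = 0xef01,
+ * 0x0bcd, 0x789a, 0x3456, 0x0012: each register takes the low 16 bits
+ * of the shifted value, forming a 48.32 sec.nsec time code.
+ */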
+
+static int bcm_ptp_settime(struct ptp_clock_info *info,
+ const struct timespec64 *ts)
+{
+ struct bcm_ptp_private *priv = ptp2priv(info);
+ int err;
+
+ mutex_lock(&priv->mutex);
+ err = bcm_ptp_settime_locked(priv, ts);
+ mutex_unlock(&priv->mutex);
+
+ return err;
+}
+
+static int bcm_ptp_adjtime_locked(struct bcm_ptp_private *priv,
+ s64 delta_ns)
+{
+ struct timespec64 ts;
+ int err;
+ s64 ns;
+
+ err = bcm_ptp_framesync_ts(priv->phydev, NULL, &ts, priv->nse_ctrl);
+ if (!err) {
+ ns = timespec64_to_ns(&ts) + delta_ns;
+ ts = ns_to_timespec64(ns);
+ err = bcm_ptp_settime_locked(priv, &ts);
+ }
+ return err;
+}
+
+static int bcm_ptp_adjtime(struct ptp_clock_info *info, s64 delta_ns)
+{
+ struct bcm_ptp_private *priv = ptp2priv(info);
+ int err;
+
+ mutex_lock(&priv->mutex);
+ err = bcm_ptp_adjtime_locked(priv, delta_ns);
+ mutex_unlock(&priv->mutex);
+
+ return err;
+}
+
+/* A 125MHz clock should adjust 8ns per pulse.
+ * The frequency adjustment base is 0x8000 0000, or 8*2^28.
+ *
+ * Frequency adjustment is
+ * adj = scaled_ppm * 8*2^28 / (10^6 * 2^16)
+ * which simplifies to:
+ * adj = scaled_ppm * 2^9 / 5^6
+ */
+static int bcm_ptp_adjfine(struct ptp_clock_info *info, long scaled_ppm)
+{
+ struct bcm_ptp_private *priv = ptp2priv(info);
+ int neg_adj = 0;
+ u32 diff, freq;
+ u16 ctrl;
+ u64 adj;
+
+ if (scaled_ppm < 0) {
+ neg_adj = 1;
+ scaled_ppm = -scaled_ppm;
+ }
+
+ adj = scaled_ppm << 9;
+ diff = div_u64(adj, 15625);
+ freq = (8 << 28) + (neg_adj ? -diff : diff);
+
+ mutex_lock(&priv->mutex);
+
+ ctrl = bcm_ptp_framesync_disable(priv->phydev, priv->nse_ctrl);
+
+ bcm_phy_write_exp(priv->phydev, NCO_FREQ_LSB, freq);
+ bcm_phy_write_exp(priv->phydev, NCO_FREQ_MSB, freq >> 16);
+
+ bcm_phy_write_exp(priv->phydev, NCO_TIME_2_CTRL, FREQ_MDIO_SEL);
+
+ /* load on next framesync */
+ bcm_phy_write_exp(priv->phydev, SHADOW_LOAD, FREQ_LOAD);
+
+ bcm_ptp_framesync(priv->phydev, ctrl);
+
+ /* clear load */
+ bcm_phy_write_exp(priv->phydev, SHADOW_LOAD, 0);
+
+ bcm_ptp_framesync_restore(priv->phydev, priv->nse_ctrl);
+
+ mutex_unlock(&priv->mutex);
+
+ return 0;
+}
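+
+/* Worked example: +1 ppm is scaled_ppm = 1 << 16 = 65536, so
+ * adj = 65536 << 9 = 33554432 and diff = 33554432 / 15625 = 2147,
+ * giving freq = 0x80000000 + 2147; 2147 / 2^31 ~= 1.0e-6, i.e. the
+ * NCO now runs 1 ppm fast.
+ */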
+
+static bool bcm_ptp_rxtstamp(struct mii_timestamper *mii_ts,
+ struct sk_buff *skb, int type)
+{
+ struct bcm_ptp_private *priv = mii2priv(mii_ts);
+ struct skb_shared_hwtstamps *hwts;
+ struct ptp_header *header;
+ u32 sec, nsec;
+ u8 *data;
+ int off;
+
+ if (!priv->hwts_rx)
+ return false;
+
+ header = ptp_parse_header(skb, type);
+ if (!header)
+ return false;
+
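+ /* In INSERT_TS_64 mode the PHY inserts an 8-byte timestamp (32-bit
+ * seconds, 32-bit nanoseconds, big endian) directly after the PTP
+ * header; pick it up here, then strip it from the packet below.
+ */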
+ data = (u8 *)(header + 1);
+ sec = get_unaligned_be32(data);
+ nsec = get_unaligned_be32(data + 4);
+
+ hwts = skb_hwtstamps(skb);
+ hwts->hwtstamp = ktime_set(sec, nsec);
+
+ off = data - skb->data + 8;
+ if (off < skb->len) {
+ memmove(data, data + 8, skb->len - off);
+ __pskb_trim(skb, skb->len - 8);
+ }
+
+ return false;
+}
+
+static bool bcm_ptp_get_tstamp(struct bcm_ptp_private *priv,
+ struct bcm_ptp_capture *capts)
+{
+ struct phy_device *phydev = priv->phydev;
+ u16 ts[4], reg;
+ u32 sec, nsec;
+
+ mutex_lock(&priv->mutex);
+
+ reg = bcm_phy_read_exp(phydev, INTR_STATUS);
+ if ((reg & INTC_SOP) == 0) {
+ mutex_unlock(&priv->mutex);
+ return false;
+ }
+
+ bcm_phy_write_exp(phydev, TS_READ_CTRL, TS_READ_START);
+
+ ts[0] = bcm_phy_read_exp(phydev, TS_REG_0);
+ ts[1] = bcm_phy_read_exp(phydev, TS_REG_1);
+ ts[2] = bcm_phy_read_exp(phydev, TS_REG_2);
+ ts[3] = bcm_phy_read_exp(phydev, TS_REG_3);
+
+ /* not in be32 format for some reason */
+ capts->seq_id = bcm_phy_read_exp(priv->phydev, TS_INFO_0);
+
+ reg = bcm_phy_read_exp(phydev, TS_INFO_1);
+ capts->msgtype = reg >> 12;
+ capts->tx_dir = !!(reg & BIT(11));
+
+ bcm_phy_write_exp(phydev, TS_READ_CTRL, TS_READ_END);
+ bcm_phy_write_exp(phydev, TS_READ_CTRL, 0);
+
+ mutex_unlock(&priv->mutex);
+
+ sec = (ts[3] << 16) | ts[2];
+ nsec = (ts[1] << 16) | ts[0];
+ capts->hwtstamp = ktime_set(sec, nsec);
+
+ return true;
+}
+
+static void bcm_ptp_match_tstamp(struct bcm_ptp_private *priv,
+ struct bcm_ptp_capture *capts)
+{
+ struct skb_shared_hwtstamps hwts;
+ struct sk_buff *skb, *ts_skb;
+ unsigned long flags;
+ bool first = false;
+
+ ts_skb = NULL;
+ spin_lock_irqsave(&priv->tx_queue.lock, flags);
+ skb_queue_walk(&priv->tx_queue, skb) {
+ if (BCM_SKB_CB(skb)->seq_id == capts->seq_id &&
+ BCM_SKB_CB(skb)->msgtype == capts->msgtype) {
+ first = skb_queue_is_first(&priv->tx_queue, skb);
+ __skb_unlink(skb, &priv->tx_queue);
+ ts_skb = skb;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&priv->tx_queue.lock, flags);
+
+ /* TX captures one-step packets, discard them if needed. */
+ if (ts_skb) {
+ if (BCM_SKB_CB(ts_skb)->discard) {
+ kfree_skb(ts_skb);
+ } else {
+ memset(&hwts, 0, sizeof(hwts));
+ hwts.hwtstamp = capts->hwtstamp;
+ skb_complete_tx_timestamp(ts_skb, &hwts);
+ }
+ }
+
+ /* not first match, try and expire entries */
+ if (!first) {
+ while ((skb = skb_dequeue(&priv->tx_queue))) {
+ if (!time_after(jiffies, BCM_SKB_CB(skb)->timeout)) {
+ skb_queue_head(&priv->tx_queue, skb);
+ break;
+ }
+ kfree_skb(skb);
+ }
+ }
+}
+
+static long bcm_ptp_do_aux_work(struct ptp_clock_info *info)
+{
+ struct bcm_ptp_private *priv = ptp2priv(info);
+ struct bcm_ptp_capture capts;
+ bool reschedule = false;
+
+ while (!skb_queue_empty_lockless(&priv->tx_queue)) {
+ if (!bcm_ptp_get_tstamp(priv, &capts)) {
+ reschedule = true;
+ break;
+ }
+ bcm_ptp_match_tstamp(priv, &capts);
+ }
+
+ return reschedule ? 1 : -1;
+}
+
+static int bcm_ptp_cancel_func(struct bcm_ptp_private *priv)
+{
+ if (!priv->pin_active)
+ return 0;
+
+ priv->pin_active = false;
+
+ priv->nse_ctrl &= ~(NSE_SYNC_OUT_MASK | NSE_SYNC1_FRAMESYNC |
+ NSE_CAPTURE_EN);
+ bcm_phy_write_exp(priv->phydev, NSE_CTRL, priv->nse_ctrl);
+
+ cancel_delayed_work_sync(&priv->pin_work);
+
+ return 0;
+}
+
+static void bcm_ptp_perout_work(struct work_struct *pin_work)
+{
+ struct bcm_ptp_private *priv =
+ container_of(pin_work, struct bcm_ptp_private, pin_work.work);
+ struct phy_device *phydev = priv->phydev;
+ struct timespec64 ts;
+ u64 ns, next;
+ u16 ctrl;
+
+ mutex_lock(&priv->mutex);
+
+ /* no longer running */
+ if (!priv->pin_active) {
+ mutex_unlock(&priv->mutex);
+ return;
+ }
+
+ bcm_ptp_framesync_ts(phydev, NULL, &ts, priv->nse_ctrl);
+
+ /* this is 1PPS only */
+ next = NSEC_PER_SEC - ts.tv_nsec;
+ ts.tv_sec += next < NSEC_PER_MSEC ? 2 : 1;
+ ts.tv_nsec = 0;
+
+ ns = timespec64_to_ns(&ts);
+
+ /* force 0->1 transition for ONESHOT */
+ ctrl = bcm_ptp_framesync_disable(phydev,
+ priv->nse_ctrl & ~NSE_ONESHOT_EN);
+
+ bcm_phy_write_exp(phydev, SYNOUT_TS_0, ns & 0xfff0);
+ bcm_phy_write_exp(phydev, SYNOUT_TS_1, ns >> 16);
+ bcm_phy_write_exp(phydev, SYNOUT_TS_2, ns >> 32);
+
+ /* load values on next framesync */
+ bcm_phy_write_exp(phydev, SHADOW_LOAD, SYNC_OUT_LOAD);
+
+ bcm_ptp_framesync(phydev, ctrl | NSE_ONESHOT_EN | NSE_INIT);
+
+ priv->nse_ctrl |= NSE_ONESHOT_EN;
+ bcm_ptp_framesync_restore(phydev, priv->nse_ctrl);
+
+ mutex_unlock(&priv->mutex);
+
+ next = next + NSEC_PER_MSEC;
+ schedule_delayed_work(&priv->pin_work, nsecs_to_jiffies(next));
+}
+
+static int bcm_ptp_perout_locked(struct bcm_ptp_private *priv,
+ struct ptp_perout_request *req, int on)
+{
+ struct phy_device *phydev = priv->phydev;
+ u64 period, pulse;
+ u16 val;
+
+ if (!on)
+ return bcm_ptp_cancel_func(priv);
+
+ /* 1PPS */
+ if (req->period.sec != 1 || req->period.nsec != 0)
+ return -EINVAL;
+
+ period = BCM_MAX_PERIOD_8NS; /* write nonzero value */
+
+ if (req->flags & PTP_PEROUT_PHASE)
+ return -EOPNOTSUPP;
+
+ if (req->flags & PTP_PEROUT_DUTY_CYCLE)
+ pulse = ktime_to_ns(ktime_set(req->on.sec, req->on.nsec));
+ else
+ pulse = (u64)BCM_MAX_PULSE_8NS << 3;
+
+ /* convert to 8ns units */
+ pulse >>= 3;
+
+ if (!pulse || pulse > period || pulse > BCM_MAX_PULSE_8NS)
+ return -EINVAL;
+
+ bcm_phy_write_exp(phydev, SYNC_OUT_0, period);
+
+ val = ((pulse & 0x3) << 14) | ((period >> 16) & 0x3fff);
+ bcm_phy_write_exp(phydev, SYNC_OUT_1, val);
+
+ val = ((pulse >> 2) & 0x7f) | (pulse << 7);
+ bcm_phy_write_exp(phydev, SYNC_OUT_2, val);
+
+ if (priv->pin_active)
+ cancel_delayed_work_sync(&priv->pin_work);
+
+ priv->pin_active = true;
+ INIT_DELAYED_WORK(&priv->pin_work, bcm_ptp_perout_work);
+ schedule_delayed_work(&priv->pin_work, 0);
+
+ return 0;
+}
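+
+/* Worked example: with the default pulse width, pulse = BCM_MAX_PULSE_8NS
+ * = 511 (in 8ns units) and period = BCM_MAX_PERIOD_8NS = 0x3fffffff,
+ * all three SYNC_OUT writes above come out as 0xffff: the low period
+ * bits, then pulse[1:0] packed over period[29:16], then the remaining
+ * pulse bits.
+ */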
+
+static void bcm_ptp_extts_work(struct work_struct *pin_work)
+{
+ struct bcm_ptp_private *priv =
+ container_of(pin_work, struct bcm_ptp_private, pin_work.work);
+ struct phy_device *phydev = priv->phydev;
+ struct ptp_clock_event event;
+ struct timespec64 ts;
+ u16 reg;
+
+ mutex_lock(&priv->mutex);
+
+ /* no longer running */
+ if (!priv->pin_active) {
+ mutex_unlock(&priv->mutex);
+ return;
+ }
+
+ reg = bcm_phy_read_exp(phydev, INTR_STATUS);
+ if ((reg & INTC_FSYNC) == 0)
+ goto out;
+
+ bcm_ptp_get_framesync_ts(phydev, &ts);
+
+ event.index = 0;
+ event.type = PTP_CLOCK_EXTTS;
+ event.timestamp = timespec64_to_ns(&ts);
+ ptp_clock_event(priv->ptp_clock, &event);
+
+out:
+ mutex_unlock(&priv->mutex);
+ schedule_delayed_work(&priv->pin_work, HZ / 4);
+}
+
+static int bcm_ptp_extts_locked(struct bcm_ptp_private *priv, int on)
+{
+ struct phy_device *phydev = priv->phydev;
+
+ if (!on)
+ return bcm_ptp_cancel_func(priv);
+
+ if (priv->pin_active)
+ cancel_delayed_work_sync(&priv->pin_work);
+
+ bcm_ptp_framesync_disable(phydev, priv->nse_ctrl);
+
+ priv->nse_ctrl |= NSE_SYNC1_FRAMESYNC | NSE_CAPTURE_EN;
+
+ bcm_ptp_framesync_restore(phydev, priv->nse_ctrl);
+
+ priv->pin_active = true;
+ INIT_DELAYED_WORK(&priv->pin_work, bcm_ptp_extts_work);
+ schedule_delayed_work(&priv->pin_work, 0);
+
+ return 0;
+}
+
+static int bcm_ptp_enable(struct ptp_clock_info *info,
+ struct ptp_clock_request *rq, int on)
+{
+ struct bcm_ptp_private *priv = ptp2priv(info);
+ int err = -EBUSY;
+
+ mutex_lock(&priv->mutex);
+
+ switch (rq->type) {
+ case PTP_CLK_REQ_PEROUT:
+ if (priv->pin.func == PTP_PF_PEROUT)
+ err = bcm_ptp_perout_locked(priv, &rq->perout, on);
+ break;
+ case PTP_CLK_REQ_EXTTS:
+ if (priv->pin.func == PTP_PF_EXTTS)
+ err = bcm_ptp_extts_locked(priv, on);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ mutex_unlock(&priv->mutex);
+
+ return err;
+}
+
+static int bcm_ptp_verify(struct ptp_clock_info *info, unsigned int pin,
+ enum ptp_pin_function func, unsigned int chan)
+{
+ switch (func) {
+ case PTP_PF_NONE:
+ case PTP_PF_EXTTS:
+ case PTP_PF_PEROUT:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static const struct ptp_clock_info bcm_ptp_clock_info = {
+ .owner = THIS_MODULE,
+ .name = KBUILD_MODNAME,
+ .max_adj = 100000000,
+ .gettimex64 = bcm_ptp_gettimex,
+ .settime64 = bcm_ptp_settime,
+ .adjtime = bcm_ptp_adjtime,
+ .adjfine = bcm_ptp_adjfine,
+ .enable = bcm_ptp_enable,
+ .verify = bcm_ptp_verify,
+ .do_aux_work = bcm_ptp_do_aux_work,
+ .n_pins = 1,
+ .n_per_out = 1,
+ .n_ext_ts = 1,
+};
+
+static void bcm_ptp_txtstamp(struct mii_timestamper *mii_ts,
+ struct sk_buff *skb, int type)
+{
+ struct bcm_ptp_private *priv = mii2priv(mii_ts);
+ struct ptp_header *hdr;
+ bool discard = false;
+ int msgtype;
+
+ hdr = ptp_parse_header(skb, type);
+ if (!hdr)
+ goto out;
+ msgtype = ptp_get_msgtype(hdr, type);
+
+ switch (priv->tx_type) {
+ case HWTSTAMP_TX_ONESTEP_P2P:
+ if (msgtype == PTP_MSGTYPE_PDELAY_RESP)
+ discard = true;
+ fallthrough;
+ case HWTSTAMP_TX_ONESTEP_SYNC:
+ if (msgtype == PTP_MSGTYPE_SYNC)
+ discard = true;
+ fallthrough;
+ case HWTSTAMP_TX_ON:
+ BCM_SKB_CB(skb)->timeout = jiffies + SKB_TS_TIMEOUT;
+ BCM_SKB_CB(skb)->seq_id = be16_to_cpu(hdr->sequence_id);
+ BCM_SKB_CB(skb)->msgtype = msgtype;
+ BCM_SKB_CB(skb)->discard = discard;
+ skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+ skb_queue_tail(&priv->tx_queue, skb);
+ ptp_schedule_worker(priv->ptp_clock, 0);
+ return;
+ default:
+ break;
+ }
+
+out:
+ kfree_skb(skb);
+}
+
+static int bcm_ptp_hwtstamp(struct mii_timestamper *mii_ts,
+ struct ifreq *ifr)
+{
+ struct bcm_ptp_private *priv = mii2priv(mii_ts);
+ struct hwtstamp_config cfg;
+ u16 mode, ctrl;
+
+ if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
+ return -EFAULT;
+
+ switch (cfg.rx_filter) {
+ case HWTSTAMP_FILTER_NONE:
+ priv->hwts_rx = false;
+ break;
+ case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+ cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+ priv->hwts_rx = true;
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ priv->tx_type = cfg.tx_type;
+
+ ctrl = priv->hwts_rx ? SLICE_RX_EN : 0;
+ ctrl |= priv->tx_type != HWTSTAMP_TX_OFF ? SLICE_TX_EN : 0;
+
+ mode = TX_MODE_SEL(PORT, SYNC, REPLACE_TS) |
+ TX_MODE_SEL(PORT, DELAY_REQ, REPLACE_TS) |
+ TX_MODE_SEL(PORT, PDELAY_REQ, REPLACE_TS) |
+ TX_MODE_SEL(PORT, PDELAY_RESP, REPLACE_TS);
+
+ bcm_phy_write_exp(priv->phydev, TX_EVENT_MODE, mode);
+
+ mode = RX_MODE_SEL(PORT, SYNC, INSERT_TS_64) |
+ RX_MODE_SEL(PORT, DELAY_REQ, INSERT_TS_64) |
+ RX_MODE_SEL(PORT, PDELAY_REQ, INSERT_TS_64) |
+ RX_MODE_SEL(PORT, PDELAY_RESP, INSERT_TS_64);
+
+ bcm_phy_write_exp(priv->phydev, RX_EVENT_MODE, mode);
+
+ bcm_phy_write_exp(priv->phydev, SLICE_CTRL, ctrl);
+
+ if (ctrl & SLICE_TX_EN)
+ bcm_phy_write_exp(priv->phydev, TX_TS_CAPTURE, TX_TS_CAP_EN);
+ else
+ ptp_cancel_worker_sync(priv->ptp_clock);
+
+ /* purge existing data */
+ skb_queue_purge(&priv->tx_queue);
+
+ return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
+}
+
+static int bcm_ptp_ts_info(struct mii_timestamper *mii_ts,
+ struct ethtool_ts_info *ts_info)
+{
+ struct bcm_ptp_private *priv = mii2priv(mii_ts);
+
+ ts_info->phc_index = ptp_clock_index(priv->ptp_clock);
+ ts_info->so_timestamping =
+ SOF_TIMESTAMPING_TX_HARDWARE |
+ SOF_TIMESTAMPING_RX_HARDWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE;
+ ts_info->tx_types =
+ BIT(HWTSTAMP_TX_ON) |
+ BIT(HWTSTAMP_TX_OFF) |
+ BIT(HWTSTAMP_TX_ONESTEP_SYNC) |
+ BIT(HWTSTAMP_TX_ONESTEP_P2P);
+ ts_info->rx_filters =
+ BIT(HWTSTAMP_FILTER_NONE) |
+ BIT(HWTSTAMP_FILTER_PTP_V2_EVENT);
+
+ return 0;
+}
+
+void bcm_ptp_stop(struct bcm_ptp_private *priv)
+{
+ ptp_cancel_worker_sync(priv->ptp_clock);
+ bcm_ptp_cancel_func(priv);
+}
+EXPORT_SYMBOL_GPL(bcm_ptp_stop);
+
+void bcm_ptp_config_init(struct phy_device *phydev)
+{
+ /* init network sync engine */
+ bcm_phy_write_exp(phydev, NSE_CTRL, NSE_GMODE_EN | NSE_INIT);
+
+ /* enable time sync (TX/RX SOP capture) */
+ bcm_phy_write_exp(phydev, TIME_SYNC, TIME_SYNC_EN);
+
+ /* use sec.nsec heartbeat capture */
+ bcm_phy_write_exp(phydev, DPLL_SELECT, DPLL_HB_MODE2);
+
+ /* use 64 bit timecode for TX */
+ bcm_phy_write_exp(phydev, TIMECODE_CTRL, TX_TIMECODE_SEL);
+
+ /* always allow FREQ_LOAD on framesync */
+ bcm_phy_write_exp(phydev, SHADOW_CTRL, FREQ_LOAD);
+
+ bcm_phy_write_exp(phydev, SYNC_IN_DIVIDER, 1);
+}
+EXPORT_SYMBOL_GPL(bcm_ptp_config_init);
+
+static void bcm_ptp_init(struct bcm_ptp_private *priv)
+{
+ priv->nse_ctrl = NSE_GMODE_EN;
+
+ mutex_init(&priv->mutex);
+ skb_queue_head_init(&priv->tx_queue);
+
+ priv->mii_ts.rxtstamp = bcm_ptp_rxtstamp;
+ priv->mii_ts.txtstamp = bcm_ptp_txtstamp;
+ priv->mii_ts.hwtstamp = bcm_ptp_hwtstamp;
+ priv->mii_ts.ts_info = bcm_ptp_ts_info;
+
+ priv->phydev->mii_ts = &priv->mii_ts;
+}
+
+struct bcm_ptp_private *bcm_ptp_probe(struct phy_device *phydev)
+{
+ struct bcm_ptp_private *priv;
+ struct ptp_clock *clock;
+
+ switch (BRCM_PHY_MODEL(phydev)) {
+ case PHY_ID_BCM54210E:
+ break;
+ default:
+ return NULL;
+ }
+
+ priv = devm_kzalloc(&phydev->mdio.dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return ERR_PTR(-ENOMEM);
+
+ priv->ptp_info = bcm_ptp_clock_info;
+
+ snprintf(priv->pin.name, sizeof(priv->pin.name), "SYNC_OUT");
+ priv->ptp_info.pin_config = &priv->pin;
+
+ clock = ptp_clock_register(&priv->ptp_info, &phydev->mdio.dev);
+ if (IS_ERR(clock))
+ return ERR_CAST(clock);
+ priv->ptp_clock = clock;
+
+ priv->phydev = phydev;
+ bcm_ptp_init(priv);
+
+ return priv;
+}
+EXPORT_SYMBOL_GPL(bcm_ptp_probe);
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index e36809aa6d30..876bc45ede60 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -27,6 +27,11 @@ MODULE_DESCRIPTION("Broadcom PHY driver");
MODULE_AUTHOR("Maciej W. Rozycki");
MODULE_LICENSE("GPL");
+struct bcm54xx_phy_priv {
+ u64 *stats;
+ struct bcm_ptp_private *ptp;
+};
+
static int bcm54xx_config_clock_delay(struct phy_device *phydev)
{
int rc, val;
@@ -313,6 +318,22 @@ static void bcm54xx_adjust_rxrefclk(struct phy_device *phydev)
bcm_phy_write_shadow(phydev, BCM54XX_SHD_APD, val);
}
+static void bcm54xx_ptp_stop(struct phy_device *phydev)
+{
+ struct bcm54xx_phy_priv *priv = phydev->priv;
+
+ if (priv->ptp)
+ bcm_ptp_stop(priv->ptp);
+}
+
+static void bcm54xx_ptp_config_init(struct phy_device *phydev)
+{
+ struct bcm54xx_phy_priv *priv = phydev->priv;
+
+ if (priv->ptp)
+ bcm_ptp_config_init(phydev);
+}
+
static int bcm54xx_config_init(struct phy_device *phydev)
{
int reg, err, val;
@@ -390,6 +411,8 @@ static int bcm54xx_config_init(struct phy_device *phydev)
bcm_phy_write_exp(phydev, BCM_EXP_MULTICOLOR, val);
}
+ bcm54xx_ptp_config_init(phydev);
+
return 0;
}
@@ -418,6 +441,8 @@ static int bcm54xx_suspend(struct phy_device *phydev)
{
int ret;
+ bcm54xx_ptp_stop(phydev);
+
/* We cannot use a read/modify/write here otherwise the PHY gets into
* a bad state where its LEDs keep flashing, thus defeating the purpose
* of low power mode.
@@ -741,10 +766,6 @@ static irqreturn_t brcm_fet_handle_interrupt(struct phy_device *phydev)
return IRQ_HANDLED;
}
-struct bcm54xx_phy_priv {
- u64 *stats;
-};
-
static int bcm54xx_phy_probe(struct phy_device *phydev)
{
struct bcm54xx_phy_priv *priv;
@@ -761,6 +782,10 @@ static int bcm54xx_phy_probe(struct phy_device *phydev)
if (!priv->stats)
return -ENOMEM;
+ priv->ptp = bcm_ptp_probe(phydev);
+ if (IS_ERR(priv->ptp))
+ return PTR_ERR(priv->ptp);
+
return 0;
}
diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index 13dafe7a29bd..1e38039c5c56 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -14,6 +14,7 @@
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/bitfield.h>
+#include <linux/nvmem-consumer.h>
#include <dt-bindings/net/ti-dp83867.h>
@@ -522,6 +523,51 @@ static int dp83867_verify_rgmii_cfg(struct phy_device *phydev)
}
#if IS_ENABLED(CONFIG_OF_MDIO)
+static int dp83867_of_init_io_impedance(struct phy_device *phydev)
+{
+ struct dp83867_private *dp83867 = phydev->priv;
+ struct device *dev = &phydev->mdio.dev;
+ struct device_node *of_node = dev->of_node;
+ struct nvmem_cell *cell;
+ u8 *buf, val;
+ int ret;
+
+ cell = of_nvmem_cell_get(of_node, "io_impedance_ctrl");
+ if (IS_ERR(cell)) {
+ ret = PTR_ERR(cell);
+ if (ret != -ENOENT)
+ return phydev_err_probe(phydev, ret,
+ "failed to get nvmem cell io_impedance_ctrl\n");
+
+ /* If no nvmem cell, check for the boolean properties. */
+ if (of_property_read_bool(of_node, "ti,max-output-impedance"))
+ dp83867->io_impedance = DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX;
+ else if (of_property_read_bool(of_node, "ti,min-output-impedance"))
+ dp83867->io_impedance = DP83867_IO_MUX_CFG_IO_IMPEDANCE_MIN;
+ else
+ dp83867->io_impedance = -1; /* leave at default */
+
+ return 0;
+ }
+
+ buf = nvmem_cell_read(cell, NULL);
+ nvmem_cell_put(cell);
+
+ if (IS_ERR(buf))
+ return PTR_ERR(buf);
+
+ val = *buf;
+ kfree(buf);
+
+ if ((val & DP83867_IO_MUX_CFG_IO_IMPEDANCE_MASK) != val) {
+ phydev_err(phydev, "nvmem cell 'io_impedance_ctrl' contents out of range\n");
+ return -ERANGE;
+ }
+ dp83867->io_impedance = val;
+
+ return 0;
+}
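+
+/* An illustrative (assumed) device tree fragment for the nvmem path above;
+ * only the cell name "io_impedance_ctrl" is fixed by this parser:
+ *
+ *	ethernet-phy@0 {
+ *		reg = <0>;
+ *		nvmem-cells = <&phy_io_impedance>;
+ *		nvmem-cell-names = "io_impedance_ctrl";
+ *	};
+ */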
+
static int dp83867_of_init(struct phy_device *phydev)
{
struct dp83867_private *dp83867 = phydev->priv;
@@ -549,12 +595,9 @@ static int dp83867_of_init(struct phy_device *phydev)
}
}
- if (of_property_read_bool(of_node, "ti,max-output-impedance"))
- dp83867->io_impedance = DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX;
- else if (of_property_read_bool(of_node, "ti,min-output-impedance"))
- dp83867->io_impedance = DP83867_IO_MUX_CFG_IO_IMPEDANCE_MIN;
- else
- dp83867->io_impedance = -1; /* leave at default */
+ ret = dp83867_of_init_io_impedance(phydev);
+ if (ret)
+ return ret;
dp83867->rxctrl_strap_quirk = of_property_read_bool(of_node,
"ti,dp83867-rxctrl-strap-quirk");
diff --git a/drivers/net/phy/dp83td510.c b/drivers/net/phy/dp83td510.c
index 1ae792b0daaa..3cd9a77f9532 100644
--- a/drivers/net/phy/dp83td510.c
+++ b/drivers/net/phy/dp83td510.c
@@ -27,6 +27,27 @@
#define DP83TD510E_AN_STAT_1 0x60c
#define DP83TD510E_MASTER_SLAVE_RESOL_FAIL BIT(15)
+#define DP83TD510E_MSE_DETECT 0xa85
+
+#define DP83TD510_SQI_MAX 7
+
+/* Register values are converted to SNR(dB) as suggested by
+ * "Application Report - DP83TD510E Cable Diagnostics Toolkit":
+ * SNR(dB) = -10 * log10 (VAL/2^17) - 1.76 dB.
+ * SQI ranges are implemented according to "OPEN ALLIANCE - Advanced diagnostic
+ * features for 100BASE-T1 automotive Ethernet PHYs"
+ */
+static const u16 dp83td510_mse_sqi_map[] = {
+ 0x0569, /* < 18dB */
+ 0x044c, /* 18dB =< SNR < 19dB */
+ 0x0369, /* 19dB =< SNR < 20dB */
+ 0x02b6, /* 20dB =< SNR < 21dB */
+ 0x0227, /* 21dB =< SNR < 22dB */
+ 0x01b6, /* 22dB =< SNR < 23dB */
+ 0x015b, /* 23dB =< SNR < 24dB */
+ 0x0000 /* 24dB =< SNR */
+};
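+
+/* Worked example: an MSE readout of 0x0300 (768) is below 0x0369 but at
+ * least 0x02b6, so the lookup in dp83td510_get_sqi() below returns
+ * SQI = 3, i.e. 20dB =< SNR < 21dB.
+ */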
+
static int dp83td510_config_intr(struct phy_device *phydev)
{
int ret;
@@ -164,6 +185,32 @@ static int dp83td510_config_aneg(struct phy_device *phydev)
return genphy_c45_check_and_restart_aneg(phydev, changed);
}
+static int dp83td510_get_sqi(struct phy_device *phydev)
+{
+ int sqi, ret;
+ u16 mse_val;
+
+ if (!phydev->link)
+ return 0;
+
+ ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, DP83TD510E_MSE_DETECT);
+ if (ret < 0)
+ return ret;
+
+ mse_val = 0xFFFF & ret;
+ for (sqi = 0; sqi < ARRAY_SIZE(dp83td510_mse_sqi_map); sqi++) {
+ if (mse_val >= dp83td510_mse_sqi_map[sqi])
+ return sqi;
+ }
+
+ return -EINVAL;
+}
+
+static int dp83td510_get_sqi_max(struct phy_device *phydev)
+{
+ return DP83TD510_SQI_MAX;
+}
+
static int dp83td510_get_features(struct phy_device *phydev)
{
/* This PHY can't respond on MDIO bus if no RMII clock is enabled.
@@ -192,6 +239,8 @@ static struct phy_driver dp83td510_driver[] = {
.get_features = dp83td510_get_features,
.config_intr = dp83td510_config_intr,
.handle_interrupt = dp83td510_handle_interrupt,
+ .get_sqi = dp83td510_get_sqi,
+ .get_sqi_max = dp83td510_get_sqi_max,
.suspend = genphy_suspend,
.resume = genphy_resume,
diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c
index 03abe6233bbb..aef739c20ac4 100644
--- a/drivers/net/phy/fixed_phy.c
+++ b/drivers/net/phy/fixed_phy.c
@@ -353,6 +353,7 @@ static int __init fixed_mdio_bus_init(void)
fmb->mii_bus->parent = &pdev->dev;
fmb->mii_bus->read = &fixed_mdio_read;
fmb->mii_bus->write = &fixed_mdio_write;
+ fmb->mii_bus->phy_mask = ~0;
ret = mdiobus_register(fmb->mii_bus);
if (ret)
diff --git a/drivers/net/phy/marvell-88x2222.c b/drivers/net/phy/marvell-88x2222.c
index d8b31d4d2a73..f070776ca904 100644
--- a/drivers/net/phy/marvell-88x2222.c
+++ b/drivers/net/phy/marvell-88x2222.c
@@ -490,6 +490,7 @@ static int mv2222_sfp_insert(void *upstream, const struct sfp_eeprom_id *id)
dev = &phydev->mdio.dev;
sfp_parse_support(phydev->sfp_bus, id, sfp_supported);
+ phydev->port = sfp_parse_port(phydev->sfp_bus, id, sfp_supported);
sfp_interface = sfp_select_interface(phydev->sfp_bus, sfp_supported);
dev_info(dev, "%s SFP module inserted\n", phy_modes(sfp_interface));
@@ -526,6 +527,7 @@ static void mv2222_sfp_remove(void *upstream)
priv->line_interface = PHY_INTERFACE_MODE_NA;
linkmode_zero(priv->supported);
+ phydev->port = PORT_NONE;
}
static void mv2222_sfp_link_up(void *upstream)
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index d777c8851ed6..a714150f5e8c 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -1991,15 +1991,9 @@ static int m88e1510_loopback(struct phy_device *phydev, bool enable)
int err;
if (enable) {
- u16 bmcr_ctl = 0, mscr2_ctl = 0;
+ u16 bmcr_ctl, mscr2_ctl = 0;
- if (phydev->speed == SPEED_1000)
- bmcr_ctl = BMCR_SPEED1000;
- else if (phydev->speed == SPEED_100)
- bmcr_ctl = BMCR_SPEED100;
-
- if (phydev->duplex == DUPLEX_FULL)
- bmcr_ctl |= BMCR_FULLDPLX;
+ bmcr_ctl = mii_bmcr_encode_fixed(phydev->speed, phydev->duplex);
err = phy_write(phydev, MII_BMCR, bmcr_ctl);
if (err < 0)
diff --git a/drivers/net/phy/mxl-gpy.c b/drivers/net/phy/mxl-gpy.c
index 5ce1bf03bbd7..5b99acf44337 100644
--- a/drivers/net/phy/mxl-gpy.c
+++ b/drivers/net/phy/mxl-gpy.c
@@ -8,7 +8,9 @@
#include <linux/module.h>
#include <linux/bitfield.h>
+#include <linux/hwmon.h>
#include <linux/phy.h>
+#include <linux/polynomial.h>
#include <linux/netdevice.h>
/* PHY ID */
@@ -64,6 +66,10 @@
#define VSPEC1_SGMII_ANEN_ANRS (VSPEC1_SGMII_CTRL_ANEN | \
VSPEC1_SGMII_CTRL_ANRS)
+/* Temperature sensor */
+#define VPSPEC1_TEMP_STA 0x0E
+#define VPSPEC1_TEMP_STA_DATA GENMASK(9, 0)
+
/* WoL */
#define VPSPEC2_WOL_CTL 0x0E06
#define VPSPEC2_WOL_AD01 0x0E08
@@ -80,6 +86,102 @@ static const struct {
{9, 0x73},
};
+#if IS_ENABLED(CONFIG_HWMON)
+/* The original formula translating the sensor reading to a temperature in
+ * degrees Celsius is:
+ *
+ * T = -2.5761e-11*(N^4) + 9.7332e-8*(N^3) + -1.9165e-4*(N^2) +
+ * 3.0762e-1*(N^1) + -5.2156e1
+ *
+ * where T = [-52.156, 137.961]C and N = [0, 1023].
+ *
+ * It must be altered accordingly to be suitable for integer arithmetic.
+ * The technique is called 'factor redistribution': the multiplications
+ * and divisions are arranged so that every intermediate result stays
+ * within the integer range. In addition, the formula is translated to
+ * yield millidegrees Celsius. Here is what it looks like after the
+ * alterations:
+ *
+ * T = -25761e-12*(N^4) + 97332e-9*(N^3) + -191650e-6*(N^2) +
+ * 307620e-3*(N^1) + -52156
+ *
+ * where T = [-52156, 137961]mC and N = [0, 1023].
+ */
+static const struct polynomial poly_N_to_temp = {
+ .terms = {
+ {4, -25761, 1000, 1},
+ {3, 97332, 1000, 1},
+ {2, -191650, 1000, 1},
+ {1, 307620, 1000, 1},
+ {0, -52156, 1, 1}
+ }
+};
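+
+/* Worked example: a mid-scale reading of N = 512 evaluates to roughly
+ * -1770 + 13064 - 50241 + 157501 - 52156 ~= 66398 mC, i.e. about 66.4
+ * degrees Celsius, well inside the stated [-52156, 137961] mC range.
+ */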
+
+static int gpy_hwmon_read(struct device *dev,
+ enum hwmon_sensor_types type,
+ u32 attr, int channel, long *value)
+{
+ struct phy_device *phydev = dev_get_drvdata(dev);
+ int ret;
+
+ ret = phy_read_mmd(phydev, MDIO_MMD_VEND1, VPSPEC1_TEMP_STA);
+ if (ret < 0)
+ return ret;
+ if (!ret)
+ return -ENODATA;
+
+ *value = polynomial_calc(&poly_N_to_temp,
+ FIELD_GET(VPSPEC1_TEMP_STA_DATA, ret));
+
+ return 0;
+}
+
+static umode_t gpy_hwmon_is_visible(const void *data,
+ enum hwmon_sensor_types type,
+ u32 attr, int channel)
+{
+ return 0444;
+}
+
+static const struct hwmon_channel_info *gpy_hwmon_info[] = {
+ HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT),
+ NULL
+};
+
+static const struct hwmon_ops gpy_hwmon_hwmon_ops = {
+ .is_visible = gpy_hwmon_is_visible,
+ .read = gpy_hwmon_read,
+};
+
+static const struct hwmon_chip_info gpy_hwmon_chip_info = {
+ .ops = &gpy_hwmon_hwmon_ops,
+ .info = gpy_hwmon_info,
+};
+
+static int gpy_hwmon_register(struct phy_device *phydev)
+{
+ struct device *dev = &phydev->mdio.dev;
+ struct device *hwmon_dev;
+ char *hwmon_name;
+
+ hwmon_name = devm_hwmon_sanitize_name(dev, dev_name(dev));
+ if (IS_ERR(hwmon_name))
+ return PTR_ERR(hwmon_name);
+
+ hwmon_dev = devm_hwmon_device_register_with_info(dev, hwmon_name,
+ phydev,
+ &gpy_hwmon_chip_info,
+ NULL);
+
+ return PTR_ERR_OR_ZERO(hwmon_dev);
+}
+#else
+static int gpy_hwmon_register(struct phy_device *phydev)
+{
+ return 0;
+}
+#endif
+
static int gpy_config_init(struct phy_device *phydev)
{
int ret;
@@ -109,6 +211,10 @@ static int gpy_probe(struct phy_device *phydev)
if (ret < 0)
return ret;
+ ret = gpy_hwmon_register(phydev);
+ if (ret)
+ return ret;
+
phydev_info(phydev, "Firmware Version: 0x%04X (%s)\n", ret,
(ret & PHY_FWV_REL_MASK) ? "release" : "test");
@@ -295,6 +401,9 @@ static void gpy_update_interface(struct phy_device *phydev)
ret);
break;
}
+
+ if (phydev->speed == SPEED_2500 || phydev->speed == SPEED_1000)
+ genphy_read_master_slave(phydev);
}
static int gpy_read_status(struct phy_device *phydev)
diff --git a/drivers/net/phy/nxp-tja11xx.c b/drivers/net/phy/nxp-tja11xx.c
index 9944cc501806..2a8195c50d14 100644
--- a/drivers/net/phy/nxp-tja11xx.c
+++ b/drivers/net/phy/nxp-tja11xx.c
@@ -444,15 +444,10 @@ static int tja11xx_hwmon_register(struct phy_device *phydev,
struct tja11xx_priv *priv)
{
struct device *dev = &phydev->mdio.dev;
- int i;
-
- priv->hwmon_name = devm_kstrdup(dev, dev_name(dev), GFP_KERNEL);
- if (!priv->hwmon_name)
- return -ENOMEM;
- for (i = 0; priv->hwmon_name[i]; i++)
- if (hwmon_is_bad_char(priv->hwmon_name[i]))
- priv->hwmon_name[i] = '_';
+ priv->hwmon_name = devm_hwmon_sanitize_name(dev, dev_name(dev));
+ if (IS_ERR(priv->hwmon_name))
+ return PTR_ERR(priv->hwmon_name);
priv->hwmon_dev =
devm_hwmon_device_register_with_info(dev, priv->hwmon_name,
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 431a8719c635..7885bceff773 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -2001,18 +2001,12 @@ EXPORT_SYMBOL(genphy_config_eee_advert);
*/
int genphy_setup_forced(struct phy_device *phydev)
{
- u16 ctl = 0;
+ u16 ctl;
phydev->pause = 0;
phydev->asym_pause = 0;
- if (SPEED_1000 == phydev->speed)
- ctl |= BMCR_SPEED1000;
- else if (SPEED_100 == phydev->speed)
- ctl |= BMCR_SPEED100;
-
- if (DUPLEX_FULL == phydev->duplex)
- ctl |= BMCR_FULLDPLX;
+ ctl = mii_bmcr_encode_fixed(phydev->speed, phydev->duplex);
return phy_modify(phydev, MII_BMCR,
~(BMCR_LOOPBACK | BMCR_ISOLATE | BMCR_PDOWN), ctl);
@@ -2614,13 +2608,7 @@ int genphy_loopback(struct phy_device *phydev, bool enable)
u16 val, ctl = BMCR_LOOPBACK;
int ret;
- if (phydev->speed == SPEED_1000)
- ctl |= BMCR_SPEED1000;
- else if (phydev->speed == SPEED_100)
- ctl |= BMCR_SPEED100;
-
- if (phydev->duplex == DUPLEX_FULL)
- ctl |= BMCR_FULLDPLX;
+ ctl |= mii_bmcr_encode_fixed(phydev->speed, phydev->duplex);
phy_modify(phydev, MII_BMCR, ~0, ctl);
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 066684b80919..e20cdab824db 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -2991,6 +2991,7 @@ int phylink_mii_c22_pcs_encode_advertisement(phy_interface_t interface,
adv |= ADVERTISE_1000XPSE_ASYM;
return adv;
case PHY_INTERFACE_MODE_SGMII:
+ case PHY_INTERFACE_MODE_QSGMII:
return 0x0001;
default:
/* Nothing to do for other modes */
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index 9a5d5a10560f..81a529c3dbe4 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -1290,7 +1290,7 @@ static const struct hwmon_chip_info sfp_hwmon_chip_info = {
static void sfp_hwmon_probe(struct work_struct *work)
{
struct sfp *sfp = container_of(work, struct sfp, hwmon_probe.work);
- int err, i;
+ int err;
/* hwmon interface needs to access 16bit registers in atomic way to
* guarantee coherency of the diagnostic monitoring data. If it is not
@@ -1318,16 +1318,12 @@ static void sfp_hwmon_probe(struct work_struct *work)
return;
}
- sfp->hwmon_name = kstrdup(dev_name(sfp->dev), GFP_KERNEL);
- if (!sfp->hwmon_name) {
+ sfp->hwmon_name = hwmon_sanitize_name(dev_name(sfp->dev));
+ if (IS_ERR(sfp->hwmon_name)) {
dev_err(sfp->dev, "out of memory for hwmon name\n");
return;
}
- for (i = 0; sfp->hwmon_name[i]; i++)
- if (hwmon_is_bad_char(sfp->hwmon_name[i]))
- sfp->hwmon_name[i] = '_';
-
sfp->hwmon_dev = hwmon_device_register_with_info(sfp->dev,
sfp->hwmon_name, sfp,
&sfp_hwmon_chip_info,
diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c
index 1b54684b68a0..69423b8965b3 100644
--- a/drivers/net/phy/smsc.c
+++ b/drivers/net/phy/smsc.c
@@ -110,7 +110,7 @@ static int smsc_phy_config_init(struct phy_device *phydev)
struct smsc_phy_priv *priv = phydev->priv;
int rc;
- if (!priv->energy_enable)
+ if (!priv->energy_enable || phydev->irq != PHY_POLL)
return 0;
rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS);
@@ -121,10 +121,7 @@ static int smsc_phy_config_init(struct phy_device *phydev)
/* Enable energy detect mode for this SMSC Transceivers */
rc = phy_write(phydev, MII_LAN83C185_CTRL_STATUS,
rc | MII_LAN83C185_EDPWRDOWN);
- if (rc < 0)
- return rc;
-
- return smsc_phy_ack_interrupt(phydev);
+ return rc;
}
static int smsc_phy_reset(struct phy_device *phydev)
@@ -146,11 +143,6 @@ static int smsc_phy_reset(struct phy_device *phydev)
return genphy_soft_reset(phydev);
}
-static int lan911x_config_init(struct phy_device *phydev)
-{
- return smsc_phy_ack_interrupt(phydev);
-}
-
static int lan87xx_config_aneg(struct phy_device *phydev)
{
int rc;
@@ -210,6 +202,8 @@ static int lan95xx_config_aneg_ext(struct phy_device *phydev)
* response on link pulses to detect presence of plugged Ethernet cable.
* The Energy Detect Power-Down mode is enabled again in the end of procedure to
* save approximately 220 mW of power if cable is unplugged.
+ * The workaround is only applicable to poll mode. Energy Detect Power-Down
+ * may not be used in interrupt mode lest link change detection become
+ * unreliable.
*/
static int lan87xx_read_status(struct phy_device *phydev)
{
@@ -217,7 +211,7 @@ static int lan87xx_read_status(struct phy_device *phydev)
int err = genphy_read_status(phydev);
- if (!phydev->link && priv->energy_enable) {
+ if (!phydev->link && priv->energy_enable && phydev->irq == PHY_POLL) {
/* Disable EDPD to wake up PHY */
int rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS);
if (rc < 0)
@@ -418,9 +412,6 @@ static struct phy_driver smsc_phy_driver[] = {
.probe = smsc_phy_probe,
- /* basic functions */
- .config_init = lan911x_config_init,
-
/* IRQ related */
.config_intr = smsc_phy_config_intr,
.handle_interrupt = smsc_phy_handle_interrupt,
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 4a365f15533e..9206c660a72e 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -2968,7 +2968,7 @@ ppp_unregister_channel(struct ppp_channel *chan)
chan->ppp = NULL;
/*
- * This ensures that we have returned from any calls into the
+ * This ensures that we have returned from any calls into
* the channel's start_xmit or ioctl routine before we proceed.
*/
down_write(&pch->chan_sem);
diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index 7a8c11a26eb5..4704ed6f00ef 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -1750,7 +1750,7 @@ static const struct driver_info ax88179_info = {
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
@@ -1763,7 +1763,7 @@ static const struct driver_info ax88178a_info = {
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
@@ -1776,7 +1776,7 @@ static const struct driver_info cypress_GX3_info = {
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
@@ -1789,7 +1789,7 @@ static const struct driver_info dlink_dub1312_info = {
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
@@ -1802,7 +1802,7 @@ static const struct driver_info sitecom_info = {
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
@@ -1815,7 +1815,7 @@ static const struct driver_info samsung_info = {
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
@@ -1828,7 +1828,7 @@ static const struct driver_info lenovo_info = {
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
@@ -1841,7 +1841,7 @@ static const struct driver_info belkin_info = {
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
@@ -1854,7 +1854,7 @@ static const struct driver_info toshiba_info = {
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
@@ -1867,7 +1867,7 @@ static const struct driver_info mct_info = {
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
@@ -1880,7 +1880,7 @@ static const struct driver_info at_umc2000_info = {
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
@@ -1893,7 +1893,7 @@ static const struct driver_info at_umc200_info = {
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
@@ -1906,7 +1906,7 @@ static const struct driver_info at_umc2000sp_info = {
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index bd03e16f98a1..35110814ba22 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -2088,6 +2088,11 @@ static const struct usb_device_id products[] = {
USB_DEVICE(0x0424, 0x9E08),
.driver_info = (unsigned long) &smsc95xx_info,
},
+ {
+ /* Microchip's EVB-LAN8670-USB 10BASE-T1S Ethernet Device */
+ USB_DEVICE(0x184F, 0x0051),
+ .driver_info = (unsigned long)&smsc95xx_info,
+ },
{ }, /* END */
};
MODULE_DEVICE_TABLE(usb, products);
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index dc79811535c2..713cfc2723b8 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -849,13 +849,11 @@ int usbnet_stop (struct net_device *net)
mpn = !test_and_clear_bit(EVENT_NO_RUNTIME_PM, &dev->flags);
- /* deferred work (task, timer, softirq) must also stop.
- * can't flush_scheduled_work() until we drop rtnl (later),
- * else workers could deadlock; so make workers a NOP.
- */
+ /* deferred work (timer, softirq, task) must also stop */
dev->flags = 0;
del_timer_sync (&dev->delay);
tasklet_kill (&dev->bh);
+ cancel_work_sync(&dev->kevent);
if (!pm)
usb_autopm_put_interface(dev->intf);
@@ -1619,8 +1617,6 @@ void usbnet_disconnect (struct usb_interface *intf)
net = dev->net;
unregister_netdev (net);
- cancel_work_sync(&dev->kevent);
-
usb_scuttle_anchored_urbs(&dev->deferred);
if (dev->driver_info->unbind)
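The two usbnet hunks above move kevent cancellation from usbnet_disconnect() into usbnet_stop(). Unlike the whole-queue flush_scheduled_work() the old comment warned could deadlock under RTNL, cancel_work_sync() waits only for this one work item, after the flag clearing has already turned late-running workers into NOPs. The resulting teardown order, sketched:

	dev->flags = 0;			/* late work handlers see no flags and bail */
	del_timer_sync(&dev->delay);	/* no more timer-driven resubmits */
	tasklet_kill(&dev->bh);		/* bottom half has finished */
	cancel_work_sync(&dev->kevent);	/* and so has the deferred kevent work */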
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 466da01ba2e3..2cb833b3006a 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -312,6 +312,7 @@ static bool veth_skb_is_eligible_for_gro(const struct net_device *dev,
static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
+ struct netdev_queue *queue = NULL;
struct veth_rq *rq = NULL;
struct net_device *rcv;
int length = skb->len;
@@ -329,6 +330,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
rxq = skb_get_queue_mapping(skb);
if (rxq < rcv->real_num_rx_queues) {
rq = &rcv_priv->rq[rxq];
+ queue = netdev_get_tx_queue(dev, rxq);
/* The napi pointer is available when an XDP program is
* attached or when GRO is enabled
@@ -340,6 +342,8 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
skb_tx_timestamp(skb);
if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) {
+ if (queue)
+ txq_trans_cond_update(queue);
if (!use_napi)
dev_lstats_add(dev, length);
} else {
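txq_trans_cond_update() refreshes a queue's trans_start only when it has gone stale, which keeps the netdev TX watchdog from declaring a bogus transmit timeout on a veth queue whose packets are consumed by the peer rather than by hardware. The pattern, sketched (the queue index and success flag are illustrative):

	struct netdev_queue *txq = netdev_get_tx_queue(dev, qidx);

	if (forwarded_ok)		/* skb accepted by the peer device */
		txq_trans_cond_update(txq);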
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index db05b5e930be..969a67970e71 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2768,7 +2768,6 @@ static const struct ethtool_ops virtnet_ethtool_ops = {
static void virtnet_freeze_down(struct virtio_device *vdev)
{
struct virtnet_info *vi = vdev->priv;
- int i;
/* Make sure no work handler is accessing the device */
flush_work(&vi->config_work);
@@ -2776,14 +2775,8 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
netif_tx_lock_bh(vi->dev);
netif_device_detach(vi->dev);
netif_tx_unlock_bh(vi->dev);
- cancel_delayed_work_sync(&vi->refill);
-
- if (netif_running(vi->dev)) {
- for (i = 0; i < vi->max_queue_pairs; i++) {
- napi_disable(&vi->rq[i].napi);
- virtnet_napi_tx_disable(&vi->sq[i].napi);
- }
- }
+ if (netif_running(vi->dev))
+ virtnet_close(vi->dev);
}
static int init_vqs(struct virtnet_info *vi);
@@ -2791,7 +2784,7 @@ static int init_vqs(struct virtnet_info *vi);
static int virtnet_restore_up(struct virtio_device *vdev)
{
struct virtnet_info *vi = vdev->priv;
- int err, i;
+ int err;
err = init_vqs(vi);
if (err)
@@ -2800,15 +2793,9 @@ static int virtnet_restore_up(struct virtio_device *vdev)
virtio_device_ready(vdev);
if (netif_running(vi->dev)) {
- for (i = 0; i < vi->curr_queue_pairs; i++)
- if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
- schedule_delayed_work(&vi->refill, 0);
-
- for (i = 0; i < vi->max_queue_pairs; i++) {
- virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
- virtnet_napi_tx_enable(vi, vi->sq[i].vq,
- &vi->sq[i].napi);
- }
+ err = virtnet_open(vi->dev);
+ if (err)
+ return err;
}
netif_tx_lock_bh(vi->dev);
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 1565e1808a19..19c414733747 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -3365,10 +3365,17 @@ vmxnet3_declare_features(struct vmxnet3_adapter *adapter)
adapter->dev_caps[0] = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
spin_unlock_irqrestore(&adapter->cmd_lock, flags);
+ if (!(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_GENEVE_CHECKSUM_OFFLOAD)) &&
+ !(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_VXLAN_CHECKSUM_OFFLOAD)) &&
+ !(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_GENEVE_TSO)) &&
+ !(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_VXLAN_TSO))) {
+ netdev->hw_enc_features &= ~NETIF_F_GSO_UDP_TUNNEL;
+ netdev->hw_features &= ~NETIF_F_GSO_UDP_TUNNEL;
+ }
if (!(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_GENEVE_OUTER_CHECKSUM_OFFLOAD)) &&
!(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_VXLAN_OUTER_CHECKSUM_OFFLOAD))) {
netdev->hw_enc_features &= ~NETIF_F_GSO_UDP_TUNNEL_CSUM;
- netdev->features &= ~NETIF_F_GSO_UDP_TUNNEL_CSUM;
+ netdev->hw_features &= ~NETIF_F_GSO_UDP_TUNNEL_CSUM;
}
}
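The repeated dev_caps[0] tests in the vmxnet3 hunks read more clearly with a tiny predicate; a sketch, where vmxnet3_cap() is a hypothetical helper rather than part of the driver:

	static inline bool vmxnet3_cap(const struct vmxnet3_adapter *adapter,
				       unsigned int cap)
	{
		return adapter->dev_caps[0] & (1UL << cap);
	}

	/* Usage: drop UDP tunnel GSO when no inner-offload capability is left */
	if (!vmxnet3_cap(adapter, VMXNET3_CAP_GENEVE_CHECKSUM_OFFLOAD) &&
	    !vmxnet3_cap(adapter, VMXNET3_CAP_VXLAN_CHECKSUM_OFFLOAD) &&
	    !vmxnet3_cap(adapter, VMXNET3_CAP_GENEVE_TSO) &&
	    !vmxnet3_cap(adapter, VMXNET3_CAP_VXLAN_TSO))
		netdev->hw_features &= ~NETIF_F_GSO_UDP_TUNNEL;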
diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c
index ce3993282c0f..c3eaf1b864ed 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethtool.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c
@@ -346,6 +346,12 @@ static void vmxnet3_enable_encap_offloads(struct net_device *netdev, netdev_feat
adapter->dev_caps[0] = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
spin_unlock_irqrestore(&adapter->cmd_lock, flags);
+ if (!(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_GENEVE_CHECKSUM_OFFLOAD)) &&
+ !(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_VXLAN_CHECKSUM_OFFLOAD)) &&
+ !(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_GENEVE_TSO)) &&
+ !(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_VXLAN_TSO))) {
+ netdev->hw_enc_features &= ~NETIF_F_GSO_UDP_TUNNEL;
+ }
if (!(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_GENEVE_OUTER_CHECKSUM_OFFLOAD)) &&
!(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_VXLAN_OUTER_CHECKSUM_OFFLOAD))) {
netdev->hw_enc_features &= ~NETIF_F_GSO_UDP_TUNNEL_CSUM;
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 24165daee3c8..3ab2cfd254a4 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3285,8 +3285,8 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
* we have no UUID set
*/
if (uuid_is_null(&ids->uuid)) {
- printk_ratelimited(KERN_WARNING
- "No UUID available providing old NGUID\n");
+ dev_warn_ratelimited(dev,
+ "No UUID available providing old NGUID\n");
return sysfs_emit(buf, "%pU\n", ids->nguid);
}
return sysfs_emit(buf, "%pU\n", &ids->uuid);
@@ -3863,6 +3863,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
if (ret) {
dev_err(ctrl->device,
"globally duplicate IDs for nsid %d\n", nsid);
+ nvme_print_device_info(ctrl);
return ret;
}
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 9b72b6ecf33c..0da94b233fed 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -503,6 +503,7 @@ struct nvme_ctrl_ops {
void (*submit_async_event)(struct nvme_ctrl *ctrl);
void (*delete_ctrl)(struct nvme_ctrl *ctrl);
int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
+ void (*print_device_info)(struct nvme_ctrl *ctrl);
};
/*
@@ -548,6 +549,33 @@ static inline struct request *nvme_cid_to_rq(struct blk_mq_tags *tags,
return blk_mq_tag_to_rq(tags, nvme_tag_from_cid(command_id));
}
+/*
+ * Return the length of the string without the space padding
+ */
+static inline int nvme_strlen(char *s, int len)
+{
+ while (s[len - 1] == ' ')
+ len--;
+ return len;
+}
+
+static inline void nvme_print_device_info(struct nvme_ctrl *ctrl)
+{
+ struct nvme_subsystem *subsys = ctrl->subsys;
+
+ if (ctrl->ops->print_device_info) {
+ ctrl->ops->print_device_info(ctrl);
+ return;
+ }
+
+ dev_err(ctrl->device,
+ "VID:%04x model:%.*s firmware:%.*s\n", subsys->vendor_id,
+ nvme_strlen(subsys->model, sizeof(subsys->model)),
+ subsys->model, nvme_strlen(subsys->firmware_rev,
+ sizeof(subsys->firmware_rev)),
+ subsys->firmware_rev);
+}
+
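nvme_strlen() trims the space padding that NVMe Identify string fields carry, so the %.*s format prints only the significant bytes. A small usage sketch with illustrative values (the helper assumes at least one non-space byte):

	char model[40];

	memcpy(model, "Samsung SSD 970 EVO", 19);
	memset(model + 19, ' ', sizeof(model) - 19);	/* spec-mandated padding */

	/* nvme_strlen(model, sizeof(model)) returns 19 */
	pr_info("model:%.*s\n", nvme_strlen(model, sizeof(model)), model);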
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
void nvme_fault_inject_init(struct nvme_fault_inject *fault_inj,
const char *dev_name);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 48f4f6eb877b..c7012e85d035 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1334,6 +1334,14 @@ static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
dev_warn(dev->ctrl.device,
"controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
csts, result);
+
+ if (csts != ~0)
+ return;
+
+ dev_warn(dev->ctrl.device,
+ "Does your device have a faulty power saving mode enabled?\n");
+ dev_warn(dev->ctrl.device,
+ "Try \"nvme_core.default_ps_max_latency_us=0 pcie_aspm=off\" and report a bug\n");
}
static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
@@ -2976,6 +2984,21 @@ static int nvme_pci_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
return snprintf(buf, size, "%s\n", dev_name(&pdev->dev));
}
+
+static void nvme_pci_print_device_info(struct nvme_ctrl *ctrl)
+{
+ struct pci_dev *pdev = to_pci_dev(to_nvme_dev(ctrl)->dev);
+ struct nvme_subsystem *subsys = ctrl->subsys;
+
+ dev_err(ctrl->device,
+ "VID:DID %04x:%04x model:%.*s firmware:%.*s\n",
+ pdev->vendor, pdev->device,
+ nvme_strlen(subsys->model, sizeof(subsys->model)),
+ subsys->model, nvme_strlen(subsys->firmware_rev,
+ sizeof(subsys->firmware_rev)),
+ subsys->firmware_rev);
+}
+
static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.name = "pcie",
.module = THIS_MODULE,
@@ -2987,6 +3010,7 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.free_ctrl = nvme_pci_free_ctrl,
.submit_async_event = nvme_pci_submit_async_event,
.get_address = nvme_pci_get_address,
+ .print_device_info = nvme_pci_print_device_info,
};
static int nvme_dev_map(struct nvme_dev *dev)
@@ -3421,7 +3445,8 @@ static const struct pci_device_id nvme_id_table[] = {
{ PCI_VDEVICE(REDHAT, 0x0010), /* Qemu emulated controller */
.driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x126f, 0x2263), /* Silicon Motion unidentified */
- .driver_data = NVME_QUIRK_NO_NS_DESC_LIST, },
+ .driver_data = NVME_QUIRK_NO_NS_DESC_LIST |
+ NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1bb1, 0x0100), /* Seagate Nytro Flash Storage */
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY |
NVME_QUIRK_NO_NS_DESC_LIST, },
@@ -3437,6 +3462,8 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY |
NVME_QUIRK_DISABLE_WRITE_ZEROES|
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
+ { PCI_DEVICE(0x1987, 0x5012), /* Phison E12 */
+ .driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1987, 0x5016), /* Phison E16 */
.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_DEVICE(0x1b4b, 0x1092), /* Lexar 256 GB SSD */
@@ -3449,10 +3476,20 @@ static const struct pci_device_id nvme_id_table[] = {
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_DEVICE(0x1c5c, 0x1504), /* SK Hynix PC400 */
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+ { PCI_DEVICE(0x1c5c, 0x174a), /* SK Hynix P31 SSD */
+ .driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x15b7, 0x2001), /* Sandisk Skyhawk */
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x1d97, 0x2263), /* SPCC */
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+ { PCI_DEVICE(0x144d, 0xa80b), /* Samsung PM9B1 256G and 512G */
+ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+ { PCI_DEVICE(0x144d, 0xa809), /* Samsung MZALQ256HBJD 256G */
+ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+ { PCI_DEVICE(0x1cc4, 0x6303), /* UMIS RPJTJ512MGE1QDY 512G */
+ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+ { PCI_DEVICE(0x1cc4, 0x6302), /* UMIS RPJTJ256MGE1QDY 256G */
+ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x2646, 0x2262), /* KINGSTON SKC2000 NVMe SSD */
.driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
{ PCI_DEVICE(0x2646, 0x2263), /* KINGSTON A2000 NVMe SSD */
@@ -3463,6 +3500,10 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1e4B, 0x1202), /* MAXIO MAP1202 */
.driver_data = NVME_QUIRK_BOGUS_NID, },
+ { PCI_DEVICE(0x1cc1, 0x5350), /* ADATA XPG GAMMIX S50 */
+ .driver_data = NVME_QUIRK_BOGUS_NID, },
+ { PCI_DEVICE(0x1e49, 0x0041), /* ZHITAI TiPro7000 NVMe SSD */
+ .driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061),
.driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, },
{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0065),
diff --git a/drivers/platform/mellanox/Kconfig b/drivers/platform/mellanox/Kconfig
index 72df4b8f4dd8..09c7829e95c4 100644
--- a/drivers/platform/mellanox/Kconfig
+++ b/drivers/platform/mellanox/Kconfig
@@ -85,7 +85,7 @@ config NVSW_SN2201
depends on I2C
depends on REGMAP_I2C
help
- This driver provides support for the Nvidia SN2201 platfom.
+ This driver provides support for the Nvidia SN2201 platform.
The SN2201 is a highly integrated one-rack-unit system with
L3 management switches. It has 48 x 1Gbps RJ45 + 4 x 100G QSFP28
ports in a compact 1RU form factor. The system also includes a
diff --git a/drivers/platform/mellanox/nvsw-sn2201.c b/drivers/platform/mellanox/nvsw-sn2201.c
index 0bcdc7c75007..2923daf63b75 100644
--- a/drivers/platform/mellanox/nvsw-sn2201.c
+++ b/drivers/platform/mellanox/nvsw-sn2201.c
@@ -326,7 +326,7 @@ static struct resource nvsw_sn2201_lpc_res[] = {
};
/* SN2201 I2C platform data. */
-struct mlxreg_core_hotplug_platform_data nvsw_sn2201_i2c_data = {
+static struct mlxreg_core_hotplug_platform_data nvsw_sn2201_i2c_data = {
.irq = NVSW_SN2201_CPLD_SYSIRQ,
};
diff --git a/drivers/platform/x86/barco-p50-gpio.c b/drivers/platform/x86/barco-p50-gpio.c
index 05534287bc26..8dd672339485 100644
--- a/drivers/platform/x86/barco-p50-gpio.c
+++ b/drivers/platform/x86/barco-p50-gpio.c
@@ -405,11 +405,14 @@ MODULE_DEVICE_TABLE(dmi, dmi_ids);
static int __init p50_module_init(void)
{
struct resource res = DEFINE_RES_IO(P50_GPIO_IO_PORT_BASE, P50_PORT_CMD + 1);
+ int ret;
if (!dmi_first_match(dmi_ids))
return -ENODEV;
- platform_driver_register(&p50_gpio_driver);
+ ret = platform_driver_register(&p50_gpio_driver);
+ if (ret)
+ return ret;
gpio_pdev = platform_device_register_simple(DRIVER_NAME, PLATFORM_DEVID_NONE, &res, 1);
if (IS_ERR(gpio_pdev)) {
diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c
index 1ef606e3ef80..497ad2f64a51 100644
--- a/drivers/platform/x86/gigabyte-wmi.c
+++ b/drivers/platform/x86/gigabyte-wmi.c
@@ -140,6 +140,7 @@ static u8 gigabyte_wmi_detect_sensor_usability(struct wmi_device *wdev)
}}
static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = {
+ DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B450M DS3H-CF"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B450M S2H V2"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE AX V2"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE"),
@@ -156,6 +157,7 @@ static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = {
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 GAMING X"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 I AORUS PRO WIFI"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 UD"),
+ DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z690M AORUS ELITE AX DDR4"),
{ }
};
diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c
index 667f94bba905..0d8cb22e30df 100644
--- a/drivers/platform/x86/hp-wmi.c
+++ b/drivers/platform/x86/hp-wmi.c
@@ -38,6 +38,7 @@ MODULE_ALIAS("wmi:5FB7F034-2C63-45e9-BE91-3D44E2C707E4");
#define HPWMI_EVENT_GUID "95F24279-4D7B-4334-9387-ACCDC67EF61C"
#define HPWMI_BIOS_GUID "5FB7F034-2C63-45e9-BE91-3D44E2C707E4"
#define HP_OMEN_EC_THERMAL_PROFILE_OFFSET 0x95
#define zero_if_sup(tmp) (zero_insize_support ? 0 : sizeof(tmp)) // use when zero insize is required
/* DMI board names of devices that should use the omen specific path for
* thermal profiles.
@@ -220,6 +221,7 @@ static struct input_dev *hp_wmi_input_dev;
static struct platform_device *hp_wmi_platform_dev;
static struct platform_profile_handler platform_profile_handler;
static bool platform_profile_support;
+static bool zero_insize_support;
static struct rfkill *wifi_rfkill;
static struct rfkill *bluetooth_rfkill;
@@ -290,14 +292,16 @@ static int hp_wmi_perform_query(int query, enum hp_wmi_command command,
struct bios_return *bios_return;
union acpi_object *obj = NULL;
struct bios_args *args = NULL;
- int mid, actual_outsize, ret;
+ int mid, actual_insize, actual_outsize;
size_t bios_args_size;
+ int ret;
mid = encode_outsize_for_pvsz(outsize);
if (WARN_ON(mid < 0))
return mid;
- bios_args_size = struct_size(args, data, insize);
+ actual_insize = max(insize, 128);
+ bios_args_size = struct_size(args, data, actual_insize);
args = kmalloc(bios_args_size, GFP_KERNEL);
if (!args)
return -ENOMEM;
@@ -374,7 +378,7 @@ static int hp_wmi_read_int(int query)
int val = 0, ret;
ret = hp_wmi_perform_query(query, HPWMI_READ, &val,
- 0, sizeof(val));
+ zero_if_sup(val), sizeof(val));
if (ret)
return ret < 0 ? ret : -EINVAL;
@@ -410,7 +414,8 @@ static int hp_wmi_get_tablet_mode(void)
return -ENODEV;
ret = hp_wmi_perform_query(HPWMI_SYSTEM_DEVICE_MODE, HPWMI_READ,
- system_device_mode, 0, sizeof(system_device_mode));
+ system_device_mode, zero_if_sup(system_device_mode),
+ sizeof(system_device_mode));
if (ret < 0)
return ret;
@@ -497,7 +502,7 @@ static int hp_wmi_fan_speed_max_get(void)
int val = 0, ret;
ret = hp_wmi_perform_query(HPWMI_FAN_SPEED_MAX_GET_QUERY, HPWMI_GM,
- &val, 0, sizeof(val));
+ &val, zero_if_sup(val), sizeof(val));
if (ret)
return ret < 0 ? ret : -EINVAL;
@@ -509,7 +514,7 @@ static int __init hp_wmi_bios_2008_later(void)
{
int state = 0;
int ret = hp_wmi_perform_query(HPWMI_FEATURE_QUERY, HPWMI_READ, &state,
- 0, sizeof(state));
+ zero_if_sup(state), sizeof(state));
if (!ret)
return 1;
@@ -520,7 +525,7 @@ static int __init hp_wmi_bios_2009_later(void)
{
u8 state[128];
int ret = hp_wmi_perform_query(HPWMI_FEATURE2_QUERY, HPWMI_READ, &state,
- 0, sizeof(state));
+ zero_if_sup(state), sizeof(state));
if (!ret)
return 1;
@@ -598,7 +603,7 @@ static int hp_wmi_rfkill2_refresh(void)
int err, i;
err = hp_wmi_perform_query(HPWMI_WIRELESS2_QUERY, HPWMI_READ, &state,
- 0, sizeof(state));
+ zero_if_sup(state), sizeof(state));
if (err)
return err;
@@ -1007,7 +1012,7 @@ static int __init hp_wmi_rfkill2_setup(struct platform_device *device)
int err, i;
err = hp_wmi_perform_query(HPWMI_WIRELESS2_QUERY, HPWMI_READ, &state,
- 0, sizeof(state));
+ zero_if_sup(state), sizeof(state));
if (err)
return err < 0 ? err : -EINVAL;
@@ -1483,11 +1488,15 @@ static int __init hp_wmi_init(void)
{
int event_capable = wmi_has_guid(HPWMI_EVENT_GUID);
int bios_capable = wmi_has_guid(HPWMI_BIOS_GUID);
- int err;
+ int err, tmp = 0;
if (!bios_capable && !event_capable)
return -ENODEV;
+ if (hp_wmi_perform_query(HPWMI_HARDWARE_QUERY, HPWMI_READ, &tmp,
+ sizeof(tmp), sizeof(tmp)) == HPWMI_RET_INVALID_PARAMETERS)
+ zero_insize_support = true;
+
if (event_capable) {
err = hp_wmi_input_setup();
if (err)
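The probe in hp_wmi_init() above sends HPWMI_HARDWARE_QUERY with insize == sizeof(tmp); firmware that answers HPWMI_RET_INVALID_PARAMETERS wants a zero insize instead, and zero_if_sup() encodes that choice for every later read. The macro effectively expands to:

	/* zero_if_sup(val) == (zero_insize_support ? 0 : sizeof(val)) */
	ret = hp_wmi_perform_query(query, HPWMI_READ, &val,
				   zero_insize_support ? 0 : sizeof(val),
				   sizeof(val));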
diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c
index 216d31e3403d..79cff1fc675c 100644
--- a/drivers/platform/x86/intel/hid.c
+++ b/drivers/platform/x86/intel/hid.c
@@ -122,6 +122,12 @@ static const struct dmi_system_id dmi_vgbs_allow_list[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "HP Spectre x360 Convertible 15-df0xxx"),
},
},
+ {
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Surface Go"),
+ },
+ },
{ }
};
diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c
index edaf22e5ae98..40183bda7894 100644
--- a/drivers/platform/x86/intel/pmc/core.c
+++ b/drivers/platform/x86/intel/pmc/core.c
@@ -1912,6 +1912,7 @@ static const struct x86_cpu_id intel_pmc_core_ids[] = {
X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &tgl_reg_map),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &tgl_reg_map),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_reg_map),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &tgl_reg_map),
{}
};
diff --git a/drivers/platform/x86/intel/pmt/crashlog.c b/drivers/platform/x86/intel/pmt/crashlog.c
index 34daf9df168b..ace1239bc0a0 100644
--- a/drivers/platform/x86/intel/pmt/crashlog.c
+++ b/drivers/platform/x86/intel/pmt/crashlog.c
@@ -282,7 +282,7 @@ static int pmt_crashlog_probe(struct auxiliary_device *auxdev,
auxiliary_set_drvdata(auxdev, priv);
for (i = 0; i < intel_vsec_dev->num_resources; i++) {
- struct intel_pmt_entry *entry = &priv->entry[i].entry;
+ struct intel_pmt_entry *entry = &priv->entry[priv->num_entries].entry;
ret = intel_pmt_dev_create(entry, &pmt_crashlog_ns, intel_vsec_dev, i);
if (ret < 0)
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index d0eab5700dc5..00684e11976b 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -160,8 +160,8 @@ static void ibmvfc_npiv_logout(struct ibmvfc_host *);
static void ibmvfc_tgt_implicit_logout_and_del(struct ibmvfc_target *);
static void ibmvfc_tgt_move_login(struct ibmvfc_target *);
-static void ibmvfc_release_sub_crqs(struct ibmvfc_host *);
-static void ibmvfc_init_sub_crqs(struct ibmvfc_host *);
+static void ibmvfc_dereg_sub_crqs(struct ibmvfc_host *);
+static void ibmvfc_reg_sub_crqs(struct ibmvfc_host *);
static const char *unknown_error = "unknown error";
@@ -917,7 +917,7 @@ static int ibmvfc_reenable_crq_queue(struct ibmvfc_host *vhost)
struct vio_dev *vdev = to_vio_dev(vhost->dev);
unsigned long flags;
- ibmvfc_release_sub_crqs(vhost);
+ ibmvfc_dereg_sub_crqs(vhost);
/* Re-enable the CRQ */
do {
@@ -936,7 +936,7 @@ static int ibmvfc_reenable_crq_queue(struct ibmvfc_host *vhost)
spin_unlock(vhost->crq.q_lock);
spin_unlock_irqrestore(vhost->host->host_lock, flags);
- ibmvfc_init_sub_crqs(vhost);
+ ibmvfc_reg_sub_crqs(vhost);
return rc;
}
@@ -955,7 +955,7 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
struct vio_dev *vdev = to_vio_dev(vhost->dev);
struct ibmvfc_queue *crq = &vhost->crq;
- ibmvfc_release_sub_crqs(vhost);
+ ibmvfc_dereg_sub_crqs(vhost);
/* Close the CRQ */
do {
@@ -988,7 +988,7 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
spin_unlock(vhost->crq.q_lock);
spin_unlock_irqrestore(vhost->host->host_lock, flags);
- ibmvfc_init_sub_crqs(vhost);
+ ibmvfc_reg_sub_crqs(vhost);
return rc;
}
@@ -5682,6 +5682,8 @@ static int ibmvfc_alloc_queue(struct ibmvfc_host *vhost,
queue->cur = 0;
queue->fmt = fmt;
queue->size = PAGE_SIZE / fmt_size;
+
+ queue->vhost = vhost;
return 0;
}
@@ -5757,9 +5759,6 @@ static int ibmvfc_register_scsi_channel(struct ibmvfc_host *vhost,
ENTER;
- if (ibmvfc_alloc_queue(vhost, scrq, IBMVFC_SUB_CRQ_FMT))
- return -ENOMEM;
-
rc = h_reg_sub_crq(vdev->unit_address, scrq->msg_token, PAGE_SIZE,
&scrq->cookie, &scrq->hw_irq);
@@ -5790,7 +5789,6 @@ static int ibmvfc_register_scsi_channel(struct ibmvfc_host *vhost,
}
scrq->hwq_id = index;
- scrq->vhost = vhost;
LEAVE;
return 0;
@@ -5800,7 +5798,6 @@ irq_failed:
rc = plpar_hcall_norets(H_FREE_SUB_CRQ, vdev->unit_address, scrq->cookie);
} while (rtas_busy_delay(rc));
reg_failed:
- ibmvfc_free_queue(vhost, scrq);
LEAVE;
return rc;
}
@@ -5826,12 +5823,50 @@ static void ibmvfc_deregister_scsi_channel(struct ibmvfc_host *vhost, int index)
if (rc)
dev_err(dev, "Failed to free sub-crq[%d]: rc=%ld\n", index, rc);
- ibmvfc_free_queue(vhost, scrq);
+ /* Clean out the queue */
+ memset(scrq->msgs.crq, 0, PAGE_SIZE);
+ scrq->cur = 0;
+
+ LEAVE;
+}
+
+static void ibmvfc_reg_sub_crqs(struct ibmvfc_host *vhost)
+{
+ int i, j;
+
+ ENTER;
+ if (!vhost->mq_enabled || !vhost->scsi_scrqs.scrqs)
+ return;
+
+ for (i = 0; i < nr_scsi_hw_queues; i++) {
+ if (ibmvfc_register_scsi_channel(vhost, i)) {
+ for (j = i; j > 0; j--)
+ ibmvfc_deregister_scsi_channel(vhost, j - 1);
+ vhost->do_enquiry = 0;
+ return;
+ }
+ }
+
+ LEAVE;
+}
+
+static void ibmvfc_dereg_sub_crqs(struct ibmvfc_host *vhost)
+{
+ int i;
+
+ ENTER;
+ if (!vhost->mq_enabled || !vhost->scsi_scrqs.scrqs)
+ return;
+
+ for (i = 0; i < nr_scsi_hw_queues; i++)
+ ibmvfc_deregister_scsi_channel(vhost, i);
+
LEAVE;
}
static void ibmvfc_init_sub_crqs(struct ibmvfc_host *vhost)
{
+ struct ibmvfc_queue *scrq;
int i, j;
ENTER;
@@ -5847,30 +5882,41 @@ static void ibmvfc_init_sub_crqs(struct ibmvfc_host *vhost)
}
for (i = 0; i < nr_scsi_hw_queues; i++) {
- if (ibmvfc_register_scsi_channel(vhost, i)) {
- for (j = i; j > 0; j--)
- ibmvfc_deregister_scsi_channel(vhost, j - 1);
+ scrq = &vhost->scsi_scrqs.scrqs[i];
+ if (ibmvfc_alloc_queue(vhost, scrq, IBMVFC_SUB_CRQ_FMT)) {
+ for (j = i; j > 0; j--) {
+ scrq = &vhost->scsi_scrqs.scrqs[j - 1];
+ ibmvfc_free_queue(vhost, scrq);
+ }
kfree(vhost->scsi_scrqs.scrqs);
vhost->scsi_scrqs.scrqs = NULL;
vhost->scsi_scrqs.active_queues = 0;
vhost->do_enquiry = 0;
- break;
+ vhost->mq_enabled = 0;
+ return;
}
}
+ ibmvfc_reg_sub_crqs(vhost);
+
LEAVE;
}
static void ibmvfc_release_sub_crqs(struct ibmvfc_host *vhost)
{
+ struct ibmvfc_queue *scrq;
int i;
ENTER;
if (!vhost->scsi_scrqs.scrqs)
return;
- for (i = 0; i < nr_scsi_hw_queues; i++)
- ibmvfc_deregister_scsi_channel(vhost, i);
+ ibmvfc_dereg_sub_crqs(vhost);
+
+ for (i = 0; i < nr_scsi_hw_queues; i++) {
+ scrq = &vhost->scsi_scrqs.scrqs[i];
+ ibmvfc_free_queue(vhost, scrq);
+ }
kfree(vhost->scsi_scrqs.scrqs);
vhost->scsi_scrqs.scrqs = NULL;
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h
index 3718406e0988..c39a245f43d0 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.h
+++ b/drivers/scsi/ibmvscsi/ibmvfc.h
@@ -789,6 +789,7 @@ struct ibmvfc_queue {
spinlock_t _lock;
spinlock_t *q_lock;
+ struct ibmvfc_host *vhost;
struct ibmvfc_event_pool evt_pool;
struct list_head sent;
struct list_head free;
@@ -797,7 +798,6 @@ struct ibmvfc_queue {
union ibmvfc_iu cancel_rsp;
/* Sub-CRQ fields */
- struct ibmvfc_host *vhost;
unsigned long cookie;
unsigned long vios_cookie;
unsigned long hw_irq;
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 256ec6d08c16..9d01a3e3c26a 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -9795,7 +9795,7 @@ static int ipr_alloc_mem(struct ipr_ioa_cfg *ioa_cfg)
GFP_KERNEL);
if (!ioa_cfg->hrrq[i].host_rrq) {
- while (--i > 0)
+ while (--i >= 0)
dma_free_coherent(&pdev->dev,
sizeof(u32) * ioa_cfg->hrrq[i].size,
ioa_cfg->hrrq[i].host_rrq,
@@ -10068,7 +10068,7 @@ static int ipr_request_other_msi_irqs(struct ipr_ioa_cfg *ioa_cfg,
ioa_cfg->vectors_info[i].desc,
&ioa_cfg->hrrq[i]);
if (rc) {
- while (--i >= 0)
+ while (--i > 0)
free_irq(pci_irq_vector(pdev, i),
&ioa_cfg->hrrq[i]);
return rc;
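The two ipr hunks fix mirror-image off-by-one bugs in error-unwind loops (pmcraid below gets the same >= 0 treatment). The general pattern when entry i failed and entries 0..i-1 must be released, as a generic sketch rather than ipr code:

	for (i = 0; i < n; i++) {
		if (setup(i))
			goto unwind;
	}
	return 0;

unwind:
	while (--i >= 0)	/* releases i-1 down to and including entry 0 */
		teardown(i);
	return rc;

The variant while (--i > 0) is only correct when entry 0 is released on another path, which is apparently the case for the extra MSI vectors in ipr_request_other_msi_irqs().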
diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index b1be0dd0337a..f5d74958b664 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -420,8 +420,6 @@ int lpfc_sli_issue_iocb_wait(struct lpfc_hba *, uint32_t,
uint32_t);
void lpfc_sli_abort_fcp_cmpl(struct lpfc_hba *, struct lpfc_iocbq *,
struct lpfc_iocbq *);
-void lpfc_sli4_abort_fcp_cmpl(struct lpfc_hba *h, struct lpfc_iocbq *i,
- struct lpfc_wcqe_complete *w);
void lpfc_sli_free_hbq(struct lpfc_hba *, struct hbq_dmabuf *);
@@ -630,7 +628,7 @@ void lpfc_nvmet_invalidate_host(struct lpfc_hba *phba,
struct lpfc_nodelist *ndlp);
void lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba,
struct lpfc_iocbq *cmdiocb,
- struct lpfc_wcqe_complete *abts_cmpl);
+ struct lpfc_iocbq *rspiocb);
void lpfc_create_multixri_pools(struct lpfc_hba *phba);
void lpfc_create_destroy_pools(struct lpfc_hba *phba);
void lpfc_move_xri_pvt_to_pbl(struct lpfc_hba *phba, u32 hwqid);
diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c
index 9d36b20fb878..13dfe285493d 100644
--- a/drivers/scsi/lpfc/lpfc_ct.c
+++ b/drivers/scsi/lpfc/lpfc_ct.c
@@ -197,7 +197,7 @@ lpfc_ct_reject_event(struct lpfc_nodelist *ndlp,
memset(bpl, 0, sizeof(struct ulp_bde64));
bpl->addrHigh = le32_to_cpu(putPaddrHigh(mp->phys));
bpl->addrLow = le32_to_cpu(putPaddrLow(mp->phys));
- bpl->tus.f.bdeFlags = BUFF_TYPE_BLP_64;
+ bpl->tus.f.bdeFlags = BUFF_TYPE_BDE_64;
bpl->tus.f.bdeSize = (LPFC_CT_PREAMBLE - 4);
bpl->tus.w = le32_to_cpu(bpl->tus.w);
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 07f9a6e61e10..3fababb7c181 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -2998,10 +2998,7 @@ lpfc_cmpl_els_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
ndlp->nlp_DID, ulp_status,
ulp_word4);
- /* Call NLP_EVT_DEVICE_RM if link is down or LOGO is aborted */
if (lpfc_error_lost_link(ulp_status, ulp_word4)) {
- lpfc_disc_state_machine(vport, ndlp, cmdiocb,
- NLP_EVT_DEVICE_RM);
skip_recovery = 1;
goto out;
}
@@ -3021,18 +3018,10 @@ lpfc_cmpl_els_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
spin_unlock_irq(&ndlp->lock);
lpfc_disc_state_machine(vport, ndlp, cmdiocb,
NLP_EVT_DEVICE_RM);
- lpfc_els_free_iocb(phba, cmdiocb);
- lpfc_nlp_put(ndlp);
-
- /* Presume the node was released. */
- return;
+ goto out_rsrc_free;
}
out:
- /* Driver is done with the IO. */
- lpfc_els_free_iocb(phba, cmdiocb);
- lpfc_nlp_put(ndlp);
-
/* At this point, the LOGO processing is complete. NOTE: For a
* pt2pt topology, we are assuming the NPortID will only change
* on link up processing. For a LOGO / PLOGI initiated by the
@@ -3059,6 +3048,10 @@ out:
ndlp->nlp_DID, ulp_status,
ulp_word4, tmo,
vport->num_disc_nodes);
+
+ lpfc_els_free_iocb(phba, cmdiocb);
+ lpfc_nlp_put(ndlp);
+
lpfc_disc_start(vport);
return;
}
@@ -3075,6 +3068,10 @@ out:
lpfc_disc_state_machine(vport, ndlp, cmdiocb,
NLP_EVT_DEVICE_RM);
}
+out_rsrc_free:
+ /* Driver is done with the I/O. */
+ lpfc_els_free_iocb(phba, cmdiocb);
+ lpfc_nlp_put(ndlp);
}
/**
diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h
index 8511369d2cf8..f024415731ac 100644
--- a/drivers/scsi/lpfc/lpfc_hw4.h
+++ b/drivers/scsi/lpfc/lpfc_hw4.h
@@ -4487,6 +4487,9 @@ struct wqe_common {
#define wqe_sup_SHIFT 6
#define wqe_sup_MASK 0x00000001
#define wqe_sup_WORD word11
+#define wqe_ffrq_SHIFT 6
+#define wqe_ffrq_MASK 0x00000001
+#define wqe_ffrq_WORD word11
#define wqe_wqec_SHIFT 7
#define wqe_wqec_MASK 0x00000001
#define wqe_wqec_WORD word11
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 93b94c64518d..750dd1e9f2cc 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -12188,7 +12188,7 @@ lpfc_sli_enable_msi(struct lpfc_hba *phba)
rc = pci_enable_msi(phba->pcidev);
if (!rc)
lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
- "0462 PCI enable MSI mode success.\n");
+ "0012 PCI enable MSI mode success.\n");
else {
lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
"0471 PCI enable MSI mode failed (%d)\n", rc);
diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c
index 639f86635127..b86ff9fcdf0c 100644
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -834,7 +834,8 @@ lpfc_rcv_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
lpfc_nvmet_invalidate_host(phba, ndlp);
if (ndlp->nlp_DID == Fabric_DID) {
- if (vport->port_state <= LPFC_FDISC)
+ if (vport->port_state <= LPFC_FDISC ||
+ vport->fc_flag & FC_PT2PT)
goto out;
lpfc_linkdown_port(vport);
spin_lock_irq(shost->host_lock);
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index 335e90633933..cd10ee6482fc 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -1065,25 +1065,37 @@ lpfc_nvme_io_cmd_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
nCmd->rcv_rsplen = wcqe->parameter;
nCmd->status = 0;
+ /* Get the NVME cmd details for this unique error. */
+ cp = (struct nvme_fc_cmd_iu *)nCmd->cmdaddr;
+ ep = (struct nvme_fc_ersp_iu *)nCmd->rspaddr;
+
/* Check if this is really an ERSP */
if (nCmd->rcv_rsplen == LPFC_NVME_ERSP_LEN) {
lpfc_ncmd->status = IOSTAT_SUCCESS;
lpfc_ncmd->result = 0;
lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME,
- "6084 NVME Completion ERSP: "
- "xri %x placed x%x\n",
- lpfc_ncmd->cur_iocbq.sli4_xritag,
- wcqe->total_data_placed);
+ "6084 NVME FCP_ERR ERSP: "
+ "xri %x placed x%x opcode x%x cmd_id "
+ "x%x cqe_status x%x\n",
+ lpfc_ncmd->cur_iocbq.sli4_xritag,
+ wcqe->total_data_placed,
+ cp->sqe.common.opcode,
+ cp->sqe.common.command_id,
+ ep->cqe.status);
break;
}
lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
"6081 NVME Completion Protocol Error: "
"xri %x status x%x result x%x "
- "placed x%x\n",
+ "placed x%x opcode x%x cmd_id x%x, "
+ "cqe_status x%x\n",
lpfc_ncmd->cur_iocbq.sli4_xritag,
lpfc_ncmd->status, lpfc_ncmd->result,
- wcqe->total_data_placed);
+ wcqe->total_data_placed,
+ cp->sqe.common.opcode,
+ cp->sqe.common.command_id,
+ ep->cqe.status);
break;
case IOSTAT_LOCAL_REJECT:
/* Let fall through to set command final state. */
@@ -1195,7 +1207,8 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
{
struct lpfc_hba *phba = vport->phba;
struct nvmefc_fcp_req *nCmd = lpfc_ncmd->nvmeCmd;
- struct lpfc_iocbq *pwqeq = &(lpfc_ncmd->cur_iocbq);
+ struct nvme_common_command *sqe;
+ struct lpfc_iocbq *pwqeq = &lpfc_ncmd->cur_iocbq;
union lpfc_wqe128 *wqe = &pwqeq->wqe;
uint32_t req_len;
@@ -1252,8 +1265,14 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
cstat->control_requests++;
}
- if (pnode->nlp_nvme_info & NLP_NVME_NSLER)
+ if (pnode->nlp_nvme_info & NLP_NVME_NSLER) {
bf_set(wqe_erp, &wqe->generic.wqe_com, 1);
+ sqe = &((struct nvme_fc_cmd_iu *)
+ nCmd->cmdaddr)->sqe.common;
+ if (sqe->opcode == nvme_admin_async_event)
+ bf_set(wqe_ffrq, &wqe->generic.wqe_com, 1);
+ }
+
/*
* Finish initializing those WQE fields that are independent
* of the nvme_cmnd request_buffer
@@ -1787,7 +1806,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
* lpfc_nvme_abort_fcreq_cmpl - Complete an NVME FCP abort request.
* @phba: Pointer to HBA context object
* @cmdiocb: Pointer to command iocb object.
- * @abts_cmpl: Pointer to wcqe complete object.
+ * @rspiocb: Pointer to response iocb object.
*
* This is the callback function for any NVME FCP IO that was aborted.
*
@@ -1796,8 +1815,10 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
**/
void
lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
- struct lpfc_wcqe_complete *abts_cmpl)
+ struct lpfc_iocbq *rspiocb)
{
+ struct lpfc_wcqe_complete *abts_cmpl = &rspiocb->wcqe_cmpl;
+
lpfc_printf_log(phba, KERN_INFO, LOG_NVME,
"6145 ABORT_XRI_CN completing on rpi x%x "
"original iotag x%x, abort cmd iotag x%x "
@@ -1840,6 +1861,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
struct lpfc_nvme_fcpreq_priv *freqpriv;
unsigned long flags;
int ret_val;
+ struct nvme_fc_cmd_iu *cp;
/* Validate pointers. LLDD fault handling with transport does
* have timing races.
@@ -1963,10 +1985,16 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
return;
}
+ /*
+ * Get Command Id from cmd to plug into response. This
+ * code is not needed in the next NVME Transport drop.
+ */
+ cp = (struct nvme_fc_cmd_iu *)lpfc_nbuf->nvmeCmd->cmdaddr;
lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS,
"6138 Transport Abort NVME Request Issued for "
- "ox_id x%x\n",
- nvmereq_wqe->sli4_xritag);
+ "ox_id x%x nvme opcode x%x nvme cmd_id x%x\n",
+ nvmereq_wqe->sli4_xritag, cp->sqe.common.opcode,
+ cp->sqe.common.command_id);
return;
out_unlock:
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index d43968203248..ba5e4016262e 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -6062,6 +6062,9 @@ lpfc_device_reset_handler(struct scsi_cmnd *cmnd)
int status;
u32 logit = LOG_FCP;
+ if (!rport)
+ return FAILED;
+
rdata = rport->dd_data;
if (!rdata || !rdata->pnode) {
lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
@@ -6140,6 +6143,9 @@ lpfc_target_reset_handler(struct scsi_cmnd *cmnd)
unsigned long flags;
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(waitq);
+ if (!rport)
+ return FAILED;
+
rdata = rport->dd_data;
if (!rdata || !rdata->pnode) {
lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 6ed696c4602a..80ac3a051c19 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -1930,7 +1930,7 @@ lpfc_issue_cmf_sync_wqe(struct lpfc_hba *phba, u32 ms, u64 total)
sync_buf = __lpfc_sli_get_iocbq(phba);
if (!sync_buf) {
lpfc_printf_log(phba, KERN_ERR, LOG_CGN_MGMT,
- "6213 No available WQEs for CMF_SYNC_WQE\n");
+ "6244 No available WQEs for CMF_SYNC_WQE\n");
ret_val = ENOMEM;
goto out_unlock;
}
@@ -3805,7 +3805,7 @@ lpfc_sli_process_sol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
set_job_ulpword4(cmdiocbp,
IOERR_ABORT_REQUESTED);
/*
- * For SLI4, irsiocb contains
+ * For SLI4, irspiocb contains
* NO_XRI in sli_xritag, it
* shall not affect releasing
* sgl (xri) process.
@@ -3823,7 +3823,7 @@ lpfc_sli_process_sol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
}
}
}
- (cmdiocbp->cmd_cmpl) (phba, cmdiocbp, saveq);
+ cmdiocbp->cmd_cmpl(phba, cmdiocbp, saveq);
} else
lpfc_sli_release_iocbq(phba, cmdiocbp);
} else {
@@ -4063,8 +4063,7 @@ lpfc_sli_handle_fast_ring_event(struct lpfc_hba *phba,
cmdiocbq->cmd_flag &= ~LPFC_DRIVER_ABORTED;
if (cmdiocbq->cmd_cmpl) {
spin_unlock_irqrestore(&phba->hbalock, iflag);
- (cmdiocbq->cmd_cmpl)(phba, cmdiocbq,
- &rspiocbq);
+ cmdiocbq->cmd_cmpl(phba, cmdiocbq, &rspiocbq);
spin_lock_irqsave(&phba->hbalock, iflag);
}
break;
@@ -10288,7 +10287,7 @@ __lpfc_sli_issue_iocb_s3(struct lpfc_hba *phba, uint32_t ring_number,
* @flag: Flag indicating if this command can be put into txq.
*
* __lpfc_sli_issue_fcp_io_s3 is wrapper function to invoke lockless func to
- * send an iocb command to an HBA with SLI-4 interface spec.
+ * send an iocb command to an HBA with SLI-3 interface spec.
*
* This function takes the hbalock before invoking the lockless version.
* The function will return success after it successfully submit the wqe to
@@ -12740,7 +12739,7 @@ lpfc_sli_wake_iocb_wait(struct lpfc_hba *phba,
cmdiocbq->cmd_cmpl = cmdiocbq->wait_cmd_cmpl;
cmdiocbq->wait_cmd_cmpl = NULL;
if (cmdiocbq->cmd_cmpl)
- (cmdiocbq->cmd_cmpl)(phba, cmdiocbq, NULL);
+ cmdiocbq->cmd_cmpl(phba, cmdiocbq, NULL);
else
lpfc_sli_release_iocbq(phba, cmdiocbq);
return;
@@ -12754,9 +12753,9 @@ lpfc_sli_wake_iocb_wait(struct lpfc_hba *phba,
/* Set the exchange busy flag for task management commands */
if ((cmdiocbq->cmd_flag & LPFC_IO_FCP) &&
- !(cmdiocbq->cmd_flag & LPFC_IO_LIBDFC)) {
+ !(cmdiocbq->cmd_flag & LPFC_IO_LIBDFC)) {
lpfc_cmd = container_of(cmdiocbq, struct lpfc_io_buf,
- cur_iocbq);
+ cur_iocbq);
if (rspiocbq && (rspiocbq->cmd_flag & LPFC_EXCHANGE_BUSY))
lpfc_cmd->flags |= LPFC_SBUF_XBUSY;
else
@@ -13896,7 +13895,7 @@ void lpfc_sli4_els_xri_abort_event_proc(struct lpfc_hba *phba)
* @irspiocbq: Pointer to work-queue completion queue entry.
*
* This routine handles an ELS work-queue completion event and construct
- * a pseudo response ELS IODBQ from the SLI4 ELS WCQE for the common
+ * a pseudo response ELS IOCBQ from the SLI4 ELS WCQE for the common
* discovery engine to handle.
*
* Return: Pointer to the receive IOCBQ, NULL otherwise.
@@ -13940,7 +13939,7 @@ lpfc_sli4_els_preprocess_rspiocbq(struct lpfc_hba *phba,
if (bf_get(lpfc_wcqe_c_xb, wcqe)) {
spin_lock_irqsave(&phba->hbalock, iflags);
- cmdiocbq->cmd_flag |= LPFC_EXCHANGE_BUSY;
+ irspiocbq->cmd_flag |= LPFC_EXCHANGE_BUSY;
spin_unlock_irqrestore(&phba->hbalock, iflags);
}
@@ -14799,7 +14798,7 @@ lpfc_sli4_fp_handle_fcp_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
/* Pass the cmd_iocb and the wcqe to the upper layer */
memcpy(&cmdiocbq->wcqe_cmpl, wcqe,
sizeof(struct lpfc_wcqe_complete));
- (cmdiocbq->cmd_cmpl)(phba, cmdiocbq, cmdiocbq);
+ cmdiocbq->cmd_cmpl(phba, cmdiocbq, cmdiocbq);
} else {
lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
"0375 FCP cmdiocb not callback function "
@@ -18956,7 +18955,7 @@ lpfc_sli4_send_seq_to_ulp(struct lpfc_vport *vport,
/* Free iocb created in lpfc_prep_seq */
list_for_each_entry_safe(curr_iocb, next_iocb,
- &iocbq->list, list) {
+ &iocbq->list, list) {
list_del_init(&curr_iocb->list);
lpfc_sli_release_iocbq(phba, curr_iocb);
}
diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h
index 4fab79ed58ed..2ab6f7db64d8 100644
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h
@@ -20,7 +20,7 @@
* included with this package. *
*******************************************************************/
-#define LPFC_DRIVER_VERSION "14.2.0.3"
+#define LPFC_DRIVER_VERSION "14.2.0.4"
#define LPFC_DRIVER_NAME "lpfc"
/* Used for SLI 2/3 */
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 37d46ae5c61d..9a1ae52bb621 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -5369,6 +5369,7 @@ static int _base_assign_fw_reported_qd(struct MPT3SAS_ADAPTER *ioc)
Mpi2ConfigReply_t mpi_reply;
Mpi2SasIOUnitPage1_t *sas_iounit_pg1 = NULL;
Mpi26PCIeIOUnitPage1_t pcie_iounit_pg1;
+ u16 depth;
int sz;
int rc = 0;
@@ -5380,7 +5381,7 @@ static int _base_assign_fw_reported_qd(struct MPT3SAS_ADAPTER *ioc)
goto out;
/* sas iounit page 1 */
sz = offsetof(Mpi2SasIOUnitPage1_t, PhyData);
- sas_iounit_pg1 = kzalloc(sz, GFP_KERNEL);
+ sas_iounit_pg1 = kzalloc(sizeof(Mpi2SasIOUnitPage1_t), GFP_KERNEL);
if (!sas_iounit_pg1) {
pr_err("%s: failure at %s:%d/%s()!\n",
ioc->name, __FILE__, __LINE__, __func__);
@@ -5393,16 +5394,16 @@ static int _base_assign_fw_reported_qd(struct MPT3SAS_ADAPTER *ioc)
ioc->name, __FILE__, __LINE__, __func__);
goto out;
}
- ioc->max_wideport_qd =
- (le16_to_cpu(sas_iounit_pg1->SASWideMaxQueueDepth)) ?
- le16_to_cpu(sas_iounit_pg1->SASWideMaxQueueDepth) :
- MPT3SAS_SAS_QUEUE_DEPTH;
- ioc->max_narrowport_qd =
- (le16_to_cpu(sas_iounit_pg1->SASNarrowMaxQueueDepth)) ?
- le16_to_cpu(sas_iounit_pg1->SASNarrowMaxQueueDepth) :
- MPT3SAS_SAS_QUEUE_DEPTH;
- ioc->max_sata_qd = (sas_iounit_pg1->SATAMaxQDepth) ?
- sas_iounit_pg1->SATAMaxQDepth : MPT3SAS_SATA_QUEUE_DEPTH;
+
+ depth = le16_to_cpu(sas_iounit_pg1->SASWideMaxQueueDepth);
+ ioc->max_wideport_qd = (depth ? depth : MPT3SAS_SAS_QUEUE_DEPTH);
+
+ depth = le16_to_cpu(sas_iounit_pg1->SASNarrowMaxQueueDepth);
+ ioc->max_narrowport_qd = (depth ? depth : MPT3SAS_SAS_QUEUE_DEPTH);
+
+ depth = sas_iounit_pg1->SATAMaxQDepth;
+ ioc->max_sata_qd = (depth ? depth : MPT3SAS_SATA_QUEUE_DEPTH);
+
/* pcie iounit page 1 */
rc = mpt3sas_config_get_pcie_iounit_pg1(ioc, &mpi_reply,
&pcie_iounit_pg1, sizeof(Mpi26PCIeIOUnitPage1_t));
diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c
index bfce60183a6e..836ddc476764 100644
--- a/drivers/scsi/pmcraid.c
+++ b/drivers/scsi/pmcraid.c
@@ -4031,7 +4031,7 @@ pmcraid_register_interrupt_handler(struct pmcraid_instance *pinstance)
return 0;
out_unwind:
- while (--i > 0)
+ while (--i >= 0)
free_irq(pci_irq_vector(pdev, i), &pinstance->hrrq_vector[i]);
pci_free_irq_vectors(pdev);
return rc;
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 1f423f723d06..b8a76b89f85a 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -2826,6 +2826,24 @@ static void zbc_open_zone(struct sdebug_dev_info *devip,
}
}
+static inline void zbc_set_zone_full(struct sdebug_dev_info *devip,
+ struct sdeb_zone_state *zsp)
+{
+ switch (zsp->z_cond) {
+ case ZC2_IMPLICIT_OPEN:
+ devip->nr_imp_open--;
+ break;
+ case ZC3_EXPLICIT_OPEN:
+ devip->nr_exp_open--;
+ break;
+ default:
+ WARN_ONCE(true, "Invalid zone %llu condition %x\n",
+ zsp->z_start, zsp->z_cond);
+ break;
+ }
+ zsp->z_cond = ZC5_FULL;
+}
+
static void zbc_inc_wp(struct sdebug_dev_info *devip,
unsigned long long lba, unsigned int num)
{
@@ -2838,7 +2856,7 @@ static void zbc_inc_wp(struct sdebug_dev_info *devip,
if (zsp->z_type == ZBC_ZTYPE_SWR) {
zsp->z_wp += num;
if (zsp->z_wp >= zend)
- zsp->z_cond = ZC5_FULL;
+ zbc_set_zone_full(devip, zsp);
return;
}
@@ -2857,7 +2875,7 @@ static void zbc_inc_wp(struct sdebug_dev_info *devip,
n = num;
}
if (zsp->z_wp >= zend)
- zsp->z_cond = ZC5_FULL;
+ zbc_set_zone_full(devip, zsp);
num -= n;
lba += n;
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 2c0dd64159b0..5d21f07456c6 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -212,7 +212,12 @@ iscsi_create_endpoint(int dd_size)
return NULL;
mutex_lock(&iscsi_ep_idr_mutex);
- id = idr_alloc(&iscsi_ep_idr, ep, 0, -1, GFP_NOIO);
+
+ /*
+ * First endpoint id should be 1 to comply with user space
+ * applications (iscsid).
+ */
+ id = idr_alloc(&iscsi_ep_idr, ep, 1, -1, GFP_NOIO);
if (id < 0) {
mutex_unlock(&iscsi_ep_idr_mutex);
printk(KERN_ERR "Could not allocate endpoint ID. Error %d.\n",
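idr_alloc() hands out the lowest free ID in [start, end), with end <= 0 meaning no upper bound, so raising start to 1 simply reserves ID 0 for the benefit of iscsid. Sketched:

	/* Lowest free id >= 1; never hands out 0 */
	id = idr_alloc(&iscsi_ep_idr, ep, 1, -1, GFP_NOIO);
	if (id < 0)
		goto unlock;	/* -ENOMEM or -ENOSPC */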
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 895b56c8f25e..a1a2ac09066f 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3072,7 +3072,7 @@ static void sd_read_cpr(struct scsi_disk *sdkp)
goto out;
/* We must have at least a 64B header and one 32B range descriptor */
- vpd_len = get_unaligned_be16(&buffer[2]) + 3;
+ vpd_len = get_unaligned_be16(&buffer[2]) + 4;
if (vpd_len > buf_len || vpd_len < 64 + 32 || (vpd_len & 31)) {
sd_printk(KERN_ERR, sdkp,
"Invalid Concurrent Positioning Ranges VPD page\n");
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index ca3530982e52..fe000da11332 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -1844,7 +1844,7 @@ static struct scsi_host_template scsi_driver = {
.cmd_per_lun = 2048,
.this_id = -1,
/* Ensure there are no gaps in presented sgls */
- .virt_boundary_mask = PAGE_SIZE-1,
+ .virt_boundary_mask = HV_HYP_PAGE_SIZE - 1,
.no_write_same = 1,
.track_queue_depth = 1,
.change_queue_depth = storvsc_change_queue_depth,
@@ -1895,6 +1895,7 @@ static int storvsc_probe(struct hv_device *device,
int target = 0;
struct storvsc_device *stor_device;
int max_sub_channels = 0;
+ u32 max_xfer_bytes;
/*
* We support sub-channels for storage on SCSI and FC controllers.
@@ -1968,12 +1969,28 @@ static int storvsc_probe(struct hv_device *device,
}
/* max cmd length */
host->max_cmd_len = STORVSC_MAX_CMD_LEN;
-
/*
- * set the table size based on the info we got
- * from the host.
+ * Any reasonable Hyper-V configuration should provide a
+ * max_transfer_bytes value aligned to HV_HYP_PAGE_SIZE; round it
+ * down to protect against any weird value.
+ */
+ max_xfer_bytes = round_down(stor_device->max_transfer_bytes, HV_HYP_PAGE_SIZE);
+ /* max_hw_sectors_kb */
+ host->max_sectors = max_xfer_bytes >> 9;
+ /*
+ * There are two requirements for Hyper-V storvsc sgl segments,
+ * on which the calculation of max segments below is based:
+ *
+ * 1. Except for the first and last sgl segment, all sgl segments
+ * should be aligned to HV_HYP_PAGE_SIZE, which also means the
+ * maximum number of segments in an sgl can be calculated by
+ * dividing the total max transfer length by HV_HYP_PAGE_SIZE.
+ *
+ * 2. Except for the first and last, each entry in the SGL must
+ * have an offset that is a multiple of HV_HYP_PAGE_SIZE.
*/
- host->sg_tablesize = (stor_device->max_transfer_bytes >> PAGE_SHIFT);
+ host->sg_tablesize = (max_xfer_bytes >> HV_HYP_PAGE_SHIFT) + 1;
/*
* For non-IDE disks, the host supports multiple channels.
* Set the number of HW queues we are supporting.
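Worked through with typical numbers (HV_HYP_PAGE_SIZE is 4 KiB; the reported transfer size is illustrative):

	/* Host reports max_transfer_bytes = 1 MiB + 512 bytes of slack */
	max_xfer_bytes = round_down(1049088, HV_HYP_PAGE_SIZE);	/* 1048576 */
	host->max_sectors = max_xfer_bytes >> 9;		/* 2048 */
	/* 256 page-sized middle segments, +1 for an unaligned first/last pair */
	host->sg_tablesize = (max_xfer_bytes >> HV_HYP_PAGE_SHIFT) + 1;	/* 257 */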
diff --git a/drivers/scsi/vmw_pvscsi.h b/drivers/scsi/vmw_pvscsi.h
index 51a82f7803d3..9d16cf925483 100644
--- a/drivers/scsi/vmw_pvscsi.h
+++ b/drivers/scsi/vmw_pvscsi.h
@@ -331,8 +331,8 @@ struct PVSCSIRingReqDesc {
u8 tag;
u8 bus;
u8 target;
- u8 vcpuHint;
- u8 unused[59];
+ u16 vcpuHint;
+ u8 unused[58];
} __packed;
/*
diff --git a/drivers/staging/olpc_dcon/Kconfig b/drivers/staging/olpc_dcon/Kconfig
index d1a0dea09ef0..d0ba34cc32f7 100644
--- a/drivers/staging/olpc_dcon/Kconfig
+++ b/drivers/staging/olpc_dcon/Kconfig
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
config FB_OLPC_DCON
tristate "One Laptop Per Child Display CONtroller support"
- depends on OLPC && FB
+ depends on OLPC && FB && BROKEN
depends on I2C
depends on GPIO_CS5535 && ACPI
select BACKLIGHT_CLASS_DEVICE
diff --git a/drivers/staging/r8188eu/core/rtw_xmit.c b/drivers/staging/r8188eu/core/rtw_xmit.c
index 3d8e9dea7651..7135d89caac1 100644
--- a/drivers/staging/r8188eu/core/rtw_xmit.c
+++ b/drivers/staging/r8188eu/core/rtw_xmit.c
@@ -178,8 +178,7 @@ s32 _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter)
pxmitpriv->free_xmit_extbuf_cnt = num_xmit_extbuf;
- res = rtw_alloc_hwxmits(padapter);
- if (res) {
+ if (rtw_alloc_hwxmits(padapter)) {
res = _FAIL;
goto exit;
}
@@ -1483,19 +1482,10 @@ int rtw_alloc_hwxmits(struct adapter *padapter)
hwxmits = pxmitpriv->hwxmits;
- if (pxmitpriv->hwxmit_entry == 5) {
- hwxmits[0] .sta_queue = &pxmitpriv->bm_pending;
- hwxmits[1] .sta_queue = &pxmitpriv->vo_pending;
- hwxmits[2] .sta_queue = &pxmitpriv->vi_pending;
- hwxmits[3] .sta_queue = &pxmitpriv->bk_pending;
- hwxmits[4] .sta_queue = &pxmitpriv->be_pending;
- } else if (pxmitpriv->hwxmit_entry == 4) {
- hwxmits[0] .sta_queue = &pxmitpriv->vo_pending;
- hwxmits[1] .sta_queue = &pxmitpriv->vi_pending;
- hwxmits[2] .sta_queue = &pxmitpriv->be_pending;
- hwxmits[3] .sta_queue = &pxmitpriv->bk_pending;
- } else {
- }
+ hwxmits[0].sta_queue = &pxmitpriv->vo_pending;
+ hwxmits[1].sta_queue = &pxmitpriv->vi_pending;
+ hwxmits[2].sta_queue = &pxmitpriv->be_pending;
+ hwxmits[3].sta_queue = &pxmitpriv->bk_pending;
return 0;
}
diff --git a/drivers/staging/r8188eu/os_dep/ioctl_linux.c b/drivers/staging/r8188eu/os_dep/ioctl_linux.c
index 1b09462ca908..8dd280e2739a 100644
--- a/drivers/staging/r8188eu/os_dep/ioctl_linux.c
+++ b/drivers/staging/r8188eu/os_dep/ioctl_linux.c
@@ -403,7 +403,7 @@ static int wpa_set_encryption(struct net_device *dev, struct ieee_param *param,
if (wep_key_len > 0) {
wep_key_len = wep_key_len <= 5 ? 5 : 13;
- wep_total_len = wep_key_len + FIELD_OFFSET(struct ndis_802_11_wep, KeyMaterial);
+ wep_total_len = wep_key_len + sizeof(*pwep);
pwep = kzalloc(wep_total_len, GFP_KERNEL);
if (!pwep)
goto exit;
diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
index ece97e37ac91..30374a820496 100644
--- a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
+++ b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
@@ -90,7 +90,8 @@ static int wpa_set_encryption(struct net_device *dev, struct ieee_param *param,
if (wep_key_len > 0) {
wep_key_len = wep_key_len <= 5 ? 5 : 13;
wep_total_len = wep_key_len + FIELD_OFFSET(struct ndis_802_11_wep, key_material);
- pwep = kzalloc(wep_total_len, GFP_KERNEL);
+ /* Allocate a full structure to avoid potentially running off the end. */
+ pwep = kzalloc(sizeof(*pwep), GFP_KERNEL);
if (!pwep) {
ret = -ENOMEM;
goto exit;
@@ -582,7 +583,8 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param,
if (wep_key_len > 0) {
wep_key_len = wep_key_len <= 5 ? 5 : 13;
wep_total_len = wep_key_len + FIELD_OFFSET(struct ndis_802_11_wep, key_material);
- pwep = kzalloc(wep_total_len, GFP_KERNEL);
+ /* Allocate a full structure to avoid potentially running off the end. */
+ pwep = kzalloc(sizeof(*pwep), GFP_KERNEL);
if (!pwep)
goto exit;
diff --git a/drivers/tty/goldfish.c b/drivers/tty/goldfish.c
index c7968aecd870..d02de3f0326f 100644
--- a/drivers/tty/goldfish.c
+++ b/drivers/tty/goldfish.c
@@ -426,7 +426,7 @@ static int goldfish_tty_remove(struct platform_device *pdev)
tty_unregister_device(goldfish_tty_driver, qtty->console.index);
iounmap(qtty->base);
qtty->base = NULL;
- free_irq(qtty->irq, pdev);
+ free_irq(qtty->irq, qtty);
tty_port_destroy(&qtty->port);
goldfish_tty_current_line_count--;
if (goldfish_tty_current_line_count == 0)
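free_irq() must be handed the same dev_id cookie that request_irq() registered, otherwise the handler lookup fails and, for shared IRQs, the wrong action could be removed. The invariant, sketched; the handler name and flags are assumed from the probe path, which the fix implies registered with qtty:

	ret = request_irq(qtty->irq, goldfish_tty_interrupt, IRQF_SHARED,
			  "goldfish_tty", qtty);
	...
	free_irq(qtty->irq, qtty);	/* same cookie: qtty, not pdev */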
diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c
index 137eebdcfda9..fd4d24f61c46 100644
--- a/drivers/tty/n_gsm.c
+++ b/drivers/tty/n_gsm.c
@@ -455,7 +455,7 @@ static void gsm_hex_dump_bytes(const char *fname, const u8 *data,
return;
}
- prefix = kasprintf(GFP_KERNEL, "%s: ", fname);
+ prefix = kasprintf(GFP_ATOMIC, "%s: ", fname);
if (!prefix)
return;
print_hex_dump(KERN_INFO, prefix, DUMP_PREFIX_OFFSET, 16, 1, data, len,
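gsm_hex_dump_bytes() can run in atomic context, where a GFP_KERNEL allocation may sleep and trigger a scheduling-while-atomic splat; GFP_ATOMIC is the non-sleeping variant. The rule, sketched with an illustrative lock name:

	spin_lock_irqsave(&gsm->lock, flags);
	/* no sleeping allowed here: GFP_KERNEL would be a bug */
	prefix = kasprintf(GFP_ATOMIC, "%s: ", fname);
	spin_unlock_irqrestore(&gsm->lock, flags);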
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 78b6dedc43e6..8f32fe9e149e 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -1517,6 +1517,8 @@ static inline void __stop_tx(struct uart_8250_port *p)
unsigned char lsr = serial_in(p, UART_LSR);
u64 stop_delay = 0;
+ p->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS;
+
if (!(lsr & UART_LSR_THRE))
return;
/*
diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index 4733a233bd0c..f8f950641ad9 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -1306,6 +1306,7 @@ static const struct uart_ops qcom_geni_console_pops = {
.stop_tx = qcom_geni_serial_stop_tx,
.start_tx = qcom_geni_serial_start_tx,
.stop_rx = qcom_geni_serial_stop_rx,
+ .start_rx = qcom_geni_serial_start_rx,
.set_termios = qcom_geni_serial_set_termios,
.startup = qcom_geni_serial_startup,
.request_port = qcom_geni_serial_request_port,
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 9a85b41caa0a..338ebadfd44b 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -2214,11 +2214,12 @@ int uart_suspend_port(struct uart_driver *drv, struct uart_port *uport)
/*
* Nothing to do if the console is not suspending
* except stop_rx to prevent any asynchronous data
- * over RX line. Re-start_rx, when required, is
- * done by set_termios in resume sequence
+ * over the RX line. However, ensure that we will be
+ * able to call start_rx again later.
*/
if (!console_suspend_enabled && uart_console(uport)) {
- uport->ops->stop_rx(uport);
+ if (uport->ops->start_rx)
+ uport->ops->stop_rx(uport);
goto unlock;
}
@@ -2310,6 +2311,8 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *uport)
if (console_suspend_enabled)
uart_change_pm(state, UART_PM_STATE_ON);
uport->ops->set_termios(uport, &termios, NULL);
+ if (!console_suspend_enabled && uport->ops->start_rx)
+ uport->ops->start_rx(uport);
if (console_suspend_enabled)
console_start(uport->cons);
}
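
A minimal user-space sketch of the guard introduced above, with a hypothetical ops table standing in for struct uart_ops: RX is only stopped on suspend when the driver also implements start_rx, so resume can symmetrically re-enable it.

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical stand-in for the subset of struct uart_ops used here. */
    struct ops {
        void (*stop_rx)(void);
        void (*start_rx)(void);    /* optional: may be NULL */
    };

    static void demo_stop_rx(void) { puts("rx stopped"); }
    static void demo_start_rx(void) { puts("rx restarted"); }

    /* Suspend: only stop RX if we know resume can start it again. */
    static void suspend_console(const struct ops *ops)
    {
        if (ops->start_rx)
            ops->stop_rx();
    }

    /* Resume: re-enable RX when the driver provides start_rx. */
    static void resume_console(const struct ops *ops)
    {
        if (ops->start_rx)
            ops->start_rx();
    }

    int main(void)
    {
        struct ops with = { demo_stop_rx, demo_start_rx };
        struct ops without = { demo_stop_rx, NULL };

        suspend_console(&with);     /* stops RX */
        resume_console(&with);      /* restarts RX */
        suspend_console(&without);  /* leaves RX running: no way back */
        resume_console(&without);   /* nothing to do */
        return 0;
    }
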
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 01fb4bad86be..ce86d1b790c0 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -748,17 +748,28 @@ static enum utp_ocs ufshcd_get_tr_ocs(struct ufshcd_lrb *lrbp)
}
/**
- * ufshcd_utrl_clear - Clear a bit in UTRLCLR register
+ * ufshcd_utrl_clear() - Clear requests from the controller request list.
* @hba: per adapter instance
- * @pos: position of the bit to be cleared
+ * @mask: mask with one bit set for each request to be cleared
*/
-static inline void ufshcd_utrl_clear(struct ufs_hba *hba, u32 pos)
+static inline void ufshcd_utrl_clear(struct ufs_hba *hba, u32 mask)
{
if (hba->quirks & UFSHCI_QUIRK_BROKEN_REQ_LIST_CLR)
- ufshcd_writel(hba, (1 << pos), REG_UTP_TRANSFER_REQ_LIST_CLEAR);
- else
- ufshcd_writel(hba, ~(1 << pos),
- REG_UTP_TRANSFER_REQ_LIST_CLEAR);
+ mask = ~mask;
+ /*
+ * From the UFSHCI specification: "UTP Transfer Request List CLear
+ * Register (UTRLCLR): This field is bit significant. Each bit
+ * corresponds to a slot in the UTP Transfer Request List, where bit 0
+ * corresponds to request slot 0. A bit in this field is set to ‘0’
+ * by host software to indicate to the host controller that a transfer
+ * request slot is cleared. The host controller
+ * shall free up any resources associated to the request slot
+ * immediately, and shall set the associated bit in UTRLDBR to ‘0’. The
+ * host software indicates no change to request slots by setting the
+ * associated bits in this field to ‘1’. Bits in this field shall only
+ * be set ‘1’ or ‘0’ by host software when UTRLRSR is set to ‘1’."
+ */
+ ufshcd_writel(hba, ~mask, REG_UTP_TRANSFER_REQ_LIST_CLEAR);
}
/**
@@ -2863,27 +2874,26 @@ static int ufshcd_compose_dev_cmd(struct ufs_hba *hba,
return ufshcd_compose_devman_upiu(hba, lrbp);
}
-static int
-ufshcd_clear_cmd(struct ufs_hba *hba, int tag)
+/*
+ * Clear all the requests from the controller for which a bit has been set in
+ * @mask and wait until the controller confirms that these requests have been
+ * cleared.
+ */
+static int ufshcd_clear_cmds(struct ufs_hba *hba, u32 mask)
{
- int err = 0;
unsigned long flags;
- u32 mask = 1 << tag;
/* clear outstanding transaction before retry */
spin_lock_irqsave(hba->host->host_lock, flags);
- ufshcd_utrl_clear(hba, tag);
+ ufshcd_utrl_clear(hba, mask);
spin_unlock_irqrestore(hba->host->host_lock, flags);
/*
* wait for h/w to clear corresponding bit in door-bell.
* max. wait is 1 sec.
*/
- err = ufshcd_wait_for_register(hba,
- REG_UTP_TRANSFER_REQ_DOOR_BELL,
- mask, ~mask, 1000, 1000);
-
- return err;
+ return ufshcd_wait_for_register(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL,
+ mask, ~mask, 1000, 1000);
}
static int
@@ -2963,7 +2973,7 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba,
err = -ETIMEDOUT;
dev_dbg(hba->dev, "%s: dev_cmd request timedout, tag %d\n",
__func__, lrbp->task_tag);
- if (!ufshcd_clear_cmd(hba, lrbp->task_tag))
+ if (!ufshcd_clear_cmds(hba, 1U << lrbp->task_tag))
/* successfully cleared the command, retry if needed */
err = -EAGAIN;
/*
@@ -6958,14 +6968,14 @@ int ufshcd_exec_raw_upiu_cmd(struct ufs_hba *hba,
}
/**
- * ufshcd_eh_device_reset_handler - device reset handler registered to
- * scsi layer.
+ * ufshcd_eh_device_reset_handler() - Reset a single logical unit.
* @cmd: SCSI command pointer
*
* Returns SUCCESS/FAILED
*/
static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
{
+ unsigned long flags, pending_reqs = 0, not_cleared = 0;
struct Scsi_Host *host;
struct ufs_hba *hba;
u32 pos;
@@ -6984,14 +6994,24 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
}
/* clear the commands that were pending for corresponding LUN */
- for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs) {
- if (hba->lrb[pos].lun == lun) {
- err = ufshcd_clear_cmd(hba, pos);
- if (err)
- break;
- __ufshcd_transfer_req_compl(hba, 1U << pos);
- }
+ spin_lock_irqsave(&hba->outstanding_lock, flags);
+ for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs)
+ if (hba->lrb[pos].lun == lun)
+ __set_bit(pos, &pending_reqs);
+ hba->outstanding_reqs &= ~pending_reqs;
+ spin_unlock_irqrestore(&hba->outstanding_lock, flags);
+
+ if (ufshcd_clear_cmds(hba, pending_reqs) < 0) {
+ spin_lock_irqsave(&hba->outstanding_lock, flags);
+ not_cleared = pending_reqs &
+ ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
+ hba->outstanding_reqs |= not_cleared;
+ spin_unlock_irqrestore(&hba->outstanding_lock, flags);
+
+ dev_err(hba->dev, "%s: failed to clear requests %#lx\n",
+ __func__, not_cleared);
}
+ __ufshcd_transfer_req_compl(hba, pending_reqs & ~not_cleared);
out:
hba->req_abort_count = 0;
@@ -7088,7 +7108,7 @@ static int ufshcd_try_to_abort_task(struct ufs_hba *hba, int tag)
goto out;
}
- err = ufshcd_clear_cmd(hba, tag);
+ err = ufshcd_clear_cmds(hba, 1U << tag);
if (err)
dev_err(hba->dev, "%s: Failed clearing cmd at tag %d, err %d\n",
__func__, tag, err);
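
The UTRLCLR convention handled above can be modeled in isolation. Per the quoted spec text a 0 bit clears a slot, so the driver writes ~mask; controllers with UFSHCI_QUIRK_BROKEN_REQ_LIST_CLR expect the inverted convention, handled by flipping mask first. A standalone sketch (values illustrative):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /*
     * Compute the value written to UTRLCLR for a set of request slots.
     * @mask has one bit set per slot to clear. Per the UFSHCI spec, a 0
     * bit tells the controller to clear that slot, so we write ~mask.
     * Broken controllers expect the inverted (1-clears) convention.
     */
    static uint32_t utrlclr_value(uint32_t mask, bool broken_clr_quirk)
    {
        if (broken_clr_quirk)
            mask = ~mask;
        return ~mask;
    }

    int main(void)
    {
        uint32_t mask = (1U << 3) | (1U << 7);    /* clear slots 3 and 7 */

        printf("spec:  0x%08x\n", utrlclr_value(mask, false)); /* 0xffffff77 */
        printf("quirk: 0x%08x\n", utrlclr_value(mask, true));  /* 0x00000088 */
        return 0;
    }
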
diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c
index e45c3d6e1536..794e413800ae 100644
--- a/drivers/usb/cdns3/cdnsp-ring.c
+++ b/drivers/usb/cdns3/cdnsp-ring.c
@@ -1941,13 +1941,16 @@ int cdnsp_queue_bulk_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq)
}
if (enqd_len + trb_buff_len >= full_len) {
- if (need_zero_pkt)
- zero_len_trb = !zero_len_trb;
-
- field &= ~TRB_CHAIN;
- field |= TRB_IOC;
- more_trbs_coming = false;
- preq->td.last_trb = ring->enqueue;
+ if (need_zero_pkt && !zero_len_trb) {
+ zero_len_trb = true;
+ } else {
+ zero_len_trb = false;
+ field &= ~TRB_CHAIN;
+ field |= TRB_IOC;
+ more_trbs_coming = false;
+ need_zero_pkt = false;
+ preq->td.last_trb = ring->enqueue;
+ }
}
/* Only set interrupt on short packet for OUT endpoints. */
@@ -1962,7 +1965,7 @@ int cdnsp_queue_bulk_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq)
length_field = TRB_LEN(trb_buff_len) | TRB_TD_SIZE(remainder) |
TRB_INTR_TARGET(0);
- cdnsp_queue_trb(pdev, ring, more_trbs_coming | zero_len_trb,
+ cdnsp_queue_trb(pdev, ring, more_trbs_coming,
lower_32_bits(send_addr),
upper_32_bits(send_addr),
length_field,
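
The reworked branch above makes the enqueue loop run one extra iteration when a zero-length packet is required: the final data TRB stays chained, and only the trailing zero-length TRB clears TRB_CHAIN and sets TRB_IOC. A rough standalone simulation of that flag handling (lengths and names hypothetical):

    #include <stdbool.h>
    #include <stdio.h>

    /* Simplified model of the cdnsp bulk TX loop's termination flags. */
    int main(void)
    {
        const int full_len = 1024, trb_len = 512;
        bool need_zero_pkt = true;    /* transfer ends on a packet boundary */
        bool zero_len_trb = false;
        bool more_trbs_coming = true;
        int enqd_len = 0, trb = 0;

        while (more_trbs_coming) {
            int this_len = enqd_len < full_len ? trb_len : 0;
            bool chain = true, ioc = false;

            if (enqd_len + this_len >= full_len) {
                if (need_zero_pkt && !zero_len_trb) {
                    /* Keep chaining: a zero-length TRB follows. */
                    zero_len_trb = true;
                } else {
                    /* This TRB terminates the TD. */
                    zero_len_trb = false;
                    chain = false;
                    ioc = true;
                    more_trbs_coming = false;
                    need_zero_pkt = false;
                }
            }
            printf("TRB %d: len=%d chain=%d ioc=%d\n",
                   trb++, this_len, chain, ioc);
            enqd_len += this_len;
        }
        return 0;
    }
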
diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c
index f63a27d11fac..3f107a06817d 100644
--- a/drivers/usb/dwc2/hcd.c
+++ b/drivers/usb/dwc2/hcd.c
@@ -5190,7 +5190,7 @@ int dwc2_hcd_init(struct dwc2_hsotg *hsotg)
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!res) {
retval = -EINVAL;
- goto error1;
+ goto error2;
}
hcd->rsrc_start = res->start;
hcd->rsrc_len = resource_size(res);
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index e027c0420dc3..573421984948 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -1644,13 +1644,8 @@ static struct extcon_dev *dwc3_get_extcon(struct dwc3 *dwc)
* This device property is for kernel internal use only and
* is expected to be set by the glue code.
*/
- if (device_property_read_string(dev, "linux,extcon-name", &name) == 0) {
- edev = extcon_get_extcon_dev(name);
- if (!edev)
- return ERR_PTR(-EPROBE_DEFER);
-
- return edev;
- }
+ if (device_property_read_string(dev, "linux,extcon-name", &name) == 0)
+ return extcon_get_extcon_dev(name);
/*
* Try to get an extcon device from the USB PHY controller's "port"
diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index ba51de7dd760..6b018048fe2e 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -127,6 +127,7 @@ static const struct property_entry dwc3_pci_intel_phy_charger_detect_properties[
PROPERTY_ENTRY_STRING("dr_mode", "peripheral"),
PROPERTY_ENTRY_BOOL("snps,dis_u2_susphy_quirk"),
PROPERTY_ENTRY_BOOL("linux,phy_charger_detect"),
+ PROPERTY_ENTRY_BOOL("linux,sysdev_is_parent"),
{}
};
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 00427d108ab9..8716bece1072 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2976,6 +2976,7 @@ static int dwc3_gadget_init_in_endpoint(struct dwc3_ep *dep)
struct dwc3 *dwc = dep->dwc;
u32 mdwidth;
int size;
+ int maxpacket;
mdwidth = dwc3_mdwidth(dwc);
@@ -2988,21 +2989,24 @@ static int dwc3_gadget_init_in_endpoint(struct dwc3_ep *dep)
else
size = DWC31_GTXFIFOSIZ_TXFDEP(size);
- /* FIFO Depth is in MDWDITH bytes. Multiply */
- size *= mdwidth;
-
/*
- * To meet performance requirement, a minimum TxFIFO size of 3x
- * MaxPacketSize is recommended for endpoints that support burst and a
- * minimum TxFIFO size of 2x MaxPacketSize for endpoints that don't
- * support burst. Use those numbers and we can calculate the max packet
- * limit as below.
+ * maxpacket size is determined from the following, assuming
+ * a mult value of one maxpacket:
+ * DWC3 revision 280A and prior:
+ * fifo_size = mult * (max_packet / mdwidth) + 1;
+ * maxpacket = mdwidth * (fifo_size - 1);
+ *
+ * DWC3 revision 290A and onwards:
+ * fifo_size = mult * ((max_packet + mdwidth)/mdwidth + 1) + 1
+ * maxpacket = mdwidth * ((fifo_size - 1) - 1) - mdwidth;
*/
- if (dwc->maximum_speed >= USB_SPEED_SUPER)
- size /= 3;
+ if (DWC3_VER_IS_PRIOR(DWC3, 290A))
+ maxpacket = mdwidth * (size - 1);
else
- size /= 2;
+ maxpacket = mdwidth * ((size - 1) - 1) - mdwidth;
+ /* Functionally, space for one max packet is sufficient */
+ size = min_t(int, maxpacket, 1024);
usb_ep_set_maxpacket_limit(&dep->endpoint, size);
dep->endpoint.max_streams = 16;
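
A small sketch of the two revision-dependent formulas quoted in the new comment, assuming mdwidth has already been converted to bytes and a fifo depth expressed in mdwidth-sized words (the values below are purely illustrative):

    #include <stdio.h>

    /* Inverse of the TxFIFO sizing formulas for mult == 1. */
    static int maxpacket_prior_290a(int fifo_size, int mdwidth)
    {
        /* fifo_size = mult * (max_packet / mdwidth) + 1 */
        return mdwidth * (fifo_size - 1);
    }

    static int maxpacket_290a_onwards(int fifo_size, int mdwidth)
    {
        /* fifo_size = mult * ((max_packet + mdwidth)/mdwidth + 1) + 1 */
        return mdwidth * ((fifo_size - 1) - 1) - mdwidth;
    }

    int main(void)
    {
        int fifo_size = 130, mdwidth = 8;    /* illustrative values */

        printf("<= 280A: %d bytes\n", maxpacket_prior_290a(fifo_size, mdwidth));
        printf(">= 290A: %d bytes\n", maxpacket_290a_onwards(fifo_size, mdwidth));
        /* Either result is then clamped: size = min(maxpacket, 1024). */
        return 0;
    }
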
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 4585ee3a444a..e0fa4b186ec6 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -122,8 +122,6 @@ struct ffs_ep {
struct usb_endpoint_descriptor *descs[3];
u8 num;
-
- int status; /* P: epfile->mutex */
};
struct ffs_epfile {
@@ -227,6 +225,9 @@ struct ffs_io_data {
bool use_sg;
struct ffs_data *ffs;
+
+ int status;
+ struct completion done;
};
struct ffs_desc_helper {
@@ -707,12 +708,15 @@ static const struct file_operations ffs_ep0_operations = {
static void ffs_epfile_io_complete(struct usb_ep *_ep, struct usb_request *req)
{
+ struct ffs_io_data *io_data = req->context;
+
ENTER();
- if (req->context) {
- struct ffs_ep *ep = _ep->driver_data;
- ep->status = req->status ? req->status : req->actual;
- complete(req->context);
- }
+ if (req->status)
+ io_data->status = req->status;
+ else
+ io_data->status = req->actual;
+
+ complete(&io_data->done);
}
static ssize_t ffs_copy_to_iter(void *data, int data_len, struct iov_iter *iter)
@@ -1050,7 +1054,6 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
WARN(1, "%s: data_len == -EINVAL\n", __func__);
ret = -EINVAL;
} else if (!io_data->aio) {
- DECLARE_COMPLETION_ONSTACK(done);
bool interrupted = false;
req = ep->req;
@@ -1066,7 +1069,8 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
io_data->buf = data;
- req->context = &done;
+ init_completion(&io_data->done);
+ req->context = io_data;
req->complete = ffs_epfile_io_complete;
ret = usb_ep_queue(ep->ep, req, GFP_ATOMIC);
@@ -1075,7 +1079,12 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
spin_unlock_irq(&epfile->ffs->eps_lock);
- if (wait_for_completion_interruptible(&done)) {
+ if (wait_for_completion_interruptible(&io_data->done)) {
+ spin_lock_irq(&epfile->ffs->eps_lock);
+ if (epfile->ep != ep) {
+ ret = -ESHUTDOWN;
+ goto error_lock;
+ }
/*
* To avoid race condition with ffs_epfile_io_complete,
* dequeue the request first then check
@@ -1083,17 +1092,18 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
* condition with req->complete callback.
*/
usb_ep_dequeue(ep->ep, req);
- wait_for_completion(&done);
- interrupted = ep->status < 0;
+ spin_unlock_irq(&epfile->ffs->eps_lock);
+ wait_for_completion(&io_data->done);
+ interrupted = io_data->status < 0;
}
if (interrupted)
ret = -EINTR;
- else if (io_data->read && ep->status > 0)
- ret = __ffs_epfile_read_data(epfile, data, ep->status,
+ else if (io_data->read && io_data->status > 0)
+ ret = __ffs_epfile_read_data(epfile, data, io_data->status,
&io_data->data);
else
- ret = ep->status;
+ ret = io_data->status;
goto error_mutex;
} else if (!(req = usb_ep_alloc_request(ep->ep, GFP_ATOMIC))) {
ret = -ENOMEM;
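
The refactor moves status and the completion from the shared struct ffs_ep into the per-I/O struct ffs_io_data, so a late completion writes into memory owned by the request rather than the endpoint. A rough user-space analogue of the pattern, using a pthread condition variable in place of the kernel completion API:

    #include <pthread.h>
    #include <stdio.h>

    /* Per-request state, analogous to status/done moving into ffs_io_data. */
    struct io_data {
        int status;
        pthread_mutex_t lock;
        pthread_cond_t cond;
        int done;
    };

    static void io_complete(struct io_data *io, int status)
    {
        pthread_mutex_lock(&io->lock);
        io->status = status;
        io->done = 1;
        pthread_cond_signal(&io->cond);
        pthread_mutex_unlock(&io->lock);
    }

    static int io_wait(struct io_data *io)
    {
        pthread_mutex_lock(&io->lock);
        while (!io->done)
            pthread_cond_wait(&io->cond, &io->lock);
        pthread_mutex_unlock(&io->lock);
        return io->status;
    }

    static void *completer(void *arg)
    {
        io_complete(arg, 512);    /* e.g. req->actual bytes */
        return NULL;
    }

    int main(void)
    {
        struct io_data io = {
            .lock = PTHREAD_MUTEX_INITIALIZER,
            .cond = PTHREAD_COND_INITIALIZER,
        };
        pthread_t t;

        pthread_create(&t, NULL, completer, &io);
        printf("status: %d\n", io_wait(&io));
        pthread_join(&t, NULL);
        return 0;
    }
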
diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c
index 6f5d45ef2e39..f51694f29de9 100644
--- a/drivers/usb/gadget/function/u_ether.c
+++ b/drivers/usb/gadget/function/u_ether.c
@@ -775,9 +775,13 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g,
dev->qmult = qmult;
snprintf(net->name, sizeof(net->name), "%s%%d", netname);
- if (get_ether_addr(dev_addr, addr))
+ if (get_ether_addr(dev_addr, addr)) {
+ net->addr_assign_type = NET_ADDR_RANDOM;
dev_warn(&g->dev,
"using random %s ethernet address\n", "self");
+ } else {
+ net->addr_assign_type = NET_ADDR_SET;
+ }
eth_hw_addr_set(net, addr);
if (get_ether_addr(host_addr, dev->host_mac))
dev_warn(&g->dev,
@@ -844,6 +848,10 @@ struct net_device *gether_setup_name_default(const char *netname)
eth_random_addr(dev->dev_mac);
pr_warn("using random %s ethernet address\n", "self");
+
+ /* by default we always have a random MAC address */
+ net->addr_assign_type = NET_ADDR_RANDOM;
+
eth_random_addr(dev->host_mac);
pr_warn("using random %s ethernet address\n", "host");
@@ -871,7 +879,6 @@ int gether_register_netdev(struct net_device *net)
dev = netdev_priv(net);
g = dev->gadget;
- net->addr_assign_type = NET_ADDR_RANDOM;
eth_hw_addr_set(net, dev->dev_mac);
status = register_netdev(net);
@@ -912,6 +919,7 @@ int gether_set_dev_addr(struct net_device *net, const char *dev_addr)
if (get_ether_addr(dev_addr, new_addr))
return -EINVAL;
memcpy(dev->dev_mac, new_addr, ETH_ALEN);
+ net->addr_assign_type = NET_ADDR_SET;
return 0;
}
EXPORT_SYMBOL_GPL(gether_set_dev_addr);
diff --git a/drivers/usb/gadget/udc/lpc32xx_udc.c b/drivers/usb/gadget/udc/lpc32xx_udc.c
index 6117ae8e7242..cea10cdb83ae 100644
--- a/drivers/usb/gadget/udc/lpc32xx_udc.c
+++ b/drivers/usb/gadget/udc/lpc32xx_udc.c
@@ -3016,6 +3016,7 @@ static int lpc32xx_udc_probe(struct platform_device *pdev)
}
udc->isp1301_i2c_client = isp1301_get_client(isp1301_node);
+ of_node_put(isp1301_node);
if (!udc->isp1301_i2c_client) {
return -EPROBE_DEFER;
}
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index f0ab63138016..9ac56e9ffc64 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1107,7 +1107,6 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
{
u32 command, temp = 0;
struct usb_hcd *hcd = xhci_to_hcd(xhci);
- struct usb_hcd *secondary_hcd;
int retval = 0;
bool comp_timer_running = false;
bool pending_portevent = false;
@@ -1214,23 +1213,19 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
* first with the primary HCD, and then with the secondary HCD.
* If we don't do the same, the host will never be started.
*/
- if (!usb_hcd_is_primary_hcd(hcd))
- secondary_hcd = hcd;
- else
- secondary_hcd = xhci->shared_hcd;
-
xhci_dbg(xhci, "Initialize the xhci_hcd\n");
- retval = xhci_init(hcd->primary_hcd);
+ retval = xhci_init(hcd);
if (retval)
return retval;
comp_timer_running = true;
xhci_dbg(xhci, "Start the primary HCD\n");
- retval = xhci_run(hcd->primary_hcd);
- if (!retval && secondary_hcd) {
+ retval = xhci_run(hcd);
+ if (!retval && xhci->shared_hcd) {
xhci_dbg(xhci, "Start the secondary HCD\n");
- retval = xhci_run(secondary_hcd);
+ retval = xhci_run(xhci->shared_hcd);
}
+
hcd->state = HC_STATE_SUSPENDED;
if (xhci->shared_hcd)
xhci->shared_hcd->state = HC_STATE_SUSPENDED;
diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c
index a7b3c15957ba..feba2a8d1233 100644
--- a/drivers/usb/serial/io_ti.c
+++ b/drivers/usb/serial/io_ti.c
@@ -166,6 +166,7 @@ static const struct usb_device_id edgeport_2port_id_table[] = {
{ USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_8S) },
{ USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_416) },
{ USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_416B) },
+ { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_E5805A) },
{ }
};
@@ -204,6 +205,7 @@ static const struct usb_device_id id_table_combined[] = {
{ USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_8S) },
{ USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_416) },
{ USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_416B) },
+ { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_E5805A) },
{ }
};
diff --git a/drivers/usb/serial/io_usbvend.h b/drivers/usb/serial/io_usbvend.h
index 52cbc353051f..9a6f742ad3ab 100644
--- a/drivers/usb/serial/io_usbvend.h
+++ b/drivers/usb/serial/io_usbvend.h
@@ -212,6 +212,7 @@
//
// Definitions for other product IDs
#define ION_DEVICE_ID_MT4X56USB 0x1403 // OEM device
+#define ION_DEVICE_ID_E5805A 0x1A01 // OEM device (rebranded Edgeport/4)
#define GENERATION_ID_FROM_USB_PRODUCT_ID(ProductId) \
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index e60425bbf537..ed1e50d83cca 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -432,6 +432,8 @@ static void option_instat_callback(struct urb *urb);
#define CINTERION_PRODUCT_CLS8 0x00b0
#define CINTERION_PRODUCT_MV31_MBIM 0x00b3
#define CINTERION_PRODUCT_MV31_RMNET 0x00b7
+#define CINTERION_PRODUCT_MV31_2_MBIM 0x00b8
+#define CINTERION_PRODUCT_MV31_2_RMNET 0x00b9
#define CINTERION_PRODUCT_MV32_WA 0x00f1
#define CINTERION_PRODUCT_MV32_WB 0x00f2
@@ -1979,6 +1981,10 @@ static const struct usb_device_id option_ids[] = {
.driver_info = RSVD(3)},
{ USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_RMNET, 0xff),
.driver_info = RSVD(0)},
+ { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_2_MBIM, 0xff),
+ .driver_info = RSVD(3)},
+ { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_2_RMNET, 0xff),
+ .driver_info = RSVD(0)},
{ USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV32_WA, 0xff),
.driver_info = RSVD(3)},
{ USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV32_WB, 0xff),
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index b7a955479156..1b6d46b86f81 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -107,7 +107,7 @@ struct mlx5_vdpa_virtqueue {
/* Resources for implementing the notification channel from the device
* to the driver. fwqp is the firmware end of an RC connection; the
- * other end is vqqp used by the driver. cq is is where completions are
+ * other end is vqqp used by the driver. cq is where completions are
* reported.
*/
struct mlx5_vdpa_cq cq;
@@ -1814,12 +1814,13 @@ static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd)
id = mlx5vdpa16_to_cpu(mvdev, vlan);
mac_vlan_del(ndev, ndev->config.mac, id, true);
+ status = VIRTIO_NET_OK;
break;
default:
- break;
-}
+ break;
+ }
-return status;
+ return status;
}
static void mlx5_cvq_kick_handler(struct work_struct *work)
diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
index d503848b3b6e..776ad7496f53 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -1345,9 +1345,9 @@ static int vduse_create_dev(struct vduse_dev_config *config,
dev->minor = ret;
dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
- dev->dev = device_create(vduse_class, NULL,
- MKDEV(MAJOR(vduse_major), dev->minor),
- dev, "%s", config->name);
+ dev->dev = device_create_with_groups(vduse_class, NULL,
+ MKDEV(MAJOR(vduse_major), dev->minor),
+ dev, vduse_dev_groups, "%s", config->name);
if (IS_ERR(dev->dev)) {
ret = PTR_ERR(dev->dev);
goto err_dev;
@@ -1596,7 +1596,6 @@ static int vduse_init(void)
return PTR_ERR(vduse_class);
vduse_class->devnode = vduse_devnode;
- vduse_class->dev_groups = vduse_dev_groups;
ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
if (ret)
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 935a1d0ddb97..5ad2596c6e8a 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -499,6 +499,8 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
ops->set_vq_ready(vdpa, idx, s.num);
return 0;
case VHOST_VDPA_GET_VRING_GROUP:
+ if (!ops->get_vq_group)
+ return -EOPNOTSUPP;
s.index = idx;
s.num = ops->get_vq_group(vdpa, idx);
if (s.num >= vdpa->ngroups)
diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index 14e2043d7685..eab55accf381 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -292,7 +292,7 @@ __vringh_iov(struct vringh *vrh, u16 i,
int (*copy)(const struct vringh *vrh,
void *dst, const void *src, size_t len))
{
- int err, count = 0, up_next, desc_max;
+ int err, count = 0, indirect_count = 0, up_next, desc_max;
struct vring_desc desc, *descs;
struct vringh_range range = { -1ULL, 0 }, slowrange;
bool slow = false;
@@ -349,7 +349,12 @@ __vringh_iov(struct vringh *vrh, u16 i,
continue;
}
- if (count++ == vrh->vring.num) {
+ if (up_next == -1)
+ count++;
+ else
+ indirect_count++;
+
+ if (count > vrh->vring.num || indirect_count > desc_max) {
vringh_bad("Descriptor loop in %p", descs);
err = -ELOOP;
goto fail;
@@ -411,6 +416,7 @@ __vringh_iov(struct vringh *vrh, u16 i,
i = return_from_indirect(vrh, &up_next,
&descs, &desc_max);
slow = false;
+ indirect_count = 0;
} else
break;
}
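
The fix above bounds direct and indirect descriptors with separate counters, since an indirect table has its own length (desc_max) and previously a self-referencing indirect chain could evade the single counter. A standalone sketch of the bounded walk (structure simplified):

    #include <stdio.h>

    #define RING_NUM 4    /* table size, like vrh->vring.num */
    #define END 0xffff

    struct desc { unsigned next; };

    /*
     * Walk a descriptor chain, bounding direct and indirect descriptors
     * separately so a looped indirect table cannot spin forever.
     */
    static int walk(const struct desc *descs, unsigned i, int in_indirect,
                    int desc_max)
    {
        int count = 0, indirect_count = 0;

        while (i != END) {
            if (!in_indirect)
                count++;
            else
                indirect_count++;
            if (count > RING_NUM || indirect_count > desc_max) {
                fprintf(stderr, "descriptor loop detected\n");
                return -1;
            }
            i = descs[i].next;
        }
        return 0;
    }

    int main(void)
    {
        /* An indirect table whose last entry points back at the first. */
        struct desc looped[3] = { {1}, {2}, {0} };
        struct desc sane[3] = { {1}, {2}, {END} };

        printf("sane:   %d\n", walk(sane, 0, 1, 3));
        printf("looped: %d\n", walk(looped, 0, 1, 3));
        return 0;
    }
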
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index f9a36bc7ac27..c9bec3813e94 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -255,7 +255,7 @@ static void vm_set_status(struct virtio_device *vdev, u8 status)
/*
* Per memory-barriers.txt, wmb() is not needed to guarantee
- * that the the cache coherent memory writes have completed
+ * that the cache coherent memory writes have completed
* before writing to the MMIO region.
*/
writel(status, vm_dev->base + VIRTIO_MMIO_STATUS);
@@ -701,6 +701,7 @@ static int vm_cmdline_set(const char *device,
if (!vm_cmdline_parent_registered) {
err = device_register(&vm_cmdline_parent);
if (err) {
+ put_device(&vm_cmdline_parent);
pr_err("Failed to register parent device!\n");
return err;
}
diff --git a/drivers/virtio/virtio_pci_modern_dev.c b/drivers/virtio/virtio_pci_modern_dev.c
index a0fa14f28a7f..b790f30b2b56 100644
--- a/drivers/virtio/virtio_pci_modern_dev.c
+++ b/drivers/virtio/virtio_pci_modern_dev.c
@@ -469,7 +469,7 @@ void vp_modern_set_status(struct virtio_pci_modern_device *mdev,
/*
* Per memory-barriers.txt, wmb() is not needed to guarantee
- * that the the cache coherent memory writes have completed
+ * that the cache coherent memory writes have completed
* before writing to the MMIO region.
*/
vp_iowrite8(status, &cfg->device_status);
diff --git a/drivers/watchdog/gxp-wdt.c b/drivers/watchdog/gxp-wdt.c
index b0b2d7a6fdde..2fd85be88278 100644
--- a/drivers/watchdog/gxp-wdt.c
+++ b/drivers/watchdog/gxp-wdt.c
@@ -172,3 +172,4 @@ module_platform_driver(gxp_wdt_driver);
MODULE_AUTHOR("Nick Hawkins <nick.hawkins@hpe.com>");
MODULE_AUTHOR("Jean-Marie Verdun <verdun@hpe.com>");
MODULE_DESCRIPTION("Driver for GXP watchdog timer");
+MODULE_LICENSE("GPL");
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 79df61fe0e59..baf2b152229e 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -152,7 +152,7 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry,
const unsigned char **wnames, *uname;
int i, n, l, clone, access;
struct v9fs_session_info *v9ses;
- struct p9_fid *fid, *old_fid = NULL;
+ struct p9_fid *fid, *old_fid;
v9ses = v9fs_dentry2v9ses(dentry);
access = v9ses->flags & V9FS_ACCESS_MASK;
@@ -194,13 +194,12 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry,
if (IS_ERR(fid))
return fid;
+ refcount_inc(&fid->count);
v9fs_fid_add(dentry->d_sb->s_root, fid);
}
/* If we are root ourself just return that */
- if (dentry->d_sb->s_root == dentry) {
- refcount_inc(&fid->count);
+ if (dentry->d_sb->s_root == dentry)
return fid;
- }
/*
* Do a multipath walk with attached root.
* When walking parent we need to make sure we
@@ -212,6 +211,7 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry,
fid = ERR_PTR(n);
goto err_out;
}
+ old_fid = fid;
clone = 1;
i = 0;
while (i < n) {
@@ -221,19 +221,15 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry,
* walk to ensure none of the path components change
*/
fid = p9_client_walk(fid, l, &wnames[i], clone);
+ /* non-cloning walk will return the same fid */
+ if (fid != old_fid) {
+ p9_client_clunk(old_fid);
+ old_fid = fid;
+ }
if (IS_ERR(fid)) {
- if (old_fid) {
- /*
- * If we fail, clunk fid which are mapping
- * to path component and not the last component
- * of the path.
- */
- p9_client_clunk(old_fid);
- }
kfree(wnames);
goto err_out;
}
- old_fid = fid;
i += l;
clone = 0;
}
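
The rewritten loop keeps exactly one reference live: after every step the previous fid is clunked unless the non-cloning walk returned the same fid. A toy model of that ownership pattern, with malloc/free standing in for fid references:

    #include <stdio.h>
    #include <stdlib.h>

    struct fid { int id; };

    /* A cloning walk returns a new handle; a non-cloning one mutates in place. */
    static struct fid *walk_one(struct fid *fid, int clone)
    {
        if (!clone)
            return fid;
        struct fid *next = malloc(sizeof(*next));
        next->id = fid->id + 1;
        return next;
    }

    int main(void)
    {
        struct fid *fid = malloc(sizeof(*fid));
        struct fid *old_fid = fid;
        int clone = 1;

        fid->id = 0;
        for (int i = 0; i < 3; i++) {
            fid = walk_one(fid, clone);
            /* A non-cloning walk returns the same fid: don't free it. */
            if (fid != old_fid) {
                free(old_fid);    /* p9_client_clunk(old_fid) */
                old_fid = fid;
            }
            clone = 0;    /* only the first step clones */
        }
        printf("final fid id=%d\n", fid->id);
        free(fid);
        return 0;
    }
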
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 1b219c21d15e..6acabc2e7dc9 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -124,7 +124,7 @@ static inline struct v9fs_inode *V9FS_I(const struct inode *inode)
static inline struct fscache_cookie *v9fs_inode_cookie(struct v9fs_inode *v9inode)
{
#ifdef CONFIG_9P_FSCACHE
- return netfs_i_cookie(&v9inode->netfs.inode);
+ return netfs_i_cookie(&v9inode->netfs);
#else
return NULL;
#endif
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 90c6c1ba03ab..d0833fa69faf 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -58,21 +58,33 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq)
*/
static int v9fs_init_request(struct netfs_io_request *rreq, struct file *file)
{
+ struct inode *inode = file_inode(file);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
struct p9_fid *fid = file->private_data;
+ BUG_ON(!fid);
+
+ /* we might need to read from a fid that was opened write-only
+ * for read-modify-write of page cache, use the writeback fid
+ * for that */
+ if (rreq->origin == NETFS_READ_FOR_WRITE &&
+ (fid->mode & O_ACCMODE) == O_WRONLY) {
+ fid = v9inode->writeback_fid;
+ BUG_ON(!fid);
+ }
+
refcount_inc(&fid->count);
rreq->netfs_priv = fid;
return 0;
}
/**
- * v9fs_req_cleanup - Cleanup request initialized by v9fs_init_request
- * @mapping: unused mapping of request to cleanup
- * @priv: private data to cleanup, a fid, guaranted non-null.
+ * v9fs_free_request - Cleanup request initialized by v9fs_init_request
+ * @rreq: The I/O request to clean up
*/
-static void v9fs_req_cleanup(struct address_space *mapping, void *priv)
+static void v9fs_free_request(struct netfs_io_request *rreq)
{
- struct p9_fid *fid = priv;
+ struct p9_fid *fid = rreq->netfs_priv;
p9_client_clunk(fid);
}
@@ -94,9 +106,9 @@ static int v9fs_begin_cache_operation(struct netfs_io_request *rreq)
const struct netfs_request_ops v9fs_req_ops = {
.init_request = v9fs_init_request,
+ .free_request = v9fs_free_request,
.begin_cache_operation = v9fs_begin_cache_operation,
.issue_read = v9fs_issue_read,
- .cleanup = v9fs_req_cleanup,
};
/**
@@ -274,7 +286,7 @@ static int v9fs_write_begin(struct file *filp, struct address_space *mapping,
* file. We need to do this before we get a lock on the page in case
* there's more than one writer competing for the same cache block.
*/
- retval = netfs_write_begin(filp, mapping, pos, len, &folio, fsdata);
+ retval = netfs_write_begin(&v9inode->netfs, filp, mapping, pos, len, &folio, fsdata);
if (retval < 0)
return retval;
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index e660c6348b9d..3d8297714772 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -252,7 +252,8 @@ void v9fs_free_inode(struct inode *inode)
*/
static void v9fs_set_netfs_context(struct inode *inode)
{
- netfs_inode_init(inode, &v9fs_req_ops);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
+ netfs_inode_init(&v9inode->netfs, &v9fs_req_ops);
}
int v9fs_init_inode(struct v9fs_session_info *v9ses,
@@ -1250,15 +1251,15 @@ static const char *v9fs_vfs_get_link(struct dentry *dentry,
return ERR_PTR(-ECHILD);
v9ses = v9fs_dentry2v9ses(dentry);
- fid = v9fs_fid_lookup(dentry);
+ if (!v9fs_proto_dotu(v9ses))
+ return ERR_PTR(-EBADF);
+
p9_debug(P9_DEBUG_VFS, "%pd\n", dentry);
+ fid = v9fs_fid_lookup(dentry);
if (IS_ERR(fid))
return ERR_CAST(fid);
- if (!v9fs_proto_dotu(v9ses))
- return ERR_PTR(-EBADF);
-
st = p9_client_stat(fid);
p9_client_clunk(fid);
if (IS_ERR(st))
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index d17502a738a9..b6eb1160296c 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -274,6 +274,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
if (IS_ERR(ofid)) {
err = PTR_ERR(ofid);
p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
+ p9_client_clunk(dfid);
goto out;
}
@@ -285,6 +286,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
if (err) {
p9_debug(P9_DEBUG_VFS, "Failed to get acl values in creat %d\n",
err);
+ p9_client_clunk(dfid);
goto error;
}
err = p9_client_create_dotl(ofid, name, v9fs_open_to_dotl_flags(flags),
@@ -292,6 +294,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
if (err < 0) {
p9_debug(P9_DEBUG_VFS, "p9_client_open_dotl failed in creat %d\n",
err);
+ p9_client_clunk(dfid);
goto error;
}
v9fs_invalidate_inode_attr(dir);
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
index 3a5bbffdf053..d7d9402ff718 100644
--- a/fs/afs/dynroot.c
+++ b/fs/afs/dynroot.c
@@ -76,7 +76,7 @@ struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root)
/* there shouldn't be an existing inode */
BUG_ON(!(inode->i_state & I_NEW));
- netfs_inode_init(inode, NULL);
+ netfs_inode_init(&vnode->netfs, NULL);
inode->i_size = 0;
inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
if (root) {
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 4de7af7c2f09..42118a4f3383 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -382,17 +382,17 @@ static int afs_check_write_begin(struct file *file, loff_t pos, unsigned len,
return test_bit(AFS_VNODE_DELETED, &vnode->flags) ? -ESTALE : 0;
}
-static void afs_priv_cleanup(struct address_space *mapping, void *netfs_priv)
+static void afs_free_request(struct netfs_io_request *rreq)
{
- key_put(netfs_priv);
+ key_put(rreq->netfs_priv);
}
const struct netfs_request_ops afs_req_ops = {
.init_request = afs_init_request,
+ .free_request = afs_free_request,
.begin_cache_operation = afs_begin_cache_operation,
.check_write_begin = afs_check_write_begin,
.issue_read = afs_issue_read,
- .cleanup = afs_priv_cleanup,
};
int afs_write_inode(struct inode *inode, struct writeback_control *wbc)
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 22811e9eacf5..64dab70d4a4f 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -58,7 +58,7 @@ static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *paren
*/
static void afs_set_netfs_context(struct afs_vnode *vnode)
{
- netfs_inode_init(&vnode->netfs.inode, &afs_req_ops);
+ netfs_inode_init(&vnode->netfs, &afs_req_ops);
}
/*
@@ -745,7 +745,8 @@ int afs_getattr(struct user_namespace *mnt_userns, const struct path *path,
_enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation);
- if (!(query_flags & AT_STATX_DONT_SYNC) &&
+ if (vnode->volume &&
+ !(query_flags & AT_STATX_DONT_SYNC) &&
!test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
key = afs_request_key(vnode->volume->cell);
if (IS_ERR(key))
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 984b113a9107..a6f25d9e75b5 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -670,7 +670,7 @@ struct afs_vnode {
static inline struct fscache_cookie *afs_vnode_cache(struct afs_vnode *vnode)
{
#ifdef CONFIG_AFS_FSCACHE
- return netfs_i_cookie(&vnode->netfs.inode);
+ return netfs_i_cookie(&vnode->netfs);
#else
return NULL;
#endif
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 94a3d247924b..cc665cef0abe 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -9,8 +9,7 @@
#include <linux/slab.h>
#include "internal.h"
-unsigned __read_mostly afs_volume_gc_delay = 10;
-unsigned __read_mostly afs_volume_record_life = 60 * 60;
+static unsigned __read_mostly afs_volume_record_life = 60 * 60;
/*
* Insert a volume into a cell. If there's an existing volume record, that is
diff --git a/fs/afs/write.c b/fs/afs/write.c
index f80a6096d91c..2c885b22de34 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -60,7 +60,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
* file. We need to do this before we get a lock on the page in case
* there's more than one writer competing for the same cache block.
*/
- ret = netfs_write_begin(file, mapping, pos, len, &folio, fsdata);
+ ret = netfs_write_begin(&vnode->netfs, file, mapping, pos, len, &folio, fsdata);
if (ret < 0)
return ret;
diff --git a/fs/attr.c b/fs/attr.c
index 66899b6e9bd8..dbe996b0dedf 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -61,9 +61,15 @@ static bool chgrp_ok(struct user_namespace *mnt_userns,
const struct inode *inode, kgid_t gid)
{
kgid_t kgid = i_gid_into_mnt(mnt_userns, inode);
- if (uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode)) &&
- (in_group_p(gid) || gid_eq(gid, inode->i_gid)))
- return true;
+ if (uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode))) {
+ kgid_t mapped_gid;
+
+ if (gid_eq(gid, inode->i_gid))
+ return true;
+ mapped_gid = mapped_kgid_fs(mnt_userns, i_user_ns(inode), gid);
+ if (in_group_p(mapped_gid))
+ return true;
+ }
if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_CHOWN))
return true;
if (gid_eq(kgid, INVALID_GID) &&
@@ -123,12 +129,20 @@ int setattr_prepare(struct user_namespace *mnt_userns, struct dentry *dentry,
/* Make sure a caller can chmod. */
if (ia_valid & ATTR_MODE) {
+ kgid_t mapped_gid;
+
if (!inode_owner_or_capable(mnt_userns, inode))
return -EPERM;
+
+ if (ia_valid & ATTR_GID)
+ mapped_gid = mapped_kgid_fs(mnt_userns,
+ i_user_ns(inode), attr->ia_gid);
+ else
+ mapped_gid = i_gid_into_mnt(mnt_userns, inode);
+
/* Also check the setgid bit! */
- if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
- i_gid_into_mnt(mnt_userns, inode)) &&
- !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
+ if (!in_group_p(mapped_gid) &&
+ !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
attr->ia_mode &= ~S_ISGID;
}
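
Both hunks map the requested gid into the filesystem's view before calling in_group_p(), instead of testing the caller-supplied value directly. A heavily simplified model of why that matters, with a linear offset standing in for a real idmapping (all names and numbers hypothetical; real kernels use mapped_kgid_fs()/make_kgid()):

    #include <stdbool.h>
    #include <stdio.h>

    /* Toy idmapping: fs-gid = mnt-gid + OFFSET. Illustrative only. */
    #define OFFSET 100000

    static int mapped_gid_fs(int gid)
    {
        return gid + OFFSET;
    }

    /* Pretend the caller's supplementary groups, in fs terms. */
    static const int fs_groups[] = { 100004, 100027 };

    static bool in_group_p(int fs_gid)
    {
        for (unsigned i = 0; i < sizeof(fs_groups) / sizeof(fs_groups[0]); i++)
            if (fs_groups[i] == fs_gid)
                return true;
        return false;
    }

    int main(void)
    {
        int requested_gid = 4;    /* gid as seen inside the mount's userns */

        /* Buggy: checks the unmapped value against fs-side groups. */
        printf("unmapped check: %d\n", in_group_p(requested_gid));
        /* Fixed: map into the filesystem idmapping first. */
        printf("mapped check:   %d\n", in_group_p(mapped_gid_fs(requested_gid)));
        return 0;
    }
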
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 89e94ea2fef5..4ba005c41983 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4632,6 +4632,17 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
int ret;
set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags);
+
+ /*
+ * We may have the reclaim task running and relocating a data block group,
+ * in which case it may create delayed iputs. So stop it before we park
+ * the cleaner kthread, otherwise we can get new delayed iputs after
+ * parking the cleaner, and that can make the async reclaim task hang
+ * if it's waiting for delayed iputs to complete, since the cleaner is
+ * parked and cannot run delayed iputs - this will make us hang when
+ * trying to stop the async reclaim task.
+ */
+ cancel_work_sync(&fs_info->reclaim_bgs_work);
/*
* We don't want the cleaner to start new transactions, add more delayed
* iputs, etc. while we're closing. We can't use kthread_stop() yet
@@ -4672,8 +4683,6 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
cancel_work_sync(&fs_info->async_data_reclaim_work);
cancel_work_sync(&fs_info->preempt_reclaim_work);
- cancel_work_sync(&fs_info->reclaim_bgs_work);
-
/* Cancel or finish ongoing discard work */
btrfs_discard_cleanup(fs_info);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index b1fdc6a26c76..6627dd7875ee 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -763,6 +763,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
compress_force = false;
no_compress++;
} else {
+ btrfs_err(info, "unrecognized compression value %s",
+ args[0].from);
ret = -EINVAL;
goto out;
}
@@ -821,8 +823,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
case Opt_thread_pool:
ret = match_int(&args[0], &intarg);
if (ret) {
+ btrfs_err(info, "unrecognized thread_pool value %s",
+ args[0].from);
goto out;
} else if (intarg == 0) {
+ btrfs_err(info, "invalid value 0 for thread_pool");
ret = -EINVAL;
goto out;
}
@@ -883,8 +888,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
break;
case Opt_ratio:
ret = match_int(&args[0], &intarg);
- if (ret)
+ if (ret) {
+ btrfs_err(info, "unrecognized metadata_ratio value %s",
+ args[0].from);
goto out;
+ }
info->metadata_ratio = intarg;
btrfs_info(info, "metadata ratio %u",
info->metadata_ratio);
@@ -901,6 +909,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
btrfs_set_and_info(info, DISCARD_ASYNC,
"turning on async discard");
} else {
+ btrfs_err(info, "unrecognized discard mode value %s",
+ args[0].from);
ret = -EINVAL;
goto out;
}
@@ -933,6 +943,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
btrfs_set_and_info(info, FREE_SPACE_TREE,
"enabling free space tree");
} else {
+ btrfs_err(info, "unrecognized space_cache value %s",
+ args[0].from);
ret = -EINVAL;
goto out;
}
@@ -1014,8 +1026,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
break;
case Opt_check_integrity_print_mask:
ret = match_int(&args[0], &intarg);
- if (ret)
+ if (ret) {
+ btrfs_err(info,
+ "unrecognized check_integrity_print_mask value %s",
+ args[0].from);
goto out;
+ }
info->check_integrity_print_mask = intarg;
btrfs_info(info, "check_integrity_print_mask 0x%x",
info->check_integrity_print_mask);
@@ -1030,13 +1046,15 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
goto out;
#endif
case Opt_fatal_errors:
- if (strcmp(args[0].from, "panic") == 0)
+ if (strcmp(args[0].from, "panic") == 0) {
btrfs_set_opt(info->mount_opt,
PANIC_ON_FATAL_ERROR);
- else if (strcmp(args[0].from, "bug") == 0)
+ } else if (strcmp(args[0].from, "bug") == 0) {
btrfs_clear_opt(info->mount_opt,
PANIC_ON_FATAL_ERROR);
- else {
+ } else {
+ btrfs_err(info, "unrecognized fatal_errors value %s",
+ args[0].from);
ret = -EINVAL;
goto out;
}
@@ -1044,8 +1062,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
case Opt_commit_interval:
intarg = 0;
ret = match_int(&args[0], &intarg);
- if (ret)
+ if (ret) {
+ btrfs_err(info, "unrecognized commit_interval value %s",
+ args[0].from);
+ ret = -EINVAL;
goto out;
+ }
if (intarg == 0) {
btrfs_info(info,
"using default commit interval %us",
@@ -1059,8 +1081,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
break;
case Opt_rescue:
ret = parse_rescue_options(info, args[0].from);
- if (ret < 0)
+ if (ret < 0) {
+ btrfs_err(info, "unrecognized rescue value %s",
+ args[0].from);
goto out;
+ }
break;
#ifdef CONFIG_BTRFS_DEBUG
case Opt_fragment_all:
@@ -1985,6 +2010,14 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
if (ret)
goto restore;
+ /* V1 cache is not supported for subpage mount. */
+ if (fs_info->sectorsize < PAGE_SIZE && btrfs_test_opt(fs_info, SPACE_CACHE)) {
+ btrfs_warn(fs_info,
+ "v1 space cache is not supported for page size %lu with sectorsize %u",
+ PAGE_SIZE, fs_info->sectorsize);
+ ret = -EINVAL;
+ goto restore;
+ }
btrfs_remount_begin(fs_info, old_opts, *flags);
btrfs_resize_thread_pool(fs_info,
fs_info->thread_pool_size, old_thread_pool_size);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index f5f116ed1b9e..6dee88815491 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -394,11 +394,10 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)
return 0;
}
-static void ceph_readahead_cleanup(struct address_space *mapping, void *priv)
+static void ceph_netfs_free_request(struct netfs_io_request *rreq)
{
- struct inode *inode = mapping->host;
- struct ceph_inode_info *ci = ceph_inode(inode);
- int got = (uintptr_t)priv;
+ struct ceph_inode_info *ci = ceph_inode(rreq->inode);
+ int got = (uintptr_t)rreq->netfs_priv;
if (got)
ceph_put_cap_refs(ci, got);
@@ -406,12 +405,12 @@ static void ceph_readahead_cleanup(struct address_space *mapping, void *priv)
const struct netfs_request_ops ceph_netfs_ops = {
.init_request = ceph_init_request,
+ .free_request = ceph_netfs_free_request,
.begin_cache_operation = ceph_begin_cache_operation,
.issue_read = ceph_netfs_issue_read,
.expand_readahead = ceph_netfs_expand_readahead,
.clamp_length = ceph_netfs_clamp_length,
.check_write_begin = ceph_netfs_check_write_begin,
- .cleanup = ceph_readahead_cleanup,
};
#ifdef CONFIG_CEPH_FSCACHE
@@ -1322,10 +1321,11 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
struct page **pagep, void **fsdata)
{
struct inode *inode = file_inode(file);
+ struct ceph_inode_info *ci = ceph_inode(inode);
struct folio *folio = NULL;
int r;
- r = netfs_write_begin(file, inode->i_mapping, pos, len, &folio, NULL);
+ r = netfs_write_begin(&ci->netfs, file, inode->i_mapping, pos, len, &folio, NULL);
if (r == 0)
folio_wait_fscache(folio);
if (r < 0) {
diff --git a/fs/ceph/cache.h b/fs/ceph/cache.h
index 26c6ae06e2f4..dc502daac49a 100644
--- a/fs/ceph/cache.h
+++ b/fs/ceph/cache.h
@@ -28,7 +28,7 @@ void ceph_fscache_invalidate(struct inode *inode, bool dio_write);
static inline struct fscache_cookie *ceph_fscache_cookie(struct ceph_inode_info *ci)
{
- return netfs_i_cookie(&ci->netfs.inode);
+ return netfs_i_cookie(&ci->netfs);
}
static inline void ceph_fscache_resize(struct inode *inode, loff_t to)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 650746b3ba99..56c53ab3618e 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -460,7 +460,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
dout("alloc_inode %p\n", &ci->netfs.inode);
/* Set parameters for the netfs library */
- netfs_inode_init(&ci->netfs.inode, &ceph_netfs_ops);
+ netfs_inode_init(&ci->netfs, &ceph_netfs_ops);
spin_lock_init(&ci->i_ceph_lock);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index c85d9a378325..8f2e003e0590 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1086,7 +1086,7 @@ struct file_system_type cifs_fs_type = {
};
MODULE_ALIAS_FS("cifs");
-static struct file_system_type smb3_fs_type = {
+struct file_system_type smb3_fs_type = {
.owner = THIS_MODULE,
.name = "smb3",
.init_fs_context = smb3_init_fs_context,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index dd7e070ca243..b17be47a8e59 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -38,7 +38,7 @@ static inline unsigned long cifs_get_time(struct dentry *dentry)
return (unsigned long) dentry->d_fsdata;
}
-extern struct file_system_type cifs_fs_type;
+extern struct file_system_type cifs_fs_type, smb3_fs_type;
extern const struct address_space_operations cifs_addr_ops;
extern const struct address_space_operations cifs_addr_ops_smallbuf;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index d46702f5a663..1849e3411487 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -97,6 +97,10 @@ static int reconn_set_ipaddr_from_hostname(struct TCP_Server_Info *server)
if (!server->hostname)
return -EINVAL;
+ /* if server hostname isn't populated, there's nothing to do here */
+ if (server->hostname[0] == '\0')
+ return 0;
+
len = strlen(server->hostname) + 3;
unc = kmalloc(len, GFP_KERNEL);
diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h
index ab9a51d0125c..aa3b941a5555 100644
--- a/fs/cifs/fscache.h
+++ b/fs/cifs/fscache.h
@@ -61,7 +61,7 @@ void cifs_fscache_fill_coherency(struct inode *inode,
static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode)
{
- return netfs_i_cookie(inode);
+ return netfs_i_cookie(&CIFS_I(inode)->netfs);
}
static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags)
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index cbc3b433f502..c69e1240d730 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -1211,18 +1211,23 @@ static struct super_block *__cifs_get_super(void (*f)(struct super_block *, void
.data = data,
.sb = NULL,
};
+ struct file_system_type **fs_type = (struct file_system_type *[]) {
+ &cifs_fs_type, &smb3_fs_type, NULL,
+ };
- iterate_supers_type(&cifs_fs_type, f, &sd);
-
- if (!sd.sb)
- return ERR_PTR(-EINVAL);
- /*
- * Grab an active reference in order to prevent automounts (DFS links)
- * of expiring and then freeing up our cifs superblock pointer while
- * we're doing failover.
- */
- cifs_sb_active(sd.sb);
- return sd.sb;
+ for (; *fs_type; fs_type++) {
+ iterate_supers_type(*fs_type, f, &sd);
+ if (sd.sb) {
+ /*
+ * Grab an active reference in order to prevent automounts (DFS links)
+ * of expiring and then freeing up our cifs superblock pointer while
+ * we're doing failover.
+ */
+ cifs_sb_active(sd.sb);
+ return sd.sb;
+ }
+ }
+ return ERR_PTR(-EINVAL);
}
static void __cifs_put_super(struct super_block *sb)
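
The NULL-terminated array of filesystem types lets one helper serve both the cifs and smb3 mounts. A minimal sketch of the same compound-literal iteration idiom:

    #include <stdio.h>

    struct file_system_type { const char *name; };

    static struct file_system_type cifs_fs = { "cifs" };
    static struct file_system_type smb3_fs = { "smb3" };

    int main(void)
    {
        /* NULL-terminated list, as in __cifs_get_super() above. */
        struct file_system_type **fs_type = (struct file_system_type *[]) {
            &cifs_fs, &smb3_fs, NULL,
        };

        for (; *fs_type; fs_type++)
            printf("would iterate supers of %s\n", (*fs_type)->name);
        return 0;
    }
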
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 3b7915af1f62..d417de354d9d 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -81,6 +81,9 @@ cifs_ses_get_chan_index(struct cifs_ses *ses,
}
/* If we didn't find the channel, it is likely a bug */
+ if (server)
+ cifs_dbg(VFS, "unable to get chan index for server: 0x%llx",
+ server->conn_id);
WARN_ON(1);
return 0;
}
@@ -301,7 +304,10 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
/* Auth */
ctx.domainauto = ses->domainAuto;
ctx.domainname = ses->domainName;
- ctx.server_hostname = ses->server->hostname;
+
+ /* no hostname for extra channels */
+ ctx.server_hostname = "";
+
ctx.username = ses->user_name;
ctx.password = ses->password;
ctx.sectype = ses->sectype;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 0e8c85249579..b515140bad8d 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -288,6 +288,9 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
mutex_unlock(&ses->session_mutex);
rc = -EHOSTDOWN;
goto failed;
+ } else if (rc) {
+ mutex_unlock(&ses->session_mutex);
+ goto out;
}
} else {
mutex_unlock(&ses->session_mutex);
@@ -5151,6 +5154,8 @@ SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
data = &info;
size = sizeof(struct smb2_file_eof_info);
+ trace_smb3_set_eof(xid, persistent_fid, tcon->tid, tcon->ses->Suid, le64_to_cpu(*eof));
+
return send_set_info(xid, tcon, persistent_fid, volatile_fid,
pid, FILE_END_OF_FILE_INFORMATION, SMB2_O_INFO_FILE,
0, 1, &data, &size);
diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h
index 2be5e0c8564d..6b88dc2e364f 100644
--- a/fs/cifs/trace.h
+++ b/fs/cifs/trace.h
@@ -121,6 +121,44 @@ DEFINE_SMB3_RW_DONE_EVENT(query_dir_done);
DEFINE_SMB3_RW_DONE_EVENT(zero_done);
DEFINE_SMB3_RW_DONE_EVENT(falloc_done);
+/* For logging successful set EOF (truncate) */
+DECLARE_EVENT_CLASS(smb3_eof_class,
+ TP_PROTO(unsigned int xid,
+ __u64 fid,
+ __u32 tid,
+ __u64 sesid,
+ __u64 offset),
+ TP_ARGS(xid, fid, tid, sesid, offset),
+ TP_STRUCT__entry(
+ __field(unsigned int, xid)
+ __field(__u64, fid)
+ __field(__u32, tid)
+ __field(__u64, sesid)
+ __field(__u64, offset)
+ ),
+ TP_fast_assign(
+ __entry->xid = xid;
+ __entry->fid = fid;
+ __entry->tid = tid;
+ __entry->sesid = sesid;
+ __entry->offset = offset;
+ ),
+ TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx",
+ __entry->xid, __entry->sesid, __entry->tid, __entry->fid,
+ __entry->offset)
+)
+
+#define DEFINE_SMB3_EOF_EVENT(name) \
+DEFINE_EVENT(smb3_eof_class, smb3_##name, \
+ TP_PROTO(unsigned int xid, \
+ __u64 fid, \
+ __u32 tid, \
+ __u64 sesid, \
+ __u64 offset), \
+ TP_ARGS(xid, fid, tid, sesid, offset))
+
+DEFINE_SMB3_EOF_EVENT(set_eof);
+
/*
* For handle based calls other than read and write, and get/set info
*/
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 2c2f179b6977..43de293cef56 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -672,17 +672,14 @@ int ext2_empty_dir (struct inode * inode)
void *page_addr = NULL;
struct page *page = NULL;
unsigned long i, npages = dir_pages(inode);
- int dir_has_error = 0;
for (i = 0; i < npages; i++) {
char *kaddr;
ext2_dirent * de;
- page = ext2_get_page(inode, i, dir_has_error, &page_addr);
+ page = ext2_get_page(inode, i, 0, &page_addr);
- if (IS_ERR(page)) {
- dir_has_error = 1;
- continue;
- }
+ if (IS_ERR(page))
+ goto not_empty;
kaddr = page_addr;
de = (ext2_dirent *)kaddr;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 3dce7d058985..84c0eb55071d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -829,7 +829,7 @@ int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
ext4_debug("ext4_get_block_unwritten: inode %lu, create flag %d\n",
inode->i_ino, create);
return _ext4_get_block(inode, iblock, bh_result,
- EXT4_GET_BLOCKS_IO_CREATE_EXT);
+ EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT);
}
/* Maximum number of blocks we map for direct IO at once. */
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 9f12f29bc346..9e06334771a3 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4104,6 +4104,15 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
size = size >> bsbits;
start = start_off >> bsbits;
+ /*
+ * For tiny groups (smaller than 8MB) the chosen allocation
+ * alignment may be larger than group size. Make sure the
+ * alignment does not move the allocation to a different group, which
+ * makes mballoc fail assertions later.
+ */
+ start = max(start, rounddown(ac->ac_o_ex.fe_logical,
+ (ext4_lblk_t)EXT4_BLOCKS_PER_GROUP(ac->ac_sb)));
+
/* don't cover already allocated blocks in selected range */
if (ar->pleft && start <= ar->lleft) {
size -= ar->lleft + 1 - start;
@@ -4176,7 +4185,22 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
}
rcu_read_unlock();
- if (start + size <= ac->ac_o_ex.fe_logical &&
+ /*
+ * In this function "start" and "size" are normalized for better
+ * alignment and length such that we could preallocate more blocks.
+ * This normalization is done such that the original request of
+ * ac->ac_o_ex.fe_logical & fe_len always lies within the "start" and
+ * "size" boundaries.
+ * (Note fe_len can be relaxed since the FS block allocation API does not
+ * provide any guarantee on the number of contiguous blocks allocated, as
+ * that depends upon the free space left, etc.)
+ * In case of inode pa, later we use the allocated blocks
+ * [pa_start + fe_logical - pa_lstart, fe_len/size] from the preallocated
+ * range of goal/best blocks [start, size] to put it at the
+ * ac_o_ex.fe_logical extent of this inode.
+ * (See ext4_mb_use_inode_pa() for more details)
+ */
+ if (start + size <= ac->ac_o_ex.fe_logical ||
start > ac->ac_o_ex.fe_logical) {
ext4_msg(ac->ac_sb, KERN_ERR,
"start %lu, size %lu, fe_logical %lu",
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 7a5353a8cfd7..42f590518b4c 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -438,7 +438,7 @@ int ext4_ext_migrate(struct inode *inode)
/*
* Worst case we can touch the allocation bitmaps and a block
- * group descriptor block. We do need need to worry about
+ * group descriptor block. We do need to worry about
* credits for modifying the quota inode.
*/
handle = ext4_journal_start(inode, EXT4_HT_MIGRATE,
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 47d0ca4c795b..db4ba99d1ceb 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1929,7 +1929,8 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
struct dx_hash_info *hinfo)
{
unsigned blocksize = dir->i_sb->s_blocksize;
- unsigned count, continued;
+ unsigned continued;
+ int count;
struct buffer_head *bh2;
ext4_lblk_t newblock;
u32 hash2;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 14695e2b5042..97fa7b4c645f 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -465,7 +465,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
/*
* In the first loop we prepare and mark buffers to submit. We have to
* mark all buffers in the page before submitting so that
- * end_page_writeback() cannot be called from ext4_bio_end_io() when IO
+ * end_page_writeback() cannot be called from ext4_end_bio() when IO
* on the first buffer finishes and we are still working on submitting
* the second buffer.
*/
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 90a941d20dff..8b70a4701293 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -54,6 +54,16 @@ int ext4_resize_begin(struct super_block *sb)
return -EPERM;
/*
+ * If the number of reserved GDT blocks is non-zero, the resize_inode
+ * feature should always be set.
+ */
+ if (EXT4_SB(sb)->s_es->s_reserved_gdt_blocks &&
+ !ext4_has_feature_resize_inode(sb)) {
+ ext4_error(sb, "resize_inode disabled but reserved GDT blocks non-zero");
+ return -EFSCORRUPTED;
+ }
+
+ /*
* If we are not using the primary superblock/GDT copy don't resize,
* because the user tools have no way of handling this. Probably a
* bad time to do it anyways.
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 450c918d68fc..845f2f8aee5f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -87,7 +87,7 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb,
static int ext4_validate_options(struct fs_context *fc);
static int ext4_check_opt_consistency(struct fs_context *fc,
struct super_block *sb);
-static int ext4_apply_options(struct fs_context *fc, struct super_block *sb);
+static void ext4_apply_options(struct fs_context *fc, struct super_block *sb);
static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param);
static int ext4_get_tree(struct fs_context *fc);
static int ext4_reconfigure(struct fs_context *fc);
@@ -1870,31 +1870,12 @@ ext4_sb_read_encoding(const struct ext4_super_block *es)
}
#endif
-static int ext4_set_test_dummy_encryption(struct super_block *sb, char *arg)
-{
-#ifdef CONFIG_FS_ENCRYPTION
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- int err;
-
- err = fscrypt_set_test_dummy_encryption(sb, arg,
- &sbi->s_dummy_enc_policy);
- if (err) {
- ext4_msg(sb, KERN_WARNING,
- "Error while setting test dummy encryption [%d]", err);
- return err;
- }
- ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled");
-#endif
- return 0;
-}
-
#define EXT4_SPEC_JQUOTA (1 << 0)
#define EXT4_SPEC_JQFMT (1 << 1)
#define EXT4_SPEC_DATAJ (1 << 2)
#define EXT4_SPEC_SB_BLOCK (1 << 3)
#define EXT4_SPEC_JOURNAL_DEV (1 << 4)
#define EXT4_SPEC_JOURNAL_IOPRIO (1 << 5)
-#define EXT4_SPEC_DUMMY_ENCRYPTION (1 << 6)
#define EXT4_SPEC_s_want_extra_isize (1 << 7)
#define EXT4_SPEC_s_max_batch_time (1 << 8)
#define EXT4_SPEC_s_min_batch_time (1 << 9)
@@ -1911,7 +1892,7 @@ static int ext4_set_test_dummy_encryption(struct super_block *sb, char *arg)
struct ext4_fs_context {
char *s_qf_names[EXT4_MAXQUOTAS];
- char *test_dummy_enc_arg;
+ struct fscrypt_dummy_policy dummy_enc_policy;
int s_jquota_fmt; /* Format of quota to use */
#ifdef CONFIG_EXT4_DEBUG
int s_fc_debug_max_replay;
@@ -1953,7 +1934,7 @@ static void ext4_fc_free(struct fs_context *fc)
for (i = 0; i < EXT4_MAXQUOTAS; i++)
kfree(ctx->s_qf_names[i]);
- kfree(ctx->test_dummy_enc_arg);
+ fscrypt_free_dummy_policy(&ctx->dummy_enc_policy);
kfree(ctx);
}
@@ -2029,6 +2010,29 @@ static int unnote_qf_name(struct fs_context *fc, int qtype)
}
#endif
+static int ext4_parse_test_dummy_encryption(const struct fs_parameter *param,
+ struct ext4_fs_context *ctx)
+{
+ int err;
+
+ if (!IS_ENABLED(CONFIG_FS_ENCRYPTION)) {
+ ext4_msg(NULL, KERN_WARNING,
+ "test_dummy_encryption option not supported");
+ return -EINVAL;
+ }
+ err = fscrypt_parse_test_dummy_encryption(param,
+ &ctx->dummy_enc_policy);
+ if (err == -EINVAL) {
+ ext4_msg(NULL, KERN_WARNING,
+ "Value of option \"%s\" is unrecognized", param->key);
+ } else if (err == -EEXIST) {
+ ext4_msg(NULL, KERN_WARNING,
+ "Conflicting test_dummy_encryption options");
+ return -EINVAL;
+ }
+ return err;
+}
+
#define EXT4_SET_CTX(name) \
static inline void ctx_set_##name(struct ext4_fs_context *ctx, \
unsigned long flag) \
@@ -2291,29 +2295,7 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
ctx->spec |= EXT4_SPEC_JOURNAL_IOPRIO;
return 0;
case Opt_test_dummy_encryption:
-#ifdef CONFIG_FS_ENCRYPTION
- if (param->type == fs_value_is_flag) {
- ctx->spec |= EXT4_SPEC_DUMMY_ENCRYPTION;
- ctx->test_dummy_enc_arg = NULL;
- return 0;
- }
- if (*param->string &&
- !(!strcmp(param->string, "v1") ||
- !strcmp(param->string, "v2"))) {
- ext4_msg(NULL, KERN_WARNING,
- "Value of option \"%s\" is unrecognized",
- param->key);
- return -EINVAL;
- }
- ctx->spec |= EXT4_SPEC_DUMMY_ENCRYPTION;
- ctx->test_dummy_enc_arg = kmemdup_nul(param->string, param->size,
- GFP_KERNEL);
- return 0;
-#else
- ext4_msg(NULL, KERN_WARNING,
- "test_dummy_encryption option not supported");
- return -EINVAL;
-#endif
+ return ext4_parse_test_dummy_encryption(param, ctx);
case Opt_dax:
case Opt_dax_type:
#ifdef CONFIG_FS_DAX
@@ -2504,7 +2486,8 @@ parse_failed:
if (s_ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)
m_ctx->journal_ioprio = s_ctx->journal_ioprio;
- ret = ext4_apply_options(fc, sb);
+ ext4_apply_options(fc, sb);
+ ret = 0;
out_free:
if (fc) {
@@ -2673,11 +2656,11 @@ err_jquota_specified:
static int ext4_check_test_dummy_encryption(const struct fs_context *fc,
struct super_block *sb)
{
-#ifdef CONFIG_FS_ENCRYPTION
const struct ext4_fs_context *ctx = fc->fs_private;
const struct ext4_sb_info *sbi = EXT4_SB(sb);
+ int err;
- if (!(ctx->spec & EXT4_SPEC_DUMMY_ENCRYPTION))
+ if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy))
return 0;
if (!ext4_has_feature_encrypt(sb)) {
@@ -2691,14 +2674,46 @@ static int ext4_check_test_dummy_encryption(const struct fs_context *fc,
* needed to allow it to be set or changed during remount. We do allow
* it to be specified during remount, but only if there is no change.
*/
- if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE &&
- !sbi->s_dummy_enc_policy.policy) {
+ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
+ if (fscrypt_dummy_policies_equal(&sbi->s_dummy_enc_policy,
+ &ctx->dummy_enc_policy))
+ return 0;
ext4_msg(NULL, KERN_WARNING,
- "Can't set test_dummy_encryption on remount");
+ "Can't set or change test_dummy_encryption on remount");
return -EINVAL;
}
-#endif /* CONFIG_FS_ENCRYPTION */
- return 0;
+ /* Also make sure s_mount_opts didn't contain a conflicting value. */
+ if (fscrypt_is_dummy_policy_set(&sbi->s_dummy_enc_policy)) {
+ if (fscrypt_dummy_policies_equal(&sbi->s_dummy_enc_policy,
+ &ctx->dummy_enc_policy))
+ return 0;
+ ext4_msg(NULL, KERN_WARNING,
+ "Conflicting test_dummy_encryption options");
+ return -EINVAL;
+ }
+ /*
+ * fscrypt_add_test_dummy_key() technically changes the super_block, so
+ * technically it should be delayed until ext4_apply_options() like the
+ * other changes. But since we never get here for remounts (see above),
+ * and this is the last chance to report errors, we do it here.
+ */
+ err = fscrypt_add_test_dummy_key(sb, &ctx->dummy_enc_policy);
+ if (err)
+ ext4_msg(NULL, KERN_WARNING,
+ "Error adding test dummy encryption key [%d]", err);
+ return err;
+}
+
+static void ext4_apply_test_dummy_encryption(struct ext4_fs_context *ctx,
+ struct super_block *sb)
+{
+ if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy) ||
+ /* if already set, it was already verified to be the same */
+ fscrypt_is_dummy_policy_set(&EXT4_SB(sb)->s_dummy_enc_policy))
+ return;
+ EXT4_SB(sb)->s_dummy_enc_policy = ctx->dummy_enc_policy;
+ memset(&ctx->dummy_enc_policy, 0, sizeof(ctx->dummy_enc_policy));
+ ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled");
}
static int ext4_check_opt_consistency(struct fs_context *fc,
@@ -2785,11 +2800,10 @@ fail_dax_change_remount:
return ext4_check_quota_consistency(fc, sb);
}
-static int ext4_apply_options(struct fs_context *fc, struct super_block *sb)
+static void ext4_apply_options(struct fs_context *fc, struct super_block *sb)
{
struct ext4_fs_context *ctx = fc->fs_private;
struct ext4_sb_info *sbi = fc->s_fs_info;
- int ret = 0;
sbi->s_mount_opt &= ~ctx->mask_s_mount_opt;
sbi->s_mount_opt |= ctx->vals_s_mount_opt;
@@ -2825,11 +2839,7 @@ static int ext4_apply_options(struct fs_context *fc, struct super_block *sb)
#endif
ext4_apply_quota_options(fc, sb);
-
- if (ctx->spec & EXT4_SPEC_DUMMY_ENCRYPTION)
- ret = ext4_set_test_dummy_encryption(sb, ctx->test_dummy_enc_arg);
-
- return ret;
+ ext4_apply_test_dummy_encryption(ctx, sb);
}
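
The hunks above complete ext4's move to the fs_context three-phase shape: ext4_parse_param() only records what was requested, the ext4_check_* step is the last point that may fail, and ext4_apply_options() becomes void because applying must never fail. A minimal user-space sketch of that shape, with illustrative names rather than kernel API:

#include <stdbool.h>
#include <string.h>

struct fs_ctx   { bool dummy_enc; };	/* options parsed, not yet applied */
struct sb_state { bool dummy_enc; };	/* live superblock state */

static int parse_param(struct fs_ctx *ctx, const char *opt)
{
	/* phase 1: record the request; report malformed input here */
	if (!strcmp(opt, "test_dummy_encryption")) {
		ctx->dummy_enc = true;
		return 0;
	}
	return -1;
}

static int check_consistency(const struct fs_ctx *ctx,
			     const struct sb_state *sb, bool remount)
{
	/* phase 2: last chance to fail; remount must not change the option */
	if (remount && ctx->dummy_enc != sb->dummy_enc)
		return -1;
	return 0;
}

static void apply(const struct fs_ctx *ctx, struct sb_state *sb)
{
	/* phase 3: cannot fail, hence the void return in the hunk above */
	if (ctx->dummy_enc)
		sb->dummy_enc = true;
}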
@@ -4552,9 +4562,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
if (err < 0)
goto failed_mount;
- err = ext4_apply_options(fc, sb);
- if (err < 0)
- goto failed_mount;
+ ext4_apply_options(fc, sb);
#if IS_ENABLED(CONFIG_UNICODE)
if (ext4_has_feature_casefold(sb) && !sb->s_encoding) {
@@ -5302,14 +5310,6 @@ no_journal:
err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
GFP_KERNEL);
}
- /*
- * Update the checksum after updating free space/inode
- * counters. Otherwise the superblock can have an incorrect
- * checksum in the buffer cache until it is written out and
- * e2fsprogs programs trying to open a file system immediately
- * after it is mounted can fail.
- */
- ext4_superblock_csum_set(sb);
if (!err)
err = percpu_counter_init(&sbi->s_dirs_counter,
ext4_count_dirs(sb), GFP_KERNEL);
@@ -5367,6 +5367,14 @@ no_journal:
EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
ext4_orphan_cleanup(sb, es);
EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
+ /*
+ * Update the checksum after updating free space/inode counters and
+ * ext4_orphan_cleanup. Otherwise the superblock can have an incorrect
+ * checksum in the buffer cache until it is written out and
+ * e2fsprogs programs trying to open a file system immediately
+ * after it is mounted can fail.
+ */
+ ext4_superblock_csum_set(sb);
if (needs_recovery) {
ext4_msg(sb, KERN_INFO, "recovery complete");
err = ext4_mark_recovery_complete(sb, es);
@@ -5898,7 +5906,6 @@ static void ext4_update_super(struct super_block *sb)
static int ext4_commit_super(struct super_block *sb)
{
struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
- int error = 0;
if (!sbh)
return -EINVAL;
@@ -5907,6 +5914,13 @@ static int ext4_commit_super(struct super_block *sb)
ext4_update_super(sb);
+ lock_buffer(sbh);
+ /* Buffer got discarded which means block device got invalidated */
+ if (!buffer_mapped(sbh)) {
+ unlock_buffer(sbh);
+ return -EIO;
+ }
+
if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
/*
* Oh, dear. A previous attempt to write the
@@ -5921,17 +5935,21 @@ static int ext4_commit_super(struct super_block *sb)
clear_buffer_write_io_error(sbh);
set_buffer_uptodate(sbh);
}
- BUFFER_TRACE(sbh, "marking dirty");
- mark_buffer_dirty(sbh);
- error = __sync_dirty_buffer(sbh,
- REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0));
+ get_bh(sbh);
+ /* Clear potential dirty bit if it was a journalled update */
+ clear_buffer_dirty(sbh);
+ sbh->b_end_io = end_buffer_write_sync;
+ submit_bh(REQ_OP_WRITE,
+ REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0), sbh);
+ wait_on_buffer(sbh);
if (buffer_write_io_error(sbh)) {
ext4_msg(sb, KERN_ERR, "I/O error while writing "
"superblock");
clear_buffer_write_io_error(sbh);
set_buffer_uptodate(sbh);
+ return -EIO;
}
- return error;
+ return 0;
}
/*
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 042325349098..564e28a1aa94 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1895,11 +1895,10 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
unlock_buffer(bs->bh);
ea_bdebug(bs->bh, "cloning");
- s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
+ s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
error = -ENOMEM;
if (s->base == NULL)
goto cleanup;
- memcpy(s->base, BHDR(bs->bh), bs->bh->b_size);
s->first = ENTRY(header(s->base)+1);
header(s->base)->h_refcount = cpu_to_le32(1);
s->here = ENTRY(s->base + offset);
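
kmemdup() folds the allocate-then-copy pair into a single call, leaving one failure point instead of two. The same idiom outside the kernel, as a small sketch:

#include <stdlib.h>
#include <string.h>

/* user-space analogue of kmemdup(): allocate and copy in one step */
static void *memdup(const void *src, size_t len)
{
	void *p = malloc(len);

	if (p)
		memcpy(p, src, len);
	return p;
}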
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 3aab4182fd89..d3ee4fc532fa 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -298,8 +298,8 @@ struct io_buffer_list {
/* below is for ring provided buffers */
__u16 buf_nr_pages;
__u16 nr_entries;
- __u32 head;
- __u32 mask;
+ __u16 head;
+ __u16 mask;
};
struct io_buffer {
@@ -576,7 +576,6 @@ struct io_close {
struct file *file;
int fd;
u32 file_slot;
- u32 flags;
};
struct io_timeout_data {
@@ -784,12 +783,6 @@ struct io_msg {
u32 len;
};
-struct io_nop {
- struct file *file;
- u64 extra1;
- u64 extra2;
-};
-
struct io_async_connect {
struct sockaddr_storage address;
};
@@ -851,6 +844,7 @@ enum {
REQ_F_SINGLE_POLL_BIT,
REQ_F_DOUBLE_POLL_BIT,
REQ_F_PARTIAL_IO_BIT,
+ REQ_F_CQE32_INIT_BIT,
REQ_F_APOLL_MULTISHOT_BIT,
/* keep async read/write and isreg together and in order */
REQ_F_SUPPORT_NOWAIT_BIT,
@@ -920,6 +914,8 @@ enum {
REQ_F_PARTIAL_IO = BIT(REQ_F_PARTIAL_IO_BIT),
/* fast poll multishot mode */
REQ_F_APOLL_MULTISHOT = BIT(REQ_F_APOLL_MULTISHOT_BIT),
+ /* ->extra1 and ->extra2 are initialised */
+ REQ_F_CQE32_INIT = BIT(REQ_F_CQE32_INIT_BIT),
};
struct async_poll {
@@ -994,7 +990,6 @@ struct io_kiocb {
struct io_msg msg;
struct io_xattr xattr;
struct io_socket sock;
- struct io_nop nop;
struct io_uring_cmd uring_cmd;
};
@@ -1121,7 +1116,6 @@ static const struct io_op_def io_op_defs[] = {
[IORING_OP_NOP] = {
.audit_skip = 1,
.iopoll = 1,
- .buffer_select = 1,
},
[IORING_OP_READV] = {
.needs_file = 1,
@@ -1729,9 +1723,16 @@ static void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags)
if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)))
return;
- /* don't recycle if we already did IO to this buffer */
- if (req->flags & REQ_F_PARTIAL_IO)
+ /*
+ * For legacy provided buffer mode, don't recycle if we already did
+ * IO to this buffer. For ring-mapped provided buffer mode, we should
+ * increment ring->head to explicitly monopolize the buffer to avoid
+ * multiple use.
+ */
+ if ((req->flags & REQ_F_BUFFER_SELECTED) &&
+ (req->flags & REQ_F_PARTIAL_IO))
return;
+
/*
* We don't need to recycle for REQ_F_BUFFER_RING, we can just clear
* the flag and hence ensure that bl->head doesn't get incremented.
@@ -1739,8 +1740,13 @@ static void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags)
*/
if (req->flags & REQ_F_BUFFER_RING) {
if (req->buf_list) {
- req->buf_index = req->buf_list->bgid;
- req->flags &= ~REQ_F_BUFFER_RING;
+ if (req->flags & REQ_F_PARTIAL_IO) {
+ req->buf_list->head++;
+ req->buf_list = NULL;
+ } else {
+ req->buf_index = req->buf_list->bgid;
+ req->flags &= ~REQ_F_BUFFER_RING;
+ }
}
return;
}
@@ -2441,94 +2447,66 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
return true;
}
-static inline bool __io_fill_cqe(struct io_ring_ctx *ctx, u64 user_data,
- s32 res, u32 cflags)
+static inline bool __io_fill_cqe_req(struct io_ring_ctx *ctx,
+ struct io_kiocb *req)
{
struct io_uring_cqe *cqe;
- /*
- * If we can't get a cq entry, userspace overflowed the
- * submission (by quite a lot). Increment the overflow count in
- * the ring.
- */
- cqe = io_get_cqe(ctx);
- if (likely(cqe)) {
- WRITE_ONCE(cqe->user_data, user_data);
- WRITE_ONCE(cqe->res, res);
- WRITE_ONCE(cqe->flags, cflags);
- return true;
- }
- return io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
-}
+ if (!(ctx->flags & IORING_SETUP_CQE32)) {
+ trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
+ req->cqe.res, req->cqe.flags, 0, 0);
-static inline bool __io_fill_cqe_req_filled(struct io_ring_ctx *ctx,
- struct io_kiocb *req)
-{
- struct io_uring_cqe *cqe;
+ /*
+ * If we can't get a cq entry, userspace overflowed the
+ * submission (by quite a lot). Increment the overflow count in
+ * the ring.
+ */
+ cqe = io_get_cqe(ctx);
+ if (likely(cqe)) {
+ memcpy(cqe, &req->cqe, sizeof(*cqe));
+ return true;
+ }
- trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
- req->cqe.res, req->cqe.flags, 0, 0);
+ return io_cqring_event_overflow(ctx, req->cqe.user_data,
+ req->cqe.res, req->cqe.flags,
+ 0, 0);
+ } else {
+ u64 extra1 = 0, extra2 = 0;
- /*
- * If we can't get a cq entry, userspace overflowed the
- * submission (by quite a lot). Increment the overflow count in
- * the ring.
- */
- cqe = io_get_cqe(ctx);
- if (likely(cqe)) {
- memcpy(cqe, &req->cqe, sizeof(*cqe));
- return true;
- }
- return io_cqring_event_overflow(ctx, req->cqe.user_data,
- req->cqe.res, req->cqe.flags, 0, 0);
-}
+ if (req->flags & REQ_F_CQE32_INIT) {
+ extra1 = req->extra1;
+ extra2 = req->extra2;
+ }
-static inline bool __io_fill_cqe32_req_filled(struct io_ring_ctx *ctx,
- struct io_kiocb *req)
-{
- struct io_uring_cqe *cqe;
- u64 extra1 = req->extra1;
- u64 extra2 = req->extra2;
+ trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
+ req->cqe.res, req->cqe.flags, extra1, extra2);
- trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
- req->cqe.res, req->cqe.flags, extra1, extra2);
+ /*
+ * If we can't get a cq entry, userspace overflowed the
+ * submission (by quite a lot). Increment the overflow count in
+ * the ring.
+ */
+ cqe = io_get_cqe(ctx);
+ if (likely(cqe)) {
+ memcpy(cqe, &req->cqe, sizeof(struct io_uring_cqe));
+ WRITE_ONCE(cqe->big_cqe[0], extra1);
+ WRITE_ONCE(cqe->big_cqe[1], extra2);
+ return true;
+ }
- /*
- * If we can't get a cq entry, userspace overflowed the
- * submission (by quite a lot). Increment the overflow count in
- * the ring.
- */
- cqe = io_get_cqe(ctx);
- if (likely(cqe)) {
- memcpy(cqe, &req->cqe, sizeof(struct io_uring_cqe));
- cqe->big_cqe[0] = extra1;
- cqe->big_cqe[1] = extra2;
- return true;
+ return io_cqring_event_overflow(ctx, req->cqe.user_data,
+ req->cqe.res, req->cqe.flags,
+ extra1, extra2);
}
-
- return io_cqring_event_overflow(ctx, req->cqe.user_data, req->cqe.res,
- req->cqe.flags, extra1, extra2);
-}
-
-static inline bool __io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
-{
- trace_io_uring_complete(req->ctx, req, req->cqe.user_data, res, cflags, 0, 0);
- return __io_fill_cqe(req->ctx, req->cqe.user_data, res, cflags);
}
-static inline void __io_fill_cqe32_req(struct io_kiocb *req, s32 res, u32 cflags,
- u64 extra1, u64 extra2)
+static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
+ s32 res, u32 cflags)
{
- struct io_ring_ctx *ctx = req->ctx;
struct io_uring_cqe *cqe;
- if (WARN_ON_ONCE(!(ctx->flags & IORING_SETUP_CQE32)))
- return;
- if (req->flags & REQ_F_CQE_SKIP)
- return;
-
- trace_io_uring_complete(ctx, req, req->cqe.user_data, res, cflags,
- extra1, extra2);
+ ctx->cq_extra++;
+ trace_io_uring_complete(ctx, NULL, user_data, res, cflags, 0, 0);
/*
* If we can't get a cq entry, userspace overflowed the
@@ -2537,23 +2515,17 @@ static inline void __io_fill_cqe32_req(struct io_kiocb *req, s32 res, u32 cflags
*/
cqe = io_get_cqe(ctx);
if (likely(cqe)) {
- WRITE_ONCE(cqe->user_data, req->cqe.user_data);
+ WRITE_ONCE(cqe->user_data, user_data);
WRITE_ONCE(cqe->res, res);
WRITE_ONCE(cqe->flags, cflags);
- WRITE_ONCE(cqe->big_cqe[0], extra1);
- WRITE_ONCE(cqe->big_cqe[1], extra2);
- return;
- }
- io_cqring_event_overflow(ctx, req->cqe.user_data, res, cflags, extra1, extra2);
-}
-
-static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
- s32 res, u32 cflags)
-{
- ctx->cq_extra++;
- trace_io_uring_complete(ctx, NULL, user_data, res, cflags, 0, 0);
- return __io_fill_cqe(ctx, user_data, res, cflags);
+ if (ctx->flags & IORING_SETUP_CQE32) {
+ WRITE_ONCE(cqe->big_cqe[0], 0);
+ WRITE_ONCE(cqe->big_cqe[1], 0);
+ }
+ return true;
+ }
+ return io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
}
static void __io_req_complete_put(struct io_kiocb *req)
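
With the fill paths unified above, a CQE32 ring always has its two extra words written: either the request's extra1/extra2 when REQ_F_CQE32_INIT is set, or zeroes for auxiliary completions. A hedged user-space sketch of consuming such completions; it assumes a liburing build that exposes IORING_SETUP_CQE32 and the big_cqe[] tail of struct io_uring_cqe:

#include <liburing.h>
#include <stdio.h>

/* the ring must have been created with params.flags |= IORING_SETUP_CQE32 */
static int drain_one_big_cqe(struct io_uring *ring)
{
	struct io_uring_cqe *cqe;
	int ret = io_uring_wait_cqe(ring, &cqe);

	if (ret)
		return ret;
	printf("res=%d extra1=%llu extra2=%llu\n", cqe->res,
	       (unsigned long long)cqe->big_cqe[0],
	       (unsigned long long)cqe->big_cqe[1]);
	io_uring_cqe_seen(ring, cqe);
	return 0;
}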
@@ -2590,16 +2562,11 @@ static void __io_req_complete_put(struct io_kiocb *req)
static void __io_req_complete_post(struct io_kiocb *req, s32 res,
u32 cflags)
{
- if (!(req->flags & REQ_F_CQE_SKIP))
- __io_fill_cqe_req(req, res, cflags);
- __io_req_complete_put(req);
-}
-
-static void __io_req_complete_post32(struct io_kiocb *req, s32 res,
- u32 cflags, u64 extra1, u64 extra2)
-{
- if (!(req->flags & REQ_F_CQE_SKIP))
- __io_fill_cqe32_req(req, res, cflags, extra1, extra2);
+ if (!(req->flags & REQ_F_CQE_SKIP)) {
+ req->cqe.res = res;
+ req->cqe.flags = cflags;
+ __io_fill_cqe_req(req->ctx, req);
+ }
__io_req_complete_put(req);
}
@@ -2614,18 +2581,6 @@ static void io_req_complete_post(struct io_kiocb *req, s32 res, u32 cflags)
io_cqring_ev_posted(ctx);
}
-static void io_req_complete_post32(struct io_kiocb *req, s32 res,
- u32 cflags, u64 extra1, u64 extra2)
-{
- struct io_ring_ctx *ctx = req->ctx;
-
- spin_lock(&ctx->completion_lock);
- __io_req_complete_post32(req, res, cflags, extra1, extra2);
- io_commit_cqring(ctx);
- spin_unlock(&ctx->completion_lock);
- io_cqring_ev_posted(ctx);
-}
-
static inline void io_req_complete_state(struct io_kiocb *req, s32 res,
u32 cflags)
{
@@ -2643,19 +2598,6 @@ static inline void __io_req_complete(struct io_kiocb *req, unsigned issue_flags,
io_req_complete_post(req, res, cflags);
}
-static inline void __io_req_complete32(struct io_kiocb *req,
- unsigned int issue_flags, s32 res,
- u32 cflags, u64 extra1, u64 extra2)
-{
- if (issue_flags & IO_URING_F_COMPLETE_DEFER) {
- io_req_complete_state(req, res, cflags);
- req->extra1 = extra1;
- req->extra2 = extra2;
- } else {
- io_req_complete_post32(req, res, cflags, extra1, extra2);
- }
-}
-
static inline void io_req_complete(struct io_kiocb *req, s32 res)
{
if (res < 0)
@@ -3202,12 +3144,8 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
struct io_kiocb *req = container_of(node, struct io_kiocb,
comp_list);
- if (!(req->flags & REQ_F_CQE_SKIP)) {
- if (!(ctx->flags & IORING_SETUP_CQE32))
- __io_fill_cqe_req_filled(ctx, req);
- else
- __io_fill_cqe32_req_filled(ctx, req);
- }
+ if (!(req->flags & REQ_F_CQE_SKIP))
+ __io_fill_cqe_req(ctx, req);
}
io_commit_cqring(ctx);
@@ -3326,7 +3264,9 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
nr_events++;
if (unlikely(req->flags & REQ_F_CQE_SKIP))
continue;
- __io_fill_cqe_req(req, req->cqe.res, io_put_kbuf(req, 0));
+
+ req->cqe.flags = io_put_kbuf(req, 0);
+ __io_fill_cqe_req(req->ctx, req);
}
if (unlikely(!nr_events))
@@ -3677,6 +3617,20 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
int ret;
kiocb->ki_pos = READ_ONCE(sqe->off);
+ /* used for fixed read/write too - just read unconditionally */
+ req->buf_index = READ_ONCE(sqe->buf_index);
+
+ if (req->opcode == IORING_OP_READ_FIXED ||
+ req->opcode == IORING_OP_WRITE_FIXED) {
+ struct io_ring_ctx *ctx = req->ctx;
+ u16 index;
+
+ if (unlikely(req->buf_index >= ctx->nr_user_bufs))
+ return -EFAULT;
+ index = array_index_nospec(req->buf_index, ctx->nr_user_bufs);
+ req->imu = ctx->user_bufs[index];
+ io_req_set_rsrc_node(req, ctx, 0);
+ }
ioprio = READ_ONCE(sqe->ioprio);
if (ioprio) {
@@ -3689,12 +3643,9 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
kiocb->ki_ioprio = get_current_ioprio();
}
- req->imu = NULL;
req->rw.addr = READ_ONCE(sqe->addr);
req->rw.len = READ_ONCE(sqe->len);
req->rw.flags = READ_ONCE(sqe->rw_flags);
- /* used for fixed read/write too - just read unconditionally */
- req->buf_index = READ_ONCE(sqe->buf_index);
return 0;
}
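
Moving the fixed-buffer lookup to prep time keeps the Spectre-v1 hardening intact: the index is bounds-checked and then clamped with array_index_nospec(), so a mispredicted branch cannot speculatively read past the table. The generic form of that pattern, with illustrative types:

#include <linux/nospec.h>

struct item;

static struct item *lookup(struct item **table, unsigned int nr,
			   unsigned int idx)
{
	if (idx >= nr)				/* architectural bounds check */
		return NULL;
	idx = array_index_nospec(idx, nr);	/* clamped under speculation too */
	return table[idx];
}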
@@ -3826,20 +3777,9 @@ static int __io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter
static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter,
unsigned int issue_flags)
{
- struct io_mapped_ubuf *imu = req->imu;
- u16 index, buf_index = req->buf_index;
-
- if (likely(!imu)) {
- struct io_ring_ctx *ctx = req->ctx;
-
- if (unlikely(buf_index >= ctx->nr_user_bufs))
- return -EFAULT;
- io_req_set_rsrc_node(req, ctx, issue_flags);
- index = array_index_nospec(buf_index, ctx->nr_user_bufs);
- imu = READ_ONCE(ctx->user_bufs[index]);
- req->imu = imu;
- }
- return __io_import_fixed(req, rw, iter, imu);
+ if (WARN_ON_ONCE(!req->imu))
+ return -EFAULT;
+ return __io_import_fixed(req, rw, iter, req->imu);
}
static int io_buffer_add_list(struct io_ring_ctx *ctx,
@@ -3876,19 +3816,17 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
{
struct io_uring_buf_ring *br = bl->buf_ring;
struct io_uring_buf *buf;
- __u32 head = bl->head;
+ __u16 head = bl->head;
- if (unlikely(smp_load_acquire(&br->tail) == head)) {
- io_ring_submit_unlock(req->ctx, issue_flags);
+ if (unlikely(smp_load_acquire(&br->tail) == head))
return NULL;
- }
head &= bl->mask;
if (head < IO_BUFFER_LIST_BUF_PER_PAGE) {
buf = &br->bufs[head];
} else {
int off = head & (IO_BUFFER_LIST_BUF_PER_PAGE - 1);
- int index = head / IO_BUFFER_LIST_BUF_PER_PAGE - 1;
+ int index = head / IO_BUFFER_LIST_BUF_PER_PAGE;
buf = page_address(bl->buf_pages[index]);
buf += off;
}
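
The index fix above matters because each page of a mapped buffer ring holds IO_BUFFER_LIST_BUF_PER_PAGE entries: after masking, entry idx lives on page idx / PER_PAGE at offset idx & (PER_PAGE - 1), and the stray "- 1" pointed every lookup past the first page one page too low. A stand-alone check of the corrected arithmetic (the per-page count here is illustrative; the kernel derives it from PAGE_SIZE):

#include <stdint.h>
#include <stdio.h>

#define BUFS_PER_PAGE 32	/* illustrative power of two */

int main(void)
{
	uint16_t mask = 127;	/* 128-entry ring */

	for (uint16_t head = 30; head <= 34; head++) {
		uint16_t idx = head & mask;

		printf("head %u -> page %u, offset %u\n", head,
		       idx / BUFS_PER_PAGE, idx & (BUFS_PER_PAGE - 1));
	}
	return 0;
}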
@@ -3898,7 +3836,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
req->buf_list = bl;
req->buf_index = buf->bid;
- if (issue_flags & IO_URING_F_UNLOCKED) {
+ if (issue_flags & IO_URING_F_UNLOCKED || !file_can_poll(req->file)) {
/*
* If we came in unlocked, we have no choice but to consume the
* buffer here. This does mean it'll be pinned until the IO
@@ -5079,10 +5017,18 @@ void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
req->uring_cmd.task_work_cb = task_work_cb;
req->io_task_work.func = io_uring_cmd_work;
- io_req_task_prio_work_add(req);
+ io_req_task_work_add(req);
}
EXPORT_SYMBOL_GPL(io_uring_cmd_complete_in_task);
+static inline void io_req_set_cqe32_extra(struct io_kiocb *req,
+ u64 extra1, u64 extra2)
+{
+ req->extra1 = extra1;
+ req->extra2 = extra2;
+ req->flags |= REQ_F_CQE32_INIT;
+}
+
/*
* Called by consumers of io_uring_cmd, if they originally returned
* -EIOCBQUEUED upon receiving the command.
@@ -5093,10 +5039,10 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2)
if (ret < 0)
req_set_fail(req);
+
if (req->ctx->flags & IORING_SETUP_CQE32)
- __io_req_complete32(req, 0, ret, 0, res2, 0);
- else
- io_req_complete(req, ret);
+ io_req_set_cqe32_extra(req, res2, 0);
+ io_req_complete(req, ret);
}
EXPORT_SYMBOL_GPL(io_uring_cmd_done);
@@ -5258,14 +5204,6 @@ done:
static int io_nop_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- /*
- * If the ring is setup with CQE32, relay back addr/addr
- */
- if (req->ctx->flags & IORING_SETUP_CQE32) {
- req->nop.extra1 = READ_ONCE(sqe->addr);
- req->nop.extra2 = READ_ONCE(sqe->addr2);
- }
-
return 0;
}
@@ -5274,23 +5212,7 @@ static int io_nop_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
*/
static int io_nop(struct io_kiocb *req, unsigned int issue_flags)
{
- unsigned int cflags;
- void __user *buf;
-
- if (req->flags & REQ_F_BUFFER_SELECT) {
- size_t len = 1;
-
- buf = io_buffer_select(req, &len, issue_flags);
- if (!buf)
- return -ENOBUFS;
- }
-
- cflags = io_put_kbuf(req, issue_flags);
- if (!(req->ctx->flags & IORING_SETUP_CQE32))
- __io_req_complete(req, issue_flags, 0, cflags);
- else
- __io_req_complete32(req, issue_flags, 0, cflags,
- req->nop.extra1, req->nop.extra2);
+ __io_req_complete(req, issue_flags, 0, 0);
return 0;
}
@@ -5988,18 +5910,14 @@ static int io_statx(struct io_kiocb *req, unsigned int issue_flags)
static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- if (sqe->off || sqe->addr || sqe->len || sqe->buf_index)
+ if (sqe->off || sqe->addr || sqe->len || sqe->rw_flags || sqe->buf_index)
return -EINVAL;
if (req->flags & REQ_F_FIXED_FILE)
return -EBADF;
req->close.fd = READ_ONCE(sqe->fd);
req->close.file_slot = READ_ONCE(sqe->file_index);
- req->close.flags = READ_ONCE(sqe->close_flags);
- if (req->close.flags & ~IORING_CLOSE_FD_AND_FILE_SLOT)
- return -EINVAL;
- if (!(req->close.flags & IORING_CLOSE_FD_AND_FILE_SLOT) &&
- req->close.file_slot && req->close.fd)
+ if (req->close.file_slot && req->close.fd)
return -EINVAL;
return 0;
@@ -6015,8 +5933,7 @@ static int io_close(struct io_kiocb *req, unsigned int issue_flags)
if (req->close.file_slot) {
ret = io_close_fixed(req, issue_flags);
- if (ret || !(req->close.flags & IORING_CLOSE_FD_AND_FILE_SLOT))
- goto err;
+ goto err;
}
spin_lock(&files->file_lock);
@@ -8063,8 +7980,8 @@ static int io_files_update_with_index_alloc(struct io_kiocb *req,
if (ret < 0)
break;
if (copy_to_user(&fds[done], &ret, sizeof(ret))) {
- ret = -EFAULT;
__io_close_fixed(req, issue_flags, ret);
+ ret = -EFAULT;
break;
}
}
@@ -8773,6 +8690,7 @@ static void io_queue_async(struct io_kiocb *req, int ret)
* Queued up for async execution, worker will release
* submit reference when the iocb is actually submitted.
*/
+ io_kbuf_recycle(req, 0);
io_queue_iowq(req, NULL);
break;
case IO_APOLL_OK:
@@ -9788,11 +9706,19 @@ static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
{
+ unsigned nr = ctx->nr_user_files;
int ret;
if (!ctx->file_data)
return -ENXIO;
+
+ /*
+ * Quiesce may unlock ->uring_lock, and while it's not held
+ * prevent new requests using the table.
+ */
+ ctx->nr_user_files = 0;
ret = io_rsrc_ref_quiesce(ctx->file_data, ctx);
+ ctx->nr_user_files = nr;
if (!ret)
__io_sqe_files_unregister(ctx);
return ret;
@@ -10690,12 +10616,19 @@ static void __io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
static int io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
{
+ unsigned nr = ctx->nr_user_bufs;
int ret;
if (!ctx->buf_data)
return -ENXIO;
+ /*
+ * Quiesce may unlock ->uring_lock, and while it's not held
+ * prevent new requests using the table.
+ */
+ ctx->nr_user_bufs = 0;
ret = io_rsrc_ref_quiesce(ctx->buf_data, ctx);
+ ctx->nr_user_bufs = nr;
if (!ret)
__io_sqe_buffers_unregister(ctx);
return ret;
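
Both unregister paths above rely on the same trick: publish a zero table size before the quiesce can drop ->uring_lock, so any request submitted in that window fails its bounds check rather than touching a table that is being torn down. As a generic sketch, with illustrative names throughout:

static int table_unregister(struct table_ctx *ctx)
{
	unsigned int nr = ctx->nr_entries;
	int ret;

	ctx->nr_entries = 0;		/* lookups now see an empty table */
	ret = quiesce_references(ctx);	/* may drop and retake the lock */
	ctx->nr_entries = nr;		/* restore for the real teardown */
	if (!ret)
		free_table(ctx);
	return ret;
}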
@@ -13002,6 +12935,10 @@ static int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
if (!is_power_of_2(reg.ring_entries))
return -EINVAL;
+ /* cannot disambiguate full vs empty due to head/tail size */
+ if (reg.ring_entries >= 65536)
+ return -EINVAL;
+
if (unlikely(reg.bgid < BGID_ARRAY && !ctx->io_bl)) {
int ret = io_init_bl_list(ctx);
if (ret)
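
The new 65536-entry cap follows directly from shrinking head to __u16: occupancy is computed as tail - head in 16-bit arithmetic, so a ring holding exactly 65536 entries is indistinguishable from an empty one. A stand-alone demonstration:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t head = 0, tail = 0;

	printf("empty: used=%u\n", (uint16_t)(tail - head));
	tail += 65536;	/* produce a full 65536-entry ring: wraps to 0 */
	printf("full:  used=%u\n", (uint16_t)(tail - head));
	/* both lines print used=0: full and empty are ambiguous */
	return 0;
}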
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index e49bb0938376..e9c308ae475f 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -2114,7 +2114,7 @@ out:
/**
* jbd2_journal_try_to_free_buffers() - try to free page buffers.
* @journal: journal for operation
- * @page: to try and free
+ * @folio: Folio to detach data from.
*
* For all the buffers on this page,
* if they are fully written out ordered data, move them onto BUF_CLEAN
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index d37e012386f3..42f892c5712e 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -297,6 +297,7 @@ zero_out:
/**
* netfs_write_begin - Helper to prepare for writing
+ * @ctx: The netfs context
* @file: The file to read from
* @mapping: The mapping to read from
* @pos: File position at which the write will begin
@@ -326,12 +327,12 @@ zero_out:
*
* This is usable whether or not caching is enabled.
*/
-int netfs_write_begin(struct file *file, struct address_space *mapping,
+int netfs_write_begin(struct netfs_inode *ctx,
+ struct file *file, struct address_space *mapping,
loff_t pos, unsigned int len, struct folio **_folio,
void **_fsdata)
{
struct netfs_io_request *rreq;
- struct netfs_inode *ctx = netfs_inode(file_inode(file ));
struct folio *folio;
unsigned int fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
pgoff_t index = pos >> PAGE_SHIFT;
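
A hypothetical filesystem caller after the signature change; the netfs context embedded in the inode is now passed explicitly instead of being derived from 'file', which also works when no file is available:

static int my_fs_write_begin(struct file *file, struct address_space *mapping,
			     loff_t pos, unsigned int len,
			     struct folio **foliop, void **fsdata)
{
	struct my_inode *mi = MY_I(mapping->host);	/* illustrative */

	return netfs_write_begin(&mi->netfs, file, mapping, pos, len,
				 foliop, fsdata);
}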
diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index c6afa605b63b..e17cdf53f6a7 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c
@@ -75,10 +75,10 @@ static void netfs_free_request(struct work_struct *work)
struct netfs_io_request *rreq =
container_of(work, struct netfs_io_request, work);
- netfs_clear_subrequests(rreq, false);
- if (rreq->netfs_priv)
- rreq->netfs_ops->cleanup(rreq->mapping, rreq->netfs_priv);
trace_netfs_rreq(rreq, netfs_rreq_trace_free);
+ netfs_clear_subrequests(rreq, false);
+ if (rreq->netfs_ops->free_request)
+ rreq->netfs_ops->free_request(rreq);
if (rreq->cache_resources.ops)
rreq->cache_resources.ops->end_operation(&rreq->cache_resources);
kfree(rreq);
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index c8520284dda7..c1eda73254e1 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -288,6 +288,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
rv = NFS4_OK;
break;
case -ENOENT:
+ set_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags);
/* Embrace your forgetfulness! */
rv = NFS4ERR_NOMATCHING_LAYOUT;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index a8ecdd527662..0c4e8dd6aa96 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2124,6 +2124,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
}
goto out;
}
+ file->f_mode |= FMODE_CAN_ODIRECT;
err = nfs_finish_open(ctx, ctx->dentry, file, open_flags);
trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 03d3a270eff4..e88f6b18445e 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -93,6 +93,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
nfs_file_set_open_context(filp, ctx);
nfs_fscache_open_file(inode, filp);
err = 0;
+ filp->f_mode |= FMODE_CAN_ODIRECT;
out_put_ctx:
put_nfs_open_context(ctx);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 68a87be3e6f9..41a9b6b58fb9 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -469,6 +469,7 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
pnfs_clear_lseg_state(lseg, lseg_list);
pnfs_clear_layoutreturn_info(lo);
pnfs_free_returned_lsegs(lo, lseg_list, &range, 0);
+ set_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags);
if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) &&
!test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags))
pnfs_clear_layoutreturn_waitbit(lo);
@@ -1917,8 +1918,9 @@ static void nfs_layoutget_begin(struct pnfs_layout_hdr *lo)
static void nfs_layoutget_end(struct pnfs_layout_hdr *lo)
{
- if (atomic_dec_and_test(&lo->plh_outstanding))
- wake_up_var(&lo->plh_outstanding);
+ if (atomic_dec_and_test(&lo->plh_outstanding) &&
+ test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags))
+ wake_up_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN);
}
static bool pnfs_is_first_layoutget(struct pnfs_layout_hdr *lo)
@@ -2025,11 +2027,11 @@ lookup_again:
* If the layout segment list is empty, but there are outstanding
* layoutget calls, then they might be subject to a layoutrecall.
*/
- if ((list_empty(&lo->plh_segs) || !pnfs_layout_is_valid(lo)) &&
+ if (test_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags) &&
atomic_read(&lo->plh_outstanding) != 0) {
spin_unlock(&ino->i_lock);
- lseg = ERR_PTR(wait_var_event_killable(&lo->plh_outstanding,
- !atomic_read(&lo->plh_outstanding)));
+ lseg = ERR_PTR(wait_on_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN,
+ TASK_KILLABLE));
if (IS_ERR(lseg))
goto out_put_layout_hdr;
pnfs_put_layout_hdr(lo);
@@ -2152,6 +2154,12 @@ lookup_again:
case -ERECALLCONFLICT:
case -EAGAIN:
break;
+ case -ENODATA:
+ /* The server returned NFS4ERR_LAYOUTUNAVAILABLE */
+ pnfs_layout_set_fail_bit(
+ lo, pnfs_iomode_to_fail_bit(iomode));
+ lseg = NULL;
+ goto out_put_layout_hdr;
default:
if (!nfs_error_is_fatal(PTR_ERR(lseg))) {
pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
@@ -2407,7 +2415,8 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
goto out_forget;
}
- if (!pnfs_layout_is_valid(lo) && !pnfs_is_first_layoutget(lo))
+ if (test_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags) &&
+ !pnfs_is_first_layoutget(lo))
goto out_forget;
if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) {
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 07f11489e4e9..f331f067691b 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -105,6 +105,7 @@ enum {
NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */
NFS_LAYOUT_INODE_FREEING, /* The inode is being freed */
NFS_LAYOUT_HASHED, /* The layout visible */
+ NFS_LAYOUT_DRAIN,
};
enum layoutdriver_policy_flags {
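
The DRAIN conversion above swaps an open-coded wait_var_event() on a counter for the bit-wait API, letting the flag word double as the waitqueue key. The pattern in isolation; MY_DRAIN_BIT and the flag word are illustrative:

#include <linux/wait_bit.h>

static unsigned long flags;	/* illustrative flag word */
#define MY_DRAIN_BIT 0

static int waiter(void)
{
	/* blocks until the bit owner clears it, killable */
	return wait_on_bit(&flags, MY_DRAIN_BIT, TASK_KILLABLE);
}

static void waker(void)
{
	if (test_and_clear_bit(MY_DRAIN_BIT, &flags))
		wake_up_bit(&flags, MY_DRAIN_BIT);
}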
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index f172412447f5..9cb2d590c036 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -309,11 +309,12 @@ nfsd_file_put(struct nfsd_file *nf)
if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) {
nfsd_file_flush(nf);
nfsd_file_put_noref(nf);
- } else {
+ } else if (nf->nf_file) {
nfsd_file_put_noref(nf);
- if (nf->nf_file)
- nfsd_file_schedule_laundrette();
- }
+ nfsd_file_schedule_laundrette();
+ } else
+ nfsd_file_put_noref(nf);
+
if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
nfsd_file_gc();
}
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 836ab1b8ed7b..1824f61621a2 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -997,9 +997,11 @@ xfs_attr_set(
/*
* We have no control over the attribute names that userspace passes us
* to remove, so we have to allow the name lookup prior to attribute
- * removal to fail as well.
+ * removal to fail as well. Preserve the logged flag, since we need
+ * to pass that through to the logging code.
*/
- args->op_flags = XFS_DA_OP_OKNOENT;
+ args->op_flags = XFS_DA_OP_OKNOENT |
+ (args->op_flags & XFS_DA_OP_LOGGED);
if (args->value) {
XFS_STATS_INC(mp, xs_attr_set);
@@ -1439,12 +1441,11 @@ static int
xfs_attr_node_try_addname(
struct xfs_attr_intent *attr)
{
- struct xfs_da_args *args = attr->xattri_da_args;
struct xfs_da_state *state = attr->xattri_da_state;
struct xfs_da_state_blk *blk;
int error;
- trace_xfs_attr_node_addname(args);
+ trace_xfs_attr_node_addname(state->args);
blk = &state->path.blk[state->path.active-1];
ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
index e329da3e7afa..b4a2fc77017e 100644
--- a/fs/xfs/libxfs/xfs_attr.h
+++ b/fs/xfs/libxfs/xfs_attr.h
@@ -28,16 +28,6 @@ struct xfs_attr_list_context;
*/
#define ATTR_MAX_VALUELEN (64*1024) /* max length of a value */
-static inline bool xfs_has_larp(struct xfs_mount *mp)
-{
-#ifdef DEBUG
- /* Logged xattrs require a V5 super for log_incompat */
- return xfs_has_crc(mp) && xfs_globals.larp;
-#else
- return false;
-#endif
-}
-
/*
* Kernel-internal version of the attrlist cursor.
*/
@@ -624,7 +614,7 @@ static inline enum xfs_delattr_state
xfs_attr_init_replace_state(struct xfs_da_args *args)
{
args->op_flags |= XFS_DA_OP_ADDNAME | XFS_DA_OP_REPLACE;
- if (xfs_has_larp(args->dp->i_mount))
+ if (args->op_flags & XFS_DA_OP_LOGGED)
return xfs_attr_init_remove_state(args);
return xfs_attr_init_add_state(args);
}
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 15a990409463..37e7c33f6283 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -1530,7 +1530,7 @@ xfs_attr3_leaf_add_work(
if (tmp)
entry->flags |= XFS_ATTR_LOCAL;
if (args->op_flags & XFS_DA_OP_REPLACE) {
- if (!xfs_has_larp(mp))
+ if (!(args->op_flags & XFS_DA_OP_LOGGED))
entry->flags |= XFS_ATTR_INCOMPLETE;
if ((args->blkno2 == args->blkno) &&
(args->index2 <= args->index)) {
diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h
index d33b7686a0b3..ffa3df5b2893 100644
--- a/fs/xfs/libxfs/xfs_da_btree.h
+++ b/fs/xfs/libxfs/xfs_da_btree.h
@@ -92,6 +92,7 @@ typedef struct xfs_da_args {
#define XFS_DA_OP_NOTIME (1u << 5) /* don't update inode timestamps */
#define XFS_DA_OP_REMOVE (1u << 6) /* this is a remove operation */
#define XFS_DA_OP_RECOVERY (1u << 7) /* Log recovery operation */
+#define XFS_DA_OP_LOGGED (1u << 8) /* Use intent items to track op */
#define XFS_DA_OP_FLAGS \
{ XFS_DA_OP_JUSTCHECK, "JUSTCHECK" }, \
@@ -101,7 +102,8 @@ typedef struct xfs_da_args {
{ XFS_DA_OP_CILOOKUP, "CILOOKUP" }, \
{ XFS_DA_OP_NOTIME, "NOTIME" }, \
{ XFS_DA_OP_REMOVE, "REMOVE" }, \
- { XFS_DA_OP_RECOVERY, "RECOVERY" }
+ { XFS_DA_OP_RECOVERY, "RECOVERY" }, \
+ { XFS_DA_OP_LOGGED, "LOGGED" }
/*
* Storage for holding state during Btree searches and split/join ops.
diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c
index 4a28c2d77070..135d44133477 100644
--- a/fs/xfs/xfs_attr_item.c
+++ b/fs/xfs/xfs_attr_item.c
@@ -413,18 +413,20 @@ xfs_attr_create_intent(
struct xfs_mount *mp = tp->t_mountp;
struct xfs_attri_log_item *attrip;
struct xfs_attr_intent *attr;
+ struct xfs_da_args *args;
ASSERT(count == 1);
- if (!xfs_sb_version_haslogxattrs(&mp->m_sb))
- return NULL;
-
/*
* Each attr item only performs one attribute operation at a time, so
* this is a list of one
*/
attr = list_first_entry_or_null(items, struct xfs_attr_intent,
xattri_list);
+ args = attr->xattri_da_args;
+
+ if (!(args->op_flags & XFS_DA_OP_LOGGED))
+ return NULL;
/*
* Create a buffer to store the attribute name and value. This buffer
@@ -432,8 +434,6 @@ xfs_attr_create_intent(
* and the lower level xattr log items.
*/
if (!attr->xattri_nameval) {
- struct xfs_da_args *args = attr->xattri_da_args;
-
/*
* Transfer our reference to the name/value buffer to the
* deferred work state structure.
@@ -617,7 +617,10 @@ xfs_attri_item_recover(
args->namelen = nv->name.i_len;
args->hashval = xfs_da_hashname(args->name, args->namelen);
args->attr_filter = attrp->alfi_attr_filter & XFS_ATTRI_FILTER_MASK;
- args->op_flags = XFS_DA_OP_RECOVERY | XFS_DA_OP_OKNOENT;
+ args->op_flags = XFS_DA_OP_RECOVERY | XFS_DA_OP_OKNOENT |
+ XFS_DA_OP_LOGGED;
+
+ ASSERT(xfs_sb_version_haslogxattrs(&mp->m_sb));
switch (attr->xattri_op_flags) {
case XFS_ATTRI_OP_FLAGS_SET:
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 5a364a7d58fd..0d67ff8a8961 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1096,7 +1096,8 @@ xfs_flags2diflags2(
{
uint64_t di_flags2 =
(ip->i_diflags2 & (XFS_DIFLAG2_REFLINK |
- XFS_DIFLAG2_BIGTIME));
+ XFS_DIFLAG2_BIGTIME |
+ XFS_DIFLAG2_NREXT64));
if (xflags & FS_XFLAG_DAX)
di_flags2 |= XFS_DIFLAG2_DAX;
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index 35e13e125ec6..c325a28b89a8 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -68,6 +68,18 @@ xfs_attr_rele_log_assist(
xlog_drop_incompat_feat(mp->m_log);
}
+static inline bool
+xfs_attr_want_log_assist(
+ struct xfs_mount *mp)
+{
+#ifdef DEBUG
+ /* Logged xattrs require a V5 super for log_incompat */
+ return xfs_has_crc(mp) && xfs_globals.larp;
+#else
+ return false;
+#endif
+}
+
/*
* Set or remove an xattr, having grabbed the appropriate logging resources
* prior to calling libxfs.
@@ -80,11 +92,14 @@ xfs_attr_change(
bool use_logging = false;
int error;
- if (xfs_has_larp(mp)) {
+ ASSERT(!(args->op_flags & XFS_DA_OP_LOGGED));
+
+ if (xfs_attr_want_log_assist(mp)) {
error = xfs_attr_grab_log_assist(mp);
if (error)
return error;
+ args->op_flags |= XFS_DA_OP_LOGGED;
use_logging = true;
}
diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h
index 0777725085df..10b1990bc1f6 100644
--- a/include/drm/drm_atomic.h
+++ b/include/drm/drm_atomic.h
@@ -1022,6 +1022,7 @@ void drm_state_dump(struct drm_device *dev, struct drm_printer *p);
for ((__i) = 0; \
(__i) < (__state)->num_private_objs && \
((obj) = (__state)->private_objs[__i].ptr, \
+ (void)(obj) /* Only to avoid unused-but-set-variable warning */, \
(new_obj_state) = (__state)->private_objs[__i].new_state, 1); \
(__i)++)
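
The added "(void)(obj)" works because the iterator's condition is one large comma expression: evaluating the variable and casting to void marks it as used, so loop bodies that never read 'obj' no longer trip -Wunused-but-set-variable. The trick in isolation, as a hypothetical mini-iterator:

/* assign in the loop condition, then mark the variable as used */
#define for_each_val(v, arr, i, n) \
	for ((i) = 0; \
	     (i) < (n) && ((v) = (arr)[(i)], (void)(v), 1); \
	     (i)++)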
diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
index 441653693970..ca89a48c2460 100644
--- a/include/drm/ttm/ttm_resource.h
+++ b/include/drm/ttm/ttm_resource.h
@@ -311,12 +311,12 @@ ttm_resource_manager_cleanup(struct ttm_resource_manager *man)
}
void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk);
-void ttm_lru_bulk_move_add(struct ttm_lru_bulk_move *bulk,
- struct ttm_resource *res);
-void ttm_lru_bulk_move_del(struct ttm_lru_bulk_move *bulk,
- struct ttm_resource *res);
void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk);
+void ttm_resource_add_bulk_move(struct ttm_resource *res,
+ struct ttm_buffer_object *bo);
+void ttm_resource_del_bulk_move(struct ttm_resource *res,
+ struct ttm_buffer_object *bo);
void ttm_resource_move_to_lru_tail(struct ttm_resource *res);
void ttm_resource_init(struct ttm_buffer_object *bo,
diff --git a/include/dt-bindings/net/pcs-rzn1-miic.h b/include/dt-bindings/net/pcs-rzn1-miic.h
new file mode 100644
index 000000000000..784782eaec9e
--- /dev/null
+++ b/include/dt-bindings/net/pcs-rzn1-miic.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (C) 2022 Schneider-Electric
+ *
+ * Clément Léger <clement.leger@bootlin.com>
+ */
+
+#ifndef _DT_BINDINGS_PCS_RZN1_MIIC
+#define _DT_BINDINGS_PCS_RZN1_MIIC
+
+/*
+ * Refer to the datasheet [1] section 8.2.1, Internal Connection of Ethernet
+ * Ports, to check the available combinations
+ *
+ * [1] REN_r01uh0750ej0140-rzn1-introduction_MAT_20210228.pdf
+ */
+
+#define MIIC_GMAC1_PORT 0
+#define MIIC_GMAC2_PORT 1
+#define MIIC_RTOS_PORT 2
+#define MIIC_SERCOS_PORTA 3
+#define MIIC_SERCOS_PORTB 4
+#define MIIC_ETHERCAT_PORTA 5
+#define MIIC_ETHERCAT_PORTB 6
+#define MIIC_ETHERCAT_PORTC 7
+#define MIIC_SWITCH_PORTA 8
+#define MIIC_SWITCH_PORTB 9
+#define MIIC_SWITCH_PORTC 10
+#define MIIC_SWITCH_PORTD 11
+#define MIIC_HSR_PORTA 12
+#define MIIC_HSR_PORTB 13
+
+#endif
diff --git a/include/keys/asymmetric-type.h b/include/keys/asymmetric-type.h
index 6c5d4963e15b..69a13e1e5b2e 100644
--- a/include/keys/asymmetric-type.h
+++ b/include/keys/asymmetric-type.h
@@ -84,6 +84,9 @@ extern struct key *find_asymmetric_key(struct key *keyring,
const struct asymmetric_key_id *id_2,
bool partial);
+int x509_load_certificate_list(const u8 cert_list[], const unsigned long list_size,
+ const struct key *keyring);
+
/*
* The payload is at the discretion of the subtype.
*/
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 2bd073fa6bb5..d452071db572 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -119,6 +119,8 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
extern struct backing_dev_info noop_backing_dev_info;
+int bdi_init(struct backing_dev_info *bdi);
+
/**
* writeback_in_progress - determine whether there is writeback in progress
* @wb: bdi_writeback of interest
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 1cf3738ef1ea..992ee987f273 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -403,7 +403,6 @@ enum {
extern int bioset_init(struct bio_set *, unsigned int, unsigned int, int flags);
extern void bioset_exit(struct bio_set *);
extern int biovec_init_pool(mempool_t *pool, int pool_entries);
-extern int bioset_init_from_src(struct bio_set *bs, struct bio_set *src);
struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
unsigned int opf, gfp_t gfp_mask,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 608d577734c2..bb6e3c31b3b7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -575,6 +575,7 @@ struct request_queue {
#define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */
#define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */
#define QUEUE_FLAG_NOWAIT 29 /* device supports NOWAIT */
+#define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */
#define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_SAME_COMP) | \
@@ -616,6 +617,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
#define blk_queue_pm_only(q) atomic_read(&(q)->pm_only)
#define blk_queue_registered(q) test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
#define blk_queue_nowait(q) test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags)
+#define blk_queue_sq_sched(q) test_bit(QUEUE_FLAG_SQ_SCHED, &(q)->queue_flags)
extern void blk_set_pm_only(struct request_queue *q);
extern void blk_clear_pm_only(struct request_queue *q);
@@ -1006,8 +1008,6 @@ void disk_set_independent_access_ranges(struct gendisk *disk,
*/
/* Supports zoned block devices sequential write constraint */
#define ELEVATOR_F_ZBD_SEQ_WRITE (1U << 0)
-/* Supports scheduling on multiple hardware queues */
-#define ELEVATOR_F_MQ_AWARE (1U << 1)
extern void blk_queue_required_elevator_features(struct request_queue *q,
unsigned int features);
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 2b914a56a2c5..0edd7d2c0064 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -5,6 +5,7 @@
#define _LINUX_BPF_H 1
#include <uapi/linux/bpf.h>
+#include <uapi/linux/filter.h>
#include <linux/workqueue.h>
#include <linux/file.h>
@@ -22,8 +23,10 @@
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/percpu-refcount.h>
+#include <linux/stddef.h>
#include <linux/bpfptr.h>
#include <linux/btf.h>
+#include <linux/rcupdate_trace.h>
struct bpf_verifier_env;
struct bpf_verifier_log;
@@ -398,6 +401,9 @@ enum bpf_type_flag {
/* DYNPTR points to a ringbuf record. */
DYNPTR_TYPE_RINGBUF = BIT(9 + BPF_BASE_TYPE_BITS),
+ /* Size is known at compile time. */
+ MEM_FIXED_SIZE = BIT(10 + BPF_BASE_TYPE_BITS),
+
__BPF_TYPE_FLAG_MAX,
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
};
@@ -461,6 +467,8 @@ enum bpf_arg_type {
* all bytes or clear them in error case.
*/
ARG_PTR_TO_UNINIT_MEM = MEM_UNINIT | ARG_PTR_TO_MEM,
+ /* Pointer to valid memory of size known at compile time. */
+ ARG_PTR_TO_FIXED_SIZE_MEM = MEM_FIXED_SIZE | ARG_PTR_TO_MEM,
/* This must be the last entry. Its purpose is to ensure the enum is
* wide enough to hold the higher bits reserved for bpf_type_flag.
@@ -526,6 +534,14 @@ struct bpf_func_proto {
u32 *arg5_btf_id;
};
u32 *arg_btf_id[5];
+ struct {
+ size_t arg1_size;
+ size_t arg2_size;
+ size_t arg3_size;
+ size_t arg4_size;
+ size_t arg5_size;
+ };
+ size_t arg_size[5];
};
int *ret_btf_id; /* return value btf_id */
bool (*allowed)(const struct bpf_prog *prog);
@@ -1084,6 +1100,40 @@ struct bpf_prog_aux {
};
};
+struct bpf_prog {
+ u16 pages; /* Number of allocated pages */
+ u16 jited:1, /* Is our filter JIT'ed? */
+ jit_requested:1,/* archs need to JIT the prog */
+ gpl_compatible:1, /* Is filter GPL compatible? */
+ cb_access:1, /* Is control block accessed? */
+ dst_needed:1, /* Do we need dst entry? */
+ blinding_requested:1, /* needs constant blinding */
+ blinded:1, /* Was blinded */
+ is_func:1, /* program is a bpf function */
+ kprobe_override:1, /* Do we override a kprobe? */
+ has_callchain_buf:1, /* callchain buffer allocated? */
+ enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */
+ call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */
+ call_get_func_ip:1, /* Do we call get_func_ip() */
+ tstamp_type_access:1; /* Accessed __sk_buff->tstamp_type */
+ enum bpf_prog_type type; /* Type of BPF program */
+ enum bpf_attach_type expected_attach_type; /* For some prog types */
+ u32 len; /* Number of filter blocks */
+ u32 jited_len; /* Size of jited insns in bytes */
+ u8 tag[BPF_TAG_SIZE];
+ struct bpf_prog_stats __percpu *stats;
+ int __percpu *active;
+ unsigned int (*bpf_func)(const void *ctx,
+ const struct bpf_insn *insn);
+ struct bpf_prog_aux *aux; /* Auxiliary fields */
+ struct sock_fprog_kern *orig_prog; /* Original BPF program */
+ /* Instructions for interpreter */
+ union {
+ DECLARE_FLEX_ARRAY(struct sock_filter, insns);
+ DECLARE_FLEX_ARRAY(struct bpf_insn, insnsi);
+ };
+};
+
struct bpf_array_aux {
/* Programs with direct jumps into programs part of this array. */
struct list_head poke_progs;
@@ -1336,6 +1386,8 @@ extern struct bpf_empty_prog_array bpf_empty_prog_array;
struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
void bpf_prog_array_free(struct bpf_prog_array *progs);
+/* Use when traversal over the bpf_prog_array uses tasks_trace rcu */
+void bpf_prog_array_free_sleepable(struct bpf_prog_array *progs);
int bpf_prog_array_length(struct bpf_prog_array *progs);
bool bpf_prog_array_is_empty(struct bpf_prog_array *array);
int bpf_prog_array_copy_to_user(struct bpf_prog_array *progs,
@@ -1427,6 +1479,55 @@ bpf_prog_run_array(const struct bpf_prog_array *array,
return ret;
}
+/* Notes on RCU design for bpf_prog_arrays containing sleepable programs:
+ *
+ * We use the tasks_trace rcu flavor read section to protect the bpf_prog_array
+ * overall. As a result, we must use the bpf_prog_array_free_sleepable
+ * in order to use the tasks_trace rcu grace period.
+ *
+ * When a non-sleepable program is inside the array, we take the rcu read
+ * section and disable preemption for that program alone, so it can access
+ * rcu-protected dynamically sized maps.
+ */
+static __always_inline u32
+bpf_prog_run_array_sleepable(const struct bpf_prog_array __rcu *array_rcu,
+ const void *ctx, bpf_prog_run_fn run_prog)
+{
+ const struct bpf_prog_array_item *item;
+ const struct bpf_prog *prog;
+ const struct bpf_prog_array *array;
+ struct bpf_run_ctx *old_run_ctx;
+ struct bpf_trace_run_ctx run_ctx;
+ u32 ret = 1;
+
+ might_fault();
+
+ rcu_read_lock_trace();
+ migrate_disable();
+
+ array = rcu_dereference_check(array_rcu, rcu_read_lock_trace_held());
+ if (unlikely(!array))
+ goto out;
+ old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+ item = &array->items[0];
+ while ((prog = READ_ONCE(item->prog))) {
+ if (!prog->aux->sleepable)
+ rcu_read_lock();
+
+ run_ctx.bpf_cookie = item->bpf_cookie;
+ ret &= run_prog(prog, ctx);
+ item++;
+
+ if (!prog->aux->sleepable)
+ rcu_read_unlock();
+ }
+ bpf_reset_run_ctx(old_run_ctx);
+out:
+ migrate_enable();
+ rcu_read_unlock_trace();
+ return ret;
+}
+
#ifdef CONFIG_BPF_SYSCALL
DECLARE_PER_CPU(int, bpf_prog_active);
extern struct mutex bpf_stats_enabled_mutex;
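
The comment block above also pins down the update-side obligation: an array that may hold sleepable programs can only be reused after a tasks_trace grace period, which is what the new free helper arranges. A hedged sketch of a replace-and-free sequence, with the lock and pointer names being illustrative:

static void swap_prog_array(struct my_ctx *ctx,
			    struct bpf_prog_array *new_array)
{
	struct bpf_prog_array *old;

	old = rcu_replace_pointer(ctx->progs, new_array,
				  lockdep_is_held(&ctx->mutex));
	/* freeing is deferred until a tasks_trace grace period elapses */
	bpf_prog_array_free_sleepable(old);
}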
@@ -2104,6 +2205,7 @@ int sock_map_bpf_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr);
void sock_map_unhash(struct sock *sk);
+void sock_map_destroy(struct sock *sk);
void sock_map_close(struct sock *sk, long timeout);
#else
static inline int bpf_prog_offload_init(struct bpf_prog *prog,
@@ -2261,12 +2363,9 @@ extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto;
extern const struct bpf_func_proto bpf_sk_setsockopt_proto;
extern const struct bpf_func_proto bpf_sk_getsockopt_proto;
-extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto;
extern const struct bpf_func_proto bpf_find_vma_proto;
extern const struct bpf_func_proto bpf_loop_proto;
-extern const struct bpf_func_proto bpf_strncmp_proto;
extern const struct bpf_func_proto bpf_copy_from_user_task_proto;
-extern const struct bpf_func_proto bpf_kptr_xchg_proto;
const struct bpf_func_proto *tracing_prog_func_proto(
enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index e8439f6cbe57..3930c963fa67 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -299,7 +299,7 @@ struct bpf_verifier_state {
* If is_state_visited() sees a state with branches > 0 it means
* there is a loop. If such state is exactly equal to the current state
* it's an infinite loop. Note states_equal() checks for states
- * equvalency, so two states being 'states_equal' does not mean
+ * equivalency, so two states being 'states_equal' does not mean
* infinite loop. The exact comparison is provided by
* states_maybe_looping() function. It's a stronger pre-check and
* much faster than states_equal().
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 2611cea2c2b6..1bfed7fa0428 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -177,6 +177,19 @@ static inline bool btf_type_is_enum(const struct btf_type *t)
return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM;
}
+static inline bool btf_is_any_enum(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM ||
+ BTF_INFO_KIND(t->info) == BTF_KIND_ENUM64;
+}
+
+static inline bool btf_kind_core_compat(const struct btf_type *t1,
+ const struct btf_type *t2)
+{
+ return BTF_INFO_KIND(t1->info) == BTF_INFO_KIND(t2->info) ||
+ (btf_is_any_enum(t1) && btf_is_any_enum(t2));
+}
+
static inline bool str_is_empty(const char *s)
{
return !s || !s[0];
@@ -192,6 +205,16 @@ static inline bool btf_is_enum(const struct btf_type *t)
return btf_kind(t) == BTF_KIND_ENUM;
}
+static inline bool btf_is_enum64(const struct btf_type *t)
+{
+ return btf_kind(t) == BTF_KIND_ENUM64;
+}
+
+static inline u64 btf_enum64_value(const struct btf_enum64 *e)
+{
+ return ((u64)e->val_hi32 << 32) | e->val_lo32;
+}
+
static inline bool btf_is_composite(const struct btf_type *t)
{
u16 kind = btf_kind(t);
@@ -332,6 +355,11 @@ static inline struct btf_enum *btf_enum(const struct btf_type *t)
return (struct btf_enum *)(t + 1);
}
+static inline struct btf_enum64 *btf_enum64(const struct btf_type *t)
+{
+ return (struct btf_enum64 *)(t + 1);
+}
+
static inline const struct btf_var_secinfo *btf_type_var_secinfo(
const struct btf_type *t)
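
A short consumer sketch for the new enum64 helpers: 64-bit member values are stored as hi/lo halves and recombined by btf_enum64_value(). This assumes 't' was already verified with btf_is_enum64():

static void dump_enum64(const struct btf_type *t)
{
	const struct btf_enum64 *e = btf_enum64(t);
	u16 i, n = btf_vlen(t);

	for (i = 0; i < n; i++, e++)
		pr_info("member %u = %llu\n", i,
			(unsigned long long)btf_enum64_value(e));
}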
{
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 54dc2f9a2d56..2c7477354744 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -65,6 +65,9 @@ extern ssize_t cpu_show_tsx_async_abort(struct device *dev,
extern ssize_t cpu_show_itlb_multihit(struct device *dev,
struct device_attribute *attr, char *buf);
extern ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_mmio_stale_data(struct device *dev,
+ struct device_attribute *attr,
+ char *buf);
extern __printf(4, 5)
struct device *cpu_device_create(struct device *parent, void *drvdata,
diff --git a/include/linux/crc-itu-t.h b/include/linux/crc-itu-t.h
index a4367051e192..2f991a427ade 100644
--- a/include/linux/crc-itu-t.h
+++ b/include/linux/crc-itu-t.h
@@ -4,7 +4,7 @@
*
* Implements the standard CRC ITU-T V.41:
* Width 16
- * Poly 0x1021 (x^16 + x^12 + x^15 + 1)
+ * Poly 0x1021 (x^16 + x^12 + x^5 + 1)
* Init 0
*/
diff --git a/include/linux/filter.h b/include/linux/filter.h
index ed0c0ff42ad5..d0cbb31b1b4d 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -559,40 +559,6 @@ struct bpf_prog_stats {
struct u64_stats_sync syncp;
} __aligned(2 * sizeof(u64));
-struct bpf_prog {
- u16 pages; /* Number of allocated pages */
- u16 jited:1, /* Is our filter JIT'ed? */
- jit_requested:1,/* archs need to JIT the prog */
- gpl_compatible:1, /* Is filter GPL compatible? */
- cb_access:1, /* Is control block accessed? */
- dst_needed:1, /* Do we need dst entry? */
- blinding_requested:1, /* needs constant blinding */
- blinded:1, /* Was blinded */
- is_func:1, /* program is a bpf function */
- kprobe_override:1, /* Do we override a kprobe? */
- has_callchain_buf:1, /* callchain buffer allocated? */
- enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */
- call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */
- call_get_func_ip:1, /* Do we call get_func_ip() */
- tstamp_type_access:1; /* Accessed __sk_buff->tstamp_type */
- enum bpf_prog_type type; /* Type of BPF program */
- enum bpf_attach_type expected_attach_type; /* For some prog types */
- u32 len; /* Number of filter blocks */
- u32 jited_len; /* Size of jited insns in bytes */
- u8 tag[BPF_TAG_SIZE];
- struct bpf_prog_stats __percpu *stats;
- int __percpu *active;
- unsigned int (*bpf_func)(const void *ctx,
- const struct bpf_insn *insn);
- struct bpf_prog_aux *aux; /* Auxiliary fields */
- struct sock_fprog_kern *orig_prog; /* Original BPF program */
- /* Instructions for interpreter */
- union {
- DECLARE_FLEX_ARRAY(struct sock_filter, insns);
- DECLARE_FLEX_ARRAY(struct bpf_insn, insnsi);
- };
-};
-
struct sk_filter {
refcount_t refcnt;
struct rcu_head rcu;
diff --git a/include/linux/mii.h b/include/linux/mii.h
index 5ee13083cec7..d5a959ce4877 100644
--- a/include/linux/mii.h
+++ b/include/linux/mii.h
@@ -545,4 +545,39 @@ static inline u8 mii_resolve_flowctrl_fdx(u16 lcladv, u16 rmtadv)
return cap;
}
+/**
+ * mii_bmcr_encode_fixed - encode fixed speed/duplex settings to a BMCR value
+ * @speed: a SPEED_* value
+ * @duplex: a DUPLEX_* value
+ *
+ * Encode the speed and duplex to a BMCR value. 2500, 1000, 100 and 10 Mbps are
+ * supported. 2500Mbps is encoded to 1000Mbps. Other speeds are encoded as 10
+ * Mbps. Unknown duplex values are encoded to half-duplex.
+ */
+static inline u16 mii_bmcr_encode_fixed(int speed, int duplex)
+{
+ u16 bmcr;
+
+ switch (speed) {
+ case SPEED_2500:
+ case SPEED_1000:
+ bmcr = BMCR_SPEED1000;
+ break;
+
+ case SPEED_100:
+ bmcr = BMCR_SPEED100;
+ break;
+
+ case SPEED_10:
+ default:
+ bmcr = BMCR_SPEED10;
+ break;
+ }
+
+ if (duplex == DUPLEX_FULL)
+ bmcr |= BMCR_FULLDPLX;
+
+ return bmcr;
+}
+
#endif /* __LINUX_MII_H__ */
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 604b85dd770a..b5f58fd37a0f 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -387,21 +387,6 @@ enum {
};
enum {
- MLX5_DEV_CAP_FLAG_XRC = 1LL << 3,
- MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1LL << 8,
- MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1LL << 9,
- MLX5_DEV_CAP_FLAG_APM = 1LL << 17,
- MLX5_DEV_CAP_FLAG_ATOMIC = 1LL << 18,
- MLX5_DEV_CAP_FLAG_BLOCK_MCAST = 1LL << 23,
- MLX5_DEV_CAP_FLAG_ON_DMND_PG = 1LL << 24,
- MLX5_DEV_CAP_FLAG_CQ_MODER = 1LL << 29,
- MLX5_DEV_CAP_FLAG_RESIZE_CQ = 1LL << 30,
- MLX5_DEV_CAP_FLAG_DCT = 1LL << 37,
- MLX5_DEV_CAP_FLAG_SIG_HAND_OVER = 1LL << 40,
- MLX5_DEV_CAP_FLAG_CMDIF_CSUM = 3LL << 46,
-};
-
-enum {
MLX5_ROCE_VERSION_1 = 0,
MLX5_ROCE_VERSION_2 = 2,
};
@@ -455,6 +440,7 @@ enum {
MLX5_OPCODE_UMR = 0x25,
+ MLX5_OPCODE_ACCESS_ASO = 0x2d,
};
enum {
@@ -496,10 +482,6 @@ enum {
};
enum {
- MLX5_CAP_OFF_CMDIF_CSUM = 46,
-};
-
-enum {
/*
* Max wqe size for rdma read is 512 bytes, so this
* limits our max_sge_rd as the wqe needs to fit:
@@ -840,7 +822,10 @@ struct mlx5_cqe64 {
__be32 timestamp_l;
__be32 sop_drop_qpn;
__be16 wqe_counter;
- u8 signature;
+ union {
+ u8 signature;
+ u8 validity_iteration_count;
+ };
u8 op_own;
};
@@ -872,6 +857,11 @@ enum {
MLX5_CQE_FORMAT_CSUM_STRIDX = 0x3,
};
+enum {
+ MLX5_CQE_COMPRESS_LAYOUT_BASIC = 0,
+ MLX5_CQE_COMPRESS_LAYOUT_ENHANCED = 1,
+};
+
#define MLX5_MINI_CQE_ARRAY_SIZE 8
static inline u8 mlx5_get_cqe_format(struct mlx5_cqe64 *cqe)
@@ -884,6 +874,12 @@ static inline u8 get_cqe_opcode(struct mlx5_cqe64 *cqe)
return cqe->op_own >> 4;
}
+static inline u8 get_cqe_enhanced_num_mini_cqes(struct mlx5_cqe64 *cqe)
+{
+ /* num_of_mini_cqes is zero based */
+ return get_cqe_opcode(cqe) + 1;
+}
+
static inline u8 get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe)
{
return (cqe->lro.tcppsh_abort_dupack >> 6) & 1;
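With the enhanced compression layout, the opcode field of a compressed title CQE carries the zero-based count of mini CQEs that follow, which get_cqe_enhanced_num_mini_cqes() decodes. A sketch of how an RX poll loop might consume it; the decompress helper is a hypothetical driver-side name:

        if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) {
                /* Enhanced layout: opcode encodes the mini-CQE count. */
                u8 num_mini = get_cqe_enhanced_num_mini_cqes(cqe);

                my_decompress_enhanced_cqes(rq, cqe, num_mini); /* hypothetical */
        }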
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 5040cd774c5a..76d7661e3e63 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -676,6 +676,7 @@ struct mlx5e_resources {
enum mlx5_sw_icm_type {
MLX5_SW_ICM_TYPE_STEERING,
MLX5_SW_ICM_TYPE_HEADER_MODIFY,
+ MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN,
};
#define MLX5_MAX_RESERVED_GIDS 8
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 8135713b0d2d..ece3e35622d7 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -212,6 +212,19 @@ struct mlx5_flow_group *
mlx5_create_flow_group(struct mlx5_flow_table *ft, u32 *in);
void mlx5_destroy_flow_group(struct mlx5_flow_group *fg);
+struct mlx5_exe_aso {
+ u32 object_id;
+ u8 type;
+ u8 return_reg_id;
+ union {
+ u32 ctrl_data;
+ struct {
+ u8 meter_idx;
+ u8 init_color;
+ } flow_meter;
+ };
+};
+
struct mlx5_fs_vlan {
u16 ethtype;
u16 vid;
@@ -237,6 +250,7 @@ struct mlx5_flow_act {
struct mlx5_fs_vlan vlan[MLX5_FS_VLAN_DEPTH];
struct ib_counters *counters;
struct mlx5_flow_group *fg;
+ struct mlx5_exe_aso exe_aso;
};
#define MLX5_DECLARE_FLOW_ACT(name) \
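The new exe_aso member lets a flow rule carry an ASO action. A sketch of filling it for the flow-meter case, built only from the fields and constants added in this series; creating the meter object and inserting the rule are assumed context:

        struct mlx5_flow_act flow_act = {};

        flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO;
        flow_act.exe_aso.type = MLX5_EXE_ASO_FLOW_METER;
        flow_act.exe_aso.object_id = meter_obj_id;      /* created ASO object */
        flow_act.exe_aso.return_reg_id = reg_id;        /* register receiving the color */
        flow_act.exe_aso.flow_meter.meter_idx = 0;
        flow_act.exe_aso.flow_meter.init_color = MLX5_FLOW_METER_COLOR_GREEN;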
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index fd7d083a34d3..8e87eb47f9dc 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -442,7 +442,9 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
u8 max_modify_header_actions[0x8];
u8 max_ft_level[0x8];
- u8 reserved_at_40[0x20];
+ u8 reserved_at_40[0x6];
+ u8 execute_aso[0x1];
+ u8 reserved_at_47[0x19];
u8 reserved_at_60[0x2];
u8 reformat_insert[0x1];
@@ -940,7 +942,17 @@ struct mlx5_ifc_qos_cap_bits {
u8 max_tsar_bw_share[0x20];
- u8 reserved_at_100[0x700];
+ u8 reserved_at_100[0x20];
+
+ u8 reserved_at_120[0x3];
+ u8 log_meter_aso_granularity[0x5];
+ u8 reserved_at_128[0x3];
+ u8 log_meter_aso_max_alloc[0x5];
+ u8 reserved_at_130[0x3];
+ u8 log_max_num_meter_aso[0x5];
+ u8 reserved_at_138[0x8];
+
+ u8 reserved_at_140[0x6c0];
};
struct mlx5_ifc_debug_cap_bits {
@@ -1086,11 +1098,14 @@ struct mlx5_ifc_device_mem_cap_bits {
u8 log_sw_icm_alloc_granularity[0x6];
u8 log_steering_sw_icm_size[0x8];
- u8 reserved_at_120[0x20];
+ u8 reserved_at_120[0x18];
+ u8 log_header_modify_pattern_sw_icm_size[0x8];
u8 header_modify_sw_icm_start_address[0x40];
- u8 reserved_at_180[0x80];
+ u8 reserved_at_180[0x40];
+
+ u8 header_modify_pattern_sw_icm_start_address[0x40];
u8 memic_operations[0x20];
@@ -1426,7 +1441,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_120[0xa];
u8 log_max_ra_req_dc[0x6];
- u8 reserved_at_130[0xa];
+ u8 reserved_at_130[0x9];
+ u8 vnic_env_cq_overrun[0x1];
u8 log_max_ra_res_dc[0x6];
u8 reserved_at_140[0x5];
@@ -1621,7 +1637,11 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 nic_receive_steering_discard[0x1];
u8 receive_discard_vport_down[0x1];
u8 transmit_discard_vport_down[0x1];
- u8 reserved_at_343[0x5];
+ u8 eq_overrun_count[0x1];
+ u8 reserved_at_344[0x1];
+ u8 invalid_command_count[0x1];
+ u8 quota_exceeded_count[0x1];
+ u8 reserved_at_347[0x1];
u8 log_max_flow_counter_bulk[0x8];
u8 max_flow_counter_15_0[0x10];
@@ -1719,7 +1739,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 log_max_dci_errored_streams[0x5];
u8 reserved_at_598[0x8];
- u8 reserved_at_5a0[0x13];
+ u8 reserved_at_5a0[0x10];
+ u8 enhanced_cqe_compression[0x1];
+ u8 reserved_at_5b1[0x2];
u8 log_max_dek[0x5];
u8 reserved_at_5b8[0x4];
u8 mini_cqe_resp_stride_index[0x1];
@@ -3277,6 +3299,7 @@ enum {
MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2 = 0x800,
MLX5_FLOW_CONTEXT_ACTION_IPSEC_DECRYPT = 0x1000,
MLX5_FLOW_CONTEXT_ACTION_IPSEC_ENCRYPT = 0x2000,
+ MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO = 0x4000,
};
enum {
@@ -3292,6 +3315,38 @@ struct mlx5_ifc_vlan_bits {
u8 vid[0xc];
};
+enum {
+ MLX5_FLOW_METER_COLOR_RED = 0x0,
+ MLX5_FLOW_METER_COLOR_YELLOW = 0x1,
+ MLX5_FLOW_METER_COLOR_GREEN = 0x2,
+ MLX5_FLOW_METER_COLOR_UNDEFINED = 0x3,
+};
+
+enum {
+ MLX5_EXE_ASO_FLOW_METER = 0x2,
+};
+
+struct mlx5_ifc_exe_aso_ctrl_flow_meter_bits {
+ u8 return_reg_id[0x4];
+ u8 aso_type[0x4];
+ u8 reserved_at_8[0x14];
+ u8 action[0x1];
+ u8 init_color[0x2];
+ u8 meter_id[0x1];
+};
+
+union mlx5_ifc_exe_aso_ctrl {
+ struct mlx5_ifc_exe_aso_ctrl_flow_meter_bits exe_aso_ctrl_flow_meter;
+};
+
+struct mlx5_ifc_execute_aso_bits {
+ u8 valid[0x1];
+ u8 reserved_at_1[0x7];
+ u8 aso_object_id[0x18];
+
+ union mlx5_ifc_exe_aso_ctrl exe_aso_ctrl;
+};
+
struct mlx5_ifc_flow_context_bits {
struct mlx5_ifc_vlan_bits push_vlan;
@@ -3323,7 +3378,9 @@ struct mlx5_ifc_flow_context_bits {
struct mlx5_ifc_fte_match_param_bits match_value;
- u8 reserved_at_1200[0x600];
+ struct mlx5_ifc_execute_aso_bits execute_aso[4];
+
+ u8 reserved_at_1300[0x500];
union mlx5_ifc_dest_format_struct_flow_counter_list_auto_bits destination[];
};
@@ -3391,11 +3448,21 @@ struct mlx5_ifc_vnic_diagnostic_statistics_bits {
u8 transmit_discard_vport_down[0x40];
- u8 reserved_at_140[0xa0];
+ u8 async_eq_overrun[0x20];
+
+ u8 comp_eq_overrun[0x20];
+
+ u8 reserved_at_180[0x20];
+
+ u8 invalid_command[0x20];
+
+ u8 quota_exceeded_command[0x20];
u8 internal_rq_out_of_buffer[0x20];
- u8 reserved_at_200[0xe00];
+ u8 cq_overrun[0x20];
+
+ u8 reserved_at_220[0xde0];
};
struct mlx5_ifc_traffic_counter_bits {
@@ -4074,7 +4141,8 @@ struct mlx5_ifc_cqc_bits {
u8 cqe_comp_en[0x1];
u8 mini_cqe_res_format[0x2];
u8 st[0x4];
- u8 reserved_at_18[0x8];
+ u8 reserved_at_18[0x6];
+ u8 cqe_compression_layout[0x2];
u8 reserved_at_20[0x20];
@@ -5970,7 +6038,9 @@ struct mlx5_ifc_general_obj_in_cmd_hdr_bits {
u8 obj_id[0x20];
- u8 reserved_at_60[0x20];
+ u8 reserved_at_60[0x3];
+ u8 log_obj_range[0x5];
+ u8 reserved_at_68[0x18];
};
struct mlx5_ifc_general_obj_out_cmd_hdr_bits {
@@ -11370,12 +11440,14 @@ enum {
MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = BIT_ULL(0xc),
MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC = BIT_ULL(0x13),
MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_SAMPLER = BIT_ULL(0x20),
+ MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_FLOW_METER_ASO = BIT_ULL(0x24),
};
enum {
MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = 0xc,
MLX5_GENERAL_OBJECT_TYPES_IPSEC = 0x13,
MLX5_GENERAL_OBJECT_TYPES_SAMPLER = 0x20,
+ MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO = 0x24,
};
enum {
@@ -11448,6 +11520,61 @@ struct mlx5_ifc_create_encryption_key_in_bits {
struct mlx5_ifc_encryption_key_obj_bits encryption_key_object;
};
+enum {
+ MLX5_FLOW_METER_MODE_BYTES_IP_LENGTH = 0x0,
+ MLX5_FLOW_METER_MODE_BYTES_CALC_WITH_L2 = 0x1,
+ MLX5_FLOW_METER_MODE_BYTES_CALC_WITH_L2_IPG = 0x2,
+ MLX5_FLOW_METER_MODE_NUM_PACKETS = 0x3,
+};
+
+struct mlx5_ifc_flow_meter_parameters_bits {
+ u8 valid[0x1];
+ u8 bucket_overflow[0x1];
+ u8 start_color[0x2];
+ u8 both_buckets_on_green[0x1];
+ u8 reserved_at_5[0x1];
+ u8 meter_mode[0x2];
+ u8 reserved_at_8[0x18];
+
+ u8 reserved_at_20[0x20];
+
+ u8 reserved_at_40[0x3];
+ u8 cbs_exponent[0x5];
+ u8 cbs_mantissa[0x8];
+ u8 reserved_at_50[0x3];
+ u8 cir_exponent[0x5];
+ u8 cir_mantissa[0x8];
+
+ u8 reserved_at_60[0x20];
+
+ u8 reserved_at_80[0x3];
+ u8 ebs_exponent[0x5];
+ u8 ebs_mantissa[0x8];
+ u8 reserved_at_90[0x3];
+ u8 eir_exponent[0x5];
+ u8 eir_mantissa[0x8];
+
+ u8 reserved_at_a0[0x60];
+};
+
+struct mlx5_ifc_flow_meter_aso_obj_bits {
+ u8 modify_field_select[0x40];
+
+ u8 reserved_at_40[0x40];
+
+ u8 reserved_at_80[0x8];
+ u8 meter_aso_access_pd[0x18];
+
+ u8 reserved_at_a0[0x160];
+
+ struct mlx5_ifc_flow_meter_parameters_bits flow_meter_parameters[2];
+};
+
+struct mlx5_ifc_create_flow_meter_aso_obj_in_bits {
+ struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr;
+ struct mlx5_ifc_flow_meter_aso_obj_bits flow_meter_aso_obj;
+};
+
struct mlx5_ifc_sampler_obj_bits {
u8 modify_field_select[0x40];
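The flow_meter_parameters layout above is programmed with the standard mlx5_ifc accessors. A sketch, assuming fmp already points at one flow_meter_parameters element of the ASO object and the CIR exponent/mantissa were computed elsewhere:

        MLX5_SET(flow_meter_parameters, fmp, valid, 1);
        MLX5_SET(flow_meter_parameters, fmp, start_color,
                 MLX5_FLOW_METER_COLOR_GREEN);
        MLX5_SET(flow_meter_parameters, fmp, meter_mode,
                 MLX5_FLOW_METER_MODE_BYTES_IP_LENGTH);
        MLX5_SET(flow_meter_parameters, fmp, cir_exponent, cir_exp);
        MLX5_SET(flow_meter_parameters, fmp, cir_mantissa, cir_man);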
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index b34ff2cdbc4f..c29ab4c0cd5c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -227,6 +227,7 @@ struct page {
* struct folio - Represents a contiguous set of bytes.
* @flags: Identical to the page flags.
* @lru: Least Recently Used list; tracks how recently this folio was used.
+ * @mlock_count: Number of times this folio has been pinned by mlock().
* @mapping: The file this page belongs to, or refers to the anon_vma for
* anonymous memory.
* @index: Offset within the file, in units of pages. For anonymous memory,
@@ -255,10 +256,14 @@ struct folio {
unsigned long flags;
union {
struct list_head lru;
+ /* private: avoid cluttering the output */
struct {
void *__filler;
+ /* public: */
unsigned int mlock_count;
+ /* private: */
};
+ /* public: */
};
struct address_space *mapping;
pgoff_t index;
diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index e05ee9f001ff..9dd4bf157255 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h
@@ -26,7 +26,7 @@
* @remote: Remote address for tunnels
*/
struct vif_device {
- struct net_device *dev;
+ struct net_device __rcu *dev;
netdevice_tracker dev_tracker;
unsigned long bytes_in, bytes_out;
unsigned long pkt_in, pkt_out;
@@ -52,6 +52,7 @@ static inline int mr_call_vif_notifier(struct notifier_block *nb,
unsigned short family,
enum fib_event_type event_type,
struct vif_device *vif,
+ struct net_device *vif_dev,
unsigned short vif_index, u32 tb_id,
struct netlink_ext_ack *extack)
{
@@ -60,7 +61,7 @@ static inline int mr_call_vif_notifier(struct notifier_block *nb,
.family = family,
.extack = extack,
},
- .dev = vif->dev,
+ .dev = vif_dev,
.vif_index = vif_index,
.vif_flags = vif->flags,
.tb_id = tb_id,
@@ -73,6 +74,7 @@ static inline int mr_call_vif_notifiers(struct net *net,
unsigned short family,
enum fib_event_type event_type,
struct vif_device *vif,
+ struct net_device *vif_dev,
unsigned short vif_index, u32 tb_id,
unsigned int *ipmr_seq)
{
@@ -80,7 +82,7 @@ static inline int mr_call_vif_notifiers(struct net *net,
.info = {
.family = family,
},
- .dev = vif->dev,
+ .dev = vif_dev,
.vif_index = vif_index,
.vif_flags = vif->flags,
.tb_id = tb_id,
@@ -98,7 +100,8 @@ static inline int mr_call_vif_notifiers(struct net *net,
#define MAXVIFS 32
#endif
-#define VIF_EXISTS(_mrt, _idx) (!!((_mrt)->vif_table[_idx].dev))
+/* Note: This helper is deprecated. */
+#define VIF_EXISTS(_mrt, _idx) (!!rcu_access_pointer((_mrt)->vif_table[_idx].dev))
/* mfc_flags:
* MFC_STATIC - the entry was added statically (not by a routing daemon)
@@ -305,7 +308,7 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
struct netlink_ext_ack *extack),
struct mr_table *(*mr_iter)(struct net *net,
struct mr_table *mrt),
- rwlock_t *mrt_lock, struct netlink_ext_ack *extack);
+ struct netlink_ext_ack *extack);
#else
static inline void vif_device_init(struct vif_device *v,
struct net_device *dev,
@@ -360,7 +363,7 @@ static inline int mr_dump(struct net *net, struct notifier_block *nb,
struct netlink_ext_ack *extack),
struct mr_table *(*mr_iter)(struct net *net,
struct mr_table *mrt),
- rwlock_t *mrt_lock, struct netlink_ext_ack *extack)
+ struct netlink_ext_ack *extack)
{
return -EINVAL;
}
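Since vif->dev is now __rcu-annotated, readers outside the multicast routing lock must use RCU accessors instead of plain loads. A minimal reader sketch following the usual RCU pattern:

        struct net_device *dev;

        rcu_read_lock();
        dev = rcu_dereference(vif->dev);
        if (dev)
                dev_hold(dev);  /* keep the device alive past the unlock */
        rcu_read_unlock();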
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 6dbb4c9ce50d..1773e5df8e65 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -206,7 +206,9 @@ struct netfs_io_request {
*/
struct netfs_request_ops {
int (*init_request)(struct netfs_io_request *rreq, struct file *file);
+ void (*free_request)(struct netfs_io_request *rreq);
int (*begin_cache_operation)(struct netfs_io_request *rreq);
+
void (*expand_readahead)(struct netfs_io_request *rreq);
bool (*clamp_length)(struct netfs_io_subrequest *subreq);
void (*issue_read)(struct netfs_io_subrequest *subreq);
@@ -214,7 +216,6 @@ struct netfs_request_ops {
int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
struct folio *folio, void **_fsdata);
void (*done)(struct netfs_io_request *rreq);
- void (*cleanup)(struct address_space *mapping, void *netfs_priv);
};
/*
@@ -277,7 +278,8 @@ struct netfs_cache_ops {
struct readahead_control;
extern void netfs_readahead(struct readahead_control *);
int netfs_read_folio(struct file *, struct folio *);
-extern int netfs_write_begin(struct file *, struct address_space *,
+extern int netfs_write_begin(struct netfs_inode *,
+ struct file *, struct address_space *,
loff_t, unsigned int, struct folio **,
void **);
@@ -302,19 +304,17 @@ static inline struct netfs_inode *netfs_inode(struct inode *inode)
/**
* netfs_inode_init - Initialise a netfslib inode context
- * @inode: The inode with which the context is associated
+ * @ctx: The netfs inode to initialise
* @ops: The netfs's operations list
*
* Initialise the netfs library context struct. This is expected to follow on
* directly from the VFS inode struct.
*/
-static inline void netfs_inode_init(struct inode *inode,
+static inline void netfs_inode_init(struct netfs_inode *ctx,
const struct netfs_request_ops *ops)
{
- struct netfs_inode *ctx = netfs_inode(inode);
-
ctx->ops = ops;
- ctx->remote_i_size = i_size_read(inode);
+ ctx->remote_i_size = i_size_read(&ctx->inode);
#if IS_ENABLED(CONFIG_FSCACHE)
ctx->cache = NULL;
#endif
@@ -322,28 +322,25 @@ static inline void netfs_inode_init(struct inode *inode,
/**
* netfs_resize_file - Note that a file got resized
- * @inode: The inode being resized
+ * @ctx: The netfs inode being resized
* @new_i_size: The new file size
*
* Inform the netfs lib that a file got resized so that it can adjust its state.
*/
-static inline void netfs_resize_file(struct inode *inode, loff_t new_i_size)
+static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size)
{
- struct netfs_inode *ctx = netfs_inode(inode);
-
ctx->remote_i_size = new_i_size;
}
/**
* netfs_i_cookie - Get the cache cookie from the inode
- * @inode: The inode to query
+ * @ctx: The netfs inode to query
*
* Get the caching cookie (if enabled) from the network filesystem's inode.
*/
-static inline struct fscache_cookie *netfs_i_cookie(struct inode *inode)
+static inline struct fscache_cookie *netfs_i_cookie(struct netfs_inode *ctx)
{
#if IS_ENABLED(CONFIG_FSCACHE)
- struct netfs_inode *ctx = netfs_inode(inode);
return ctx->cache;
#else
return NULL;
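netfs_inode_init(), netfs_resize_file() and netfs_i_cookie() now take the struct netfs_inode directly rather than deriving it from a VFS inode. A sketch of a filesystem adapting to the new signatures; my_inode and my_req_ops are hypothetical names:

struct my_inode {
        struct netfs_inode netfs;       /* netfs context; the VFS inode is netfs.inode */
        /* ... filesystem-private fields ... */
};

static const struct netfs_request_ops my_req_ops = {
        /* .init_request, .issue_read, ... */
};

static void my_setup_inode(struct my_inode *mi)
{
        netfs_inode_init(&mi->netfs, &my_req_ops);
}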
diff --git a/include/linux/objtool.h b/include/linux/objtool.h
index 6491fa8fba6d..15b940ec1eac 100644
--- a/include/linux/objtool.h
+++ b/include/linux/objtool.h
@@ -143,6 +143,12 @@ struct unwind_hint {
.popsection
.endm
+.macro STACK_FRAME_NON_STANDARD_FP func:req
+#ifdef CONFIG_FRAME_POINTER
+ STACK_FRAME_NON_STANDARD \func
+#endif
+.endm
+
.macro ANNOTATE_NOENDBR
.Lhere_\@:
.pushsection .discard.noendbr
diff --git a/include/linux/pcs-rzn1-miic.h b/include/linux/pcs-rzn1-miic.h
new file mode 100644
index 000000000000..56d12b21365d
--- /dev/null
+++ b/include/linux/pcs-rzn1-miic.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022 Schneider Electric
+ *
+ * Clément Léger <clement.leger@bootlin.com>
+ */
+
+#ifndef __LINUX_PCS_MIIC_H
+#define __LINUX_PCS_MIIC_H
+
+struct phylink;
+struct device_node;
+
+struct phylink_pcs *miic_create(struct device *dev, struct device_node *np);
+
+void miic_destroy(struct phylink_pcs *pcs);
+
+#endif /* __LINUX_PCS_MIIC_H */
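A sketch of how a consumer (such as the RZ/N1 switch driver added elsewhere in this series) might bind the MII converter PCS; pcs_node and the surrounding probe code are assumed context:

        struct phylink_pcs *pcs;

        pcs = miic_create(dev, pcs_node);
        if (IS_ERR(pcs))
                return PTR_ERR(pcs);

        /* ... hand pcs to phylink; on teardown: */
        miic_destroy(pcs);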
diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index 266eb26fb029..d2da1e0b4a92 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -17,6 +17,7 @@
#define DW_AN_C73 1
#define DW_AN_C37_SGMII 2
#define DW_2500BASEX 3
+#define DW_AN_C37_1000BASEX 4
struct xpcs_id;
@@ -30,7 +31,7 @@ int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface);
void xpcs_link_up(struct phylink_pcs *pcs, unsigned int mode,
phy_interface_t interface, int speed, int duplex);
int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface,
- unsigned int mode);
+ unsigned int mode, const unsigned long *advertising);
void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces);
int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns,
int enable);
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 508f1149665b..bed9a347481b 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1539,6 +1539,9 @@ static inline void phy_device_reset(struct phy_device *phydev, int value)
#define phydev_err(_phydev, format, args...) \
dev_err(&_phydev->mdio.dev, format, ##args)
+#define phydev_err_probe(_phydev, err, format, args...) \
+ dev_err_probe(&_phydev->mdio.dev, err, format, ##args)
+
#define phydev_info(_phydev, format, args...) \
dev_info(&_phydev->mdio.dev, format, ##args)
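phydev_err_probe() wraps dev_err_probe(), so a probe error path logs once, stays quiet for -EPROBE_DEFER, and returns the error in one statement. A sketch; my_phy_setup() is a hypothetical helper:

        int ret = my_phy_setup(phydev);

        if (ret)
                return phydev_err_probe(phydev, ret,
                                        "failed to set up PHY\n");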
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 10ec29bc0135..f88ec15f83dc 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -173,6 +173,7 @@ extern void printk_prefer_direct_enter(void);
extern void printk_prefer_direct_exit(void);
extern bool pr_flush(int timeout_ms, bool reset_on_progress);
+extern void try_block_console_kthreads(int timeout_ms);
/*
* Please don't use printk_ratelimit(), because it shares ratelimiting state
@@ -237,6 +238,10 @@ static inline bool pr_flush(int timeout_ms, bool reset_on_progress)
return true;
}
+static inline void try_block_console_kthreads(int timeout_ms)
+{
+}
+
static inline int printk_ratelimit(void)
{
return 0;
diff --git a/include/linux/random.h b/include/linux/random.h
index fae0c84027fd..20e389a14e5c 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -13,7 +13,7 @@
struct notifier_block;
void add_device_randomness(const void *buf, size_t len);
-void add_bootloader_randomness(const void *buf, size_t len);
+void __init add_bootloader_randomness(const void *buf, size_t len);
void add_input_randomness(unsigned int type, unsigned int code,
unsigned int value) __latent_entropy;
void add_interrupt_randomness(int irq) __latent_entropy;
@@ -74,7 +74,6 @@ static inline unsigned long get_random_canary(void)
int __init random_init(const char *command_line);
bool rng_is_initialized(void);
-bool rng_has_arch_random(void);
int wait_for_random_bytes(void);
/* Calls wait_for_random_bytes() and then calls get_random_bytes(buf, nbytes).
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index cbd5070bc87f..657a0fc68a3f 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -45,6 +45,7 @@ struct uart_ops {
void (*unthrottle)(struct uart_port *);
void (*send_xchar)(struct uart_port *, char ch);
void (*stop_rx)(struct uart_port *);
+ void (*start_rx)(struct uart_port *);
void (*enable_ms)(struct uart_port *);
void (*break_ctl)(struct uart_port *, int ctl);
int (*startup)(struct uart_port *);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 82edf0359ab3..f6a27ab19202 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2352,6 +2352,18 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb)
}
/**
+ * skb_len_add - add a number of bytes to the skb's length fields
+ * @skb: buffer to update
+ * @delta: number of bytes to add
+ */
+static inline void skb_len_add(struct sk_buff *skb, int delta)
+{
+ skb->len += delta;
+ skb->data_len += delta;
+ skb->truesize += delta;
+}
+
+/**
* __skb_fill_page_desc - initialise a paged fragment in an skb
* @skb: buffer containing fragment to be initialised
* @i: paged fragment index to initialise
@@ -2763,8 +2775,14 @@ static inline void skb_set_network_header(struct sk_buff *skb, const int offset)
skb->network_header += offset;
}
+static inline int skb_mac_header_was_set(const struct sk_buff *skb)
+{
+ return skb->mac_header != (typeof(skb->mac_header))~0U;
+}
+
static inline unsigned char *skb_mac_header(const struct sk_buff *skb)
{
+ DEBUG_NET_WARN_ON_ONCE(!skb_mac_header_was_set(skb));
return skb->head + skb->mac_header;
}
@@ -2775,14 +2793,10 @@ static inline int skb_mac_offset(const struct sk_buff *skb)
static inline u32 skb_mac_header_len(const struct sk_buff *skb)
{
+ DEBUG_NET_WARN_ON_ONCE(!skb_mac_header_was_set(skb));
return skb->network_header - skb->mac_header;
}
-static inline int skb_mac_header_was_set(const struct sk_buff *skb)
-{
- return skb->mac_header != (typeof(skb->mac_header))~0U;
-}
-
static inline void skb_unset_mac_header(struct sk_buff *skb)
{
skb->mac_header = (typeof(skb->mac_header))~0U;
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index c5a2d6f50f25..153b6dec9b6a 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -95,6 +95,7 @@ struct sk_psock {
spinlock_t link_lock;
refcount_t refcnt;
void (*saved_unhash)(struct sock *sk);
+ void (*saved_destroy)(struct sock *sk);
void (*saved_close)(struct sock *sk, long timeout);
void (*saved_write_space)(struct sock *sk);
void (*saved_data_ready)(struct sock *sk);
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 17311ad9f9af..414b8c7bb8f7 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -428,10 +428,6 @@ extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size,
extern int __sys_sendto(int fd, void __user *buff, size_t len,
unsigned int flags, struct sockaddr __user *addr,
int addr_len);
-extern int __sys_accept4_file(struct file *file, unsigned file_flags,
- struct sockaddr __user *upeer_sockaddr,
- int __user *upeer_addrlen, int flags,
- unsigned long nofile);
extern struct file *do_accept(struct file *file, unsigned file_flags,
struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags);
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 4417f667c757..5860f32e3958 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -243,7 +243,7 @@ extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf,
extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
extern int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec,
size_t nbytes);
-extern void xdr_commit_encode(struct xdr_stream *xdr);
+extern void __xdr_commit_encode(struct xdr_stream *xdr);
extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len);
extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen);
extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
@@ -307,6 +307,20 @@ xdr_reset_scratch_buffer(struct xdr_stream *xdr)
}
/**
+ * xdr_commit_encode - Ensure all data is written to xdr->buf
+ * @xdr: pointer to xdr_stream
+ *
+ * Handle encoding across page boundaries by giving the caller a
+ * temporary location to write to, then later copying the data into
+ * place. __xdr_commit_encode() does that copying.
+ */
+static inline void xdr_commit_encode(struct xdr_stream *xdr)
+{
+ if (unlikely(xdr->scratch.iov_len))
+ __xdr_commit_encode(xdr);
+}
+
+/**
* xdr_stream_remaining - Return the number of bytes remaining in the stream
* @xdr: pointer to struct xdr_stream
*
diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index 4700a88a28f6..7b4a13d3bd91 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -178,7 +178,8 @@ struct vdpa_map_file {
* for the device
* @vdev: vdpa device
* Returns virtqueue align requirement
- * @get_vq_group: Get the group id for a specific virtqueue
+ * @get_vq_group: Get the group id for a specific
+ * virtqueue (optional)
* @vdev: vdpa device
* @idx: virtqueue index
* Returns u32: group id for this virtqueue
@@ -243,7 +244,7 @@ struct vdpa_map_file {
* Returns the iova range supported by
* the device.
* @set_group_asid: Set address space identifier for a
- * virtqueue group
+ * virtqueue group (optional)
* @vdev: vdpa device
* @group: virtqueue group
* @asid: address space id for this group
diff --git a/include/linux/visorbus.h b/include/linux/visorbus.h
deleted file mode 100644
index 0d8bd6769b13..000000000000
--- a/include/linux/visorbus.h
+++ /dev/null
@@ -1,344 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Copyright (C) 2010 - 2013 UNISYS CORPORATION
- * All rights reserved.
- */
-
-/*
- * This header file is to be included by other kernel mode components that
- * implement a particular kind of visor_device. Each of these other kernel
- * mode components is called a visor device driver. Refer to visortemplate
- * for a minimal sample visor device driver.
- *
- * There should be nothing in this file that is private to the visorbus
- * bus implementation itself.
- */
-
-#ifndef __VISORBUS_H__
-#define __VISORBUS_H__
-
-#include <linux/device.h>
-
-#define VISOR_CHANNEL_SIGNATURE ('L' << 24 | 'N' << 16 | 'C' << 8 | 'E')
-
-/*
- * enum channel_serverstate
- * @CHANNELSRV_UNINITIALIZED: Channel is in an undefined state.
- * @CHANNELSRV_READY: Channel has been initialized by server.
- */
-enum channel_serverstate {
- CHANNELSRV_UNINITIALIZED = 0,
- CHANNELSRV_READY = 1
-};
-
-/*
- * enum channel_clientstate
- * @CHANNELCLI_DETACHED:
- * @CHANNELCLI_DISABLED: Client can see channel but is NOT allowed to use it
- * unless given TBD* explicit request
- * (should actually be < DETACHED).
- * @CHANNELCLI_ATTACHING: Legacy EFI client request for EFI server to attach.
- * @CHANNELCLI_ATTACHED: Idle, but client may want to use channel any time.
- * @CHANNELCLI_BUSY: Client either wants to use or is using channel.
- * @CHANNELCLI_OWNED: "No worries" state - client can access channel
- * anytime.
- */
-enum channel_clientstate {
- CHANNELCLI_DETACHED = 0,
- CHANNELCLI_DISABLED = 1,
- CHANNELCLI_ATTACHING = 2,
- CHANNELCLI_ATTACHED = 3,
- CHANNELCLI_BUSY = 4,
- CHANNELCLI_OWNED = 5
-};
-
-/*
- * Values for VISOR_CHANNEL_PROTOCOL.Features: This define exists so that
- * a guest can look at the FeatureFlags in the io channel, and configure the
- * driver to use interrupts or not based on this setting. All feature bits for
- * all channels should be defined here. The io channel feature bits are defined
- * below.
- */
-#define VISOR_DRIVER_ENABLES_INTS (0x1ULL << 1)
-#define VISOR_CHANNEL_IS_POLLING (0x1ULL << 3)
-#define VISOR_IOVM_OK_DRIVER_DISABLING_INTS (0x1ULL << 4)
-#define VISOR_DRIVER_DISABLES_INTS (0x1ULL << 5)
-#define VISOR_DRIVER_ENHANCED_RCVBUF_CHECKING (0x1ULL << 6)
-
-/*
- * struct channel_header - Common Channel Header
- * @signature: Signature.
- * @legacy_state: DEPRECATED - being replaced by.
- * @header_size: sizeof(struct channel_header).
- * @size: Total size of this channel in bytes.
- * @features: Flags to modify behavior.
- * @chtype: Channel type: data, bus, control, etc..
- * @partition_handle: ID of guest partition.
- * @handle: Device number of this channel in client.
- * @ch_space_offset: Offset in bytes to channel specific area.
- * @version_id: Struct channel_header Version ID.
- * @partition_index: Index of guest partition.
- * @zone_uuid: Guid of Channel's zone.
- * @cli_str_offset: Offset from channel header to null-terminated
- * ClientString (0 if ClientString not present).
- * @cli_state_boot: CHANNEL_CLIENTSTATE of pre-boot EFI client of this
- * channel.
- * @cmd_state_cli: CHANNEL_COMMANDSTATE (overloaded in Windows drivers, see
- * ServerStateUp, ServerStateDown, etc).
- * @cli_state_os: CHANNEL_CLIENTSTATE of Guest OS client of this channel.
- * @ch_characteristic: CHANNEL_CHARACTERISTIC_<xxx>.
- * @cmd_state_srv: CHANNEL_COMMANDSTATE (overloaded in Windows drivers, see
- * ServerStateUp, ServerStateDown, etc).
- * @srv_state: CHANNEL_SERVERSTATE.
- * @cli_error_boot: Bits to indicate err states for boot clients, so err
- * messages can be throttled.
- * @cli_error_os: Bits to indicate err states for OS clients, so err
- * messages can be throttled.
- * @filler: Pad out to 128 byte cacheline.
- * @recover_channel: Please add all new single-byte values below here.
- */
-struct channel_header {
- u64 signature;
- u32 legacy_state;
- /* SrvState, CliStateBoot, and CliStateOS below */
- u32 header_size;
- u64 size;
- u64 features;
- guid_t chtype;
- u64 partition_handle;
- u64 handle;
- u64 ch_space_offset;
- u32 version_id;
- u32 partition_index;
- guid_t zone_guid;
- u32 cli_str_offset;
- u32 cli_state_boot;
- u32 cmd_state_cli;
- u32 cli_state_os;
- u32 ch_characteristic;
- u32 cmd_state_srv;
- u32 srv_state;
- u8 cli_error_boot;
- u8 cli_error_os;
- u8 filler[1];
- u8 recover_channel;
-} __packed;
-
-#define VISOR_CHANNEL_ENABLE_INTS (0x1ULL << 0)
-
-/*
- * struct signal_queue_header - Subheader for the Signal Type variation of the
- * Common Channel.
- * @version: SIGNAL_QUEUE_HEADER Version ID.
- * @chtype: Queue type: storage, network.
- * @size: Total size of this queue in bytes.
- * @sig_base_offset: Offset to signal queue area.
- * @features: Flags to modify behavior.
- * @num_sent: Total # of signals placed in this queue.
- * @num_overflows: Total # of inserts failed due to full queue.
- * @signal_size: Total size of a signal for this queue.
- * @max_slots: Max # of slots in queue, 1 slot is always empty.
- * @max_signals: Max # of signals in queue (MaxSignalSlots-1).
- * @head: Queue head signal #.
- * @num_received: Total # of signals removed from this queue.
- * @tail: Queue tail signal.
- * @reserved1: Reserved field.
- * @reserved2: Reserved field.
- * @client_queue:
- * @num_irq_received: Total # of Interrupts received. This is incremented by the
- * ISR in the guest windows driver.
- * @num_empty: Number of times that visor_signal_remove is called and
- * returned Empty Status.
- * @errorflags: Error bits set during SignalReinit to denote trouble with
- * client's fields.
- * @filler: Pad out to 64 byte cacheline.
- */
-struct signal_queue_header {
- /* 1st cache line */
- u32 version;
- u32 chtype;
- u64 size;
- u64 sig_base_offset;
- u64 features;
- u64 num_sent;
- u64 num_overflows;
- u32 signal_size;
- u32 max_slots;
- u32 max_signals;
- u32 head;
- /* 2nd cache line */
- u64 num_received;
- u32 tail;
- u32 reserved1;
- u64 reserved2;
- u64 client_queue;
- u64 num_irq_received;
- u64 num_empty;
- u32 errorflags;
- u8 filler[12];
-} __packed;
-
-/* VISORCHANNEL Guids */
-/* {414815ed-c58c-11da-95a9-00e08161165f} */
-#define VISOR_VHBA_CHANNEL_GUID \
- GUID_INIT(0x414815ed, 0xc58c, 0x11da, \
- 0x95, 0xa9, 0x0, 0xe0, 0x81, 0x61, 0x16, 0x5f)
-#define VISOR_VHBA_CHANNEL_GUID_STR \
- "414815ed-c58c-11da-95a9-00e08161165f"
-struct visorchipset_state {
- u32 created:1;
- u32 attached:1;
- u32 configured:1;
- u32 running:1;
- /* Remaining bits in this 32-bit word are reserved. */
-};
-
-/**
- * struct visor_device - A device type for things "plugged" into the visorbus
- * bus
- * @visorchannel: Points to the channel that the device is
- * associated with.
- * @channel_type_guid: Identifies the channel type to the bus driver.
- * @device: Device struct meant for use by the bus driver
- * only.
- * @list_all: Used by the bus driver to enumerate devices.
- * @timer: Timer fired periodically to do interrupt-type
- * activity.
- * @being_removed: Indicates that the device is being removed from
- * the bus. Private bus driver use only.
- * @visordriver_callback_lock: Used by the bus driver to lock when adding and
- * removing devices.
- * @pausing: Indicates that a change towards a paused state.
- * is in progress. Only modified by the bus driver.
- * @resuming: Indicates that a change towards a running state
- * is in progress. Only modified by the bus driver.
- * @chipset_bus_no: Private field used by the bus driver.
- * @chipset_dev_no: Private field used the bus driver.
- * @state: Used to indicate the current state of the
- * device.
- * @inst: Unique GUID for this instance of the device.
- * @name: Name of the device.
- * @pending_msg_hdr: For private use by bus driver to respond to
- * hypervisor requests.
- * @vbus_hdr_info: A pointer to header info. Private use by bus
- * driver.
- * @partition_guid: Indicates client partion id. This should be the
- * same across all visor_devices in the current
- * guest. Private use by bus driver only.
- */
-struct visor_device {
- struct visorchannel *visorchannel;
- guid_t channel_type_guid;
- /* These fields are for private use by the bus driver only. */
- struct device device;
- struct list_head list_all;
- struct timer_list timer;
- bool timer_active;
- bool being_removed;
- struct mutex visordriver_callback_lock; /* synchronize probe/remove */
- bool pausing;
- bool resuming;
- u32 chipset_bus_no;
- u32 chipset_dev_no;
- struct visorchipset_state state;
- guid_t inst;
- u8 *name;
- struct controlvm_message_header *pending_msg_hdr;
- void *vbus_hdr_info;
- guid_t partition_guid;
- struct dentry *debugfs_dir;
- struct dentry *debugfs_bus_info;
-};
-
-#define to_visor_device(x) container_of(x, struct visor_device, device)
-
-typedef void (*visorbus_state_complete_func) (struct visor_device *dev,
- int status);
-
-/*
- * This struct describes a specific visor channel, by providing its GUID, name,
- * and sizes.
- */
-struct visor_channeltype_descriptor {
- const guid_t guid;
- const char *name;
- u64 min_bytes;
- u32 version;
-};
-
-/**
- * struct visor_driver - Information provided by each visor driver when it
- * registers with the visorbus driver
- * @name: Name of the visor driver.
- * @owner: The module owner.
- * @channel_types: Types of channels handled by this driver, ending with
- * a zero GUID. Our specialized BUS.match() method knows
- * about this list, and uses it to determine whether this
- * driver will in fact handle a new device that it has
- * detected.
- * @probe: Called when a new device comes online, by our probe()
- * function specified by driver.probe() (triggered
- * ultimately by some call to driver_register(),
- * bus_add_driver(), or driver_attach()).
- * @remove: Called when a new device is removed, by our remove()
- * function specified by driver.remove() (triggered
- * ultimately by some call to device_release_driver()).
- * @channel_interrupt: Called periodically, whenever there is a possiblity
- * that "something interesting" may have happened to the
- * channel.
- * @pause: Called to initiate a change of the device's state. If
- * the return valu`e is < 0, there was an error and the
- * state transition will NOT occur. If the return value
- * is >= 0, then the state transition was INITIATED
- * successfully, and complete_func() will be called (or
- * was just called) with the final status when either the
- * state transition fails or completes successfully.
- * @resume: Behaves similar to pause.
- * @driver: Private reference to the device driver. For use by bus
- * driver only.
- */
-struct visor_driver {
- const char *name;
- struct module *owner;
- struct visor_channeltype_descriptor *channel_types;
- int (*probe)(struct visor_device *dev);
- void (*remove)(struct visor_device *dev);
- void (*channel_interrupt)(struct visor_device *dev);
- int (*pause)(struct visor_device *dev,
- visorbus_state_complete_func complete_func);
- int (*resume)(struct visor_device *dev,
- visorbus_state_complete_func complete_func);
-
- /* These fields are for private use by the bus driver only. */
- struct device_driver driver;
-};
-
-#define to_visor_driver(x) (container_of(x, struct visor_driver, driver))
-
-int visor_check_channel(struct channel_header *ch, struct device *dev,
- const guid_t *expected_uuid, char *chname,
- u64 expected_min_bytes, u32 expected_version,
- u64 expected_signature);
-
-int visorbus_register_visor_driver(struct visor_driver *drv);
-void visorbus_unregister_visor_driver(struct visor_driver *drv);
-int visorbus_read_channel(struct visor_device *dev,
- unsigned long offset, void *dest,
- unsigned long nbytes);
-int visorbus_write_channel(struct visor_device *dev,
- unsigned long offset, void *src,
- unsigned long nbytes);
-int visorbus_enable_channel_interrupts(struct visor_device *dev);
-void visorbus_disable_channel_interrupts(struct visor_device *dev);
-
-int visorchannel_signalremove(struct visorchannel *channel, u32 queue,
- void *msg);
-int visorchannel_signalinsert(struct visorchannel *channel, u32 queue,
- void *msg);
-bool visorchannel_signalempty(struct visorchannel *channel, u32 queue);
-const guid_t *visorchannel_get_guid(struct visorchannel *channel);
-
-#define BUS_ROOT_DEVICE UINT_MAX
-struct visor_device *visorbus_get_device_by_id(u32 bus_no, u32 dev_no,
- struct visor_device *from);
-#endif
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index b159c2789961..096d48aa3437 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -215,6 +215,7 @@ extern struct vm_struct *__get_vm_area_caller(unsigned long size,
void free_vm_area(struct vm_struct *area);
extern struct vm_struct *remove_vm_area(const void *addr);
extern struct vm_struct *find_vm_area(const void *addr);
+struct vmap_area *find_vmap_area(unsigned long addr);
static inline bool is_vm_area_hugepages(const void *addr)
{
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 7fee9b6cfede..62e75dd40d9a 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -406,7 +406,7 @@ alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...);
* alloc_ordered_workqueue - allocate an ordered workqueue
* @fmt: printf format for the name of the workqueue
* @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful)
- * @args...: args for @fmt
+ * @args: args for @fmt
*
* Allocate an ordered workqueue. An ordered workqueue executes at
* most one work item at any given time in the queued order. They are
@@ -445,7 +445,7 @@ extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
struct delayed_work *dwork, unsigned long delay);
extern bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork);
-extern void flush_workqueue(struct workqueue_struct *wq);
+extern void __flush_workqueue(struct workqueue_struct *wq);
extern void drain_workqueue(struct workqueue_struct *wq);
extern int schedule_on_each_cpu(work_func_t func);
@@ -563,15 +563,23 @@ static inline bool schedule_work(struct work_struct *work)
return queue_work(system_wq, work);
}
+/*
+ * Detect attempt to flush system-wide workqueues at compile time when possible.
+ *
+ * See https://lkml.kernel.org/r/49925af7-78a8-a3dd-bce6-cfc02e1a9236@I-love.SAKURA.ne.jp
+ * for reasons and steps for converting system-wide workqueues into local workqueues.
+ */
+extern void __warn_flushing_systemwide_wq(void)
+ __compiletime_warning("Please avoid flushing system-wide workqueues.");
+
/**
* flush_scheduled_work - ensure that any scheduled work has run to completion.
*
* Forces execution of the kernel-global workqueue and blocks until its
* completion.
*
- * Think twice before calling this function! It's very easy to get into
- * trouble if you don't take great care. Either of the following situations
- * will lead to deadlock:
+ * It's very easy to get into trouble if you don't take great care.
+ * Either of the following situations will lead to deadlock:
*
* One of the work items currently on the workqueue needs to acquire
* a lock held by your code or its caller.
@@ -586,11 +594,51 @@ static inline bool schedule_work(struct work_struct *work)
* need to know that a particular work item isn't queued and isn't running.
* In such cases you should use cancel_delayed_work_sync() or
* cancel_work_sync() instead.
+ *
+ * Please stop calling this function! A conversion to stop flushing system-wide
+ * workqueues is in progress. This function will be removed once all in-tree
+ * users have stopped calling it.
*/
-static inline void flush_scheduled_work(void)
-{
- flush_workqueue(system_wq);
-}
+/*
+ * The background of commit 771c035372a036f8 ("deprecate the
+ * '__deprecated' attribute warnings entirely and for good") is that,
+ * since Linus builds all modules between every single pull he does,
+ * the standard kernel build needs to be _clean_ in order to be able to
+ * notice when new problems happen. Therefore, don't emit warning while
+ * there are in-tree users.
+ */
+#define flush_scheduled_work() \
+({ \
+ if (0) \
+ __warn_flushing_systemwide_wq(); \
+ __flush_workqueue(system_wq); \
+})
+
+/*
+ * Although there are no longer any in-tree callers, for now just emit a
+ * warning in order to give out-of-tree callers time to update.
+ */
+#define flush_workqueue(wq) \
+({ \
+ struct workqueue_struct *_wq = (wq); \
+ \
+ if ((__builtin_constant_p(_wq == system_wq) && \
+ _wq == system_wq) || \
+ (__builtin_constant_p(_wq == system_highpri_wq) && \
+ _wq == system_highpri_wq) || \
+ (__builtin_constant_p(_wq == system_long_wq) && \
+ _wq == system_long_wq) || \
+ (__builtin_constant_p(_wq == system_unbound_wq) && \
+ _wq == system_unbound_wq) || \
+ (__builtin_constant_p(_wq == system_freezable_wq) && \
+ _wq == system_freezable_wq) || \
+ (__builtin_constant_p(_wq == system_power_efficient_wq) && \
+ _wq == system_power_efficient_wq) || \
+ (__builtin_constant_p(_wq == system_freezable_power_efficient_wq) && \
+ _wq == system_freezable_power_efficient_wq)) \
+ __warn_flushing_systemwide_wq(); \
+ __flush_workqueue(_wq); \
+})
/**
* schedule_delayed_work_on - queue work in global workqueue on CPU after delay
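Because every check is wrapped in __builtin_constant_p(), the warning fires only when the compiler can prove the argument is one of the system-wide workqueues; flushing a locally owned queue compiles silently. For example (dev->wq is a hypothetical driver-owned workqueue):

        flush_workqueue(system_wq);     /* warns at compile time */
        flush_workqueue(dev->wq);       /* fine: driver-local workqueue */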
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 72feab5ea8d4..c29e11b2c073 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -1508,6 +1508,7 @@ void *xas_find_marked(struct xa_state *, unsigned long max, xa_mark_t);
void xas_init_marks(const struct xa_state *);
bool xas_nomem(struct xa_state *, gfp_t);
+void xas_destroy(struct xa_state *);
void xas_pause(struct xa_state *);
void xas_create_range(struct xa_state *);
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index a7ef624ed726..480fa579787e 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -16,12 +16,11 @@ void wait_for_unix_gc(void);
struct sock *unix_get_socket(struct file *filp);
struct sock *unix_peer_get(struct sock *sk);
-#define UNIX_HASH_SIZE 256
+#define UNIX_HASH_MOD (256 - 1)
+#define UNIX_HASH_SIZE (256 * 2)
#define UNIX_HASH_BITS 8
extern unsigned int unix_tot_inflight;
-extern spinlock_t unix_table_locks[2 * UNIX_HASH_SIZE];
-extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
struct unix_address {
refcount_t refcnt;
diff --git a/include/net/bond_options.h b/include/net/bond_options.h
index 1618b76f4903..d2aea5cf1e41 100644
--- a/include/net/bond_options.h
+++ b/include/net/bond_options.h
@@ -67,6 +67,7 @@ enum {
BOND_OPT_LACP_ACTIVE,
BOND_OPT_MISSED_MAX,
BOND_OPT_NS_TARGETS,
+ BOND_OPT_PRIO,
BOND_OPT_LAST
};
@@ -83,7 +84,10 @@ struct bond_opt_value {
char *string;
u64 value;
u32 flags;
- char extra[BOND_OPT_EXTRA_MAXLEN];
+ union {
+ char extra[BOND_OPT_EXTRA_MAXLEN];
+ struct net_device *slave_dev;
+ };
};
struct bonding;
@@ -133,13 +137,16 @@ static inline void __bond_opt_init(struct bond_opt_value *optval,
optval->value = value;
else if (string)
optval->string = string;
- else if (extra_len <= BOND_OPT_EXTRA_MAXLEN)
+
+ if (extra && extra_len <= BOND_OPT_EXTRA_MAXLEN)
memcpy(optval->extra, extra, extra_len);
}
#define bond_opt_initval(optval, value) __bond_opt_init(optval, NULL, value, NULL, 0)
#define bond_opt_initstr(optval, str) __bond_opt_init(optval, str, ULLONG_MAX, NULL, 0)
#define bond_opt_initextra(optval, extra, extra_len) \
__bond_opt_init(optval, NULL, ULLONG_MAX, extra, extra_len)
+#define bond_opt_slave_initval(optval, slave_dev, value) \
+ __bond_opt_init(optval, NULL, value, slave_dev, sizeof(struct net_device *))
void bond_option_arp_ip_targets_clear(struct bonding *bond);
#if IS_ENABLED(CONFIG_IPV6)
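The new slave_dev union member lets a per-slave option carry its target device through the generic option API. A hedged sketch of setting the new BOND_OPT_PRIO; the __bond_opt_set() call assumes the signature used by this series:

        struct bond_opt_value newval;
        struct net_device *slave_dev = ...;     /* slave to update */

        bond_opt_slave_initval(&newval, &slave_dev, prio);
        err = __bond_opt_set(bond, BOND_OPT_PRIO, &newval);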
diff --git a/include/net/bonding.h b/include/net/bonding.h
index cb904d356e31..6e78d657aa05 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -178,6 +178,7 @@ struct slave {
u32 speed;
u16 queue_id;
u8 perm_hwaddr[MAX_ADDR_LEN];
+ int prio;
struct ad_slave_info *ad_info;
struct tlb_slave_info tlb_info;
#ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 14f07275852b..33283eeda697 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -53,6 +53,7 @@ struct phylink_link_state;
#define DSA_TAG_PROTO_SJA1110_VALUE 23
#define DSA_TAG_PROTO_RTL8_4_VALUE 24
#define DSA_TAG_PROTO_RTL8_4T_VALUE 25
+#define DSA_TAG_PROTO_RZN1_A5PSW_VALUE 26
enum dsa_tag_protocol {
DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE,
@@ -81,6 +82,7 @@ enum dsa_tag_protocol {
DSA_TAG_PROTO_SJA1110 = DSA_TAG_PROTO_SJA1110_VALUE,
DSA_TAG_PROTO_RTL8_4 = DSA_TAG_PROTO_RTL8_4_VALUE,
DSA_TAG_PROTO_RTL8_4T = DSA_TAG_PROTO_RTL8_4T_VALUE,
+ DSA_TAG_PROTO_RZN1_A5PSW = DSA_TAG_PROTO_RZN1_A5PSW_VALUE,
};
struct dsa_switch;
@@ -888,6 +890,9 @@ struct dsa_switch_ops {
struct ethtool_eth_mac_stats *mac_stats);
void (*get_eth_ctrl_stats)(struct dsa_switch *ds, int port,
struct ethtool_eth_ctrl_stats *ctrl_stats);
+ void (*get_rmon_stats)(struct dsa_switch *ds, int port,
+ struct ethtool_rmon_stats *rmon_stats,
+ const struct ethtool_rmon_hist_range **ranges);
void (*get_stats64)(struct dsa_switch *ds, int port,
struct rtnl_link_stats64 *s);
void (*self_test)(struct dsa_switch *ds, int port,
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 077cd730ce2f..85cd695e7fd1 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -25,7 +25,6 @@
#undef INET_CSK_CLEAR_TIMERS
struct inet_bind_bucket;
-struct inet_bind2_bucket;
struct tcp_congestion_ops;
/*
@@ -58,7 +57,6 @@ struct inet_connection_sock_af_ops {
*
* @icsk_accept_queue: FIFO of established children
* @icsk_bind_hash: Bind node
- * @icsk_bind2_hash: Bind node in the bhash2 table
* @icsk_timeout: Timeout
* @icsk_retransmit_timer: Resend (no ack)
* @icsk_rto: Retransmit timeout
@@ -85,7 +83,6 @@ struct inet_connection_sock {
struct inet_sock icsk_inet;
struct request_sock_queue icsk_accept_queue;
struct inet_bind_bucket *icsk_bind_hash;
- struct inet_bind2_bucket *icsk_bind2_hash;
unsigned long icsk_timeout;
struct timer_list icsk_retransmit_timer;
struct timer_list icsk_delack_timer;
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index a0887b70967b..ebfa3df6f8dc 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -90,32 +90,11 @@ struct inet_bind_bucket {
struct hlist_head owners;
};
-struct inet_bind2_bucket {
- possible_net_t ib_net;
- int l3mdev;
- unsigned short port;
- union {
-#if IS_ENABLED(CONFIG_IPV6)
- struct in6_addr v6_rcv_saddr;
-#endif
- __be32 rcv_saddr;
- };
- /* Node in the inet2_bind_hashbucket chain */
- struct hlist_node node;
- /* List of sockets hashed to this bucket */
- struct hlist_head owners;
-};
-
static inline struct net *ib_net(struct inet_bind_bucket *ib)
{
return read_pnet(&ib->ib_net);
}
-static inline struct net *ib2_net(struct inet_bind2_bucket *ib)
-{
- return read_pnet(&ib->ib_net);
-}
-
#define inet_bind_bucket_for_each(tb, head) \
hlist_for_each_entry(tb, head, node)
@@ -124,15 +103,6 @@ struct inet_bind_hashbucket {
struct hlist_head chain;
};
-/* This is synchronized using the inet_bind_hashbucket's spinlock.
- * Instead of having separate spinlocks, the inet_bind2_hashbucket can share
- * the inet_bind_hashbucket's given that in every case where the bhash2 table
- * is useful, a lookup in the bhash table also occurs.
- */
-struct inet_bind2_hashbucket {
- struct hlist_head chain;
-};
-
/* Sockets can be hashed in established or listening table.
* We must use different 'nulls' end-of-chain value for all hash buckets :
* A socket might transition from ESTABLISH to LISTEN state without
@@ -164,12 +134,6 @@ struct inet_hashinfo {
*/
struct kmem_cache *bind_bucket_cachep;
struct inet_bind_hashbucket *bhash;
- /* The 2nd binding table hashed by port and address.
- * This is used primarily for expediting the resolution of bind
- * conflicts.
- */
- struct kmem_cache *bind2_bucket_cachep;
- struct inet_bind2_hashbucket *bhash2;
unsigned int bhash_size;
/* The 2nd listener table hashed by local port and address */
@@ -229,36 +193,6 @@ inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net,
void inet_bind_bucket_destroy(struct kmem_cache *cachep,
struct inet_bind_bucket *tb);
-static inline bool check_bind_bucket_match(struct inet_bind_bucket *tb,
- struct net *net,
- const unsigned short port,
- int l3mdev)
-{
- return net_eq(ib_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev;
-}
-
-struct inet_bind2_bucket *
-inet_bind2_bucket_create(struct kmem_cache *cachep, struct net *net,
- struct inet_bind2_hashbucket *head,
- const unsigned short port, int l3mdev,
- const struct sock *sk);
-
-void inet_bind2_bucket_destroy(struct kmem_cache *cachep,
- struct inet_bind2_bucket *tb);
-
-struct inet_bind2_bucket *
-inet_bind2_bucket_find(struct inet_hashinfo *hinfo, struct net *net,
- const unsigned short port, int l3mdev,
- struct sock *sk,
- struct inet_bind2_hashbucket **head);
-
-bool check_bind2_bucket_match_nulladdr(struct inet_bind2_bucket *tb,
- struct net *net,
- const unsigned short port,
- int l3mdev,
- const struct sock *sk);
-
static inline u32 inet_bhashfn(const struct net *net, const __u16 lport,
const u32 bhash_size)
{
@@ -266,7 +200,7 @@ static inline u32 inet_bhashfn(const struct net *net, const __u16 lport,
}
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
- struct inet_bind2_bucket *tb2, const unsigned short snum);
+ const unsigned short snum);
/* Caller must disable local BH processing. */
int __inet_inherit_port(const struct sock *sk, struct sock *child);
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index c1b5dcd6597c..daead5fb389a 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -253,6 +253,11 @@ struct inet_sock {
#define IP_CMSG_CHECKSUM BIT(7)
#define IP_CMSG_RECVFRAGSIZE BIT(8)
+static inline bool sk_is_inet(struct sock *sk)
+{
+ return sk->sk_family == AF_INET || sk->sk_family == AF_INET6;
+}
+
/**
* sk_to_full_sk - Access to a full socket
* @sk: pointer to a socket
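sk_is_inet() gives protocol-agnostic code a cheap guard before touching inet-only state. A minimal sketch:

        if (!sk_is_inet(sk))
                return -EOPNOTSUPP;     /* only AF_INET/AF_INET6 handled */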
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index c4f5601f6e32..20a2992901c2 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -120,7 +120,9 @@ struct net {
struct netns_core core;
struct netns_mib mib;
struct netns_packet packet;
+#if IS_ENABLED(CONFIG_UNIX)
struct netns_unix unx;
+#endif
struct netns_nexthop nexthop;
struct netns_ipv4 ipv4;
#if IS_ENABLED(CONFIG_IPV6)
diff --git a/include/net/netns/unix.h b/include/net/netns/unix.h
index 91a3d7e39198..6f1a33df061d 100644
--- a/include/net/netns/unix.h
+++ b/include/net/netns/unix.h
@@ -5,8 +5,14 @@
#ifndef __NETNS_UNIX_H__
#define __NETNS_UNIX_H__
+struct unix_table {
+ spinlock_t *locks;
+ struct hlist_head *buckets;
+};
+
struct ctl_table_header;
struct netns_unix {
+ struct unix_table table;
int sysctl_max_dgram_qlen;
struct ctl_table_header *ctl;
};
diff --git a/include/net/raw.h b/include/net/raw.h
index 8ad8df594853..d224376360e1 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -20,9 +20,8 @@
extern struct proto raw_prot;
extern struct raw_hashinfo raw_v4_hashinfo;
-struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
- unsigned short num, __be32 raddr,
- __be32 laddr, int dif, int sdif);
+bool raw_v4_match(struct net *net, struct sock *sk, unsigned short num,
+ __be32 raddr, __be32 laddr, int dif, int sdif);
int raw_abort(struct sock *sk, int err);
void raw_icmp_error(struct sk_buff *, int, u32);
@@ -33,10 +32,19 @@ int raw_rcv(struct sock *, struct sk_buff *);
#define RAW_HTABLE_SIZE MAX_INET_PROTOS
struct raw_hashinfo {
- rwlock_t lock;
- struct hlist_head ht[RAW_HTABLE_SIZE];
+ spinlock_t lock;
+ struct hlist_nulls_head ht[RAW_HTABLE_SIZE];
};
+static inline void raw_hashinfo_init(struct raw_hashinfo *hashinfo)
+{
+ int i;
+
+ spin_lock_init(&hashinfo->lock);
+ for (i = 0; i < RAW_HTABLE_SIZE; i++)
+ INIT_HLIST_NULLS_HEAD(&hashinfo->ht[i], i);
+}
+
#ifdef CONFIG_PROC_FS
int raw_proc_init(void);
void raw_proc_exit(void);
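With the table converted to hlist_nulls chains under a spinlock, each protocol initializes its hash with the new helper; each chain's nulls marker is its bucket index. A sketch of an init path (my_raw_hashinfo is hypothetical):

        static struct raw_hashinfo my_raw_hashinfo;

        raw_hashinfo_init(&my_raw_hashinfo);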
diff --git a/include/net/rawv6.h b/include/net/rawv6.h
index 53d86b6055e8..bc70909625f6 100644
--- a/include/net/rawv6.h
+++ b/include/net/rawv6.h
@@ -3,11 +3,12 @@
#define _NET_RAWV6_H
#include <net/protocol.h>
+#include <net/raw.h>
extern struct raw_hashinfo raw_v6_hashinfo;
-struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
- unsigned short num, const struct in6_addr *loc_addr,
- const struct in6_addr *rmt_addr, int dif, int sdif);
+bool raw_v6_match(struct net *net, struct sock *sk, unsigned short num,
+ const struct in6_addr *loc_addr,
+ const struct in6_addr *rmt_addr, int dif, int sdif);
int raw_abort(struct sock *sk, int err);
diff --git a/include/net/sock.h b/include/net/sock.h
index 0063e8410a4e..40bbd0e8925b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -348,7 +348,6 @@ struct sk_filter;
* @sk_txtime_report_errors: set report errors mode for SO_TXTIME
* @sk_txtime_unused: unused txtime flags
* @ns_tracker: tracker for netns reference
- * @sk_bind2_node: bind node in the bhash2 table
*/
struct sock {
/*
@@ -538,7 +537,6 @@ struct sock {
#endif
struct rcu_head sk_rcu;
netns_tracker ns_tracker;
- struct hlist_node sk_bind2_node;
};
enum sk_pacing {
@@ -819,16 +817,6 @@ static inline void sk_add_bind_node(struct sock *sk,
hlist_add_head(&sk->sk_bind_node, list);
}
-static inline void __sk_del_bind2_node(struct sock *sk)
-{
- __hlist_del(&sk->sk_bind2_node);
-}
-
-static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list)
-{
- hlist_add_head(&sk->sk_bind2_node, list);
-}
-
#define sk_for_each(__sk, list) \
hlist_for_each_entry(__sk, list, sk_node)
#define sk_for_each_rcu(__sk, list) \
@@ -846,8 +834,6 @@ static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list)
hlist_for_each_entry_safe(__sk, tmp, list, sk_node)
#define sk_for_each_bound(__sk, list) \
hlist_for_each_entry(__sk, list, sk_bind_node)
-#define sk_for_each_bound_bhash2(__sk, list) \
- hlist_for_each_entry(__sk, list, sk_bind2_node)
/**
* sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset
@@ -1412,7 +1398,7 @@ sk_memory_allocated(const struct sock *sk)
/* 1 MB per cpu, in page units */
#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT))
-static inline long
+static inline void
sk_memory_allocated_add(struct sock *sk, int amt)
{
int local_reserve;
@@ -1424,7 +1410,6 @@ sk_memory_allocated_add(struct sock *sk, int amt)
atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
}
preempt_enable();
- return sk_memory_allocated(sk);
}
static inline void
@@ -2234,9 +2219,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro
if (err)
return err;
- skb->len += copy;
- skb->data_len += copy;
- skb->truesize += copy;
+ skb_len_add(skb, copy);
sk_wmem_queued_add(sk, copy);
sk_mem_charge(sk, copy);
return 0;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 4794cae4577e..c21a9b516f1e 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -434,6 +434,7 @@ u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph,
struct tcphdr *th, u32 *cookie);
u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
struct tcphdr *th, u32 *cookie);
+u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss);
u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
const struct tcp_request_sock_ops *af_ops,
struct sock *sk, struct tcphdr *th);
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 5f88385a7748..3737570116c3 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -575,6 +575,7 @@ struct ocelot_ops {
int (*psfp_stats_get)(struct ocelot *ocelot, struct flow_cls_offload *f,
struct flow_stats *stats);
void (*cut_through_fwd)(struct ocelot *ocelot);
+ void (*tas_clock_adjust)(struct ocelot *ocelot);
};
struct ocelot_vcap_policer {
@@ -691,6 +692,9 @@ struct ocelot_port {
int bridge_num;
int speed;
+
+ /* Store the AdminBaseTime of EST fetched from userspace. */
+ s64 base_time;
};
struct ocelot {
@@ -757,6 +761,9 @@ struct ocelot {
/* Lock for serializing forwarding domain changes */
struct mutex fwd_domain_lock;
+ /* Lock for serializing Time-Aware Shaper changes */
+ struct mutex tas_lock;
+
struct workqueue_struct *owq;
u8 ptp:1;
diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h
index 6154a2e72bce..262d52021c23 100644
--- a/include/trace/events/workqueue.h
+++ b/include/trace/events/workqueue.h
@@ -22,7 +22,7 @@ struct pool_workqueue;
*/
TRACE_EVENT(workqueue_queue_work,
- TP_PROTO(unsigned int req_cpu, struct pool_workqueue *pwq,
+ TP_PROTO(int req_cpu, struct pool_workqueue *pwq,
struct work_struct *work),
TP_ARGS(req_cpu, pwq, work),
@@ -31,8 +31,8 @@ TRACE_EVENT(workqueue_queue_work,
__field( void *, work )
__field( void *, function)
__string( workqueue, pwq->wq->name)
- __field( unsigned int, req_cpu )
- __field( unsigned int, cpu )
+ __field( int, req_cpu )
+ __field( int, cpu )
),
TP_fast_assign(
@@ -43,7 +43,7 @@ TRACE_EVENT(workqueue_queue_work,
__entry->cpu = pwq->pool->cpu;
),
- TP_printk("work struct=%p function=%ps workqueue=%s req_cpu=%u cpu=%u",
+ TP_printk("work struct=%p function=%ps workqueue=%s req_cpu=%d cpu=%d",
__entry->work, __entry->function, __get_str(workqueue),
__entry->req_cpu, __entry->cpu)
);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f4009dbdf62d..e81362891596 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3597,10 +3597,11 @@ union bpf_attr {
*
* *iph* points to the start of the IPv4 or IPv6 header, while
* *iph_len* contains **sizeof**\ (**struct iphdr**) or
- * **sizeof**\ (**struct ip6hdr**).
+ * **sizeof**\ (**struct ipv6hdr**).
*
* *th* points to the start of the TCP header, while *th_len*
- * contains **sizeof**\ (**struct tcphdr**).
+ * contains the length of the TCP header (at least
+ * **sizeof**\ (**struct tcphdr**)).
* Return
* 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative
* error otherwise.
@@ -3783,10 +3784,11 @@ union bpf_attr {
*
* *iph* points to the start of the IPv4 or IPv6 header, while
* *iph_len* contains **sizeof**\ (**struct iphdr**) or
- * **sizeof**\ (**struct ip6hdr**).
+ * **sizeof**\ (**struct ipv6hdr**).
*
* *th* points to the start of the TCP header, while *th_len*
- * contains the length of the TCP header.
+ * contains the length of the TCP header with options (at least
+ * **sizeof**\ (**struct tcphdr**)).
* Return
 *		On success, lower 32 bits hold the generated SYN cookie,
* followed by 16 bits which hold the MSS value for that cookie,
@@ -5249,6 +5251,80 @@ union bpf_attr {
* Pointer to the underlying dynptr data, NULL if the dynptr is
* read-only, if the dynptr is invalid, or if the offset and length
* is out of bounds.
+ *
+ * s64 bpf_tcp_raw_gen_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th, u32 th_len)
+ * Description
+ * Try to issue a SYN cookie for the packet with corresponding
+ * IPv4/TCP headers, *iph* and *th*, without depending on a
+ * listening socket.
+ *
+ * *iph* points to the IPv4 header.
+ *
+ * *th* points to the start of the TCP header, while *th_len*
+ * contains the length of the TCP header (at least
+ * **sizeof**\ (**struct tcphdr**)).
+ * Return
+ *		On success, lower 32 bits hold the generated SYN cookie,
+ * followed by 16 bits which hold the MSS value for that cookie,
+ * and the top 16 bits are unused.
+ *
+ * On failure, the returned value is one of the following:
+ *
+ * **-EINVAL** if *th_len* is invalid.
+ *
+ * s64 bpf_tcp_raw_gen_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th, u32 th_len)
+ * Description
+ * Try to issue a SYN cookie for the packet with corresponding
+ * IPv6/TCP headers, *iph* and *th*, without depending on a
+ * listening socket.
+ *
+ * *iph* points to the IPv6 header.
+ *
+ * *th* points to the start of the TCP header, while *th_len*
+ * contains the length of the TCP header (at least
+ * **sizeof**\ (**struct tcphdr**)).
+ * Return
+ *		On success, lower 32 bits hold the generated SYN cookie,
+ * followed by 16 bits which hold the MSS value for that cookie,
+ * and the top 16 bits are unused.
+ *
+ * On failure, the returned value is one of the following:
+ *
+ * **-EINVAL** if *th_len* is invalid.
+ *
+ * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin.
+ *
+ * long bpf_tcp_raw_check_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th)
+ * Description
+ * Check whether *iph* and *th* contain a valid SYN cookie ACK
+ * without depending on a listening socket.
+ *
+ * *iph* points to the IPv4 header.
+ *
+ * *th* points to the TCP header.
+ * Return
+ * 0 if *iph* and *th* are a valid SYN cookie ACK.
+ *
+ * On failure, the returned value is one of the following:
+ *
+ * **-EACCES** if the SYN cookie is not valid.
+ *
+ * long bpf_tcp_raw_check_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th)
+ * Description
+ * Check whether *iph* and *th* contain a valid SYN cookie ACK
+ * without depending on a listening socket.
+ *
+ * *iph* points to the IPv6 header.
+ *
+ * *th* points to the TCP header.
+ * Return
+ * 0 if *iph* and *th* are a valid SYN cookie ACK.
+ *
+ * On failure, the returned value is one of the following:
+ *
+ * **-EACCES** if the SYN cookie is not valid.
+ *
+ * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -5455,6 +5531,10 @@ union bpf_attr {
FN(dynptr_read), \
FN(dynptr_write), \
FN(dynptr_data), \
+ FN(tcp_raw_gen_syncookie_ipv4), \
+ FN(tcp_raw_gen_syncookie_ipv6), \
+ FN(tcp_raw_check_syncookie_ipv4), \
+ FN(tcp_raw_check_syncookie_ipv6), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
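A minimal sketch of how a program might drive the new raw syncookie helpers, assuming iph, th and th_len were already parsed and bounds-checked; names such as iph, th, accept() and the XDP context are illustrative, not an in-tree sample:

	/* SYN path: mint a cookie without a listening socket. */
	s64 val = bpf_tcp_raw_gen_syncookie_ipv4(iph, th, th_len);
	if (val < 0)
		return XDP_ABORTED;
	cookie = (__u32)val;		/* lower 32 bits: the cookie  */
	mss = (val >> 32) & 0xffff;	/* next 16 bits: MSS for it   */

	/* ACK path: validate the cookie echoed back by the peer. */
	if (bpf_tcp_raw_check_syncookie_ipv4(iph, th) == 0)
		accept();		/* illustrative */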
diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index a9162a6c0284..ec1798b6d3ff 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -36,10 +36,10 @@ struct btf_type {
* bits 24-28: kind (e.g. int, ptr, array...etc)
* bits 29-30: unused
* bit 31: kind_flag, currently used by
- * struct, union and fwd
+ * struct, union, enum, fwd and enum64
*/
__u32 info;
- /* "size" is used by INT, ENUM, STRUCT, UNION and DATASEC.
+ /* "size" is used by INT, ENUM, STRUCT, UNION, DATASEC and ENUM64.
* "size" tells the size of the type it is describing.
*
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
@@ -63,7 +63,7 @@ enum {
BTF_KIND_ARRAY = 3, /* Array */
BTF_KIND_STRUCT = 4, /* Struct */
BTF_KIND_UNION = 5, /* Union */
- BTF_KIND_ENUM = 6, /* Enumeration */
+ BTF_KIND_ENUM = 6, /* Enumeration up to 32-bit values */
BTF_KIND_FWD = 7, /* Forward */
BTF_KIND_TYPEDEF = 8, /* Typedef */
BTF_KIND_VOLATILE = 9, /* Volatile */
@@ -76,6 +76,7 @@ enum {
BTF_KIND_FLOAT = 16, /* Floating point */
BTF_KIND_DECL_TAG = 17, /* Decl Tag */
BTF_KIND_TYPE_TAG = 18, /* Type Tag */
+ BTF_KIND_ENUM64 = 19, /* Enumeration up to 64-bit values */
NR_BTF_KINDS,
BTF_KIND_MAX = NR_BTF_KINDS - 1,
@@ -186,4 +187,14 @@ struct btf_decl_tag {
__s32 component_idx;
};
+/* BTF_KIND_ENUM64 is followed by multiple "struct btf_enum64".
+ * The exact number of btf_enum64 is stored in the vlen (of the
+ * info in "struct btf_type").
+ */
+struct btf_enum64 {
+ __u32 name_off;
+ __u32 val_lo32;
+ __u32 val_hi32;
+};
+
#endif /* _UAPI__LINUX_BTF_H__ */
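Since the 64-bit value is split across two 32-bit fields, a sketch of how it is put back together; this mirrors the btf_enum64_value() helper the kernel/bpf/btf.c changes below rely on:

static inline __u64 btf_enum64_value(const struct btf_enum64 *e)
{
	return ((__u64)e->val_hi32 << 32) | e->val_lo32;
}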
diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index 1d0bccc3fa54..d370165bc621 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -116,6 +116,7 @@
#define ETH_P_QINQ3 0x9300 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */
#define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */
#define ETH_P_DSA_8021Q 0xDADB /* Fake VLAN Header for DSA [ NOT AN OFFICIALLY REGISTERED ID ] */
+#define ETH_P_DSA_A5PSW 0xE001 /* A5PSW Tag Value [ NOT AN OFFICIALLY REGISTERED ID ] */
#define ETH_P_IFE 0xED3E /* ForCES inter-FE LFB type */
#define ETH_P_AF_IUCV 0xFBFB /* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 5f58dcfe2787..e36d9d2c65a7 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -963,6 +963,7 @@ enum {
IFLA_BOND_SLAVE_AD_AGGREGATOR_ID,
IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE,
IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE,
+ IFLA_BOND_SLAVE_PRIO,
__IFLA_BOND_SLAVE_MAX,
};
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 776e0278f9dd..53e7dae92e42 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -47,7 +47,6 @@ struct io_uring_sqe {
__u32 unlink_flags;
__u32 hardlink_flags;
__u32 xattr_flags;
- __u32 close_flags;
};
__u64 user_data; /* data to be passed back at completion time */
/* pack this to avoid bogus arm OABI complaints */
@@ -260,11 +259,6 @@ enum io_uring_op {
#define IORING_ACCEPT_MULTISHOT (1U << 0)
/*
- * close flags, store in sqe->close_flags
- */
-#define IORING_CLOSE_FD_AND_FILE_SLOT (1U << 0)
-
-/*
* IO completion data structure (Completion Queue Entry)
*/
struct io_uring_cqe {
diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h
index a21ca8ece8db..7af9e09ea556 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_verbs.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h
@@ -63,6 +63,7 @@ enum mlx5_ib_uapi_dm_type {
MLX5_IB_UAPI_DM_TYPE_MEMIC,
MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM,
MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM,
+ MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM,
};
enum mlx5_ib_uapi_devx_create_event_channel_flags {
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index f3a2abd6d1a1..3a8c9d744800 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1014,10 +1014,10 @@ static void audit_reset_context(struct audit_context *ctx)
ctx->target_comm[0] = '\0';
unroll_tree_refs(ctx, NULL, 0);
WARN_ON(!list_empty(&ctx->killed_trees));
- ctx->type = 0;
audit_free_module(ctx);
ctx->fds[0] = -1;
audit_proctitle_free(ctx);
+ ctx->type = 0; /* reset last for audit_free_*() */
}
static inline struct audit_context *audit_alloc_context(enum audit_state state)
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 63d0ac7dfe2f..d003d4d8242a 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -309,6 +309,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
[BTF_KIND_FLOAT] = "FLOAT",
[BTF_KIND_DECL_TAG] = "DECL_TAG",
[BTF_KIND_TYPE_TAG] = "TYPE_TAG",
+ [BTF_KIND_ENUM64] = "ENUM64",
};
const char *btf_type_str(const struct btf_type *t)
@@ -666,6 +667,7 @@ static bool btf_type_has_size(const struct btf_type *t)
case BTF_KIND_ENUM:
case BTF_KIND_DATASEC:
case BTF_KIND_FLOAT:
+ case BTF_KIND_ENUM64:
return true;
}
@@ -711,6 +713,11 @@ static const struct btf_decl_tag *btf_type_decl_tag(const struct btf_type *t)
return (const struct btf_decl_tag *)(t + 1);
}
+static const struct btf_enum64 *btf_type_enum64(const struct btf_type *t)
+{
+ return (const struct btf_enum64 *)(t + 1);
+}
+
static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
{
return kind_ops[BTF_INFO_KIND(t->info)];
@@ -1019,6 +1026,7 @@ static const char *btf_show_name(struct btf_show *show)
parens = "{";
break;
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
prefix = "enum";
break;
default:
@@ -1834,6 +1842,7 @@ __btf_resolve_size(const struct btf *btf, const struct btf_type *type,
case BTF_KIND_UNION:
case BTF_KIND_ENUM:
case BTF_KIND_FLOAT:
+ case BTF_KIND_ENUM64:
size = type->size;
goto resolved;
@@ -3670,6 +3679,7 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env,
{
const struct btf_enum *enums = btf_type_enum(t);
struct btf *btf = env->btf;
+ const char *fmt_str;
u16 i, nr_enums;
u32 meta_needed;
@@ -3683,11 +3693,6 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env,
return -EINVAL;
}
- if (btf_type_kflag(t)) {
- btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
- return -EINVAL;
- }
-
if (t->size > 8 || !is_power_of_2(t->size)) {
btf_verifier_log_type(env, t, "Unexpected size");
return -EINVAL;
@@ -3718,7 +3723,8 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env,
if (env->log.level == BPF_LOG_KERNEL)
continue;
- btf_verifier_log(env, "\t%s val=%d\n",
+ fmt_str = btf_type_kflag(t) ? "\t%s val=%d\n" : "\t%s val=%u\n";
+ btf_verifier_log(env, fmt_str,
__btf_name_by_offset(btf, enums[i].name_off),
enums[i].val);
}
@@ -3759,7 +3765,10 @@ static void btf_enum_show(const struct btf *btf, const struct btf_type *t,
return;
}
- btf_show_type_value(show, "%d", v);
+ if (btf_type_kflag(t))
+ btf_show_type_value(show, "%d", v);
+ else
+ btf_show_type_value(show, "%u", v);
btf_show_end_type(show);
}
@@ -3772,6 +3781,109 @@ static struct btf_kind_operations enum_ops = {
.show = btf_enum_show,
};
+static s32 btf_enum64_check_meta(struct btf_verifier_env *env,
+ const struct btf_type *t,
+ u32 meta_left)
+{
+ const struct btf_enum64 *enums = btf_type_enum64(t);
+ struct btf *btf = env->btf;
+ const char *fmt_str;
+ u16 i, nr_enums;
+ u32 meta_needed;
+
+ nr_enums = btf_type_vlen(t);
+ meta_needed = nr_enums * sizeof(*enums);
+
+ if (meta_left < meta_needed) {
+ btf_verifier_log_basic(env, t,
+ "meta_left:%u meta_needed:%u",
+ meta_left, meta_needed);
+ return -EINVAL;
+ }
+
+ if (t->size > 8 || !is_power_of_2(t->size)) {
+ btf_verifier_log_type(env, t, "Unexpected size");
+ return -EINVAL;
+ }
+
+	/* enum type has either no name or a valid one */
+ if (t->name_off &&
+ !btf_name_valid_identifier(env->btf, t->name_off)) {
+ btf_verifier_log_type(env, t, "Invalid name");
+ return -EINVAL;
+ }
+
+ btf_verifier_log_type(env, t, NULL);
+
+ for (i = 0; i < nr_enums; i++) {
+ if (!btf_name_offset_valid(btf, enums[i].name_off)) {
+ btf_verifier_log(env, "\tInvalid name_offset:%u",
+ enums[i].name_off);
+ return -EINVAL;
+ }
+
+ /* enum member must have a valid name */
+ if (!enums[i].name_off ||
+ !btf_name_valid_identifier(btf, enums[i].name_off)) {
+ btf_verifier_log_type(env, t, "Invalid name");
+ return -EINVAL;
+ }
+
+ if (env->log.level == BPF_LOG_KERNEL)
+ continue;
+
+ fmt_str = btf_type_kflag(t) ? "\t%s val=%lld\n" : "\t%s val=%llu\n";
+ btf_verifier_log(env, fmt_str,
+ __btf_name_by_offset(btf, enums[i].name_off),
+ btf_enum64_value(enums + i));
+ }
+
+ return meta_needed;
+}
+
+static void btf_enum64_show(const struct btf *btf, const struct btf_type *t,
+ u32 type_id, void *data, u8 bits_offset,
+ struct btf_show *show)
+{
+ const struct btf_enum64 *enums = btf_type_enum64(t);
+ u32 i, nr_enums = btf_type_vlen(t);
+ void *safe_data;
+ s64 v;
+
+ safe_data = btf_show_start_type(show, t, type_id, data);
+ if (!safe_data)
+ return;
+
+ v = *(u64 *)safe_data;
+
+ for (i = 0; i < nr_enums; i++) {
+ if (v != btf_enum64_value(enums + i))
+ continue;
+
+ btf_show_type_value(show, "%s",
+ __btf_name_by_offset(btf,
+ enums[i].name_off));
+
+ btf_show_end_type(show);
+ return;
+ }
+
+ if (btf_type_kflag(t))
+ btf_show_type_value(show, "%lld", v);
+ else
+ btf_show_type_value(show, "%llu", v);
+ btf_show_end_type(show);
+}
+
+static struct btf_kind_operations enum64_ops = {
+ .check_meta = btf_enum64_check_meta,
+ .resolve = btf_df_resolve,
+ .check_member = btf_enum_check_member,
+ .check_kflag_member = btf_enum_check_kflag_member,
+ .log_details = btf_enum_log,
+ .show = btf_enum64_show,
+};
+
static s32 btf_func_proto_check_meta(struct btf_verifier_env *env,
const struct btf_type *t,
u32 meta_left)
@@ -4438,6 +4550,7 @@ static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = {
[BTF_KIND_FLOAT] = &float_ops,
[BTF_KIND_DECL_TAG] = &decl_tag_ops,
[BTF_KIND_TYPE_TAG] = &modifier_ops,
+ [BTF_KIND_ENUM64] = &enum64_ops,
};
static s32 btf_check_meta(struct btf_verifier_env *env,
@@ -4815,6 +4928,7 @@ static int btf_check_type_tags(struct btf_verifier_env *env,
n = btf_nr_types(btf);
for (i = start_id; i < n; i++) {
const struct btf_type *t;
+ int chain_limit = 32;
u32 cur_id = i;
t = btf_type_by_id(btf, i);
@@ -4827,6 +4941,10 @@ static int btf_check_type_tags(struct btf_verifier_env *env,
in_tags = btf_type_is_type_tag(t);
while (btf_type_is_modifier(t)) {
+ if (!chain_limit--) {
+ btf_verifier_log(env, "Max chain length or cycle detected");
+ return -ELOOP;
+ }
if (btf_type_is_type_tag(t)) {
if (!in_tags) {
btf_verifier_log(env, "Type tags don't precede modifiers");
@@ -5299,7 +5417,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
/* skip modifiers */
while (btf_type_is_modifier(t))
t = btf_type_by_id(btf, t->type);
- if (btf_type_is_small_int(t) || btf_type_is_enum(t))
+ if (btf_type_is_small_int(t) || btf_is_any_enum(t))
/* accessing a scalar */
return true;
if (!btf_type_is_ptr(t)) {
@@ -5763,7 +5881,7 @@ static int __get_type_size(struct btf *btf, u32 btf_id,
if (btf_type_is_ptr(t))
/* kernel size of pointer. Not BPF's size of pointer*/
return sizeof(void *);
- if (btf_type_is_int(t) || btf_type_is_enum(t))
+ if (btf_type_is_int(t) || btf_is_any_enum(t))
return t->size;
*bad_type = t;
return -EINVAL;
@@ -5911,7 +6029,7 @@ static int btf_check_func_type_match(struct bpf_verifier_log *log,
* to context only. And only global functions can be replaced.
* Hence type check only those types.
*/
- if (btf_type_is_int(t1) || btf_type_is_enum(t1))
+ if (btf_type_is_int(t1) || btf_is_any_enum(t1))
continue;
if (!btf_type_is_ptr(t1)) {
bpf_log(log,
@@ -6409,7 +6527,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
t = btf_type_by_id(btf, t->type);
while (btf_type_is_modifier(t))
t = btf_type_by_id(btf, t->type);
- if (!btf_type_is_int(t) && !btf_type_is_enum(t)) {
+ if (!btf_type_is_int(t) && !btf_is_any_enum(t)) {
bpf_log(log,
"Global function %s() doesn't return scalar. Only those are supported.\n",
tname);
@@ -6424,7 +6542,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
t = btf_type_by_id(btf, args[i].type);
while (btf_type_is_modifier(t))
t = btf_type_by_id(btf, t->type);
- if (btf_type_is_int(t) || btf_type_is_enum(t)) {
+ if (btf_type_is_int(t) || btf_is_any_enum(t)) {
reg->type = SCALAR_VALUE;
continue;
}
@@ -7336,6 +7454,7 @@ recur:
case BTF_KIND_UNION:
case BTF_KIND_ENUM:
case BTF_KIND_FWD:
+ case BTF_KIND_ENUM64:
return 1;
case BTF_KIND_INT:
/* just reject deprecated bitfield-like integers; all other
@@ -7388,10 +7507,10 @@ recur:
* field-based relocations. This function assumes that root types were already
* checked for name match. Beyond that initial root-level name check, names
* are completely ignored. Compatibility rules are as follows:
- * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
+ * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs/ENUM64s are considered compatible, but
* kind should match for local and target types (i.e., STRUCT is not
* compatible with UNION);
- * - for ENUMs, the size is ignored;
+ * - for ENUMs/ENUM64s, the size is ignored;
* - for INT, size and signedness are ignored;
* - for ARRAY, dimensionality is ignored, element types are checked for
* compatibility recursively;
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index afb414b26d01..7a394f7c205c 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -721,6 +721,60 @@ static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
}
/**
+ * purge_effective_progs() - After compute_effective_progs() fails to alloc a
+ *                           new cgrp->bpf.inactive table, we can recover by
+ *                           recomputing the array in place.
+ *
+ * @cgrp: The cgroup whose descendants to traverse
+ * @prog: A program to detach or NULL
+ * @link: A link to detach or NULL
+ * @atype: Type of detach operation
+ */
+static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog,
+ struct bpf_cgroup_link *link,
+ enum cgroup_bpf_attach_type atype)
+{
+ struct cgroup_subsys_state *css;
+ struct bpf_prog_array *progs;
+ struct bpf_prog_list *pl;
+ struct list_head *head;
+ struct cgroup *cg;
+ int pos;
+
+ /* recompute effective prog array in place */
+ css_for_each_descendant_pre(css, &cgrp->self) {
+ struct cgroup *desc = container_of(css, struct cgroup, self);
+
+ if (percpu_ref_is_zero(&desc->bpf.refcnt))
+ continue;
+
+ /* find position of link or prog in effective progs array */
+ for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
+ if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
+ continue;
+
+ head = &cg->bpf.progs[atype];
+ list_for_each_entry(pl, head, node) {
+ if (!prog_list_prog(pl))
+ continue;
+ if (pl->prog == prog && pl->link == link)
+ goto found;
+ pos++;
+ }
+ }
+found:
+ BUG_ON(!cg);
+ progs = rcu_dereference_protected(
+ desc->bpf.effective[atype],
+ lockdep_is_held(&cgroup_mutex));
+
+ /* Remove the program from the array */
+ WARN_ONCE(bpf_prog_array_delete_safe_at(progs, pos),
+ "Failed to purge a prog from array at index %d", pos);
+ }
+}
+
+/**
* __cgroup_bpf_detach() - Detach the program or link from a cgroup, and
* propagate the change to descendants
 * @cgrp: The cgroup whose descendants to traverse
@@ -739,7 +793,6 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
struct bpf_prog_list *pl;
struct list_head *progs;
u32 flags;
- int err;
atype = to_cgroup_bpf_attach_type(type);
if (atype < 0)
@@ -761,9 +814,12 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
pl->prog = NULL;
pl->link = NULL;
- err = update_effective_progs(cgrp, atype);
- if (err)
- goto cleanup;
+ if (update_effective_progs(cgrp, atype)) {
+		/* if updating the effective array failed, replace the prog with a dummy prog */
+ pl->prog = old_prog;
+ pl->link = link;
+ purge_effective_progs(cgrp, old_prog, link, atype);
+ }
/* now can actually delete it from this cgroup list */
list_del(&pl->node);
@@ -775,12 +831,6 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
bpf_prog_put(old_prog);
static_branch_dec(&cgroup_bpf_enabled_key[atype]);
return 0;
-
-cleanup:
- /* restore back prog or link */
- pl->prog = old_prog;
- pl->link = link;
- return err;
}
static int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 5f6f3f829b36..b5ffebcce6cc 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -176,7 +176,7 @@ void bpf_prog_jit_attempt_done(struct bpf_prog *prog)
* here is relative to the prog itself instead of the main prog.
* This array has one entry for each xlated bpf insn.
*
- * jited_off is the byte off to the last byte of the jited insn.
+ * jited_off is the byte off to the end of the jited insn.
*
* Hence, with
* insn_start:
@@ -2279,6 +2279,21 @@ void bpf_prog_array_free(struct bpf_prog_array *progs)
kfree_rcu(progs, rcu);
}
+static void __bpf_prog_array_free_sleepable_cb(struct rcu_head *rcu)
+{
+ struct bpf_prog_array *progs;
+
+ progs = container_of(rcu, struct bpf_prog_array, rcu);
+ kfree_rcu(progs, rcu);
+}
+
+void bpf_prog_array_free_sleepable(struct bpf_prog_array *progs)
+{
+ if (!progs || progs == &bpf_empty_prog_array.hdr)
+ return;
+ call_rcu_tasks_trace(&progs->rcu, __bpf_prog_array_free_sleepable_cb);
+}
+
int bpf_prog_array_length(struct bpf_prog_array *array)
{
struct bpf_prog_array_item *item;
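The two-stage free above (a tasks-trace grace period chained into kfree_rcu()) pairs with readers that may sleep; a hedged sketch of the reader side, with event->prog_array standing in for any sleepable prog array:

	/* Sleepable prog arrays are walked under the tasks-trace RCU
	 * read lock, so the array must survive both grace periods
	 * before its memory is reused.
	 */
	rcu_read_lock_trace();
	array = rcu_dereference_check(event->prog_array,
				      rcu_read_lock_trace_held());
	/* ... run programs, possibly sleeping ... */
	rcu_read_unlock_trace();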
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 225806a02efb..a1c84d256f83 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -584,7 +584,7 @@ BPF_CALL_3(bpf_strncmp, const char *, s1, u32, s1_sz, const char *, s2)
return strncmp(s1, s2, s1_sz);
}
-const struct bpf_func_proto bpf_strncmp_proto = {
+static const struct bpf_func_proto bpf_strncmp_proto = {
.func = bpf_strncmp,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1402,7 +1402,7 @@ BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr)
*/
#define BPF_PTR_POISON ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA))
-const struct bpf_func_proto bpf_kptr_xchg_proto = {
+static const struct bpf_func_proto bpf_kptr_xchg_proto = {
.func = bpf_kptr_xchg,
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
@@ -1487,7 +1487,7 @@ error:
return err;
}
-const struct bpf_func_proto bpf_dynptr_from_mem_proto = {
+static const struct bpf_func_proto bpf_dynptr_from_mem_proto = {
.func = bpf_dynptr_from_mem,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1513,7 +1513,7 @@ BPF_CALL_4(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src
return 0;
}
-const struct bpf_func_proto bpf_dynptr_read_proto = {
+static const struct bpf_func_proto bpf_dynptr_read_proto = {
.func = bpf_dynptr_read,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1539,7 +1539,7 @@ BPF_CALL_4(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *,
return 0;
}
-const struct bpf_func_proto bpf_dynptr_write_proto = {
+static const struct bpf_func_proto bpf_dynptr_write_proto = {
.func = bpf_dynptr_write,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1566,7 +1566,7 @@ BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len
return (unsigned long)(ptr->data + ptr->offset + offset);
}
-const struct bpf_func_proto bpf_dynptr_data_proto = {
+static const struct bpf_func_proto bpf_dynptr_data_proto = {
.func = bpf_dynptr_data,
.gpl_only = false,
.ret_type = RET_PTR_TO_DYNPTR_MEM_OR_NULL,
diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c
index 3d897de89061..00b874c8e889 100644
--- a/kernel/bpf/percpu_freelist.c
+++ b/kernel/bpf/percpu_freelist.c
@@ -31,7 +31,7 @@ static inline void pcpu_freelist_push_node(struct pcpu_freelist_head *head,
struct pcpu_freelist_node *node)
{
node->next = head->first;
- head->first = node;
+ WRITE_ONCE(head->first, node);
}
static inline void ___pcpu_freelist_push(struct pcpu_freelist_head *head,
@@ -130,14 +130,17 @@ static struct pcpu_freelist_node *___pcpu_freelist_pop(struct pcpu_freelist *s)
orig_cpu = cpu = raw_smp_processor_id();
while (1) {
head = per_cpu_ptr(s->freelist, cpu);
+ if (!READ_ONCE(head->first))
+ goto next_cpu;
raw_spin_lock(&head->lock);
node = head->first;
if (node) {
- head->first = node->next;
+ WRITE_ONCE(head->first, node->next);
raw_spin_unlock(&head->lock);
return node;
}
raw_spin_unlock(&head->lock);
+next_cpu:
cpu = cpumask_next(cpu, cpu_possible_mask);
if (cpu >= nr_cpu_ids)
cpu = 0;
@@ -146,10 +149,12 @@ static struct pcpu_freelist_node *___pcpu_freelist_pop(struct pcpu_freelist *s)
}
/* per cpu lists are all empty, try extralist */
+ if (!READ_ONCE(s->extralist.first))
+ return NULL;
raw_spin_lock(&s->extralist.lock);
node = s->extralist.first;
if (node)
- s->extralist.first = node->next;
+ WRITE_ONCE(s->extralist.first, node->next);
raw_spin_unlock(&s->extralist.lock);
return node;
}
@@ -164,15 +169,18 @@ ___pcpu_freelist_pop_nmi(struct pcpu_freelist *s)
orig_cpu = cpu = raw_smp_processor_id();
while (1) {
head = per_cpu_ptr(s->freelist, cpu);
+ if (!READ_ONCE(head->first))
+ goto next_cpu;
if (raw_spin_trylock(&head->lock)) {
node = head->first;
if (node) {
- head->first = node->next;
+ WRITE_ONCE(head->first, node->next);
raw_spin_unlock(&head->lock);
return node;
}
raw_spin_unlock(&head->lock);
}
+next_cpu:
cpu = cpumask_next(cpu, cpu_possible_mask);
if (cpu >= nr_cpu_ids)
cpu = 0;
@@ -181,11 +189,11 @@ ___pcpu_freelist_pop_nmi(struct pcpu_freelist *s)
}
/* cannot pop from per cpu lists, try extralist */
- if (!raw_spin_trylock(&s->extralist.lock))
+ if (!READ_ONCE(s->extralist.first) || !raw_spin_trylock(&s->extralist.lock))
return NULL;
node = s->extralist.first;
if (node)
- s->extralist.first = node->next;
+ WRITE_ONCE(s->extralist.first, node->next);
raw_spin_unlock(&s->extralist.lock);
return node;
}
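A toy illustration of the peek-then-lock pattern these hunks introduce; the unlocked READ_ONCE() is only a hint and may race with a concurrent push, so the authoritative check is redone under the lock:

struct tnode { struct tnode *next; };
struct tlist { raw_spinlock_t lock; struct tnode *first; };

static struct tnode *try_pop(struct tlist *l)
{
	struct tnode *n;

	if (!READ_ONCE(l->first))	/* cheap, racy emptiness check */
		return NULL;
	raw_spin_lock(&l->lock);
	n = l->first;			/* re-read under the lock */
	if (n)
		WRITE_ONCE(l->first, n->next);
	raw_spin_unlock(&l->lock);
	return n;
}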
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2b69306d3c6e..7d5af5b99f0d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -4090,14 +4090,15 @@ static int bpf_prog_get_info_by_fd(struct file *file,
info.nr_jited_line_info = 0;
if (info.nr_jited_line_info && ulen) {
if (bpf_dump_raw_ok(file->f_cred)) {
+ unsigned long line_addr;
__u64 __user *user_linfo;
u32 i;
user_linfo = u64_to_user_ptr(info.jited_line_info);
ulen = min_t(u32, info.nr_jited_line_info, ulen);
for (i = 0; i < ulen; i++) {
- if (put_user((__u64)(long)prog->aux->jited_linfo[i],
- &user_linfo[i]))
+ line_addr = (unsigned long)prog->aux->jited_linfo[i];
+ if (put_user((__u64)line_addr, &user_linfo[i]))
return -EFAULT;
}
} else {
@@ -5130,7 +5131,7 @@ BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flag
return *res ? 0 : -ENOENT;
}
-const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = {
+static const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = {
.func = bpf_kallsyms_lookup_name,
.gpl_only = false,
.ret_type = RET_INTEGER,
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index aedac2ac02b9..2859901ffbe3 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5848,6 +5848,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
enum bpf_arg_type arg_type = fn->arg_type[arg];
enum bpf_reg_type type = reg->type;
+ u32 *arg_btf_id = NULL;
int err = 0;
if (arg_type == ARG_DONTCARE)
@@ -5884,7 +5885,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
*/
goto skip_type_check;
- err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg], meta);
+ /* arg_btf_id and arg_size are in a union. */
+ if (base_type(arg_type) == ARG_PTR_TO_BTF_ID)
+ arg_btf_id = fn->arg_btf_id[arg];
+
+ err = check_reg_type(env, regno, arg_type, arg_btf_id, meta);
if (err)
return err;
@@ -6011,6 +6016,11 @@ skip_type_check:
* next is_mem_size argument below.
*/
meta->raw_mode = arg_type & MEM_UNINIT;
+ if (arg_type & MEM_FIXED_SIZE) {
+ err = check_helper_mem_access(env, regno,
+ fn->arg_size[arg], false,
+ meta);
+ }
} else if (arg_type_is_mem_size(arg_type)) {
bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
@@ -6400,11 +6410,19 @@ static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
return count <= 1;
}
-static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
- enum bpf_arg_type arg_next)
+static bool check_args_pair_invalid(const struct bpf_func_proto *fn, int arg)
{
- return (base_type(arg_curr) == ARG_PTR_TO_MEM) !=
- arg_type_is_mem_size(arg_next);
+ bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE;
+ bool has_size = fn->arg_size[arg] != 0;
+ bool is_next_size = false;
+
+ if (arg + 1 < ARRAY_SIZE(fn->arg_type))
+ is_next_size = arg_type_is_mem_size(fn->arg_type[arg + 1]);
+
+ if (base_type(fn->arg_type[arg]) != ARG_PTR_TO_MEM)
+ return is_next_size;
+
+ return has_size == is_next_size || is_next_size == is_fixed;
}
static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
@@ -6415,11 +6433,11 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
* helper function specification.
*/
if (arg_type_is_mem_size(fn->arg1_type) ||
- base_type(fn->arg5_type) == ARG_PTR_TO_MEM ||
- check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
- check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
- check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
- check_args_pair_invalid(fn->arg4_type, fn->arg5_type))
+ check_args_pair_invalid(fn, 0) ||
+ check_args_pair_invalid(fn, 1) ||
+ check_args_pair_invalid(fn, 2) ||
+ check_args_pair_invalid(fn, 3) ||
+ check_args_pair_invalid(fn, 4))
return false;
return true;
@@ -6460,7 +6478,10 @@ static bool check_btf_id_ok(const struct bpf_func_proto *fn)
if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
return false;
- if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i])
+ if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] &&
+ /* arg_btf_id and arg_size are in a union. */
+ (base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM ||
+ !(fn->arg_type[i] & MEM_FIXED_SIZE)))
return false;
}
@@ -10901,7 +10922,7 @@ static int check_btf_func(struct bpf_verifier_env *env,
goto err_free;
ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
scalar_return =
- btf_type_is_small_int(ret_type) || btf_type_is_enum(ret_type);
+ btf_type_is_small_int(ret_type) || btf_is_any_enum(ret_type);
if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
goto err_free;
@@ -14829,8 +14850,8 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
}
if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING &&
- prog->type != BPF_PROG_TYPE_LSM) {
- verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n");
+ prog->type != BPF_PROG_TYPE_LSM && prog->type != BPF_PROG_TYPE_KPROBE) {
+ verbose(env, "Only fentry/fexit/fmod_ret, lsm, and kprobe/uprobe programs can be sleepable\n");
return -EINVAL;
}
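With MEM_FIXED_SIZE, a helper's memory argument carries its size in the proto itself rather than in a following size argument; a hypothetical proto, with bpf_fill_blob and struct my_blob invented purely for illustration:

static const struct bpf_func_proto bpf_fill_blob_proto = {
	.func		= bpf_fill_blob,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	/* no ARG_CONST_SIZE argument needs to follow arg1 */
	.arg1_type	= ARG_PTR_TO_MEM | MEM_UNINIT | MEM_FIXED_SIZE,
	.arg1_size	= sizeof(struct my_blob),
};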
diff --git a/kernel/cfi.c b/kernel/cfi.c
index 9594cfd1cf2c..08102d19ec15 100644
--- a/kernel/cfi.c
+++ b/kernel/cfi.c
@@ -281,6 +281,8 @@ static inline cfi_check_fn find_module_check_fn(unsigned long ptr)
static inline cfi_check_fn find_check_fn(unsigned long ptr)
{
cfi_check_fn fn = NULL;
+ unsigned long flags;
+ bool rcu_idle;
if (is_kernel_text(ptr))
return __cfi_check;
@@ -290,13 +292,21 @@ static inline cfi_check_fn find_check_fn(unsigned long ptr)
* the shadow and __module_address use RCU, so we need to wake it
* up if necessary.
*/
- RCU_NONIDLE({
- if (IS_ENABLED(CONFIG_CFI_CLANG_SHADOW))
- fn = find_shadow_check_fn(ptr);
+ rcu_idle = !rcu_is_watching();
+ if (rcu_idle) {
+ local_irq_save(flags);
+ rcu_irq_enter();
+ }
+
+ if (IS_ENABLED(CONFIG_CFI_CLANG_SHADOW))
+ fn = find_shadow_check_fn(ptr);
+ if (!fn)
+ fn = find_module_check_fn(ptr);
- if (!fn)
- fn = find_module_check_fn(ptr);
- });
+ if (rcu_idle) {
+ rcu_irq_exit();
+ local_irq_restore(flags);
+ }
return fn;
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 80782cddb1da..48bae58d240e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10068,26 +10068,30 @@ static inline bool perf_event_is_tracing(struct perf_event *event)
int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog,
u64 bpf_cookie)
{
- bool is_kprobe, is_tracepoint, is_syscall_tp;
+ bool is_kprobe, is_uprobe, is_tracepoint, is_syscall_tp;
if (!perf_event_is_tracing(event))
return perf_event_set_bpf_handler(event, prog, bpf_cookie);
- is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
+ is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_KPROBE;
+ is_uprobe = event->tp_event->flags & TRACE_EVENT_FL_UPROBE;
is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT;
is_syscall_tp = is_syscall_trace_event(event->tp_event);
- if (!is_kprobe && !is_tracepoint && !is_syscall_tp)
+ if (!is_kprobe && !is_uprobe && !is_tracepoint && !is_syscall_tp)
/* bpf programs can only be attached to u/kprobe or tracepoint */
return -EINVAL;
- if ((is_kprobe && prog->type != BPF_PROG_TYPE_KPROBE) ||
+ if (((is_kprobe || is_uprobe) && prog->type != BPF_PROG_TYPE_KPROBE) ||
(is_tracepoint && prog->type != BPF_PROG_TYPE_TRACEPOINT) ||
(is_syscall_tp && prog->type != BPF_PROG_TYPE_TRACEPOINT))
return -EINVAL;
+ if (prog->type == BPF_PROG_TYPE_KPROBE && prog->aux->sleepable && !is_uprobe)
+ /* only uprobe programs are allowed to be sleepable */
+ return -EINVAL;
+
/* Kprobe override only works for kprobes, not uprobes. */
- if (prog->kprobe_override &&
- !(event->tp_event->flags & TRACE_EVENT_FL_KPROBE))
+ if (prog->kprobe_override && !is_kprobe)
return -EINVAL;
if (is_tracepoint || is_syscall_tp) {
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index e6b8e564b37f..886789dcee43 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -1006,8 +1006,10 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle,
if (desc->irq_data.chip != &no_irq_chip)
mask_ack_irq(desc);
irq_state_set_disabled(desc);
- if (is_chained)
+ if (is_chained) {
desc->action = NULL;
+ WARN_ON(irq_chip_pm_put(irq_desc_get_irq_data(desc)));
+ }
desc->depth = 1;
}
desc->handle_irq = handle;
@@ -1033,6 +1035,7 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle,
irq_settings_set_norequest(desc);
irq_settings_set_nothread(desc);
desc->action = &chained_action;
+ WARN_ON(irq_chip_pm_get(irq_desc_get_irq_data(desc)));
irq_activate_and_startup(desc, IRQ_RESEND);
}
}
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 81e87280513e..f06b91ca6482 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -5432,7 +5432,7 @@ static struct pin_cookie __lock_pin_lock(struct lockdep_map *lock)
* be guessable and still allows some pin nesting in
* our u32 pin_count.
*/
- cookie.val = 1 + (prandom_u32() >> 16);
+ cookie.val = 1 + (sched_clock() & 0xffff);
hlock->pin_count += cookie.val;
return cookie;
}
diff --git a/kernel/panic.c b/kernel/panic.c
index a3c758dba15a..4cf13c37bd08 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -297,6 +297,7 @@ void panic(const char *fmt, ...)
* unfortunately means it may not be hardened to work in a
* panic situation.
*/
+ try_block_console_kthreads(10000);
smp_send_stop();
} else {
/*
@@ -304,6 +305,7 @@ void panic(const char *fmt, ...)
* kmsg_dump, we will need architecture dependent extra
* works in addition to stopping other CPUs.
*/
+ try_block_console_kthreads(10000);
crash_smp_send_stop();
}
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index d947ca6c84f9..e7d8578860ad 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -20,6 +20,8 @@ enum printk_info_flags {
LOG_CONT = 8, /* text is a fragment of a continuation line */
};
+extern bool block_console_kthreads;
+
__printf(4, 0)
int vprintk_store(int facility, int level,
const struct dev_printk_info *dev_info,
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index ea3dd55709e7..b095fb5f5f61 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -250,6 +250,9 @@ static atomic_t console_kthreads_active = ATOMIC_INIT(0);
#define console_kthread_printing_exit() \
atomic_dec(&console_kthreads_active)
+/* Block console kthreads to avoid processing new messages. */
+bool block_console_kthreads;
+
/*
* Helper macros to handle lockdep when locking/unlocking console_sem. We use
* macros instead of functions so that _RET_IP_ contains useful information.
@@ -3729,7 +3732,10 @@ static bool printer_should_wake(struct console *con, u64 seq)
return true;
if (con->blocked ||
- console_kthreads_atomically_blocked()) {
+ console_kthreads_atomically_blocked() ||
+ block_console_kthreads ||
+ system_state > SYSTEM_RUNNING ||
+ oops_in_progress) {
return false;
}
diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
index ef0f9a2044da..caac4de1ea59 100644
--- a/kernel/printk/printk_safe.c
+++ b/kernel/printk/printk_safe.c
@@ -8,7 +8,9 @@
#include <linux/smp.h>
#include <linux/cpumask.h>
#include <linux/printk.h>
+#include <linux/console.h>
#include <linux/kprobes.h>
+#include <linux/delay.h>
#include "internal.h"
@@ -50,3 +52,33 @@ asmlinkage int vprintk(const char *fmt, va_list args)
return vprintk_default(fmt, args);
}
EXPORT_SYMBOL(vprintk);
+
+/**
+ * try_block_console_kthreads() - Try to block console kthreads and
+ * make the global console_lock() avaialble
+ *
+ * @timeout_ms: The maximum time (in ms) to wait.
+ *
+ * Prevent console kthreads from processing new messages. Wait
+ * until the global console_lock() becomes available.
+ *
+ * Context: Can be called in any context.
+ */
+void try_block_console_kthreads(int timeout_ms)
+{
+ block_console_kthreads = true;
+
+	/* Do not wait when the console lock cannot be taken safely. */
+ if (this_cpu_read(printk_context) || in_nmi())
+ return;
+
+ while (timeout_ms > 0) {
+ if (console_trylock()) {
+ console_unlock();
+ return;
+ }
+
+ udelay(1000);
+ timeout_ms -= 1;
+ }
+}
diff --git a/kernel/reboot.c b/kernel/reboot.c
index b5a71d1ff603..80564ffafabf 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -82,6 +82,7 @@ void kernel_restart_prepare(char *cmd)
{
blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
system_state = SYSTEM_RESTART;
+ try_block_console_kthreads(10000);
usermodehelper_disable();
device_shutdown();
}
@@ -270,6 +271,7 @@ static void kernel_shutdown_prepare(enum system_states state)
blocking_notifier_call_chain(&reboot_notifier_list,
(state == SYSTEM_HALT) ? SYS_HALT : SYS_POWER_OFF, NULL);
system_state = state;
+ try_block_console_kthreads(10000);
usermodehelper_disable();
device_shutdown();
}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index bfa7452ca92e..da0bf6fe9ecd 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4798,25 +4798,55 @@ static void do_balance_callbacks(struct rq *rq, struct callback_head *head)
static void balance_push(struct rq *rq);
+/*
+ * balance_push_callback is a right abuse of the callback interface and plays
+ * by significantly different rules.
+ *
+ * Where the normal balance_callback's purpose is to be run in the same context
+ * that queued it (only later, when it's safe to drop rq->lock again),
+ * balance_push_callback is specifically targeted at __schedule().
+ *
+ * This abuse is tolerated because it places all the unlikely/odd cases behind
+ * a single test, namely: rq->balance_callback == NULL.
+ */
struct callback_head balance_push_callback = {
.next = NULL,
.func = (void (*)(struct callback_head *))balance_push,
};
-static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
+static inline struct callback_head *
+__splice_balance_callbacks(struct rq *rq, bool split)
{
struct callback_head *head = rq->balance_callback;
+ if (likely(!head))
+ return NULL;
+
lockdep_assert_rq_held(rq);
- if (head)
+ /*
+ * Must not take balance_push_callback off the list when
+ * splice_balance_callbacks() and balance_callbacks() are not
+ * in the same rq->lock section.
+ *
+ * In that case it would be possible for __schedule() to interleave
+ * and observe the list empty.
+ */
+ if (split && head == &balance_push_callback)
+ head = NULL;
+ else
rq->balance_callback = NULL;
return head;
}
+static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
+{
+ return __splice_balance_callbacks(rq, true);
+}
+
static void __balance_callbacks(struct rq *rq)
{
- do_balance_callbacks(rq, splice_balance_callbacks(rq));
+ do_balance_callbacks(rq, __splice_balance_callbacks(rq, false));
}
static inline void balance_callbacks(struct rq *rq, struct callback_head *head)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 01259611beb9..47b89a0fc6e5 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1693,6 +1693,11 @@ queue_balance_callback(struct rq *rq,
{
lockdep_assert_rq_held(rq);
+ /*
+ * Don't (re)queue an already queued item; nor queue anything when
+ * balance_push() is active, see the comment with
+ * balance_push_callback.
+ */
if (unlikely(head->next || rq->balance_callback == &balance_push_callback))
return;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 7a13e6ac6327..68e5cdd24cef 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1936,7 +1936,7 @@ int perf_event_attach_bpf_prog(struct perf_event *event,
event->prog = prog;
event->bpf_cookie = bpf_cookie;
rcu_assign_pointer(event->tp_event->prog_array, new_array);
- bpf_prog_array_free(old_array);
+ bpf_prog_array_free_sleepable(old_array);
unlock:
mutex_unlock(&bpf_event_mutex);
@@ -1962,7 +1962,7 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
bpf_prog_array_delete_safe(old_array, event->prog);
} else {
rcu_assign_pointer(event->tp_event->prog_array, new_array);
- bpf_prog_array_free(old_array);
+ bpf_prog_array_free_sleepable(old_array);
}
bpf_prog_put(event->prog);
@@ -2423,7 +2423,7 @@ kprobe_multi_link_handler(struct fprobe *fp, unsigned long entry_ip,
kprobe_multi_link_prog_run(link, entry_ip, regs);
}
-static int symbols_cmp(const void *a, const void *b)
+static int symbols_cmp_r(const void *a, const void *b, const void *priv)
{
const char **str_a = (const char **) a;
const char **str_b = (const char **) b;
@@ -2431,6 +2431,28 @@ static int symbols_cmp(const void *a, const void *b)
return strcmp(*str_a, *str_b);
}
+struct multi_symbols_sort {
+ const char **funcs;
+ u64 *cookies;
+};
+
+static void symbols_swap_r(void *a, void *b, int size, const void *priv)
+{
+ const struct multi_symbols_sort *data = priv;
+ const char **name_a = a, **name_b = b;
+
+ swap(*name_a, *name_b);
+
+	/* If defined, also swap the related cookies. */
+ if (data->cookies) {
+ u64 *cookie_a, *cookie_b;
+
+ cookie_a = data->cookies + (name_a - data->funcs);
+ cookie_b = data->cookies + (name_b - data->funcs);
+ swap(*cookie_a, *cookie_b);
+ }
+}
+
int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
struct bpf_kprobe_multi_link *link = NULL;
@@ -2468,38 +2490,46 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
if (!addrs)
return -ENOMEM;
+ ucookies = u64_to_user_ptr(attr->link_create.kprobe_multi.cookies);
+ if (ucookies) {
+ cookies = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL);
+ if (!cookies) {
+ err = -ENOMEM;
+ goto error;
+ }
+ if (copy_from_user(cookies, ucookies, size)) {
+ err = -EFAULT;
+ goto error;
+ }
+ }
+
if (uaddrs) {
if (copy_from_user(addrs, uaddrs, size)) {
err = -EFAULT;
goto error;
}
} else {
+ struct multi_symbols_sort data = {
+ .cookies = cookies,
+ };
struct user_syms us;
err = copy_user_syms(&us, usyms, cnt);
if (err)
goto error;
- sort(us.syms, cnt, sizeof(*us.syms), symbols_cmp, NULL);
+ if (cookies)
+ data.funcs = us.syms;
+
+ sort_r(us.syms, cnt, sizeof(*us.syms), symbols_cmp_r,
+ symbols_swap_r, &data);
+
err = ftrace_lookup_symbols(us.syms, cnt, addrs);
free_user_syms(&us);
if (err)
goto error;
}
- ucookies = u64_to_user_ptr(attr->link_create.kprobe_multi.cookies);
- if (ucookies) {
- cookies = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL);
- if (!cookies) {
- err = -ENOMEM;
- goto error;
- }
- if (copy_from_user(cookies, ucookies, size)) {
- err = -EFAULT;
- goto error;
- }
- }
-
link = kzalloc(sizeof(*link), GFP_KERNEL);
if (!link) {
err = -ENOMEM;
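The custom swap callback is the general way to co-sort parallel arrays with sort_r(); a toy version of what symbols_swap_r() does for the per-symbol cookies above:

struct co_sort { const char **names; u64 *vals; };

static int cmp_names(const void *a, const void *b, const void *priv)
{
	return strcmp(*(const char * const *)a, *(const char * const *)b);
}

static void swap_names(void *a, void *b, int size, const void *priv)
{
	const struct co_sort *d = priv;
	const char **na = a, **nb = b;

	/* slot indexes are positions, unaffected by the content swap */
	swap(d->vals[na - d->names], d->vals[nb - d->names]);
	swap(*na, *nb);
}

/* usage: sort_r(d.names, n, sizeof(*d.names), cmp_names, swap_names, &d); */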
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index e750fe141a60..601ccf1b2f09 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -8029,15 +8029,23 @@ static int kallsyms_callback(void *data, const char *name,
struct module *mod, unsigned long addr)
{
struct kallsyms_data *args = data;
+ const char **sym;
+ int idx;
- if (!bsearch(&name, args->syms, args->cnt, sizeof(*args->syms), symbols_cmp))
+ sym = bsearch(&name, args->syms, args->cnt, sizeof(*args->syms), symbols_cmp);
+ if (!sym)
+ return 0;
+
+ idx = sym - args->syms;
+ if (args->addrs[idx])
return 0;
addr = ftrace_location(addr);
if (!addr)
return 0;
- args->addrs[args->found++] = addr;
+ args->addrs[idx] = addr;
+ args->found++;
return args->found == args->cnt ? 1 : 0;
}
@@ -8062,6 +8070,7 @@ int ftrace_lookup_symbols(const char **sorted_syms, size_t cnt, unsigned long *a
struct kallsyms_data args;
int err;
+ memset(addrs, 0, sizeof(*addrs) * cnt);
args.addrs = addrs;
args.syms = sorted_syms;
args.cnt = cnt;
diff --git a/kernel/trace/rethook.c b/kernel/trace/rethook.c
index b56833700d23..c69d82273ce7 100644
--- a/kernel/trace/rethook.c
+++ b/kernel/trace/rethook.c
@@ -154,6 +154,15 @@ struct rethook_node *rethook_try_get(struct rethook *rh)
if (unlikely(!handler))
return NULL;
+ /*
+	 * This expects the caller to set up a rethook on a function entry.
+	 * When the function returns, the rethook will eventually be reclaimed
+	 * or released in rethook_recycle() with call_rcu().
+	 * This means the caller must run in an RCU-available context.
+ */
+ if (unlikely(!rcu_is_watching()))
+ return NULL;
+
fn = freelist_try_get(&rh->pool);
if (!fn)
return NULL;
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 9711589273cd..0282c119b1b2 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -16,6 +16,7 @@
#include <linux/namei.h>
#include <linux/string.h>
#include <linux/rculist.h>
+#include <linux/filter.h>
#include "trace_dynevent.h"
#include "trace_probe.h"
@@ -1346,9 +1347,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
if (bpf_prog_array_valid(call)) {
u32 ret;
- preempt_disable();
- ret = trace_call_bpf(call, regs);
- preempt_enable();
+ ret = bpf_prog_run_array_sleepable(call->prog_array, regs, bpf_prog_run);
if (!ret)
return;
}
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 4056f2a3f9d5..1ea50f6be843 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2788,13 +2788,13 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
}
/**
- * flush_workqueue - ensure that any scheduled work has run to completion.
+ * __flush_workqueue - ensure that any scheduled work has run to completion.
* @wq: workqueue to flush
*
* This function sleeps until all work items which were queued on entry
* have finished execution, but it is not livelocked by new incoming ones.
*/
-void flush_workqueue(struct workqueue_struct *wq)
+void __flush_workqueue(struct workqueue_struct *wq)
{
struct wq_flusher this_flusher = {
.list = LIST_HEAD_INIT(this_flusher.list),
@@ -2943,7 +2943,7 @@ void flush_workqueue(struct workqueue_struct *wq)
out_unlock:
mutex_unlock(&wq->mutex);
}
-EXPORT_SYMBOL(flush_workqueue);
+EXPORT_SYMBOL(__flush_workqueue);
/**
* drain_workqueue - drain a workqueue
@@ -2971,7 +2971,7 @@ void drain_workqueue(struct workqueue_struct *wq)
wq->flags |= __WQ_DRAINING;
mutex_unlock(&wq->mutex);
reflush:
- flush_workqueue(wq);
+ __flush_workqueue(wq);
mutex_lock(&wq->mutex);
@@ -6111,3 +6111,11 @@ void __init workqueue_init(void)
wq_online = true;
wq_watchdog_init();
}
+
+/*
+ * Despite the naming, this is a no-op function which is here only to avoid
+ * a link error. Since a compile-time warning may fail to catch all cases,
+ * a run-time warning is also emitted from __flush_workqueue().
+ */
+void __warn_flushing_systemwide_wq(void) { }
+EXPORT_SYMBOL(__warn_flushing_systemwide_wq);
diff --git a/lib/Kconfig b/lib/Kconfig
index 6a843639814f..eaaad4d85bf2 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -120,6 +120,9 @@ config INDIRECT_IOMEM_FALLBACK
source "lib/crypto/Kconfig"
+config LIB_MEMNEQ
+ bool
+
config CRC_CCITT
tristate "CRC-CCITT functions"
help
diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index c4fe15d38b60..a9f7eb047768 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -94,7 +94,7 @@ config UBSAN_UNREACHABLE
bool "Perform checking for unreachable code"
# objtool already handles unreachable checking and gets angry about
# seeing UBSan instrumentation located in unreachable places.
- depends on !(OBJTOOL && (STACK_VALIDATION || UNWINDER_ORC || X86_SMAP))
+ depends on !(OBJTOOL && (STACK_VALIDATION || UNWINDER_ORC || HAVE_UACCESS_VALIDATION))
depends on $(cc-option,-fsanitize=unreachable)
help
This option enables -fsanitize=unreachable which checks for control
diff --git a/lib/Makefile b/lib/Makefile
index ea54294d73bf..f99bf61f8bbc 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -251,6 +251,7 @@ obj-$(CONFIG_DIMLIB) += dim/
obj-$(CONFIG_SIGNATURE) += digsig.o
lib-$(CONFIG_CLZ_TAB) += clz_tab.o
+lib-$(CONFIG_LIB_MEMNEQ) += memneq.o
obj-$(CONFIG_GENERIC_STRNCPY_FROM_USER) += strncpy_from_user.o
obj-$(CONFIG_GENERIC_STRNLEN_USER) += strnlen_user.o
diff --git a/lib/crc-itu-t.c b/lib/crc-itu-t.c
index 1974b355c148..1d26a1647da5 100644
--- a/lib/crc-itu-t.c
+++ b/lib/crc-itu-t.c
@@ -7,7 +7,7 @@
#include <linux/module.h>
#include <linux/crc-itu-t.h>
-/** CRC table for the CRC ITU-T V.41 0x1021 (x^16 + x^12 + x^15 + 1) */
+/* CRC table for the CRC ITU-T V.41 0x1021 (x^16 + x^12 + x^5 + 1) */
const u16 crc_itu_t_table[256] = {
0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
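Usage sketch for the corrected table's consumer, crc_itu_t(); the initial value is protocol-specific (0x0000 here, 0xffff in some protocols), and buf/len are illustrative:

#include <linux/crc-itu-t.h>

u16 csum = crc_itu_t(0x0000, buf, len);	/* buf: const u8 *, len in bytes */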
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 9856e291f414..2082af43d51f 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -71,6 +71,7 @@ config CRYPTO_LIB_CURVE25519
tristate "Curve25519 scalar multiplication library"
depends on CRYPTO_ARCH_HAVE_LIB_CURVE25519 || !CRYPTO_ARCH_HAVE_LIB_CURVE25519
select CRYPTO_LIB_CURVE25519_GENERIC if CRYPTO_ARCH_HAVE_LIB_CURVE25519=n
+ select LIB_MEMNEQ
help
Enable the Curve25519 library interface. This interface may be
fulfilled by either the generic implementation or an arch-specific
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 6dd5330f7a99..0b64695ab632 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1434,7 +1434,7 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i,
{
unsigned nr, offset;
pgoff_t index, count;
- size_t size = maxsize, actual;
+ size_t size = maxsize;
loff_t pos;
if (!size || !maxpages)
@@ -1461,13 +1461,7 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i,
if (nr == 0)
return 0;
- actual = PAGE_SIZE * nr;
- actual -= offset;
- if (nr == count && size > 0) {
- unsigned last_offset = (nr > 1) ? 0 : offset;
- actual -= PAGE_SIZE - (last_offset + size);
- }
- return actual;
+ return min_t(size_t, nr * PAGE_SIZE - offset, maxsize);
}
/* must be done on non-empty ITER_IOVEC one */
@@ -1602,7 +1596,7 @@ static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i,
struct page **p;
unsigned nr, offset;
pgoff_t index, count;
- size_t size = maxsize, actual;
+ size_t size = maxsize;
loff_t pos;
if (!size)
@@ -1631,13 +1625,7 @@ static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i,
if (nr == 0)
return 0;
- actual = PAGE_SIZE * nr;
- actual -= offset;
- if (nr == count && size > 0) {
- unsigned last_offset = (nr > 1) ? 0 : offset;
- actual -= PAGE_SIZE - (last_offset + size);
- }
- return actual;
+ return min_t(size_t, nr * PAGE_SIZE - offset, maxsize);
}
ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
diff --git a/crypto/memneq.c b/lib/memneq.c
index fb11608b1ec1..fb11608b1ec1 100644
--- a/crypto/memneq.c
+++ b/lib/memneq.c
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index fb77f7bfd126..3c1853a9d1c0 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -769,8 +769,7 @@ static inline int __ptr_to_hashval(const void *ptr, unsigned long *hashval_out)
static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn);
unsigned long flags;
- if (!system_unbound_wq ||
- (!rng_is_initialized() && !rng_has_arch_random()) ||
+ if (!system_unbound_wq || !rng_is_initialized() ||
!spin_trylock_irqsave(&filling, flags))
return -EAGAIN;
diff --git a/lib/xarray.c b/lib/xarray.c
index 54e646e8e6ee..ea9ce1f0b386 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -264,9 +264,10 @@ static void xa_node_free(struct xa_node *node)
* xas_destroy() - Free any resources allocated during the XArray operation.
* @xas: XArray operation state.
*
- * This function is now internal-only.
+ * Most users will not need to call this function; it is called for you
+ * by xas_nomem().
*/
-static void xas_destroy(struct xa_state *xas)
+void xas_destroy(struct xa_state *xas)
{
struct xa_node *next, *node = xas->xa_alloc;
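The usual xas_nomem() retry loop, for contrast with the early-bailout path that now calls xas_destroy() in mm/huge_memory.c below; mapping, index and folio are illustrative:

	XA_STATE(xas, &mapping->i_pages, index);

	do {
		xas_lock_irq(&xas);
		xas_store(&xas, folio);
		xas_unlock_irq(&xas);
	} while (xas_nomem(&xas, GFP_KERNEL));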
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index ff60bd7d74e0..95550b8fa7fe 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -231,20 +231,13 @@ static __init int bdi_class_init(void)
}
postcore_initcall(bdi_class_init);
-static int bdi_init(struct backing_dev_info *bdi);
-
static int __init default_bdi_init(void)
{
- int err;
-
bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_UNBOUND |
WQ_SYSFS, 0);
if (!bdi_wq)
return -ENOMEM;
-
- err = bdi_init(&noop_backing_dev_info);
-
- return err;
+ return 0;
}
subsys_initcall(default_bdi_init);
@@ -781,7 +774,7 @@ static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb)
#endif /* CONFIG_CGROUP_WRITEBACK */
-static int bdi_init(struct backing_dev_info *bdi)
+int bdi_init(struct backing_dev_info *bdi)
{
int ret;
diff --git a/mm/filemap.c b/mm/filemap.c
index 9daeaab36081..ac3775c1ce4c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2991,11 +2991,12 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
struct address_space *mapping = file->f_mapping;
DEFINE_READAHEAD(ractl, file, ra, mapping, vmf->pgoff);
struct file *fpin = NULL;
+ unsigned long vm_flags = vmf->vma->vm_flags;
unsigned int mmap_miss;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* Use the readahead code, even if readahead is disabled */
- if (vmf->vma->vm_flags & VM_HUGEPAGE) {
+ if (vm_flags & VM_HUGEPAGE) {
fpin = maybe_unlock_mmap_for_io(vmf, fpin);
ractl._index &= ~((unsigned long)HPAGE_PMD_NR - 1);
ra->size = HPAGE_PMD_NR;
@@ -3003,7 +3004,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
* Fetch two PMD folios, so we get the chance to actually
* readahead, unless we've been told not to.
*/
- if (!(vmf->vma->vm_flags & VM_RAND_READ))
+ if (!(vm_flags & VM_RAND_READ))
ra->size *= 2;
ra->async_size = HPAGE_PMD_NR;
page_cache_ra_order(&ractl, ra, HPAGE_PMD_ORDER);
@@ -3012,12 +3013,12 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
#endif
/* If we don't want any read-ahead, don't bother */
- if (vmf->vma->vm_flags & VM_RAND_READ)
+ if (vm_flags & VM_RAND_READ)
return fpin;
if (!ra->ra_pages)
return fpin;
- if (vmf->vma->vm_flags & VM_SEQ_READ) {
+ if (vm_flags & VM_SEQ_READ) {
fpin = maybe_unlock_mmap_for_io(vmf, fpin);
page_cache_sync_ra(&ractl, ra->ra_pages);
return fpin;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index a77c78a2b6b5..f7248002dad9 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2672,8 +2672,7 @@ out_unlock:
if (mapping)
i_mmap_unlock_read(mapping);
out:
- /* Free any memory we didn't use */
- xas_nomem(&xas, 0);
+ xas_destroy(&xas);
count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED);
return ret;
}
diff --git a/mm/readahead.c b/mm/readahead.c
index 415c39d764ea..57a015108254 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -164,12 +164,14 @@ static void read_pages(struct readahead_control *rac)
while ((folio = readahead_folio(rac)) != NULL) {
unsigned long nr = folio_nr_pages(folio);
+ folio_get(folio);
rac->ra->size -= nr;
if (rac->ra->async_size >= nr) {
rac->ra->async_size -= nr;
filemap_remove_folio(folio);
}
folio_unlock(folio);
+ folio_put(folio);
}
} else {
while ((folio = readahead_folio(rac)) != NULL)
diff --git a/mm/slub.c b/mm/slub.c
index e5535020e0fd..b1281b8654bd 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -726,25 +726,48 @@ static struct track *get_track(struct kmem_cache *s, void *object,
return kasan_reset_tag(p + alloc);
}
-static void noinline set_track(struct kmem_cache *s, void *object,
- enum track_item alloc, unsigned long addr)
-{
- struct track *p = get_track(s, object, alloc);
-
#ifdef CONFIG_STACKDEPOT
+static noinline depot_stack_handle_t set_track_prepare(void)
+{
+ depot_stack_handle_t handle;
unsigned long entries[TRACK_ADDRS_COUNT];
unsigned int nr_entries;
nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 3);
- p->handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT);
+ handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT);
+
+ return handle;
+}
+#else
+static inline depot_stack_handle_t set_track_prepare(void)
+{
+ return 0;
+}
#endif
+static void set_track_update(struct kmem_cache *s, void *object,
+ enum track_item alloc, unsigned long addr,
+ depot_stack_handle_t handle)
+{
+ struct track *p = get_track(s, object, alloc);
+
+#ifdef CONFIG_STACKDEPOT
+ p->handle = handle;
+#endif
p->addr = addr;
p->cpu = smp_processor_id();
p->pid = current->pid;
p->when = jiffies;
}
+static __always_inline void set_track(struct kmem_cache *s, void *object,
+ enum track_item alloc, unsigned long addr)
+{
+ depot_stack_handle_t handle = set_track_prepare();
+
+ set_track_update(s, object, alloc, addr, handle);
+}
+
static void init_tracking(struct kmem_cache *s, void *object)
{
struct track *p;
@@ -1373,6 +1396,10 @@ static noinline int free_debug_processing(
int cnt = 0;
unsigned long flags, flags2;
int ret = 0;
+ depot_stack_handle_t handle = 0;
+
+ if (s->flags & SLAB_STORE_USER)
+ handle = set_track_prepare();
spin_lock_irqsave(&n->list_lock, flags);
slab_lock(slab, &flags2);
@@ -1391,7 +1418,7 @@ next_object:
}
if (s->flags & SLAB_STORE_USER)
- set_track(s, object, TRACK_FREE, addr);
+ set_track_update(s, object, TRACK_FREE, addr, handle);
trace(s, slab, object, 0);
/* Freepointer not overwritten by init_object(), SLAB_POISON moved it */
init_object(s, object, SLUB_RED_INACTIVE);
@@ -2936,6 +2963,7 @@ redo:
if (!freelist) {
c->slab = NULL;
+ c->tid = next_tid(c->tid);
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
stat(s, DEACTIVATE_BYPASS);
goto new_slab;
@@ -2968,6 +2996,7 @@ deactivate_slab:
freelist = c->freelist;
c->slab = NULL;
c->freelist = NULL;
+ c->tid = next_tid(c->tid);
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
deactivate_slab(s, slab, freelist);
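
The slub.c change splits set_track() so that the stack-depot capture (which may allocate) happens in set_track_prepare() before n->list_lock is taken, and only the cheap field assignment in set_track_update() runs under the lock. A sketch of that prepare-outside/commit-inside split, with a pthread mutex standing in for the kernel spinlock:

#include <pthread.h>

struct record {
	unsigned long stamp;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct record rec;

/* Expensive or allocating work, done with no locks held. */
static unsigned long prepare(void)
{
	unsigned long cookie = 0;
	/* ... capture a stack trace, allocate storage, etc. ... */
	return cookie;
}

/* Cheap publish step, done under the lock. */
static void commit(unsigned long cookie)
{
	pthread_mutex_lock(&lock);
	rec.stamp = cookie;
	pthread_mutex_unlock(&lock);
}
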
diff --git a/mm/usercopy.c b/mm/usercopy.c
index baeacc735b83..4e1da708699b 100644
--- a/mm/usercopy.c
+++ b/mm/usercopy.c
@@ -161,29 +161,27 @@ static inline void check_bogus_address(const unsigned long ptr, unsigned long n,
static inline void check_heap_object(const void *ptr, unsigned long n,
bool to_user)
{
+ uintptr_t addr = (uintptr_t)ptr;
+ unsigned long offset;
struct folio *folio;
if (is_kmap_addr(ptr)) {
- unsigned long page_end = (unsigned long)ptr | (PAGE_SIZE - 1);
-
- if ((unsigned long)ptr + n - 1 > page_end)
- usercopy_abort("kmap", NULL, to_user,
- offset_in_page(ptr), n);
+ offset = offset_in_page(ptr);
+ if (n > PAGE_SIZE - offset)
+ usercopy_abort("kmap", NULL, to_user, offset, n);
return;
}
if (is_vmalloc_addr(ptr)) {
- struct vm_struct *area = find_vm_area(ptr);
- unsigned long offset;
+ struct vmap_area *area = find_vmap_area(addr);
- if (!area) {
+ if (!area)
usercopy_abort("vmalloc", "no area", to_user, 0, n);
- return;
- }
- offset = ptr - area->addr;
- if (offset + n > get_vm_area_size(area))
+ if (n > area->va_end - addr) {
+ offset = addr - area->va_start;
usercopy_abort("vmalloc", NULL, to_user, offset, n);
+ }
return;
}
@@ -196,8 +194,8 @@ static inline void check_heap_object(const void *ptr, unsigned long n,
/* Check slab allocator for flags and size. */
__check_heap_object(ptr, n, folio_slab(folio), to_user);
} else if (folio_test_large(folio)) {
- unsigned long offset = ptr - folio_address(folio);
- if (offset + n > folio_size(folio))
+ offset = ptr - folio_address(folio);
+ if (n > folio_size(folio) - offset)
usercopy_abort("page alloc", NULL, to_user, offset, n);
}
}
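
Every bounds check in the usercopy.c hunk is rewritten from the form "offset + n > size" to "n > size - offset": the addition can wrap for very large n and wrongly pass, while the subtraction cannot underflow once offset is known to lie within the object. A standalone illustration:

#include <stdbool.h>
#include <stddef.h>

/* True when [offset, offset + n) fits inside an object of 'size'
 * bytes, given offset <= size. Written as a subtraction so nothing
 * can wrap; 'offset + n > size' could overflow for huge n and let
 * a bogus range slip through. */
static bool range_ok(size_t offset, size_t n, size_t size)
{
	return n <= size - offset;
}
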
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 07db42455dd4..effd1ff6a4b4 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1798,7 +1798,7 @@ static void free_unmap_vmap_area(struct vmap_area *va)
free_vmap_area_noflush(va);
}
-static struct vmap_area *find_vmap_area(unsigned long addr)
+struct vmap_area *find_vmap_area(unsigned long addr)
{
struct vmap_area *va;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 1a5c0b071aa3..bbac3cb4dc99 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1662,9 +1662,12 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags)
{
struct sock *sk = sock->sk;
- struct sk_buff *skb;
+ struct sk_buff *skb, *last;
+ struct sk_buff_head *sk_queue;
int copied;
int err = 0;
+ int off = 0;
+ long timeo;
lock_sock(sk);
/*
@@ -1676,10 +1679,29 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
goto out;
}
- /* Now we can treat all alike */
- skb = skb_recv_datagram(sk, flags, &err);
- if (skb == NULL)
- goto out;
+ /* We need support for non-blocking reads. */
+ sk_queue = &sk->sk_receive_queue;
+ skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off, &err, &last);
+ /* If no packet is available, release_sock(sk) and try again. */
+ if (!skb) {
+ if (err != -EAGAIN)
+ goto out;
+ release_sock(sk);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+ while (timeo && !__skb_wait_for_more_packets(sk, sk_queue, &err,
+ &timeo, last)) {
+ skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off,
+ &err, &last);
+ if (skb)
+ break;
+
+ if (err != -EAGAIN)
+ goto done;
+ }
+ if (!skb)
+ goto done;
+ lock_sock(sk);
+ }
if (!sk_to_ax25(sk)->pidincl)
skb_pull(skb, 1); /* Remove PID */
@@ -1726,6 +1748,7 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
out:
release_sock(sk);
+done:
return err;
}
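
The rewritten ax25_recvmsg() follows the standard datagram pattern: attempt a lockless dequeue with __skb_try_recv_datagram(), and if the queue is empty, drop the socket lock, wait with the receive timeout, and retry. A rough userspace analog of that try/wait/retry loop using poll() and plain BSD sockets, not the kernel internals:

#include <errno.h>
#include <poll.h>
#include <sys/socket.h>

/* Non-blocking receive with a bounded wait: try first, then poll
 * for more data and try again - the shape of the
 * __skb_try_recv_datagram()/__skb_wait_for_more_packets() loop. */
static ssize_t recv_with_timeout(int fd, void *buf, size_t len, int timeout_ms)
{
	struct pollfd pfd = { .fd = fd, .events = POLLIN };
	ssize_t n;

	for (;;) {
		n = recv(fd, buf, len, MSG_DONTWAIT);
		if (n >= 0)
			return n;
		if (errno != EAGAIN && errno != EWOULDBLOCK)
			return -1;
		if (poll(&pfd, 1, timeout_ms) <= 0)
			return -1;	/* timeout or poll error */
	}
}
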
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index ab88b6ac5401..c5462486dbca 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -52,7 +52,8 @@ void ax25_dev_device_up(struct net_device *dev)
{
ax25_dev *ax25_dev;
- if ((ax25_dev = kzalloc(sizeof(*ax25_dev), GFP_ATOMIC)) == NULL) {
+ ax25_dev = kzalloc(sizeof(*ax25_dev), GFP_KERNEL);
+ if (!ax25_dev) {
printk(KERN_ERR "AX.25: ax25_dev_device_up - out of memory\n");
return;
}
@@ -60,7 +61,7 @@ void ax25_dev_device_up(struct net_device *dev)
refcount_set(&ax25_dev->refcount, 1);
dev->ax25_ptr = ax25_dev;
ax25_dev->dev = dev;
- netdev_hold(dev, &ax25_dev->dev_tracker, GFP_ATOMIC);
+ netdev_hold(dev, &ax25_dev->dev_tracker, GFP_KERNEL);
ax25_dev->forward = NULL;
ax25_dev->device_up = true;
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 56f059b3c242..2ca96acbc50a 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -1420,9 +1420,6 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
void *data;
int ret;
- if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
- return -EINVAL;
-
if (kattr->test.flags || kattr->test.cpu || kattr->test.batch_size)
return -EINVAL;
@@ -1487,9 +1484,6 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
u32 retval, duration;
int ret = -EINVAL;
- if (prog->type != BPF_PROG_TYPE_SK_LOOKUP)
- return -EINVAL;
-
if (kattr->test.flags || kattr->test.cpu || kattr->test.batch_size)
return -EINVAL;
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index fdcc641fc89a..589ff497d50c 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -1025,8 +1025,8 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
NL_SET_ERR_MSG_MOD(extack, "Port belongs to a different bridge device");
return -EINVAL;
}
- if (p->state == BR_STATE_DISABLED) {
- NL_SET_ERR_MSG_MOD(extack, "Port is in disabled state");
+ if (p->state == BR_STATE_DISABLED && entry->state != MDB_PERMANENT) {
+ NL_SET_ERR_MSG_MOD(extack, "Port is in disabled state and entry is not permanent");
return -EINVAL;
}
vg = nbp_vlan_group(p);
@@ -1086,9 +1086,6 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry,
if (!p->key.port || p->key.port->dev->ifindex != entry->ifindex)
continue;
- if (p->key.port->state == BR_STATE_DISABLED)
- goto unlock;
-
br_multicast_del_pg(mp, p, pp);
err = 0;
break;
@@ -1124,8 +1121,14 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
return -ENODEV;
p = br_port_get_rtnl(pdev);
- if (!p || p->br != br || p->state == BR_STATE_DISABLED)
+ if (!p) {
+ NL_SET_ERR_MSG_MOD(extack, "Net device is not a bridge port");
+ return -EINVAL;
+ }
+ if (p->br != br) {
+ NL_SET_ERR_MSG_MOD(extack, "Port belongs to a different bridge device");
return -EINVAL;
+ }
vg = nbp_vlan_group(p);
} else {
vg = br_vlan_group(br);
diff --git a/net/core/dev.c b/net/core/dev.c
index 8958c4227b67..978ed0622d8f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -397,16 +397,18 @@ static void list_netdevice(struct net_device *dev)
/* Device list removal
* caller must respect a RCU grace period before freeing/reusing dev
*/
-static void unlist_netdevice(struct net_device *dev)
+static void unlist_netdevice(struct net_device *dev, bool lock)
{
ASSERT_RTNL();
/* Unlink dev from the device chain */
- write_lock(&dev_base_lock);
+ if (lock)
+ write_lock(&dev_base_lock);
list_del_rcu(&dev->dev_list);
netdev_name_node_del(dev->name_node);
hlist_del_rcu(&dev->index_hlist);
- write_unlock(&dev_base_lock);
+ if (lock)
+ write_unlock(&dev_base_lock);
dev_base_seq_inc(dev_net(dev));
}
@@ -10061,11 +10063,11 @@ int register_netdevice(struct net_device *dev)
goto err_uninit;
ret = netdev_register_kobject(dev);
- if (ret) {
- dev->reg_state = NETREG_UNREGISTERED;
+ write_lock(&dev_base_lock);
+ dev->reg_state = ret ? NETREG_UNREGISTERED : NETREG_REGISTERED;
+ write_unlock(&dev_base_lock);
+ if (ret)
goto err_uninit;
- }
- dev->reg_state = NETREG_REGISTERED;
__netdev_update_features(dev);
@@ -10347,7 +10349,9 @@ void netdev_run_todo(void)
continue;
}
+ write_lock(&dev_base_lock);
dev->reg_state = NETREG_UNREGISTERED;
+ write_unlock(&dev_base_lock);
linkwatch_forget_dev(dev);
}
@@ -10828,9 +10832,10 @@ void unregister_netdevice_many(struct list_head *head)
list_for_each_entry(dev, head, unreg_list) {
/* And unlink it from device chain. */
- unlist_netdevice(dev);
-
+ write_lock(&dev_base_lock);
+ unlist_netdevice(dev, false);
dev->reg_state = NETREG_UNREGISTERING;
+ write_unlock(&dev_base_lock);
}
flush_all_backlogs();
@@ -10977,7 +10982,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
dev_close(dev);
/* And unlink it from device chain */
- unlist_netdevice(dev);
+ unlist_netdevice(dev, true);
synchronize_net();
diff --git a/net/core/filter.c b/net/core/filter.c
index 5af58eb48587..994d91680b12 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6463,8 +6463,6 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
/* bpf_skc_lookup performs the core lookup for different types of sockets,
* taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE.
- * Returns the socket as an 'unsigned long' to simplify the casting in the
- * callers to satisfy BPF_CALL declarations.
*/
static struct sock *
__bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
@@ -6516,10 +6514,21 @@ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
ifindex, proto, netns_id, flags);
if (sk) {
- sk = sk_to_full_sk(sk);
- if (!sk_fullsock(sk)) {
+ struct sock *sk2 = sk_to_full_sk(sk);
+
+ /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk
+ * sock refcnt is decremented to prevent a request_sock leak.
+ */
+ if (!sk_fullsock(sk2))
+ sk2 = NULL;
+ if (sk2 != sk) {
sock_gen_put(sk);
- return NULL;
+ /* Ensure there is no need to bump sk2 refcnt */
+ if (unlikely(sk2 && !sock_flag(sk2, SOCK_RCU_FREE))) {
+ WARN_ONCE(1, "Found non-RCU, unreferenced socket!");
+ return NULL;
+ }
+ sk = sk2;
}
}
@@ -6553,10 +6562,21 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
flags);
if (sk) {
- sk = sk_to_full_sk(sk);
- if (!sk_fullsock(sk)) {
+ struct sock *sk2 = sk_to_full_sk(sk);
+
+ /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk
+ * sock refcnt is decremented to prevent a request_sock leak.
+ */
+ if (!sk_fullsock(sk2))
+ sk2 = NULL;
+ if (sk2 != sk) {
sock_gen_put(sk);
- return NULL;
+ /* Ensure there is no need to bump sk2 refcnt */
+ if (unlikely(sk2 && !sock_flag(sk2, SOCK_RCU_FREE))) {
+ WARN_ONCE(1, "Found non-RCU, unreferenced socket!");
+ return NULL;
+ }
+ sk = sk2;
}
}
@@ -7444,6 +7464,114 @@ static const struct bpf_func_proto bpf_skb_set_tstamp_proto = {
.arg3_type = ARG_ANYTHING,
};
+#ifdef CONFIG_SYN_COOKIES
+BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv4, struct iphdr *, iph,
+ struct tcphdr *, th, u32, th_len)
+{
+ u32 cookie;
+ u16 mss;
+
+ if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4))
+ return -EINVAL;
+
+ mss = tcp_parse_mss_option(th, 0) ?: TCP_MSS_DEFAULT;
+ cookie = __cookie_v4_init_sequence(iph, th, &mss);
+
+ return cookie | ((u64)mss << 32);
+}
+
+static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv4_proto = {
+ .func = bpf_tcp_raw_gen_syncookie_ipv4,
+ .gpl_only = true, /* __cookie_v4_init_sequence() is GPL */
+ .pkt_access = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
+ .arg1_size = sizeof(struct iphdr),
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+};
+
+BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv6, struct ipv6hdr *, iph,
+ struct tcphdr *, th, u32, th_len)
+{
+#if IS_BUILTIN(CONFIG_IPV6)
+ const u16 mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
+ sizeof(struct ipv6hdr);
+ u32 cookie;
+ u16 mss;
+
+ if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4))
+ return -EINVAL;
+
+ mss = tcp_parse_mss_option(th, 0) ?: mss_clamp;
+ cookie = __cookie_v6_init_sequence(iph, th, &mss);
+
+ return cookie | ((u64)mss << 32);
+#else
+ return -EPROTONOSUPPORT;
+#endif
+}
+
+static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv6_proto = {
+ .func = bpf_tcp_raw_gen_syncookie_ipv6,
+ .gpl_only = true, /* __cookie_v6_init_sequence() is GPL */
+ .pkt_access = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
+ .arg1_size = sizeof(struct ipv6hdr),
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+};
+
+BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv4, struct iphdr *, iph,
+ struct tcphdr *, th)
+{
+ u32 cookie = ntohl(th->ack_seq) - 1;
+
+ if (__cookie_v4_check(iph, th, cookie) > 0)
+ return 0;
+
+ return -EACCES;
+}
+
+static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv4_proto = {
+ .func = bpf_tcp_raw_check_syncookie_ipv4,
+ .gpl_only = true, /* __cookie_v4_check is GPL */
+ .pkt_access = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
+ .arg1_size = sizeof(struct iphdr),
+ .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM,
+ .arg2_size = sizeof(struct tcphdr),
+};
+
+BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv6, struct ipv6hdr *, iph,
+ struct tcphdr *, th)
+{
+#if IS_BUILTIN(CONFIG_IPV6)
+ u32 cookie = ntohl(th->ack_seq) - 1;
+
+ if (__cookie_v6_check(iph, th, cookie) > 0)
+ return 0;
+
+ return -EACCES;
+#else
+ return -EPROTONOSUPPORT;
+#endif
+}
+
+static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv6_proto = {
+ .func = bpf_tcp_raw_check_syncookie_ipv6,
+ .gpl_only = true, /* __cookie_v6_check is GPL */
+ .pkt_access = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
+ .arg1_size = sizeof(struct ipv6hdr),
+ .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM,
+ .arg2_size = sizeof(struct tcphdr),
+};
+#endif /* CONFIG_SYN_COOKIES */
+
#endif /* CONFIG_INET */
bool bpf_helper_changes_pkt_data(void *func)
@@ -7807,6 +7935,16 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_assign_proto;
case BPF_FUNC_skb_set_tstamp:
return &bpf_skb_set_tstamp_proto;
+#ifdef CONFIG_SYN_COOKIES
+ case BPF_FUNC_tcp_raw_gen_syncookie_ipv4:
+ return &bpf_tcp_raw_gen_syncookie_ipv4_proto;
+ case BPF_FUNC_tcp_raw_gen_syncookie_ipv6:
+ return &bpf_tcp_raw_gen_syncookie_ipv6_proto;
+ case BPF_FUNC_tcp_raw_check_syncookie_ipv4:
+ return &bpf_tcp_raw_check_syncookie_ipv4_proto;
+ case BPF_FUNC_tcp_raw_check_syncookie_ipv6:
+ return &bpf_tcp_raw_check_syncookie_ipv6_proto;
+#endif
#endif
default:
return bpf_sk_base_func_proto(func_id);
@@ -7856,6 +7994,16 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_tcp_check_syncookie_proto;
case BPF_FUNC_tcp_gen_syncookie:
return &bpf_tcp_gen_syncookie_proto;
+#ifdef CONFIG_SYN_COOKIES
+ case BPF_FUNC_tcp_raw_gen_syncookie_ipv4:
+ return &bpf_tcp_raw_gen_syncookie_ipv4_proto;
+ case BPF_FUNC_tcp_raw_gen_syncookie_ipv6:
+ return &bpf_tcp_raw_gen_syncookie_ipv6_proto;
+ case BPF_FUNC_tcp_raw_check_syncookie_ipv4:
+ return &bpf_tcp_raw_check_syncookie_ipv4_proto;
+ case BPF_FUNC_tcp_raw_check_syncookie_ipv6:
+ return &bpf_tcp_raw_check_syncookie_ipv6_proto;
+#endif
#endif
default:
return bpf_sk_base_func_proto(func_id);
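
The four raw syncookie helpers encode two results in a single return value: the 32-bit cookie in the low half and the MSS in the high half of a u64 ("return cookie | ((u64)mss << 32);"). A tiny self-contained demonstration of the packing and how a caller splits it back out:

#include <assert.h>
#include <stdint.h>

/* Pack a 32-bit SYN cookie and the negotiated MSS into one u64,
 * mirroring the helpers' return expression. */
static uint64_t pack_cookie(uint32_t cookie, uint16_t mss)
{
	return (uint64_t)cookie | ((uint64_t)mss << 32);
}

int main(void)
{
	uint64_t v = pack_cookie(0xdeadbeef, 1460);

	assert((uint32_t)v == 0xdeadbeef);	/* low half: cookie */
	assert((uint16_t)(v >> 32) == 1460);	/* high half: mss   */
	return 0;
}
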
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index d49fc974e630..d61afd21aab5 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -33,6 +33,7 @@ static const char fmt_dec[] = "%d\n";
static const char fmt_ulong[] = "%lu\n";
static const char fmt_u64[] = "%llu\n";
+/* Caller holds RTNL or dev_base_lock */
static inline int dev_isalive(const struct net_device *dev)
{
return dev->reg_state <= NETREG_REGISTERED;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index fec75f8bf1f4..c62e42d0c531 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -175,13 +175,14 @@ static struct sk_buff *napi_skb_cache_get(void)
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
struct sk_buff *skb;
- if (unlikely(!nc->skb_count))
+ if (unlikely(!nc->skb_count)) {
nc->skb_count = kmem_cache_alloc_bulk(skbuff_head_cache,
GFP_ATOMIC,
NAPI_SKB_CACHE_BULK,
nc->skb_cache);
- if (unlikely(!nc->skb_count))
- return NULL;
+ if (unlikely(!nc->skb_count))
+ return NULL;
+ }
skb = nc->skb_cache[--nc->skb_count];
kasan_unpoison_object_data(skbuff_head_cache, skb);
@@ -3194,9 +3195,7 @@ skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen)
}
}
- to->truesize += len + plen;
- to->len += len + plen;
- to->data_len += len + plen;
+ skb_len_add(to, len + plen);
if (unlikely(skb_orphan_frags(from, GFP_ATOMIC))) {
skb_tx_error(from);
@@ -3633,13 +3632,8 @@ onlymerged:
tgt->ip_summed = CHECKSUM_PARTIAL;
skb->ip_summed = CHECKSUM_PARTIAL;
- /* Yak, is it really working this way? Some helper please? */
- skb->len -= shiftlen;
- skb->data_len -= shiftlen;
- skb->truesize -= shiftlen;
- tgt->len += shiftlen;
- tgt->data_len += shiftlen;
- tgt->truesize += shiftlen;
+ skb_len_add(skb, -shiftlen);
+ skb_len_add(tgt, shiftlen);
return shiftlen;
}
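
Both skbuff.c hunks replace hand-rolled triplets of updates to len, data_len and truesize with a single skb_len_add() taking a signed delta (note the negative -shiftlen in the skb_shift() conversion). A sketch of the helper's shape over a hypothetical struct:

#include <stddef.h>

struct buf {
	size_t len;		/* total data length          */
	size_t data_len;	/* length held in fragments   */
	size_t truesize;	/* accounted memory footprint */
};

/* One helper instead of three hand-rolled updates; 'delta' may be
 * negative, as when shifting data out of a buffer. */
static void buf_len_add(struct buf *b, long delta)
{
	b->len += delta;
	b->data_len += delta;
	b->truesize += delta;
}
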
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 22b983ade0e7..fc69154bbc88 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -699,6 +699,11 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
write_lock_bh(&sk->sk_callback_lock);
+ if (sk_is_inet(sk) && inet_csk_has_ulp(sk)) {
+ psock = ERR_PTR(-EINVAL);
+ goto out;
+ }
+
if (sk->sk_user_data) {
psock = ERR_PTR(-EBUSY);
goto out;
@@ -715,6 +720,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
psock->eval = __SK_NONE;
psock->sk_proto = prot;
psock->saved_unhash = prot->unhash;
+ psock->saved_destroy = prot->destroy;
psock->saved_close = prot->close;
psock->saved_write_space = sk->sk_write_space;
diff --git a/net/core/sock.c b/net/core/sock.c
index 697d5c8e2f0d..92a0296ccb18 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1019,7 +1019,8 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
return -ENOMEM;
/* pre-charge to forward_alloc */
- allocated = sk_memory_allocated_add(sk, pages);
+ sk_memory_allocated_add(sk, pages);
+ allocated = sk_memory_allocated(sk);
/* If the system goes into memory pressure with this
* precharge, give up and return error.
*/
@@ -2906,11 +2907,13 @@ EXPORT_SYMBOL(sk_wait_data);
*/
int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
{
- struct proto *prot = sk->sk_prot;
- long allocated = sk_memory_allocated_add(sk, amt);
bool memcg_charge = mem_cgroup_sockets_enabled && sk->sk_memcg;
+ struct proto *prot = sk->sk_prot;
bool charged = true;
+ long allocated;
+ sk_memory_allocated_add(sk, amt);
+ allocated = sk_memory_allocated(sk);
if (memcg_charge &&
!(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt,
gfp_memcg_charge())))
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 81d4b4756a02..9f08ccfaf6da 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1561,6 +1561,29 @@ void sock_map_unhash(struct sock *sk)
}
EXPORT_SYMBOL_GPL(sock_map_unhash);
+void sock_map_destroy(struct sock *sk)
+{
+ void (*saved_destroy)(struct sock *sk);
+ struct sk_psock *psock;
+
+ rcu_read_lock();
+ psock = sk_psock_get(sk);
+ if (unlikely(!psock)) {
+ rcu_read_unlock();
+ if (sk->sk_prot->destroy)
+ sk->sk_prot->destroy(sk);
+ return;
+ }
+
+ saved_destroy = psock->saved_destroy;
+ sock_map_remove_links(sk, psock);
+ rcu_read_unlock();
+ sk_psock_stop(psock, true);
+ sk_psock_put(sk, psock);
+ saved_destroy(sk);
+}
+EXPORT_SYMBOL_GPL(sock_map_destroy);
+
void sock_map_close(struct sock *sk, long timeout)
{
void (*saved_close)(struct sock *sk, long timeout);
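
sock_map_destroy() relies on the saved_destroy snapshot that sk_psock_init() now records next to saved_unhash and saved_close: tear down the psock state, then chain to the protocol's original callback. A compact sketch of that save/override/chain pattern, with hypothetical types rather than the kernel's:

#include <stdio.h>

struct proto_ops {
	void (*destroy)(void *sk);
};

struct psock {
	void (*saved_destroy)(void *sk);
};

static void my_destroy(void *sk);

static void orig_destroy(void *sk)
{
	puts("protocol destroy");
}

static struct proto_ops ops = { .destroy = orig_destroy };
static struct psock ps;

/* Override: remember the original callback, install the wrapper. */
static void install(void)
{
	ps.saved_destroy = ops.destroy;
	ops.destroy = my_destroy;
}

/* Wrapper: do the extra teardown, then chain to the original. */
static void my_destroy(void *sk)
{
	/* ... remove psock links, stop work, drop refs ... */
	ps.saved_destroy(sk);
}

int main(void)
{
	install();
	ops.destroy(NULL);
	return 0;
}
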
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 2e78458900f2..eb8e128e43e8 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1120,12 +1120,6 @@ static int __init dccp_init(void)
SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
if (!dccp_hashinfo.bind_bucket_cachep)
goto out_free_hashinfo2;
- dccp_hashinfo.bind2_bucket_cachep =
- kmem_cache_create("dccp_bind2_bucket",
- sizeof(struct inet_bind2_bucket), 0,
- SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
- if (!dccp_hashinfo.bind2_bucket_cachep)
- goto out_free_bind_bucket_cachep;
/*
* Size and allocate the main established and bind bucket
@@ -1156,7 +1150,7 @@ static int __init dccp_init(void)
if (!dccp_hashinfo.ehash) {
DCCP_CRIT("Failed to allocate DCCP established hash table");
- goto out_free_bind2_bucket_cachep;
+ goto out_free_bind_bucket_cachep;
}
for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
@@ -1182,23 +1176,14 @@ static int __init dccp_init(void)
goto out_free_dccp_locks;
}
- dccp_hashinfo.bhash2 = (struct inet_bind2_hashbucket *)
- __get_free_pages(GFP_ATOMIC | __GFP_NOWARN, bhash_order);
-
- if (!dccp_hashinfo.bhash2) {
- DCCP_CRIT("Failed to allocate DCCP bind2 hash table");
- goto out_free_dccp_bhash;
- }
-
for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
spin_lock_init(&dccp_hashinfo.bhash[i].lock);
INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
- INIT_HLIST_HEAD(&dccp_hashinfo.bhash2[i].chain);
}
rc = dccp_mib_init();
if (rc)
- goto out_free_dccp_bhash2;
+ goto out_free_dccp_bhash;
rc = dccp_ackvec_init();
if (rc)
@@ -1222,38 +1207,30 @@ out_ackvec_exit:
dccp_ackvec_exit();
out_free_dccp_mib:
dccp_mib_exit();
-out_free_dccp_bhash2:
- free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
out_free_dccp_bhash:
free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
out_free_dccp_locks:
inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
-out_free_bind2_bucket_cachep:
- kmem_cache_destroy(dccp_hashinfo.bind2_bucket_cachep);
out_free_bind_bucket_cachep:
kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
out_free_hashinfo2:
inet_hashinfo2_free_mod(&dccp_hashinfo);
out_fail:
dccp_hashinfo.bhash = NULL;
- dccp_hashinfo.bhash2 = NULL;
dccp_hashinfo.ehash = NULL;
dccp_hashinfo.bind_bucket_cachep = NULL;
- dccp_hashinfo.bind2_bucket_cachep = NULL;
return rc;
}
static void __exit dccp_fini(void)
{
- int bhash_order = get_order(dccp_hashinfo.bhash_size *
- sizeof(struct inet_bind_hashbucket));
-
ccid_cleanup_builtins();
dccp_mib_exit();
- free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
- free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
+ free_pages((unsigned long)dccp_hashinfo.bhash,
+ get_order(dccp_hashinfo.bhash_size *
+ sizeof(struct inet_bind_hashbucket)));
free_pages((unsigned long)dccp_hashinfo.ehash,
get_order((dccp_hashinfo.ehash_mask + 1) *
sizeof(struct inet_ehash_bucket)));
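
The dccp_init() hunks remove the bhash2 allocations, and with them one leg of the function's goto-based unwind ladder; an allocation and its matching error label have to be deleted together or the cleanup order breaks. A minimal sketch of the idiom, assuming two hypothetical resources:

#include <stdlib.h>

static void *res_a, *res_b;

/* Classic goto-based unwind: each failure jumps to the label that
 * frees everything allocated so far, in reverse order. */
static int setup(void)
{
	res_a = malloc(64);
	if (!res_a)
		goto out;
	res_b = malloc(64);
	if (!res_b)
		goto out_free_a;
	return 0;		/* both resources now live */

out_free_a:
	free(res_a);
	res_a = NULL;
out:
	return -1;
}
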
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 8cb87b5067ee..63853fff4e2f 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -132,6 +132,13 @@ config NET_DSA_TAG_RTL8_4
Say Y or M if you want to enable support for tagging frames for Realtek
switches with 8 byte protocol 4 tags, such as the Realtek RTL8365MB-VC.
+config NET_DSA_TAG_RZN1_A5PSW
+ tristate "Tag driver for Renesas RZ/N1 A5PSW switch"
+ help
+ Say Y or M if you want to enable support for tagging frames for
+ Renesas RZ/N1 embedded switch that uses an 8 byte tag located after
+ destination MAC address.
+
config NET_DSA_TAG_LAN9303
tristate "Tag driver for SMSC/Microchip LAN9303 family of switches"
help
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 9f75820e7c98..af28c24ead18 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_NET_DSA_TAG_OCELOT_8021Q) += tag_ocelot_8021q.o
obj-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o
obj-$(CONFIG_NET_DSA_TAG_RTL4_A) += tag_rtl4_a.o
obj-$(CONFIG_NET_DSA_TAG_RTL8_4) += tag_rtl8_4.o
+obj-$(CONFIG_NET_DSA_TAG_RZN1_A5PSW) += tag_rzn1_a5psw.o
obj-$(CONFIG_NET_DSA_TAG_SJA1105) += tag_sja1105.o
obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
obj-$(CONFIG_NET_DSA_TAG_XRS700X) += tag_xrs700x.o
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 2e1ac638d135..760ca58307a3 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1002,6 +1002,18 @@ dsa_slave_get_eth_ctrl_stats(struct net_device *dev,
ds->ops->get_eth_ctrl_stats(ds, dp->index, ctrl_stats);
}
+static void
+dsa_slave_get_rmon_stats(struct net_device *dev,
+ struct ethtool_rmon_stats *rmon_stats,
+ const struct ethtool_rmon_hist_range **ranges)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+
+ if (ds->ops->get_rmon_stats)
+ ds->ops->get_rmon_stats(ds, dp->index, rmon_stats, ranges);
+}
+
static void dsa_slave_net_selftest(struct net_device *ndev,
struct ethtool_test *etest, u64 *buf)
{
@@ -2081,6 +2093,7 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_eth_phy_stats = dsa_slave_get_eth_phy_stats,
.get_eth_mac_stats = dsa_slave_get_eth_mac_stats,
.get_eth_ctrl_stats = dsa_slave_get_eth_ctrl_stats,
+ .get_rmon_stats = dsa_slave_get_rmon_stats,
.set_wol = dsa_slave_set_wol,
.get_wol = dsa_slave_get_wol,
.set_eee = dsa_slave_set_eee,
@@ -2460,8 +2473,9 @@ static int dsa_slave_changeupper(struct net_device *dev,
if (!err)
dsa_bridge_mtu_normalization(dp);
if (err == -EOPNOTSUPP) {
- NL_SET_ERR_MSG_MOD(extack,
- "Offloading not supported");
+ if (!extack->_msg)
+ NL_SET_ERR_MSG_MOD(extack,
+ "Offloading not supported");
err = 0;
}
err = notifier_from_errno(err);
diff --git a/net/dsa/tag_rzn1_a5psw.c b/net/dsa/tag_rzn1_a5psw.c
new file mode 100644
index 000000000000..e2a5ee6ae688
--- /dev/null
+++ b/net/dsa/tag_rzn1_a5psw.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 Schneider Electric
+ *
+ * Clément Léger <clement.leger@bootlin.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <net/dsa.h>
+
+#include "dsa_priv.h"
+
+/* To define the outgoing port and to discover the incoming port a TAG is
+ * inserted after Src MAC :
+ *
+ * Dest MAC Src MAC TAG Type
+ * ...| 1 2 3 4 5 6 | 1 2 3 4 5 6 | 1 2 3 4 5 6 7 8 | 1 2 |...
+ * |<--------------->|
+ *
+ * See struct a5psw_tag for layout
+ */
+
+#define ETH_P_DSA_A5PSW 0xE001
+#define A5PSW_TAG_LEN 8
+#define A5PSW_CTRL_DATA_FORCE_FORWARD BIT(0)
+/* This is both used for xmit tag and rcv tagging */
+#define A5PSW_CTRL_DATA_PORT GENMASK(3, 0)
+
+struct a5psw_tag {
+ __be16 ctrl_tag;
+ __be16 ctrl_data;
+ __be16 ctrl_data2_hi;
+ __be16 ctrl_data2_lo;
+};
+
+static struct sk_buff *a5psw_tag_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct a5psw_tag *ptag;
+ u32 data2_val;
+
+ BUILD_BUG_ON(sizeof(*ptag) != A5PSW_TAG_LEN);
+
+ /* The Ethernet switch we are interfaced with needs packets to be at
+ * least 60 bytes otherwise they will be discarded when they enter the
+ * switch port logic.
+ */
+ if (__skb_put_padto(skb, ETH_ZLEN, false))
+ return NULL;
+
+ /* provide 'A5PSW_TAG_LEN' bytes additional space */
+ skb_push(skb, A5PSW_TAG_LEN);
+
+ /* make room between MACs and Ether-Type to insert tag */
+ dsa_alloc_etype_header(skb, A5PSW_TAG_LEN);
+
+ ptag = dsa_etype_header_pos_tx(skb);
+
+ data2_val = FIELD_PREP(A5PSW_CTRL_DATA_PORT, BIT(dp->index));
+ ptag->ctrl_tag = htons(ETH_P_DSA_A5PSW);
+ ptag->ctrl_data = htons(A5PSW_CTRL_DATA_FORCE_FORWARD);
+ ptag->ctrl_data2_lo = htons(data2_val);
+ ptag->ctrl_data2_hi = 0;
+
+ return skb;
+}
+
+static struct sk_buff *a5psw_tag_rcv(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct a5psw_tag *tag;
+ int port;
+
+ if (unlikely(!pskb_may_pull(skb, A5PSW_TAG_LEN))) {
+ dev_warn_ratelimited(&dev->dev,
+ "Dropping packet, cannot pull\n");
+ return NULL;
+ }
+
+ tag = dsa_etype_header_pos_rx(skb);
+
+ if (tag->ctrl_tag != htons(ETH_P_DSA_A5PSW)) {
+ dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid TAG marker\n");
+ return NULL;
+ }
+
+ port = FIELD_GET(A5PSW_CTRL_DATA_PORT, ntohs(tag->ctrl_data));
+
+ skb->dev = dsa_master_find_slave(dev, 0, port);
+ if (!skb->dev)
+ return NULL;
+
+ skb_pull_rcsum(skb, A5PSW_TAG_LEN);
+ dsa_strip_etype_header(skb, A5PSW_TAG_LEN);
+
+ dsa_default_offload_fwd_mark(skb);
+
+ return skb;
+}
+
+static const struct dsa_device_ops a5psw_netdev_ops = {
+ .name = "a5psw",
+ .proto = DSA_TAG_PROTO_RZN1_A5PSW,
+ .xmit = a5psw_tag_xmit,
+ .rcv = a5psw_tag_rcv,
+ .needed_headroom = A5PSW_TAG_LEN,
+};
+
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_RZN1_A5PSW);
+module_dsa_tag_driver(a5psw_netdev_ops);
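
The tag driver packs and extracts its port field with GENMASK()/FIELD_PREP()/FIELD_GET(). A portable approximation of those macros for contiguous 32-bit masks, assuming a GCC-style __builtin_ctz and simplified relative to the kernel's versions:

#include <assert.h>
#include <stdint.h>

#define GENMASK32(h, l)		((~0u >> (31 - (h))) & (~0u << (l)))
#define FIELD_SHIFT(m)		(__builtin_ctz(m))
#define FIELD_PREP32(m, v)	(((uint32_t)(v) << FIELD_SHIFT(m)) & (m))
#define FIELD_GET32(m, v)	(((uint32_t)(v) & (m)) >> FIELD_SHIFT(m))

int main(void)
{
	uint32_t mask = GENMASK32(3, 0);	/* like A5PSW_CTRL_DATA_PORT */
	uint32_t word = FIELD_PREP32(mask, 1u << 2);	/* BIT(port 2)  */

	assert(FIELD_GET32(mask, word) == (1u << 2));
	return 0;
}
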
diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c
index 7e6b37a54add..1c94bb8ea03f 100644
--- a/net/ethtool/eeprom.c
+++ b/net/ethtool/eeprom.c
@@ -36,7 +36,7 @@ static int fallback_set_params(struct eeprom_req_info *request,
if (request->page)
offset = request->page * ETH_MODULE_EEPROM_PAGE_LEN + offset;
- if (modinfo->type == ETH_MODULE_SFF_8079 &&
+ if (modinfo->type == ETH_MODULE_SFF_8472 &&
request->i2c_address == 0x51)
offset += ETH_MODULE_EEPROM_PAGE_LEN * 2;
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index d05ff6a17a1f..6a7308de192d 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -369,22 +369,9 @@ EXPORT_SYMBOL(ethtool_convert_legacy_u32_to_link_mode);
bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
const unsigned long *src)
{
- bool retval = true;
-
- /* TODO: following test will soon always be true */
- if (__ETHTOOL_LINK_MODE_MASK_NBITS > 32) {
- __ETHTOOL_DECLARE_LINK_MODE_MASK(ext);
-
- linkmode_zero(ext);
- bitmap_fill(ext, 32);
- bitmap_complement(ext, ext, __ETHTOOL_LINK_MODE_MASK_NBITS);
- if (linkmode_intersects(ext, src)) {
- /* src mask goes beyond bit 31 */
- retval = false;
- }
- }
*legacy_u32 = src[0];
- return retval;
+ return find_next_bit(src, __ETHTOOL_LINK_MODE_MASK_NBITS, 32) ==
+ __ETHTOOL_LINK_MODE_MASK_NBITS;
}
EXPORT_SYMBOL(ethtool_convert_link_mode_to_legacy_u32);
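
The rewritten conversion succeeds exactly when find_next_bit(src, NBITS, 32) finds nothing, i.e. no link-mode bit at position 32 or above is set. A standalone sketch of the same predicate over a plain uint64_t bitmap (the word size is chosen for the example; the kernel uses arrays of unsigned long):

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define NBITS	92	/* like __ETHTOOL_LINK_MODE_MASK_NBITS */
#define NWORDS	((NBITS + 63) / 64)

/* True when no bit at position >= 32 is set, so the bitmap fits in
 * a legacy u32 - the condition checked via find_next_bit() above. */
static bool fits_in_u32(const uint64_t *map)
{
	if (map[0] >> 32)
		return false;
	for (size_t i = 1; i < NWORDS; i++)
		if (map[i])
			return false;
	return true;
}

int main(void)
{
	uint64_t map[NWORDS] = { 0x7 };		/* bits 0..2 only   */

	assert(fits_in_u32(map));
	map[0] |= 1ull << 40;			/* an extended mode */
	assert(!fits_in_u32(map));
	return 0;
}
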
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 30e0e8992085..da81f56fdd1c 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1929,6 +1929,8 @@ static int __init inet_init(void)
sock_skb_cb_check_size(sizeof(struct inet_skb_parm));
+ raw_hashinfo_init(&raw_v4_hashinfo);
+
rc = proto_register(&tcp_prot, 1);
if (rc)
goto out;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index b21238df3301..7eae8d686e20 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -502,9 +502,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
nfrags++;
- skb->len += tailen;
- skb->data_len += tailen;
- skb->truesize += tailen;
+ skb_len_add(skb, tailen);
if (sk && sk_fullsock(sk))
refcount_add(tailen, &sk->sk_wmem_alloc);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index c0b7e6c21360..53f5f956d948 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -117,32 +117,6 @@ bool inet_rcv_saddr_any(const struct sock *sk)
return !sk->sk_rcv_saddr;
}
-static bool use_bhash2_on_bind(const struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- int addr_type;
-
- if (sk->sk_family == AF_INET6) {
- addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
- return addr_type != IPV6_ADDR_ANY &&
- addr_type != IPV6_ADDR_MAPPED;
- }
-#endif
- return sk->sk_rcv_saddr != htonl(INADDR_ANY);
-}
-
-static u32 get_bhash2_nulladdr_hash(const struct sock *sk, struct net *net,
- int port)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- struct in6_addr nulladdr = {};
-
- if (sk->sk_family == AF_INET6)
- return ipv6_portaddr_hash(net, &nulladdr, port);
-#endif
- return ipv4_portaddr_hash(net, 0, port);
-}
-
void inet_get_local_port_range(struct net *net, int *low, int *high)
{
unsigned int seq;
@@ -156,71 +130,16 @@ void inet_get_local_port_range(struct net *net, int *low, int *high)
}
EXPORT_SYMBOL(inet_get_local_port_range);
-static bool bind_conflict_exist(const struct sock *sk, struct sock *sk2,
- kuid_t sk_uid, bool relax,
- bool reuseport_cb_ok, bool reuseport_ok)
-{
- int bound_dev_if2;
-
- if (sk == sk2)
- return false;
-
- bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if);
-
- if (!sk->sk_bound_dev_if || !bound_dev_if2 ||
- sk->sk_bound_dev_if == bound_dev_if2) {
- if (sk->sk_reuse && sk2->sk_reuse &&
- sk2->sk_state != TCP_LISTEN) {
- if (!relax || (!reuseport_ok && sk->sk_reuseport &&
- sk2->sk_reuseport && reuseport_cb_ok &&
- (sk2->sk_state == TCP_TIME_WAIT ||
- uid_eq(sk_uid, sock_i_uid(sk2)))))
- return true;
- } else if (!reuseport_ok || !sk->sk_reuseport ||
- !sk2->sk_reuseport || !reuseport_cb_ok ||
- (sk2->sk_state != TCP_TIME_WAIT &&
- !uid_eq(sk_uid, sock_i_uid(sk2)))) {
- return true;
- }
- }
- return false;
-}
-
-static bool check_bhash2_conflict(const struct sock *sk,
- struct inet_bind2_bucket *tb2, kuid_t sk_uid,
- bool relax, bool reuseport_cb_ok,
- bool reuseport_ok)
-{
- struct sock *sk2;
-
- sk_for_each_bound_bhash2(sk2, &tb2->owners) {
- if (sk->sk_family == AF_INET && ipv6_only_sock(sk2))
- continue;
-
- if (bind_conflict_exist(sk, sk2, sk_uid, relax,
- reuseport_cb_ok, reuseport_ok))
- return true;
- }
- return false;
-}
-
-/* This should be called only when the corresponding inet_bind_bucket spinlock
- * is held
- */
-static int inet_csk_bind_conflict(const struct sock *sk, int port,
- struct inet_bind_bucket *tb,
- struct inet_bind2_bucket *tb2, /* may be null */
+static int inet_csk_bind_conflict(const struct sock *sk,
+ const struct inet_bind_bucket *tb,
bool relax, bool reuseport_ok)
{
- struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
- kuid_t uid = sock_i_uid((struct sock *)sk);
- struct sock_reuseport *reuseport_cb;
- struct inet_bind2_hashbucket *head2;
- bool reuseport_cb_ok;
struct sock *sk2;
- struct net *net;
- int l3mdev;
- u32 hash;
+ bool reuseport_cb_ok;
+ bool reuse = sk->sk_reuse;
+ bool reuseport = !!sk->sk_reuseport;
+ struct sock_reuseport *reuseport_cb;
+ kuid_t uid = sock_i_uid((struct sock *)sk);
rcu_read_lock();
reuseport_cb = rcu_dereference(sk->sk_reuseport_cb);
@@ -231,42 +150,40 @@ static int inet_csk_bind_conflict(const struct sock *sk, int port,
/*
* Unlike other sk lookup places we do not check
* for sk_net here, since _all_ the socks listed
- * in tb->owners and tb2->owners list belong
- * to the same net
+ * in tb->owners list belong to the same net - the
+ * one this bucket belongs to.
*/
- if (!use_bhash2_on_bind(sk)) {
- sk_for_each_bound(sk2, &tb->owners)
- if (bind_conflict_exist(sk, sk2, uid, relax,
- reuseport_cb_ok, reuseport_ok) &&
- inet_rcv_saddr_equal(sk, sk2, true))
- return true;
+ sk_for_each_bound(sk2, &tb->owners) {
+ int bound_dev_if2;
- return false;
+ if (sk == sk2)
+ continue;
+ bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if);
+ if ((!sk->sk_bound_dev_if ||
+ !bound_dev_if2 ||
+ sk->sk_bound_dev_if == bound_dev_if2)) {
+ if (reuse && sk2->sk_reuse &&
+ sk2->sk_state != TCP_LISTEN) {
+ if ((!relax ||
+ (!reuseport_ok &&
+ reuseport && sk2->sk_reuseport &&
+ reuseport_cb_ok &&
+ (sk2->sk_state == TCP_TIME_WAIT ||
+ uid_eq(uid, sock_i_uid(sk2))))) &&
+ inet_rcv_saddr_equal(sk, sk2, true))
+ break;
+ } else if (!reuseport_ok ||
+ !reuseport || !sk2->sk_reuseport ||
+ !reuseport_cb_ok ||
+ (sk2->sk_state != TCP_TIME_WAIT &&
+ !uid_eq(uid, sock_i_uid(sk2)))) {
+ if (inet_rcv_saddr_equal(sk, sk2, true))
+ break;
+ }
+ }
}
-
- if (tb2 && check_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,
- reuseport_ok))
- return true;
-
- net = sock_net(sk);
-
- /* check there's no conflict with an existing IPV6_ADDR_ANY (if ipv6) or
- * INADDR_ANY (if ipv4) socket.
- */
- hash = get_bhash2_nulladdr_hash(sk, net, port);
- head2 = &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
-
- l3mdev = inet_sk_bound_l3mdev(sk);
- inet_bind_bucket_for_each(tb2, &head2->chain)
- if (check_bind2_bucket_match_nulladdr(tb2, net, port, l3mdev, sk))
- break;
-
- if (tb2 && check_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,
- reuseport_ok))
- return true;
-
- return false;
+ return sk2 != NULL;
}
/*
@@ -274,20 +191,16 @@ static int inet_csk_bind_conflict(const struct sock *sk, int port,
* inet_bind_hashbucket lock held.
*/
static struct inet_bind_hashbucket *
-inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret,
- struct inet_bind2_bucket **tb2_ret,
- struct inet_bind2_hashbucket **head2_ret, int *port_ret)
+inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *port_ret)
{
struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
- struct inet_bind2_hashbucket *head2;
+ int port = 0;
struct inet_bind_hashbucket *head;
struct net *net = sock_net(sk);
+ bool relax = false;
int i, low, high, attempt_half;
- struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
u32 remaining, offset;
- bool relax = false;
- int port = 0;
int l3mdev;
l3mdev = inet_sk_bound_l3mdev(sk);
@@ -326,12 +239,10 @@ other_parity_scan:
head = &hinfo->bhash[inet_bhashfn(net, port,
hinfo->bhash_size)];
spin_lock_bh(&head->lock);
- tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk,
- &head2);
inet_bind_bucket_for_each(tb, &head->chain)
- if (check_bind_bucket_match(tb, net, port, l3mdev)) {
- if (!inet_csk_bind_conflict(sk, port, tb, tb2,
- relax, false))
+ if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
+ tb->port == port) {
+ if (!inet_csk_bind_conflict(sk, tb, relax, false))
goto success;
goto next_port;
}
@@ -361,8 +272,6 @@ next_port:
success:
*port_ret = port;
*tb_ret = tb;
- *tb2_ret = tb2;
- *head2_ret = head2;
return head;
}
@@ -458,81 +367,54 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
{
bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
- bool bhash_created = false, bhash2_created = false;
- struct inet_bind2_bucket *tb2 = NULL;
- struct inet_bind2_hashbucket *head2;
- struct inet_bind_bucket *tb = NULL;
+ int ret = 1, port = snum;
struct inet_bind_hashbucket *head;
struct net *net = sock_net(sk);
- int ret = 1, port = snum;
- bool found_port = false;
+ struct inet_bind_bucket *tb = NULL;
int l3mdev;
l3mdev = inet_sk_bound_l3mdev(sk);
if (!port) {
- head = inet_csk_find_open_port(sk, &tb, &tb2, &head2, &port);
+ head = inet_csk_find_open_port(sk, &tb, &port);
if (!head)
return ret;
- if (tb && tb2)
- goto success;
- found_port = true;
- } else {
- head = &hinfo->bhash[inet_bhashfn(net, port,
- hinfo->bhash_size)];
- spin_lock_bh(&head->lock);
- inet_bind_bucket_for_each(tb, &head->chain)
- if (check_bind_bucket_match(tb, net, port, l3mdev))
- break;
-
- tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk,
- &head2);
- }
-
- if (!tb) {
- tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, net,
- head, port, l3mdev);
if (!tb)
- goto fail_unlock;
- bhash_created = true;
- }
-
- if (!tb2) {
- tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep,
- net, head2, port, l3mdev, sk);
- if (!tb2)
- goto fail_unlock;
- bhash2_created = true;
+ goto tb_not_found;
+ goto success;
}
-
- /* If we had to find an open port, we already checked for conflicts */
- if (!found_port && !hlist_empty(&tb->owners)) {
+ head = &hinfo->bhash[inet_bhashfn(net, port,
+ hinfo->bhash_size)];
+ spin_lock_bh(&head->lock);
+ inet_bind_bucket_for_each(tb, &head->chain)
+ if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
+ tb->port == port)
+ goto tb_found;
+tb_not_found:
+ tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
+ net, head, port, l3mdev);
+ if (!tb)
+ goto fail_unlock;
+tb_found:
+ if (!hlist_empty(&tb->owners)) {
if (sk->sk_reuse == SK_FORCE_REUSE)
goto success;
if ((tb->fastreuse > 0 && reuse) ||
sk_reuseport_match(tb, sk))
goto success;
- if (inet_csk_bind_conflict(sk, port, tb, tb2, true, true))
+ if (inet_csk_bind_conflict(sk, tb, true, true))
goto fail_unlock;
}
success:
inet_csk_update_fastreuse(tb, sk);
if (!inet_csk(sk)->icsk_bind_hash)
- inet_bind_hash(sk, tb, tb2, port);
+ inet_bind_hash(sk, tb, port);
WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
- WARN_ON(inet_csk(sk)->icsk_bind2_hash != tb2);
ret = 0;
fail_unlock:
- if (ret) {
- if (bhash_created)
- inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
- if (bhash2_created)
- inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep,
- tb2);
- }
spin_unlock_bh(&head->lock);
return ret;
}
@@ -1079,7 +961,6 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
inet_sk_set_state(newsk, TCP_SYN_RECV);
newicsk->icsk_bind_hash = NULL;
- newicsk->icsk_bind2_hash = NULL;
inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 545f91b6cb5e..b9d995b5ce24 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -81,41 +81,6 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
return tb;
}
-struct inet_bind2_bucket *inet_bind2_bucket_create(struct kmem_cache *cachep,
- struct net *net,
- struct inet_bind2_hashbucket *head,
- const unsigned short port,
- int l3mdev,
- const struct sock *sk)
-{
- struct inet_bind2_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
-
- if (tb) {
- write_pnet(&tb->ib_net, net);
- tb->l3mdev = l3mdev;
- tb->port = port;
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
- tb->v6_rcv_saddr = sk->sk_v6_rcv_saddr;
- else
-#endif
- tb->rcv_saddr = sk->sk_rcv_saddr;
- INIT_HLIST_HEAD(&tb->owners);
- hlist_add_head(&tb->node, &head->chain);
- }
- return tb;
-}
-
-static bool bind2_bucket_addr_match(struct inet_bind2_bucket *tb2, struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
- return ipv6_addr_equal(&tb2->v6_rcv_saddr,
- &sk->sk_v6_rcv_saddr);
-#endif
- return tb2->rcv_saddr == sk->sk_rcv_saddr;
-}
-
/*
* Caller must hold hashbucket lock for this tb with local BH disabled
*/
@@ -127,25 +92,12 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket
}
}
-/* Caller must hold the lock for the corresponding hashbucket in the bhash table
- * with local BH disabled
- */
-void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb)
-{
- if (hlist_empty(&tb->owners)) {
- __hlist_del(&tb->node);
- kmem_cache_free(cachep, tb);
- }
-}
-
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
- struct inet_bind2_bucket *tb2, const unsigned short snum)
+ const unsigned short snum)
{
inet_sk(sk)->inet_num = snum;
sk_add_bind_node(sk, &tb->owners);
inet_csk(sk)->icsk_bind_hash = tb;
- sk_add_bind2_node(sk, &tb2->owners);
- inet_csk(sk)->icsk_bind2_hash = tb2;
}
/*
@@ -157,7 +109,6 @@ static void __inet_put_port(struct sock *sk)
const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num,
hashinfo->bhash_size);
struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
- struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
spin_lock(&head->lock);
@@ -166,13 +117,6 @@ static void __inet_put_port(struct sock *sk)
inet_csk(sk)->icsk_bind_hash = NULL;
inet_sk(sk)->inet_num = 0;
inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
-
- if (inet_csk(sk)->icsk_bind2_hash) {
- tb2 = inet_csk(sk)->icsk_bind2_hash;
- __sk_del_bind2_node(sk);
- inet_csk(sk)->icsk_bind2_hash = NULL;
- inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2);
- }
spin_unlock(&head->lock);
}
@@ -189,19 +133,14 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
unsigned short port = inet_sk(child)->inet_num;
const int bhash = inet_bhashfn(sock_net(sk), port,
- table->bhash_size);
+ table->bhash_size);
struct inet_bind_hashbucket *head = &table->bhash[bhash];
- struct inet_bind2_hashbucket *head_bhash2;
- bool created_inet_bind_bucket = false;
- struct net *net = sock_net(sk);
- struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
int l3mdev;
spin_lock(&head->lock);
tb = inet_csk(sk)->icsk_bind_hash;
- tb2 = inet_csk(sk)->icsk_bind2_hash;
- if (unlikely(!tb || !tb2)) {
+ if (unlikely(!tb)) {
spin_unlock(&head->lock);
return -ENOENT;
}
@@ -214,45 +153,25 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
* as that of the child socket. We have to look up or
* create a new bind bucket for the child here. */
inet_bind_bucket_for_each(tb, &head->chain) {
- if (check_bind_bucket_match(tb, net, port, l3mdev))
+ if (net_eq(ib_net(tb), sock_net(sk)) &&
+ tb->l3mdev == l3mdev && tb->port == port)
break;
}
if (!tb) {
tb = inet_bind_bucket_create(table->bind_bucket_cachep,
- net, head, port, l3mdev);
+ sock_net(sk), head, port,
+ l3mdev);
if (!tb) {
spin_unlock(&head->lock);
return -ENOMEM;
}
- created_inet_bind_bucket = true;
}
inet_csk_update_fastreuse(tb, child);
-
- goto bhash2_find;
- } else if (!bind2_bucket_addr_match(tb2, child)) {
- l3mdev = inet_sk_bound_l3mdev(sk);
-
-bhash2_find:
- tb2 = inet_bind2_bucket_find(table, net, port, l3mdev, child,
- &head_bhash2);
- if (!tb2) {
- tb2 = inet_bind2_bucket_create(table->bind2_bucket_cachep,
- net, head_bhash2, port,
- l3mdev, child);
- if (!tb2)
- goto error;
- }
}
- inet_bind_hash(child, tb, tb2, port);
+ inet_bind_hash(child, tb, port);
spin_unlock(&head->lock);
return 0;
-
-error:
- if (created_inet_bind_bucket)
- inet_bind_bucket_destroy(table->bind_bucket_cachep, tb);
- spin_unlock(&head->lock);
- return -ENOMEM;
}
EXPORT_SYMBOL_GPL(__inet_inherit_port);
@@ -756,76 +675,6 @@ void inet_unhash(struct sock *sk)
}
EXPORT_SYMBOL_GPL(inet_unhash);
-static bool check_bind2_bucket_match(struct inet_bind2_bucket *tb,
- struct net *net, unsigned short port,
- int l3mdev, struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev &&
- ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr);
- else
-#endif
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev && tb->rcv_saddr == sk->sk_rcv_saddr;
-}
-
-bool check_bind2_bucket_match_nulladdr(struct inet_bind2_bucket *tb,
- struct net *net, const unsigned short port,
- int l3mdev, const struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- struct in6_addr nulladdr = {};
-
- if (sk->sk_family == AF_INET6)
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev &&
- ipv6_addr_equal(&tb->v6_rcv_saddr, &nulladdr);
- else
-#endif
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev && tb->rcv_saddr == 0;
-}
-
-static struct inet_bind2_hashbucket *
-inet_bhashfn_portaddr(struct inet_hashinfo *hinfo, const struct sock *sk,
- const struct net *net, unsigned short port)
-{
- u32 hash;
-
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
- hash = ipv6_portaddr_hash(net, &sk->sk_v6_rcv_saddr, port);
- else
-#endif
- hash = ipv4_portaddr_hash(net, sk->sk_rcv_saddr, port);
- return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
-}
-
-/* This should only be called when the spinlock for the socket's corresponding
- * bind_hashbucket is held
- */
-struct inet_bind2_bucket *
-inet_bind2_bucket_find(struct inet_hashinfo *hinfo, struct net *net,
- const unsigned short port, int l3mdev, struct sock *sk,
- struct inet_bind2_hashbucket **head)
-{
- struct inet_bind2_bucket *bhash2 = NULL;
- struct inet_bind2_hashbucket *h;
-
- h = inet_bhashfn_portaddr(hinfo, sk, net, port);
- inet_bind_bucket_for_each(bhash2, &h->chain) {
- if (check_bind2_bucket_match(bhash2, net, port, l3mdev, sk))
- break;
- }
-
- if (head)
- *head = h;
-
- return bhash2;
-}
-
/* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm
* Note that we use 32bit integers (vs RFC 'short integers')
* because 2^16 is not a multiple of num_ephemeral and this
@@ -846,13 +695,10 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
{
struct inet_hashinfo *hinfo = death_row->hashinfo;
struct inet_timewait_sock *tw = NULL;
- struct inet_bind2_hashbucket *head2;
struct inet_bind_hashbucket *head;
int port = inet_sk(sk)->inet_num;
struct net *net = sock_net(sk);
- struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
- bool tb_created = false;
u32 remaining, offset;
int ret, i, low, high;
int l3mdev;
@@ -909,7 +755,8 @@ other_parity_scan:
* the established check is already unique enough.
*/
inet_bind_bucket_for_each(tb, &head->chain) {
- if (check_bind_bucket_match(tb, net, port, l3mdev)) {
+ if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
+ tb->port == port) {
if (tb->fastreuse >= 0 ||
tb->fastreuseport >= 0)
goto next_port;
@@ -927,7 +774,6 @@ other_parity_scan:
spin_unlock_bh(&head->lock);
return -ENOMEM;
}
- tb_created = true;
tb->fastreuse = -1;
tb->fastreuseport = -1;
goto ok;
@@ -943,17 +789,6 @@ next_port:
return -EADDRNOTAVAIL;
ok:
- /* Find the corresponding tb2 bucket since we need to
- * add the socket to the bhash2 table as well
- */
- tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk, &head2);
- if (!tb2) {
- tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep, net,
- head2, port, l3mdev, sk);
- if (!tb2)
- goto error;
- }
-
/* Here we want to add a little bit of randomness to the next source
* port that will be chosen. We use a max() with a random here so that
* on low contention the randomness is maximal and on high contention
@@ -963,7 +798,7 @@ ok:
WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
/* Head lock still held and bh's disabled */
- inet_bind_hash(sk, tb, tb2, port);
+ inet_bind_hash(sk, tb, port);
if (sk_unhashed(sk)) {
inet_sk(sk)->inet_sport = htons(port);
inet_ehash_nolisten(sk, (struct sock *)tw, NULL);
@@ -975,12 +810,6 @@ ok:
inet_twsk_deschedule_put(tw);
local_bh_enable();
return 0;
-
-error:
- if (tb_created)
- inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
- spin_unlock_bh(&head->lock);
- return -ENOMEM;
}
/*
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 3b9cd487075a..5c58e21f724e 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -524,7 +524,6 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
int tunnel_hlen;
int version;
int nhoff;
- int thoff;
tun_info = skb_tunnel_info(skb);
if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
@@ -558,10 +557,16 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
(ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
truncate = true;
- thoff = skb_transport_header(skb) - skb_mac_header(skb);
- if (skb->protocol == htons(ETH_P_IPV6) &&
- (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
- truncate = true;
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ int thoff;
+
+ if (skb_transport_header_was_set(skb))
+ thoff = skb_transport_header(skb) - skb_mac_header(skb);
+ else
+ thoff = nhoff + sizeof(struct ipv6hdr);
+ if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
+ truncate = true;
+ }
if (version == 1) {
erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 00b4bf26fd93..5e32a2f86fbd 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1214,9 +1214,7 @@ alloc_new_skb:
pfrag->offset += copy;
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
- skb->len += copy;
- skb->data_len += copy;
- skb->truesize += copy;
+ skb_len_add(skb, copy);
wmem_alloc_delta += copy;
} else {
err = skb_zerocopy_iter_dgram(skb, from, copy);
@@ -1443,9 +1441,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
skb->csum = csum_block_add(skb->csum, csum, skb->len);
}
- skb->len += len;
- skb->data_len += len;
- skb->truesize += len;
+ skb_len_add(skb, len);
refcount_add(len, &sk->sk_wmem_alloc);
offset += len;
size -= len;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 8324e541d193..f095b6c8100b 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -77,7 +77,12 @@ struct ipmr_result {
* Note that the changes are semaphored via rtnl_lock.
*/
-static DEFINE_RWLOCK(mrt_lock);
+static DEFINE_SPINLOCK(mrt_lock);
+
+static struct net_device *vif_dev_read(const struct vif_device *vif)
+{
+ return rcu_dereference(vif->dev);
+}
/* Multicast router control variables */
@@ -100,11 +105,11 @@ static void ipmr_free_table(struct mr_table *mrt);
static void ip_mr_forward(struct net *net, struct mr_table *mrt,
struct net_device *dev, struct sk_buff *skb,
struct mfc_cache *cache, int local);
-static int ipmr_cache_report(struct mr_table *mrt,
+static int ipmr_cache_report(const struct mr_table *mrt,
struct sk_buff *pkt, vifi_t vifi, int assert);
static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
int cmd);
-static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
+static void igmpmsg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt);
static void mroute_clean_tables(struct mr_table *mrt, int flags);
static void ipmr_expire_process(struct timer_list *t);
@@ -501,11 +506,15 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
return err;
}
- read_lock(&mrt_lock);
dev->stats.tx_bytes += skb->len;
dev->stats.tx_packets++;
- ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
- read_unlock(&mrt_lock);
+ rcu_read_lock();
+
+ /* Pairs with WRITE_ONCE() in vif_add() and vif_delete() */
+ ipmr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num),
+ IGMPMSG_WHOLEPKT);
+
+ rcu_read_unlock();
kfree_skb(skb);
return NETDEV_TX_OK;
}
@@ -572,6 +581,7 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
{
struct net_device *reg_dev = NULL;
struct iphdr *encap;
+ int vif_num;
encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
/* Check that:
@@ -584,11 +594,10 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
ntohs(encap->tot_len) + pimlen > skb->len)
return 1;
- read_lock(&mrt_lock);
- if (mrt->mroute_reg_vif_num >= 0)
- reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
- read_unlock(&mrt_lock);
-
+ /* Pairs with WRITE_ONCE() in vif_add()/vif_delete() */
+ vif_num = READ_ONCE(mrt->mroute_reg_vif_num);
+ if (vif_num >= 0)
+ reg_dev = vif_dev_read(&mrt->vif_table[vif_num]);
if (!reg_dev)
return 1;
@@ -614,10 +623,11 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
static int call_ipmr_vif_entry_notifiers(struct net *net,
enum fib_event_type event_type,
struct vif_device *vif,
+ struct net_device *vif_dev,
vifi_t vif_index, u32 tb_id)
{
return mr_call_vif_notifiers(net, RTNL_FAMILY_IPMR, event_type,
- vif, vif_index, tb_id,
+ vif, vif_dev, vif_index, tb_id,
&net->ipv4.ipmr_seq);
}
@@ -649,22 +659,19 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
v = &mrt->vif_table[vifi];
- if (VIF_EXISTS(mrt, vifi))
- call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, vifi,
- mrt->id);
-
- write_lock_bh(&mrt_lock);
- dev = v->dev;
- v->dev = NULL;
-
- if (!dev) {
- write_unlock_bh(&mrt_lock);
+ dev = rtnl_dereference(v->dev);
+ if (!dev)
return -EADDRNOTAVAIL;
- }
- if (vifi == mrt->mroute_reg_vif_num)
- mrt->mroute_reg_vif_num = -1;
+ spin_lock(&mrt_lock);
+ call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, dev,
+ vifi, mrt->id);
+ RCU_INIT_POINTER(v->dev, NULL);
+ if (vifi == mrt->mroute_reg_vif_num) {
+ /* Pairs with READ_ONCE() in ipmr_cache_report() and reg_vif_xmit() */
+ WRITE_ONCE(mrt->mroute_reg_vif_num, -1);
+ }
if (vifi + 1 == mrt->maxvif) {
int tmp;
@@ -672,10 +679,10 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
if (VIF_EXISTS(mrt, tmp))
break;
}
- mrt->maxvif = tmp+1;
+ WRITE_ONCE(mrt->maxvif, tmp + 1);
}
- write_unlock_bh(&mrt_lock);
+ spin_unlock(&mrt_lock);
dev_set_allmulti(dev, -1);
@@ -777,7 +784,7 @@ out:
spin_unlock(&mfc_unres_lock);
}
-/* Fill oifs list. It is called under write locked mrt_lock. */
+/* Fill oifs list. It is called with mrt_lock held. */
static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache,
unsigned char *ttls)
{
@@ -889,15 +896,18 @@ static int vif_add(struct net *net, struct mr_table *mrt,
v->remote = vifc->vifc_rmt_addr.s_addr;
/* And finish update writing critical data */
- write_lock_bh(&mrt_lock);
- v->dev = dev;
+ spin_lock(&mrt_lock);
+ rcu_assign_pointer(v->dev, dev);
netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC);
- if (v->flags & VIFF_REGISTER)
- mrt->mroute_reg_vif_num = vifi;
+ if (v->flags & VIFF_REGISTER) {
+ /* Pairs with READ_ONCE() in ipmr_cache_report() and reg_vif_xmit() */
+ WRITE_ONCE(mrt->mroute_reg_vif_num, vifi);
+ }
if (vifi+1 > mrt->maxvif)
- mrt->maxvif = vifi+1;
- write_unlock_bh(&mrt_lock);
- call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, vifi, mrt->id);
+ WRITE_ONCE(mrt->maxvif, vifi + 1);
+ spin_unlock(&mrt_lock);
+ call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, dev,
+ vifi, mrt->id);
return 0;
}
@@ -1001,9 +1011,9 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
/* Bounce a cache query up to mrouted and netlink.
*
- * Called under mrt_lock.
+ * Called under rcu_read_lock().
*/
-static int ipmr_cache_report(struct mr_table *mrt,
+static int ipmr_cache_report(const struct mr_table *mrt,
struct sk_buff *pkt, vifi_t vifi, int assert)
{
const int ihl = ip_hdrlen(pkt);
@@ -1038,8 +1048,11 @@ static int ipmr_cache_report(struct mr_table *mrt,
msg->im_vif = vifi;
msg->im_vif_hi = vifi >> 8;
} else {
- msg->im_vif = mrt->mroute_reg_vif_num;
- msg->im_vif_hi = mrt->mroute_reg_vif_num >> 8;
+ /* Pairs with WRITE_ONCE() in vif_add() and vif_delete() */
+ int vif_num = READ_ONCE(mrt->mroute_reg_vif_num);
+
+ msg->im_vif = vif_num;
+ msg->im_vif_hi = vif_num >> 8;
}
ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
@@ -1064,10 +1077,8 @@ static int ipmr_cache_report(struct mr_table *mrt,
skb->transport_header = skb->network_header;
}
- rcu_read_lock();
mroute_sk = rcu_dereference(mrt->mroute_sk);
if (!mroute_sk) {
- rcu_read_unlock();
kfree_skb(skb);
return -EINVAL;
}
@@ -1076,7 +1087,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
/* Deliver to mrouted */
ret = sock_queue_rcv_skb(mroute_sk, skb);
- rcu_read_unlock();
+
if (ret < 0) {
net_warn_ratelimited("mroute: pending queue full, dropping entries\n");
kfree_skb(skb);
@@ -1086,6 +1097,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
}
/* Queue a packet for resolution. It gets locked cache entry! */
+/* Called under rcu_read_lock() */
static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
struct sk_buff *skb, struct net_device *dev)
{
@@ -1198,12 +1210,12 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
mfc->mfcc_mcastgrp.s_addr, parent);
rcu_read_unlock();
if (c) {
- write_lock_bh(&mrt_lock);
+ spin_lock(&mrt_lock);
c->_c.mfc_parent = mfc->mfcc_parent;
ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
if (!mrtsock)
c->_c.mfc_flags |= MFC_STATIC;
- write_unlock_bh(&mrt_lock);
+ spin_unlock(&mrt_lock);
call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
mrt->id);
mroute_netlink_event(mrt, c, RTM_NEWROUTE);
@@ -1598,20 +1610,20 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
if (vr.vifi >= mrt->maxvif)
return -EINVAL;
vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif);
- read_lock(&mrt_lock);
+ rcu_read_lock();
vif = &mrt->vif_table[vr.vifi];
if (VIF_EXISTS(mrt, vr.vifi)) {
- vr.icount = vif->pkt_in;
- vr.ocount = vif->pkt_out;
- vr.ibytes = vif->bytes_in;
- vr.obytes = vif->bytes_out;
- read_unlock(&mrt_lock);
+ vr.icount = READ_ONCE(vif->pkt_in);
+ vr.ocount = READ_ONCE(vif->pkt_out);
+ vr.ibytes = READ_ONCE(vif->bytes_in);
+ vr.obytes = READ_ONCE(vif->bytes_out);
+ rcu_read_unlock();
if (copy_to_user(arg, &vr, sizeof(vr)))
return -EFAULT;
return 0;
}
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -EADDRNOTAVAIL;
case SIOCGETSGCNT:
if (copy_from_user(&sr, arg, sizeof(sr)))
@@ -1673,20 +1685,20 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
if (vr.vifi >= mrt->maxvif)
return -EINVAL;
vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif);
- read_lock(&mrt_lock);
+ rcu_read_lock();
vif = &mrt->vif_table[vr.vifi];
if (VIF_EXISTS(mrt, vr.vifi)) {
- vr.icount = vif->pkt_in;
- vr.ocount = vif->pkt_out;
- vr.ibytes = vif->bytes_in;
- vr.obytes = vif->bytes_out;
- read_unlock(&mrt_lock);
+ vr.icount = READ_ONCE(vif->pkt_in);
+ vr.ocount = READ_ONCE(vif->pkt_out);
+ vr.ibytes = READ_ONCE(vif->bytes_in);
+ vr.obytes = READ_ONCE(vif->bytes_out);
+ rcu_read_unlock();
if (copy_to_user(arg, &vr, sizeof(vr)))
return -EFAULT;
return 0;
}
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -EADDRNOTAVAIL;
case SIOCGETSGCNT:
if (copy_from_user(&sr, arg, sizeof(sr)))
@@ -1726,7 +1738,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
ipmr_for_each_table(mrt, net) {
v = &mrt->vif_table[0];
for (ct = 0; ct < mrt->maxvif; ct++, v++) {
- if (v->dev == dev)
+ if (rcu_access_pointer(v->dev) == dev)
vif_delete(mrt, ct, 1, NULL);
}
}
@@ -1804,26 +1816,28 @@ static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
}
#endif
-/* Processing handlers for ipmr_forward */
+/* Processing handlers for ipmr_forward, under rcu_read_lock() */
static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
int in_vifi, struct sk_buff *skb, int vifi)
{
const struct iphdr *iph = ip_hdr(skb);
struct vif_device *vif = &mrt->vif_table[vifi];
+ struct net_device *vif_dev;
struct net_device *dev;
struct rtable *rt;
struct flowi4 fl4;
int encap = 0;
- if (!vif->dev)
+ vif_dev = vif_dev_read(vif);
+ if (!vif_dev)
goto out_free;
if (vif->flags & VIFF_REGISTER) {
- vif->pkt_out++;
- vif->bytes_out += skb->len;
- vif->dev->stats.tx_bytes += skb->len;
- vif->dev->stats.tx_packets++;
+ WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
+ WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
+ vif_dev->stats.tx_bytes += skb->len;
+ vif_dev->stats.tx_packets++;
ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
goto out_free;
}
@@ -1868,8 +1882,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
goto out_free;
}
- vif->pkt_out++;
- vif->bytes_out += skb->len;
+ WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
+ WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
skb_dst_drop(skb);
skb_dst_set(skb, &rt->dst);
@@ -1881,8 +1895,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
if (vif->flags & VIFF_TUNNEL) {
ip_encap(net, skb, vif->local, vif->remote);
/* FIXME: extra output firewall step used to be here. --RR */
- vif->dev->stats.tx_packets++;
- vif->dev->stats.tx_bytes += skb->len;
+ vif_dev->stats.tx_packets++;
+ vif_dev->stats.tx_bytes += skb->len;
}
IPCB(skb)->flags |= IPSKB_FORWARDED;
@@ -1906,18 +1920,20 @@ out_free:
kfree_skb(skb);
}
-static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
+/* Called with mrt_lock or rcu_read_lock() */
+static int ipmr_find_vif(const struct mr_table *mrt, struct net_device *dev)
{
int ct;
-
- for (ct = mrt->maxvif-1; ct >= 0; ct--) {
- if (mrt->vif_table[ct].dev == dev)
+ /* Pairs with WRITE_ONCE() in vif_delete()/vif_add() */
+ for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) {
+ if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev)
break;
}
return ct;
}
/* "local" means that we should preserve one skb (for local delivery) */
+/* Called under rcu_read_lock() */
static void ip_mr_forward(struct net *net, struct mr_table *mrt,
struct net_device *dev, struct sk_buff *skb,
struct mfc_cache *c, int local)
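
Note that ipmr_find_vif() uses rcu_access_pointer() rather than rcu_dereference(): the pointer value is only compared against dev and never dereferenced, so the protected object is never touched. A hypothetical userspace distillation of that comparison-only walk, with C11 atomics standing in:

#include <stdatomic.h>
#include <stdio.h>

#define MAXVIF 4

struct dev { const char *name; };

static struct dev eth0 = { "eth0" }, eth1 = { "eth1" };
static _Atomic(struct dev *) vif_table[MAXVIF] = { &eth0, &eth1 };

/* Find a vif by device, comparing pointer values only: no read-side
 * critical section is needed because nothing is dereferenced. */
static int find_vif(const struct dev *dev)
{
	for (int ct = MAXVIF - 1; ct >= 0; ct--)
		if (atomic_load_explicit(&vif_table[ct],
					 memory_order_relaxed) == dev)
			return ct;
	return -1;
}

int main(void)
{
	printf("eth1 -> vif %d\n", find_vif(&eth1));	/* 1 */
	printf("eth0 -> vif %d\n", find_vif(&eth0));	/* 0 */
	return 0;
}
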
@@ -1944,7 +1960,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
}
/* Wrong interface: drop packet and (maybe) send PIM assert. */
- if (mrt->vif_table[vif].dev != dev) {
+ if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) {
if (rt_is_output_route(skb_rtable(skb))) {
/* It is our own packet, looped back.
* Very complicated situation...
@@ -1983,8 +1999,10 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
}
forward:
- mrt->vif_table[vif].pkt_in++;
- mrt->vif_table[vif].bytes_in += skb->len;
+ WRITE_ONCE(mrt->vif_table[vif].pkt_in,
+ mrt->vif_table[vif].pkt_in + 1);
+ WRITE_ONCE(mrt->vif_table[vif].bytes_in,
+ mrt->vif_table[vif].bytes_in + skb->len);
/* Forward the frame */
if (c->mfc_origin == htonl(INADDR_ANY) &&
@@ -2140,22 +2158,14 @@ int ip_mr_input(struct sk_buff *skb)
skb = skb2;
}
- read_lock(&mrt_lock);
vif = ipmr_find_vif(mrt, dev);
- if (vif >= 0) {
- int err2 = ipmr_cache_unresolved(mrt, vif, skb, dev);
- read_unlock(&mrt_lock);
-
- return err2;
- }
- read_unlock(&mrt_lock);
+ if (vif >= 0)
+ return ipmr_cache_unresolved(mrt, vif, skb, dev);
kfree_skb(skb);
return -ENODEV;
}
- read_lock(&mrt_lock);
ip_mr_forward(net, mrt, dev, skb, cache, local);
- read_unlock(&mrt_lock);
if (local)
return ip_local_deliver(skb);
@@ -2252,18 +2262,15 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
int vif = -1;
dev = skb->dev;
- read_lock(&mrt_lock);
if (dev)
vif = ipmr_find_vif(mrt, dev);
if (vif < 0) {
- read_unlock(&mrt_lock);
rcu_read_unlock();
return -ENODEV;
}
skb2 = skb_realloc_headroom(skb, sizeof(struct iphdr));
if (!skb2) {
- read_unlock(&mrt_lock);
rcu_read_unlock();
return -ENOMEM;
}
@@ -2277,14 +2284,11 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
iph->daddr = daddr;
iph->version = 0;
err = ipmr_cache_unresolved(mrt, vif, skb2, dev);
- read_unlock(&mrt_lock);
rcu_read_unlock();
return err;
}
- read_lock(&mrt_lock);
err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
- read_unlock(&mrt_lock);
rcu_read_unlock();
return err;
}
@@ -2404,7 +2408,7 @@ static size_t igmpmsg_netlink_msgsize(size_t payloadlen)
return len;
}
-static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
+static void igmpmsg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt)
{
struct net *net = read_pnet(&mrt->net);
struct nlmsghdr *nlh;
@@ -2744,18 +2748,21 @@ static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb)
static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb)
{
+ struct net_device *vif_dev;
struct nlattr *vif_nest;
struct vif_device *vif;
+ vif = &mrt->vif_table[vifid];
+ vif_dev = vif_dev_read(vif);
/* if the VIF doesn't exist just continue */
- if (!VIF_EXISTS(mrt, vifid))
+ if (!vif_dev)
return true;
- vif = &mrt->vif_table[vifid];
vif_nest = nla_nest_start_noflag(skb, IPMRA_VIF);
if (!vif_nest)
return false;
- if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif->dev->ifindex) ||
+
+ if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif_dev->ifindex) ||
nla_put_u32(skb, IPMRA_VIFA_VIF_ID, vifid) ||
nla_put_u16(skb, IPMRA_VIFA_FLAGS, vif->flags) ||
nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, vif->bytes_in,
@@ -2887,7 +2894,7 @@ out:
*/
static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(mrt_lock)
+ __acquires(RCU)
{
struct mr_vif_iter *iter = seq->private;
struct net *net = seq_file_net(seq);
@@ -2899,14 +2906,14 @@ static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
iter->mrt = mrt;
- read_lock(&mrt_lock);
+ rcu_read_lock();
return mr_vif_seq_start(seq, pos);
}
static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
- __releases(mrt_lock)
+ __releases(RCU)
{
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
}
static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
@@ -2919,9 +2926,11 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
"Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
} else {
const struct vif_device *vif = v;
- const char *name = vif->dev ?
- vif->dev->name : "none";
+ const struct net_device *vif_dev;
+ const char *name;
+ vif_dev = vif_dev_read(vif);
+ name = vif_dev ? vif_dev->name : "none";
seq_printf(seq,
"%2td %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
vif - mrt->vif_table,
@@ -3017,7 +3026,7 @@ static int ipmr_dump(struct net *net, struct notifier_block *nb,
struct netlink_ext_ack *extack)
{
return mr_dump(net, nb, RTNL_FAMILY_IPMR, ipmr_rules_dump,
- ipmr_mr_table_iter, &mrt_lock, extack);
+ ipmr_mr_table_iter, extack);
}
static const struct fib_notifier_ops ipmr_notifier_ops_template = {
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
index aa8738a91210..271dc03fc6db 100644
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -13,7 +13,7 @@ void vif_device_init(struct vif_device *v,
unsigned short flags,
unsigned short get_iflink_mask)
{
- v->dev = NULL;
+ RCU_INIT_POINTER(v->dev, NULL);
v->bytes_in = 0;
v->bytes_out = 0;
v->pkt_in = 0;
@@ -208,6 +208,7 @@ EXPORT_SYMBOL(mr_mfc_seq_next);
int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
struct mr_mfc *c, struct rtmsg *rtm)
{
+ struct net_device *vif_dev;
struct rta_mfc_stats mfcs;
struct nlattr *mp_attr;
struct rtnexthop *nhp;
@@ -220,10 +221,13 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
return -ENOENT;
}
- if (VIF_EXISTS(mrt, c->mfc_parent) &&
- nla_put_u32(skb, RTA_IIF,
- mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
+ rcu_read_lock();
+ vif_dev = rcu_dereference(mrt->vif_table[c->mfc_parent].dev);
+ if (vif_dev && nla_put_u32(skb, RTA_IIF, vif_dev->ifindex) < 0) {
+ rcu_read_unlock();
return -EMSGSIZE;
+ }
+ rcu_read_unlock();
if (c->mfc_flags & MFC_OFFLOAD)
rtm->rtm_flags |= RTNH_F_OFFLOAD;
@@ -232,23 +236,27 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
if (!mp_attr)
return -EMSGSIZE;
+ rcu_read_lock();
for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
- if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
- struct vif_device *vif;
+ struct vif_device *vif = &mrt->vif_table[ct];
+
+ vif_dev = rcu_dereference(vif->dev);
+ if (vif_dev && c->mfc_un.res.ttls[ct] < 255) {
nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
if (!nhp) {
+ rcu_read_unlock();
nla_nest_cancel(skb, mp_attr);
return -EMSGSIZE;
}
nhp->rtnh_flags = 0;
nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
- vif = &mrt->vif_table[ct];
- nhp->rtnh_ifindex = vif->dev->ifindex;
+ nhp->rtnh_ifindex = vif_dev->ifindex;
nhp->rtnh_len = sizeof(*nhp);
}
}
+ rcu_read_unlock();
nla_nest_end(skb, mp_attr);
@@ -275,13 +283,14 @@ static bool mr_mfc_uses_dev(const struct mr_table *mrt,
int ct;
for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
- if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
- const struct vif_device *vif;
-
- vif = &mrt->vif_table[ct];
- if (vif->dev == dev)
- return true;
- }
+ const struct net_device *vif_dev;
+ const struct vif_device *vif;
+
+ vif = &mrt->vif_table[ct];
+ vif_dev = rcu_access_pointer(vif->dev);
+ if (vif_dev && c->mfc_un.res.ttls[ct] < 255 &&
+ vif_dev == dev)
+ return true;
}
return false;
}
@@ -390,7 +399,6 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
struct netlink_ext_ack *extack),
struct mr_table *(*mr_iter)(struct net *net,
struct mr_table *mrt),
- rwlock_t *mrt_lock,
struct netlink_ext_ack *extack)
{
struct mr_table *mrt;
@@ -402,22 +410,25 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
for (mrt = mr_iter(net, NULL); mrt; mrt = mr_iter(net, mrt)) {
struct vif_device *v = &mrt->vif_table[0];
+ struct net_device *vif_dev;
struct mr_mfc *mfc;
int vifi;
/* Notify on table VIF entries */
- read_lock(mrt_lock);
+ rcu_read_lock();
for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
- if (!v->dev)
+ vif_dev = rcu_dereference(v->dev);
+ if (!vif_dev)
continue;
err = mr_call_vif_notifier(nb, family,
- FIB_EVENT_VIF_ADD,
- v, vifi, mrt->id, extack);
+ FIB_EVENT_VIF_ADD, v,
+ vif_dev, vifi,
+ mrt->id, extack);
if (err)
break;
}
- read_unlock(mrt_lock);
+ rcu_read_unlock();
if (err)
return err;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 1a43ca73f94d..b83c2bd9d722 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -50,7 +50,7 @@
struct ping_table {
struct hlist_nulls_head hash[PING_HTABLE_SIZE];
- rwlock_t lock;
+ spinlock_t lock;
};
static struct ping_table ping_table;
@@ -82,7 +82,7 @@ int ping_get_port(struct sock *sk, unsigned short ident)
struct sock *sk2 = NULL;
isk = inet_sk(sk);
- write_lock_bh(&ping_table.lock);
+ spin_lock(&ping_table.lock);
if (ident == 0) {
u32 i;
u16 result = ping_port_rover + 1;
@@ -128,14 +128,15 @@ next_port:
if (sk_unhashed(sk)) {
pr_debug("was not hashed\n");
sock_hold(sk);
- hlist_nulls_add_head(&sk->sk_nulls_node, hlist);
+ sock_set_flag(sk, SOCK_RCU_FREE);
+ hlist_nulls_add_head_rcu(&sk->sk_nulls_node, hlist);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
}
- write_unlock_bh(&ping_table.lock);
+ spin_unlock(&ping_table.lock);
return 0;
fail:
- write_unlock_bh(&ping_table.lock);
+ spin_unlock(&ping_table.lock);
return 1;
}
EXPORT_SYMBOL_GPL(ping_get_port);
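
The move from hlist to hlist_nulls plus SOCK_RCU_FREE is what makes the later lockless ping_lookup() safe: the socket's memory outlives the RCU read section, and the odd "nulls" terminator encodes the bucket number so a reader migrated to another chain mid-walk can notice and restart. A toy sketch of the nulls idea (deliberately not the kernel implementation):

#include <stdint.h>
#include <stdio.h>

/* Chains end in an odd value encoding the bucket number */
#define NULLS(bucket)	((struct node *)((((uintptr_t)(bucket)) << 1) | 1))
#define IS_NULLS(p)	((uintptr_t)(p) & 1)
#define NULLS_VAL(p)	((uintptr_t)(p) >> 1)

struct node { int key; struct node *next; };

static struct node *lookup(struct node *const *head, unsigned int bucket,
			   int key)
{
	struct node *n;

restart:
	for (n = *head; !IS_NULLS(n); n = n->next)
		if (n->key == key)
			return n;
	/* A miss only counts if we ended on *our* bucket's terminator;
	 * otherwise a concurrent rehash moved us to another chain. */
	if (NULLS_VAL(n) != bucket)
		goto restart;
	return NULL;
}

int main(void)
{
	struct node b = { .key = 7, .next = NULLS(0) };
	struct node a = { .key = 3, .next = &b };
	struct node *head = &a;

	printf("key 7: %s\n", lookup(&head, 0, 7) ? "found" : "miss");
	printf("key 9: %s\n", lookup(&head, 0, 9) ? "found" : "miss");
	return 0;
}
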
@@ -153,19 +154,19 @@ void ping_unhash(struct sock *sk)
struct inet_sock *isk = inet_sk(sk);
pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num);
- write_lock_bh(&ping_table.lock);
+ spin_lock(&ping_table.lock);
if (sk_hashed(sk)) {
- hlist_nulls_del(&sk->sk_nulls_node);
- sk_nulls_node_init(&sk->sk_nulls_node);
+ hlist_nulls_del_init_rcu(&sk->sk_nulls_node);
sock_put(sk);
isk->inet_num = 0;
isk->inet_sport = 0;
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
}
- write_unlock_bh(&ping_table.lock);
+ spin_unlock(&ping_table.lock);
}
EXPORT_SYMBOL_GPL(ping_unhash);
+/* Called under rcu_read_lock() */
static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
{
struct hlist_nulls_head *hslot = ping_hashslot(&ping_table, net, ident);
@@ -190,8 +191,6 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
return NULL;
}
- read_lock_bh(&ping_table.lock);
-
ping_portaddr_for_each_entry(sk, hnode, hslot) {
isk = inet_sk(sk);
@@ -230,13 +229,11 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
sk->sk_bound_dev_if != sdif)
continue;
- sock_hold(sk);
goto exit;
}
sk = NULL;
exit:
- read_unlock_bh(&ping_table.lock);
return sk;
}
@@ -319,12 +316,16 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n",
sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port));
+ if (addr->sin_addr.s_addr == htonl(INADDR_ANY))
+ return 0;
+
tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id;
chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);
- if (!inet_addr_valid_or_nonlocal(net, inet_sk(sk),
- addr->sin_addr.s_addr,
- chk_addr_ret))
+ if (chk_addr_ret == RTN_MULTICAST ||
+ chk_addr_ret == RTN_BROADCAST ||
+ (chk_addr_ret != RTN_LOCAL &&
+ !inet_can_nonlocal_bind(net, isk)))
return -EADDRNOTAVAIL;
#if IS_ENABLED(CONFIG_IPV6)
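
The rewritten bind check encodes three rules: an INADDR_ANY bind always passes, multicast and broadcast addresses are always refused, and any other non-local address requires the nonlocal-bind setting. A hypothetical distillation of that predicate:

#include <stdbool.h>
#include <stdio.h>

enum rtn { RTN_LOCAL, RTN_MULTICAST, RTN_BROADCAST, RTN_UNICAST };

static bool bind_ok(bool is_any, enum rtn type, bool can_nonlocal_bind)
{
	if (is_any)
		return true;		/* wildcard always allowed */
	if (type == RTN_MULTICAST || type == RTN_BROADCAST)
		return false;		/* never bindable */
	return type == RTN_LOCAL || can_nonlocal_bind;
}

int main(void)
{
	printf("0.0.0.0:        %d\n", bind_ok(true,  RTN_UNICAST,   false));
	printf("224.0.0.1:      %d\n", bind_ok(false, RTN_MULTICAST, true));
	printf("non-local addr: %d\n", bind_ok(false, RTN_UNICAST,   false));
	return 0;
}
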
@@ -588,7 +589,7 @@ void ping_err(struct sk_buff *skb, int offset, u32 info)
sk->sk_err = err;
sk_error_report(sk);
out:
- sock_put(sk);
+ return;
}
EXPORT_SYMBOL_GPL(ping_err);
@@ -994,7 +995,6 @@ enum skb_drop_reason ping_rcv(struct sk_buff *skb)
reason = __ping_queue_rcv_skb(sk, skb2);
else
reason = SKB_DROP_REASON_NOMEM;
- sock_put(sk);
}
if (reason)
@@ -1080,13 +1080,13 @@ static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos)
}
void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family)
- __acquires(ping_table.lock)
+ __acquires(RCU)
{
struct ping_iter_state *state = seq->private;
state->bucket = 0;
state->family = family;
- read_lock_bh(&ping_table.lock);
+ rcu_read_lock();
return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
}
@@ -1112,9 +1112,9 @@ void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos)
EXPORT_SYMBOL_GPL(ping_seq_next);
void ping_seq_stop(struct seq_file *seq, void *v)
- __releases(ping_table.lock)
+ __releases(RCU)
{
- read_unlock_bh(&ping_table.lock);
+ rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(ping_seq_stop);
@@ -1198,5 +1198,5 @@ void __init ping_init(void)
for (i = 0; i < PING_HTABLE_SIZE; i++)
INIT_HLIST_NULLS_HEAD(&ping_table.hash[i], i);
- rwlock_init(&ping_table.lock);
+ spin_lock_init(&ping_table.lock);
}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index bbd717805b10..006c1f0ed8b4 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -85,21 +85,20 @@ struct raw_frag_vec {
int hlen;
};
-struct raw_hashinfo raw_v4_hashinfo = {
- .lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock),
-};
+struct raw_hashinfo raw_v4_hashinfo;
EXPORT_SYMBOL_GPL(raw_v4_hashinfo);
int raw_hash_sk(struct sock *sk)
{
struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
- struct hlist_head *head;
+ struct hlist_nulls_head *hlist;
- head = &h->ht[inet_sk(sk)->inet_num & (RAW_HTABLE_SIZE - 1)];
+ hlist = &h->ht[inet_sk(sk)->inet_num & (RAW_HTABLE_SIZE - 1)];
- write_lock_bh(&h->lock);
- sk_add_node(sk, head);
- write_unlock_bh(&h->lock);
+ spin_lock(&h->lock);
+ __sk_nulls_add_node_rcu(sk, hlist);
+ sock_set_flag(sk, SOCK_RCU_FREE);
+ spin_unlock(&h->lock);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
return 0;
@@ -110,31 +109,26 @@ void raw_unhash_sk(struct sock *sk)
{
struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
- write_lock_bh(&h->lock);
- if (sk_del_node_init(sk))
+ spin_lock(&h->lock);
+ if (__sk_nulls_del_node_init_rcu(sk))
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
- write_unlock_bh(&h->lock);
+ spin_unlock(&h->lock);
}
EXPORT_SYMBOL_GPL(raw_unhash_sk);
-struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
- unsigned short num, __be32 raddr, __be32 laddr,
- int dif, int sdif)
+bool raw_v4_match(struct net *net, struct sock *sk, unsigned short num,
+ __be32 raddr, __be32 laddr, int dif, int sdif)
{
- sk_for_each_from(sk) {
- struct inet_sock *inet = inet_sk(sk);
-
- if (net_eq(sock_net(sk), net) && inet->inet_num == num &&
- !(inet->inet_daddr && inet->inet_daddr != raddr) &&
- !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
- raw_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
- goto found; /* gotcha */
- }
- sk = NULL;
-found:
- return sk;
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (net_eq(sock_net(sk), net) && inet->inet_num == num &&
+ !(inet->inet_daddr && inet->inet_daddr != raddr) &&
+ !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
+ raw_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
+ return true;
+ return false;
}
-EXPORT_SYMBOL_GPL(__raw_v4_lookup);
+EXPORT_SYMBOL_GPL(raw_v4_match);
/*
* 0 - deliver
@@ -168,23 +162,20 @@ static int icmp_filter(const struct sock *sk, const struct sk_buff *skb)
*/
static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
{
+ struct net *net = dev_net(skb->dev);
+ struct hlist_nulls_head *hlist;
+ struct hlist_nulls_node *hnode;
int sdif = inet_sdif(skb);
int dif = inet_iif(skb);
- struct sock *sk;
- struct hlist_head *head;
int delivered = 0;
- struct net *net;
-
- read_lock(&raw_v4_hashinfo.lock);
- head = &raw_v4_hashinfo.ht[hash];
- if (hlist_empty(head))
- goto out;
-
- net = dev_net(skb->dev);
- sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol,
- iph->saddr, iph->daddr, dif, sdif);
+ struct sock *sk;
- while (sk) {
+ hlist = &raw_v4_hashinfo.ht[hash];
+ rcu_read_lock();
+ sk_nulls_for_each(sk, hnode, hlist) {
+ if (!raw_v4_match(net, sk, iph->protocol,
+ iph->saddr, iph->daddr, dif, sdif))
+ continue;
delivered = 1;
if ((iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) &&
ip_mc_sf_allow(sk, iph->daddr, iph->saddr,
@@ -195,31 +186,16 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
if (clone)
raw_rcv(sk, clone);
}
- sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol,
- iph->saddr, iph->daddr,
- dif, sdif);
}
-out:
- read_unlock(&raw_v4_hashinfo.lock);
+ rcu_read_unlock();
return delivered;
}
int raw_local_deliver(struct sk_buff *skb, int protocol)
{
- int hash;
- struct sock *raw_sk;
-
- hash = protocol & (RAW_HTABLE_SIZE - 1);
- raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);
-
- /* If there maybe a raw socket we must check - if not we
- * don't care less
- */
- if (raw_sk && !raw_v4_input(skb, ip_hdr(skb), hash))
- raw_sk = NULL;
-
- return raw_sk != NULL;
+ int hash = protocol & (RAW_HTABLE_SIZE - 1);
+ return raw_v4_input(skb, ip_hdr(skb), hash);
}
static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
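
The recurring rework in raw.c: __raw_v4_lookup(), a cursor-style "next match" helper, becomes the per-socket predicate raw_v4_match(), and every caller now owns its own list walk under rcu_read_lock(). A minimal illustration of that inversion, with plain pointers standing in for the RCU nulls list:

#include <stdbool.h>
#include <stdio.h>

struct sk { int proto; struct sk *next; };

static bool raw_match(const struct sk *sk, int proto)
{
	return sk->proto == proto;	/* cf. raw_v4_match() */
}

/* The caller drives the walk and applies the predicate, instead of a
 * lookup helper hiding the traversal behind a cursor. */
static int deliver_all(struct sk *head, int proto)
{
	int delivered = 0;

	for (struct sk *sk = head; sk; sk = sk->next)
		if (raw_match(sk, proto))
			delivered++;
	return delivered;
}

int main(void)
{
	struct sk c = { 17, NULL }, b = { 6, &c }, a = { 17, &b };

	printf("delivered: %d\n", deliver_all(&a, 17));	/* 2 */
	return 0;
}
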
@@ -286,31 +262,27 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
{
- int hash;
- struct sock *raw_sk;
+ struct net *net = dev_net(skb->dev);
+ struct hlist_nulls_head *hlist;
+ struct hlist_nulls_node *hnode;
+ int dif = skb->dev->ifindex;
+ int sdif = inet_sdif(skb);
const struct iphdr *iph;
- struct net *net;
+ struct sock *sk;
+ int hash;
hash = protocol & (RAW_HTABLE_SIZE - 1);
+ hlist = &raw_v4_hashinfo.ht[hash];
- read_lock(&raw_v4_hashinfo.lock);
- raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);
- if (raw_sk) {
- int dif = skb->dev->ifindex;
- int sdif = inet_sdif(skb);
-
+ rcu_read_lock();
+ sk_nulls_for_each(sk, hnode, hlist) {
iph = (const struct iphdr *)skb->data;
- net = dev_net(skb->dev);
-
- while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol,
- iph->daddr, iph->saddr,
- dif, sdif)) != NULL) {
- raw_err(raw_sk, skb, info);
- raw_sk = sk_next(raw_sk);
- iph = (const struct iphdr *)skb->data;
- }
+ if (!raw_v4_match(net, sk, iph->protocol,
+ iph->daddr, iph->saddr, dif, sdif))
+ continue;
+ raw_err(sk, skb, info);
}
- read_unlock(&raw_v4_hashinfo.lock);
+ rcu_read_unlock();
}
static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb)
@@ -971,44 +943,41 @@ struct proto raw_prot = {
};
#ifdef CONFIG_PROC_FS
-static struct sock *raw_get_first(struct seq_file *seq)
+static struct sock *raw_get_first(struct seq_file *seq, int bucket)
{
- struct sock *sk;
struct raw_hashinfo *h = pde_data(file_inode(seq->file));
struct raw_iter_state *state = raw_seq_private(seq);
+ struct hlist_nulls_head *hlist;
+ struct hlist_nulls_node *hnode;
+ struct sock *sk;
- for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE;
+ for (state->bucket = bucket; state->bucket < RAW_HTABLE_SIZE;
++state->bucket) {
- sk_for_each(sk, &h->ht[state->bucket])
+ hlist = &h->ht[state->bucket];
+ sk_nulls_for_each(sk, hnode, hlist) {
if (sock_net(sk) == seq_file_net(seq))
- goto found;
+ return sk;
+ }
}
- sk = NULL;
-found:
- return sk;
+ return NULL;
}
static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk)
{
- struct raw_hashinfo *h = pde_data(file_inode(seq->file));
struct raw_iter_state *state = raw_seq_private(seq);
do {
- sk = sk_next(sk);
-try_again:
- ;
+ sk = sk_nulls_next(sk);
} while (sk && sock_net(sk) != seq_file_net(seq));
- if (!sk && ++state->bucket < RAW_HTABLE_SIZE) {
- sk = sk_head(&h->ht[state->bucket]);
- goto try_again;
- }
+ if (!sk)
+ return raw_get_first(seq, state->bucket + 1);
return sk;
}
static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos)
{
- struct sock *sk = raw_get_first(seq);
+ struct sock *sk = raw_get_first(seq, 0);
if (sk)
while (pos && (sk = raw_get_next(seq, sk)) != NULL)
@@ -1017,11 +986,9 @@ static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos)
}
void *raw_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(&h->lock)
+ __acquires(RCU)
{
- struct raw_hashinfo *h = pde_data(file_inode(seq->file));
-
- read_lock(&h->lock);
+ rcu_read_lock();
return *pos ? raw_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL_GPL(raw_seq_start);
@@ -1031,7 +998,7 @@ void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos)
struct sock *sk;
if (v == SEQ_START_TOKEN)
- sk = raw_get_first(seq);
+ sk = raw_get_first(seq, 0);
else
sk = raw_get_next(seq, v);
++*pos;
@@ -1040,11 +1007,9 @@ void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos)
EXPORT_SYMBOL_GPL(raw_seq_next);
void raw_seq_stop(struct seq_file *seq, void *v)
- __releases(&h->lock)
+ __releases(RCU)
{
- struct raw_hashinfo *h = pde_data(file_inode(seq->file));
-
- read_unlock(&h->lock);
+ rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(raw_seq_stop);
@@ -1106,6 +1071,7 @@ static __net_initdata struct pernet_operations raw_net_ops = {
int __init raw_proc_init(void)
{
+
return register_pernet_subsys(&raw_net_ops);
}
diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c
index ccacbde30a2c..999321834b94 100644
--- a/net/ipv4/raw_diag.c
+++ b/net/ipv4/raw_diag.c
@@ -34,57 +34,57 @@ raw_get_hashinfo(const struct inet_diag_req_v2 *r)
* use helper to figure it out.
*/
-static struct sock *raw_lookup(struct net *net, struct sock *from,
- const struct inet_diag_req_v2 *req)
+static bool raw_lookup(struct net *net, struct sock *sk,
+ const struct inet_diag_req_v2 *req)
{
struct inet_diag_req_raw *r = (void *)req;
- struct sock *sk = NULL;
if (r->sdiag_family == AF_INET)
- sk = __raw_v4_lookup(net, from, r->sdiag_raw_protocol,
- r->id.idiag_dst[0],
- r->id.idiag_src[0],
- r->id.idiag_if, 0);
+ return raw_v4_match(net, sk, r->sdiag_raw_protocol,
+ r->id.idiag_dst[0],
+ r->id.idiag_src[0],
+ r->id.idiag_if, 0);
#if IS_ENABLED(CONFIG_IPV6)
else
- sk = __raw_v6_lookup(net, from, r->sdiag_raw_protocol,
- (const struct in6_addr *)r->id.idiag_src,
- (const struct in6_addr *)r->id.idiag_dst,
- r->id.idiag_if, 0);
+ return raw_v6_match(net, sk, r->sdiag_raw_protocol,
+ (const struct in6_addr *)r->id.idiag_src,
+ (const struct in6_addr *)r->id.idiag_dst,
+ r->id.idiag_if, 0);
#endif
- return sk;
+ return false;
}
static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2 *r)
{
struct raw_hashinfo *hashinfo = raw_get_hashinfo(r);
- struct sock *sk = NULL, *s;
+ struct hlist_nulls_head *hlist;
+ struct hlist_nulls_node *hnode;
+ struct sock *sk;
int slot;
if (IS_ERR(hashinfo))
return ERR_CAST(hashinfo);
- read_lock(&hashinfo->lock);
+ rcu_read_lock();
for (slot = 0; slot < RAW_HTABLE_SIZE; slot++) {
- sk_for_each(s, &hashinfo->ht[slot]) {
- sk = raw_lookup(net, s, r);
- if (sk) {
+ hlist = &hashinfo->ht[slot];
+ sk_nulls_for_each(sk, hnode, hlist) {
+ if (raw_lookup(net, sk, r)) {
/*
* Grab it and keep until we fill
- * diag meaage to be reported, so
+ * diag message to be reported, so
* caller should call sock_put then.
- * We can do that because we're keeping
- * hashinfo->lock here.
*/
- sock_hold(sk);
- goto out_unlock;
+ if (refcount_inc_not_zero(&sk->sk_refcnt))
+ goto out_unlock;
}
}
}
+ sk = ERR_PTR(-ENOENT);
out_unlock:
- read_unlock(&hashinfo->lock);
+ rcu_read_unlock();
- return sk ? sk : ERR_PTR(-ENOENT);
+ return sk;
}
static int raw_diag_dump_one(struct netlink_callback *cb,
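
Taking a reference on a socket found under RCU needs the refcount_inc_not_zero() step used in raw_sock_get() above: the memory is guaranteed valid for the read section, but the object may already have dropped its last reference. A userspace sketch of that primitive with C11 atomics (illustrative, not the kernel's refcount_t):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Take a reference only while the count is still positive. */
static bool refcount_inc_not_zero(atomic_uint *ref)
{
	unsigned int old = atomic_load_explicit(ref, memory_order_relaxed);

	do {
		if (old == 0)
			return false;	/* object is being freed: skip it */
	} while (!atomic_compare_exchange_weak_explicit(ref, &old, old + 1,
							memory_order_acquire,
							memory_order_relaxed));
	return true;
}

int main(void)
{
	atomic_uint live = 2, dying = 0;

	printf("live:  %d\n", refcount_inc_not_zero(&live));	/* 1 */
	printf("dying: %d\n", refcount_inc_not_zero(&dying));	/* 0 */
	return 0;
}
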
@@ -142,6 +142,8 @@ static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
struct raw_hashinfo *hashinfo = raw_get_hashinfo(r);
struct net *net = sock_net(skb->sk);
struct inet_diag_dump_data *cb_data;
+ struct hlist_nulls_head *hlist;
+ struct hlist_nulls_node *hnode;
int num, s_num, slot, s_slot;
struct sock *sk = NULL;
struct nlattr *bc;
@@ -154,11 +156,12 @@ static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
s_slot = cb->args[0];
num = s_num = cb->args[1];
- read_lock(&hashinfo->lock);
+ rcu_read_lock();
for (slot = s_slot; slot < RAW_HTABLE_SIZE; s_num = 0, slot++) {
num = 0;
- sk_for_each(sk, &hashinfo->ht[slot]) {
+ hlist = &hashinfo->ht[slot];
+ sk_nulls_for_each(sk, hnode, hlist) {
struct inet_sock *inet = inet_sk(sk);
if (!net_eq(sock_net(sk), net))
@@ -181,7 +184,7 @@ next:
}
out_unlock:
- read_unlock(&hashinfo->lock);
+ rcu_read_unlock();
cb->args[0] = slot;
cb->args[1] = num;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 14ebb4ec4a51..f7309452bdce 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -951,6 +951,24 @@ static int tcp_downgrade_zcopy_pure(struct sock *sk, struct sk_buff *skb)
return 0;
}
+
+static int tcp_wmem_schedule(struct sock *sk, int copy)
+{
+ int left;
+
+ if (likely(sk_wmem_schedule(sk, copy)))
+ return copy;
+
+ /* We could be in trouble if we have nothing queued.
+ * Use whatever is left in sk->sk_forward_alloc and tcp_wmem[0]
+ * to guarantee some progress.
+ */
+ left = sock_net(sk)->ipv4.sysctl_tcp_wmem[0] - sk->sk_wmem_queued;
+ if (left > 0)
+ sk_forced_mem_schedule(sk, min(left, copy));
+ return min(copy, sk->sk_forward_alloc);
+}
+
static struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags,
struct page *page, int offset, size_t *size)
{
@@ -986,7 +1004,11 @@ new_segment:
tcp_mark_push(tp, skb);
goto new_segment;
}
- if (tcp_downgrade_zcopy_pure(sk, skb) || !sk_wmem_schedule(sk, copy))
+ if (tcp_downgrade_zcopy_pure(sk, skb))
+ return NULL;
+
+ copy = tcp_wmem_schedule(sk, copy);
+ if (!copy)
return NULL;
if (can_coalesce) {
@@ -1334,8 +1356,11 @@ new_segment:
copy = min_t(int, copy, pfrag->size - pfrag->offset);
- if (tcp_downgrade_zcopy_pure(sk, skb) ||
- !sk_wmem_schedule(sk, copy))
+ if (tcp_downgrade_zcopy_pure(sk, skb))
+ goto wait_for_space;
+
+ copy = tcp_wmem_schedule(sk, copy);
+ if (!copy)
goto wait_for_space;
err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
@@ -1362,7 +1387,8 @@ new_segment:
skb_shinfo(skb)->flags |= SKBFL_PURE_ZEROCOPY;
if (!skb_zcopy_pure(skb)) {
- if (!sk_wmem_schedule(sk, copy))
+ copy = tcp_wmem_schedule(sk, copy);
+ if (!copy)
goto wait_for_space;
}
@@ -4599,12 +4625,6 @@ void __init tcp_init(void)
SLAB_HWCACHE_ALIGN | SLAB_PANIC |
SLAB_ACCOUNT,
NULL);
- tcp_hashinfo.bind2_bucket_cachep =
- kmem_cache_create("tcp_bind2_bucket",
- sizeof(struct inet_bind2_bucket), 0,
- SLAB_HWCACHE_ALIGN | SLAB_PANIC |
- SLAB_ACCOUNT,
- NULL);
/* Size and allocate the main established and bind bucket
* hash tables.
@@ -4627,9 +4647,8 @@ void __init tcp_init(void)
if (inet_ehash_locks_alloc(&tcp_hashinfo))
panic("TCP: failed to alloc ehash_locks");
tcp_hashinfo.bhash =
- alloc_large_system_hash("TCP bind bhash tables",
- sizeof(struct inet_bind_hashbucket) +
- sizeof(struct inet_bind2_hashbucket),
+ alloc_large_system_hash("TCP bind",
+ sizeof(struct inet_bind_hashbucket),
tcp_hashinfo.ehash_mask + 1,
17, /* one slot per 128 KB of memory */
0,
@@ -4638,12 +4657,9 @@ void __init tcp_init(void)
0,
64 * 1024);
tcp_hashinfo.bhash_size = 1U << tcp_hashinfo.bhash_size;
- tcp_hashinfo.bhash2 =
- (struct inet_bind2_hashbucket *)(tcp_hashinfo.bhash + tcp_hashinfo.bhash_size);
for (i = 0; i < tcp_hashinfo.bhash_size; i++) {
spin_lock_init(&tcp_hashinfo.bhash[i].lock);
INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
- INIT_HLIST_HEAD(&tcp_hashinfo.bhash2[i].chain);
}
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index be3947e70fec..a1626afe87a1 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -540,6 +540,7 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
struct proto *base)
{
prot[TCP_BPF_BASE] = *base;
+ prot[TCP_BPF_BASE].destroy = sock_map_destroy;
prot[TCP_BPF_BASE].close = sock_map_close;
prot[TCP_BPF_BASE].recvmsg = tcp_bpf_recvmsg;
prot[TCP_BPF_BASE].sock_is_readable = sk_msg_is_readable;
@@ -611,9 +612,6 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
return 0;
}
- if (inet_csk_has_ulp(sk))
- return -EINVAL;
-
if (sk->sk_family == AF_INET6) {
if (tcp_bpf_assert_proto_ops(psock->sk_proto))
return -EINVAL;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index fdc7beb81b68..80cb112ef142 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3966,7 +3966,7 @@ static bool smc_parse_options(const struct tcphdr *th,
/* Try to parse the MSS option from the TCP header. Return 0 on failure, clamped
* value on success.
*/
-static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss)
+u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss)
{
const unsigned char *ptr = (const unsigned char *)(th + 1);
int length = (th->doff * 4) - sizeof(struct tcphdr);
@@ -4005,6 +4005,7 @@ static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss)
}
return mss;
}
+EXPORT_SYMBOL_GPL(tcp_parse_mss_option);
/* Look for tcp options. Normally only called on SYN and SYNACK packets.
* But, this can also be called on packets in the established flow when
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8ab98e1aca67..18c913a2347a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3362,11 +3362,12 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
*/
void sk_forced_mem_schedule(struct sock *sk, int size)
{
- int amt;
+ int delta, amt;
- if (size <= sk->sk_forward_alloc)
+ delta = size - sk->sk_forward_alloc;
+ if (delta <= 0)
return;
- amt = sk_mem_pages(size);
+ amt = sk_mem_pages(delta);
sk->sk_forward_alloc += amt << PAGE_SHIFT;
sk_memory_allocated_add(sk, amt);
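
The sk_forced_mem_schedule() hunk fixes an over-charge: pages were accounted for the full request even when part of it was already covered by sk_forward_alloc. A quick numeric check, assuming 4 KiB pages and the usual round-up of sk_mem_pages():

#include <stdio.h>

#define PAGE_SIZE 4096
#define PAGES(x)  (((x) + PAGE_SIZE - 1) / PAGE_SIZE)  /* sk_mem_pages() */

int main(void)
{
	int forward_alloc = 3000, size = 5000;

	/* Before: pages for the full size, although 3000 bytes were
	 * already reserved. */
	printf("old charge: %d pages\n", (int)PAGES(size));		    /* 2 */
	/* After: only the missing delta is charged. */
	printf("new charge: %d pages\n", (int)PAGES(size - forward_alloc)); /* 1 */
	return 0;
}
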
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 70564ddccc46..658823e91eca 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -63,6 +63,7 @@
#include <net/compat.h>
#include <net/xfrm.h>
#include <net/ioam6.h>
+#include <net/rawv6.h>
#include <linux/uaccess.h>
#include <linux/mroute6.h>
@@ -1073,6 +1074,8 @@ static int __init inet6_init(void)
goto out;
}
+ raw_hashinfo_init(&raw_v6_hashinfo);
+
err = proto_register(&tcpv6_prot, 1);
if (err)
goto out;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 3e22cbe5966a..1bd10ae332e8 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -939,7 +939,6 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
__be16 proto;
__u32 mtu;
int nhoff;
- int thoff;
if (!pskb_inet_may_pull(skb))
goto tx_err;
@@ -960,10 +959,16 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
(ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
truncate = true;
- thoff = skb_transport_header(skb) - skb_mac_header(skb);
- if (skb->protocol == htons(ETH_P_IPV6) &&
- (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
- truncate = true;
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ int thoff;
+
+ if (skb_transport_header_was_set(skb))
+ thoff = skb_transport_header(skb) - skb_mac_header(skb);
+ else
+ thoff = nhoff + sizeof(struct ipv6hdr);
+ if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
+ truncate = true;
+ }
if (skb_cow_head(skb, dev->needed_headroom ?: t->hlen))
goto tx_err;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index d4aad41c9849..ec6e1509fc7c 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -62,7 +62,12 @@ struct ip6mr_result {
Note that the changes are semaphored via rtnl_lock.
*/
-static DEFINE_RWLOCK(mrt_lock);
+static DEFINE_SPINLOCK(mrt_lock);
+
+static struct net_device *vif_dev_read(const struct vif_device *vif)
+{
+ return rcu_dereference(vif->dev);
+}
/* Multicast router control variables */
@@ -85,11 +90,11 @@ static void ip6mr_free_table(struct mr_table *mrt);
static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
struct net_device *dev, struct sk_buff *skb,
struct mfc6_cache *cache);
-static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
+static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
mifi_t mifi, int assert);
static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
int cmd);
-static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
+static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
struct netlink_callback *cb);
static void mroute_clean_tables(struct mr_table *mrt, int flags);
@@ -398,7 +403,7 @@ static void ip6mr_free_table(struct mr_table *mrt)
*/
static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(mrt_lock)
+ __acquires(RCU)
{
struct mr_vif_iter *iter = seq->private;
struct net *net = seq_file_net(seq);
@@ -410,14 +415,14 @@ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
iter->mrt = mrt;
- read_lock(&mrt_lock);
+ rcu_read_lock();
return mr_vif_seq_start(seq, pos);
}
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
- __releases(mrt_lock)
+ __releases(RCU)
{
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
}
static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
@@ -430,7 +435,11 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
"Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
} else {
const struct vif_device *vif = v;
- const char *name = vif->dev ? vif->dev->name : "none";
+ const struct net_device *vif_dev;
+ const char *name;
+
+ vif_dev = vif_dev_read(vif);
+ name = vif_dev ? vif_dev->name : "none";
seq_printf(seq,
"%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
@@ -549,13 +558,11 @@ static int pim6_rcv(struct sk_buff *skb)
if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
goto drop;
- reg_vif_num = mrt->mroute_reg_vif_num;
- read_lock(&mrt_lock);
+ /* Pairs with WRITE_ONCE() in mif6_add()/mif6_delete() */
+ reg_vif_num = READ_ONCE(mrt->mroute_reg_vif_num);
if (reg_vif_num >= 0)
- reg_dev = mrt->vif_table[reg_vif_num].dev;
- dev_hold(reg_dev);
- read_unlock(&mrt_lock);
+ reg_dev = vif_dev_read(&mrt->vif_table[reg_vif_num]);
if (!reg_dev)
goto drop;
@@ -570,7 +577,6 @@ static int pim6_rcv(struct sk_buff *skb)
netif_rx(skb);
- dev_put(reg_dev);
return 0;
drop:
kfree_skb(skb);
@@ -600,11 +606,12 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
goto tx_err;
- read_lock(&mrt_lock);
dev->stats.tx_bytes += skb->len;
dev->stats.tx_packets++;
- ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
- read_unlock(&mrt_lock);
+ rcu_read_lock();
+ ip6mr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num),
+ MRT6MSG_WHOLEPKT);
+ rcu_read_unlock();
kfree_skb(skb);
return NETDEV_TX_OK;
@@ -670,10 +677,11 @@ failure:
static int call_ip6mr_vif_entry_notifiers(struct net *net,
enum fib_event_type event_type,
struct vif_device *vif,
+ struct net_device *vif_dev,
mifi_t vif_index, u32 tb_id)
{
return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
- vif, vif_index, tb_id,
+ vif, vif_dev, vif_index, tb_id,
&net->ipv6.ipmr_seq);
}
@@ -698,23 +706,21 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
v = &mrt->vif_table[vifi];
- if (VIF_EXISTS(mrt, vifi))
- call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
- FIB_EVENT_VIF_DEL, v, vifi,
- mrt->id);
-
- write_lock_bh(&mrt_lock);
- dev = v->dev;
- v->dev = NULL;
-
- if (!dev) {
- write_unlock_bh(&mrt_lock);
+ dev = rtnl_dereference(v->dev);
+ if (!dev)
return -EADDRNOTAVAIL;
- }
+
+ call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
+ FIB_EVENT_VIF_DEL, v, dev,
+ vifi, mrt->id);
+ spin_lock(&mrt_lock);
+ RCU_INIT_POINTER(v->dev, NULL);
#ifdef CONFIG_IPV6_PIMSM_V2
- if (vifi == mrt->mroute_reg_vif_num)
- mrt->mroute_reg_vif_num = -1;
+ if (vifi == mrt->mroute_reg_vif_num) {
+ /* Pairs with READ_ONCE() in ip6mr_cache_report() and reg_vif_xmit() */
+ WRITE_ONCE(mrt->mroute_reg_vif_num, -1);
+ }
#endif
if (vifi + 1 == mrt->maxvif) {
@@ -723,10 +729,10 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
if (VIF_EXISTS(mrt, tmp))
break;
}
- mrt->maxvif = tmp + 1;
+ WRITE_ONCE(mrt->maxvif, tmp + 1);
}
- write_unlock_bh(&mrt_lock);
+ spin_unlock(&mrt_lock);
dev_set_allmulti(dev, -1);
@@ -826,7 +832,7 @@ static void ipmr_expire_process(struct timer_list *t)
spin_unlock(&mfc_unres_lock);
}
-/* Fill oifs list. It is called under write locked mrt_lock. */
+/* Fill oifs list. It is called with mrt_lock held. */
static void ip6mr_update_thresholds(struct mr_table *mrt,
struct mr_mfc *cache,
@@ -912,18 +918,18 @@ static int mif6_add(struct net *net, struct mr_table *mrt,
MIFF_REGISTER);
/* And finish update writing critical data */
- write_lock_bh(&mrt_lock);
- v->dev = dev;
+ spin_lock(&mrt_lock);
+ rcu_assign_pointer(v->dev, dev);
netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC);
#ifdef CONFIG_IPV6_PIMSM_V2
if (v->flags & MIFF_REGISTER)
- mrt->mroute_reg_vif_num = vifi;
+ WRITE_ONCE(mrt->mroute_reg_vif_num, vifi);
#endif
if (vifi + 1 > mrt->maxvif)
- mrt->maxvif = vifi + 1;
- write_unlock_bh(&mrt_lock);
+ WRITE_ONCE(mrt->maxvif, vifi + 1);
+ spin_unlock(&mrt_lock);
call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
- v, vifi, mrt->id);
+ v, dev, vifi, mrt->id);
return 0;
}
@@ -1028,10 +1034,10 @@ static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
/*
* Bounce a cache query up to pim6sd and netlink.
*
- * Called under mrt_lock.
+ * Called under rcu_read_lock().
*/
-static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
+static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
mifi_t mifi, int assert)
{
struct sock *mroute6_sk;
@@ -1072,7 +1078,7 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
if (assert == MRT6MSG_WRMIFWHOLE)
msg->im6_mif = mifi;
else
- msg->im6_mif = mrt->mroute_reg_vif_num;
+ msg->im6_mif = READ_ONCE(mrt->mroute_reg_vif_num);
msg->im6_pad = 0;
msg->im6_src = ipv6_hdr(pkt)->saddr;
msg->im6_dst = ipv6_hdr(pkt)->daddr;
@@ -1107,10 +1113,8 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
- rcu_read_lock();
mroute6_sk = rcu_dereference(mrt->mroute_sk);
if (!mroute6_sk) {
- rcu_read_unlock();
kfree_skb(skb);
return -EINVAL;
}
@@ -1119,7 +1123,7 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
/* Deliver to user space multicast routing algorithms */
ret = sock_queue_rcv_skb(mroute6_sk, skb);
- rcu_read_unlock();
+
if (ret < 0) {
net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
kfree_skb(skb);
@@ -1243,7 +1247,7 @@ static int ip6mr_device_event(struct notifier_block *this,
ip6mr_for_each_table(mrt, net) {
v = &mrt->vif_table[0];
for (ct = 0; ct < mrt->maxvif; ct++, v++) {
- if (v->dev == dev)
+ if (rcu_access_pointer(v->dev) == dev)
mif6_delete(mrt, ct, 1, NULL);
}
}
@@ -1262,7 +1266,7 @@ static int ip6mr_dump(struct net *net, struct notifier_block *nb,
struct netlink_ext_ack *extack)
{
return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
- ip6mr_mr_table_iter, &mrt_lock, extack);
+ ip6mr_mr_table_iter, extack);
}
static struct notifier_block ip6_mr_notifier = {
@@ -1437,12 +1441,12 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
&mfc->mf6cc_mcastgrp.sin6_addr, parent);
rcu_read_unlock();
if (c) {
- write_lock_bh(&mrt_lock);
+ spin_lock(&mrt_lock);
c->_c.mfc_parent = mfc->mf6cc_parent;
ip6mr_update_thresholds(mrt, &c->_c, ttls);
if (!mrtsock)
c->_c.mfc_flags |= MFC_STATIC;
- write_unlock_bh(&mrt_lock);
+ spin_unlock(&mrt_lock);
call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
c, mrt->id);
mr6_netlink_event(mrt, c, RTM_NEWROUTE);
@@ -1560,7 +1564,7 @@ static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
struct net *net = sock_net(sk);
rtnl_lock();
- write_lock_bh(&mrt_lock);
+ spin_lock(&mrt_lock);
if (rtnl_dereference(mrt->mroute_sk)) {
err = -EADDRINUSE;
} else {
@@ -1568,7 +1572,7 @@ static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
sock_set_flag(sk, SOCK_RCU_FREE);
atomic_inc(&net->ipv6.devconf_all->mc_forwarding);
}
- write_unlock_bh(&mrt_lock);
+ spin_unlock(&mrt_lock);
if (!err)
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
@@ -1598,14 +1602,14 @@ int ip6mr_sk_done(struct sock *sk)
rtnl_lock();
ip6mr_for_each_table(mrt, net) {
if (sk == rtnl_dereference(mrt->mroute_sk)) {
- write_lock_bh(&mrt_lock);
+ spin_lock(&mrt_lock);
RCU_INIT_POINTER(mrt->mroute_sk, NULL);
/* Note that mroute_sk had SOCK_RCU_FREE set,
* so the RCU grace period before sk freeing
* is guaranteed by sk_destruct()
*/
atomic_dec(&devconf->mc_forwarding);
- write_unlock_bh(&mrt_lock);
+ spin_unlock(&mrt_lock);
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
NETCONFA_MC_FORWARDING,
NETCONFA_IFINDEX_ALL,
@@ -1891,20 +1895,20 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
if (vr.mifi >= mrt->maxvif)
return -EINVAL;
vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
- read_lock(&mrt_lock);
+ rcu_read_lock();
vif = &mrt->vif_table[vr.mifi];
if (VIF_EXISTS(mrt, vr.mifi)) {
- vr.icount = vif->pkt_in;
- vr.ocount = vif->pkt_out;
- vr.ibytes = vif->bytes_in;
- vr.obytes = vif->bytes_out;
- read_unlock(&mrt_lock);
+ vr.icount = READ_ONCE(vif->pkt_in);
+ vr.ocount = READ_ONCE(vif->pkt_out);
+ vr.ibytes = READ_ONCE(vif->bytes_in);
+ vr.obytes = READ_ONCE(vif->bytes_out);
+ rcu_read_unlock();
if (copy_to_user(arg, &vr, sizeof(vr)))
return -EFAULT;
return 0;
}
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -EADDRNOTAVAIL;
case SIOCGETSGCNT_IN6:
if (copy_from_user(&sr, arg, sizeof(sr)))
@@ -1966,20 +1970,20 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
if (vr.mifi >= mrt->maxvif)
return -EINVAL;
vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
- read_lock(&mrt_lock);
+ rcu_read_lock();
vif = &mrt->vif_table[vr.mifi];
if (VIF_EXISTS(mrt, vr.mifi)) {
- vr.icount = vif->pkt_in;
- vr.ocount = vif->pkt_out;
- vr.ibytes = vif->bytes_in;
- vr.obytes = vif->bytes_out;
- read_unlock(&mrt_lock);
+ vr.icount = READ_ONCE(vif->pkt_in);
+ vr.ocount = READ_ONCE(vif->pkt_out);
+ vr.ibytes = READ_ONCE(vif->bytes_in);
+ vr.obytes = READ_ONCE(vif->bytes_out);
+ rcu_read_unlock();
if (copy_to_user(arg, &vr, sizeof(vr)))
return -EFAULT;
return 0;
}
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -EADDRNOTAVAIL;
case SIOCGETSGCNT_IN6:
if (copy_from_user(&sr, arg, sizeof(sr)))
@@ -2021,21 +2025,22 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct
static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
struct sk_buff *skb, int vifi)
{
- struct ipv6hdr *ipv6h;
struct vif_device *vif = &mrt->vif_table[vifi];
- struct net_device *dev;
+ struct net_device *vif_dev;
+ struct ipv6hdr *ipv6h;
struct dst_entry *dst;
struct flowi6 fl6;
- if (!vif->dev)
+ vif_dev = vif_dev_read(vif);
+ if (!vif_dev)
goto out_free;
#ifdef CONFIG_IPV6_PIMSM_V2
if (vif->flags & MIFF_REGISTER) {
- vif->pkt_out++;
- vif->bytes_out += skb->len;
- vif->dev->stats.tx_bytes += skb->len;
- vif->dev->stats.tx_packets++;
+ WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
+ WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
+ vif_dev->stats.tx_bytes += skb->len;
+ vif_dev->stats.tx_packets++;
ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
goto out_free;
}
@@ -2068,14 +2073,13 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
* not mrouter) cannot join to more than one interface - it will
* result in receiving multiple packets.
*/
- dev = vif->dev;
- skb->dev = dev;
- vif->pkt_out++;
- vif->bytes_out += skb->len;
+ skb->dev = vif_dev;
+ WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
+ WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
/* We are about to write */
/* XXX: extension headers? */
- if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
+ if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(vif_dev)))
goto out_free;
ipv6h = ipv6_hdr(skb);
@@ -2084,7 +2088,7 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
IP6CB(skb)->flags |= IP6SKB_FORWARDED;
return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
- net, NULL, skb, skb->dev, dev,
+ net, NULL, skb, skb->dev, vif_dev,
ip6mr_forward2_finish);
out_free:
@@ -2092,17 +2096,20 @@ out_free:
return 0;
}
+/* Called with rcu_read_lock() */
static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
int ct;
- for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
- if (mrt->vif_table[ct].dev == dev)
+ /* Pairs with WRITE_ONCE() in mif6_delete()/mif6_add() */
+ for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) {
+ if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev)
break;
}
return ct;
}
+/* Called under rcu_read_lock() */
static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
struct net_device *dev, struct sk_buff *skb,
struct mfc6_cache *c)
@@ -2122,20 +2129,18 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
/* For an (*,G) entry, we only check that the incoming
* interface is part of the static tree.
*/
- rcu_read_lock();
cache_proxy = mr_mfc_find_any_parent(mrt, vif);
if (cache_proxy &&
- cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
- rcu_read_unlock();
+ cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
goto forward;
- }
- rcu_read_unlock();
}
/*
* Wrong interface: drop packet and (maybe) send PIM assert.
*/
- if (mrt->vif_table[vif].dev != dev) {
+ if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) {
c->_c.mfc_un.res.wrong_if++;
if (true_vifi >= 0 && mrt->mroute_do_assert &&
@@ -2159,8 +2164,10 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
}
forward:
- mrt->vif_table[vif].pkt_in++;
- mrt->vif_table[vif].bytes_in += skb->len;
+ WRITE_ONCE(mrt->vif_table[vif].pkt_in,
+ mrt->vif_table[vif].pkt_in + 1);
+ WRITE_ONCE(mrt->vif_table[vif].bytes_in,
+ mrt->vif_table[vif].bytes_in + skb->len);
/*
* Forward the frame
@@ -2238,7 +2245,6 @@ int ip6_mr_input(struct sk_buff *skb)
return err;
}
- read_lock(&mrt_lock);
cache = ip6mr_cache_find(mrt,
&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
if (!cache) {
@@ -2259,19 +2265,15 @@ int ip6_mr_input(struct sk_buff *skb)
vif = ip6mr_find_vif(mrt, dev);
if (vif >= 0) {
int err = ip6mr_cache_unresolved(mrt, vif, skb, dev);
- read_unlock(&mrt_lock);
return err;
}
- read_unlock(&mrt_lock);
kfree_skb(skb);
return -ENODEV;
}
ip6_mr_forward(net, mrt, dev, skb, cache);
- read_unlock(&mrt_lock);
-
return 0;
}
@@ -2287,7 +2289,7 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
if (!mrt)
return -ENOENT;
- read_lock(&mrt_lock);
+ rcu_read_lock();
cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
if (!cache && skb->dev) {
int vif = ip6mr_find_vif(mrt, skb->dev);
@@ -2305,14 +2307,14 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
dev = skb->dev;
if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -ENODEV;
}
/* really correct? */
skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
if (!skb2) {
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -ENOMEM;
}
@@ -2335,13 +2337,13 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
iph->daddr = rt->rt6i_dst.addr;
err = ip6mr_cache_unresolved(mrt, vif, skb2, dev);
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return err;
}
err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return err;
}
@@ -2460,7 +2462,7 @@ static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
return len;
}
-static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
+static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt)
{
struct net *net = read_pnet(&mrt->net);
struct nlmsghdr *nlh;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 3b7cbd522b54..722de9dd0ff7 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -61,46 +61,30 @@
#define ICMPV6_HDRLEN 4 /* ICMPv6 header, RFC 4443 Section 2.1 */
-struct raw_hashinfo raw_v6_hashinfo = {
- .lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock),
-};
+struct raw_hashinfo raw_v6_hashinfo;
EXPORT_SYMBOL_GPL(raw_v6_hashinfo);
-struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
- unsigned short num, const struct in6_addr *loc_addr,
- const struct in6_addr *rmt_addr, int dif, int sdif)
+bool raw_v6_match(struct net *net, struct sock *sk, unsigned short num,
+ const struct in6_addr *loc_addr,
+ const struct in6_addr *rmt_addr, int dif, int sdif)
{
- bool is_multicast = ipv6_addr_is_multicast(loc_addr);
-
- sk_for_each_from(sk)
- if (inet_sk(sk)->inet_num == num) {
-
- if (!net_eq(sock_net(sk), net))
- continue;
-
- if (!ipv6_addr_any(&sk->sk_v6_daddr) &&
- !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))
- continue;
-
- if (!raw_sk_bound_dev_eq(net, sk->sk_bound_dev_if,
- dif, sdif))
- continue;
-
- if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
- if (ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))
- goto found;
- if (is_multicast &&
- inet6_mc_check(sk, loc_addr, rmt_addr))
- goto found;
- continue;
- }
- goto found;
- }
- sk = NULL;
-found:
- return sk;
+ if (inet_sk(sk)->inet_num != num ||
+ !net_eq(sock_net(sk), net) ||
+ (!ipv6_addr_any(&sk->sk_v6_daddr) &&
+ !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) ||
+ !raw_sk_bound_dev_eq(net, sk->sk_bound_dev_if,
+ dif, sdif))
+ return false;
+
+ if (ipv6_addr_any(&sk->sk_v6_rcv_saddr) ||
+ ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr) ||
+ (ipv6_addr_is_multicast(loc_addr) &&
+ inet6_mc_check(sk, loc_addr, rmt_addr)))
+ return true;
+
+ return false;
}
-EXPORT_SYMBOL_GPL(__raw_v6_lookup);
+EXPORT_SYMBOL_GPL(raw_v6_match);
/*
* 0 - deliver
@@ -156,31 +140,27 @@ EXPORT_SYMBOL(rawv6_mh_filter_unregister);
*/
static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
{
+ struct net *net = dev_net(skb->dev);
+ struct hlist_nulls_head *hlist;
+ struct hlist_nulls_node *hnode;
const struct in6_addr *saddr;
const struct in6_addr *daddr;
struct sock *sk;
bool delivered = false;
__u8 hash;
- struct net *net;
saddr = &ipv6_hdr(skb)->saddr;
daddr = saddr + 1;
hash = nexthdr & (RAW_HTABLE_SIZE - 1);
-
- read_lock(&raw_v6_hashinfo.lock);
- sk = sk_head(&raw_v6_hashinfo.ht[hash]);
-
- if (!sk)
- goto out;
-
- net = dev_net(skb->dev);
- sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr,
- inet6_iif(skb), inet6_sdif(skb));
-
- while (sk) {
+ hlist = &raw_v6_hashinfo.ht[hash];
+ rcu_read_lock();
+ sk_nulls_for_each(sk, hnode, hlist) {
int filtered;
+ if (!raw_v6_match(net, sk, nexthdr, daddr, saddr,
+ inet6_iif(skb), inet6_sdif(skb)))
+ continue;
delivered = true;
switch (nexthdr) {
case IPPROTO_ICMPV6:
@@ -219,23 +199,14 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
rawv6_rcv(sk, clone);
}
}
- sk = __raw_v6_lookup(net, sk_next(sk), nexthdr, daddr, saddr,
- inet6_iif(skb), inet6_sdif(skb));
}
-out:
- read_unlock(&raw_v6_hashinfo.lock);
+ rcu_read_unlock();
return delivered;
}
bool raw6_local_deliver(struct sk_buff *skb, int nexthdr)
{
- struct sock *raw_sk;
-
- raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (RAW_HTABLE_SIZE - 1)]);
- if (raw_sk && !ipv6_raw_deliver(skb, nexthdr))
- raw_sk = NULL;
-
- return raw_sk != NULL;
+ return ipv6_raw_deliver(skb, nexthdr);
}
/* This cleans up af_inet6 a bit. -DaveM */
@@ -361,30 +332,25 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
u8 type, u8 code, int inner_offset, __be32 info)
{
+ struct net *net = dev_net(skb->dev);
+ struct hlist_nulls_head *hlist;
+ struct hlist_nulls_node *hnode;
struct sock *sk;
int hash;
- const struct in6_addr *saddr, *daddr;
- struct net *net;
hash = nexthdr & (RAW_HTABLE_SIZE - 1);
-
- read_lock(&raw_v6_hashinfo.lock);
- sk = sk_head(&raw_v6_hashinfo.ht[hash]);
- if (sk) {
+ hlist = &raw_v6_hashinfo.ht[hash];
+ rcu_read_lock();
+ sk_nulls_for_each(sk, hnode, hlist) {
/* Note: ipv6_hdr(skb) != skb->data */
const struct ipv6hdr *ip6h = (const struct ipv6hdr *)skb->data;
- saddr = &ip6h->saddr;
- daddr = &ip6h->daddr;
- net = dev_net(skb->dev);
-
- while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr,
- inet6_iif(skb), inet6_iif(skb)))) {
- rawv6_err(sk, skb, NULL, type, code,
- inner_offset, info);
- sk = sk_next(sk);
- }
+
+ if (!raw_v6_match(net, sk, nexthdr, &ip6h->saddr, &ip6h->daddr,
+ inet6_iif(skb), inet6_iif(skb)))
+ continue;
+ rawv6_err(sk, skb, NULL, type, code, inner_offset, info);
}
- read_unlock(&raw_v6_hashinfo.lock);
+ rcu_read_unlock();
}
static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb)
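Note: the raw.c conversion above follows the same pattern as the ip6mr change before it: the reader side moves from a rwlock to RCU. The cursor-style __raw_v6_lookup(), which returned the next matching socket starting from a given one, becomes the pure predicate raw_v6_match(), and each caller now walks the bucket itself with sk_nulls_for_each() under rcu_read_lock(). A minimal sketch of that read side, with hypothetical names (my_table, my_match, my_lookup) standing in for the real ones:

#include <net/inet_sock.h>
#include <net/sock.h>

static struct hlist_nulls_head my_table[16];

static bool my_match(struct net *net, const struct sock *sk, u16 num)
{
	return net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == num;
}

/* Return the first matching socket with a reference held, or NULL.
 * Simplified: a production lookup would also validate the nulls
 * marker to detect a socket that moved chains during the walk.
 */
static struct sock *my_lookup(struct net *net, unsigned int hash, u16 num)
{
	struct hlist_nulls_node *hnode;
	struct sock *sk;

	rcu_read_lock();
	sk_nulls_for_each(sk, hnode, &my_table[hash & 15]) {
		if (!my_match(net, sk, num))
			continue;
		sock_hold(sk);		/* pin before leaving the RCU section */
		rcu_read_unlock();
		return sk;
	}
	rcu_read_unlock();
	return NULL;
}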
diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c
index 7873bd1389c3..a8e2425e43b0 100644
--- a/net/netfilter/nf_dup_netdev.c
+++ b/net/netfilter/nf_dup_netdev.c
@@ -13,14 +13,31 @@
#include <net/netfilter/nf_tables_offload.h>
#include <net/netfilter/nf_dup_netdev.h>
-static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev)
+#define NF_RECURSION_LIMIT 2
+
+static DEFINE_PER_CPU(u8, nf_dup_skb_recursion);
+
+static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev,
+ enum nf_dev_hooks hook)
{
- if (skb_mac_header_was_set(skb))
+ if (__this_cpu_read(nf_dup_skb_recursion) > NF_RECURSION_LIMIT)
+ goto err;
+
+ if (hook == NF_NETDEV_INGRESS && skb_mac_header_was_set(skb)) {
+ if (skb_cow_head(skb, skb->mac_len))
+ goto err;
+
skb_push(skb, skb->mac_len);
+ }
skb->dev = dev;
skb_clear_tstamp(skb);
+ __this_cpu_inc(nf_dup_skb_recursion);
dev_queue_xmit(skb);
+ __this_cpu_dec(nf_dup_skb_recursion);
+ return;
+err:
+ kfree_skb(skb);
}
void nf_fwd_netdev_egress(const struct nft_pktinfo *pkt, int oif)
@@ -33,7 +50,7 @@ void nf_fwd_netdev_egress(const struct nft_pktinfo *pkt, int oif)
return;
}
- nf_do_netdev_egress(pkt->skb, dev);
+ nf_do_netdev_egress(pkt->skb, dev, nft_hook(pkt));
}
EXPORT_SYMBOL_GPL(nf_fwd_netdev_egress);
@@ -48,7 +65,7 @@ void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif)
skb = skb_clone(pkt->skb, GFP_ATOMIC);
if (skb)
- nf_do_netdev_egress(skb, dev);
+ nf_do_netdev_egress(skb, dev, nft_hook(pkt));
}
EXPORT_SYMBOL_GPL(nf_dup_netdev_egress);
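Note: nf_do_netdev_egress() can be re-entered on the same CPU, since the duplicated packet is fed back through dev_queue_xmit() and may traverse the same netfilter hook again. The fix bounds that with a per-CPU depth counter (no atomics needed, assuming these hooks run with BH disabled) and, for the ingress hook only, makes the MAC-header restore safe with skb_cow_head() before skb_push(). The guard pattern in isolation, with illustrative names:

#include <linux/netdevice.h>
#include <linux/percpu.h>
#include <linux/skbuff.h>

#define MY_RECURSION_LIMIT 2

static DEFINE_PER_CPU(u8, my_xmit_recursion);

static void my_guarded_xmit(struct sk_buff *skb, struct net_device *dev)
{
	if (__this_cpu_read(my_xmit_recursion) > MY_RECURSION_LIMIT) {
		kfree_skb(skb);		/* refuse to recurse any deeper */
		return;
	}

	skb->dev = dev;
	__this_cpu_inc(my_xmit_recursion);
	dev_queue_xmit(skb);		/* may re-enter this path on this CPU */
	__this_cpu_dec(my_xmit_recursion);
}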
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index af15102bc696..f466af4f8531 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -614,7 +614,7 @@ static void __net_exit cttimeout_net_exit(struct net *net)
nf_ct_untimeout(net, NULL);
- list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_freelist, head) {
+ list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_freelist, free_head) {
list_del(&cur->free_head);
if (refcount_dec_and_test(&cur->refcnt))
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index ac4859241e17..55d2d49c3425 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -14,6 +14,7 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <linux/random.h>
#include <linux/smp.h>
#include <linux/static_key.h>
#include <net/dst.h>
@@ -32,8 +33,6 @@
#define NFT_META_SECS_PER_DAY 86400
#define NFT_META_DAYS_PER_WEEK 7
-static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state);
-
static u8 nft_meta_weekday(void)
{
time64_t secs = ktime_get_real_seconds();
@@ -271,13 +270,6 @@ static bool nft_meta_get_eval_ifname(enum nft_meta_keys key, u32 *dest,
return true;
}
-static noinline u32 nft_prandom_u32(void)
-{
- struct rnd_state *state = this_cpu_ptr(&nft_prandom_state);
-
- return prandom_u32_state(state);
-}
-
#ifdef CONFIG_IP_ROUTE_CLASSID
static noinline bool
nft_meta_get_eval_rtclassid(const struct sk_buff *skb, u32 *dest)
@@ -389,7 +381,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
break;
#endif
case NFT_META_PRANDOM:
- *dest = nft_prandom_u32();
+ *dest = get_random_u32();
break;
#ifdef CONFIG_XFRM
case NFT_META_SECPATH:
@@ -518,7 +510,6 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
len = IFNAMSIZ;
break;
case NFT_META_PRANDOM:
- prandom_init_once(&nft_prandom_state);
len = sizeof(u32);
break;
#ifdef CONFIG_XFRM
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
index 81b40c663d86..45d3dc9e96f2 100644
--- a/net/netfilter/nft_numgen.c
+++ b/net/netfilter/nft_numgen.c
@@ -9,12 +9,11 @@
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
+#include <linux/random.h>
#include <linux/static_key.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
-static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state);
-
struct nft_ng_inc {
u8 dreg;
u32 modulus;
@@ -135,12 +134,9 @@ struct nft_ng_random {
u32 offset;
};
-static u32 nft_ng_random_gen(struct nft_ng_random *priv)
+static u32 nft_ng_random_gen(const struct nft_ng_random *priv)
{
- struct rnd_state *state = this_cpu_ptr(&nft_numgen_prandom_state);
-
- return reciprocal_scale(prandom_u32_state(state), priv->modulus) +
- priv->offset;
+ return reciprocal_scale(get_random_u32(), priv->modulus) + priv->offset;
}
static void nft_ng_random_eval(const struct nft_expr *expr,
@@ -168,8 +164,6 @@ static int nft_ng_random_init(const struct nft_ctx *ctx,
if (priv->offset + priv->modulus - 1 < priv->offset)
return -EOVERFLOW;
- prandom_init_once(&nft_numgen_prandom_state);
-
return nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg,
NULL, NFT_DATA_VALUE, sizeof(u32));
}
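Note: both nft_meta and nft_numgen drop their private per-CPU prandom state in favor of get_random_u32(), so the prandom_init_once() setup disappears and the values now come from the kernel's cryptographically seeded generator. Mapping the raw 32-bit value into [offset, offset + modulus) still goes through reciprocal_scale(), which trades a modulo for a multiply-and-shift. Standalone:

#include <linux/kernel.h>	/* reciprocal_scale() */
#include <linux/random.h>

/* Pick a value in [offset, offset + modulus) without a division:
 * reciprocal_scale(x, m) computes (u32)(((u64)x * m) >> 32).
 */
static u32 my_bounded_random(u32 modulus, u32 offset)
{
	return reciprocal_scale(get_random_u32(), modulus) + offset;
}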
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 372bf54a0ca9..e20d1a973417 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -407,7 +407,7 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
if (flags & IP6_FH_F_FRAG) {
if (frag_off) {
key->ip.frag = OVS_FRAG_TYPE_LATER;
- key->ip.proto = nexthdr;
+ key->ip.proto = NEXTHDR_FRAGMENT;
return 0;
}
key->ip.frag = OVS_FRAG_TYPE_FIRST;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index ed4ccef5d6a8..5449ed114e40 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -1146,9 +1146,9 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
struct tc_netem_rate rate;
struct tc_netem_slot slot;
- qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency),
+ qopt.latency = min_t(psched_time_t, PSCHED_NS2TICKS(q->latency),
UINT_MAX);
- qopt.jitter = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->jitter),
+ qopt.jitter = min_t(psched_time_t, PSCHED_NS2TICKS(q->jitter),
UINT_MAX);
qopt.limit = q->limit;
qopt.loss = q->loss;
diff --git a/net/socket.c b/net/socket.c
index 2bc8773d9dc5..1b6f5e2ebef5 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1878,10 +1878,8 @@ out_fd:
return ERR_PTR(err);
}
-int __sys_accept4_file(struct file *file, unsigned file_flags,
- struct sockaddr __user *upeer_sockaddr,
- int __user *upeer_addrlen, int flags,
- unsigned long nofile)
+static int __sys_accept4_file(struct file *file, struct sockaddr __user *upeer_sockaddr,
+ int __user *upeer_addrlen, int flags)
{
struct file *newfile;
int newfd;
@@ -1892,11 +1890,11 @@ int __sys_accept4_file(struct file *file, unsigned file_flags,
if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
- newfd = __get_unused_fd_flags(flags, nofile);
+ newfd = get_unused_fd_flags(flags);
if (unlikely(newfd < 0))
return newfd;
- newfile = do_accept(file, file_flags, upeer_sockaddr, upeer_addrlen,
+ newfile = do_accept(file, 0, upeer_sockaddr, upeer_addrlen,
flags);
if (IS_ERR(newfile)) {
put_unused_fd(newfd);
@@ -1926,9 +1924,8 @@ int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
f = fdget(fd);
if (f.file) {
- ret = __sys_accept4_file(f.file, 0, upeer_sockaddr,
- upeer_addrlen, flags,
- rlimit(RLIMIT_NOFILE));
+ ret = __sys_accept4_file(f.file, upeer_sockaddr,
+ upeer_addrlen, flags);
fdput(f);
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index e2c6eca0271b..b6781ada3aa8 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -651,6 +651,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
new->cl_discrtry = clnt->cl_discrtry;
new->cl_chatty = clnt->cl_chatty;
new->cl_principal = clnt->cl_principal;
+ new->cl_max_connect = clnt->cl_max_connect;
return new;
out_err:
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index df194cc07035..f87a2d8f23a7 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -919,7 +919,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p,
EXPORT_SYMBOL_GPL(xdr_init_encode);
/**
- * xdr_commit_encode - Ensure all data is written to buffer
+ * __xdr_commit_encode - Ensure all data is written to buffer
* @xdr: pointer to xdr_stream
*
* We handle encoding across page boundaries by giving the caller a
@@ -931,26 +931,29 @@ EXPORT_SYMBOL_GPL(xdr_init_encode);
* required at the end of encoding, or any other time when the xdr_buf
* data might be read.
*/
-inline void xdr_commit_encode(struct xdr_stream *xdr)
+void __xdr_commit_encode(struct xdr_stream *xdr)
{
- int shift = xdr->scratch.iov_len;
+ size_t shift = xdr->scratch.iov_len;
void *page;
- if (shift == 0)
- return;
page = page_address(*xdr->page_ptr);
memcpy(xdr->scratch.iov_base, page, shift);
memmove(page, page + shift, (void *)xdr->p - page);
xdr_reset_scratch_buffer(xdr);
}
-EXPORT_SYMBOL_GPL(xdr_commit_encode);
+EXPORT_SYMBOL_GPL(__xdr_commit_encode);
-static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
- size_t nbytes)
+/*
+ * The buffer space to be reserved crosses the boundary between
+ * xdr->buf->head and xdr->buf->pages, or between two pages
+ * in xdr->buf->pages.
+ */
+static noinline __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
+ size_t nbytes)
{
- __be32 *p;
int space_left;
int frag1bytes, frag2bytes;
+ void *p;
if (nbytes > PAGE_SIZE)
goto out_overflow; /* Bigger buffers require special handling */
@@ -964,6 +967,7 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
xdr->buf->page_len += frag1bytes;
xdr->page_ptr++;
xdr->iov = NULL;
+
/*
* If the last encode didn't end exactly on a page boundary, the
* next one will straddle boundaries. Encode into the next
@@ -972,14 +976,19 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
* space at the end of the previous buffer:
*/
xdr_set_scratch_buffer(xdr, xdr->p, frag1bytes);
- p = page_address(*xdr->page_ptr);
+
/*
- * Note this is where the next encode will start after we've
- * shifted this one back:
+ * xdr->p is where the next encode will start after
+ * xdr_commit_encode() has shifted this one back:
*/
- xdr->p = (void *)p + frag2bytes;
+ p = page_address(*xdr->page_ptr);
+ xdr->p = p + frag2bytes;
space_left = xdr->buf->buflen - xdr->buf->len;
- xdr->end = (void *)p + min_t(int, space_left, PAGE_SIZE);
+ if (space_left - nbytes >= PAGE_SIZE)
+ xdr->end = p + PAGE_SIZE;
+ else
+ xdr->end = p + space_left - frag1bytes;
+
xdr->buf->page_len += frag2bytes;
xdr->buf->len += nbytes;
return p;
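Note: renaming xdr_commit_encode() to __xdr_commit_encode() and removing its shift == 0 early return only makes sense if callers now reach it through an inline fast-path wrapper that performs that cheap check first (likewise, marking xdr_get_next_encode_buffer() noinline keeps the slow path out of callers' hot code). The header side is not part of this diff, so the following is the assumed shape of the wrapper:

/* Assumed inline wrapper in include/linux/sunrpc/xdr.h (not shown in
 * this diff): commit is a no-op unless scratch data is pending.
 */
static inline void xdr_commit_encode(struct xdr_stream *xdr)
{
	if (unlikely(xdr->scratch.iov_len))
		__xdr_commit_encode(xdr);
}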
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 5f0155fdefc7..11cf7c646644 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -478,10 +478,10 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
unsigned int write_len;
u64 offset;
- seg = &info->wi_chunk->ch_segments[info->wi_seg_no];
- if (!seg)
+ if (info->wi_seg_no >= info->wi_chunk->ch_segcount)
goto out_overflow;
+ seg = &info->wi_chunk->ch_segments[info->wi_seg_no];
write_len = min(remaining, seg->rs_length - info->wi_seg_off);
if (!write_len)
goto out_overflow;
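Note: the deleted check could never fire. &array[i] is address arithmetic, not a load, so it is non-NULL even when i is out of range (and merely forming a pointer beyond one-past-the-end is already undefined behavior). The fix validates the index against ch_segcount before deriving the element pointer. The general shape:

/* Bounds-check the index first, then form the pointer.
 * 'struct item' and the parameters are illustrative.
 */
struct item {
	int v;
};

static struct item *nth_item(struct item *items, unsigned int count,
			     unsigned int n)
{
	if (n >= count)		/* check before &items[n], never after */
		return NULL;
	return &items[n];
}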
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 3f4542e0f065..434e70eabe08 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -109,10 +109,9 @@ static void __net_exit tipc_exit_net(struct net *net)
struct tipc_net *tn = tipc_net(net);
tipc_detach_loopback(net);
+ tipc_net_stop(net);
/* Make sure the tipc_net_finalize_work() finished */
cancel_work_sync(&tn->work);
- tipc_net_stop(net);
-
tipc_bcast_stop(net);
tipc_nametbl_stop(net);
tipc_sk_rht_destroy(net);
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 1d8ba233d047..d1180370fdf4 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -1202,14 +1202,3 @@ void tipc_dest_list_purge(struct list_head *l)
kfree(dst);
}
}
-
-int tipc_dest_list_len(struct list_head *l)
-{
- struct tipc_dest *dst;
- int i = 0;
-
- list_for_each_entry(dst, l, list) {
- i++;
- }
- return i;
-}
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 259f95e3d99c..3bcd9ef8cee3 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -151,6 +151,5 @@ bool tipc_dest_push(struct list_head *l, u32 node, u32 port);
bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port);
bool tipc_dest_del(struct list_head *l, u32 node, u32 port);
void tipc_dest_list_purge(struct list_head *l);
-int tipc_dest_list_len(struct list_head *l);
#endif
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index da176411c1b5..2ffede463e4a 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -921,6 +921,8 @@ static void tls_update(struct sock *sk, struct proto *p,
{
struct tls_context *ctx;
+ WARN_ON_ONCE(sk->sk_prot == p);
+
ctx = tls_get_ctx(sk);
if (likely(ctx)) {
ctx->sk_write_space = write_space;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 3453e0053f76..49f6626330c3 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -118,15 +118,11 @@
#include "scm.h"
-spinlock_t unix_table_locks[2 * UNIX_HASH_SIZE];
-EXPORT_SYMBOL_GPL(unix_table_locks);
-struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
-EXPORT_SYMBOL_GPL(unix_socket_table);
static atomic_long_t unix_nr_socks;
/* SMP locking strategy:
- * hash table is protected with spinlock unix_table_locks
- * each socket state is protected by separate spin lock.
+ * hash table is protected with spinlock.
+ * each socket state is protected by separate spinlock.
*/
static unsigned int unix_unbound_hash(struct sock *sk)
@@ -137,12 +133,12 @@ static unsigned int unix_unbound_hash(struct sock *sk)
hash ^= hash >> 8;
hash ^= sk->sk_type;
- return UNIX_HASH_SIZE + (hash & (UNIX_HASH_SIZE - 1));
+ return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
}
static unsigned int unix_bsd_hash(struct inode *i)
{
- return i->i_ino & (UNIX_HASH_SIZE - 1);
+ return i->i_ino & UNIX_HASH_MOD;
}
static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
@@ -155,26 +151,28 @@ static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
hash ^= hash >> 8;
hash ^= type;
- return hash & (UNIX_HASH_SIZE - 1);
+ return hash & UNIX_HASH_MOD;
}
-static void unix_table_double_lock(unsigned int hash1, unsigned int hash2)
+static void unix_table_double_lock(struct net *net,
+ unsigned int hash1, unsigned int hash2)
{
/* hash1 and hash2 are never the same because
- * one is between 0 and UNIX_HASH_SIZE - 1, and
- * another is between UNIX_HASH_SIZE and UNIX_HASH_SIZE * 2.
+ * one is between 0 and UNIX_HASH_MOD, and
+ * another is between UNIX_HASH_MOD + 1 and UNIX_HASH_SIZE - 1.
*/
if (hash1 > hash2)
swap(hash1, hash2);
- spin_lock(&unix_table_locks[hash1]);
- spin_lock_nested(&unix_table_locks[hash2], SINGLE_DEPTH_NESTING);
+ spin_lock(&net->unx.table.locks[hash1]);
+ spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
}
-static void unix_table_double_unlock(unsigned int hash1, unsigned int hash2)
+static void unix_table_double_unlock(struct net *net,
+ unsigned int hash1, unsigned int hash2)
{
- spin_unlock(&unix_table_locks[hash1]);
- spin_unlock(&unix_table_locks[hash2]);
+ spin_unlock(&net->unx.table.locks[hash1]);
+ spin_unlock(&net->unx.table.locks[hash2]);
}
#ifdef CONFIG_SECURITY_NETWORK
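Note: unix_table_double_lock() keeps its deadlock-avoidance scheme while moving to the per-netns lock array: the two bucket locks are always taken in ascending index order (hence the swap()), so no two CPUs can acquire the same pair in opposite orders, and spin_lock_nested() tells lockdep that nesting two locks of the same class is intentional here. The idiom in isolation:

#include <linux/kernel.h>	/* swap() */
#include <linux/spinlock.h>

/* Lock two entries of one lock array without ABBA deadlock by imposing
 * a global order. 'locks' is illustrative; i1 != i2 is assumed, exactly
 * as the comment in the hunk above guarantees for the two hash ranges.
 */
static void my_double_lock(spinlock_t *locks, unsigned int i1,
			   unsigned int i2)
{
	if (i1 > i2)
		swap(i1, i2);
	spin_lock(&locks[i1]);
	spin_lock_nested(&locks[i2], SINGLE_DEPTH_NESTING);
}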
@@ -300,34 +298,34 @@ static void __unix_remove_socket(struct sock *sk)
sk_del_node_init(sk);
}
-static void __unix_insert_socket(struct sock *sk)
+static void __unix_insert_socket(struct net *net, struct sock *sk)
{
DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
- sk_add_node(sk, &unix_socket_table[sk->sk_hash]);
+ sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);
}
-static void __unix_set_addr_hash(struct sock *sk, struct unix_address *addr,
- unsigned int hash)
+static void __unix_set_addr_hash(struct net *net, struct sock *sk,
+ struct unix_address *addr, unsigned int hash)
{
__unix_remove_socket(sk);
smp_store_release(&unix_sk(sk)->addr, addr);
sk->sk_hash = hash;
- __unix_insert_socket(sk);
+ __unix_insert_socket(net, sk);
}
-static void unix_remove_socket(struct sock *sk)
+static void unix_remove_socket(struct net *net, struct sock *sk)
{
- spin_lock(&unix_table_locks[sk->sk_hash]);
+ spin_lock(&net->unx.table.locks[sk->sk_hash]);
__unix_remove_socket(sk);
- spin_unlock(&unix_table_locks[sk->sk_hash]);
+ spin_unlock(&net->unx.table.locks[sk->sk_hash]);
}
-static void unix_insert_unbound_socket(struct sock *sk)
+static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
{
- spin_lock(&unix_table_locks[sk->sk_hash]);
- __unix_insert_socket(sk);
- spin_unlock(&unix_table_locks[sk->sk_hash]);
+ spin_lock(&net->unx.table.locks[sk->sk_hash]);
+ __unix_insert_socket(net, sk);
+ spin_unlock(&net->unx.table.locks[sk->sk_hash]);
}
static struct sock *__unix_find_socket_byname(struct net *net,
@@ -336,12 +334,9 @@ static struct sock *__unix_find_socket_byname(struct net *net,
{
struct sock *s;
- sk_for_each(s, &unix_socket_table[hash]) {
+ sk_for_each(s, &net->unx.table.buckets[hash]) {
struct unix_sock *u = unix_sk(s);
- if (!net_eq(sock_net(s), net))
- continue;
-
if (u->addr->len == len &&
!memcmp(u->addr->name, sunname, len))
return s;
@@ -355,30 +350,30 @@ static inline struct sock *unix_find_socket_byname(struct net *net,
{
struct sock *s;
- spin_lock(&unix_table_locks[hash]);
+ spin_lock(&net->unx.table.locks[hash]);
s = __unix_find_socket_byname(net, sunname, len, hash);
if (s)
sock_hold(s);
- spin_unlock(&unix_table_locks[hash]);
+ spin_unlock(&net->unx.table.locks[hash]);
return s;
}
-static struct sock *unix_find_socket_byinode(struct inode *i)
+static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
{
unsigned int hash = unix_bsd_hash(i);
struct sock *s;
- spin_lock(&unix_table_locks[hash]);
- sk_for_each(s, &unix_socket_table[hash]) {
+ spin_lock(&net->unx.table.locks[hash]);
+ sk_for_each(s, &net->unx.table.buckets[hash]) {
struct dentry *dentry = unix_sk(s)->path.dentry;
if (dentry && d_backing_inode(dentry) == i) {
sock_hold(s);
- spin_unlock(&unix_table_locks[hash]);
+ spin_unlock(&net->unx.table.locks[hash]);
return s;
}
}
- spin_unlock(&unix_table_locks[hash]);
+ spin_unlock(&net->unx.table.locks[hash]);
return NULL;
}
@@ -576,12 +571,12 @@ static void unix_sock_destructor(struct sock *sk)
static void unix_release_sock(struct sock *sk, int embrion)
{
struct unix_sock *u = unix_sk(sk);
- struct path path;
struct sock *skpair;
struct sk_buff *skb;
+ struct path path;
int state;
- unix_remove_socket(sk);
+ unix_remove_socket(sock_net(sk), sk);
/* Clear state */
unix_state_lock(sk);
@@ -930,9 +925,9 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
init_waitqueue_head(&u->peer_wait);
init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
memset(&u->scm_stat, 0, sizeof(struct scm_stat));
- unix_insert_unbound_socket(sk);
+ unix_insert_unbound_socket(net, sk);
- sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+ sock_prot_inuse_add(net, sk->sk_prot, 1);
return sk;
@@ -1015,7 +1010,7 @@ static struct sock *unix_find_bsd(struct net *net, struct sockaddr_un *sunaddr,
if (!S_ISSOCK(inode->i_mode))
goto path_put;
- sk = unix_find_socket_byinode(inode);
+ sk = unix_find_socket_byinode(net, inode);
if (!sk)
goto path_put;
@@ -1074,6 +1069,7 @@ static int unix_autobind(struct sock *sk)
{
unsigned int new_hash, old_hash = sk->sk_hash;
struct unix_sock *u = unix_sk(sk);
+ struct net *net = sock_net(sk);
struct unix_address *addr;
u32 lastnum, ordernum;
int err;
@@ -1102,11 +1098,10 @@ retry:
sprintf(addr->name->sun_path + 1, "%05x", ordernum);
new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
- unix_table_double_lock(old_hash, new_hash);
+ unix_table_double_lock(net, old_hash, new_hash);
- if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len,
- new_hash)) {
- unix_table_double_unlock(old_hash, new_hash);
+ if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
+ unix_table_double_unlock(net, old_hash, new_hash);
/* __unix_find_socket_byname() may take a long time if many names
* are already in use.
@@ -1123,8 +1118,8 @@ retry:
goto retry;
}
- __unix_set_addr_hash(sk, addr, new_hash);
- unix_table_double_unlock(old_hash, new_hash);
+ __unix_set_addr_hash(net, sk, addr, new_hash);
+ unix_table_double_unlock(net, old_hash, new_hash);
err = 0;
out: mutex_unlock(&u->bindlock);
@@ -1138,6 +1133,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
(SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
unsigned int new_hash, old_hash = sk->sk_hash;
struct unix_sock *u = unix_sk(sk);
+ struct net *net = sock_net(sk);
struct user_namespace *ns; // barf...
struct unix_address *addr;
struct dentry *dentry;
@@ -1178,11 +1174,11 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
goto out_unlock;
new_hash = unix_bsd_hash(d_backing_inode(dentry));
- unix_table_double_lock(old_hash, new_hash);
+ unix_table_double_lock(net, old_hash, new_hash);
u->path.mnt = mntget(parent.mnt);
u->path.dentry = dget(dentry);
- __unix_set_addr_hash(sk, addr, new_hash);
- unix_table_double_unlock(old_hash, new_hash);
+ __unix_set_addr_hash(net, sk, addr, new_hash);
+ unix_table_double_unlock(net, old_hash, new_hash);
mutex_unlock(&u->bindlock);
done_path_create(&parent, dentry);
return 0;
@@ -1205,6 +1201,7 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
{
unsigned int new_hash, old_hash = sk->sk_hash;
struct unix_sock *u = unix_sk(sk);
+ struct net *net = sock_net(sk);
struct unix_address *addr;
int err;
@@ -1222,19 +1219,18 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
}
new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
- unix_table_double_lock(old_hash, new_hash);
+ unix_table_double_lock(net, old_hash, new_hash);
- if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len,
- new_hash))
+ if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
goto out_spin;
- __unix_set_addr_hash(sk, addr, new_hash);
- unix_table_double_unlock(old_hash, new_hash);
+ __unix_set_addr_hash(net, sk, addr, new_hash);
+ unix_table_double_unlock(net, old_hash, new_hash);
mutex_unlock(&u->bindlock);
return 0;
out_spin:
- unix_table_double_unlock(old_hash, new_hash);
+ unix_table_double_unlock(net, old_hash, new_hash);
err = -EADDRINUSE;
out_mutex:
mutex_unlock(&u->bindlock);
@@ -1293,9 +1289,8 @@ static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
int alen, int flags)
{
- struct sock *sk = sock->sk;
- struct net *net = sock_net(sk);
struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
+ struct sock *sk = sock->sk;
struct sock *other;
int err;
@@ -1316,7 +1311,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
}
restart:
- other = unix_find_other(net, sunaddr, alen, sock->type);
+ other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type);
if (IS_ERR(other)) {
err = PTR_ERR(other);
goto out;
@@ -1404,15 +1399,13 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags)
{
struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
- struct sock *sk = sock->sk;
- struct net *net = sock_net(sk);
+ struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;
struct unix_sock *u = unix_sk(sk), *newu, *otheru;
- struct sock *newsk = NULL;
- struct sock *other = NULL;
+ struct net *net = sock_net(sk);
struct sk_buff *skb = NULL;
- int st;
- int err;
long timeo;
+ int err;
+ int st;
err = unix_validate_addr(sunaddr, addr_len);
if (err)
@@ -1432,7 +1425,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
*/
/* create new sock for complete connection */
- newsk = unix_create1(sock_net(sk), NULL, 0, sock->type);
+ newsk = unix_create1(net, NULL, 0, sock->type);
if (IS_ERR(newsk)) {
err = PTR_ERR(newsk);
newsk = NULL;
@@ -1541,9 +1534,9 @@ restart:
*
* The contents of *(otheru->addr) and otheru->path
* are seen fully set up here, since we have found
- * otheru in hash under unix_table_locks. Insertion
- * into the hash chain we'd found it in had been done
- * in an earlier critical area protected by unix_table_locks,
+ * otheru in hash under its lock. Insertion into the
+ * hash chain we'd found it in had been done in an
+ * earlier critical area protected by the chain's lock,
* the same one where we'd set *(otheru->addr) contents,
* as well as otheru->path and otheru->addr itself.
*
@@ -1840,17 +1833,15 @@ static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
size_t len)
{
- struct sock *sk = sock->sk;
- struct net *net = sock_net(sk);
- struct unix_sock *u = unix_sk(sk);
DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
- struct sock *other = NULL;
- int err;
- struct sk_buff *skb;
- long timeo;
+ struct sock *sk = sock->sk, *other = NULL;
+ struct unix_sock *u = unix_sk(sk);
struct scm_cookie scm;
+ struct sk_buff *skb;
int data_len = 0;
int sk_locked;
+ long timeo;
+ int err;
wait_for_unix_gc();
err = scm_send(sock, msg, &scm, false);
@@ -1917,7 +1908,7 @@ restart:
if (sunaddr == NULL)
goto out_free;
- other = unix_find_other(net, sunaddr, msg->msg_namelen,
+ other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen,
sk->sk_type);
if (IS_ERR(other)) {
err = PTR_ERR(other);
@@ -3226,12 +3217,11 @@ static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
{
unsigned long offset = get_offset(*pos);
unsigned long bucket = get_bucket(*pos);
- struct sock *sk;
unsigned long count = 0;
+ struct sock *sk;
- for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
- if (sock_net(sk) != seq_file_net(seq))
- continue;
+ for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]);
+ sk; sk = sk_next(sk)) {
if (++count == offset)
break;
}
@@ -3242,16 +3232,17 @@ static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
{
unsigned long bucket = get_bucket(*pos);
+ struct net *net = seq_file_net(seq);
struct sock *sk;
- while (bucket < ARRAY_SIZE(unix_socket_table)) {
- spin_lock(&unix_table_locks[bucket]);
+ while (bucket < UNIX_HASH_SIZE) {
+ spin_lock(&net->unx.table.locks[bucket]);
sk = unix_from_bucket(seq, pos);
if (sk)
return sk;
- spin_unlock(&unix_table_locks[bucket]);
+ spin_unlock(&net->unx.table.locks[bucket]);
*pos = set_bucket_offset(++bucket, 1);
}
@@ -3264,11 +3255,12 @@ static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
{
unsigned long bucket = get_bucket(*pos);
- for (sk = sk_next(sk); sk; sk = sk_next(sk))
- if (sock_net(sk) == seq_file_net(seq))
- return sk;
+ sk = sk_next(sk);
+ if (sk)
+ return sk;
- spin_unlock(&unix_table_locks[bucket]);
+
+ spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]);
*pos = set_bucket_offset(++bucket, 1);
@@ -3298,7 +3290,7 @@ static void unix_seq_stop(struct seq_file *seq, void *v)
struct sock *sk = v;
if (sk)
- spin_unlock(&unix_table_locks[sk->sk_hash]);
+ spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]);
}
static int unix_seq_show(struct seq_file *seq, void *v)
@@ -3323,7 +3315,7 @@ static int unix_seq_show(struct seq_file *seq, void *v)
(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
sock_i_ino(s));
- if (u->addr) { // under unix_table_locks here
+ if (u->addr) { // under a hash table lock here
int i, len;
seq_putc(seq, ' ');
@@ -3393,9 +3385,6 @@ static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
iter->batch[iter->end_sk++] = start_sk;
for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
- if (sock_net(sk) != seq_file_net(seq))
- continue;
-
if (iter->end_sk < iter->max_sk) {
sock_hold(sk);
iter->batch[iter->end_sk++] = sk;
@@ -3404,7 +3393,7 @@ static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
expected++;
}
- spin_unlock(&unix_table_locks[start_sk->sk_hash]);
+ spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]);
return expected;
}
@@ -3564,7 +3553,7 @@ static const struct net_proto_family unix_family_ops = {
static int __net_init unix_net_init(struct net *net)
{
- int error = -ENOMEM;
+ int i;
net->unx.sysctl_max_dgram_qlen = 10;
if (unix_sysctl_register(net))
@@ -3572,18 +3561,44 @@ static int __net_init unix_net_init(struct net *net)
#ifdef CONFIG_PROC_FS
if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
- sizeof(struct seq_net_private))) {
- unix_sysctl_unregister(net);
- goto out;
+ sizeof(struct seq_net_private)))
+ goto err_sysctl;
+#endif
+
+ net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE,
+ sizeof(spinlock_t), GFP_KERNEL);
+ if (!net->unx.table.locks)
+ goto err_proc;
+
+ net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE,
+ sizeof(struct hlist_head),
+ GFP_KERNEL);
+ if (!net->unx.table.buckets)
+ goto free_locks;
+
+ for (i = 0; i < UNIX_HASH_SIZE; i++) {
+ spin_lock_init(&net->unx.table.locks[i]);
+ INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
}
+
+ return 0;
+
+free_locks:
+ kvfree(net->unx.table.locks);
+err_proc:
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry("unix", net->proc_net);
+err_sysctl:
#endif
- error = 0;
+ unix_sysctl_unregister(net);
out:
- return error;
+ return -ENOMEM;
}
static void __net_exit unix_net_exit(struct net *net)
{
+ kvfree(net->unx.table.buckets);
+ kvfree(net->unx.table.locks);
unix_sysctl_unregister(net);
remove_proc_entry("unix", net->proc_net);
}
@@ -3667,13 +3682,10 @@ static void __init bpf_iter_register(void)
static int __init af_unix_init(void)
{
- int i, rc = -1;
+ int rc = -1;
BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
- for (i = 0; i < 2 * UNIX_HASH_SIZE; i++)
- spin_lock_init(&unix_table_locks[i]);
-
rc = proto_register(&unix_dgram_proto, 1);
if (rc != 0) {
pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
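Note: taken together, the af_unix hunks replace the two file-scope arrays (unix_table_locks[] and unix_socket_table[], previously sized 2 * UNIX_HASH_SIZE) with per-network-namespace state allocated in unix_net_init(). That is also why every net_eq(sock_net(sk), net) filter disappears: a per-netns bucket can only ever hold sockets of its own namespace. The layout behind net->unx.table is defined in include/net/netns/unix.h, which this diff does not include, so the condensed view below is an assumption based on the accessors used above:

/* Assumed per-netns hash table state behind net->unx.table. */
struct unix_table {
	spinlock_t		*locks;		/* UNIX_HASH_SIZE bucket locks */
	struct hlist_head	*buckets;	/* UNIX_HASH_SIZE chains */
};

struct netns_unix {
	struct unix_table	table;
	int			sysctl_max_dgram_qlen;
	struct ctl_table_header	*ctl;
};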
diff --git a/net/unix/diag.c b/net/unix/diag.c
index bb0b5ea1655f..105f522a89fe 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -13,7 +13,7 @@
static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb)
{
- /* might or might not have unix_table_locks */
+ /* might or might not have a hash table lock */
struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
if (!addr)
@@ -195,25 +195,21 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct unix_diag_req *req;
- int num, s_num, slot, s_slot;
struct net *net = sock_net(skb->sk);
+ int num, s_num, slot, s_slot;
+ struct unix_diag_req *req;
req = nlmsg_data(cb->nlh);
s_slot = cb->args[0];
num = s_num = cb->args[1];
- for (slot = s_slot;
- slot < ARRAY_SIZE(unix_socket_table);
- s_num = 0, slot++) {
+ for (slot = s_slot; slot < UNIX_HASH_SIZE; s_num = 0, slot++) {
struct sock *sk;
num = 0;
- spin_lock(&unix_table_locks[slot]);
- sk_for_each(sk, &unix_socket_table[slot]) {
- if (!net_eq(sock_net(sk), net))
- continue;
+ spin_lock(&net->unx.table.locks[slot]);
+ sk_for_each(sk, &net->unx.table.buckets[slot]) {
if (num < s_num)
goto next;
if (!(req->udiag_states & (1 << sk->sk_state)))
@@ -222,13 +218,13 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI) < 0) {
- spin_unlock(&unix_table_locks[slot]);
+ spin_unlock(&net->unx.table.locks[slot]);
goto done;
}
next:
num++;
}
- spin_unlock(&unix_table_locks[slot]);
+ spin_unlock(&net->unx.table.locks[slot]);
}
done:
cb->args[0] = slot;
@@ -237,20 +233,21 @@ done:
return skb->len;
}
-static struct sock *unix_lookup_by_ino(unsigned int ino)
+static struct sock *unix_lookup_by_ino(struct net *net, unsigned int ino)
{
struct sock *sk;
int i;
- for (i = 0; i < ARRAY_SIZE(unix_socket_table); i++) {
- spin_lock(&unix_table_locks[i]);
- sk_for_each(sk, &unix_socket_table[i])
+ for (i = 0; i < UNIX_HASH_SIZE; i++) {
+ spin_lock(&net->unx.table.locks[i]);
+ sk_for_each(sk, &net->unx.table.buckets[i]) {
if (ino == sock_i_ino(sk)) {
sock_hold(sk);
- spin_unlock(&unix_table_locks[i]);
+ spin_unlock(&net->unx.table.locks[i]);
return sk;
}
- spin_unlock(&unix_table_locks[i]);
+ }
+ spin_unlock(&net->unx.table.locks[i]);
}
return NULL;
}
@@ -259,21 +256,20 @@ static int unix_diag_get_exact(struct sk_buff *in_skb,
const struct nlmsghdr *nlh,
struct unix_diag_req *req)
{
- int err = -EINVAL;
- struct sock *sk;
- struct sk_buff *rep;
- unsigned int extra_len;
struct net *net = sock_net(in_skb->sk);
+ unsigned int extra_len;
+ struct sk_buff *rep;
+ struct sock *sk;
+ int err;
+ err = -EINVAL;
if (req->udiag_ino == 0)
goto out_nosk;
- sk = unix_lookup_by_ino(req->udiag_ino);
+ sk = unix_lookup_by_ino(net, req->udiag_ino);
err = -ENOENT;
if (sk == NULL)
goto out_nosk;
- if (!net_eq(sock_net(sk), net))
- goto out;
err = sock_diag_check_cookie(sk, req->udiag_cookie);
if (err)
@@ -308,7 +304,6 @@ out_nosk:
static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
{
int hdrlen = sizeof(struct unix_diag_req);
- struct net *net = sock_net(skb->sk);
if (nlmsg_len(h) < hdrlen)
return -EINVAL;
@@ -317,7 +312,7 @@ static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
struct netlink_dump_control c = {
.dump = unix_diag_dump,
};
- return netlink_dump_start(net->diag_nlsk, skb, h, &c);
+ return netlink_dump_start(sock_net(skb->sk)->diag_nlsk, skb, h, &c);
} else
return unix_diag_get_exact(skb, h, nlmsg_data(h));
}
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index f01ef6bda390..869b9b9b9fad 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -57,7 +57,7 @@ static int xdp_umem_addr_map(struct xdp_umem *umem, struct page **pages,
static void xdp_umem_release(struct xdp_umem *umem)
{
umem->zc = false;
- ida_simple_remove(&umem_ida, umem->id);
+ ida_free(&umem_ida, umem->id);
xdp_umem_addr_unmap(umem);
xdp_umem_unpin_pages(umem);
@@ -242,7 +242,7 @@ struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
if (!umem)
return ERR_PTR(-ENOMEM);
- err = ida_simple_get(&umem_ida, 0, 0, GFP_KERNEL);
+ err = ida_alloc(&umem_ida, GFP_KERNEL);
if (err < 0) {
kfree(umem);
return ERR_PTR(err);
@@ -251,7 +251,7 @@ struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
err = xdp_umem_reg(umem, mr);
if (err) {
- ida_simple_remove(&umem_ida, umem->id);
+ ida_free(&umem_ida, umem->id);
kfree(umem);
return ERR_PTR(err);
}
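Note: ida_simple_get()/ida_simple_remove() are legacy wrappers, and ida_alloc()/ida_free() are their direct replacements. Since ida_simple_get(&ida, 0, 0, gfp) with a zero upper bound already meant "any non-negative ID", the conversion is one-to-one. Usage in isolation:

#include <linux/idr.h>

static DEFINE_IDA(my_ida);

static int my_get_id(void)
{
	return ida_alloc(&my_ida, GFP_KERNEL);	/* >= 0 on success, else -errno */
}

static void my_put_id(int id)
{
	ida_free(&my_ida, id);
}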
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 19ac872a6624..09002387987e 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -538,12 +538,6 @@ static int xsk_generic_xmit(struct sock *sk)
goto out;
}
- skb = xsk_build_skb(xs, &desc);
- if (IS_ERR(skb)) {
- err = PTR_ERR(skb);
- goto out;
- }
-
/* This is the backpressure mechanism for the Tx path.
* Reserve space in the completion queue and only proceed
* if there is space in it. This avoids having to implement
@@ -552,11 +546,19 @@ static int xsk_generic_xmit(struct sock *sk)
spin_lock_irqsave(&xs->pool->cq_lock, flags);
if (xskq_prod_reserve(xs->pool->cq)) {
spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
- kfree_skb(skb);
goto out;
}
spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+ skb = xsk_build_skb(xs, &desc);
+ if (IS_ERR(skb)) {
+ err = PTR_ERR(skb);
+ spin_lock_irqsave(&xs->pool->cq_lock, flags);
+ xskq_prod_cancel(xs->pool->cq);
+ spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+ goto out;
+ }
+
err = __dev_direct_xmit(skb, xs->queue_id);
if (err == NETDEV_TX_BUSY) {
/* Tell user-space to retry the send */
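Note: the xsk reordering closes a leak window. The completion-queue slot is now reserved before the skb is built, and if xsk_build_skb() then fails, the reservation is explicitly returned with xskq_prod_cancel() rather than left consumed. Abstractly, the reserve/commit-or-cancel shape looks like this (the my_* helpers are placeholders, declared only to keep the sketch compilable):

#include <linux/err.h>
#include <linux/skbuff.h>

struct my_queue;
int my_prod_reserve(struct my_queue *q);
void my_prod_cancel(struct my_queue *q);
struct sk_buff *my_build_skb(struct my_queue *q);
int my_transmit(struct sk_buff *skb);

static int my_xmit_one(struct my_queue *q)
{
	struct sk_buff *skb;

	if (my_prod_reserve(q))		/* backpressure: no slot, no work */
		return -EAGAIN;

	skb = my_build_skb(q);
	if (IS_ERR(skb)) {
		my_prod_cancel(q);	/* undo the reservation on failure */
		return PTR_ERR(skb);
	}

	return my_transmit(skb);	/* completion path consumes the slot */
}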
diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c
index 1828487bae9a..84f57f1209ce 100644
--- a/samples/bpf/xdp_fwd_user.c
+++ b/samples/bpf/xdp_fwd_user.c
@@ -47,17 +47,60 @@ static int do_attach(int idx, int prog_fd, int map_fd, const char *name)
return err;
}
-static int do_detach(int idx, const char *name)
+static int do_detach(int ifindex, const char *ifname, const char *app_name)
{
- int err;
+ LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
+ struct bpf_prog_info prog_info = {};
+ char prog_name[BPF_OBJ_NAME_LEN];
+ __u32 info_len, curr_prog_id;
+ int prog_fd;
+ int err = 1;
+
+ if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+ printf("ERROR: bpf_xdp_query_id failed (%s)\n",
+ strerror(errno));
+ return err;
+ }
- err = bpf_xdp_detach(idx, xdp_flags, NULL);
- if (err < 0)
- printf("ERROR: failed to detach program from %s\n", name);
+ if (!curr_prog_id) {
+ printf("ERROR: flags(0x%x) xdp prog is not attached to %s\n",
+ xdp_flags, ifname);
+ return err;
+ }
+ info_len = sizeof(prog_info);
+ prog_fd = bpf_prog_get_fd_by_id(curr_prog_id);
+ if (prog_fd < 0) {
+ printf("ERROR: bpf_prog_get_fd_by_id failed (%s)\n",
+ strerror(errno));
+ return prog_fd;
+ }
+
+ err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len);
+ if (err) {
+ printf("ERROR: bpf_obj_get_info_by_fd failed (%s)\n",
+ strerror(errno));
+ goto close_out;
+ }
+ snprintf(prog_name, sizeof(prog_name), "%s_prog", app_name);
+ prog_name[BPF_OBJ_NAME_LEN - 1] = '\0';
+
+ if (strcmp(prog_info.name, prog_name)) {
+ printf("ERROR: %s isn't attached to %s\n", app_name, ifname);
+ err = 1;
+ goto close_out;
+ }
+
+ opts.old_prog_fd = prog_fd;
+ err = bpf_xdp_detach(ifindex, xdp_flags, &opts);
+ if (err < 0)
+ printf("ERROR: failed to detach program from %s (%s)\n",
+ ifname, strerror(errno));
/* TODO: Remember to cleanup map, when adding use of shared map
* bpf_map_delete_elem(map_fd, &idx);
*/
+close_out:
+ close(prog_fd);
return err;
}
@@ -169,7 +212,7 @@ int main(int argc, char **argv)
return 1;
}
if (!attach) {
- err = do_detach(idx, argv[i]);
+ err = do_detach(idx, argv[i], prog_name);
if (err)
ret = err;
} else {
diff --git a/samples/bpf/xdp_router_ipv4.bpf.c b/samples/bpf/xdp_router_ipv4.bpf.c
index 248119ca7938..0643330d1d2e 100644
--- a/samples/bpf/xdp_router_ipv4.bpf.c
+++ b/samples/bpf/xdp_router_ipv4.bpf.c
@@ -150,6 +150,15 @@ int xdp_router_ipv4_prog(struct xdp_md *ctx)
dest_mac = bpf_map_lookup_elem(&arp_table,
&prefix_value->gw);
+ if (!dest_mac) {
+ /* Forward the packet to the kernel in
+ * order to trigger ARP discovery for
+ * the default gw.
+ */
+ if (rec)
+ NO_TEAR_INC(rec->xdp_pass);
+ return XDP_PASS;
+ }
}
}
diff --git a/samples/fprobe/fprobe_example.c b/samples/fprobe/fprobe_example.c
index 24d3cf109140..01ee6c8c8382 100644
--- a/samples/fprobe/fprobe_example.c
+++ b/samples/fprobe/fprobe_example.c
@@ -21,6 +21,7 @@
#define BACKTRACE_DEPTH 16
#define MAX_SYMBOL_LEN 4096
struct fprobe sample_probe;
+static unsigned long nhit;
static char symbol[MAX_SYMBOL_LEN] = "kernel_clone";
module_param_string(symbol, symbol, sizeof(symbol), 0644);
@@ -28,6 +29,8 @@ static char nosymbol[MAX_SYMBOL_LEN] = "";
module_param_string(nosymbol, nosymbol, sizeof(nosymbol), 0644);
static bool stackdump = true;
module_param(stackdump, bool, 0644);
+static bool use_trace = false;
+module_param(use_trace, bool, 0644);
static void show_backtrace(void)
{
@@ -40,7 +43,15 @@ static void show_backtrace(void)
static void sample_entry_handler(struct fprobe *fp, unsigned long ip, struct pt_regs *regs)
{
- pr_info("Enter <%pS> ip = 0x%p\n", (void *)ip, (void *)ip);
+ if (use_trace)
+ /*
+ * This is just an example, no kernel code should call
+ * trace_printk() except when actively debugging.
+ */
+ trace_printk("Enter <%pS> ip = 0x%p\n", (void *)ip, (void *)ip);
+ else
+ pr_info("Enter <%pS> ip = 0x%p\n", (void *)ip, (void *)ip);
+ nhit++;
if (stackdump)
show_backtrace();
}
@@ -49,8 +60,17 @@ static void sample_exit_handler(struct fprobe *fp, unsigned long ip, struct pt_r
{
unsigned long rip = instruction_pointer(regs);
- pr_info("Return from <%pS> ip = 0x%p to rip = 0x%p (%pS)\n",
- (void *)ip, (void *)ip, (void *)rip, (void *)rip);
+ if (use_trace)
+ /*
+ * This is just an example, no kernel code should call
+ * trace_printk() except when actively debugging.
+ */
+ trace_printk("Return from <%pS> ip = 0x%p to rip = 0x%p (%pS)\n",
+ (void *)ip, (void *)ip, (void *)rip, (void *)rip);
+ else
+ pr_info("Return from <%pS> ip = 0x%p to rip = 0x%p (%pS)\n",
+ (void *)ip, (void *)ip, (void *)rip, (void *)rip);
+ nhit++;
if (stackdump)
show_backtrace();
}
@@ -112,7 +132,8 @@ static void __exit fprobe_exit(void)
{
unregister_fprobe(&sample_probe);
- pr_info("fprobe at %s unregistered\n", symbol);
+ pr_info("fprobe at %s unregistered. %ld times hit, %ld times missed\n",
+ symbol, nhit, sample_probe.nmissed);
}
module_init(fprobe_init)
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 1f01ac65c0cd..cac070aee791 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -251,8 +251,8 @@ $(obj)/%.o: $(src)/%.c $(recordmcount_source) FORCE
# To make this rule robust against "Argument list too long" error,
# ensure to add $(obj)/ prefix by a shell command.
-cmd_mod = echo $(call real-search, $*.o, .o, -objs -y -m) | \
- $(AWK) -v RS='( |\n)' '!x[$$0]++ { print("$(obj)/"$$0) }' > $@
+cmd_mod = printf '%s\n' $(call real-search, $*.o, .o, -objs -y -m) | \
+ $(AWK) '!x[$$0]++ { print("$(obj)/"$$0) }' > $@
$(obj)/%.mod: FORCE
$(call if_changed,mod)
diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py
index 855b937e7585..a0ec321469bd 100755
--- a/scripts/bpf_doc.py
+++ b/scripts/bpf_doc.py
@@ -635,6 +635,8 @@ class PrinterHelpers(Printer):
'struct bpf_timer',
'struct mptcp_sock',
'struct bpf_dynptr',
+ 'struct iphdr',
+ 'struct ipv6hdr',
]
known_types = {
'...',
@@ -686,6 +688,8 @@ class PrinterHelpers(Printer):
'struct bpf_timer',
'struct mptcp_sock',
'struct bpf_dynptr',
+ 'struct iphdr',
+ 'struct ipv6hdr',
}
mapped_types = {
'u8': '__u8',
diff --git a/scripts/check-local-export b/scripts/check-local-export
index da745e2743b7..6ccc2f467416 100755
--- a/scripts/check-local-export
+++ b/scripts/check-local-export
@@ -8,11 +8,31 @@
set -e
+# catch errors from ${NM}
+set -o pipefail
+
+# Run the last element of a pipeline in the current shell.
+# Without this, the while-loop would be executed in a subshell, and
+# the changes made to 'symbol_types' and 'export_symbols' would be lost.
+shopt -s lastpipe
+
declare -A symbol_types
declare -a export_symbols
exit_code=0
+# If there is no symbol in the object, ${NM} (both GNU nm and llvm-nm) shows
+# 'no symbols' diagnostic (but exits with 0). It is harmless and hidden by
+# '2>/dev/null'. However, it suppresses real error messages as well. Add a
+# hand-crafted error message here.
+#
+# TODO:
+# Use --quiet instead of 2>/dev/null when we upgrade the minimum version of
+# binutils to 2.37, llvm to 13.0.0.
+# Then, the following line will be really simple:
+# ${NM} --quiet ${1} |
+
+{ ${NM} ${1} 2>/dev/null || { echo "${0}: ${NM} failed" >&2; false; } } |
while read value type name
do
# Skip the line if the number of fields is less than 3.
@@ -37,21 +57,7 @@ do
if [[ ${name} == __ksymtab_* ]]; then
export_symbols+=(${name#__ksymtab_})
fi
-
- # If there is no symbol in the object, ${NM} (both GNU nm and llvm-nm)
- # shows 'no symbols' diagnostic (but exits with 0). It is harmless and
- # hidden by '2>/dev/null'. However, it suppresses real error messages
- # as well. Add a hand-crafted error message here.
- #
- # Use --quiet instead of 2>/dev/null when we upgrade the minimum version
- # of binutils to 2.37, llvm to 13.0.0.
- #
- # Then, the following line will be really simple:
- # done < <(${NM} --quiet ${1})
-done < <(${NM} ${1} 2>/dev/null || { echo "${0}: ${NM} failed" >&2; false; } )
-
-# Catch error in the process substitution
-wait $!
+done
for name in "${export_symbols[@]}"
do
diff --git a/scripts/faddr2line b/scripts/faddr2line
index 0e6268d59883..94ed98dd899f 100755
--- a/scripts/faddr2line
+++ b/scripts/faddr2line
@@ -95,17 +95,25 @@ __faddr2line() {
local print_warnings=$4
local sym_name=${func_addr%+*}
- local offset=${func_addr#*+}
- offset=${offset%/*}
+ local func_offset=${func_addr#*+}
+ func_offset=${func_offset%/*}
local user_size=
+ local file_type
+ local is_vmlinux=0
[[ $func_addr =~ "/" ]] && user_size=${func_addr#*/}
- if [[ -z $sym_name ]] || [[ -z $offset ]] || [[ $sym_name = $func_addr ]]; then
+ if [[ -z $sym_name ]] || [[ -z $func_offset ]] || [[ $sym_name = $func_addr ]]; then
warn "bad func+offset $func_addr"
DONE=1
return
fi
+ # vmlinux uses absolute addresses in the section table rather than
+ # section offsets.
+ local file_type=$(${READELF} --file-header $objfile |
+ ${AWK} '$1 == "Type:" { print $2; exit }')
+ [[ $file_type = "EXEC" ]] && is_vmlinux=1
+
# Go through each of the object's symbols which match the func name.
# In rare cases there might be duplicates, in which case we print all
# matches.
@@ -114,9 +122,11 @@ __faddr2line() {
local sym_addr=0x${fields[1]}
local sym_elf_size=${fields[2]}
local sym_sec=${fields[6]}
+ local sec_size
+ local sec_name
# Get the section size:
- local sec_size=$(${READELF} --section-headers --wide $objfile |
+ sec_size=$(${READELF} --section-headers --wide $objfile |
sed 's/\[ /\[/' |
${AWK} -v sec=$sym_sec '$1 == "[" sec "]" { print "0x" $6; exit }')
@@ -126,6 +136,17 @@ __faddr2line() {
return
fi
+ # Get the section name:
+ sec_name=$(${READELF} --section-headers --wide $objfile |
+ sed 's/\[ /\[/' |
+ ${AWK} -v sec=$sym_sec '$1 == "[" sec "]" { print $2; exit }')
+
+ if [[ -z $sec_name ]]; then
+ warn "bad section name: section: $sym_sec"
+ DONE=1
+ return
+ fi
+
# Calculate the symbol size.
#
# Unfortunately we can't use the ELF size, because kallsyms
@@ -174,10 +195,10 @@ __faddr2line() {
sym_size=0x$(printf %x $sym_size)
- # Calculate the section address from user-supplied offset:
- local addr=$(($sym_addr + $offset))
+ # Calculate the address from user-supplied offset:
+ local addr=$(($sym_addr + $func_offset))
if [[ -z $addr ]] || [[ $addr = 0 ]]; then
- warn "bad address: $sym_addr + $offset"
+ warn "bad address: $sym_addr + $func_offset"
DONE=1
return
fi
@@ -191,9 +212,9 @@ __faddr2line() {
fi
# Make sure the provided offset is within the symbol's range:
- if [[ $offset -gt $sym_size ]]; then
+ if [[ $func_offset -gt $sym_size ]]; then
[[ $print_warnings = 1 ]] &&
- echo "skipping $sym_name address at $addr due to size mismatch ($offset > $sym_size)"
+ echo "skipping $sym_name address at $addr due to size mismatch ($func_offset > $sym_size)"
continue
fi
@@ -202,11 +223,13 @@ __faddr2line() {
[[ $FIRST = 0 ]] && echo
FIRST=0
- echo "$sym_name+$offset/$sym_size:"
+ echo "$sym_name+$func_offset/$sym_size:"
# Pass section address to addr2line and strip absolute paths
# from the output:
- local output=$(${ADDR2LINE} -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;")
+ local args="--functions --pretty-print --inlines --exe=$objfile"
+ [[ $is_vmlinux = 0 ]] && args="$args --section=$sec_name"
+ local output=$(${ADDR2LINE} $args $addr | sed "s; $dir_prefix\(\./\)*; ;")
[[ -z $output ]] && continue
# Default output (non --list):
diff --git a/scripts/gdb/linux/config.py b/scripts/gdb/linux/config.py
index 90e1565b1967..8843ab3cbadd 100644
--- a/scripts/gdb/linux/config.py
+++ b/scripts/gdb/linux/config.py
@@ -24,9 +24,9 @@ class LxConfigDump(gdb.Command):
filename = arg
try:
- py_config_ptr = gdb.parse_and_eval("kernel_config_data + 8")
- py_config_size = gdb.parse_and_eval(
- "sizeof(kernel_config_data) - 1 - 8 * 2")
+ py_config_ptr = gdb.parse_and_eval("&kernel_config_data")
+ py_config_ptr_end = gdb.parse_and_eval("&kernel_config_data_end")
+ py_config_size = py_config_ptr_end - py_config_ptr
except gdb.error as e:
raise gdb.GdbError("Can't find config, enable CONFIG_IKCONFIG?")
diff --git a/scripts/nsdeps b/scripts/nsdeps
index 04c4b96e95ec..f1718cc0d700 100644
--- a/scripts/nsdeps
+++ b/scripts/nsdeps
@@ -34,9 +34,8 @@ generate_deps() {
local mod=${1%.ko:}
shift
local namespaces="$*"
- local mod_source_files="`cat $mod.mod | sed -n 1p \
- | sed -e 's/\.o/\.c/g' \
- | sed "s|[^ ]* *|${src_prefix}&|g"`"
+ local mod_source_files=$(sed "s|^\(.*\)\.o$|${src_prefix}\1.c|" $mod.mod)
+
for ns in $namespaces; do
echo "Adding namespace $ns to module $mod.ko."
generate_deps_for_ns $ns "$mod_source_files"
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index beceb89f68d9..1bbd53321d13 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2600,8 +2600,9 @@ static int selinux_sb_eat_lsm_opts(char *options, void **mnt_opts)
}
}
rc = selinux_add_opt(token, arg, mnt_opts);
+ kfree(arg);
+ arg = NULL;
if (unlikely(rc)) {
- kfree(arg);
goto free_opt;
}
} else {
@@ -2792,17 +2793,13 @@ static int selinux_fs_context_parse_param(struct fs_context *fc,
struct fs_parameter *param)
{
struct fs_parse_result result;
- int opt, rc;
+ int opt;
opt = fs_parse(fc, selinux_fs_parameters, param, &result);
if (opt < 0)
return opt;
- rc = selinux_add_opt(opt, param->string, &fc->security);
- if (!rc)
- param->string = NULL;
-
- return rc;
+ return selinux_add_opt(opt, param->string, &fc->security);
}
/* inode security operations */
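Note: both selinux hunks converge on one ownership rule: selinux_add_opt() no longer consumes the string it is passed, so the caller frees its copy unconditionally (kfree(arg) moves ahead of the error check, with arg NULLed so the free_opt path cannot double-free, and the fs_context path stops stealing param->string). The usual way to provide that rule is for the callee to duplicate whatever it keeps; presumably something along these lines, since selinux_add_opt()'s body is not in this diff:

#include <linux/slab.h>
#include <linux/string.h>

/* Assumed callee-copies convention: 'arg' stays owned by the caller. */
static int my_add_opt(int token, const char *arg, void **mnt_opts)
{
	char *copy;

	if (!arg)
		return -EINVAL;
	copy = kstrdup(arg, GFP_KERNEL);
	if (!copy)
		return -ENOMEM;
	/* Stash 'copy' (never 'arg'); simplified from a keyed list. */
	*mnt_opts = copy;
	(void)token;
	return 0;
}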
diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c
index 15dc7160ba34..8cfdaee77905 100644
--- a/sound/core/memalloc.c
+++ b/sound/core/memalloc.c
@@ -431,33 +431,17 @@ static const struct snd_malloc_ops snd_dma_iram_ops = {
*/
static void *snd_dma_dev_alloc(struct snd_dma_buffer *dmab, size_t size)
{
- void *p;
-
- p = dma_alloc_coherent(dmab->dev.dev, size, &dmab->addr, DEFAULT_GFP);
-#ifdef CONFIG_X86
- if (p && dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC)
- set_memory_wc((unsigned long)p, PAGE_ALIGN(size) >> PAGE_SHIFT);
-#endif
- return p;
+ return dma_alloc_coherent(dmab->dev.dev, size, &dmab->addr, DEFAULT_GFP);
}
static void snd_dma_dev_free(struct snd_dma_buffer *dmab)
{
-#ifdef CONFIG_X86
- if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC)
- set_memory_wb((unsigned long)dmab->area,
- PAGE_ALIGN(dmab->bytes) >> PAGE_SHIFT);
-#endif
dma_free_coherent(dmab->dev.dev, dmab->bytes, dmab->area, dmab->addr);
}
static int snd_dma_dev_mmap(struct snd_dma_buffer *dmab,
struct vm_area_struct *area)
{
-#ifdef CONFIG_X86
- if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC)
- area->vm_page_prot = pgprot_writecombine(area->vm_page_prot);
-#endif
return dma_mmap_coherent(dmab->dev.dev, area,
dmab->area, dmab->addr, dmab->bytes);
}
@@ -471,10 +455,6 @@ static const struct snd_malloc_ops snd_dma_dev_ops = {
/*
* Write-combined pages
*/
-#ifdef CONFIG_X86
-/* On x86, share the same ops as the standard dev ops */
-#define snd_dma_wc_ops snd_dma_dev_ops
-#else /* CONFIG_X86 */
static void *snd_dma_wc_alloc(struct snd_dma_buffer *dmab, size_t size)
{
return dma_alloc_wc(dmab->dev.dev, size, &dmab->addr, DEFAULT_GFP);
@@ -497,7 +477,6 @@ static const struct snd_malloc_ops snd_dma_wc_ops = {
.free = snd_dma_wc_free,
.mmap = snd_dma_wc_mmap,
};
-#endif /* CONFIG_X86 */
#ifdef CONFIG_SND_DMA_SGBUF
static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size);
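Note: with the x86 set_memory_wc()/set_memory_wb() special-casing gone, SNDRV_DMA_TYPE_DEV_WC buffers use the generic write-combined DMA API on every architecture, and the CONFIG_X86 alias that mapped the WC ops onto the standard dev ops disappears with it. The three calls involved, in one minimal lifecycle (dev, size and area are illustrative):

#include <linux/dma-mapping.h>

static void *wc_buffer_demo(struct device *dev, size_t size,
			    struct vm_area_struct *area)
{
	dma_addr_t addr;
	void *cpu = dma_alloc_wc(dev, size, &addr, GFP_KERNEL);

	if (!cpu)
		return NULL;

	/* Map to userspace with matching write-combined attributes: */
	if (area && dma_mmap_wc(dev, area, cpu, addr, size)) {
		dma_free_wc(dev, size, cpu, addr);
		return NULL;
	}
	return cpu;
}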
diff --git a/sound/hda/hdac_i915.c b/sound/hda/hdac_i915.c
index 3f35972e1cf7..161a9711cd63 100644
--- a/sound/hda/hdac_i915.c
+++ b/sound/hda/hdac_i915.c
@@ -119,21 +119,18 @@ static int i915_component_master_match(struct device *dev, int subcomponent,
/* check whether Intel graphics is present and reachable */
static int i915_gfx_present(struct pci_dev *hdac_pci)
{
- unsigned int class = PCI_BASE_CLASS_DISPLAY << 16;
struct pci_dev *display_dev = NULL;
- bool match = false;
- do {
- display_dev = pci_get_class(class, display_dev);
-
- if (display_dev && display_dev->vendor == PCI_VENDOR_ID_INTEL &&
+ for_each_pci_dev(display_dev) {
+ if (display_dev->vendor == PCI_VENDOR_ID_INTEL &&
+ (display_dev->class >> 16) == PCI_BASE_CLASS_DISPLAY &&
connectivity_check(display_dev, hdac_pci)) {
pci_dev_put(display_dev);
- match = true;
+ return true;
}
- } while (!match && display_dev);
+ }
- return match;
+ return false;
}
/**
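Note: the rewritten i915_gfx_present() walks all PCI devices with for_each_pci_dev(), which manages device reference counts across iterations by itself; the one remaining duty is dropping the reference on the device you exit the loop with, hence the pci_dev_put() before returning true. The idiom:

#include <linux/pci.h>

/* Early exit under for_each_pci_dev(): the iterator holds a reference
 * on the current device, so release it yourself when breaking out.
 * 'wanted' is illustrative.
 */
static bool any_device_from_vendor(unsigned short wanted)
{
	struct pci_dev *pdev = NULL;

	for_each_pci_dev(pdev) {
		if (pdev->vendor == wanted) {
			pci_dev_put(pdev);
			return true;
		}
	}
	return false;	/* loop ran to completion: no reference held */
}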
diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c
index a8fe01764b25..ec9cbb219bc1 100644
--- a/sound/hda/intel-dsp-config.c
+++ b/sound/hda/intel-dsp-config.c
@@ -196,6 +196,12 @@ static const struct config_entry config_table[] = {
DMI_MATCH(DMI_SYS_VENDOR, "Google"),
}
},
+ {
+ .ident = "UP-WHL",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "AAEON"),
+ }
+ },
{}
}
},
@@ -358,6 +364,12 @@ static const struct config_entry config_table[] = {
DMI_MATCH(DMI_SYS_VENDOR, "Google"),
}
},
+ {
+ .ident = "UPX-TGL",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "AAEON"),
+ }
+ },
{}
}
},
diff --git a/sound/hda/intel-nhlt.c b/sound/hda/intel-nhlt.c
index 4063da378283..9db5ccd9aa2d 100644
--- a/sound/hda/intel-nhlt.c
+++ b/sound/hda/intel-nhlt.c
@@ -55,8 +55,8 @@ int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt)
/* find max number of channels based on format_configuration */
if (fmt_configs->fmt_count) {
- dev_dbg(dev, "%s: found %d format definitions\n",
- __func__, fmt_configs->fmt_count);
+ dev_dbg(dev, "found %d format definitions\n",
+ fmt_configs->fmt_count);
for (i = 0; i < fmt_configs->fmt_count; i++) {
struct wav_fmt_ext *fmt_ext;
@@ -66,9 +66,9 @@ int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt)
if (fmt_ext->fmt.channels > max_ch)
max_ch = fmt_ext->fmt.channels;
}
- dev_dbg(dev, "%s: max channels found %d\n", __func__, max_ch);
+ dev_dbg(dev, "max channels found %d\n", max_ch);
} else {
- dev_dbg(dev, "%s: No format information found\n", __func__);
+ dev_dbg(dev, "No format information found\n");
}
if (cfg->device_config.config_type != NHLT_CONFIG_TYPE_MIC_ARRAY) {
@@ -95,17 +95,16 @@ int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt)
}
if (dmic_geo > 0) {
- dev_dbg(dev, "%s: Array with %d dmics\n", __func__, dmic_geo);
+ dev_dbg(dev, "Array with %d dmics\n", dmic_geo);
}
if (max_ch > dmic_geo) {
- dev_dbg(dev, "%s: max channels %d exceed dmic number %d\n",
- __func__, max_ch, dmic_geo);
+ dev_dbg(dev, "max channels %d exceed dmic number %d\n",
+ max_ch, dmic_geo);
}
}
}
- dev_dbg(dev, "%s: dmic number %d max_ch %d\n",
- __func__, dmic_geo, max_ch);
+ dev_dbg(dev, "dmic number %d max_ch %d\n", dmic_geo, max_ch);
return dmic_geo;
}
diff --git a/sound/pci/hda/hda_auto_parser.c b/sound/pci/hda/hda_auto_parser.c
index cd1db943b7e0..7c6b1fe8dfcc 100644
--- a/sound/pci/hda/hda_auto_parser.c
+++ b/sound/pci/hda/hda_auto_parser.c
@@ -819,7 +819,7 @@ static void set_pin_targets(struct hda_codec *codec,
snd_hda_set_pin_ctl_cache(codec, cfg->nid, cfg->val);
}
-static void apply_fixup(struct hda_codec *codec, int id, int action, int depth)
+void __snd_hda_apply_fixup(struct hda_codec *codec, int id, int action, int depth)
{
const char *modelname = codec->fixup_name;
@@ -829,7 +829,7 @@ static void apply_fixup(struct hda_codec *codec, int id, int action, int depth)
if (++depth > 10)
break;
if (fix->chained_before)
- apply_fixup(codec, fix->chain_id, action, depth + 1);
+ __snd_hda_apply_fixup(codec, fix->chain_id, action, depth + 1);
switch (fix->type) {
case HDA_FIXUP_PINS:
@@ -870,6 +870,7 @@ static void apply_fixup(struct hda_codec *codec, int id, int action, int depth)
id = fix->chain_id;
}
}
+EXPORT_SYMBOL_GPL(__snd_hda_apply_fixup);
/**
* snd_hda_apply_fixup - Apply the fixup chain with the given action
@@ -879,7 +880,7 @@ static void apply_fixup(struct hda_codec *codec, int id, int action, int depth)
void snd_hda_apply_fixup(struct hda_codec *codec, int action)
{
if (codec->fixup_list)
- apply_fixup(codec, codec->fixup_id, action, 0);
+ __snd_hda_apply_fixup(codec, codec->fixup_id, action, 0);
}
EXPORT_SYMBOL_GPL(snd_hda_apply_fixup);
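
Renaming apply_fixup() to __snd_hda_apply_fixup() and exporting it lets codec drivers apply one specific fixup ID rather than only the codec's own fixup_id; the Realtek change further down uses exactly that. Worth noting is the pre-existing ++depth > 10 guard, which bounds chained fixups so a misconfigured chain_id cycle cannot loop forever. A reduced sketch of that walk, with struct fixup_stub and apply_one() as hypothetical stand-ins for the HDA fixup machinery:

    /* Depth-capped chain walk; fixup_stub and apply_one() are hypothetical. */
    #include <stdbool.h>

    struct fixup_stub {
        bool chained;
        int chain_id;
    };

    static void apply_one(const struct fixup_stub *fix);

    static void walk_chain(const struct fixup_stub *table, int id, int depth)
    {
        while (id >= 0) {
            const struct fixup_stub *fix = &table[id];

            if (++depth > 10)   /* guard against chain_id cycles */
                return;
            apply_one(fix);
            if (!fix->chained)
                return;
            id = fix->chain_id; /* follow the chain */
        }
    }
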
diff --git a/sound/pci/hda/hda_local.h b/sound/pci/hda/hda_local.h
index aca592651870..682dca2057db 100644
--- a/sound/pci/hda/hda_local.h
+++ b/sound/pci/hda/hda_local.h
@@ -348,6 +348,7 @@ void snd_hda_apply_verbs(struct hda_codec *codec);
void snd_hda_apply_pincfgs(struct hda_codec *codec,
const struct hda_pintbl *cfg);
void snd_hda_apply_fixup(struct hda_codec *codec, int action);
+void __snd_hda_apply_fixup(struct hda_codec *codec, int id, int action, int depth);
void snd_hda_pick_fixup(struct hda_codec *codec,
const struct hda_model_fixup *models,
const struct snd_pci_quirk *quirk,
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index 1248d1a51cf0..3e541a4c0423 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -1079,11 +1079,11 @@ static int patch_conexant_auto(struct hda_codec *codec)
if (err < 0)
goto error;
- err = snd_hda_gen_parse_auto_config(codec, &spec->gen.autocfg);
+ err = cx_auto_parse_beep(codec);
if (err < 0)
goto error;
- err = cx_auto_parse_beep(codec);
+ err = snd_hda_gen_parse_auto_config(codec, &spec->gen.autocfg);
if (err < 0)
goto error;
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index b0f954118e72..cee69fa7e246 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -2634,6 +2634,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
SND_PCI_QUIRK(0x1558, 0x67e1, "Clevo PB71[DE][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
SND_PCI_QUIRK(0x1558, 0x67e5, "Clevo PC70D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
SND_PCI_QUIRK(0x1558, 0x67f1, "Clevo PC70H[PRS]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ SND_PCI_QUIRK(0x1558, 0x67f5, "Clevo PD70PN[NRT]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
SND_PCI_QUIRK(0x1558, 0x70d1, "Clevo PC70[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
SND_PCI_QUIRK(0x1558, 0x7714, "Clevo X170SM", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
SND_PCI_QUIRK(0x1558, 0x7715, "Clevo X170KM-G", ALC1220_FIXUP_CLEVO_PB51ED),
@@ -7004,6 +7005,7 @@ enum {
ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS,
ALC287_FIXUP_LEGION_15IMHG05_AUTOMUTE,
ALC287_FIXUP_YOGA7_14ITL_SPEAKERS,
+ ALC298_FIXUP_LENOVO_C940_DUET7,
ALC287_FIXUP_13S_GEN2_SPEAKERS,
ALC256_FIXUP_SET_COEF_DEFAULTS,
ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE,
@@ -7022,6 +7024,23 @@ enum {
ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE,
};
+/* A special fixup for Lenovo C940 and Yoga Duet 7;
+ * both have the very same PCI SSID, and we need to apply different fixups
+ * depending on the codec ID
+ */
+static void alc298_fixup_lenovo_c940_duet7(struct hda_codec *codec,
+ const struct hda_fixup *fix,
+ int action)
+{
+ int id;
+
+ if (codec->core.vendor_id == 0x10ec0298)
+ id = ALC298_FIXUP_LENOVO_SPK_VOLUME; /* C940 */
+ else
+ id = ALC287_FIXUP_YOGA7_14ITL_SPEAKERS; /* Duet 7 */
+ __snd_hda_apply_fixup(codec, id, action, 0);
+}
+
static const struct hda_fixup alc269_fixups[] = {
[ALC269_FIXUP_GPIO2] = {
.type = HDA_FIXUP_FUNC,
@@ -8721,6 +8740,10 @@ static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC269_FIXUP_HEADSET_MODE,
},
+ [ALC298_FIXUP_LENOVO_C940_DUET7] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc298_fixup_lenovo_c940_duet7,
+ },
[ALC287_FIXUP_13S_GEN2_SPEAKERS] = {
.type = HDA_FIXUP_VERBS,
.v.verbs = (const struct hda_verb[]) {
@@ -9022,6 +9045,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
ALC285_FIXUP_HP_GPIO_AMP_INIT),
SND_PCI_QUIRK(0x103c, 0x8783, "HP ZBook Fury 15 G7 Mobile Workstation",
ALC285_FIXUP_HP_GPIO_AMP_INIT),
+ SND_PCI_QUIRK(0x103c, 0x8787, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED),
SND_PCI_QUIRK(0x103c, 0x8788, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED),
SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x87e5, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED),
@@ -9187,6 +9211,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1558, 0x70f3, "Clevo NH77DPQ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x70f4, "Clevo NH77EPY", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x70f6, "Clevo NH77DPQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x7716, "Clevo NS50PU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x8228, "Clevo NR40BU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x8520, "Clevo NH50D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x8521, "Clevo NH77D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
@@ -9273,7 +9298,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x31af, "ThinkCentre Station", ALC623_FIXUP_LENOVO_THINKSTATION_P340),
SND_PCI_QUIRK(0x17aa, 0x3802, "Lenovo Yoga DuetITL 2021", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS),
SND_PCI_QUIRK(0x17aa, 0x3813, "Legion 7i 15IMHG05", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS),
- SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940", ALC298_FIXUP_LENOVO_SPK_VOLUME),
+ SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940 / Yoga Duet 7", ALC298_FIXUP_LENOVO_C940_DUET7),
SND_PCI_QUIRK(0x17aa, 0x3819, "Lenovo 13s Gen2 ITL", ALC287_FIXUP_13S_GEN2_SPEAKERS),
SND_PCI_QUIRK(0x17aa, 0x3820, "Yoga Duet 7 13ITL6", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS),
SND_PCI_QUIRK(0x17aa, 0x3824, "Legion Y9000X 2020", ALC285_FIXUP_LEGION_Y9000X_SPEAKERS),
@@ -10737,6 +10762,7 @@ enum {
ALC668_FIXUP_MIC_DET_COEF,
ALC897_FIXUP_LENOVO_HEADSET_MIC,
ALC897_FIXUP_HEADSET_MIC_PIN,
+ ALC897_FIXUP_HP_HSMIC_VERB,
};
static const struct hda_fixup alc662_fixups[] = {
@@ -11156,6 +11182,13 @@ static const struct hda_fixup alc662_fixups[] = {
.chained = true,
.chain_id = ALC897_FIXUP_LENOVO_HEADSET_MIC
},
+ [ALC897_FIXUP_HP_HSMIC_VERB] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */
+ { }
+ },
+ },
};
static const struct snd_pci_quirk alc662_fixup_tbl[] = {
@@ -11181,6 +11214,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = {
SND_PCI_QUIRK(0x1028, 0x0698, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x069f, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800),
+ SND_PCI_QUIRK(0x103c, 0x8719, "HP", ALC897_FIXUP_HP_HSMIC_VERB),
SND_PCI_QUIRK(0x103c, 0x873e, "HP", ALC671_FIXUP_HP_HEADSET_MIC2),
SND_PCI_QUIRK(0x103c, 0x885f, "HP 288 Pro G8", ALC671_FIXUP_HP_HEADSET_MIC2),
SND_PCI_QUIRK(0x1043, 0x1080, "Asus UX501VW", ALC668_FIXUP_HEADSET_MODE),
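
The interesting Realtek change is the 0x17aa:0x3818 entry: the Lenovo C940 and the Yoga Duet 7 ship with the same PCI SSID, so the quirk table alone cannot tell them apart. The new HDA_FIXUP_FUNC entry dispatches on the codec ID instead, forwarding through the freshly exported __snd_hda_apply_fixup(). The shape of the pattern, with MY_FIXUP_A/MY_FIXUP_B as hypothetical fixup-table indices:

    /* Sketch: one SSID, two machines, disambiguated by codec vendor ID.
     * MY_FIXUP_A and MY_FIXUP_B are hypothetical entries in the fixup table.
     */
    static void fixup_by_codec_id(struct hda_codec *codec,
                                  const struct hda_fixup *fix, int action)
    {
        int id = codec->core.vendor_id == 0x10ec0298 ? MY_FIXUP_A : MY_FIXUP_B;

        __snd_hda_apply_fixup(codec, id, action, 0);
    }

    /* registered as: .type = HDA_FIXUP_FUNC, .v.func = fixup_by_codec_id */
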
diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c
index a05304f340df..aea7fae2ca4b 100644
--- a/sound/pci/hda/patch_via.c
+++ b/sound/pci/hda/patch_via.c
@@ -518,11 +518,11 @@ static int via_parse_auto_config(struct hda_codec *codec)
if (err < 0)
return err;
- err = snd_hda_gen_parse_auto_config(codec, &spec->gen.autocfg);
+ err = auto_parse_beep(codec);
if (err < 0)
return err;
- err = auto_parse_beep(codec);
+ err = snd_hda_gen_parse_auto_config(codec, &spec->gen.autocfg);
if (err < 0)
return err;
diff --git a/sound/usb/mixer_us16x08.c b/sound/usb/mixer_us16x08.c
index b7b6f3834ed5..6eb7d93b358d 100644
--- a/sound/usb/mixer_us16x08.c
+++ b/sound/usb/mixer_us16x08.c
@@ -637,10 +637,10 @@ static int snd_get_meter_comp_index(struct snd_us16x08_meter_store *store)
}
} else {
/* skip channels with no compressor active */
- while (!store->comp_store->val[
+ while (store->comp_index <= SND_US16X08_MAX_CHANNELS
+ && !store->comp_store->val[
COMP_STORE_IDX(SND_US16X08_ID_COMP_SWITCH)]
- [store->comp_index - 1]
- && store->comp_index <= SND_US16X08_MAX_CHANNELS) {
+ [store->comp_index - 1]) {
store->comp_index++;
}
ret = store->comp_index++;
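
The mixer_us16x08.c hunk is a short-circuit ordering fix: with the old operand order the loop dereferenced val[...][comp_index - 1] first and only then tested comp_index against the bound, so the final iteration could read one element past the end of the array. In miniature:

    /* Miniature of the bug class fixed above. */
    #include <stdbool.h>

    #define MAX_CHANNELS 16

    static int next_active(const bool active[MAX_CHANNELS], int idx)
    {
        /* Buggy order reads active[MAX_CHANNELS] before bailing out:
         *     while (!active[idx - 1] && idx <= MAX_CHANNELS) idx++;
         * Fixed order checks the index before the access:
         */
        while (idx <= MAX_CHANNELS && !active[idx - 1])
            idx++;
        return idx;
    }
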
diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c
index 0d828e35b401..ab95fb34a635 100644
--- a/sound/x86/intel_hdmi_audio.c
+++ b/sound/x86/intel_hdmi_audio.c
@@ -33,6 +33,8 @@
#include <drm/intel_lpe_audio.h>
#include "intel_hdmi_audio.h"
+#define INTEL_HDMI_AUDIO_SUSPEND_DELAY_MS 5000
+
#define for_each_pipe(card_ctx, pipe) \
for ((pipe) = 0; (pipe) < (card_ctx)->num_pipes; (pipe)++)
#define for_each_port(card_ctx, port) \
@@ -1066,7 +1068,9 @@ static int had_pcm_open(struct snd_pcm_substream *substream)
intelhaddata = snd_pcm_substream_chip(substream);
runtime = substream->runtime;
- pm_runtime_get_sync(intelhaddata->dev);
+ retval = pm_runtime_resume_and_get(intelhaddata->dev);
+ if (retval < 0)
+ return retval;
/* set the runtime hw parameter with local snd_pcm_hardware struct */
runtime->hw = had_pcm_hardware;
@@ -1534,8 +1538,12 @@ static void had_audio_wq(struct work_struct *work)
container_of(work, struct snd_intelhad, hdmi_audio_wq);
struct intel_hdmi_lpe_audio_pdata *pdata = ctx->dev->platform_data;
struct intel_hdmi_lpe_audio_port_pdata *ppdata = &pdata->port[ctx->port];
+ int ret;
+
+ ret = pm_runtime_resume_and_get(ctx->dev);
+ if (ret < 0)
+ return;
- pm_runtime_get_sync(ctx->dev);
mutex_lock(&ctx->mutex);
if (ppdata->pipe < 0) {
dev_dbg(ctx->dev, "%s: Event: HAD_NOTIFY_HOT_UNPLUG : port = %d\n",
@@ -1802,8 +1810,11 @@ static int __hdmi_lpe_audio_probe(struct platform_device *pdev)
pdata->notify_audio_lpe = notify_audio_lpe;
spin_unlock_irq(&pdata->lpe_audio_slock);
+ pm_runtime_set_autosuspend_delay(&pdev->dev, INTEL_HDMI_AUDIO_SUSPEND_DELAY_MS);
pm_runtime_use_autosuspend(&pdev->dev);
+ pm_runtime_enable(&pdev->dev);
pm_runtime_mark_last_busy(&pdev->dev);
+ pm_runtime_idle(&pdev->dev);
dev_dbg(&pdev->dev, "%s: handle pending notification\n", __func__);
for_each_port(card_ctx, port) {
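
The intel_hdmi_audio hunks switch pm_runtime_get_sync() to pm_runtime_resume_and_get(). The difference that matters on error paths: pm_runtime_get_sync() leaves the usage counter raised even when resume fails, so every failure path must remember a put, whereas pm_runtime_resume_and_get() drops the reference itself on failure and the caller can simply return the error, as both call sites above now do. A sketch of the resulting pattern, with my_open() as a hypothetical caller:

    /* Error-path-safe runtime-PM usage; my_open() is hypothetical. */
    #include <linux/pm_runtime.h>

    static int my_open(struct device *dev)
    {
        int ret;

        ret = pm_runtime_resume_and_get(dev);
        if (ret < 0)            /* usage counter already dropped */
            return ret;

        /* ... talk to the hardware ... */

        pm_runtime_mark_last_busy(dev);
        pm_runtime_put_autosuspend(dev);
        return 0;
    }
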
diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h
index e09d6908a21d..8aa0d276a636 100644
--- a/tools/arch/arm64/include/asm/cputype.h
+++ b/tools/arch/arm64/include/asm/cputype.h
@@ -36,7 +36,7 @@
#define MIDR_VARIANT(midr) \
(((midr) & MIDR_VARIANT_MASK) >> MIDR_VARIANT_SHIFT)
#define MIDR_IMPLEMENTOR_SHIFT 24
-#define MIDR_IMPLEMENTOR_MASK (0xff << MIDR_IMPLEMENTOR_SHIFT)
+#define MIDR_IMPLEMENTOR_MASK (0xffU << MIDR_IMPLEMENTOR_SHIFT)
#define MIDR_IMPLEMENTOR(midr) \
(((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT)
@@ -118,6 +118,10 @@
#define APPLE_CPU_PART_M1_ICESTORM 0x022
#define APPLE_CPU_PART_M1_FIRESTORM 0x023
+#define APPLE_CPU_PART_M1_ICESTORM_PRO 0x024
+#define APPLE_CPU_PART_M1_FIRESTORM_PRO 0x025
+#define APPLE_CPU_PART_M1_ICESTORM_MAX 0x028
+#define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029
#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
@@ -164,6 +168,10 @@
#define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
#define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
#define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
+#define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO)
+#define MIDR_APPLE_M1_FIRESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_PRO)
+#define MIDR_APPLE_M1_ICESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX)
+#define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX)
/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */
#define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX
@@ -172,7 +180,7 @@
#ifndef __ASSEMBLY__
-#include "sysreg.h"
+#include <asm/sysreg.h>
#define read_cpuid(reg) read_sysreg_s(SYS_ ## reg)
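
In the cputype.h sync, the 0xffU suffix is the functional part: 0xff is a signed int, so 0xff << 24 shifts into the sign bit (undefined behaviour that sanitizers flag), and the resulting negative value sign-extends to 0xffffffffff000000 whenever the mask widens to 64 bits. A small userspace demonstration:

    /* Demonstrates why the byte mask needs the U suffix. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int64_t bad  = 0xff  << 24;  /* negative int, sign-extends */
        int64_t good = 0xffU << 24;  /* stays 0x00000000ff000000 */

        printf("bad  = %016llx\n", (unsigned long long)bad);
        printf("good = %016llx\n", (unsigned long long)good);
        return 0;
    }
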
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 73e643ae94b6..e17de69faa54 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -443,5 +443,6 @@
#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
+#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 403e83b4adc8..d27e0581b777 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -116,6 +116,30 @@
* Not susceptible to
* TSX Async Abort (TAA) vulnerabilities.
*/
+#define ARCH_CAP_SBDR_SSDP_NO BIT(13) /*
+ * Not susceptible to SBDR and SSDP
+ * variants of Processor MMIO stale data
+ * vulnerabilities.
+ */
+#define ARCH_CAP_FBSDP_NO BIT(14) /*
+ * Not susceptible to FBSDP variant of
+ * Processor MMIO stale data
+ * vulnerabilities.
+ */
+#define ARCH_CAP_PSDP_NO BIT(15) /*
+ * Not susceptible to PSDP variant of
+ * Processor MMIO stale data
+ * vulnerabilities.
+ */
+#define ARCH_CAP_FB_CLEAR BIT(17) /*
+ * VERW clears CPU fill buffer
+ * even on MDS_NO CPUs.
+ */
+#define ARCH_CAP_FB_CLEAR_CTRL BIT(18) /*
+ * MSR_IA32_MCU_OPT_CTRL[FB_CLEAR_DIS]
+ * bit available to control VERW
+ * behavior.
+ */
#define MSR_IA32_FLUSH_CMD 0x0000010b
#define L1D_FLUSH BIT(0) /*
@@ -133,6 +157,7 @@
#define MSR_IA32_MCU_OPT_CTRL 0x00000123
#define RNGDS_MITG_DIS BIT(0) /* SRBDS support */
#define RTM_ALLOW BIT(1) /* TSX development mode */
+#define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */
#define MSR_IA32_SYSENTER_CS 0x00000174
#define MSR_IA32_SYSENTER_ESP 0x00000175
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index bf6e96011dfe..21614807a2cb 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -428,11 +428,12 @@ struct kvm_sync_regs {
struct kvm_vcpu_events events;
};
-#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
-#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1)
-#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2)
-#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3)
-#define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4)
+#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
+#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1)
+#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2)
+#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3)
+#define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4)
+#define KVM_X86_QUIRK_FIX_HYPERCALL_INSN (1 << 5)
#define KVM_STATE_NESTED_FORMAT_VMX 0
#define KVM_STATE_NESTED_FORMAT_SVM 1
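
KVM_X86_QUIRK_FIX_HYPERCALL_INSN joins the quirk bits a VMM can opt out of. If I read the API right, this goes through KVM_ENABLE_CAP with KVM_CAP_DISABLE_QUIRKS2 on the VM descriptor, passing the quirk mask in args[0]; treat the sketch below as an assumption-laden illustration, with vm_fd a previously created VM:

    /* Sketch (assumes KVM_CAP_DISABLE_QUIRKS2 is reported by the kernel). */
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int disable_hypercall_patching(int vm_fd)
    {
        struct kvm_enable_cap cap = {
            .cap  = KVM_CAP_DISABLE_QUIRKS2,
            .args = { KVM_X86_QUIRK_FIX_HYPERCALL_INSN },
        };

        return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);  /* 0 on success */
    }
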
diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index a17e9aa314fd..bd015ec9847b 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -31,11 +31,17 @@ CGROUP COMMANDS
| **bpftool** **cgroup help**
|
| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
-| *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** |
-| **bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** |
-| **getpeername4** | **getpeername6** | **getsockname4** | **getsockname6** | **sendmsg4** |
-| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** |
-| **sock_release** }
+| *ATTACH_TYPE* := { **cgroup_inet_ingress** | **cgroup_inet_egress** |
+| **cgroup_inet_sock_create** | **cgroup_sock_ops** |
+| **cgroup_device** | **cgroup_inet4_bind** | **cgroup_inet6_bind** |
+| **cgroup_inet4_post_bind** | **cgroup_inet6_post_bind** |
+| **cgroup_inet4_connect** | **cgroup_inet6_connect** |
+| **cgroup_inet4_getpeername** | **cgroup_inet6_getpeername** |
+| **cgroup_inet4_getsockname** | **cgroup_inet6_getsockname** |
+| **cgroup_udp4_sendmsg** | **cgroup_udp6_sendmsg** |
+| **cgroup_udp4_recvmsg** | **cgroup_udp6_recvmsg** |
+| **cgroup_sysctl** | **cgroup_getsockopt** | **cgroup_setsockopt** |
+| **cgroup_inet_sock_release** }
| *ATTACH_FLAGS* := { **multi** | **override** }
DESCRIPTION
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index a2e9359e554c..eb1b2a254eb1 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -53,8 +53,9 @@ PROG COMMANDS
| **cgroup/getsockopt** | **cgroup/setsockopt** | **cgroup/sock_release** |
| **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup**
| }
-| *ATTACH_TYPE* := {
-| **msg_verdict** | **skb_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
+| *ATTACH_TYPE* := {
+| **sk_msg_verdict** | **sk_skb_verdict** | **sk_skb_stream_verdict** |
+| **sk_skb_stream_parser** | **flow_dissector**
| }
| *METRICs* := {
| **cycles** | **instructions** | **l1d_loads** | **llc_misses** |
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index c6d2c77d0252..c19e0e4c41bd 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -53,7 +53,7 @@ $(LIBBPF_INTERNAL_HDRS): $(LIBBPF_HDRS_DIR)/%.h: $(BPF_DIR)/%.h | $(LIBBPF_HDRS_
$(LIBBPF_BOOTSTRAP): $(wildcard $(BPF_DIR)/*.[ch] $(BPF_DIR)/Makefile) | $(LIBBPF_BOOTSTRAP_OUTPUT)
$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) \
DESTDIR=$(LIBBPF_BOOTSTRAP_DESTDIR:/=) prefix= \
- ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) $@ install_headers
+ ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) AR=$(HOSTAR) $@ install_headers
$(LIBBPF_BOOTSTRAP_INTERNAL_HDRS): $(LIBBPF_BOOTSTRAP_HDRS_DIR)/%.h: $(BPF_DIR)/%.h | $(LIBBPF_BOOTSTRAP_HDRS_DIR)
$(call QUIET_INSTALL, $@)
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 5df8d72c5179..91f89a9a5b36 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -407,8 +407,8 @@ _bpftool()
return 0
;;
5)
- local BPFTOOL_PROG_ATTACH_TYPES='msg_verdict \
- skb_verdict stream_verdict stream_parser \
+ local BPFTOOL_PROG_ATTACH_TYPES='sk_msg_verdict \
+ sk_skb_verdict sk_skb_stream_verdict sk_skb_stream_parser \
flow_dissector'
COMPREPLY=( $( compgen -W "$BPFTOOL_PROG_ATTACH_TYPES" -- "$cur" ) )
return 0
@@ -1039,12 +1039,14 @@ _bpftool()
return 0
;;
attach|detach)
- local BPFTOOL_CGROUP_ATTACH_TYPES='ingress egress \
- sock_create sock_ops device \
- bind4 bind6 post_bind4 post_bind6 connect4 connect6 \
- getpeername4 getpeername6 getsockname4 getsockname6 \
- sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl getsockopt \
- setsockopt sock_release'
+ local BPFTOOL_CGROUP_ATTACH_TYPES='cgroup_inet_ingress cgroup_inet_egress \
+ cgroup_inet_sock_create cgroup_sock_ops cgroup_device cgroup_inet4_bind \
+ cgroup_inet6_bind cgroup_inet4_post_bind cgroup_inet6_post_bind \
+ cgroup_inet4_connect cgroup_inet6_connect cgroup_inet4_getpeername \
+ cgroup_inet6_getpeername cgroup_inet4_getsockname cgroup_inet6_getsockname \
+ cgroup_udp4_sendmsg cgroup_udp6_sendmsg cgroup_udp4_recvmsg \
+ cgroup_udp6_recvmsg cgroup_sysctl cgroup_getsockopt cgroup_setsockopt \
+ cgroup_inet_sock_release'
local ATTACH_FLAGS='multi override'
local PROG_TYPE='id pinned tag name'
# Check for $prev = $command first
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 7e6accb9d9f7..0744bd1150be 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -40,6 +40,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
[BTF_KIND_FLOAT] = "FLOAT",
[BTF_KIND_DECL_TAG] = "DECL_TAG",
[BTF_KIND_TYPE_TAG] = "TYPE_TAG",
+ [BTF_KIND_ENUM64] = "ENUM64",
};
struct btf_attach_point {
@@ -212,26 +213,76 @@ static int dump_btf_type(const struct btf *btf, __u32 id,
case BTF_KIND_ENUM: {
const struct btf_enum *v = (const void *)(t + 1);
__u16 vlen = BTF_INFO_VLEN(t->info);
+ const char *encoding;
int i;
+ encoding = btf_kflag(t) ? "SIGNED" : "UNSIGNED";
if (json_output) {
+ jsonw_string_field(w, "encoding", encoding);
jsonw_uint_field(w, "size", t->size);
jsonw_uint_field(w, "vlen", vlen);
jsonw_name(w, "values");
jsonw_start_array(w);
} else {
- printf(" size=%u vlen=%u", t->size, vlen);
+ printf(" encoding=%s size=%u vlen=%u", encoding, t->size, vlen);
+ }
+ for (i = 0; i < vlen; i++, v++) {
+ const char *name = btf_str(btf, v->name_off);
+
+ if (json_output) {
+ jsonw_start_object(w);
+ jsonw_string_field(w, "name", name);
+ if (btf_kflag(t))
+ jsonw_int_field(w, "val", v->val);
+ else
+ jsonw_uint_field(w, "val", v->val);
+ jsonw_end_object(w);
+ } else {
+ if (btf_kflag(t))
+ printf("\n\t'%s' val=%d", name, v->val);
+ else
+ printf("\n\t'%s' val=%u", name, v->val);
+ }
+ }
+ if (json_output)
+ jsonw_end_array(w);
+ break;
+ }
+ case BTF_KIND_ENUM64: {
+ const struct btf_enum64 *v = btf_enum64(t);
+ __u16 vlen = btf_vlen(t);
+ const char *encoding;
+ int i;
+
+ encoding = btf_kflag(t) ? "SIGNED" : "UNSIGNED";
+ if (json_output) {
+ jsonw_string_field(w, "encoding", encoding);
+ jsonw_uint_field(w, "size", t->size);
+ jsonw_uint_field(w, "vlen", vlen);
+ jsonw_name(w, "values");
+ jsonw_start_array(w);
+ } else {
+ printf(" encoding=%s size=%u vlen=%u", encoding, t->size, vlen);
}
for (i = 0; i < vlen; i++, v++) {
const char *name = btf_str(btf, v->name_off);
+ __u64 val = ((__u64)v->val_hi32 << 32) | v->val_lo32;
if (json_output) {
jsonw_start_object(w);
jsonw_string_field(w, "name", name);
- jsonw_uint_field(w, "val", v->val);
+ if (btf_kflag(t))
+ jsonw_int_field(w, "val", val);
+ else
+ jsonw_uint_field(w, "val", val);
jsonw_end_object(w);
} else {
- printf("\n\t'%s' val=%u", name, v->val);
+ if (btf_kflag(t))
+ printf("\n\t'%s' val=%lldLL", name,
+ (long long)val);
+ else
+ printf("\n\t'%s' val=%lluULL", name,
+ (unsigned long long)val);
}
}
if (json_output)
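
BTF stores 64-bit enumerator values as two 32-bit halves (val_lo32/val_hi32) so struct btf_enum64 keeps 4-byte alignment; the dump code above reassembles them with ((__u64)hi << 32) | lo, and the btf_dumper.c matcher below compares half by half. A self-contained round trip of the split:

    /* Round trip of the enum64 hi/lo split used above. */
    #include <assert.h>
    #include <stdint.h>

    struct enum64_val {             /* mirrors btf_enum64's value fields */
        uint32_t val_lo32;
        uint32_t val_hi32;
    };

    int main(void)
    {
        uint64_t v = 0x1122334455667788ULL;
        struct enum64_val e = {
            .val_lo32 = (uint32_t)v,
            .val_hi32 = (uint32_t)(v >> 32),
        };
        uint64_t back = ((uint64_t)e.val_hi32 << 32) | e.val_lo32;

        assert(back == v);          /* lossless */
        return 0;
    }
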
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index f5dddf8ef404..125798b0bc5d 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -182,6 +182,32 @@ static int btf_dumper_enum(const struct btf_dumper *d,
return 0;
}
+static int btf_dumper_enum64(const struct btf_dumper *d,
+ const struct btf_type *t,
+ const void *data)
+{
+ const struct btf_enum64 *enums = btf_enum64(t);
+ __u32 val_lo32, val_hi32;
+ __u64 value;
+ __u16 i;
+
+ value = *(__u64 *)data;
+ val_lo32 = (__u32)value;
+ val_hi32 = value >> 32;
+
+ for (i = 0; i < btf_vlen(t); i++) {
+ if (val_lo32 == enums[i].val_lo32 && val_hi32 == enums[i].val_hi32) {
+ jsonw_string(d->jw,
+ btf__name_by_offset(d->btf,
+ enums[i].name_off));
+ return 0;
+ }
+ }
+
+ jsonw_int(d->jw, value);
+ return 0;
+}
+
static bool is_str_array(const struct btf *btf, const struct btf_array *arr,
const char *s)
{
@@ -542,6 +568,8 @@ static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id,
return btf_dumper_array(d, type_id, data);
case BTF_KIND_ENUM:
return btf_dumper_enum(d, t, data);
+ case BTF_KIND_ENUM64:
+ return btf_dumper_enum64(d, t, data);
case BTF_KIND_PTR:
btf_dumper_ptr(d, t, data);
return 0;
@@ -618,6 +646,7 @@ static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id,
btf__name_by_offset(btf, t->name_off));
break;
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
BTF_PRINT_ARG("enum %s ",
btf__name_by_offset(btf, t->name_off));
break;
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index effe136119d7..42421fe47a58 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -21,25 +21,43 @@
#define HELP_SPEC_ATTACH_FLAGS \
"ATTACH_FLAGS := { multi | override }"
-#define HELP_SPEC_ATTACH_TYPES \
- " ATTACH_TYPE := { ingress | egress | sock_create |\n" \
- " sock_ops | device | bind4 | bind6 |\n" \
- " post_bind4 | post_bind6 | connect4 |\n" \
- " connect6 | getpeername4 | getpeername6 |\n" \
- " getsockname4 | getsockname6 | sendmsg4 |\n" \
- " sendmsg6 | recvmsg4 | recvmsg6 |\n" \
- " sysctl | getsockopt | setsockopt |\n" \
- " sock_release }"
+#define HELP_SPEC_ATTACH_TYPES \
+ " ATTACH_TYPE := { cgroup_inet_ingress | cgroup_inet_egress |\n" \
+ " cgroup_inet_sock_create | cgroup_sock_ops |\n" \
+ " cgroup_device | cgroup_inet4_bind |\n" \
+ " cgroup_inet6_bind | cgroup_inet4_post_bind |\n" \
+ " cgroup_inet6_post_bind | cgroup_inet4_connect |\n" \
+ " cgroup_inet6_connect | cgroup_inet4_getpeername |\n" \
+ " cgroup_inet6_getpeername | cgroup_inet4_getsockname |\n" \
+ " cgroup_inet6_getsockname | cgroup_udp4_sendmsg |\n" \
+ " cgroup_udp6_sendmsg | cgroup_udp4_recvmsg |\n" \
+ " cgroup_udp6_recvmsg | cgroup_sysctl |\n" \
+ " cgroup_getsockopt | cgroup_setsockopt |\n" \
+ " cgroup_inet_sock_release }"
static unsigned int query_flags;
static enum bpf_attach_type parse_attach_type(const char *str)
{
+ const char *attach_type_str;
enum bpf_attach_type type;
- for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
- if (attach_type_name[type] &&
- is_prefix(str, attach_type_name[type]))
+ for (type = 0; ; type++) {
+ attach_type_str = libbpf_bpf_attach_type_str(type);
+ if (!attach_type_str)
+ break;
+ if (!strcmp(str, attach_type_str))
+ return type;
+ }
+
+ /* Also check traditionally used attach type strings. For these we keep
+ * allowing prefixed usage.
+ */
+ for (type = 0; ; type++) {
+ attach_type_str = bpf_attach_type_input_str(type);
+ if (!attach_type_str)
+ break;
+ if (is_prefix(str, attach_type_str))
return type;
}
@@ -52,6 +70,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
{
char prog_name[MAX_PROG_FULL_NAME];
struct bpf_prog_info info = {};
+ const char *attach_type_str;
__u32 info_len = sizeof(info);
int prog_fd;
@@ -64,13 +83,13 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
return -1;
}
+ attach_type_str = libbpf_bpf_attach_type_str(attach_type);
get_prog_full_name(&info, prog_fd, prog_name, sizeof(prog_name));
if (json_output) {
jsonw_start_object(json_wtr);
jsonw_uint_field(json_wtr, "id", info.id);
- if (attach_type < ARRAY_SIZE(attach_type_name))
- jsonw_string_field(json_wtr, "attach_type",
- attach_type_name[attach_type]);
+ if (attach_type_str)
+ jsonw_string_field(json_wtr, "attach_type", attach_type_str);
else
jsonw_uint_field(json_wtr, "attach_type", attach_type);
jsonw_string_field(json_wtr, "attach_flags",
@@ -79,8 +98,8 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
jsonw_end_object(json_wtr);
} else {
printf("%s%-8u ", level ? " " : "", info.id);
- if (attach_type < ARRAY_SIZE(attach_type_name))
- printf("%-15s", attach_type_name[attach_type]);
+ if (attach_type_str)
+ printf("%-15s", attach_type_str);
else
printf("type %-10u", attach_type);
printf(" %-15s %-15s\n", attach_flags_str, prog_name);
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index c740142c24d8..a0d4acd7c54a 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -17,6 +17,7 @@
#include <linux/magic.h>
#include <net/if.h>
#include <sys/mount.h>
+#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/vfs.h>
@@ -31,52 +32,6 @@
#define BPF_FS_MAGIC 0xcafe4a11
#endif
-const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
- [BPF_CGROUP_INET_INGRESS] = "ingress",
- [BPF_CGROUP_INET_EGRESS] = "egress",
- [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
- [BPF_CGROUP_INET_SOCK_RELEASE] = "sock_release",
- [BPF_CGROUP_SOCK_OPS] = "sock_ops",
- [BPF_CGROUP_DEVICE] = "device",
- [BPF_CGROUP_INET4_BIND] = "bind4",
- [BPF_CGROUP_INET6_BIND] = "bind6",
- [BPF_CGROUP_INET4_CONNECT] = "connect4",
- [BPF_CGROUP_INET6_CONNECT] = "connect6",
- [BPF_CGROUP_INET4_POST_BIND] = "post_bind4",
- [BPF_CGROUP_INET6_POST_BIND] = "post_bind6",
- [BPF_CGROUP_INET4_GETPEERNAME] = "getpeername4",
- [BPF_CGROUP_INET6_GETPEERNAME] = "getpeername6",
- [BPF_CGROUP_INET4_GETSOCKNAME] = "getsockname4",
- [BPF_CGROUP_INET6_GETSOCKNAME] = "getsockname6",
- [BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4",
- [BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6",
- [BPF_CGROUP_SYSCTL] = "sysctl",
- [BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4",
- [BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6",
- [BPF_CGROUP_GETSOCKOPT] = "getsockopt",
- [BPF_CGROUP_SETSOCKOPT] = "setsockopt",
- [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
- [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
- [BPF_SK_SKB_VERDICT] = "sk_skb_verdict",
- [BPF_SK_MSG_VERDICT] = "sk_msg_verdict",
- [BPF_LIRC_MODE2] = "lirc_mode2",
- [BPF_FLOW_DISSECTOR] = "flow_dissector",
- [BPF_TRACE_RAW_TP] = "raw_tp",
- [BPF_TRACE_FENTRY] = "fentry",
- [BPF_TRACE_FEXIT] = "fexit",
- [BPF_MODIFY_RETURN] = "mod_ret",
- [BPF_LSM_MAC] = "lsm_mac",
- [BPF_SK_LOOKUP] = "sk_lookup",
- [BPF_TRACE_ITER] = "trace_iter",
- [BPF_XDP_DEVMAP] = "xdp_devmap",
- [BPF_XDP_CPUMAP] = "xdp_cpumap",
- [BPF_XDP] = "xdp",
- [BPF_SK_REUSEPORT_SELECT] = "sk_skb_reuseport_select",
- [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_skb_reuseport_select_or_migrate",
- [BPF_PERF_EVENT] = "perf_event",
- [BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi",
-};
-
void p_err(const char *fmt, ...)
{
va_list ap;
@@ -118,6 +73,13 @@ static bool is_bpffs(char *path)
return (unsigned long)st_fs.f_type == BPF_FS_MAGIC;
}
+void set_max_rlimit(void)
+{
+ struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
+
+ setrlimit(RLIMIT_MEMLOCK, &rinf);
+}
+
static int
mnt_fs(const char *target, const char *type, char *buff, size_t bufflen)
{
@@ -1009,3 +971,39 @@ bool equal_fn_for_key_as_id(const void *k1, const void *k2, void *ctx)
{
return k1 == k2;
}
+
+const char *bpf_attach_type_input_str(enum bpf_attach_type t)
+{
+ switch (t) {
+ case BPF_CGROUP_INET_INGRESS: return "ingress";
+ case BPF_CGROUP_INET_EGRESS: return "egress";
+ case BPF_CGROUP_INET_SOCK_CREATE: return "sock_create";
+ case BPF_CGROUP_INET_SOCK_RELEASE: return "sock_release";
+ case BPF_CGROUP_SOCK_OPS: return "sock_ops";
+ case BPF_CGROUP_DEVICE: return "device";
+ case BPF_CGROUP_INET4_BIND: return "bind4";
+ case BPF_CGROUP_INET6_BIND: return "bind6";
+ case BPF_CGROUP_INET4_CONNECT: return "connect4";
+ case BPF_CGROUP_INET6_CONNECT: return "connect6";
+ case BPF_CGROUP_INET4_POST_BIND: return "post_bind4";
+ case BPF_CGROUP_INET6_POST_BIND: return "post_bind6";
+ case BPF_CGROUP_INET4_GETPEERNAME: return "getpeername4";
+ case BPF_CGROUP_INET6_GETPEERNAME: return "getpeername6";
+ case BPF_CGROUP_INET4_GETSOCKNAME: return "getsockname4";
+ case BPF_CGROUP_INET6_GETSOCKNAME: return "getsockname6";
+ case BPF_CGROUP_UDP4_SENDMSG: return "sendmsg4";
+ case BPF_CGROUP_UDP6_SENDMSG: return "sendmsg6";
+ case BPF_CGROUP_SYSCTL: return "sysctl";
+ case BPF_CGROUP_UDP4_RECVMSG: return "recvmsg4";
+ case BPF_CGROUP_UDP6_RECVMSG: return "recvmsg6";
+ case BPF_CGROUP_GETSOCKOPT: return "getsockopt";
+ case BPF_CGROUP_SETSOCKOPT: return "setsockopt";
+ case BPF_TRACE_RAW_TP: return "raw_tp";
+ case BPF_TRACE_FENTRY: return "fentry";
+ case BPF_TRACE_FEXIT: return "fexit";
+ case BPF_MODIFY_RETURN: return "mod_ret";
+ case BPF_SK_REUSEPORT_SELECT: return "sk_skb_reuseport_select";
+ case BPF_SK_REUSEPORT_SELECT_OR_MIGRATE: return "sk_skb_reuseport_select_or_migrate";
+ default: return libbpf_bpf_attach_type_str(t);
+ }
+}
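
The set_max_rlimit() addition compensates for libbpf dropping its automatic RLIMIT_MEMLOCK bump (see the LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK removal in main.c below): kernels before 5.11 charge BPF object memory against RLIMIT_MEMLOCK, while newer kernels use memcg accounting, so raising the limit is harmless there. The call sites sprinkled through feature.c, map.c, pids.c, prog.c and struct_ops.c all follow the same load-time pattern:

    /* Raise RLIMIT_MEMLOCK before loading BPF objects; needed on pre-5.11
     * kernels, a non-issue on memcg-accounted kernels.
     */
    #include <sys/resource.h>

    static void bump_memlock_rlimit(void)
    {
        struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };

        /* Failure is non-fatal: if the limit really was the problem,
         * the subsequent bpf() syscall reports EPERM anyway.
         */
        setrlimit(RLIMIT_MEMLOCK, &rinf);
    }
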
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index d12f46051aac..bac4ef428a02 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -548,8 +548,8 @@ static bool probe_prog_type_ifindex(enum bpf_prog_type prog_type, __u32 ifindex)
}
static void
-probe_prog_type(enum bpf_prog_type prog_type, bool *supported_types,
- const char *define_prefix, __u32 ifindex)
+probe_prog_type(enum bpf_prog_type prog_type, const char *prog_type_str,
+ bool *supported_types, const char *define_prefix, __u32 ifindex)
{
char feat_name[128], plain_desc[128], define_name[128];
const char *plain_comment = "eBPF program_type ";
@@ -580,20 +580,16 @@ probe_prog_type(enum bpf_prog_type prog_type, bool *supported_types,
supported_types[prog_type] |= res;
- if (!prog_type_name[prog_type]) {
- p_info("program type name not found (type %d)", prog_type);
- return;
- }
maxlen = sizeof(plain_desc) - strlen(plain_comment) - 1;
- if (strlen(prog_type_name[prog_type]) > maxlen) {
+ if (strlen(prog_type_str) > maxlen) {
p_info("program type name too long");
return;
}
- sprintf(feat_name, "have_%s_prog_type", prog_type_name[prog_type]);
- sprintf(define_name, "%s_prog_type", prog_type_name[prog_type]);
+ sprintf(feat_name, "have_%s_prog_type", prog_type_str);
+ sprintf(define_name, "%s_prog_type", prog_type_str);
uppercase(define_name, sizeof(define_name));
- sprintf(plain_desc, "%s%s", plain_comment, prog_type_name[prog_type]);
+ sprintf(plain_desc, "%s%s", plain_comment, prog_type_str);
print_bool_feature(feat_name, plain_desc, define_name, res,
define_prefix);
}
@@ -619,8 +615,8 @@ static bool probe_map_type_ifindex(enum bpf_map_type map_type, __u32 ifindex)
}
static void
-probe_map_type(enum bpf_map_type map_type, const char *define_prefix,
- __u32 ifindex)
+probe_map_type(enum bpf_map_type map_type, char const *map_type_str,
+ const char *define_prefix, __u32 ifindex)
{
char feat_name[128], plain_desc[128], define_name[128];
const char *plain_comment = "eBPF map_type ";
@@ -645,20 +641,16 @@ probe_map_type(enum bpf_map_type map_type, const char *define_prefix,
* check required for unprivileged users
*/
- if (!map_type_name[map_type]) {
- p_info("map type name not found (type %d)", map_type);
- return;
- }
maxlen = sizeof(plain_desc) - strlen(plain_comment) - 1;
- if (strlen(map_type_name[map_type]) > maxlen) {
+ if (strlen(map_type_str) > maxlen) {
p_info("map type name too long");
return;
}
- sprintf(feat_name, "have_%s_map_type", map_type_name[map_type]);
- sprintf(define_name, "%s_map_type", map_type_name[map_type]);
+ sprintf(feat_name, "have_%s_map_type", map_type_str);
+ sprintf(define_name, "%s_map_type", map_type_str);
uppercase(define_name, sizeof(define_name));
- sprintf(plain_desc, "%s%s", plain_comment, map_type_name[map_type]);
+ sprintf(plain_desc, "%s%s", plain_comment, map_type_str);
print_bool_feature(feat_name, plain_desc, define_name, res,
define_prefix);
}
@@ -728,10 +720,10 @@ probe_helper_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
}
static void
-probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
+probe_helpers_for_progtype(enum bpf_prog_type prog_type,
+ const char *prog_type_str, bool supported_type,
const char *define_prefix, __u32 ifindex)
{
- const char *ptype_name = prog_type_name[prog_type];
char feat_name[128];
unsigned int id;
bool probe_res = false;
@@ -747,12 +739,12 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
}
if (json_output) {
- sprintf(feat_name, "%s_available_helpers", ptype_name);
+ sprintf(feat_name, "%s_available_helpers", prog_type_str);
jsonw_name(json_wtr, feat_name);
jsonw_start_array(json_wtr);
} else if (!define_prefix) {
printf("eBPF helpers supported for program type %s:",
- ptype_name);
+ prog_type_str);
}
for (id = 1; id < ARRAY_SIZE(helper_name); id++) {
@@ -768,7 +760,7 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
/* fallthrough */
default:
probe_res |= probe_helper_for_progtype(prog_type, supported_type,
- define_prefix, id, ptype_name,
+ define_prefix, id, prog_type_str,
ifindex);
}
}
@@ -943,30 +935,47 @@ static void
section_program_types(bool *supported_types, const char *define_prefix,
__u32 ifindex)
{
- unsigned int i;
+ unsigned int prog_type = BPF_PROG_TYPE_UNSPEC;
+ const char *prog_type_str;
print_start_section("program_types",
"Scanning eBPF program types...",
"/*** eBPF program types ***/",
define_prefix);
- for (i = BPF_PROG_TYPE_UNSPEC + 1; i < prog_type_name_size; i++)
- probe_prog_type(i, supported_types, define_prefix, ifindex);
+ while (true) {
+ prog_type++;
+ prog_type_str = libbpf_bpf_prog_type_str(prog_type);
+ /* libbpf will return NULL for variants unknown to it. */
+ if (!prog_type_str)
+ break;
+
+ probe_prog_type(prog_type, prog_type_str, supported_types, define_prefix,
+ ifindex);
+ }
print_end_section();
}
static void section_map_types(const char *define_prefix, __u32 ifindex)
{
- unsigned int i;
+ unsigned int map_type = BPF_MAP_TYPE_UNSPEC;
+ const char *map_type_str;
print_start_section("map_types",
"Scanning eBPF map types...",
"/*** eBPF map types ***/",
define_prefix);
- for (i = BPF_MAP_TYPE_UNSPEC + 1; i < map_type_name_size; i++)
- probe_map_type(i, define_prefix, ifindex);
+ while (true) {
+ map_type++;
+ map_type_str = libbpf_bpf_map_type_str(map_type);
+ /* libbpf will return NULL for variants unknown to it. */
+ if (!map_type_str)
+ break;
+
+ probe_map_type(map_type, map_type_str, define_prefix, ifindex);
+ }
print_end_section();
}
@@ -974,7 +983,8 @@ static void section_map_types(const char *define_prefix, __u32 ifindex)
static void
section_helpers(bool *supported_types, const char *define_prefix, __u32 ifindex)
{
- unsigned int i;
+ unsigned int prog_type = BPF_PROG_TYPE_UNSPEC;
+ const char *prog_type_str;
print_start_section("helpers",
"Scanning eBPF helper functions...",
@@ -996,9 +1006,18 @@ section_helpers(bool *supported_types, const char *define_prefix, __u32 ifindex)
" %sBPF__PROG_TYPE_ ## prog_type ## __HELPER_ ## helper\n",
define_prefix, define_prefix, define_prefix,
define_prefix);
- for (i = BPF_PROG_TYPE_UNSPEC + 1; i < prog_type_name_size; i++)
- probe_helpers_for_progtype(i, supported_types[i], define_prefix,
+ while (true) {
+ prog_type++;
+ prog_type_str = libbpf_bpf_prog_type_str(prog_type);
+ /* libbpf will return NULL for variants unknown to it. */
+ if (!prog_type_str)
+ break;
+
+ probe_helpers_for_progtype(prog_type, prog_type_str,
+ supported_types[prog_type],
+ define_prefix,
ifindex);
+ }
print_end_section();
}
@@ -1148,6 +1167,8 @@ static int do_probe(int argc, char **argv)
__u32 ifindex = 0;
char *ifname;
+ set_max_rlimit();
+
while (argc) {
if (is_prefix(*argv, "kernel")) {
if (target != COMPONENT_UNSPEC) {
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 4c9477ff748d..480cbd859359 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -474,6 +474,9 @@ static void codegen_asserts(struct bpf_object *obj, const char *obj_name)
const struct btf_type *sec;
char map_ident[256], var_ident[256];
+ if (!btf)
+ return;
+
codegen("\
\n\
__attribute__((unused)) static void \n\
@@ -1747,6 +1750,7 @@ btfgen_mark_type(struct btfgen_info *info, unsigned int type_id, bool follow_poi
case BTF_KIND_INT:
case BTF_KIND_FLOAT:
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
case BTF_KIND_STRUCT:
case BTF_KIND_UNION:
break;
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index 6353a789322b..7a20931c3250 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -13,19 +13,6 @@
#include "json_writer.h"
#include "main.h"
-static const char * const link_type_name[] = {
- [BPF_LINK_TYPE_UNSPEC] = "unspec",
- [BPF_LINK_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
- [BPF_LINK_TYPE_TRACING] = "tracing",
- [BPF_LINK_TYPE_CGROUP] = "cgroup",
- [BPF_LINK_TYPE_ITER] = "iter",
- [BPF_LINK_TYPE_NETNS] = "netns",
- [BPF_LINK_TYPE_XDP] = "xdp",
- [BPF_LINK_TYPE_PERF_EVENT] = "perf_event",
- [BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi",
- [BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops",
-};
-
static struct hashmap *link_table;
static int link_parse_fd(int *argc, char ***argv)
@@ -67,9 +54,12 @@ static int link_parse_fd(int *argc, char ***argv)
static void
show_link_header_json(struct bpf_link_info *info, json_writer_t *wtr)
{
+ const char *link_type_str;
+
jsonw_uint_field(wtr, "id", info->id);
- if (info->type < ARRAY_SIZE(link_type_name))
- jsonw_string_field(wtr, "type", link_type_name[info->type]);
+ link_type_str = libbpf_bpf_link_type_str(info->type);
+ if (link_type_str)
+ jsonw_string_field(wtr, "type", link_type_str);
else
jsonw_uint_field(wtr, "type", info->type);
@@ -78,9 +68,11 @@ show_link_header_json(struct bpf_link_info *info, json_writer_t *wtr)
static void show_link_attach_type_json(__u32 attach_type, json_writer_t *wtr)
{
- if (attach_type < ARRAY_SIZE(attach_type_name))
- jsonw_string_field(wtr, "attach_type",
- attach_type_name[attach_type]);
+ const char *attach_type_str;
+
+ attach_type_str = libbpf_bpf_attach_type_str(attach_type);
+ if (attach_type_str)
+ jsonw_string_field(wtr, "attach_type", attach_type_str);
else
jsonw_uint_field(wtr, "attach_type", attach_type);
}
@@ -121,6 +113,7 @@ static int get_prog_info(int prog_id, struct bpf_prog_info *info)
static int show_link_close_json(int fd, struct bpf_link_info *info)
{
struct bpf_prog_info prog_info;
+ const char *prog_type_str;
int err;
jsonw_start_object(json_wtr);
@@ -137,12 +130,12 @@ static int show_link_close_json(int fd, struct bpf_link_info *info)
if (err)
return err;
- if (prog_info.type < prog_type_name_size)
- jsonw_string_field(json_wtr, "prog_type",
- prog_type_name[prog_info.type]);
+ prog_type_str = libbpf_bpf_prog_type_str(prog_info.type);
+ /* libbpf will return NULL for variants unknown to it. */
+ if (prog_type_str)
+ jsonw_string_field(json_wtr, "prog_type", prog_type_str);
else
- jsonw_uint_field(json_wtr, "prog_type",
- prog_info.type);
+ jsonw_uint_field(json_wtr, "prog_type", prog_info.type);
show_link_attach_type_json(info->tracing.attach_type,
json_wtr);
@@ -184,9 +177,12 @@ static int show_link_close_json(int fd, struct bpf_link_info *info)
static void show_link_header_plain(struct bpf_link_info *info)
{
+ const char *link_type_str;
+
printf("%u: ", info->id);
- if (info->type < ARRAY_SIZE(link_type_name))
- printf("%s ", link_type_name[info->type]);
+ link_type_str = libbpf_bpf_link_type_str(info->type);
+ if (link_type_str)
+ printf("%s ", link_type_str);
else
printf("type %u ", info->type);
@@ -195,8 +191,11 @@ static void show_link_header_plain(struct bpf_link_info *info)
static void show_link_attach_type_plain(__u32 attach_type)
{
- if (attach_type < ARRAY_SIZE(attach_type_name))
- printf("attach_type %s ", attach_type_name[attach_type]);
+ const char *attach_type_str;
+
+ attach_type_str = libbpf_bpf_attach_type_str(attach_type);
+ if (attach_type_str)
+ printf("attach_type %s ", attach_type_str);
else
printf("attach_type %u ", attach_type);
}
@@ -214,6 +213,7 @@ static void show_iter_plain(struct bpf_link_info *info)
static int show_link_close_plain(int fd, struct bpf_link_info *info)
{
struct bpf_prog_info prog_info;
+ const char *prog_type_str;
int err;
show_link_header_plain(info);
@@ -228,9 +228,10 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info)
if (err)
return err;
- if (prog_info.type < prog_type_name_size)
- printf("\n\tprog_type %s ",
- prog_type_name[prog_info.type]);
+ prog_type_str = libbpf_bpf_prog_type_str(prog_info.type);
+ /* libbpf will return NULL for variants unknown to it. */
+ if (prog_type_str)
+ printf("\n\tprog_type %s ", prog_type_str);
else
printf("\n\tprog_type %u ", prog_info.type);
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 9062ef2b8767..451cefc2d0da 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -508,8 +508,6 @@ int main(int argc, char **argv)
* mode for loading generated skeleton.
*/
libbpf_set_strict_mode(LIBBPF_STRICT_ALL & ~LIBBPF_STRICT_MAP_DEFINITIONS);
- } else {
- libbpf_set_strict_mode(LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK);
}
argc -= optind;
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index aa99ffab451a..589cb76b227a 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -63,14 +63,8 @@ static inline void *u64_to_ptr(__u64 ptr)
#define HELP_SPEC_LINK \
"LINK := { id LINK_ID | pinned FILE }"
-extern const char * const prog_type_name[];
-extern const size_t prog_type_name_size;
-
extern const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE];
-extern const char * const map_type_name[];
-extern const size_t map_type_name_size;
-
/* keep in sync with the definition in skeleton/pid_iter.bpf.c */
enum bpf_obj_type {
BPF_OBJ_UNKNOWN,
@@ -102,6 +96,8 @@ int detect_common_prefix(const char *arg, ...);
void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep);
void usage(void) __noreturn;
+void set_max_rlimit(void);
+
int mount_tracefs(const char *target);
struct obj_ref {
@@ -249,6 +245,20 @@ int print_all_levels(__maybe_unused enum libbpf_print_level level,
size_t hash_fn_for_key_as_id(const void *key, void *ctx);
bool equal_fn_for_key_as_id(const void *k1, const void *k2, void *ctx);
+/* bpf_attach_type_input_str - convert the provided attach type value into a
+ * textual representation that we accept for input purposes.
+ *
+ * This function is similar in nature to libbpf_bpf_attach_type_str, but
+ * recognizes some attach type names that have been used by the program in the
+ * past and which do not follow the string inference scheme that libbpf uses.
+ * These textual representations should only be used for user input.
+ *
+ * @t: The attach type
+ * Returns a pointer to a static string identifying the attach type. NULL is
+ * returned for unknown bpf_attach_type values.
+ */
+const char *bpf_attach_type_input_str(enum bpf_attach_type t);
+
static inline void *u32_as_hash_field(__u32 x)
{
return (void *)(uintptr_t)x;
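
Taken together with cgroup.c's parse_attach_type(), the header comment above describes a two-pass resolution: exact match against libbpf's canonical names first, then prefix match against the legacy bpftool spellings so old command lines keep working. Roughly, with is_prefix() and bpf_attach_type_input_str() being the bpftool-internal helpers declared in this header:

    /* Two-pass attach-type resolution as described above. */
    #include <string.h>
    #include <bpf/libbpf.h>

    static int resolve_attach_type(const char *str)
    {
        const char *name;
        int t;

        for (t = 0; (name = libbpf_bpf_attach_type_str(t)); t++)
            if (!strcmp(str, name))         /* canonical: exact only */
                return t;

        for (t = 0; (name = bpf_attach_type_input_str(t)); t++)
            if (is_prefix(str, name))       /* legacy: prefix allowed */
                return t;

        return -1;                          /* unknown */
    }
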
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 877387ef79c7..38b6bc9c26c3 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -22,42 +22,6 @@
#include "json_writer.h"
#include "main.h"
-const char * const map_type_name[] = {
- [BPF_MAP_TYPE_UNSPEC] = "unspec",
- [BPF_MAP_TYPE_HASH] = "hash",
- [BPF_MAP_TYPE_ARRAY] = "array",
- [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array",
- [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array",
- [BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash",
- [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array",
- [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace",
- [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array",
- [BPF_MAP_TYPE_LRU_HASH] = "lru_hash",
- [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash",
- [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie",
- [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps",
- [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps",
- [BPF_MAP_TYPE_DEVMAP] = "devmap",
- [BPF_MAP_TYPE_DEVMAP_HASH] = "devmap_hash",
- [BPF_MAP_TYPE_SOCKMAP] = "sockmap",
- [BPF_MAP_TYPE_CPUMAP] = "cpumap",
- [BPF_MAP_TYPE_XSKMAP] = "xskmap",
- [BPF_MAP_TYPE_SOCKHASH] = "sockhash",
- [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage",
- [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray",
- [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage",
- [BPF_MAP_TYPE_QUEUE] = "queue",
- [BPF_MAP_TYPE_STACK] = "stack",
- [BPF_MAP_TYPE_SK_STORAGE] = "sk_storage",
- [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops",
- [BPF_MAP_TYPE_RINGBUF] = "ringbuf",
- [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage",
- [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage",
- [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter",
-};
-
-const size_t map_type_name_size = ARRAY_SIZE(map_type_name);
-
static struct hashmap *map_table;
static bool map_is_per_cpu(__u32 type)
@@ -81,12 +45,18 @@ static bool map_is_map_of_progs(__u32 type)
static int map_type_from_str(const char *type)
{
+ const char *map_type_str;
unsigned int i;
- for (i = 0; i < ARRAY_SIZE(map_type_name); i++)
+ for (i = 0; ; i++) {
+ map_type_str = libbpf_bpf_map_type_str(i);
+ if (!map_type_str)
+ break;
+
/* Don't allow prefixing in case of possible future shadowing */
- if (map_type_name[i] && !strcmp(map_type_name[i], type))
+ if (!strcmp(map_type_str, type))
return i;
+ }
return -1;
}
@@ -472,9 +442,12 @@ static int parse_elem(char **argv, struct bpf_map_info *info,
static void show_map_header_json(struct bpf_map_info *info, json_writer_t *wtr)
{
+ const char *map_type_str;
+
jsonw_uint_field(wtr, "id", info->id);
- if (info->type < ARRAY_SIZE(map_type_name))
- jsonw_string_field(wtr, "type", map_type_name[info->type]);
+ map_type_str = libbpf_bpf_map_type_str(info->type);
+ if (map_type_str)
+ jsonw_string_field(wtr, "type", map_type_str);
else
jsonw_uint_field(wtr, "type", info->type);
@@ -513,10 +486,12 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
if (owner_prog_type) {
unsigned int prog_type = atoi(owner_prog_type);
+ const char *prog_type_str;
- if (prog_type < prog_type_name_size)
+ prog_type_str = libbpf_bpf_prog_type_str(prog_type);
+ if (prog_type_str)
jsonw_string_field(json_wtr, "owner_prog_type",
- prog_type_name[prog_type]);
+ prog_type_str);
else
jsonw_uint_field(json_wtr, "owner_prog_type",
prog_type);
@@ -559,9 +534,13 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
static void show_map_header_plain(struct bpf_map_info *info)
{
+ const char *map_type_str;
+
printf("%u: ", info->id);
- if (info->type < ARRAY_SIZE(map_type_name))
- printf("%s ", map_type_name[info->type]);
+
+ map_type_str = libbpf_bpf_map_type_str(info->type);
+ if (map_type_str)
+ printf("%s ", map_type_str);
else
printf("type %u ", info->type);
@@ -597,10 +576,11 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
printf("\n\t");
if (owner_prog_type) {
unsigned int prog_type = atoi(owner_prog_type);
+ const char *prog_type_str;
- if (prog_type < prog_type_name_size)
- printf("owner_prog_type %s ",
- prog_type_name[prog_type]);
+ prog_type_str = libbpf_bpf_prog_type_str(prog_type);
+ if (prog_type_str)
+ printf("owner_prog_type %s ", prog_type_str);
else
printf("owner_prog_type %d ", prog_type);
}
@@ -876,9 +856,13 @@ map_dump(int fd, struct bpf_map_info *info, json_writer_t *wtr,
}
if (info->type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
- info->value_size != 8)
+ info->value_size != 8) {
+ const char *map_type_str;
+
+ map_type_str = libbpf_bpf_map_type_str(info->type);
p_info("Warning: cannot read values from %s map with value_size != 8",
- map_type_name[info->type]);
+ map_type_str);
+ }
while (true) {
err = bpf_map_get_next_key(fd, prev_key, key);
if (err) {
@@ -1342,6 +1326,8 @@ static int do_create(int argc, char **argv)
goto exit;
}
+ set_max_rlimit();
+
fd = bpf_map_create(map_type, map_name, key_size, value_size, max_entries, &attr);
if (fd < 0) {
p_err("map create failed: %s", strerror(errno));
diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c
index e2d00d3cd868..bb6c969a114a 100644
--- a/tools/bpf/bpftool/pids.c
+++ b/tools/bpf/bpftool/pids.c
@@ -108,6 +108,7 @@ int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type)
p_err("failed to create hashmap for PID references");
return -1;
}
+ set_max_rlimit();
skel = pid_iter_bpf__open();
if (!skel) {
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 5c2c63df92e8..f081de398b60 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -36,54 +36,28 @@
#define BPF_METADATA_PREFIX "bpf_metadata_"
#define BPF_METADATA_PREFIX_LEN (sizeof(BPF_METADATA_PREFIX) - 1)
-const char * const prog_type_name[] = {
- [BPF_PROG_TYPE_UNSPEC] = "unspec",
- [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
- [BPF_PROG_TYPE_KPROBE] = "kprobe",
- [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls",
- [BPF_PROG_TYPE_SCHED_ACT] = "sched_act",
- [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint",
- [BPF_PROG_TYPE_XDP] = "xdp",
- [BPF_PROG_TYPE_PERF_EVENT] = "perf_event",
- [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb",
- [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock",
- [BPF_PROG_TYPE_LWT_IN] = "lwt_in",
- [BPF_PROG_TYPE_LWT_OUT] = "lwt_out",
- [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit",
- [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
- [BPF_PROG_TYPE_SK_SKB] = "sk_skb",
- [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device",
- [BPF_PROG_TYPE_SK_MSG] = "sk_msg",
- [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
- [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
- [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local",
- [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
- [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport",
- [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector",
- [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl",
- [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
- [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt",
- [BPF_PROG_TYPE_TRACING] = "tracing",
- [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops",
- [BPF_PROG_TYPE_EXT] = "ext",
- [BPF_PROG_TYPE_LSM] = "lsm",
- [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup",
- [BPF_PROG_TYPE_SYSCALL] = "syscall",
-};
-
-const size_t prog_type_name_size = ARRAY_SIZE(prog_type_name);
-
enum dump_mode {
DUMP_JITED,
DUMP_XLATED,
};
+static const bool attach_types[] = {
+ [BPF_SK_SKB_STREAM_PARSER] = true,
+ [BPF_SK_SKB_STREAM_VERDICT] = true,
+ [BPF_SK_SKB_VERDICT] = true,
+ [BPF_SK_MSG_VERDICT] = true,
+ [BPF_FLOW_DISSECTOR] = true,
+ [__MAX_BPF_ATTACH_TYPE] = false,
+};
+
+/* Textual representations traditionally used by the program and kept around
+ * for the sake of backwards compatibility.
+ */
static const char * const attach_type_strings[] = {
[BPF_SK_SKB_STREAM_PARSER] = "stream_parser",
[BPF_SK_SKB_STREAM_VERDICT] = "stream_verdict",
[BPF_SK_SKB_VERDICT] = "skb_verdict",
[BPF_SK_MSG_VERDICT] = "msg_verdict",
- [BPF_FLOW_DISSECTOR] = "flow_dissector",
[__MAX_BPF_ATTACH_TYPE] = NULL,
};
@@ -94,6 +68,14 @@ static enum bpf_attach_type parse_attach_type(const char *str)
enum bpf_attach_type type;
for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
+ if (attach_types[type]) {
+ const char *attach_type_str;
+
+ attach_type_str = libbpf_bpf_attach_type_str(type);
+ if (!strcmp(str, attach_type_str))
+ return type;
+ }
+
if (attach_type_strings[type] &&
is_prefix(str, attach_type_strings[type]))
return type;
@@ -428,12 +410,14 @@ out_free:
static void print_prog_header_json(struct bpf_prog_info *info, int fd)
{
+ const char *prog_type_str;
char prog_name[MAX_PROG_FULL_NAME];
jsonw_uint_field(json_wtr, "id", info->id);
- if (info->type < ARRAY_SIZE(prog_type_name))
- jsonw_string_field(json_wtr, "type",
- prog_type_name[info->type]);
+ prog_type_str = libbpf_bpf_prog_type_str(info->type);
+
+ if (prog_type_str)
+ jsonw_string_field(json_wtr, "type", prog_type_str);
else
jsonw_uint_field(json_wtr, "type", info->type);
@@ -515,11 +499,13 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
static void print_prog_header_plain(struct bpf_prog_info *info, int fd)
{
+ const char *prog_type_str;
char prog_name[MAX_PROG_FULL_NAME];
printf("%u: ", info->id);
- if (info->type < ARRAY_SIZE(prog_type_name))
- printf("%s ", prog_type_name[info->type]);
+ prog_type_str = libbpf_bpf_prog_type_str(info->type);
+ if (prog_type_str)
+ printf("%s ", prog_type_str);
else
printf("type %u ", info->type);
@@ -1604,6 +1590,8 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
}
}
+ set_max_rlimit();
+
if (verifier_logs)
/* log_level1 + log_level2 + stats, but not stable UAPI */
open_opts.kernel_log_level = 1 + 2 + 4;
@@ -2301,6 +2289,7 @@ static int do_profile(int argc, char **argv)
}
}
+ set_max_rlimit();
err = profiler_bpf__load(profile_obj);
if (err) {
p_err("failed to load profile_obj");
@@ -2374,8 +2363,8 @@ static int do_help(int argc, char **argv)
" cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n"
" cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n"
" struct_ops | fentry | fexit | freplace | sk_lookup }\n"
- " ATTACH_TYPE := { msg_verdict | skb_verdict | stream_verdict |\n"
- " stream_parser | flow_dissector }\n"
+ " ATTACH_TYPE := { sk_msg_verdict | sk_skb_verdict | sk_skb_stream_verdict |\n"
+ " sk_skb_stream_parser | flow_dissector }\n"
" METRIC := { cycles | instructions | l1d_loads | llc_misses | itlb_misses | dtlb_misses }\n"
" " HELP_SPEC_OPTIONS " |\n"
" {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} |\n"
diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c
index 2535f079ed67..e08a6ff2866c 100644
--- a/tools/bpf/bpftool/struct_ops.c
+++ b/tools/bpf/bpftool/struct_ops.c
@@ -501,6 +501,8 @@ static int do_register(int argc, char **argv)
if (libbpf_get_error(obj))
return -1;
+ set_max_rlimit();
+
if (bpf_object__load(obj)) {
bpf_object__close(obj);
return -1;
diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h
index 6491fa8fba6d..15b940ec1eac 100644
--- a/tools/include/linux/objtool.h
+++ b/tools/include/linux/objtool.h
@@ -143,6 +143,12 @@ struct unwind_hint {
.popsection
.endm
+.macro STACK_FRAME_NON_STANDARD_FP func:req
+#ifdef CONFIG_FRAME_POINTER
+ STACK_FRAME_NON_STANDARD \func
+#endif
+.endm
+
.macro ANNOTATE_NOENDBR
.Lhere_\@:
.pushsection .discard.noendbr
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f4009dbdf62d..e81362891596 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3597,10 +3597,11 @@ union bpf_attr {
*
* *iph* points to the start of the IPv4 or IPv6 header, while
* *iph_len* contains **sizeof**\ (**struct iphdr**) or
- * **sizeof**\ (**struct ip6hdr**).
+ * **sizeof**\ (**struct ipv6hdr**).
*
* *th* points to the start of the TCP header, while *th_len*
- * contains **sizeof**\ (**struct tcphdr**).
+ * contains the length of the TCP header (at least
+ * **sizeof**\ (**struct tcphdr**)).
* Return
* 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative
* error otherwise.
@@ -3783,10 +3784,11 @@ union bpf_attr {
*
* *iph* points to the start of the IPv4 or IPv6 header, while
* *iph_len* contains **sizeof**\ (**struct iphdr**) or
- * **sizeof**\ (**struct ip6hdr**).
+ * **sizeof**\ (**struct ipv6hdr**).
*
* *th* points to the start of the TCP header, while *th_len*
- * contains the length of the TCP header.
+ * contains the length of the TCP header with options (at least
+ * **sizeof**\ (**struct tcphdr**)).
* Return
* On success, lower 32 bits hold the generated SYN cookie,
* followed by 16 bits which hold the MSS value for that cookie,
@@ -5249,6 +5251,80 @@ union bpf_attr {
* Pointer to the underlying dynptr data, NULL if the dynptr is
* read-only, if the dynptr is invalid, or if the offset and length
* is out of bounds.
+ *
+ * s64 bpf_tcp_raw_gen_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th, u32 th_len)
+ * Description
+ * Try to issue a SYN cookie for the packet with corresponding
+ * IPv4/TCP headers, *iph* and *th*, without depending on a
+ * listening socket.
+ *
+ * *iph* points to the IPv4 header.
+ *
+ * *th* points to the start of the TCP header, while *th_len*
+ * contains the length of the TCP header (at least
+ * **sizeof**\ (**struct tcphdr**)).
+ * Return
+ * On success, lower 32 bits hold the generated SYN cookie,
+ * followed by 16 bits which hold the MSS value for that cookie,
+ * and the top 16 bits are unused.
+ *
+ * On failure, the returned value is one of the following:
+ *
+ * **-EINVAL** if *th_len* is invalid.
+ *
+ * s64 bpf_tcp_raw_gen_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th, u32 th_len)
+ * Description
+ * Try to issue a SYN cookie for the packet with corresponding
+ * IPv6/TCP headers, *iph* and *th*, without depending on a
+ * listening socket.
+ *
+ * *iph* points to the IPv6 header.
+ *
+ * *th* points to the start of the TCP header, while *th_len*
+ * contains the length of the TCP header (at least
+ * **sizeof**\ (**struct tcphdr**)).
+ * Return
+ * On success, lower 32 bits hold the generated SYN cookie,
+ * followed by 16 bits which hold the MSS value for that cookie,
+ * and the top 16 bits are unused.
+ *
+ * On failure, the returned value is one of the following:
+ *
+ * **-EINVAL** if *th_len* is invalid.
+ *
+ * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not built in.
+ *
+ * long bpf_tcp_raw_check_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th)
+ * Description
+ * Check whether *iph* and *th* contain a valid SYN cookie ACK
+ * without depending on a listening socket.
+ *
+ * *iph* points to the IPv4 header.
+ *
+ * *th* points to the TCP header.
+ * Return
+ * 0 if *iph* and *th* are a valid SYN cookie ACK.
+ *
+ * On failure, the returned value is one of the following:
+ *
+ * **-EACCES** if the SYN cookie is not valid.
+ *
+ * long bpf_tcp_raw_check_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th)
+ * Description
+ * Check whether *iph* and *th* contain a valid SYN cookie ACK
+ * without depending on a listening socket.
+ *
+ * *iph* points to the IPv6 header.
+ *
+ * *th* points to the TCP header.
+ * Return
+ * 0 if *iph* and *th* are a valid SYN cookie ACK.
+ *
+ * On failure, the returned value is one of the following:
+ *
+ * **-EACCES** if the SYN cookie is not valid.
+ *
+ * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not built in.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -5455,6 +5531,10 @@ union bpf_attr {
FN(dynptr_read), \
FN(dynptr_write), \
FN(dynptr_data), \
+ FN(tcp_raw_gen_syncookie_ipv4), \
+ FN(tcp_raw_gen_syncookie_ipv6), \
+ FN(tcp_raw_check_syncookie_ipv4), \
+ FN(tcp_raw_check_syncookie_ipv6), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
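
As documented above, the raw gen_syncookie helpers pack the cookie and the MSS into one s64. A minimal sketch of unpacking a successful (non-negative) return value:

#include <linux/types.h>

/* Sketch: split the s64 result of bpf_tcp_raw_gen_syncookie_ipv4/6:
 * lower 32 bits carry the SYN cookie, the next 16 bits the MSS. */
static inline void unpack_syncookie(__s64 ret, __u32 *cookie, __u16 *mss)
{
	*cookie = (__u32)ret;
	*mss = (__u16)(ret >> 32);
}
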
diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
index a9162a6c0284..ec1798b6d3ff 100644
--- a/tools/include/uapi/linux/btf.h
+++ b/tools/include/uapi/linux/btf.h
@@ -36,10 +36,10 @@ struct btf_type {
* bits 24-28: kind (e.g. int, ptr, array...etc)
* bits 29-30: unused
* bit 31: kind_flag, currently used by
- * struct, union and fwd
+ * struct, union, enum, fwd and enum64
*/
__u32 info;
- /* "size" is used by INT, ENUM, STRUCT, UNION and DATASEC.
+ /* "size" is used by INT, ENUM, STRUCT, UNION, DATASEC and ENUM64.
* "size" tells the size of the type it is describing.
*
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
@@ -63,7 +63,7 @@ enum {
BTF_KIND_ARRAY = 3, /* Array */
BTF_KIND_STRUCT = 4, /* Struct */
BTF_KIND_UNION = 5, /* Union */
- BTF_KIND_ENUM = 6, /* Enumeration */
+ BTF_KIND_ENUM = 6, /* Enumeration up to 32-bit values */
BTF_KIND_FWD = 7, /* Forward */
BTF_KIND_TYPEDEF = 8, /* Typedef */
BTF_KIND_VOLATILE = 9, /* Volatile */
@@ -76,6 +76,7 @@ enum {
BTF_KIND_FLOAT = 16, /* Floating point */
BTF_KIND_DECL_TAG = 17, /* Decl Tag */
BTF_KIND_TYPE_TAG = 18, /* Type Tag */
+ BTF_KIND_ENUM64 = 19, /* Enumeration up to 64-bit values */
NR_BTF_KINDS,
BTF_KIND_MAX = NR_BTF_KINDS - 1,
@@ -186,4 +187,14 @@ struct btf_decl_tag {
__s32 component_idx;
};
+/* BTF_KIND_ENUM64 is followed by multiple "struct btf_enum64".
+ * The exact number of btf_enum64 is stored in the vlen (of the
+ * info in "struct btf_type").
+ */
+struct btf_enum64 {
+ __u32 name_off;
+ __u32 val_lo32;
+ __u32 val_hi32;
+};
+
#endif /* _UAPI__LINUX_BTF_H__ */
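
Since struct btf_enum64 stores each value as two 32-bit halves, consumers reassemble it with a shift-or; a sketch matching the btf_enum64_value() accessor that the libbpf half of this patch introduces:

#include <linux/btf.h>

/* Sketch: round trip between a 64-bit enumerator value and the
 * lo32/hi32 representation used in the type section. */
static inline __u64 enum64_get(const struct btf_enum64 *e)
{
	return ((__u64)e->val_hi32 << 32) | e->val_lo32;
}

static inline void enum64_set(struct btf_enum64 *e, __u64 val)
{
	e->val_lo32 = (__u32)val;
	e->val_hi32 = (__u32)(val >> 32);
}
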
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index b339bf2196ca..0242f31e339c 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -890,6 +890,7 @@ enum {
IFLA_BOND_SLAVE_AD_AGGREGATOR_ID,
IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE,
IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE,
+ IFLA_BOND_SLAVE_PRIO,
__IFLA_BOND_SLAVE_MAX,
};
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index e998764f0262..a5e06dcbba13 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -272,6 +272,15 @@ struct prctl_mm_map {
# define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1
# define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2
+/* arm64 Scalable Matrix Extension controls */
+/* Flag values must be in sync with SVE versions */
+#define PR_SME_SET_VL 63 /* set task vector length */
+# define PR_SME_SET_VL_ONEXEC (1 << 18) /* defer effect until exec */
+#define PR_SME_GET_VL 64 /* get task vector length */
+/* Bits common to PR_SME_SET_VL and PR_SME_GET_VL */
+# define PR_SME_VL_LEN_MASK 0xffff
+# define PR_SME_VL_INHERIT (1 << 17) /* inherit across exec */
+
#define PR_SET_VMA 0x53564d41
# define PR_SET_VMA_ANON_NAME 0
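
The new SME controls deliberately mirror the SVE prctls, as the sync note above says. A hedged userspace sketch, assuming the SVE-style semantics and headers that expose the new PR_SME_* constants (error handling elided):

#include <sys/prctl.h>

/* Sketch: read the current SME vector length configuration, then
 * re-apply the length with the inherit-across-exec flag set. */
static void sme_keep_vl_on_exec(void)
{
	int vl = prctl(PR_SME_GET_VL);

	if (vl >= 0)
		prctl(PR_SME_SET_VL,
		      (vl & PR_SME_VL_LEN_MASK) | PR_SME_VL_INHERIT);
}
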
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index bb1e06eb1eca..ae1520f7e1b0 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -130,7 +130,7 @@ static inline __u64 ptr_to_u64(const void *ptr)
/* Ensure given dynamically allocated memory region pointed to by *data* with
* capacity of *cap_cnt* elements each taking *elem_sz* bytes has enough
- * memory to accomodate *add_cnt* new elements, assuming *cur_cnt* elements
+ * memory to accommodate *add_cnt* new elements, assuming *cur_cnt* elements
* are already used. At most *max_cnt* elements can ever be allocated.
* If necessary, memory is reallocated and all existing data is copied over,
* new pointer to the memory region is stored at *data, new memory region
@@ -305,6 +305,8 @@ static int btf_type_size(const struct btf_type *t)
return base_size + sizeof(__u32);
case BTF_KIND_ENUM:
return base_size + vlen * sizeof(struct btf_enum);
+ case BTF_KIND_ENUM64:
+ return base_size + vlen * sizeof(struct btf_enum64);
case BTF_KIND_ARRAY:
return base_size + sizeof(struct btf_array);
case BTF_KIND_STRUCT:
@@ -334,6 +336,7 @@ static void btf_bswap_type_base(struct btf_type *t)
static int btf_bswap_type_rest(struct btf_type *t)
{
struct btf_var_secinfo *v;
+ struct btf_enum64 *e64;
struct btf_member *m;
struct btf_array *a;
struct btf_param *p;
@@ -361,6 +364,13 @@ static int btf_bswap_type_rest(struct btf_type *t)
e->val = bswap_32(e->val);
}
return 0;
+ case BTF_KIND_ENUM64:
+ for (i = 0, e64 = btf_enum64(t); i < vlen; i++, e64++) {
+ e64->name_off = bswap_32(e64->name_off);
+ e64->val_lo32 = bswap_32(e64->val_lo32);
+ e64->val_hi32 = bswap_32(e64->val_hi32);
+ }
+ return 0;
case BTF_KIND_ARRAY:
a = btf_array(t);
a->type = bswap_32(a->type);
@@ -472,9 +482,22 @@ const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id)
static int determine_ptr_size(const struct btf *btf)
{
+ static const char * const long_aliases[] = {
+ "long",
+ "long int",
+ "int long",
+ "unsigned long",
+ "long unsigned",
+ "unsigned long int",
+ "unsigned int long",
+ "long unsigned int",
+ "long int unsigned",
+ "int unsigned long",
+ "int long unsigned",
+ };
const struct btf_type *t;
const char *name;
- int i, n;
+ int i, j, n;
if (btf->base_btf && btf->base_btf->ptr_sz > 0)
return btf->base_btf->ptr_sz;
@@ -485,15 +508,16 @@ static int determine_ptr_size(const struct btf *btf)
if (!btf_is_int(t))
continue;
+ if (t->size != 4 && t->size != 8)
+ continue;
+
name = btf__name_by_offset(btf, t->name_off);
if (!name)
continue;
- if (strcmp(name, "long int") == 0 ||
- strcmp(name, "long unsigned int") == 0) {
- if (t->size != 4 && t->size != 8)
- continue;
- return t->size;
+ for (j = 0; j < ARRAY_SIZE(long_aliases); j++) {
+ if (strcmp(name, long_aliases[j]) == 0)
+ return t->size;
}
}
@@ -597,6 +621,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
case BTF_KIND_STRUCT:
case BTF_KIND_UNION:
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
case BTF_KIND_DATASEC:
case BTF_KIND_FLOAT:
size = t->size;
@@ -644,6 +669,7 @@ int btf__align_of(const struct btf *btf, __u32 id)
switch (kind) {
case BTF_KIND_INT:
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
case BTF_KIND_FLOAT:
return min(btf_ptr_sz(btf), (size_t)t->size);
case BTF_KIND_PTR:
@@ -2115,20 +2141,8 @@ int btf__add_field(struct btf *btf, const char *name, int type_id,
return 0;
}
-/*
- * Append new BTF_KIND_ENUM type with:
- * - *name* - name of the enum, can be NULL or empty for anonymous enums;
- * - *byte_sz* - size of the enum, in bytes.
- *
- * Enum initially has no enum values in it (and corresponds to enum forward
- * declaration). Enumerator values can be added by btf__add_enum_value()
- * immediately after btf__add_enum() succeeds.
- *
- * Returns:
- * - >0, type ID of newly added BTF type;
- * - <0, on error.
- */
-int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz)
+static int btf_add_enum_common(struct btf *btf, const char *name, __u32 byte_sz,
+ bool is_signed, __u8 kind)
{
struct btf_type *t;
int sz, name_off = 0;
@@ -2153,13 +2167,35 @@ int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz)
/* start out with vlen=0; it will be adjusted when adding enum values */
t->name_off = name_off;
- t->info = btf_type_info(BTF_KIND_ENUM, 0, 0);
+ t->info = btf_type_info(kind, 0, is_signed);
t->size = byte_sz;
return btf_commit_type(btf, sz);
}
/*
+ * Append new BTF_KIND_ENUM type with:
+ * - *name* - name of the enum, can be NULL or empty for anonymous enums;
+ * - *byte_sz* - size of the enum, in bytes.
+ *
+ * Enum initially has no enum values in it (and corresponds to enum forward
+ * declaration). Enumerator values can be added by btf__add_enum_value()
+ * immediately after btf__add_enum() succeeds.
+ *
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz)
+{
+ /*
+ * set the signedness to be unsigned, it will change to signed
+ * if any later enumerator is negative.
+ */
+ return btf_add_enum_common(btf, name, byte_sz, false, BTF_KIND_ENUM);
+}
+
+/*
* Append new enum value for the current ENUM type with:
* - *name* - name of the enumerator value, can't be NULL or empty;
* - *value* - integer value corresponding to enum value *name*;
@@ -2206,6 +2242,82 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
t = btf_last_type(btf);
btf_type_inc_vlen(t);
+ /* if negative value, set signedness to signed */
+ if (value < 0)
+ t->info = btf_type_info(btf_kind(t), btf_vlen(t), true);
+
+ btf->hdr->type_len += sz;
+ btf->hdr->str_off += sz;
+ return 0;
+}
+
+/*
+ * Append new BTF_KIND_ENUM64 type with:
+ * - *name* - name of the enum, can be NULL or empty for anonymous enums;
+ * - *byte_sz* - size of the enum, in bytes.
+ * - *is_signed* - whether the enum values are signed or not;
+ *
+ * Enum initially has no enum values in it (and corresponds to enum forward
+ * declaration). Enumerator values can be added by btf__add_enum64_value()
+ * immediately after btf__add_enum64() succeeds.
+ *
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_enum64(struct btf *btf, const char *name, __u32 byte_sz,
+ bool is_signed)
+{
+ return btf_add_enum_common(btf, name, byte_sz, is_signed,
+ BTF_KIND_ENUM64);
+}
+
+/*
+ * Append new enum value for the current ENUM64 type with:
+ * - *name* - name of the enumerator value, can't be NULL or empty;
+ * - *value* - integer value corresponding to enum value *name*;
+ * Returns:
+ * - 0, on success;
+ * - <0, on error.
+ */
+int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value)
+{
+ struct btf_enum64 *v;
+ struct btf_type *t;
+ int sz, name_off;
+
+ /* last type should be BTF_KIND_ENUM64 */
+ if (btf->nr_types == 0)
+ return libbpf_err(-EINVAL);
+ t = btf_last_type(btf);
+ if (!btf_is_enum64(t))
+ return libbpf_err(-EINVAL);
+
+ /* non-empty name */
+ if (!name || !name[0])
+ return libbpf_err(-EINVAL);
+
+ /* decompose and invalidate raw data */
+ if (btf_ensure_modifiable(btf))
+ return libbpf_err(-ENOMEM);
+
+ sz = sizeof(struct btf_enum64);
+ v = btf_add_type_mem(btf, sz);
+ if (!v)
+ return libbpf_err(-ENOMEM);
+
+ name_off = btf__add_str(btf, name);
+ if (name_off < 0)
+ return name_off;
+
+ v->name_off = name_off;
+ v->val_lo32 = (__u32)value;
+ v->val_hi32 = value >> 32;
+
+ /* update parent type's vlen */
+ t = btf_last_type(btf);
+ btf_type_inc_vlen(t);
+
btf->hdr->type_len += sz;
btf->hdr->str_off += sz;
return 0;
@@ -3470,7 +3582,7 @@ static bool btf_equal_int_tag(struct btf_type *t1, struct btf_type *t2)
return info1 == info2;
}
-/* Calculate type signature hash of ENUM. */
+/* Calculate type signature hash of ENUM/ENUM64. */
static long btf_hash_enum(struct btf_type *t)
{
long h;
@@ -3504,9 +3616,31 @@ static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2)
return true;
}
+static bool btf_equal_enum64(struct btf_type *t1, struct btf_type *t2)
+{
+ const struct btf_enum64 *m1, *m2;
+ __u16 vlen;
+ int i;
+
+ if (!btf_equal_common(t1, t2))
+ return false;
+
+ vlen = btf_vlen(t1);
+ m1 = btf_enum64(t1);
+ m2 = btf_enum64(t2);
+ for (i = 0; i < vlen; i++) {
+ if (m1->name_off != m2->name_off || m1->val_lo32 != m2->val_lo32 ||
+ m1->val_hi32 != m2->val_hi32)
+ return false;
+ m1++;
+ m2++;
+ }
+ return true;
+}
+
static inline bool btf_is_enum_fwd(struct btf_type *t)
{
- return btf_is_enum(t) && btf_vlen(t) == 0;
+ return btf_is_any_enum(t) && btf_vlen(t) == 0;
}
static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2)
@@ -3519,6 +3653,17 @@ static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2)
t1->size == t2->size;
}
+static bool btf_compat_enum64(struct btf_type *t1, struct btf_type *t2)
+{
+ if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2))
+ return btf_equal_enum64(t1, t2);
+
+ /* ignore vlen when comparing */
+ return t1->name_off == t2->name_off &&
+ (t1->info & ~0xffff) == (t2->info & ~0xffff) &&
+ t1->size == t2->size;
+}
+
/*
* Calculate type signature hash of STRUCT/UNION, ignoring referenced type IDs,
* as referenced type IDs equivalence is established separately during type
@@ -3731,6 +3876,7 @@ static int btf_dedup_prep(struct btf_dedup *d)
h = btf_hash_int_decl_tag(t);
break;
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
h = btf_hash_enum(t);
break;
case BTF_KIND_STRUCT:
@@ -3820,6 +3966,27 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
}
break;
+ case BTF_KIND_ENUM64:
+ h = btf_hash_enum(t);
+ for_each_dedup_cand(d, hash_entry, h) {
+ cand_id = (__u32)(long)hash_entry->value;
+ cand = btf_type_by_id(d->btf, cand_id);
+ if (btf_equal_enum64(t, cand)) {
+ new_id = cand_id;
+ break;
+ }
+ if (btf_compat_enum64(t, cand)) {
+ if (btf_is_enum_fwd(t)) {
+ /* resolve fwd to full enum */
+ new_id = cand_id;
+ break;
+ }
+ /* resolve canonical enum fwd to full enum */
+ d->map[cand_id] = type_id;
+ }
+ }
+ break;
+
case BTF_KIND_FWD:
case BTF_KIND_FLOAT:
h = btf_hash_common(t);
@@ -4115,6 +4282,9 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
case BTF_KIND_ENUM:
return btf_compat_enum(cand_type, canon_type);
+ case BTF_KIND_ENUM64:
+ return btf_compat_enum64(cand_type, canon_type);
+
case BTF_KIND_FWD:
case BTF_KIND_FLOAT:
return btf_equal_common(cand_type, canon_type);
@@ -4717,6 +4887,7 @@ int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ct
case BTF_KIND_INT:
case BTF_KIND_FLOAT:
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
return 0;
case BTF_KIND_FWD:
@@ -4811,6 +4982,16 @@ int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ct
}
break;
}
+ case BTF_KIND_ENUM64: {
+ struct btf_enum64 *m = btf_enum64(t);
+
+ for (i = 0, n = btf_vlen(t); i < n; i++, m++) {
+ err = visit(&m->name_off, ctx);
+ if (err)
+ return err;
+ }
+ break;
+ }
case BTF_KIND_FUNC_PROTO: {
struct btf_param *m = btf_params(t);
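
The two new construction APIs pair just like their 32-bit counterparts, per the comments above. A minimal sketch (type name and values are illustrative):

#include <stdbool.h>
#include <bpf/btf.h>

/* Sketch: build an unsigned, 8-byte ENUM64 with two enumerators. */
static int add_big_flags(struct btf *btf)
{
	int id = btf__add_enum64(btf, "big_flags", 8, false);

	if (id < 0)
		return id;
	if (btf__add_enum64_value(btf, "BIG_FLAG_LO", 1ULL) < 0 ||
	    btf__add_enum64_value(btf, "BIG_FLAG_HI", 1ULL << 40) < 0)
		return -1;
	return id;
}
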
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 951ac7475794..9fb416eb5644 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -215,6 +215,8 @@ LIBBPF_API int btf__add_field(struct btf *btf, const char *name, int field_type_
/* enum construction APIs */
LIBBPF_API int btf__add_enum(struct btf *btf, const char *name, __u32 bytes_sz);
LIBBPF_API int btf__add_enum_value(struct btf *btf, const char *name, __s64 value);
+LIBBPF_API int btf__add_enum64(struct btf *btf, const char *name, __u32 bytes_sz, bool is_signed);
+LIBBPF_API int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value);
enum btf_fwd_kind {
BTF_FWD_STRUCT = 0,
@@ -393,9 +395,10 @@ btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
#ifndef BTF_KIND_FLOAT
#define BTF_KIND_FLOAT 16 /* Floating point */
#endif
-/* The kernel header switched to enums, so these two were never #defined */
+/* The kernel header switched to enums, so the following were never #defined */
#define BTF_KIND_DECL_TAG 17 /* Decl Tag */
#define BTF_KIND_TYPE_TAG 18 /* Type Tag */
+#define BTF_KIND_ENUM64 19 /* Enumeration up to 64-bit values */
static inline __u16 btf_kind(const struct btf_type *t)
{
@@ -454,6 +457,11 @@ static inline bool btf_is_enum(const struct btf_type *t)
return btf_kind(t) == BTF_KIND_ENUM;
}
+static inline bool btf_is_enum64(const struct btf_type *t)
+{
+ return btf_kind(t) == BTF_KIND_ENUM64;
+}
+
static inline bool btf_is_fwd(const struct btf_type *t)
{
return btf_kind(t) == BTF_KIND_FWD;
@@ -524,6 +532,18 @@ static inline bool btf_is_type_tag(const struct btf_type *t)
return btf_kind(t) == BTF_KIND_TYPE_TAG;
}
+static inline bool btf_is_any_enum(const struct btf_type *t)
+{
+ return btf_is_enum(t) || btf_is_enum64(t);
+}
+
+static inline bool btf_kind_core_compat(const struct btf_type *t1,
+ const struct btf_type *t2)
+{
+ return btf_kind(t1) == btf_kind(t2) ||
+ (btf_is_any_enum(t1) && btf_is_any_enum(t2));
+}
+
static inline __u8 btf_int_encoding(const struct btf_type *t)
{
return BTF_INT_ENCODING(*(__u32 *)(t + 1));
@@ -549,6 +569,16 @@ static inline struct btf_enum *btf_enum(const struct btf_type *t)
return (struct btf_enum *)(t + 1);
}
+static inline struct btf_enum64 *btf_enum64(const struct btf_type *t)
+{
+ return (struct btf_enum64 *)(t + 1);
+}
+
+static inline __u64 btf_enum64_value(const struct btf_enum64 *e)
+{
+ return ((__u64)e->val_hi32 << 32) | e->val_lo32;
+}
+
static inline struct btf_member *btf_members(const struct btf_type *t)
{
return (struct btf_member *)(t + 1);
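
Together with btf_is_any_enum(), the accessors above let callers handle either enum flavor uniformly. A sketch that prints the enumerators of a type which may be ENUM or ENUM64:

#include <stdio.h>
#include <bpf/btf.h>

/* Sketch: dump all enumerators of t, whichever enum kind it is. */
static void print_enumerators(const struct btf *btf, const struct btf_type *t)
{
	int i, n = btf_vlen(t);

	if (btf_is_enum(t)) {
		const struct btf_enum *e = btf_enum(t);

		for (i = 0; i < n; i++, e++)
			printf("%s = %u\n",
			       btf__name_by_offset(btf, e->name_off), e->val);
	} else if (btf_is_enum64(t)) {
		const struct btf_enum64 *e = btf_enum64(t);

		for (i = 0; i < n; i++, e++)
			printf("%s = %llu\n",
			       btf__name_by_offset(btf, e->name_off),
			       (unsigned long long)btf_enum64_value(e));
	}
}
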
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 6b1bc1f43728..f5275f819027 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -318,6 +318,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d)
switch (btf_kind(t)) {
case BTF_KIND_INT:
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
case BTF_KIND_FWD:
case BTF_KIND_FLOAT:
break;
@@ -538,6 +539,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
return 1;
}
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
case BTF_KIND_FWD:
/*
* non-anonymous or non-referenced enums are top-level
@@ -739,6 +741,7 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
tstate->emit_state = EMITTED;
break;
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
if (top_level_def) {
btf_dump_emit_enum_def(d, id, t, 0);
btf_dump_printf(d, ";\n\n");
@@ -989,38 +992,81 @@ static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id,
btf_dump_printf(d, "enum %s", btf_dump_type_name(d, id));
}
-static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id,
- const struct btf_type *t,
- int lvl)
+static void btf_dump_emit_enum32_val(struct btf_dump *d,
+ const struct btf_type *t,
+ int lvl, __u16 vlen)
{
const struct btf_enum *v = btf_enum(t);
- __u16 vlen = btf_vlen(t);
+ bool is_signed = btf_kflag(t);
+ const char *fmt_str;
const char *name;
size_t dup_cnt;
int i;
+ for (i = 0; i < vlen; i++, v++) {
+ name = btf_name_of(d, v->name_off);
+ /* enumerators share namespace with typedef idents */
+ dup_cnt = btf_dump_name_dups(d, d->ident_names, name);
+ if (dup_cnt > 1) {
+ fmt_str = is_signed ? "\n%s%s___%zd = %d," : "\n%s%s___%zd = %u,";
+ btf_dump_printf(d, fmt_str, pfx(lvl + 1), name, dup_cnt, v->val);
+ } else {
+ fmt_str = is_signed ? "\n%s%s = %d," : "\n%s%s = %u,";
+ btf_dump_printf(d, fmt_str, pfx(lvl + 1), name, v->val);
+ }
+ }
+}
+
+static void btf_dump_emit_enum64_val(struct btf_dump *d,
+ const struct btf_type *t,
+ int lvl, __u16 vlen)
+{
+ const struct btf_enum64 *v = btf_enum64(t);
+ bool is_signed = btf_kflag(t);
+ const char *fmt_str;
+ const char *name;
+ size_t dup_cnt;
+ __u64 val;
+ int i;
+
+ for (i = 0; i < vlen; i++, v++) {
+ name = btf_name_of(d, v->name_off);
+ dup_cnt = btf_dump_name_dups(d, d->ident_names, name);
+ val = btf_enum64_value(v);
+ if (dup_cnt > 1) {
+ fmt_str = is_signed ? "\n%s%s___%zd = %lldLL,"
+ : "\n%s%s___%zd = %lluULL,";
+ btf_dump_printf(d, fmt_str,
+ pfx(lvl + 1), name, dup_cnt,
+ (unsigned long long)val);
+ } else {
+ fmt_str = is_signed ? "\n%s%s = %lldLL,"
+ : "\n%s%s = %lluULL,";
+ btf_dump_printf(d, fmt_str,
+ pfx(lvl + 1), name,
+ (unsigned long long)val);
+ }
+ }
+}
+static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id,
+ const struct btf_type *t,
+ int lvl)
+{
+ __u16 vlen = btf_vlen(t);
+
btf_dump_printf(d, "enum%s%s",
t->name_off ? " " : "",
btf_dump_type_name(d, id));
- if (vlen) {
- btf_dump_printf(d, " {");
- for (i = 0; i < vlen; i++, v++) {
- name = btf_name_of(d, v->name_off);
- /* enumerators share namespace with typedef idents */
- dup_cnt = btf_dump_name_dups(d, d->ident_names, name);
- if (dup_cnt > 1) {
- btf_dump_printf(d, "\n%s%s___%zu = %u,",
- pfx(lvl + 1), name, dup_cnt,
- (__u32)v->val);
- } else {
- btf_dump_printf(d, "\n%s%s = %u,",
- pfx(lvl + 1), name,
- (__u32)v->val);
- }
- }
- btf_dump_printf(d, "\n%s}", pfx(lvl));
- }
+ if (!vlen)
+ return;
+
+ btf_dump_printf(d, " {");
+ if (btf_is_enum(t))
+ btf_dump_emit_enum32_val(d, t, lvl, vlen);
+ else
+ btf_dump_emit_enum64_val(d, t, lvl, vlen);
+ btf_dump_printf(d, "\n%s}", pfx(lvl));
}
static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id,
@@ -1178,6 +1224,7 @@ skip_mod:
break;
case BTF_KIND_INT:
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
case BTF_KIND_FWD:
case BTF_KIND_STRUCT:
case BTF_KIND_UNION:
@@ -1312,6 +1359,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
btf_dump_emit_struct_fwd(d, id, t);
break;
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
btf_dump_emit_mods(d, decls);
/* inline anonymous enum */
if (t->name_off == 0 && !d->skip_anon_defs)
@@ -1988,7 +2036,8 @@ static int btf_dump_get_enum_value(struct btf_dump *d,
__u32 id,
__s64 *value)
{
- /* handle unaligned enum value */
+ bool is_signed = btf_kflag(t);
+
if (!ptr_is_aligned(d->btf, id, data)) {
__u64 val;
int err;
@@ -2005,13 +2054,13 @@ static int btf_dump_get_enum_value(struct btf_dump *d,
*value = *(__s64 *)data;
return 0;
case 4:
- *value = *(__s32 *)data;
+ *value = is_signed ? *(__s32 *)data : *(__u32 *)data;
return 0;
case 2:
- *value = *(__s16 *)data;
+ *value = is_signed ? *(__s16 *)data : *(__u16 *)data;
return 0;
case 1:
- *value = *(__s8 *)data;
+ *value = is_signed ? *(__s8 *)data : *(__u8 *)data;
return 0;
default:
pr_warn("unexpected size %d for enum, id:[%u]\n", t->size, id);
@@ -2024,7 +2073,7 @@ static int btf_dump_enum_data(struct btf_dump *d,
__u32 id,
const void *data)
{
- const struct btf_enum *e;
+ bool is_signed;
__s64 value;
int i, err;
@@ -2032,14 +2081,31 @@ static int btf_dump_enum_data(struct btf_dump *d,
if (err)
return err;
- for (i = 0, e = btf_enum(t); i < btf_vlen(t); i++, e++) {
- if (value != e->val)
- continue;
- btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off));
- return 0;
- }
+ is_signed = btf_kflag(t);
+ if (btf_is_enum(t)) {
+ const struct btf_enum *e;
+
+ for (i = 0, e = btf_enum(t); i < btf_vlen(t); i++, e++) {
+ if (value != e->val)
+ continue;
+ btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off));
+ return 0;
+ }
- btf_dump_type_values(d, "%d", value);
+ btf_dump_type_values(d, is_signed ? "%d" : "%u", value);
+ } else {
+ const struct btf_enum64 *e;
+
+ for (i = 0, e = btf_enum64(t); i < btf_vlen(t); i++, e++) {
+ if (value != btf_enum64_value(e))
+ continue;
+ btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off));
+ return 0;
+ }
+
+ btf_dump_type_values(d, is_signed ? "%lldLL" : "%lluULL",
+ (unsigned long long)value);
+ }
return 0;
}
@@ -2099,6 +2165,7 @@ static int btf_dump_type_data_check_overflow(struct btf_dump *d,
case BTF_KIND_FLOAT:
case BTF_KIND_PTR:
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
if (data + bits_offset / 8 + size > d->typed_dump->data_end)
return -E2BIG;
break;
@@ -2203,6 +2270,7 @@ static int btf_dump_type_data_check_zero(struct btf_dump *d,
return -ENODATA;
}
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
err = btf_dump_get_enum_value(d, t, data, id, &value);
if (err)
return err;
@@ -2275,6 +2343,7 @@ static int btf_dump_dump_type_data(struct btf_dump *d,
err = btf_dump_struct_data(d, t, id, data);
break;
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
/* handle bitfield and int enum values */
if (bit_sz) {
__u64 print_num;
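
With the split emitters above, 64-bit enums are dumped with explicit LL/ULL suffixes so the generated header stays compilable. Illustrative output for an unsigned ENUM64 (names and values hypothetical):

/* Sketch of btf_dump output for an unsigned BTF_KIND_ENUM64. */
enum big_flags {
	BIG_FLAG_LO = 1ULL,
	BIG_FLAG_HI = 1099511627776ULL,
};
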
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e89cc9c885b3..49e359cd34df 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -72,6 +72,134 @@
static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);
+static const char * const attach_type_name[] = {
+ [BPF_CGROUP_INET_INGRESS] = "cgroup_inet_ingress",
+ [BPF_CGROUP_INET_EGRESS] = "cgroup_inet_egress",
+ [BPF_CGROUP_INET_SOCK_CREATE] = "cgroup_inet_sock_create",
+ [BPF_CGROUP_INET_SOCK_RELEASE] = "cgroup_inet_sock_release",
+ [BPF_CGROUP_SOCK_OPS] = "cgroup_sock_ops",
+ [BPF_CGROUP_DEVICE] = "cgroup_device",
+ [BPF_CGROUP_INET4_BIND] = "cgroup_inet4_bind",
+ [BPF_CGROUP_INET6_BIND] = "cgroup_inet6_bind",
+ [BPF_CGROUP_INET4_CONNECT] = "cgroup_inet4_connect",
+ [BPF_CGROUP_INET6_CONNECT] = "cgroup_inet6_connect",
+ [BPF_CGROUP_INET4_POST_BIND] = "cgroup_inet4_post_bind",
+ [BPF_CGROUP_INET6_POST_BIND] = "cgroup_inet6_post_bind",
+ [BPF_CGROUP_INET4_GETPEERNAME] = "cgroup_inet4_getpeername",
+ [BPF_CGROUP_INET6_GETPEERNAME] = "cgroup_inet6_getpeername",
+ [BPF_CGROUP_INET4_GETSOCKNAME] = "cgroup_inet4_getsockname",
+ [BPF_CGROUP_INET6_GETSOCKNAME] = "cgroup_inet6_getsockname",
+ [BPF_CGROUP_UDP4_SENDMSG] = "cgroup_udp4_sendmsg",
+ [BPF_CGROUP_UDP6_SENDMSG] = "cgroup_udp6_sendmsg",
+ [BPF_CGROUP_SYSCTL] = "cgroup_sysctl",
+ [BPF_CGROUP_UDP4_RECVMSG] = "cgroup_udp4_recvmsg",
+ [BPF_CGROUP_UDP6_RECVMSG] = "cgroup_udp6_recvmsg",
+ [BPF_CGROUP_GETSOCKOPT] = "cgroup_getsockopt",
+ [BPF_CGROUP_SETSOCKOPT] = "cgroup_setsockopt",
+ [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
+ [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
+ [BPF_SK_SKB_VERDICT] = "sk_skb_verdict",
+ [BPF_SK_MSG_VERDICT] = "sk_msg_verdict",
+ [BPF_LIRC_MODE2] = "lirc_mode2",
+ [BPF_FLOW_DISSECTOR] = "flow_dissector",
+ [BPF_TRACE_RAW_TP] = "trace_raw_tp",
+ [BPF_TRACE_FENTRY] = "trace_fentry",
+ [BPF_TRACE_FEXIT] = "trace_fexit",
+ [BPF_MODIFY_RETURN] = "modify_return",
+ [BPF_LSM_MAC] = "lsm_mac",
+ [BPF_SK_LOOKUP] = "sk_lookup",
+ [BPF_TRACE_ITER] = "trace_iter",
+ [BPF_XDP_DEVMAP] = "xdp_devmap",
+ [BPF_XDP_CPUMAP] = "xdp_cpumap",
+ [BPF_XDP] = "xdp",
+ [BPF_SK_REUSEPORT_SELECT] = "sk_reuseport_select",
+ [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_reuseport_select_or_migrate",
+ [BPF_PERF_EVENT] = "perf_event",
+ [BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi",
+};
+
+static const char * const link_type_name[] = {
+ [BPF_LINK_TYPE_UNSPEC] = "unspec",
+ [BPF_LINK_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
+ [BPF_LINK_TYPE_TRACING] = "tracing",
+ [BPF_LINK_TYPE_CGROUP] = "cgroup",
+ [BPF_LINK_TYPE_ITER] = "iter",
+ [BPF_LINK_TYPE_NETNS] = "netns",
+ [BPF_LINK_TYPE_XDP] = "xdp",
+ [BPF_LINK_TYPE_PERF_EVENT] = "perf_event",
+ [BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi",
+ [BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops",
+};
+
+static const char * const map_type_name[] = {
+ [BPF_MAP_TYPE_UNSPEC] = "unspec",
+ [BPF_MAP_TYPE_HASH] = "hash",
+ [BPF_MAP_TYPE_ARRAY] = "array",
+ [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array",
+ [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array",
+ [BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash",
+ [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array",
+ [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace",
+ [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array",
+ [BPF_MAP_TYPE_LRU_HASH] = "lru_hash",
+ [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash",
+ [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie",
+ [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps",
+ [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps",
+ [BPF_MAP_TYPE_DEVMAP] = "devmap",
+ [BPF_MAP_TYPE_DEVMAP_HASH] = "devmap_hash",
+ [BPF_MAP_TYPE_SOCKMAP] = "sockmap",
+ [BPF_MAP_TYPE_CPUMAP] = "cpumap",
+ [BPF_MAP_TYPE_XSKMAP] = "xskmap",
+ [BPF_MAP_TYPE_SOCKHASH] = "sockhash",
+ [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage",
+ [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray",
+ [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage",
+ [BPF_MAP_TYPE_QUEUE] = "queue",
+ [BPF_MAP_TYPE_STACK] = "stack",
+ [BPF_MAP_TYPE_SK_STORAGE] = "sk_storage",
+ [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops",
+ [BPF_MAP_TYPE_RINGBUF] = "ringbuf",
+ [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage",
+ [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage",
+ [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter",
+};
+
+static const char * const prog_type_name[] = {
+ [BPF_PROG_TYPE_UNSPEC] = "unspec",
+ [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
+ [BPF_PROG_TYPE_KPROBE] = "kprobe",
+ [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls",
+ [BPF_PROG_TYPE_SCHED_ACT] = "sched_act",
+ [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint",
+ [BPF_PROG_TYPE_XDP] = "xdp",
+ [BPF_PROG_TYPE_PERF_EVENT] = "perf_event",
+ [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb",
+ [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock",
+ [BPF_PROG_TYPE_LWT_IN] = "lwt_in",
+ [BPF_PROG_TYPE_LWT_OUT] = "lwt_out",
+ [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit",
+ [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
+ [BPF_PROG_TYPE_SK_SKB] = "sk_skb",
+ [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device",
+ [BPF_PROG_TYPE_SK_MSG] = "sk_msg",
+ [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
+ [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
+ [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local",
+ [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
+ [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport",
+ [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector",
+ [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl",
+ [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
+ [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt",
+ [BPF_PROG_TYPE_TRACING] = "tracing",
+ [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops",
+ [BPF_PROG_TYPE_EXT] = "ext",
+ [BPF_PROG_TYPE_LSM] = "lsm",
+ [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup",
+ [BPF_PROG_TYPE_SYSCALL] = "syscall",
+};
+
static int __base_pr(enum libbpf_print_level level, const char *format,
va_list args)
{
@@ -2114,6 +2242,7 @@ static const char *__btf_kind_str(__u16 kind)
case BTF_KIND_FLOAT: return "float";
case BTF_KIND_DECL_TAG: return "decl_tag";
case BTF_KIND_TYPE_TAG: return "type_tag";
+ case BTF_KIND_ENUM64: return "enum64";
default: return "unknown";
}
}
@@ -2642,12 +2771,13 @@ static bool btf_needs_sanitization(struct bpf_object *obj)
bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
+ bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
return !has_func || !has_datasec || !has_func_global || !has_float ||
- !has_decl_tag || !has_type_tag;
+ !has_decl_tag || !has_type_tag || !has_enum64;
}
-static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
+static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
{
bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
@@ -2655,6 +2785,8 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
+ bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
+ int enum64_placeholder_id = 0;
struct btf_type *t;
int i, j, vlen;
@@ -2717,8 +2849,32 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
/* replace TYPE_TAG with a CONST */
t->name_off = 0;
t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0);
- }
+ } else if (!has_enum64 && btf_is_enum(t)) {
+ /* clear the kflag */
+ t->info = btf_type_info(btf_kind(t), btf_vlen(t), false);
+ } else if (!has_enum64 && btf_is_enum64(t)) {
+ /* replace ENUM64 with a union */
+ struct btf_member *m;
+
+ if (enum64_placeholder_id == 0) {
+ enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0);
+ if (enum64_placeholder_id < 0)
+ return enum64_placeholder_id;
+
+ t = (struct btf_type *)btf__type_by_id(btf, i);
+ }
+
+ m = btf_members(t);
+ vlen = btf_vlen(t);
+ t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen);
+ for (j = 0; j < vlen; j++, m++) {
+ m->type = enum64_placeholder_id;
+ m->offset = 0;
+ }
+ }
}
+
+ return 0;
}
static bool libbpf_needs_btf(const struct bpf_object *obj)
@@ -3056,7 +3212,9 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
/* enforce 8-byte pointers for BPF-targeted BTFs */
btf__set_pointer_size(obj->btf, 8);
- bpf_object__sanitize_btf(obj, kern_btf);
+ err = bpf_object__sanitize_btf(obj, kern_btf);
+ if (err)
+ return err;
}
if (obj->gen_loader) {
@@ -3563,6 +3721,10 @@ static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
if (strcmp(name, "libbpf_tristate"))
return KCFG_UNKNOWN;
return KCFG_TRISTATE;
+ case BTF_KIND_ENUM64:
+ if (strcmp(name, "libbpf_tristate"))
+ return KCFG_UNKNOWN;
+ return KCFG_TRISTATE;
case BTF_KIND_ARRAY:
if (btf_array(t)->nelems == 0)
return KCFG_UNKNOWN;
@@ -4746,6 +4908,17 @@ static int probe_kern_bpf_cookie(void)
return probe_fd(ret);
}
+static int probe_kern_btf_enum64(void)
+{
+ static const char strs[] = "\0enum64";
+ __u32 types[] = {
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs)));
+}
+
enum kern_feature_result {
FEAT_UNKNOWN = 0,
FEAT_SUPPORTED = 1,
@@ -4811,6 +4984,9 @@ static struct kern_feature_desc {
[FEAT_BPF_COOKIE] = {
"BPF cookie support", probe_kern_bpf_cookie,
},
+ [FEAT_BTF_ENUM64] = {
+ "BTF_KIND_ENUM64 support", probe_kern_btf_enum64,
+ },
};
bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
@@ -4943,11 +5119,6 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
static void bpf_map__destroy(struct bpf_map *map);
-static bool is_pow_of_2(size_t x)
-{
- return x && (x & (x - 1));
-}
-
static size_t adjust_ringbuf_sz(size_t sz)
{
__u32 page_sz = sysconf(_SC_PAGE_SIZE);
@@ -5353,7 +5524,7 @@ int bpf_core_add_cands(struct bpf_core_cand *local_cand,
n = btf__type_cnt(targ_btf);
for (i = targ_start_id; i < n; i++) {
t = btf__type_by_id(targ_btf, i);
- if (btf_kind(t) != btf_kind(local_t))
+ if (!btf_kind_core_compat(t, local_t))
continue;
targ_name = btf__name_by_offset(targ_btf, t->name_off);
@@ -5567,7 +5738,7 @@ int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
/* caller made sure that names match (ignoring flavor suffix) */
local_type = btf__type_by_id(local_btf, local_id);
targ_type = btf__type_by_id(targ_btf, targ_id);
- if (btf_kind(local_type) != btf_kind(targ_type))
+ if (!btf_kind_core_compat(local_type, targ_type))
return 0;
recur:
@@ -5580,7 +5751,7 @@ recur:
if (!local_type || !targ_type)
return -EINVAL;
- if (btf_kind(local_type) != btf_kind(targ_type))
+ if (!btf_kind_core_compat(local_type, targ_type))
return 0;
switch (btf_kind(local_type)) {
@@ -5588,6 +5759,7 @@ recur:
case BTF_KIND_STRUCT:
case BTF_KIND_UNION:
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
case BTF_KIND_FWD:
return 1;
case BTF_KIND_INT:
@@ -9005,8 +9177,10 @@ static const struct bpf_sec_def section_defs[] = {
SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
SEC_DEF("kprobe+", KPROBE, 0, SEC_NONE, attach_kprobe),
SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe),
+ SEC_DEF("uprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
SEC_DEF("kretprobe+", KPROBE, 0, SEC_NONE, attach_kprobe),
SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe),
+ SEC_DEF("uretprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
SEC_DEF("usdt+", KPROBE, 0, SEC_NONE, attach_usdt),
@@ -9300,6 +9474,38 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
return libbpf_err(-ESRCH);
}
+const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t)
+{
+ if (t < 0 || t >= ARRAY_SIZE(attach_type_name))
+ return NULL;
+
+ return attach_type_name[t];
+}
+
+const char *libbpf_bpf_link_type_str(enum bpf_link_type t)
+{
+ if (t < 0 || t >= ARRAY_SIZE(link_type_name))
+ return NULL;
+
+ return link_type_name[t];
+}
+
+const char *libbpf_bpf_map_type_str(enum bpf_map_type t)
+{
+ if (t < 0 || t >= ARRAY_SIZE(map_type_name))
+ return NULL;
+
+ return map_type_name[t];
+}
+
+const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t)
+{
+ if (t < 0 || t >= ARRAY_SIZE(prog_type_name))
+ return NULL;
+
+ return prog_type_name[t];
+}
+
static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
size_t offset)
{
@@ -10988,43 +11194,6 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
return pfd;
}
-/* uprobes deal in relative offsets; subtract the base address associated with
- * the mapped binary. See Documentation/trace/uprobetracer.rst for more
- * details.
- */
-static long elf_find_relative_offset(const char *filename, Elf *elf, long addr)
-{
- size_t n;
- int i;
-
- if (elf_getphdrnum(elf, &n)) {
- pr_warn("elf: failed to find program headers for '%s': %s\n", filename,
- elf_errmsg(-1));
- return -ENOENT;
- }
-
- for (i = 0; i < n; i++) {
- int seg_start, seg_end, seg_offset;
- GElf_Phdr phdr;
-
- if (!gelf_getphdr(elf, i, &phdr)) {
- pr_warn("elf: failed to get program header %d from '%s': %s\n", i, filename,
- elf_errmsg(-1));
- return -ENOENT;
- }
- if (phdr.p_type != PT_LOAD || !(phdr.p_flags & PF_X))
- continue;
-
- seg_start = phdr.p_vaddr;
- seg_end = seg_start + phdr.p_memsz;
- seg_offset = phdr.p_offset;
- if (addr >= seg_start && addr < seg_end)
- return addr - seg_start + seg_offset;
- }
- pr_warn("elf: failed to find prog header containing 0x%lx in '%s'\n", addr, filename);
- return -ENOENT;
-}
-
/* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */
static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn)
{
@@ -11111,6 +11280,8 @@ static long elf_find_func_offset(const char *binary_path, const char *name)
for (idx = 0; idx < nr_syms; idx++) {
int curr_bind;
GElf_Sym sym;
+ Elf_Scn *sym_scn;
+ GElf_Shdr sym_sh;
if (!gelf_getsym(symbols, idx, &sym))
continue;
@@ -11148,12 +11319,28 @@ static long elf_find_func_offset(const char *binary_path, const char *name)
continue;
}
}
- ret = sym.st_value;
+
+ /* Transform symbol's virtual address (absolute for
+ * binaries and relative for shared libs) into file
+ * offset, which is what the kernel expects for
+ * uprobe/uretprobe attachment.
+ * See Documentation/trace/uprobetracer.rst for more
+ * details.
+ * This is done by looking up the symbol's containing
+ * section's header and using its virtual address
+ * (sh_addr) and corresponding file offset (sh_offset)
+ * to transform sym.st_value (virtual address) into
+ * desired final file offset.
+ */
+ sym_scn = elf_getscn(elf, sym.st_shndx);
+ if (!sym_scn)
+ continue;
+ if (!gelf_getshdr(sym_scn, &sym_sh))
+ continue;
+
+ ret = sym.st_value - sym_sh.sh_addr + sym_sh.sh_offset;
last_bind = curr_bind;
}
- /* For binaries that are not shared libraries, we need relative offset */
- if (ret > 0 && !is_shared_lib)
- ret = elf_find_relative_offset(binary_path, elf, ret);
if (ret > 0)
break;
}
@@ -11386,7 +11573,8 @@ static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf
break;
case 3:
case 4:
- opts.retprobe = strcmp(probe_type, "uretprobe") == 0;
+ opts.retprobe = strcmp(probe_type, "uretprobe") == 0 ||
+ strcmp(probe_type, "uretprobe.s") == 0;
if (opts.retprobe && offset != 0) {
pr_warn("prog '%s': uretprobes do not support offset specification\n",
prog->name);
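
The pair of SEC_DEFs added above registers "uprobe.s" and "uretprobe.s" as sleepable, auto-attachable variants. A hedged sketch of a program using the new section prefix (binary path and function name are illustrative):

#include <linux/ptrace.h>
#include <bpf/bpf_helpers.h>

char LICENSE[] SEC("license") = "GPL";

/* Sketch: sleepable uprobe; the trailing path:function spec enables
 * auto-attach, as with the non-sleepable "uprobe" section. */
SEC("uprobe.s//usr/bin/bash:readline")
int handle_readline(struct pt_regs *ctx)
{
	return 0;
}
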
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 9e9a3fd3edd8..fa27969da0da 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -51,6 +51,42 @@ enum libbpf_errno {
LIBBPF_API int libbpf_strerror(int err, char *buf, size_t size);
+/**
+ * @brief **libbpf_bpf_attach_type_str()** converts the provided attach type
+ * value into a textual representation.
+ * @param t The attach type.
+ * @return Pointer to a static string identifying the attach type. NULL is
+ * returned for unknown **bpf_attach_type** values.
+ */
+LIBBPF_API const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t);
+
+/**
+ * @brief **libbpf_bpf_link_type_str()** converts the provided link type value
+ * into a textual representation.
+ * @param t The link type.
+ * @return Pointer to a static string identifying the link type. NULL is
+ * returned for unknown **bpf_link_type** values.
+ */
+LIBBPF_API const char *libbpf_bpf_link_type_str(enum bpf_link_type t);
+
+/**
+ * @brief **libbpf_bpf_map_type_str()** converts the provided map type value
+ * into a textual representation.
+ * @param t The map type.
+ * @return Pointer to a static string identifying the map type. NULL is
+ * returned for unknown **bpf_map_type** values.
+ */
+LIBBPF_API const char *libbpf_bpf_map_type_str(enum bpf_map_type t);
+
+/**
+ * @brief **libbpf_bpf_prog_type_str()** converts the provided program type
+ * value into a textual representation.
+ * @param t The program type.
+ * @return Pointer to a static string identifying the program type. NULL is
+ * returned for unknown **bpf_prog_type** values.
+ */
+LIBBPF_API const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t);
+
enum libbpf_print_level {
LIBBPF_WARN,
LIBBPF_INFO,
@@ -71,7 +107,7 @@ struct bpf_object_open_attr {
};
struct bpf_object_open_opts {
- /* size of this struct, for forward/backward compatiblity */
+ /* size of this struct, for forward/backward compatibility */
size_t sz;
/* object name override, if provided:
* - for object open from file, this will override setting object
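
All four helpers share one contract, spelled out above: a pointer to a static string for known values, NULL otherwise. A sketch of the fall-back-to-numeric pattern the bpftool hunks earlier in this patch use:

#include <stdio.h>
#include <bpf/libbpf.h>

/* Sketch: print a program type name, or the raw value if this
 * libbpf version does not know the type (NULL return). */
static void print_prog_type(enum bpf_prog_type t)
{
	const char *s = libbpf_bpf_prog_type_str(t);

	if (s)
		printf("type %s\n", s);
	else
		printf("type %u\n", (unsigned int)t);
}
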
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 52973cffc20c..116a2a8ee7c2 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -461,5 +461,13 @@ LIBBPF_0.8.0 {
} LIBBPF_0.7.0;
LIBBPF_1.0.0 {
+ global:
+ btf__add_enum64;
+ btf__add_enum64_value;
+ libbpf_bpf_attach_type_str;
+ libbpf_bpf_link_type_str;
+ libbpf_bpf_map_type_str;
+ libbpf_bpf_prog_type_str;
+
local: *;
};
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 4abdbe2fea9d..a1ad145ffa74 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -351,6 +351,8 @@ enum kern_feature_id {
FEAT_MEMCG_ACCOUNT,
/* BPF cookie (bpf_get_attach_cookie() BPF helper) support */
FEAT_BPF_COOKIE,
+ /* BTF_KIND_ENUM64 support and BTF_KIND_ENUM kflag support */
+ FEAT_BTF_ENUM64,
__FEAT_CNT,
};
@@ -580,4 +582,9 @@ struct bpf_link * usdt_manager_attach_usdt(struct usdt_manager *man,
const char *usdt_provider, const char *usdt_name,
__u64 usdt_cookie);
+static inline bool is_pow_of_2(size_t x)
+{
+ return x && (x & (x - 1)) == 0;
+}
+
#endif /* __LIBBPF_LIBBPF_INTERNAL_H */
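
Note the consolidated helper keeps the "== 0" from the linker.c copy; the copy removed from libbpf.c earlier in this diff lacked it, so it misclassified most inputs. A sketch of the intended semantics:

#include <assert.h>

/* Sketch: expected behavior of is_pow_of_2(); zero is not a power
 * of two, and exactly one set bit qualifies. */
static void pow2_selftest(void)
{
	assert(is_pow_of_2(1) && is_pow_of_2(4096));
	assert(!is_pow_of_2(0) && !is_pow_of_2(6));
}
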
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index 9aa016fb55aa..4ac02c28e152 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -697,11 +697,6 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename,
return err;
}
-static bool is_pow_of_2(size_t x)
-{
- return x && (x & (x - 1)) == 0;
-}
-
static int linker_sanity_check_elf(struct src_obj *obj)
{
struct src_sec *sec;
@@ -1340,6 +1335,7 @@ recur:
case BTF_KIND_STRUCT:
case BTF_KIND_UNION:
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
case BTF_KIND_FWD:
case BTF_KIND_FUNC:
case BTF_KIND_VAR:
@@ -1362,6 +1358,7 @@ recur:
case BTF_KIND_INT:
case BTF_KIND_FLOAT:
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
/* ignore int encoding and enumerator values; only sizes must match */
if (t1->size != t2->size) {
pr_warn("global '%s': incompatible %s '%s' size %u and %u\n",
diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c
index ba4453dfd1ed..6ad3c3891a9a 100644
--- a/tools/lib/bpf/relo_core.c
+++ b/tools/lib/bpf/relo_core.c
@@ -167,7 +167,7 @@ static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
* just a parsed access string representation): [0, 1, 2, 3].
*
* High-level spec will capture only 3 points:
- * - intial zero-index access by pointer (&s->... is the same as &s[0]...);
+ * - initial zero-index access by pointer (&s->... is the same as &s[0]...);
* - field 'a' access (corresponds to '2' in low-level spec);
* - array element #3 access (corresponds to '3' in low-level spec).
*
@@ -186,7 +186,7 @@ int bpf_core_parse_spec(const char *prog_name, const struct btf *btf,
struct bpf_core_accessor *acc;
const struct btf_type *t;
const char *name, *spec_str;
- __u32 id;
+ __u32 id, name_off;
__s64 sz;
spec_str = btf__name_by_offset(btf, relo->access_str_off);
@@ -231,11 +231,13 @@ int bpf_core_parse_spec(const char *prog_name, const struct btf *btf,
spec->len++;
if (core_relo_is_enumval_based(relo->kind)) {
- if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
+ if (!btf_is_any_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
return -EINVAL;
/* record enumerator name in a first accessor */
- acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off);
+ name_off = btf_is_enum(t) ? btf_enum(t)[access_idx].name_off
+ : btf_enum64(t)[access_idx].name_off;
+ acc->name = btf__name_by_offset(btf, name_off);
return 0;
}
@@ -340,7 +342,7 @@ recur:
if (btf_is_composite(local_type) && btf_is_composite(targ_type))
return 1;
- if (btf_kind(local_type) != btf_kind(targ_type))
+ if (!btf_kind_core_compat(local_type, targ_type))
return 0;
switch (btf_kind(local_type)) {
@@ -348,6 +350,7 @@ recur:
case BTF_KIND_FLOAT:
return 1;
case BTF_KIND_FWD:
+ case BTF_KIND_ENUM64:
case BTF_KIND_ENUM: {
const char *local_name, *targ_name;
size_t local_len, targ_len;
@@ -477,6 +480,7 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
const struct bpf_core_accessor *local_acc;
struct bpf_core_accessor *targ_acc;
int i, sz, matched;
+ __u32 name_off;
memset(targ_spec, 0, sizeof(*targ_spec));
targ_spec->btf = targ_btf;
@@ -494,18 +498,22 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
if (core_relo_is_enumval_based(local_spec->relo_kind)) {
size_t local_essent_len, targ_essent_len;
- const struct btf_enum *e;
const char *targ_name;
/* has to resolve to an enum */
targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id);
- if (!btf_is_enum(targ_type))
+ if (!btf_is_any_enum(targ_type))
return 0;
local_essent_len = bpf_core_essential_name_len(local_acc->name);
- for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) {
- targ_name = btf__name_by_offset(targ_spec->btf, e->name_off);
+ for (i = 0; i < btf_vlen(targ_type); i++) {
+ if (btf_is_enum(targ_type))
+ name_off = btf_enum(targ_type)[i].name_off;
+ else
+ name_off = btf_enum64(targ_type)[i].name_off;
+
+ targ_name = btf__name_by_offset(targ_spec->btf, name_off);
targ_essent_len = bpf_core_essential_name_len(targ_name);
if (targ_essent_len != local_essent_len)
continue;
@@ -583,7 +591,7 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
static int bpf_core_calc_field_relo(const char *prog_name,
const struct bpf_core_relo *relo,
const struct bpf_core_spec *spec,
- __u32 *val, __u32 *field_sz, __u32 *type_id,
+ __u64 *val, __u32 *field_sz, __u32 *type_id,
bool *validate)
{
const struct bpf_core_accessor *acc;
@@ -680,8 +688,7 @@ static int bpf_core_calc_field_relo(const char *prog_name,
*val = byte_sz;
break;
case BPF_CORE_FIELD_SIGNED:
- /* enums will be assumed unsigned */
- *val = btf_is_enum(mt) ||
+ *val = (btf_is_any_enum(mt) && BTF_INFO_KFLAG(mt->info)) ||
(btf_int_encoding(mt) & BTF_INT_SIGNED);
if (validate)
*validate = true; /* signedness is never ambiguous */
@@ -708,7 +715,7 @@ static int bpf_core_calc_field_relo(const char *prog_name,
static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
const struct bpf_core_spec *spec,
- __u32 *val, bool *validate)
+ __u64 *val, bool *validate)
{
__s64 sz;
@@ -751,10 +758,9 @@ static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
const struct bpf_core_spec *spec,
- __u32 *val)
+ __u64 *val)
{
const struct btf_type *t;
- const struct btf_enum *e;
switch (relo->kind) {
case BPF_CORE_ENUMVAL_EXISTS:
@@ -764,8 +770,10 @@ static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
if (!spec)
return -EUCLEAN; /* request instruction poisoning */
t = btf_type_by_id(spec->btf, spec->spec[0].type_id);
- e = btf_enum(t) + spec->spec[0].idx;
- *val = e->val;
+ if (btf_is_enum(t))
+ *val = btf_enum(t)[spec->spec[0].idx].val;
+ else
+ *val = btf_enum64_value(btf_enum64(t) + spec->spec[0].idx);
break;
default:
return -EOPNOTSUPP;
@@ -929,7 +937,7 @@ int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn,
int insn_idx, const struct bpf_core_relo *relo,
int relo_idx, const struct bpf_core_relo_res *res)
{
- __u32 orig_val, new_val;
+ __u64 orig_val, new_val;
__u8 class;
class = BPF_CLASS(insn->code);
@@ -954,28 +962,30 @@ poison:
if (BPF_SRC(insn->code) != BPF_K)
return -EINVAL;
if (res->validate && insn->imm != orig_val) {
- pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
+ pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %llu -> %llu\n",
prog_name, relo_idx,
- insn_idx, insn->imm, orig_val, new_val);
+ insn_idx, insn->imm, (unsigned long long)orig_val,
+ (unsigned long long)new_val);
return -EINVAL;
}
orig_val = insn->imm;
insn->imm = new_val;
- pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n",
+ pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %llu -> %llu\n",
prog_name, relo_idx, insn_idx,
- orig_val, new_val);
+ (unsigned long long)orig_val, (unsigned long long)new_val);
break;
case BPF_LDX:
case BPF_ST:
case BPF_STX:
if (res->validate && insn->off != orig_val) {
- pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n",
- prog_name, relo_idx, insn_idx, insn->off, orig_val, new_val);
+ pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %llu -> %llu\n",
+ prog_name, relo_idx, insn_idx, insn->off, (unsigned long long)orig_val,
+ (unsigned long long)new_val);
return -EINVAL;
}
if (new_val > SHRT_MAX) {
- pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n",
- prog_name, relo_idx, insn_idx, new_val);
+ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %llu\n",
+ prog_name, relo_idx, insn_idx, (unsigned long long)new_val);
return -ERANGE;
}
if (res->fail_memsz_adjust) {
@@ -987,8 +997,9 @@ poison:
orig_val = insn->off;
insn->off = new_val;
- pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
- prog_name, relo_idx, insn_idx, orig_val, new_val);
+ pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %llu -> %llu\n",
+ prog_name, relo_idx, insn_idx, (unsigned long long)orig_val,
+ (unsigned long long)new_val);
if (res->new_sz != res->orig_sz) {
int insn_bytes_sz, insn_bpf_sz;
@@ -1024,20 +1035,20 @@ poison:
return -EINVAL;
}
- imm = insn[0].imm + ((__u64)insn[1].imm << 32);
+ imm = (__u32)insn[0].imm | ((__u64)insn[1].imm << 32);
if (res->validate && imm != orig_val) {
- pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n",
+ pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %llu -> %llu\n",
prog_name, relo_idx,
insn_idx, (unsigned long long)imm,
- orig_val, new_val);
+ (unsigned long long)orig_val, (unsigned long long)new_val);
return -EINVAL;
}
insn[0].imm = new_val;
- insn[1].imm = 0; /* currently only 32-bit values are supported */
- pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n",
+ insn[1].imm = new_val >> 32;
+ pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %llu\n",
prog_name, relo_idx, insn_idx,
- (unsigned long long)imm, new_val);
+ (unsigned long long)imm, (unsigned long long)new_val);
break;
}
default:
@@ -1057,7 +1068,6 @@ poison:
int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *spec)
{
const struct btf_type *t;
- const struct btf_enum *e;
const char *s;
__u32 type_id;
int i, len = 0;
@@ -1086,10 +1096,23 @@ int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *s
if (core_relo_is_enumval_based(spec->relo_kind)) {
t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
- e = btf_enum(t) + spec->raw_spec[0];
- s = btf__name_by_offset(spec->btf, e->name_off);
+ if (btf_is_enum(t)) {
+ const struct btf_enum *e;
+ const char *fmt_str;
+
+ e = btf_enum(t) + spec->raw_spec[0];
+ s = btf__name_by_offset(spec->btf, e->name_off);
+ fmt_str = BTF_INFO_KFLAG(t->info) ? "::%s = %d" : "::%s = %u";
+ append_buf(fmt_str, s, e->val);
+ } else {
+ const struct btf_enum64 *e;
+ const char *fmt_str;
- append_buf("::%s = %u", s, e->val);
+ e = btf_enum64(t) + spec->raw_spec[0];
+ s = btf__name_by_offset(spec->btf, e->name_off);
+ fmt_str = BTF_INFO_KFLAG(t->info) ? "::%s = %lld" : "::%s = %llu";
+ append_buf(fmt_str, s, (unsigned long long)btf_enum64_value(e));
+ }
return len;
}
@@ -1148,11 +1171,11 @@ int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *s
* 3. It is supported and expected that there might be multiple flavors
* matching the spec. As long as all the specs resolve to the same set of
* offsets across all candidates, there is no error. If there is any
- * ambiguity, CO-RE relocation will fail. This is necessary to accomodate
- * imprefection of BTF deduplication, which can cause slight duplication of
+ * ambiguity, CO-RE relocation will fail. This is necessary to accommodate
+ * imperfection of BTF deduplication, which can cause slight duplication of
* the same BTF type, if some directly or indirectly referenced (by
* pointer) type gets resolved to different actual types in different
- * object files. If such situation occurs, deduplicated BTF will end up
+ * object files. If such a situation occurs, deduplicated BTF will end up
* with two (or more) structurally identical types, which differ only in
* types they refer to through pointer. This should be OK in most cases and
* is not an error.
@@ -1261,10 +1284,12 @@ int bpf_core_calc_relo_insn(const char *prog_name,
* decision and value, otherwise it's dangerous to
* proceed due to ambiguity
*/
- pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
+ pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %llu != %s %llu\n",
prog_name, relo_idx,
- cand_res.poison ? "failure" : "success", cand_res.new_val,
- targ_res->poison ? "failure" : "success", targ_res->new_val);
+ cand_res.poison ? "failure" : "success",
+ (unsigned long long)cand_res.new_val,
+ targ_res->poison ? "failure" : "success",
+ (unsigned long long)targ_res->new_val);
return -EINVAL;
}
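The LDIMM64 hunk above is the substance of the change: a 64-bit relocation value is now split across the two 32-bit imm fields of the double-wide instruction instead of being truncated, and the low half is read back through a __u32 cast so sign extension cannot corrupt the upper bits. A minimal sketch of the same packing and unpacking, using a hypothetical two-slot pair that mirrors struct bpf_insn's imm field:

#include <stdint.h>

/* hypothetical stand-in for two consecutive struct bpf_insn slots */
struct insn { int32_t imm; };

static void ldimm64_write(struct insn insn[2], uint64_t val)
{
	insn[0].imm = (int32_t)val;         /* low 32 bits */
	insn[1].imm = (int32_t)(val >> 32); /* high 32 bits */
}

static uint64_t ldimm64_read(const struct insn insn[2])
{
	/* cast the low half to uint32_t before widening; summing a
	 * sign-extended low half is exactly the bug fixed above
	 */
	return (uint32_t)insn[0].imm | ((uint64_t)insn[1].imm << 32);
}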
diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h
index 073039d8ca4f..7df0da082f2c 100644
--- a/tools/lib/bpf/relo_core.h
+++ b/tools/lib/bpf/relo_core.h
@@ -46,9 +46,9 @@ struct bpf_core_spec {
struct bpf_core_relo_res {
/* expected value in the instruction, unless validate == false */
- __u32 orig_val;
+ __u64 orig_val;
/* new value that needs to be patched up to */
- __u32 new_val;
+ __u64 new_val;
/* relocation unsuccessful, poison instruction, but don't fail load */
bool poison;
/* some relocations can't be validated against orig_val */
diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
index f1c9339cfbbc..5159207cbfd9 100644
--- a/tools/lib/bpf/usdt.c
+++ b/tools/lib/bpf/usdt.c
@@ -441,7 +441,7 @@ static int parse_elf_segs(Elf *elf, const char *path, struct elf_seg **segs, siz
return 0;
}
-static int parse_lib_segs(int pid, const char *lib_path, struct elf_seg **segs, size_t *seg_cnt)
+static int parse_vma_segs(int pid, const char *lib_path, struct elf_seg **segs, size_t *seg_cnt)
{
char path[PATH_MAX], line[PATH_MAX], mode[16];
size_t seg_start, seg_end, seg_off;
@@ -531,35 +531,40 @@ err_out:
return err;
}
-static struct elf_seg *find_elf_seg(struct elf_seg *segs, size_t seg_cnt, long addr, bool relative)
+static struct elf_seg *find_elf_seg(struct elf_seg *segs, size_t seg_cnt, long virtaddr)
{
struct elf_seg *seg;
int i;
- if (relative) {
- /* for shared libraries, address is relative offset and thus
- * should be fall within logical offset-based range of
- * [offset_start, offset_end)
- */
- for (i = 0, seg = segs; i < seg_cnt; i++, seg++) {
- if (seg->offset <= addr && addr < seg->offset + (seg->end - seg->start))
- return seg;
- }
- } else {
- /* for binaries, address is absolute and thus should be within
- * absolute address range of [seg_start, seg_end)
- */
- for (i = 0, seg = segs; i < seg_cnt; i++, seg++) {
- if (seg->start <= addr && addr < seg->end)
- return seg;
- }
+ /* for ELF binaries (both executables and shared libraries), we are
+ * given a virtual address (absolute for executables, relative for
+ * libraries) which should fall within the address range of [seg_start, seg_end)
+ */
+ for (i = 0, seg = segs; i < seg_cnt; i++, seg++) {
+ if (seg->start <= virtaddr && virtaddr < seg->end)
+ return seg;
}
+ return NULL;
+}
+static struct elf_seg *find_vma_seg(struct elf_seg *segs, size_t seg_cnt, long offset)
+{
+ struct elf_seg *seg;
+ int i;
+
+ /* for VMA segments from /proc/<pid>/maps file, provided "address" is
+ * actually a file offset, so it should fall within the logical
+ * offset-based range of [offset_start, offset_end)
+ */
+ for (i = 0, seg = segs; i < seg_cnt; i++, seg++) {
+ if (seg->offset <= offset && offset < seg->offset + (seg->end - seg->start))
+ return seg;
+ }
return NULL;
}
-static int parse_usdt_note(Elf *elf, const char *path, long base_addr,
- GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off,
+static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr,
+ const char *data, size_t name_off, size_t desc_off,
struct usdt_note *usdt_note);
static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie);
@@ -568,8 +573,8 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
const char *usdt_provider, const char *usdt_name, __u64 usdt_cookie,
struct usdt_target **out_targets, size_t *out_target_cnt)
{
- size_t off, name_off, desc_off, seg_cnt = 0, lib_seg_cnt = 0, target_cnt = 0;
- struct elf_seg *segs = NULL, *lib_segs = NULL;
+ size_t off, name_off, desc_off, seg_cnt = 0, vma_seg_cnt = 0, target_cnt = 0;
+ struct elf_seg *segs = NULL, *vma_segs = NULL;
struct usdt_target *targets = NULL, *target;
long base_addr = 0;
Elf_Scn *notes_scn, *base_scn;
@@ -613,8 +618,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
struct elf_seg *seg = NULL;
void *tmp;
- err = parse_usdt_note(elf, path, base_addr, &nhdr,
- data->d_buf, name_off, desc_off, &note);
+ err = parse_usdt_note(elf, path, &nhdr, data->d_buf, name_off, desc_off, &note);
if (err)
goto err_out;
@@ -654,30 +658,29 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
usdt_rel_ip += base_addr - note.base_addr;
}
- if (ehdr.e_type == ET_EXEC) {
- /* When attaching uprobes (which what USDTs basically
- * are) kernel expects a relative IP to be specified,
- * so if we are attaching to an executable ELF binary
- * (i.e., not a shared library), we need to calculate
- * proper relative IP based on ELF's load address
- */
- seg = find_elf_seg(segs, seg_cnt, usdt_abs_ip, false /* relative */);
- if (!seg) {
- err = -ESRCH;
- pr_warn("usdt: failed to find ELF program segment for '%s:%s' in '%s' at IP 0x%lx\n",
- usdt_provider, usdt_name, path, usdt_abs_ip);
- goto err_out;
- }
- if (!seg->is_exec) {
- err = -ESRCH;
- pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx) for '%s:%s' at IP 0x%lx is not executable\n",
- path, seg->start, seg->end, usdt_provider, usdt_name,
- usdt_abs_ip);
- goto err_out;
- }
+ /* When attaching uprobes (which is what USDTs basically are),
+ * the kernel expects a file offset to be specified, not a relative
+ * virtual address, so we need to translate the virtual address to
+ * a file offset, for both ET_EXEC and ET_DYN binaries.
+ */
+ seg = find_elf_seg(segs, seg_cnt, usdt_abs_ip);
+ if (!seg) {
+ err = -ESRCH;
+ pr_warn("usdt: failed to find ELF program segment for '%s:%s' in '%s' at IP 0x%lx\n",
+ usdt_provider, usdt_name, path, usdt_abs_ip);
+ goto err_out;
+ }
+ if (!seg->is_exec) {
+ err = -ESRCH;
+ pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx) for '%s:%s' at IP 0x%lx is not executable\n",
+ path, seg->start, seg->end, usdt_provider, usdt_name,
+ usdt_abs_ip);
+ goto err_out;
+ }
+ /* translate from virtual address to file offset */
+ usdt_rel_ip = usdt_abs_ip - seg->start + seg->offset;
- usdt_rel_ip = usdt_abs_ip - (seg->start - seg->offset);
- } else if (!man->has_bpf_cookie) { /* ehdr.e_type == ET_DYN */
+ if (ehdr.e_type == ET_DYN && !man->has_bpf_cookie) {
/* If we don't have BPF cookie support but need to
* attach to a shared library, we'll need to know and
* record absolute addresses of attach points due to
@@ -697,9 +700,9 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
goto err_out;
}
- /* lib_segs are lazily initialized only if necessary */
- if (lib_seg_cnt == 0) {
- err = parse_lib_segs(pid, path, &lib_segs, &lib_seg_cnt);
+ /* vma_segs are lazily initialized only if necessary */
+ if (vma_seg_cnt == 0) {
+ err = parse_vma_segs(pid, path, &vma_segs, &vma_seg_cnt);
if (err) {
pr_warn("usdt: failed to get memory segments in PID %d for shared library '%s': %d\n",
pid, path, err);
@@ -707,7 +710,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
}
}
- seg = find_elf_seg(lib_segs, lib_seg_cnt, usdt_rel_ip, true /* relative */);
+ seg = find_vma_seg(vma_segs, vma_seg_cnt, usdt_rel_ip);
if (!seg) {
err = -ESRCH;
pr_warn("usdt: failed to find shared lib memory segment for '%s:%s' in '%s' at relative IP 0x%lx\n",
@@ -715,7 +718,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
goto err_out;
}
- usdt_abs_ip = seg->start + (usdt_rel_ip - seg->offset);
+ usdt_abs_ip = seg->start - seg->offset + usdt_rel_ip;
}
pr_debug("usdt: probe for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved abs_ip 0x%lx rel_ip 0x%lx) args '%s' in segment [0x%lx, 0x%lx) at offset 0x%lx\n",
@@ -723,7 +726,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
note.loc_addr, note.base_addr, usdt_abs_ip, usdt_rel_ip, note.args,
seg ? seg->start : 0, seg ? seg->end : 0, seg ? seg->offset : 0);
- /* Adjust semaphore address to be a relative offset */
+ /* Adjust semaphore address to be a file offset */
if (note.sema_addr) {
if (!man->has_sema_refcnt) {
pr_warn("usdt: kernel doesn't support USDT semaphore refcounting for '%s:%s' in '%s'\n",
@@ -732,7 +735,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
goto err_out;
}
- seg = find_elf_seg(segs, seg_cnt, note.sema_addr, false /* relative */);
+ seg = find_elf_seg(segs, seg_cnt, note.sema_addr);
if (!seg) {
err = -ESRCH;
pr_warn("usdt: failed to find ELF loadable segment with semaphore of '%s:%s' in '%s' at 0x%lx\n",
@@ -747,7 +750,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
goto err_out;
}
- usdt_sema_off = note.sema_addr - (seg->start - seg->offset);
+ usdt_sema_off = note.sema_addr - seg->start + seg->offset;
pr_debug("usdt: sema for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved 0x%lx) in segment [0x%lx, 0x%lx] at offset 0x%lx\n",
usdt_provider, usdt_name, ehdr.e_type == ET_EXEC ? "exec" : "lib ",
@@ -770,7 +773,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
target->rel_ip = usdt_rel_ip;
target->sema_off = usdt_sema_off;
- /* notes->args references strings from Elf itself, so they can
+ /* notes.args references strings from Elf itself, so they can
* be referenced safely until elf_end() call
*/
target->spec_str = note.args;
@@ -788,7 +791,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
err_out:
free(segs);
- free(lib_segs);
+ free(vma_segs);
if (err < 0)
free(targets);
return err;
@@ -1089,8 +1092,8 @@ err_out:
/* Parse out USDT ELF note from '.note.stapsdt' section.
* Logic inspired by perf's code.
*/
-static int parse_usdt_note(Elf *elf, const char *path, long base_addr,
- GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off,
+static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr,
+ const char *data, size_t name_off, size_t desc_off,
struct usdt_note *note)
{
const char *provider, *name, *args;
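Both translations above are two directions of one identity: within a loadable segment, a virtual address and its file offset differ by the constant seg->start - seg->offset. A minimal sketch, with a hypothetical stand-in for struct elf_seg:

/* hypothetical mirror of the struct elf_seg fields used above */
struct seg { long start, end, offset; };

/* virtual address -> file offset; valid when start <= va < end */
static long virt_to_file_off(const struct seg *s, long va)
{
	return va - s->start + s->offset;
}

/* file offset -> virtual address; the inverse used for VMA segments */
static long file_off_to_virt(const struct seg *s, long off)
{
	return s->start - s->offset + off;
}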
diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c
index c1d58673f6ef..952f3520d5c2 100644
--- a/tools/lib/perf/evsel.c
+++ b/tools/lib/perf/evsel.c
@@ -149,23 +149,30 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
int fd, group_fd, *evsel_fd;
evsel_fd = FD(evsel, idx, thread);
- if (evsel_fd == NULL)
- return -EINVAL;
+ if (evsel_fd == NULL) {
+ err = -EINVAL;
+ goto out;
+ }
err = get_group_fd(evsel, idx, thread, &group_fd);
if (err < 0)
- return err;
+ goto out;
fd = sys_perf_event_open(&evsel->attr,
threads->map[thread].pid,
cpu, group_fd, 0);
- if (fd < 0)
- return -errno;
+ if (fd < 0) {
+ err = -errno;
+ goto out;
+ }
*evsel_fd = fd;
}
}
+out:
+ if (err)
+ perf_evsel__close(evsel);
return err;
}
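The evsel.c hunk replaces early returns with a single exit label so that any failure closes the file descriptors opened so far. A hedged sketch of the pattern, with hypothetical acquire/release helpers standing in for sys_perf_event_open() and perf_evsel__close():

static int acquire_a(void) { return 0; }
static int acquire_b(void) { return 0; }
static void release_all(void) { }

static int open_all(void)
{
	int err;

	err = acquire_a();
	if (err < 0)
		goto out;
	err = acquire_b();
out:
	if (err)
		release_all(); /* undo partial setup on any failure */
	return err;
}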
diff --git a/tools/perf/tests/bp_account.c b/tools/perf/tests/bp_account.c
index d1ebb5561e5b..6f921db33cf9 100644
--- a/tools/perf/tests/bp_account.c
+++ b/tools/perf/tests/bp_account.c
@@ -151,11 +151,21 @@ static int detect_ioctl(void)
static int detect_share(int wp_cnt, int bp_cnt)
{
struct perf_event_attr attr;
- int i, fd[wp_cnt + bp_cnt], ret;
+ int i, *fd = NULL, ret = -1;
+
+ if (wp_cnt + bp_cnt == 0)
+ return 0;
+
+ fd = malloc(sizeof(int) * (wp_cnt + bp_cnt));
+ if (!fd)
+ return -1;
for (i = 0; i < wp_cnt; i++) {
fd[i] = wp_event((void *)&the_var, &attr);
- TEST_ASSERT_VAL("failed to create wp\n", fd[i] != -1);
+ if (fd[i] == -1) {
+ pr_err("failed to create wp\n");
+ goto out;
+ }
}
for (; i < (bp_cnt + wp_cnt); i++) {
@@ -166,9 +176,11 @@ static int detect_share(int wp_cnt, int bp_cnt)
ret = i != (bp_cnt + wp_cnt);
+out:
while (i--)
close(fd[i]);
+ free(fd);
return ret;
}
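The bp_account.c change swaps a variable-length array sized by runtime counts for a heap allocation, and routes every failure through one label so the array is always freed. A sketch of the shape, with hypothetical open_one()/close_one() stand-ins for the event helpers:

#include <stdlib.h>

static int open_one(void) { return 1; }
static void close_one(int fd) { (void)fd; }

static int open_many(int cnt)
{
	int *fd, i, ret = -1;

	if (cnt == 0)
		return 0;
	fd = malloc(sizeof(int) * cnt);
	if (!fd)
		return -1;
	for (i = 0; i < cnt; i++) {
		fd[i] = open_one();
		if (fd[i] == -1)
			goto out;
	}
	ret = 0;
out:
	while (i--)
		close_one(fd[i]); /* close only what was opened */
	free(fd);
	return ret;
}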
diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c
index d54c5371c6a6..5c0032fe93ae 100644
--- a/tools/perf/tests/expr.c
+++ b/tools/perf/tests/expr.c
@@ -97,6 +97,8 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u
ret |= test(ctx, "2.2 > 2.2", 0);
ret |= test(ctx, "2.2 < 1.1", 0);
ret |= test(ctx, "1.1 > 2.2", 0);
+ ret |= test(ctx, "1.1e10 < 1.1e100", 1);
+ ret |= test(ctx, "1.1e2 > 1.1e-2", 1);
if (ret) {
expr__ctx_free(ctx);
diff --git a/tools/perf/tests/shell/lib/perf_csv_output_lint.py b/tools/perf/tests/shell/lib/perf_csv_output_lint.py
deleted file mode 100644
index 714f283cfb1b..000000000000
--- a/tools/perf/tests/shell/lib/perf_csv_output_lint.py
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/usr/bin/python
-# SPDX-License-Identifier: GPL-2.0
-
-import argparse
-import sys
-
-# Basic sanity check of perf CSV output as specified in the man page.
-# Currently just checks the number of fields per line in output.
-
-ap = argparse.ArgumentParser()
-ap.add_argument('--no-args', action='store_true')
-ap.add_argument('--interval', action='store_true')
-ap.add_argument('--system-wide-no-aggr', action='store_true')
-ap.add_argument('--system-wide', action='store_true')
-ap.add_argument('--event', action='store_true')
-ap.add_argument('--per-core', action='store_true')
-ap.add_argument('--per-thread', action='store_true')
-ap.add_argument('--per-die', action='store_true')
-ap.add_argument('--per-node', action='store_true')
-ap.add_argument('--per-socket', action='store_true')
-ap.add_argument('--separator', default=',', nargs='?')
-args = ap.parse_args()
-
-Lines = sys.stdin.readlines()
-
-def check_csv_output(exp):
- for line in Lines:
- if 'failed' not in line:
- count = line.count(args.separator)
- if count != exp:
- sys.stdout.write(''.join(Lines))
- raise RuntimeError(f'wrong number of fields. expected {exp} in {line}')
-
-try:
- if args.no_args or args.system_wide or args.event:
- expected_items = 6
- elif args.interval or args.per_thread or args.system_wide_no_aggr:
- expected_items = 7
- elif args.per_core or args.per_socket or args.per_node or args.per_die:
- expected_items = 8
- else:
- ap.print_help()
- raise RuntimeError('No checking option specified')
- check_csv_output(expected_items)
-
-except:
- sys.stdout.write('Test failed for input: ' + ''.join(Lines))
- raise
diff --git a/tools/perf/tests/shell/stat+csv_output.sh b/tools/perf/tests/shell/stat+csv_output.sh
index 983220ef3cb4..38c26f3ef4c1 100755
--- a/tools/perf/tests/shell/stat+csv_output.sh
+++ b/tools/perf/tests/shell/stat+csv_output.sh
@@ -6,20 +6,41 @@
set -e
-pythonchecker=$(dirname $0)/lib/perf_csv_output_lint.py
-if [ "x$PYTHON" == "x" ]
-then
- if which python3 > /dev/null
- then
- PYTHON=python3
- elif which python > /dev/null
- then
- PYTHON=python
- else
- echo Skipping test, python not detected please set environment variable PYTHON.
- exit 2
- fi
-fi
+function commachecker()
+{
+ local -i cnt=0 exp=0
+
+ case "$1"
+ in "--no-args") exp=6
+ ;; "--system-wide") exp=6
+ ;; "--event") exp=6
+ ;; "--interval") exp=7
+ ;; "--per-thread") exp=7
+ ;; "--system-wide-no-aggr") exp=7
+ [ $(uname -m) = "s390x" ] && exp=6
+ ;; "--per-core") exp=8
+ ;; "--per-socket") exp=8
+ ;; "--per-node") exp=8
+ ;; "--per-die") exp=8
+ esac
+
+ while read line
+ do
+ # Check for lines beginning with Failed
+ x=${line:0:6}
+ [ "$x" = "Failed" ] && continue
+
+ # Count the number of commas
+ x=$(echo $line | tr -d -c ',')
+ cnt="${#x}"
+ # echo $line $cnt
+ [ "$cnt" -ne "$exp" ] && {
+ echo "wrong number of fields. expected $exp in $line" 1>&2
+ exit 1;
+ }
+ done
+ return 0
+}
# Return true if perf_event_paranoid is > $1 and not running as root.
function ParanoidAndNotRoot()
@@ -30,7 +51,7 @@ function ParanoidAndNotRoot()
check_no_args()
{
echo -n "Checking CSV output: no args "
- perf stat -x, true 2>&1 | $PYTHON $pythonchecker --no-args
+ perf stat -x, true 2>&1 | commachecker --no-args
echo "[Success]"
}
@@ -42,7 +63,7 @@ check_system_wide()
echo "[Skip] paranoid and not root"
return
fi
- perf stat -x, -a true 2>&1 | $PYTHON $pythonchecker --system-wide
+ perf stat -x, -a true 2>&1 | commachecker --system-wide
echo "[Success]"
}
@@ -55,14 +76,14 @@ check_system_wide_no_aggr()
return
fi
echo -n "Checking CSV output: system wide no aggregation "
- perf stat -x, -A -a --no-merge true 2>&1 | $PYTHON $pythonchecker --system-wide-no-aggr
+ perf stat -x, -A -a --no-merge true 2>&1 | commachecker --system-wide-no-aggr
echo "[Success]"
}
check_interval()
{
echo -n "Checking CSV output: interval "
- perf stat -x, -I 1000 true 2>&1 | $PYTHON $pythonchecker --interval
+ perf stat -x, -I 1000 true 2>&1 | commachecker --interval
echo "[Success]"
}
@@ -70,7 +91,7 @@ check_interval()
check_event()
{
echo -n "Checking CSV output: event "
- perf stat -x, -e cpu-clock true 2>&1 | $PYTHON $pythonchecker --event
+ perf stat -x, -e cpu-clock true 2>&1 | commachecker --event
echo "[Success]"
}
@@ -82,7 +103,7 @@ check_per_core()
echo "[Skip] paranoid and not root"
return
fi
- perf stat -x, --per-core -a true 2>&1 | $PYTHON $pythonchecker --per-core
+ perf stat -x, --per-core -a true 2>&1 | commachecker --per-core
echo "[Success]"
}
@@ -94,7 +115,7 @@ check_per_thread()
echo "[Skip] paranoid and not root"
return
fi
- perf stat -x, --per-thread -a true 2>&1 | $PYTHON $pythonchecker --per-thread
+ perf stat -x, --per-thread -a true 2>&1 | commachecker --per-thread
echo "[Success]"
}
@@ -106,7 +127,7 @@ check_per_die()
echo "[Skip] paranoid and not root"
return
fi
- perf stat -x, --per-die -a true 2>&1 | $PYTHON $pythonchecker --per-die
+ perf stat -x, --per-die -a true 2>&1 | commachecker --per-die
echo "[Success]"
}
@@ -118,7 +139,7 @@ check_per_node()
echo "[Skip] paranoid and not root"
return
fi
- perf stat -x, --per-node -a true 2>&1 | $PYTHON $pythonchecker --per-node
+ perf stat -x, --per-node -a true 2>&1 | commachecker --per-node
echo "[Success]"
}
@@ -130,7 +151,7 @@ check_per_socket()
echo "[Skip] paranoid and not root"
return
fi
- perf stat -x, --per-socket -a true 2>&1 | $PYTHON $pythonchecker --per-socket
+ perf stat -x, --per-socket -a true 2>&1 | commachecker --per-socket
echo "[Success]"
}
diff --git a/tools/perf/tests/shell/test_arm_callgraph_fp.sh b/tools/perf/tests/shell/test_arm_callgraph_fp.sh
index 6ffbb27afaba..ec108d45d3c6 100755
--- a/tools/perf/tests/shell/test_arm_callgraph_fp.sh
+++ b/tools/perf/tests/shell/test_arm_callgraph_fp.sh
@@ -43,7 +43,7 @@ CFLAGS="-g -O0 -fno-inline -fno-omit-frame-pointer"
cc $CFLAGS $TEST_PROGRAM_SOURCE -o $TEST_PROGRAM || exit 1
# Add a 1 second delay to skip samples that are not in the leaf() function
-perf record -o $PERF_DATA --call-graph fp -e cycles//u -D 1000 -- $TEST_PROGRAM 2> /dev/null &
+perf record -o $PERF_DATA --call-graph fp -e cycles//u -D 1000 --user-callchains -- $TEST_PROGRAM 2> /dev/null &
PID=$!
echo " + Recording (PID=$PID)..."
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index d23a9e322ff5..0b4f61b6cc6b 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -115,7 +115,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
* physical_package_id will be set to -1. Hence skip this
* test if physical_package_id returns -1 for cpu from perf_cpu_map.
*/
- if (strncmp(session->header.env.arch, "powerpc", 7)) {
+ if (!strncmp(session->header.env.arch, "ppc64le", 7)) {
if (cpu__get_socket_id(perf_cpu_map__cpu(map, 0)) == -1)
return TEST_SKIP;
}
diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h
index 6f85f5d957ef..17311ad9f9af 100644
--- a/tools/perf/trace/beauty/include/linux/socket.h
+++ b/tools/perf/trace/beauty/include/linux/socket.h
@@ -50,6 +50,9 @@ struct linger {
struct msghdr {
void *msg_name; /* ptr to socket address structure */
int msg_namelen; /* size of socket address structure */
+
+ int msg_inq; /* output, data left in socket */
+
struct iov_iter msg_iter; /* data */
/*
@@ -62,8 +65,9 @@ struct msghdr {
void __user *msg_control_user;
};
bool msg_control_is_user : 1;
- __kernel_size_t msg_controllen; /* ancillary data buffer length */
+ bool msg_get_inq : 1;/* return INQ after receive */
unsigned int msg_flags; /* flags on received message */
+ __kernel_size_t msg_controllen; /* ancillary data buffer length */
struct kiocb *msg_iocb; /* ptr to iocb for async requests */
};
@@ -434,6 +438,7 @@ extern struct file *do_accept(struct file *file, unsigned file_flags,
extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags);
extern int __sys_socket(int family, int type, int protocol);
+extern struct file *__sys_socket_file(int family, int type, int protocol);
extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen);
extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr,
int addrlen, int file_flags);
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 1a80151baed9..d040406f3314 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -387,26 +387,16 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}
-#define SPE_MEM_TYPE (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
- ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
- ARM_SPE_REMOTE_ACCESS)
-
-static bool arm_spe__is_memory_event(enum arm_spe_sample_type type)
-{
- if (type & SPE_MEM_TYPE)
- return true;
-
- return false;
-}
-
static u64 arm_spe__synth_data_source(const struct arm_spe_record *record)
{
union perf_mem_data_src data_src = { 0 };
if (record->op == ARM_SPE_LD)
data_src.mem_op = PERF_MEM_OP_LOAD;
- else
+ else if (record->op == ARM_SPE_ST)
data_src.mem_op = PERF_MEM_OP_STORE;
+ else
+ return 0;
if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
data_src.mem_lvl = PERF_MEM_LVL_L3;
@@ -510,7 +500,11 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
return err;
}
- if (spe->sample_memory && arm_spe__is_memory_event(record->type)) {
+ /*
+ * When data_src is zero, the record is not a memory operation, so
+ * skip synthesizing a memory sample in this case.
+ */
+ if (spe->sample_memory && data_src) {
err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
if (err)
return err;
diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l
index 0a13eb20c814..4dc8edbfd9ce 100644
--- a/tools/perf/util/expr.l
+++ b/tools/perf/util/expr.l
@@ -91,7 +91,7 @@ static int literal(yyscan_t scanner)
}
%}
-number ([0-9]+\.?[0-9]*|[0-9]*\.?[0-9]+)
+number ([0-9]+\.?[0-9]*|[0-9]*\.?[0-9]+)(e-?[0-9]+)?
sch [-,=]
spec \\{sch}
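The extended number pattern now also matches an optional e-notation exponent, which is what the two new expr.c assertions exercise. The same comparisons can be sanity-checked in plain C, since strtod() accepts the identical notation:

#include <assert.h>
#include <stdlib.h>

int main(void)
{
	/* mirrors the new expr.c test cases */
	assert(strtod("1.1e10", NULL) < strtod("1.1e100", NULL));
	assert(strtod("1.1e2", NULL) > strtod("1.1e-2", NULL));
	return 0;
}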
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index ee8fcfa115e5..8f7baeabc5cf 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -1372,6 +1372,7 @@ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
*out_evlist = NULL;
if (!metric_no_merge || hashmap__size(ids->ids) == 0) {
+ bool added_event = false;
int i;
/*
* We may fail to share events between metrics because a tool
@@ -1393,8 +1394,16 @@ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
if (!tmp)
return -ENOMEM;
ids__insert(ids->ids, tmp);
+ added_event = true;
}
}
+ if (!added_event && hashmap__size(ids->ids) == 0) {
+ char *tmp = strdup("duration_time");
+
+ if (!tmp)
+ return -ENOMEM;
+ ids__insert(ids->ids, tmp);
+ }
}
ret = metricgroup__build_event_string(&events, ids, modifier,
has_constraint);
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index 37622699c91a..6e5b8cce47bf 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -174,7 +174,7 @@ static int elf_section_address_and_offset(int fd, const char *name, u64 *address
Elf *elf;
GElf_Ehdr ehdr;
GElf_Shdr shdr;
- int ret;
+ int ret = -1;
elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
if (elf == NULL)
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 595565eb68c0..ca2f47f45670 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -43,3 +43,4 @@ test_cpp
*.tmp
xdpxceiver
xdp_redirect_multi
+xdp_synproxy
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 2d3c8c8f558a..cb8e552e1418 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -82,7 +82,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
- xdpxceiver xdp_redirect_multi
+ xdpxceiver xdp_redirect_multi xdp_synproxy
TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
@@ -168,17 +168,26 @@ $(OUTPUT)/%:%.c
$(call msg,BINARY,,$@)
$(Q)$(LINK.c) $^ $(LDLIBS) -o $@
+# LLVM's ld.lld doesn't support all the architectures, so use it only on x86
+ifeq ($(SRCARCH),x86)
+LLD := lld
+else
+LLD := ld
+endif
+
# Filter out -static for liburandom_read.so and its dependent targets so that static builds
# do not fail. Static builds leave urandom_read relying on system-wide shared libraries.
$(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c
$(call msg,LIB,,$@)
- $(Q)$(CC) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $^ $(LDLIBS) -fPIC -shared -o $@
+ $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $^ $(LDLIBS) \
+ -fuse-ld=$(LLD) -Wl,-znoseparate-code -fPIC -shared -o $@
$(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so
$(call msg,BINARY,,$@)
- $(Q)$(CC) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \
- liburandom_read.so $(LDLIBS) \
- -Wl,-rpath=. -Wl,--build-id=sha1 -o $@
+ $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \
+ liburandom_read.so $(LDLIBS) \
+ -fuse-ld=$(LLD) -Wl,-znoseparate-code \
+ -Wl,-rpath=. -Wl,--build-id=sha1 -o $@
$(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch])
$(call msg,MOD,,$@)
@@ -502,6 +511,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \
cap_helpers.c
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \
$(OUTPUT)/liburandom_read.so \
+ $(OUTPUT)/xdp_synproxy \
ima_setup.sh \
$(wildcard progs/btf_dump_test_case_*.c)
TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
@@ -560,6 +570,7 @@ $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \
$(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h
$(OUTPUT)/bench_bpf_loop.o: $(OUTPUT)/bpf_loop_bench.skel.h
$(OUTPUT)/bench_strncmp.o: $(OUTPUT)/strncmp_bench.skel.h
+$(OUTPUT)/bench_bpf_hashmap_full_update.o: $(OUTPUT)/bpf_hashmap_full_update_bench.skel.h
$(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
$(OUTPUT)/bench: LDLIBS += -lm
$(OUTPUT)/bench: $(OUTPUT)/bench.o \
@@ -571,13 +582,16 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
$(OUTPUT)/bench_ringbufs.o \
$(OUTPUT)/bench_bloom_filter_map.o \
$(OUTPUT)/bench_bpf_loop.o \
- $(OUTPUT)/bench_strncmp.o
+ $(OUTPUT)/bench_strncmp.o \
+ $(OUTPUT)/bench_bpf_hashmap_full_update.o
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \
prog_tests/tests.h map_tests/tests.h verifier/tests.h \
feature bpftool \
- $(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h *.subskel.h no_alu32 bpf_gcc bpf_testmod.ko)
+ $(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h *.subskel.h \
+ no_alu32 bpf_gcc bpf_testmod.ko \
+ liburandom_read.so)
.PHONY: docs docs-clean
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index f061cc20e776..d8aa62be996b 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -396,6 +396,7 @@ extern const struct bench bench_hashmap_with_bloom;
extern const struct bench bench_bpf_loop;
extern const struct bench bench_strncmp_no_helper;
extern const struct bench bench_strncmp_helper;
+extern const struct bench bench_bpf_hashmap_full_update;
static const struct bench *benchs[] = {
&bench_count_global,
@@ -430,6 +431,7 @@ static const struct bench *benchs[] = {
&bench_bpf_loop,
&bench_strncmp_no_helper,
&bench_strncmp_helper,
+ &bench_bpf_hashmap_full_update,
};
static void setup_benchmark()
diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c
new file mode 100644
index 000000000000..cec51e0ff4b8
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Bytedance */
+
+#include <argp.h>
+#include "bench.h"
+#include "bpf_hashmap_full_update_bench.skel.h"
+#include "bpf_util.h"
+
+/* BPF triggering benchmarks */
+static struct ctx {
+ struct bpf_hashmap_full_update_bench *skel;
+} ctx;
+
+#define MAX_LOOP_NUM 10000
+
+static void validate(void)
+{
+ if (env.consumer_cnt != 1) {
+ fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+ exit(1);
+ }
+}
+
+static void *producer(void *input)
+{
+ while (true) {
+ /* trigger the bpf program */
+ syscall(__NR_getpgid);
+ }
+
+ return NULL;
+}
+
+static void *consumer(void *input)
+{
+ return NULL;
+}
+
+static void measure(struct bench_res *res)
+{
+}
+
+static void setup(void)
+{
+ struct bpf_link *link;
+ int map_fd, i, max_entries;
+
+ setup_libbpf();
+
+ ctx.skel = bpf_hashmap_full_update_bench__open_and_load();
+ if (!ctx.skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ ctx.skel->bss->nr_loops = MAX_LOOP_NUM;
+
+ link = bpf_program__attach(ctx.skel->progs.benchmark);
+ if (!link) {
+ fprintf(stderr, "failed to attach program!\n");
+ exit(1);
+ }
+
+ /* fill hash_map */
+ map_fd = bpf_map__fd(ctx.skel->maps.hash_map_bench);
+ max_entries = bpf_map__max_entries(ctx.skel->maps.hash_map_bench);
+ for (i = 0; i < max_entries; i++)
+ bpf_map_update_elem(map_fd, &i, &i, BPF_ANY);
+}
+
+void hashmap_report_final(struct bench_res res[], int res_cnt)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ int i;
+
+ for (i = 0; i < nr_cpus; i++) {
+ u64 time = ctx.skel->bss->percpu_time[i];
+
+ if (!time)
+ continue;
+
+ printf("%d:hash_map_full_perf %lld events per sec\n",
+ i, ctx.skel->bss->nr_loops * 1000000000ll / time);
+ }
+}
+
+const struct bench bench_bpf_hashmap_full_update = {
+ .name = "bpf-hashmap-ful-update",
+ .validate = validate,
+ .setup = setup,
+ .producer_thread = producer,
+ .consumer_thread = consumer,
+ .measure = measure,
+ .report_progress = NULL,
+ .report_final = hashmap_report_final,
+};
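The final report converts per-CPU elapsed nanoseconds into an events-per-second rate. A standalone restatement of that arithmetic; the multiplication is kept in 64 bits so it cannot truncate, mirroring the "nr_loops * 1000000000ll / time" expression above:

#include <stdio.h>
#include <stdint.h>

static long long events_per_sec(uint32_t nr_loops, uint64_t time_ns)
{
	return nr_loops * 1000000000ll / time_ns;
}

int main(void)
{
	/* 10000 loops in 2 ms -> 5,000,000 events per sec */
	printf("%lld\n", events_per_sec(10000, 2000000));
	return 0;
}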
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh b/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh
new file mode 100755
index 000000000000..1e2de838f9fa
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./benchs/run_common.sh
+
+set -eufo pipefail
+
+nr_threads=`expr $(cat /proc/cpuinfo | grep "processor"| wc -l) - 1`
+summary=$($RUN_BENCH -p $nr_threads bpf-hashmap-ful-update)
+printf "$summary"
+printf "\n"
diff --git a/tools/testing/selftests/bpf/btf_helpers.c b/tools/testing/selftests/bpf/btf_helpers.c
index b5941d514e17..1c1c2c26690a 100644
--- a/tools/testing/selftests/bpf/btf_helpers.c
+++ b/tools/testing/selftests/bpf/btf_helpers.c
@@ -26,11 +26,12 @@ static const char * const btf_kind_str_mapping[] = {
[BTF_KIND_FLOAT] = "FLOAT",
[BTF_KIND_DECL_TAG] = "DECL_TAG",
[BTF_KIND_TYPE_TAG] = "TYPE_TAG",
+ [BTF_KIND_ENUM64] = "ENUM64",
};
static const char *btf_kind_str(__u16 kind)
{
- if (kind > BTF_KIND_TYPE_TAG)
+ if (kind > BTF_KIND_ENUM64)
return "UNKNOWN";
return btf_kind_str_mapping[kind];
}
@@ -139,14 +140,32 @@ int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id)
}
case BTF_KIND_ENUM: {
const struct btf_enum *v = btf_enum(t);
+ const char *fmt_str;
- fprintf(out, " size=%u vlen=%u", t->size, vlen);
+ fmt_str = btf_kflag(t) ? "\n\t'%s' val=%d" : "\n\t'%s' val=%u";
+ fprintf(out, " encoding=%s size=%u vlen=%u",
+ btf_kflag(t) ? "SIGNED" : "UNSIGNED", t->size, vlen);
for (i = 0; i < vlen; i++, v++) {
- fprintf(out, "\n\t'%s' val=%u",
+ fprintf(out, fmt_str,
btf_str(btf, v->name_off), v->val);
}
break;
}
+ case BTF_KIND_ENUM64: {
+ const struct btf_enum64 *v = btf_enum64(t);
+ const char *fmt_str;
+
+ fmt_str = btf_kflag(t) ? "\n\t'%s' val=%lld" : "\n\t'%s' val=%llu";
+
+ fprintf(out, " encoding=%s size=%u vlen=%u",
+ btf_kflag(t) ? "SIGNED" : "UNSIGNED", t->size, vlen);
+ for (i = 0; i < vlen; i++, v++) {
+ fprintf(out, fmt_str,
+ btf_str(btf, v->name_off),
+ ((__u64)v->val_hi32 << 32) | v->val_lo32);
+ }
+ break;
+ }
case BTF_KIND_FWD:
fprintf(out, " fwd_kind=%s", btf_kflag(t) ? "union" : "struct");
break;
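BTF_KIND_ENUM64 stores each enumerator's 64-bit value in two 32-bit halves, which is why the dumper above reassembles ((__u64)v->val_hi32 << 32) | v->val_lo32. The same reassembly as a minimal sketch, with a hypothetical mirror of the value halves:

#include <stdint.h>

/* hypothetical mirror of struct btf_enum64's value fields */
struct enum64_val { uint32_t val_lo32, val_hi32; };

static uint64_t enum64_value(const struct enum64_val *v)
{
	/* high half into the top 32 bits, low half OR'd in below */
	return ((uint64_t)v->val_hi32 << 32) | v->val_lo32;
}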
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index 08c0601b3e84..0b899d2d8ea7 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -17,6 +17,14 @@ static void trigger_func2(void)
asm volatile ("");
}
+/* attach point for byname sleepable uprobe */
+static void trigger_func3(void)
+{
+ asm volatile ("");
+}
+
+static char test_data[] = "test_data";
+
void test_attach_probe(void)
{
DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
@@ -49,9 +57,17 @@ void test_attach_probe(void)
if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset"))
return;
- skel = test_attach_probe__open_and_load();
+ skel = test_attach_probe__open();
if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
+
+ /* sleepable kprobe test case needs flags set before loading */
+ if (!ASSERT_OK(bpf_program__set_flags(skel->progs.handle_kprobe_sleepable,
+ BPF_F_SLEEPABLE), "kprobe_sleepable_flags"))
+ goto cleanup;
+
+ if (!ASSERT_OK(test_attach_probe__load(skel), "skel_load"))
+ goto cleanup;
if (!ASSERT_OK_PTR(skel->bss, "check_bss"))
goto cleanup;
@@ -151,6 +167,30 @@ void test_attach_probe(void)
if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname2, "attach_uretprobe_byname2"))
goto cleanup;
+ /* sleepable kprobes should not attach successfully */
+ skel->links.handle_kprobe_sleepable = bpf_program__attach(skel->progs.handle_kprobe_sleepable);
+ if (!ASSERT_ERR_PTR(skel->links.handle_kprobe_sleepable, "attach_kprobe_sleepable"))
+ goto cleanup;
+
+ /* test sleepable uprobe and uretprobe variants */
+ skel->links.handle_uprobe_byname3_sleepable = bpf_program__attach(skel->progs.handle_uprobe_byname3_sleepable);
+ if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname3_sleepable, "attach_uprobe_byname3_sleepable"))
+ goto cleanup;
+
+ skel->links.handle_uprobe_byname3 = bpf_program__attach(skel->progs.handle_uprobe_byname3);
+ if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname3, "attach_uprobe_byname3"))
+ goto cleanup;
+
+ skel->links.handle_uretprobe_byname3_sleepable = bpf_program__attach(skel->progs.handle_uretprobe_byname3_sleepable);
+ if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname3_sleepable, "attach_uretprobe_byname3_sleepable"))
+ goto cleanup;
+
+ skel->links.handle_uretprobe_byname3 = bpf_program__attach(skel->progs.handle_uretprobe_byname3);
+ if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname3, "attach_uretprobe_byname3"))
+ goto cleanup;
+
+ skel->bss->user_ptr = test_data;
+
/* trigger & validate kprobe && kretprobe */
usleep(1);
@@ -164,6 +204,9 @@ void test_attach_probe(void)
/* trigger & validate uprobe attached by name */
trigger_func2();
+ /* trigger & validate sleepable uprobe attached by name */
+ trigger_func3();
+
ASSERT_EQ(skel->bss->kprobe_res, 1, "check_kprobe_res");
ASSERT_EQ(skel->bss->kprobe2_res, 11, "check_kprobe_auto_res");
ASSERT_EQ(skel->bss->kretprobe_res, 2, "check_kretprobe_res");
@@ -174,6 +217,10 @@ void test_attach_probe(void)
ASSERT_EQ(skel->bss->uretprobe_byname_res, 6, "check_uretprobe_byname_res");
ASSERT_EQ(skel->bss->uprobe_byname2_res, 7, "check_uprobe_byname2_res");
ASSERT_EQ(skel->bss->uretprobe_byname2_res, 8, "check_uretprobe_byname2_res");
+ ASSERT_EQ(skel->bss->uprobe_byname3_sleepable_res, 9, "check_uprobe_byname3_sleepable_res");
+ ASSERT_EQ(skel->bss->uprobe_byname3_res, 10, "check_uprobe_byname3_res");
+ ASSERT_EQ(skel->bss->uretprobe_byname3_sleepable_res, 11, "check_uretprobe_byname3_sleepable_res");
+ ASSERT_EQ(skel->bss->uretprobe_byname3_res, 12, "check_uretprobe_byname3_res");
cleanup:
test_attach_probe__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
index 83ef55e3caa4..2974b44f80fa 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
@@ -121,24 +121,24 @@ static void kprobe_multi_link_api_subtest(void)
})
GET_ADDR("bpf_fentry_test1", addrs[0]);
- GET_ADDR("bpf_fentry_test2", addrs[1]);
- GET_ADDR("bpf_fentry_test3", addrs[2]);
- GET_ADDR("bpf_fentry_test4", addrs[3]);
- GET_ADDR("bpf_fentry_test5", addrs[4]);
- GET_ADDR("bpf_fentry_test6", addrs[5]);
- GET_ADDR("bpf_fentry_test7", addrs[6]);
+ GET_ADDR("bpf_fentry_test3", addrs[1]);
+ GET_ADDR("bpf_fentry_test4", addrs[2]);
+ GET_ADDR("bpf_fentry_test5", addrs[3]);
+ GET_ADDR("bpf_fentry_test6", addrs[4]);
+ GET_ADDR("bpf_fentry_test7", addrs[5]);
+ GET_ADDR("bpf_fentry_test2", addrs[6]);
GET_ADDR("bpf_fentry_test8", addrs[7]);
#undef GET_ADDR
- cookies[0] = 1;
- cookies[1] = 2;
- cookies[2] = 3;
- cookies[3] = 4;
- cookies[4] = 5;
- cookies[5] = 6;
- cookies[6] = 7;
- cookies[7] = 8;
+ cookies[0] = 1; /* bpf_fentry_test1 */
+ cookies[1] = 2; /* bpf_fentry_test3 */
+ cookies[2] = 3; /* bpf_fentry_test4 */
+ cookies[3] = 4; /* bpf_fentry_test5 */
+ cookies[4] = 5; /* bpf_fentry_test6 */
+ cookies[5] = 6; /* bpf_fentry_test7 */
+ cookies[6] = 7; /* bpf_fentry_test2 */
+ cookies[7] = 8; /* bpf_fentry_test8 */
opts.kprobe_multi.addrs = (const unsigned long *) &addrs;
opts.kprobe_multi.cnt = ARRAY_SIZE(addrs);
@@ -149,14 +149,14 @@ static void kprobe_multi_link_api_subtest(void)
if (!ASSERT_GE(link1_fd, 0, "link1_fd"))
goto cleanup;
- cookies[0] = 8;
- cookies[1] = 7;
- cookies[2] = 6;
- cookies[3] = 5;
- cookies[4] = 4;
- cookies[5] = 3;
- cookies[6] = 2;
- cookies[7] = 1;
+ cookies[0] = 8; /* bpf_fentry_test1 */
+ cookies[1] = 7; /* bpf_fentry_test3 */
+ cookies[2] = 6; /* bpf_fentry_test4 */
+ cookies[3] = 5; /* bpf_fentry_test5 */
+ cookies[4] = 4; /* bpf_fentry_test6 */
+ cookies[5] = 3; /* bpf_fentry_test7 */
+ cookies[6] = 2; /* bpf_fentry_test2 */
+ cookies[7] = 1; /* bpf_fentry_test8 */
opts.kprobe_multi.flags = BPF_F_KPROBE_MULTI_RETURN;
prog_fd = bpf_program__fd(skel->progs.test_kretprobe);
@@ -181,12 +181,12 @@ static void kprobe_multi_attach_api_subtest(void)
struct kprobe_multi *skel = NULL;
const char *syms[8] = {
"bpf_fentry_test1",
- "bpf_fentry_test2",
"bpf_fentry_test3",
"bpf_fentry_test4",
"bpf_fentry_test5",
"bpf_fentry_test6",
"bpf_fentry_test7",
+ "bpf_fentry_test2",
"bpf_fentry_test8",
};
__u64 cookies[8];
@@ -198,14 +198,14 @@ static void kprobe_multi_attach_api_subtest(void)
skel->bss->pid = getpid();
skel->bss->test_cookie = true;
- cookies[0] = 1;
- cookies[1] = 2;
- cookies[2] = 3;
- cookies[3] = 4;
- cookies[4] = 5;
- cookies[5] = 6;
- cookies[6] = 7;
- cookies[7] = 8;
+ cookies[0] = 1; /* bpf_fentry_test1 */
+ cookies[1] = 2; /* bpf_fentry_test3 */
+ cookies[2] = 3; /* bpf_fentry_test4 */
+ cookies[3] = 4; /* bpf_fentry_test5 */
+ cookies[4] = 5; /* bpf_fentry_test6 */
+ cookies[5] = 6; /* bpf_fentry_test7 */
+ cookies[6] = 7; /* bpf_fentry_test2 */
+ cookies[7] = 8; /* bpf_fentry_test8 */
opts.syms = syms;
opts.cnt = ARRAY_SIZE(syms);
@@ -216,14 +216,14 @@ static void kprobe_multi_attach_api_subtest(void)
if (!ASSERT_OK_PTR(link1, "bpf_program__attach_kprobe_multi_opts"))
goto cleanup;
- cookies[0] = 8;
- cookies[1] = 7;
- cookies[2] = 6;
- cookies[3] = 5;
- cookies[4] = 4;
- cookies[5] = 3;
- cookies[6] = 2;
- cookies[7] = 1;
+ cookies[0] = 8; /* bpf_fentry_test1 */
+ cookies[1] = 7; /* bpf_fentry_test3 */
+ cookies[2] = 6; /* bpf_fentry_test4 */
+ cookies[3] = 5; /* bpf_fentry_test5 */
+ cookies[4] = 4; /* bpf_fentry_test6 */
+ cookies[5] = 3; /* bpf_fentry_test7 */
+ cookies[6] = 2; /* bpf_fentry_test2 */
+ cookies[7] = 1; /* bpf_fentry_test8 */
opts.retprobe = true;
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index ba5bde53d418..edb387163baa 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -2897,26 +2897,6 @@ static struct btf_raw_test raw_tests[] = {
},
{
- .descr = "invalid enum kind_flag",
- .raw_types = {
- BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
- BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 1, 1), 4), /* [2] */
- BTF_ENUM_ENC(NAME_TBD, 0),
- BTF_END_RAW,
- },
- BTF_STR_SEC("\0A"),
- .map_type = BPF_MAP_TYPE_ARRAY,
- .map_name = "enum_type_check_btf",
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- .key_type_id = 1,
- .value_type_id = 1,
- .max_entries = 4,
- .btf_load_err = true,
- .err_str = "Invalid btf_info kind_flag",
-},
-
-{
.descr = "valid fwd kind_flag",
.raw_types = {
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
@@ -4072,6 +4052,42 @@ static struct btf_raw_test raw_tests[] = {
.btf_load_err = true,
.err_str = "Type tags don't precede modifiers",
},
+{
+ .descr = "enum64 test #1, unsigned, size 8",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 2), 8), /* [2] */
+ BTF_ENUM64_ENC(NAME_TBD, 0, 0),
+ BTF_ENUM64_ENC(NAME_TBD, 1, 1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0a\0b\0c"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 8,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 1,
+},
+{
+ .descr = "enum64 test #2, signed, size 4",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 1, 2), 4), /* [2] */
+ BTF_ENUM64_ENC(NAME_TBD, -1, 0),
+ BTF_ENUM64_ENC(NAME_TBD, 1, 0),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0a\0b\0c"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 1,
+},
}; /* struct btf_raw_test raw_tests[] */
@@ -7000,9 +7016,12 @@ static struct btf_dedup_test dedup_tests[] = {
BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] decl_tag */
BTF_DECL_TAG_ENC(NAME_TBD, 7, -1), /* [17] decl_tag */
BTF_TYPE_TAG_ENC(NAME_TBD, 8), /* [18] type_tag */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 2), 8), /* [19] enum64 */
+ BTF_ENUM64_ENC(NAME_TBD, 0, 0),
+ BTF_ENUM64_ENC(NAME_TBD, 1, 1),
BTF_END_RAW,
},
- BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R"),
+ BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R\0S\0T\0U"),
},
.expect = {
.raw_types = {
@@ -7030,9 +7049,12 @@ static struct btf_dedup_test dedup_tests[] = {
BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] decl_tag */
BTF_DECL_TAG_ENC(NAME_TBD, 7, -1), /* [17] decl_tag */
BTF_TYPE_TAG_ENC(NAME_TBD, 8), /* [18] type_tag */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 2), 8), /* [19] enum64 */
+ BTF_ENUM64_ENC(NAME_TBD, 0, 0),
+ BTF_ENUM64_ENC(NAME_TBD, 1, 1),
BTF_END_RAW,
},
- BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R"),
+ BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R\0S\0T\0U"),
},
},
{
@@ -7493,6 +7515,91 @@ static struct btf_dedup_test dedup_tests[] = {
BTF_STR_SEC("\0tag1\0t\0m"),
},
},
+{
+ .descr = "dedup: enum64, standalone",
+ .input = {
+ .raw_types = {
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 123),
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 123),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val"),
+ },
+ .expect = {
+ .raw_types = {
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 123),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val"),
+ },
+},
+{
+ .descr = "dedup: enum64, fwd resolution",
+ .input = {
+ .raw_types = {
+ /* [1] fwd enum64 'e1' before full enum */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
+ /* [2] full enum64 'e1' after fwd */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 123),
+ /* [3] full enum64 'e2' before fwd */
+ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(4), 0, 456),
+ /* [4] fwd enum64 'e2' after full enum */
+ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
+ /* [5] incompatible full enum64 with different value */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 0, 321),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"),
+ },
+ .expect = {
+ .raw_types = {
+ /* [1] full enum64 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 123),
+ /* [2] full enum64 'e2' */
+ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(4), 0, 456),
+ /* [3] incompatible full enum64 with different value */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 0, 321),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"),
+ },
+},
+{
+ .descr = "dedup: enum and enum64, no dedup",
+ .input = {
+ .raw_types = {
+ /* [1] enum 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+ BTF_ENUM_ENC(NAME_NTH(2), 1),
+ /* [2] enum64 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 4),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 0),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val"),
+ },
+ .expect = {
+ .raw_types = {
+ /* [1] enum 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+ BTF_ENUM_ENC(NAME_NTH(2), 1),
+ /* [2] enum64 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 4),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 0),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val"),
+ },
+},
};
@@ -7517,6 +7624,8 @@ static int btf_type_size(const struct btf_type *t)
return base_size + sizeof(__u32);
case BTF_KIND_ENUM:
return base_size + vlen * sizeof(struct btf_enum);
+ case BTF_KIND_ENUM64:
+ return base_size + vlen * sizeof(struct btf_enum64);
case BTF_KIND_ARRAY:
return base_size + sizeof(struct btf_array);
case BTF_KIND_STRUCT:
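The raw-type tests above encode each 64-bit enumerator as three 32-bit words, matching the sizeof(struct btf_enum64) accounting added to btf_type_size(). A sketch of the layout those BTF_ENUM64_ENC(name, lo32, hi32) entries appear to follow, assuming the UAPI struct btf_enum64 of a name offset plus low and high value halves:

#include <stdint.h>

/* assumed mirror of the UAPI struct btf_enum64 layout */
struct btf_enum64_raw {
	uint32_t name_off;  /* enumerator name offset in string section */
	uint32_t val_lo32;  /* low 32 bits of the value */
	uint32_t val_hi32;  /* high 32 bits of the value */
};

/* under this reading, BTF_ENUM64_ENC(NAME_NTH(2), 1, 123) denotes the
 * value (123ULL << 32) | 1, i.e. low half 1 and high half 123
 */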
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c
index addf99c05896..6e36de1302fc 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_write.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c
@@ -9,6 +9,7 @@ static void gen_btf(struct btf *btf)
const struct btf_var_secinfo *vi;
const struct btf_type *t;
const struct btf_member *m;
+ const struct btf_enum64 *v64;
const struct btf_enum *v;
const struct btf_param *p;
int id, err, str_off;
@@ -171,7 +172,7 @@ static void gen_btf(struct btf *btf)
ASSERT_STREQ(btf__str_by_offset(btf, v->name_off), "v2", "v2_name");
ASSERT_EQ(v->val, 2, "v2_val");
ASSERT_STREQ(btf_type_raw_dump(btf, 9),
- "[9] ENUM 'e1' size=4 vlen=2\n"
+ "[9] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n"
"\t'v1' val=1\n"
"\t'v2' val=2", "raw_dump");
@@ -202,7 +203,7 @@ static void gen_btf(struct btf *btf)
ASSERT_EQ(btf_vlen(t), 0, "enum_fwd_kind");
ASSERT_EQ(t->size, 4, "enum_fwd_sz");
ASSERT_STREQ(btf_type_raw_dump(btf, 12),
- "[12] ENUM 'enum_fwd' size=4 vlen=0", "raw_dump");
+ "[12] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0", "raw_dump");
/* TYPEDEF */
id = btf__add_typedef(btf, "typedef1", 1);
@@ -307,6 +308,48 @@ static void gen_btf(struct btf *btf)
ASSERT_EQ(t->type, 1, "tag_type");
ASSERT_STREQ(btf_type_raw_dump(btf, 20),
"[20] TYPE_TAG 'tag1' type_id=1", "raw_dump");
+
+ /* ENUM64 */
+ id = btf__add_enum64(btf, "e1", 8, true);
+ ASSERT_EQ(id, 21, "enum64_id");
+ err = btf__add_enum64_value(btf, "v1", -1);
+ ASSERT_OK(err, "v1_res");
+ err = btf__add_enum64_value(btf, "v2", 0x123456789); /* 4886718345 */
+ ASSERT_OK(err, "v2_res");
+ t = btf__type_by_id(btf, 21);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "e1", "enum64_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM64, "enum64_kind");
+ ASSERT_EQ(btf_vlen(t), 2, "enum64_vlen");
+ ASSERT_EQ(t->size, 8, "enum64_sz");
+ v64 = btf_enum64(t) + 0;
+ ASSERT_STREQ(btf__str_by_offset(btf, v64->name_off), "v1", "v1_name");
+ ASSERT_EQ(v64->val_hi32, 0xffffffff, "v1_val");
+ ASSERT_EQ(v64->val_lo32, 0xffffffff, "v1_val");
+ v64 = btf_enum64(t) + 1;
+ ASSERT_STREQ(btf__str_by_offset(btf, v64->name_off), "v2", "v2_name");
+ ASSERT_EQ(v64->val_hi32, 0x1, "v2_val");
+ ASSERT_EQ(v64->val_lo32, 0x23456789, "v2_val");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 21),
+ "[21] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n"
+ "\t'v1' val=-1\n"
+ "\t'v2' val=4886718345", "raw_dump");
+
+ id = btf__add_enum64(btf, "e1", 8, false);
+ ASSERT_EQ(id, 22, "enum64_id");
+ err = btf__add_enum64_value(btf, "v1", 0xffffffffFFFFFFFF); /* 18446744073709551615 */
+ ASSERT_OK(err, "v1_res");
+ t = btf__type_by_id(btf, 22);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "e1", "enum64_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM64, "enum64_kind");
+ ASSERT_EQ(btf_vlen(t), 1, "enum64_vlen");
+ ASSERT_EQ(t->size, 8, "enum64_sz");
+ v64 = btf_enum64(t) + 0;
+ ASSERT_STREQ(btf__str_by_offset(btf, v64->name_off), "v1", "v1_name");
+ ASSERT_EQ(v64->val_hi32, 0xffffffff, "v1_val");
+ ASSERT_EQ(v64->val_lo32, 0xffffffff, "v1_val");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 22),
+ "[22] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n"
+ "\t'v1' val=18446744073709551615", "raw_dump");
}
static void test_btf_add()
@@ -332,12 +375,12 @@ static void test_btf_add()
"\t'f2' type_id=1 bits_offset=32 bitfield_size=16",
"[8] UNION 'u1' size=8 vlen=1\n"
"\t'f1' type_id=1 bits_offset=0 bitfield_size=16",
- "[9] ENUM 'e1' size=4 vlen=2\n"
+ "[9] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n"
"\t'v1' val=1\n"
"\t'v2' val=2",
"[10] FWD 'struct_fwd' fwd_kind=struct",
"[11] FWD 'union_fwd' fwd_kind=union",
- "[12] ENUM 'enum_fwd' size=4 vlen=0",
+ "[12] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0",
"[13] TYPEDEF 'typedef1' type_id=1",
"[14] FUNC 'func1' type_id=15 linkage=global",
"[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n"
@@ -348,7 +391,12 @@ static void test_btf_add()
"\ttype_id=1 offset=4 size=8",
"[18] DECL_TAG 'tag1' type_id=16 component_idx=-1",
"[19] DECL_TAG 'tag2' type_id=14 component_idx=1",
- "[20] TYPE_TAG 'tag1' type_id=1");
+ "[20] TYPE_TAG 'tag1' type_id=1",
+ "[21] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n"
+ "\t'v1' val=-1\n"
+ "\t'v2' val=4886718345",
+ "[22] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n"
+ "\t'v1' val=18446744073709551615");
btf__free(btf);
}
@@ -370,7 +418,7 @@ static void test_btf_add_btf()
gen_btf(btf2);
id = btf__add_btf(btf1, btf2);
- if (!ASSERT_EQ(id, 21, "id"))
+ if (!ASSERT_EQ(id, 23, "id"))
goto cleanup;
VALIDATE_RAW_BTF(
@@ -386,12 +434,12 @@ static void test_btf_add_btf()
"\t'f2' type_id=1 bits_offset=32 bitfield_size=16",
"[8] UNION 'u1' size=8 vlen=1\n"
"\t'f1' type_id=1 bits_offset=0 bitfield_size=16",
- "[9] ENUM 'e1' size=4 vlen=2\n"
+ "[9] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n"
"\t'v1' val=1\n"
"\t'v2' val=2",
"[10] FWD 'struct_fwd' fwd_kind=struct",
"[11] FWD 'union_fwd' fwd_kind=union",
- "[12] ENUM 'enum_fwd' size=4 vlen=0",
+ "[12] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0",
"[13] TYPEDEF 'typedef1' type_id=1",
"[14] FUNC 'func1' type_id=15 linkage=global",
"[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n"
@@ -403,36 +451,46 @@ static void test_btf_add_btf()
"[18] DECL_TAG 'tag1' type_id=16 component_idx=-1",
"[19] DECL_TAG 'tag2' type_id=14 component_idx=1",
"[20] TYPE_TAG 'tag1' type_id=1",
+ "[21] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n"
+ "\t'v1' val=-1\n"
+ "\t'v2' val=4886718345",
+ "[22] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n"
+ "\t'v1' val=18446744073709551615",
/* types appended from the second BTF */
- "[21] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
- "[22] PTR '(anon)' type_id=21",
- "[23] CONST '(anon)' type_id=25",
- "[24] VOLATILE '(anon)' type_id=23",
- "[25] RESTRICT '(anon)' type_id=24",
- "[26] ARRAY '(anon)' type_id=22 index_type_id=21 nr_elems=10",
- "[27] STRUCT 's1' size=8 vlen=2\n"
- "\t'f1' type_id=21 bits_offset=0\n"
- "\t'f2' type_id=21 bits_offset=32 bitfield_size=16",
- "[28] UNION 'u1' size=8 vlen=1\n"
- "\t'f1' type_id=21 bits_offset=0 bitfield_size=16",
- "[29] ENUM 'e1' size=4 vlen=2\n"
+ "[23] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[24] PTR '(anon)' type_id=23",
+ "[25] CONST '(anon)' type_id=27",
+ "[26] VOLATILE '(anon)' type_id=25",
+ "[27] RESTRICT '(anon)' type_id=26",
+ "[28] ARRAY '(anon)' type_id=24 index_type_id=23 nr_elems=10",
+ "[29] STRUCT 's1' size=8 vlen=2\n"
+ "\t'f1' type_id=23 bits_offset=0\n"
+ "\t'f2' type_id=23 bits_offset=32 bitfield_size=16",
+ "[30] UNION 'u1' size=8 vlen=1\n"
+ "\t'f1' type_id=23 bits_offset=0 bitfield_size=16",
+ "[31] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n"
"\t'v1' val=1\n"
"\t'v2' val=2",
- "[30] FWD 'struct_fwd' fwd_kind=struct",
- "[31] FWD 'union_fwd' fwd_kind=union",
- "[32] ENUM 'enum_fwd' size=4 vlen=0",
- "[33] TYPEDEF 'typedef1' type_id=21",
- "[34] FUNC 'func1' type_id=35 linkage=global",
- "[35] FUNC_PROTO '(anon)' ret_type_id=21 vlen=2\n"
- "\t'p1' type_id=21\n"
- "\t'p2' type_id=22",
- "[36] VAR 'var1' type_id=21, linkage=global-alloc",
- "[37] DATASEC 'datasec1' size=12 vlen=1\n"
- "\ttype_id=21 offset=4 size=8",
- "[38] DECL_TAG 'tag1' type_id=36 component_idx=-1",
- "[39] DECL_TAG 'tag2' type_id=34 component_idx=1",
- "[40] TYPE_TAG 'tag1' type_id=21");
+ "[32] FWD 'struct_fwd' fwd_kind=struct",
+ "[33] FWD 'union_fwd' fwd_kind=union",
+ "[34] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0",
+ "[35] TYPEDEF 'typedef1' type_id=23",
+ "[36] FUNC 'func1' type_id=37 linkage=global",
+ "[37] FUNC_PROTO '(anon)' ret_type_id=23 vlen=2\n"
+ "\t'p1' type_id=23\n"
+ "\t'p2' type_id=24",
+ "[38] VAR 'var1' type_id=23, linkage=global-alloc",
+ "[39] DATASEC 'datasec1' size=12 vlen=1\n"
+ "\ttype_id=23 offset=4 size=8",
+ "[40] DECL_TAG 'tag1' type_id=38 component_idx=-1",
+ "[41] DECL_TAG 'tag2' type_id=36 component_idx=1",
+ "[42] TYPE_TAG 'tag1' type_id=23",
+ "[43] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n"
+ "\t'v1' val=-1\n"
+ "\t'v2' val=4886718345",
+ "[44] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n"
+ "\t'v1' val=18446744073709551615");
cleanup:
btf__free(btf1);
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 3712dfe1be59..2f92feb809be 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -84,6 +84,7 @@ static int duration = 0;
#define NESTING_ERR_CASE(name) { \
NESTING_CASE_COMMON(name), \
.fails = true, \
+ .run_btfgen_fails = true, \
}
#define ARRAYS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \
@@ -258,12 +259,14 @@ static int duration = 0;
BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.o", \
"probed:", name), \
.fails = true, \
+ .run_btfgen_fails = true, \
.raw_tp_name = "sys_enter", \
.prog_name = "test_core_bitfields", \
}, { \
BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o", \
"direct:", name), \
.fails = true, \
+ .run_btfgen_fails = true, \
.prog_name = "test_core_bitfields_direct", \
}
@@ -304,6 +307,7 @@ static int duration = 0;
#define SIZE_ERR_CASE(name) { \
SIZE_CASE_COMMON(name), \
.fails = true, \
+ .run_btfgen_fails = true, \
}
#define TYPE_BASED_CASE_COMMON(name) \
@@ -363,6 +367,25 @@ static int duration = 0;
.fails = true, \
}
+#define ENUM64VAL_CASE_COMMON(name) \
+ .case_name = #name, \
+ .bpf_obj_file = "test_core_reloc_enum64val.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_enum64val"
+
+#define ENUM64VAL_CASE(name, ...) { \
+ ENUM64VAL_CASE_COMMON(name), \
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_enum64val_output) \
+ __VA_ARGS__, \
+ .output_len = sizeof(struct core_reloc_enum64val_output), \
+}
+
+#define ENUM64VAL_ERR_CASE(name) { \
+ ENUM64VAL_CASE_COMMON(name), \
+ .fails = true, \
+}
+
struct core_reloc_test_case;
typedef int (*setup_test_fn)(struct core_reloc_test_case *test);
@@ -377,6 +400,7 @@ struct core_reloc_test_case {
const char *output;
int output_len;
bool fails;
+ bool run_btfgen_fails;
bool needs_testmod;
bool relaxed_core_relocs;
const char *prog_name;
@@ -831,6 +855,45 @@ static const struct core_reloc_test_case test_cases[] = {
.anon_val2 = 0x222,
}),
ENUMVAL_ERR_CASE(enumval___err_missing),
+
+ /* 64bit enumerator value existence and value relocations */
+ ENUM64VAL_CASE(enum64val, {
+ .unsigned_val1_exists = true,
+ .unsigned_val2_exists = true,
+ .unsigned_val3_exists = true,
+ .signed_val1_exists = true,
+ .signed_val2_exists = true,
+ .signed_val3_exists = true,
+ .unsigned_val1 = 0x1ffffffffULL,
+ .unsigned_val2 = 0x2,
+ .signed_val1 = 0x1ffffffffLL,
+ .signed_val2 = -2,
+ }),
+ ENUM64VAL_CASE(enum64val___diff, {
+ .unsigned_val1_exists = true,
+ .unsigned_val2_exists = true,
+ .unsigned_val3_exists = true,
+ .signed_val1_exists = true,
+ .signed_val2_exists = true,
+ .signed_val3_exists = true,
+ .unsigned_val1 = 0x101ffffffffULL,
+ .unsigned_val2 = 0x202ffffffffULL,
+ .signed_val1 = -101,
+ .signed_val2 = -202,
+ }),
+ ENUM64VAL_CASE(enum64val___val3_missing, {
+ .unsigned_val1_exists = true,
+ .unsigned_val2_exists = true,
+ .unsigned_val3_exists = false,
+ .signed_val1_exists = true,
+ .signed_val2_exists = true,
+ .signed_val3_exists = false,
+ .unsigned_val1 = 0x111ffffffffULL,
+ .unsigned_val2 = 0x222,
+ .signed_val1 = 0x111ffffffffLL,
+ .signed_val2 = -222,
+ }),
+ ENUM64VAL_ERR_CASE(enum64val___err_missing),
};
struct data {
@@ -894,7 +957,7 @@ static void run_core_reloc_tests(bool use_btfgen)
/* generate a "minimal" BTF file and use it as source */
if (use_btfgen) {
- if (!test_case->btf_src_file || test_case->fails) {
+ if (!test_case->btf_src_file || test_case->run_btfgen_fails) {
test__skip();
continue;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
index a7e74297f15f..5a7e6011f6bf 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
@@ -7,11 +7,9 @@
void serial_test_fexit_stress(void)
{
- char test_skb[128] = {};
int fexit_fd[CNT] = {};
int link_fd[CNT] = {};
- char error[4096];
- int err, i, filter_fd;
+ int err, i;
const struct bpf_insn trace_program[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
@@ -20,25 +18,9 @@ void serial_test_fexit_stress(void)
LIBBPF_OPTS(bpf_prog_load_opts, trace_opts,
.expected_attach_type = BPF_TRACE_FEXIT,
- .log_buf = error,
- .log_size = sizeof(error),
);
- const struct bpf_insn skb_program[] = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- };
-
- LIBBPF_OPTS(bpf_prog_load_opts, skb_opts,
- .log_buf = error,
- .log_size = sizeof(error),
- );
-
- LIBBPF_OPTS(bpf_test_run_opts, topts,
- .data_in = test_skb,
- .data_size_in = sizeof(test_skb),
- .repeat = 1,
- );
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
err = libbpf_find_vmlinux_btf_id("bpf_fentry_test1",
trace_opts.expected_attach_type);
@@ -58,15 +40,9 @@ void serial_test_fexit_stress(void)
goto out;
}
- filter_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
- skb_program, sizeof(skb_program) / sizeof(struct bpf_insn),
- &skb_opts);
- if (!ASSERT_GE(filter_fd, 0, "test_program_loaded"))
- goto out;
+ err = bpf_prog_test_run_opts(fexit_fd[0], &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
- err = bpf_prog_test_run_opts(filter_fd, &topts);
- close(filter_fd);
- CHECK_FAIL(err);
out:
for (i = 0; i < CNT; i++) {
if (link_fd[i])
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index 586dc52d6fb9..5b93d5d0bd93 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -364,6 +364,9 @@ static int get_syms(char ***symsp, size_t *cntp)
continue;
if (!strncmp(name, "rcu_", 4))
continue;
+ if (!strncmp(name, "__ftrace_invalid_address__",
+ sizeof("__ftrace_invalid_address__") - 1))
+ continue;
err = hashmap__add(map, name, NULL);
if (err) {
free(name);
diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c
new file mode 100644
index 000000000000..93e9cddaadcf
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <ctype.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+/*
+ * Utility function uppercasing an entire string.
+ */
+static void uppercase(char *s)
+{
+ for (; *s != '\0'; s++)
+ *s = toupper(*s);
+}
+
+/*
+ * Test case to check that all bpf_attach_type variants are covered by
+ * libbpf_bpf_attach_type_str.
+ */
+static void test_libbpf_bpf_attach_type_str(void)
+{
+ struct btf *btf;
+ const struct btf_type *t;
+ const struct btf_enum *e;
+ int i, n, id;
+
+ btf = btf__parse("/sys/kernel/btf/vmlinux", NULL);
+ if (!ASSERT_OK_PTR(btf, "btf_parse"))
+ return;
+
+ /* find enum bpf_attach_type and enumerate each value */
+ id = btf__find_by_name_kind(btf, "bpf_attach_type", BTF_KIND_ENUM);
+ if (!ASSERT_GT(id, 0, "bpf_attach_type_id"))
+ goto cleanup;
+ t = btf__type_by_id(btf, id);
+ e = btf_enum(t);
+ n = btf_vlen(t);
+ for (i = 0; i < n; e++, i++) {
+ enum bpf_attach_type attach_type = (enum bpf_attach_type)e->val;
+ const char *attach_type_name;
+ const char *attach_type_str;
+ char buf[256];
+
+ if (attach_type == __MAX_BPF_ATTACH_TYPE)
+ continue;
+
+ attach_type_name = btf__str_by_offset(btf, e->name_off);
+ attach_type_str = libbpf_bpf_attach_type_str(attach_type);
+ ASSERT_OK_PTR(attach_type_str, attach_type_name);
+
+ snprintf(buf, sizeof(buf), "BPF_%s", attach_type_str);
+ uppercase(buf);
+
+ ASSERT_STREQ(buf, attach_type_name, "exp_str_value");
+ }
+
+cleanup:
+ btf__free(btf);
+}
+
+/*
+ * Test case to check that all bpf_link_type variants are covered by
+ * libbpf_bpf_link_type_str.
+ */
+static void test_libbpf_bpf_link_type_str(void)
+{
+ struct btf *btf;
+ const struct btf_type *t;
+ const struct btf_enum *e;
+ int i, n, id;
+
+ btf = btf__parse("/sys/kernel/btf/vmlinux", NULL);
+ if (!ASSERT_OK_PTR(btf, "btf_parse"))
+ return;
+
+ /* find enum bpf_link_type and enumerate each value */
+ id = btf__find_by_name_kind(btf, "bpf_link_type", BTF_KIND_ENUM);
+ if (!ASSERT_GT(id, 0, "bpf_link_type_id"))
+ goto cleanup;
+ t = btf__type_by_id(btf, id);
+ e = btf_enum(t);
+ n = btf_vlen(t);
+ for (i = 0; i < n; e++, i++) {
+ enum bpf_link_type link_type = (enum bpf_link_type)e->val;
+ const char *link_type_name;
+ const char *link_type_str;
+ char buf[256];
+
+ if (link_type == MAX_BPF_LINK_TYPE)
+ continue;
+
+ link_type_name = btf__str_by_offset(btf, e->name_off);
+ link_type_str = libbpf_bpf_link_type_str(link_type);
+ ASSERT_OK_PTR(link_type_str, link_type_name);
+
+ snprintf(buf, sizeof(buf), "BPF_LINK_TYPE_%s", link_type_str);
+ uppercase(buf);
+
+ ASSERT_STREQ(buf, link_type_name, "exp_str_value");
+ }
+
+cleanup:
+ btf__free(btf);
+}
+
+/*
+ * Test case to check that all bpf_map_type variants are covered by
+ * libbpf_bpf_map_type_str.
+ */
+static void test_libbpf_bpf_map_type_str(void)
+{
+ struct btf *btf;
+ const struct btf_type *t;
+ const struct btf_enum *e;
+ int i, n, id;
+
+ btf = btf__parse("/sys/kernel/btf/vmlinux", NULL);
+ if (!ASSERT_OK_PTR(btf, "btf_parse"))
+ return;
+
+ /* find enum bpf_map_type and enumerate each value */
+ id = btf__find_by_name_kind(btf, "bpf_map_type", BTF_KIND_ENUM);
+ if (!ASSERT_GT(id, 0, "bpf_map_type_id"))
+ goto cleanup;
+ t = btf__type_by_id(btf, id);
+ e = btf_enum(t);
+ n = btf_vlen(t);
+ for (i = 0; i < n; e++, i++) {
+ enum bpf_map_type map_type = (enum bpf_map_type)e->val;
+ const char *map_type_name;
+ const char *map_type_str;
+ char buf[256];
+
+ map_type_name = btf__str_by_offset(btf, e->name_off);
+ map_type_str = libbpf_bpf_map_type_str(map_type);
+ ASSERT_OK_PTR(map_type_str, map_type_name);
+
+ snprintf(buf, sizeof(buf), "BPF_MAP_TYPE_%s", map_type_str);
+ uppercase(buf);
+
+ ASSERT_STREQ(buf, map_type_name, "exp_str_value");
+ }
+
+cleanup:
+ btf__free(btf);
+}
+
+/*
+ * Test case to check that all bpf_prog_type variants are covered by
+ * libbpf_bpf_prog_type_str.
+ */
+static void test_libbpf_bpf_prog_type_str(void)
+{
+ struct btf *btf;
+ const struct btf_type *t;
+ const struct btf_enum *e;
+ int i, n, id;
+
+ btf = btf__parse("/sys/kernel/btf/vmlinux", NULL);
+ if (!ASSERT_OK_PTR(btf, "btf_parse"))
+ return;
+
+ /* find enum bpf_prog_type and enumerate each value */
+ id = btf__find_by_name_kind(btf, "bpf_prog_type", BTF_KIND_ENUM);
+ if (!ASSERT_GT(id, 0, "bpf_prog_type_id"))
+ goto cleanup;
+ t = btf__type_by_id(btf, id);
+ e = btf_enum(t);
+ n = btf_vlen(t);
+ for (i = 0; i < n; e++, i++) {
+ enum bpf_prog_type prog_type = (enum bpf_prog_type)e->val;
+ const char *prog_type_name;
+ const char *prog_type_str;
+ char buf[256];
+
+ prog_type_name = btf__str_by_offset(btf, e->name_off);
+ prog_type_str = libbpf_bpf_prog_type_str(prog_type);
+ ASSERT_OK_PTR(prog_type_str, prog_type_name);
+
+ snprintf(buf, sizeof(buf), "BPF_PROG_TYPE_%s", prog_type_str);
+ uppercase(buf);
+
+ ASSERT_STREQ(buf, prog_type_name, "exp_str_value");
+ }
+
+cleanup:
+ btf__free(btf);
+}
+
+/*
+ * Run all libbpf str conversion tests.
+ */
+void test_libbpf_str(void)
+{
+ if (test__start_subtest("bpf_attach_type_str"))
+ test_libbpf_bpf_attach_type_str();
+
+ if (test__start_subtest("bpf_link_type_str"))
+ test_libbpf_bpf_link_type_str();
+
+ if (test__start_subtest("bpf_map_type_str"))
+ test_libbpf_bpf_map_type_str();
+
+ if (test__start_subtest("bpf_prog_type_str"))
+ test_libbpf_bpf_prog_type_str();
+}
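
These helpers return the lowercase tail of the corresponding UAPI enum name, which is why each subtest re-adds the prefix and uppercases before comparing. A minimal usage sketch (the returned strings in the comments are assumptions following that convention):

#include <stdio.h>
#include <bpf/libbpf.h>

int main(void)
{
	printf("%s\n", libbpf_bpf_prog_type_str(BPF_PROG_TYPE_XDP));	/* "xdp" (assumed) */
	printf("%s\n", libbpf_bpf_map_type_str(BPF_MAP_TYPE_ARRAY));	/* "array" (assumed) */
	printf("%s\n", libbpf_bpf_attach_type_str(BPF_CGROUP_INET_INGRESS));	/* "cgroup_inet_ingress" (assumed) */
	return 0;
}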
diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
index c4da87ec3ba4..19c70880cfb3 100644
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -831,6 +831,59 @@ out:
bpf_object__close(obj);
}
+#include "tailcall_bpf2bpf6.skel.h"
+
+/* Tail call counting works even when there is data on the stack that is
+ * not aligned to 8 bytes.
+ */
+static void test_tailcall_bpf2bpf_6(void)
+{
+ struct tailcall_bpf2bpf6 *obj;
+ int err, map_fd, prog_fd, main_fd, data_fd, i, val;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ obj = tailcall_bpf2bpf6__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "open and load"))
+ return;
+
+ main_fd = bpf_program__fd(obj->progs.entry);
+ if (!ASSERT_GE(main_fd, 0, "entry prog fd"))
+ goto out;
+
+ map_fd = bpf_map__fd(obj->maps.jmp_table);
+ if (!ASSERT_GE(map_fd, 0, "jmp_table map fd"))
+ goto out;
+
+ prog_fd = bpf_program__fd(obj->progs.classifier_0);
+ if (!ASSERT_GE(prog_fd, 0, "classifier_0 prog fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (!ASSERT_OK(err, "jmp_table map update"))
+ goto out;
+
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "entry prog test run");
+ ASSERT_EQ(topts.retval, 0, "tailcall retval");
+
+ data_fd = bpf_map__fd(obj->maps.bss);
+	if (!ASSERT_GE(data_fd, 0, "bss map fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_lookup_elem(data_fd, &i, &val);
+ ASSERT_OK(err, "bss map lookup");
+ ASSERT_EQ(val, 1, "done flag is set");
+
+out:
+ tailcall_bpf2bpf6__destroy(obj);
+}
+
void test_tailcalls(void)
{
if (test__start_subtest("tailcall_1"))
@@ -855,4 +908,6 @@ void test_tailcalls(void)
test_tailcall_bpf2bpf_4(false);
if (test__start_subtest("tailcall_bpf2bpf_5"))
test_tailcall_bpf2bpf_4(true);
+ if (test__start_subtest("tailcall_bpf2bpf_6"))
+ test_tailcall_bpf2bpf_6();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index 958dae769c52..cb6a53b3e023 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -646,7 +646,7 @@ static void test_tcp_clear_dtime(struct test_tc_dtime *skel)
__u32 *errs = skel->bss->errs[t];
skel->bss->test = t;
- test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 0);
+ test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 50000 + t);
ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
dtime_cnt_str(t, INGRESS_FWDNS_P100));
@@ -683,7 +683,7 @@ static void test_tcp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
errs = skel->bss->errs[t];
skel->bss->test = t;
- test_inet_dtime(family, SOCK_STREAM, addr, 0);
+ test_inet_dtime(family, SOCK_STREAM, addr, 50000 + t);
/* fwdns_prio100 prog does not read delivery_time_type, so
* kernel puts the (rcv) timestamp in __sk_buff->tstamp
@@ -715,13 +715,13 @@ static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
errs = skel->bss->errs[t];
skel->bss->test = t;
- test_inet_dtime(family, SOCK_DGRAM, addr, 0);
+ test_inet_dtime(family, SOCK_DGRAM, addr, 50000 + t);
ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
dtime_cnt_str(t, INGRESS_FWDNS_P100));
/* non mono delivery time is not forwarded */
ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
- dtime_cnt_str(t, INGRESS_FWDNS_P100));
+ dtime_cnt_str(t, INGRESS_FWDNS_P101));
for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++)
ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
new file mode 100644
index 000000000000..fb77a123fe89
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
@@ -0,0 +1,183 @@
+// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <ctype.h>
+
+#define CMD_OUT_BUF_SIZE 1023
+
+#define SYS(cmd) ({ \
+ if (!ASSERT_OK(system(cmd), (cmd))) \
+ goto out; \
+})
+
+#define SYS_OUT(cmd, ...) ({ \
+ char buf[1024]; \
+ snprintf(buf, sizeof(buf), (cmd), ##__VA_ARGS__); \
+ FILE *f = popen(buf, "r"); \
+ if (!ASSERT_OK_PTR(f, buf)) \
+ goto out; \
+ f; \
+})
+
+/* out must be at least `size * 4 + 1` bytes long */
+static void escape_str(char *out, const char *in, size_t size)
+{
+ static const char *hex = "0123456789ABCDEF";
+ size_t i;
+
+ for (i = 0; i < size; i++) {
+ if (isprint(in[i]) && in[i] != '\\' && in[i] != '\'') {
+ *out++ = in[i];
+ } else {
+ *out++ = '\\';
+ *out++ = 'x';
+ *out++ = hex[(in[i] >> 4) & 0xf];
+ *out++ = hex[in[i] & 0xf];
+ }
+ }
+ *out++ = '\0';
+}
+
+static bool expect_str(char *buf, size_t size, const char *str, const char *name)
+{
+ static char escbuf_expected[CMD_OUT_BUF_SIZE * 4];
+ static char escbuf_actual[CMD_OUT_BUF_SIZE * 4];
+ static int duration = 0;
+ bool ok;
+
+ ok = size == strlen(str) && !memcmp(buf, str, size);
+
+ if (!ok) {
+ escape_str(escbuf_expected, str, strlen(str));
+ escape_str(escbuf_actual, buf, size);
+ }
+ CHECK(!ok, name, "unexpected %s: actual '%s' != expected '%s'\n",
+ name, escbuf_actual, escbuf_expected);
+
+ return ok;
+}
+
+static void test_synproxy(bool xdp)
+{
+ int server_fd = -1, client_fd = -1, accept_fd = -1;
+ char *prog_id, *prog_id_end;
+ struct nstoken *ns = NULL;
+ FILE *ctrl_file = NULL;
+ char buf[CMD_OUT_BUF_SIZE];
+ size_t size;
+
+ SYS("ip netns add synproxy");
+
+ SYS("ip link add tmp0 type veth peer name tmp1");
+ SYS("ip link set tmp1 netns synproxy");
+ SYS("ip link set tmp0 up");
+ SYS("ip addr replace 198.18.0.1/24 dev tmp0");
+
+ /* When checksum offload is enabled, the XDP program sees wrong
+ * checksums and drops packets.
+ */
+ SYS("ethtool -K tmp0 tx off");
+ if (xdp)
+ /* Workaround required for veth. */
+ SYS("ip link set tmp0 xdp object xdp_dummy.o section xdp 2> /dev/null");
+
+ ns = open_netns("synproxy");
+ if (!ASSERT_OK_PTR(ns, "setns"))
+ goto out;
+
+ SYS("ip link set lo up");
+ SYS("ip link set tmp1 up");
+ SYS("ip addr replace 198.18.0.2/24 dev tmp1");
+ SYS("sysctl -w net.ipv4.tcp_syncookies=2");
+ SYS("sysctl -w net.ipv4.tcp_timestamps=1");
+ SYS("sysctl -w net.netfilter.nf_conntrack_tcp_loose=0");
+ SYS("iptables -t raw -I PREROUTING \
+ -i tmp1 -p tcp -m tcp --syn --dport 8080 -j CT --notrack");
+ SYS("iptables -t filter -A INPUT \
+ -i tmp1 -p tcp -m tcp --dport 8080 -m state --state INVALID,UNTRACKED \
+ -j SYNPROXY --sack-perm --timestamp --wscale 7 --mss 1460");
+ SYS("iptables -t filter -A INPUT \
+ -i tmp1 -m state --state INVALID -j DROP");
+
+ ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --ports 8080 \
+ --single --mss4 1460 --mss6 1440 \
+ --wscale 7 --ttl 64%s", xdp ? "" : " --tc");
+ size = fread(buf, 1, sizeof(buf), ctrl_file);
+ pclose(ctrl_file);
+ if (!expect_str(buf, size, "Total SYNACKs generated: 0\n",
+ "initial SYNACKs"))
+ goto out;
+
+ if (!xdp) {
+ ctrl_file = SYS_OUT("tc filter show dev tmp1 ingress");
+ size = fread(buf, 1, sizeof(buf), ctrl_file);
+ pclose(ctrl_file);
+ prog_id = memmem(buf, size, " id ", 4);
+ if (!ASSERT_OK_PTR(prog_id, "find prog id"))
+ goto out;
+ prog_id += 4;
+ if (!ASSERT_LT(prog_id, buf + size, "find prog id begin"))
+ goto out;
+ prog_id_end = prog_id;
+ while (prog_id_end < buf + size && *prog_id_end >= '0' &&
+ *prog_id_end <= '9')
+ prog_id_end++;
+ if (!ASSERT_LT(prog_id_end, buf + size, "find prog id end"))
+ goto out;
+ *prog_id_end = '\0';
+ }
+
+ server_fd = start_server(AF_INET, SOCK_STREAM, "198.18.0.2", 8080, 0);
+ if (!ASSERT_GE(server_fd, 0, "start_server"))
+ goto out;
+
+ close_netns(ns);
+ ns = NULL;
+
+ client_fd = connect_to_fd(server_fd, 10000);
+ if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
+ goto out;
+
+ accept_fd = accept(server_fd, NULL, NULL);
+ if (!ASSERT_GE(accept_fd, 0, "accept"))
+ goto out;
+
+ ns = open_netns("synproxy");
+ if (!ASSERT_OK_PTR(ns, "setns"))
+ goto out;
+
+ if (xdp)
+ ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --single");
+ else
+ ctrl_file = SYS_OUT("./xdp_synproxy --prog %s --single",
+ prog_id);
+ size = fread(buf, 1, sizeof(buf), ctrl_file);
+ pclose(ctrl_file);
+ if (!expect_str(buf, size, "Total SYNACKs generated: 1\n",
+ "SYNACKs after connection"))
+ goto out;
+
+out:
+ if (accept_fd >= 0)
+ close(accept_fd);
+ if (client_fd >= 0)
+ close(client_fd);
+ if (server_fd >= 0)
+ close(server_fd);
+ if (ns)
+ close_netns(ns);
+
+ system("ip link del tmp0");
+ system("ip netns del synproxy");
+}
+
+void test_xdp_synproxy(void)
+{
+ if (test__start_subtest("xdp"))
+ test_synproxy(true);
+ if (test__start_subtest("tc"))
+ test_synproxy(false);
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_hashmap_full_update_bench.c b/tools/testing/selftests/bpf/progs/bpf_hashmap_full_update_bench.c
new file mode 100644
index 000000000000..56957557e3e1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_hashmap_full_update_bench.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Bytedance */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define MAX_ENTRIES 1000
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, MAX_ENTRIES);
+} hash_map_bench SEC(".maps");
+
+u64 __attribute__((__aligned__(256))) percpu_time[256];
+u64 nr_loops;
+
+static int loop_update_callback(__u32 index, u32 *key)
+{
+ u64 init_val = 1;
+
+ bpf_map_update_elem(&hash_map_bench, key, &init_val, BPF_ANY);
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int benchmark(void *ctx)
+{
+ u32 cpu = bpf_get_smp_processor_id();
+ u32 key = cpu + MAX_ENTRIES;
+ u64 start_time = bpf_ktime_get_ns();
+
+ bpf_loop(nr_loops, loop_update_callback, &key, 0);
+ percpu_time[cpu & 255] = bpf_ktime_get_ns() - start_time;
+ return 0;
+}
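
The fentry hook fires on sys_getpgid, so the benchmark is driven purely by issuing that syscall from userspace. A hypothetical trigger loop (the real driver lives in the bench framework, not in this diff):

#include <unistd.h>
#include <sys/syscall.h>

/* Each getpgid() invocation runs benchmark() once: nr_loops hash-map
 * updates on the current CPU, with the elapsed nanoseconds recorded in
 * percpu_time[].
 */
static void trigger(int iters)
{
	int i;

	for (i = 0; i < iters; i++)
		syscall(SYS_getpgid);
}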
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val.c
new file mode 100644
index 000000000000..888e79db6a77
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enum64val x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___diff.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___diff.c
new file mode 100644
index 000000000000..194749130d87
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___diff.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enum64val___diff x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___err_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___err_missing.c
new file mode 100644
index 000000000000..3d732d4193e4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___err_missing.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enum64val___err_missing x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___val3_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___val3_missing.c
new file mode 100644
index 000000000000..17cf5d6a848d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___val3_missing.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enum64val___val3_missing x) {}
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index f9dc9766546e..26e103302c05 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -1117,6 +1117,20 @@ struct core_reloc_enumval_output {
int anon_val2;
};
+struct core_reloc_enum64val_output {
+ bool unsigned_val1_exists;
+ bool unsigned_val2_exists;
+ bool unsigned_val3_exists;
+ bool signed_val1_exists;
+ bool signed_val2_exists;
+ bool signed_val3_exists;
+
+ long unsigned_val1;
+ long unsigned_val2;
+ long signed_val1;
+ long signed_val2;
+};
+
enum named_enum {
NAMED_ENUM_VAL1 = 1,
NAMED_ENUM_VAL2 = 2,
@@ -1134,6 +1148,23 @@ struct core_reloc_enumval {
anon_enum f2;
};
+enum named_unsigned_enum64 {
+ UNSIGNED_ENUM64_VAL1 = 0x1ffffffffULL,
+ UNSIGNED_ENUM64_VAL2 = 0x2,
+ UNSIGNED_ENUM64_VAL3 = 0x3ffffffffULL,
+};
+
+enum named_signed_enum64 {
+ SIGNED_ENUM64_VAL1 = 0x1ffffffffLL,
+ SIGNED_ENUM64_VAL2 = -2,
+ SIGNED_ENUM64_VAL3 = 0x3ffffffffLL,
+};
+
+struct core_reloc_enum64val {
+ enum named_unsigned_enum64 f1;
+ enum named_signed_enum64 f2;
+};
+
/* differing enumerator values */
enum named_enum___diff {
NAMED_ENUM_VAL1___diff = 101,
@@ -1152,6 +1183,23 @@ struct core_reloc_enumval___diff {
anon_enum___diff f2;
};
+enum named_unsigned_enum64___diff {
+ UNSIGNED_ENUM64_VAL1___diff = 0x101ffffffffULL,
+ UNSIGNED_ENUM64_VAL2___diff = 0x202ffffffffULL,
+ UNSIGNED_ENUM64_VAL3___diff = 0x303ffffffffULL,
+};
+
+enum named_signed_enum64___diff {
+ SIGNED_ENUM64_VAL1___diff = -101,
+ SIGNED_ENUM64_VAL2___diff = -202,
+ SIGNED_ENUM64_VAL3___diff = -303,
+};
+
+struct core_reloc_enum64val___diff {
+ enum named_unsigned_enum64___diff f1;
+ enum named_signed_enum64___diff f2;
+};
+
/* missing (optional) third enum value */
enum named_enum___val3_missing {
NAMED_ENUM_VAL1___val3_missing = 111,
@@ -1168,6 +1216,21 @@ struct core_reloc_enumval___val3_missing {
anon_enum___val3_missing f2;
};
+enum named_unsigned_enum64___val3_missing {
+ UNSIGNED_ENUM64_VAL1___val3_missing = 0x111ffffffffULL,
+ UNSIGNED_ENUM64_VAL2___val3_missing = 0x222,
+};
+
+enum named_signed_enum64___val3_missing {
+ SIGNED_ENUM64_VAL1___val3_missing = 0x111ffffffffLL,
+ SIGNED_ENUM64_VAL2___val3_missing = -222,
+};
+
+struct core_reloc_enum64val___val3_missing {
+ enum named_unsigned_enum64___val3_missing f1;
+ enum named_signed_enum64___val3_missing f2;
+};
+
/* missing (mandatory) second enum value, should fail */
enum named_enum___err_missing {
NAMED_ENUM_VAL1___err_missing = 1,
@@ -1183,3 +1246,18 @@ struct core_reloc_enumval___err_missing {
enum named_enum___err_missing f1;
anon_enum___err_missing f2;
};
+
+enum named_unsigned_enum64___err_missing {
+ UNSIGNED_ENUM64_VAL1___err_missing = 0x1ffffffffULL,
+ UNSIGNED_ENUM64_VAL3___err_missing = 0x3ffffffffULL,
+};
+
+enum named_signed_enum64___err_missing {
+ SIGNED_ENUM64_VAL1___err_missing = 0x1ffffffffLL,
+ SIGNED_ENUM64_VAL3___err_missing = -3,
+};
+
+struct core_reloc_enum64val___err_missing {
+ enum named_unsigned_enum64___err_missing f1;
+ enum named_signed_enum64___err_missing f2;
+};
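
The triple-underscore suffixes follow libbpf's CO-RE "flavor" convention: when matching candidate types, libbpf strips everything from the first "___" onward, so every variant above competes for the same target name. An illustration using the types from this header:

/* All of these match the target name 'core_reloc_enum64val' during
 * CO-RE relocation; the ___suffix only selects a test fixture.
 */
struct core_reloc_enum64val base;		/* base layout */
struct core_reloc_enum64val___diff diff;	/* different enumerator values */
struct core_reloc_enum64val___err_missing bad;	/* relocation expected to fail */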
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi.c b/tools/testing/selftests/bpf/progs/kprobe_multi.c
index 93510f4f0f3a..08f95a8155d1 100644
--- a/tools/testing/selftests/bpf/progs/kprobe_multi.c
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi.c
@@ -54,21 +54,21 @@ static void kprobe_multi_check(void *ctx, bool is_return)
if (is_return) {
SET(kretprobe_test1_result, &bpf_fentry_test1, 8);
- SET(kretprobe_test2_result, &bpf_fentry_test2, 7);
- SET(kretprobe_test3_result, &bpf_fentry_test3, 6);
- SET(kretprobe_test4_result, &bpf_fentry_test4, 5);
- SET(kretprobe_test5_result, &bpf_fentry_test5, 4);
- SET(kretprobe_test6_result, &bpf_fentry_test6, 3);
- SET(kretprobe_test7_result, &bpf_fentry_test7, 2);
+ SET(kretprobe_test2_result, &bpf_fentry_test2, 2);
+ SET(kretprobe_test3_result, &bpf_fentry_test3, 7);
+ SET(kretprobe_test4_result, &bpf_fentry_test4, 6);
+ SET(kretprobe_test5_result, &bpf_fentry_test5, 5);
+ SET(kretprobe_test6_result, &bpf_fentry_test6, 4);
+ SET(kretprobe_test7_result, &bpf_fentry_test7, 3);
SET(kretprobe_test8_result, &bpf_fentry_test8, 1);
} else {
SET(kprobe_test1_result, &bpf_fentry_test1, 1);
- SET(kprobe_test2_result, &bpf_fentry_test2, 2);
- SET(kprobe_test3_result, &bpf_fentry_test3, 3);
- SET(kprobe_test4_result, &bpf_fentry_test4, 4);
- SET(kprobe_test5_result, &bpf_fentry_test5, 5);
- SET(kprobe_test6_result, &bpf_fentry_test6, 6);
- SET(kprobe_test7_result, &bpf_fentry_test7, 7);
+ SET(kprobe_test2_result, &bpf_fentry_test2, 7);
+ SET(kprobe_test3_result, &bpf_fentry_test3, 2);
+ SET(kprobe_test4_result, &bpf_fentry_test4, 3);
+ SET(kprobe_test5_result, &bpf_fentry_test5, 4);
+ SET(kprobe_test6_result, &bpf_fentry_test6, 5);
+ SET(kprobe_test7_result, &bpf_fentry_test7, 6);
SET(kprobe_test8_result, &bpf_fentry_test8, 8);
}
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c
new file mode 100644
index 000000000000..41ce83da78e8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define __unused __attribute__((unused))
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+int done = 0;
+
+SEC("tc")
+int classifier_0(struct __sk_buff *skb __unused)
+{
+ done = 1;
+ return 0;
+}
+
+static __noinline
+int subprog_tail(struct __sk_buff *skb)
+{
+ /* Don't propagate the constant to the caller */
+ volatile int ret = 1;
+
+ bpf_tail_call_static(skb, &jmp_table, 0);
+ return ret;
+}
+
+SEC("tc")
+int entry(struct __sk_buff *skb)
+{
+	/* Have data on the stack whose size is not a multiple of 8 */
+ volatile char arr[1] = {};
+
+ return subprog_tail(skb);
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c
index ce9acf4db8d2..f1c88ad368ef 100644
--- a/tools/testing/selftests/bpf/progs/test_attach_probe.c
+++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c
@@ -5,6 +5,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#include <stdbool.h>
#include "bpf_misc.h"
int kprobe_res = 0;
@@ -17,6 +18,11 @@ int uprobe_byname_res = 0;
int uretprobe_byname_res = 0;
int uprobe_byname2_res = 0;
int uretprobe_byname2_res = 0;
+int uprobe_byname3_sleepable_res = 0;
+int uprobe_byname3_res = 0;
+int uretprobe_byname3_sleepable_res = 0;
+int uretprobe_byname3_res = 0;
+void *user_ptr = 0;
SEC("kprobe")
int handle_kprobe(struct pt_regs *ctx)
@@ -32,6 +38,17 @@ int BPF_KPROBE(handle_kprobe_auto)
return 0;
}
+/*
+ * This program will be manually made sleepable on the userspace side
+ * and should thus be unattachable.
+ */
+SEC("kprobe/" SYS_PREFIX "sys_nanosleep")
+int handle_kprobe_sleepable(struct pt_regs *ctx)
+{
+ kprobe_res = 2;
+ return 0;
+}
+
SEC("kretprobe")
int handle_kretprobe(struct pt_regs *ctx)
{
@@ -93,4 +110,47 @@ int handle_uretprobe_byname2(struct pt_regs *ctx)
return 0;
}
+static __always_inline bool verify_sleepable_user_copy(void)
+{
+ char data[9];
+
+ bpf_copy_from_user(data, sizeof(data), user_ptr);
+ return bpf_strncmp(data, sizeof(data), "test_data") == 0;
+}
+
+SEC("uprobe.s//proc/self/exe:trigger_func3")
+int handle_uprobe_byname3_sleepable(struct pt_regs *ctx)
+{
+ if (verify_sleepable_user_copy())
+ uprobe_byname3_sleepable_res = 9;
+ return 0;
+}
+
+/*
+ * Same target as the uprobe.s above, to force sleepable and non-sleepable
+ * programs into the same bpf_prog_array.
+ */
+SEC("uprobe//proc/self/exe:trigger_func3")
+int handle_uprobe_byname3(struct pt_regs *ctx)
+{
+ uprobe_byname3_res = 10;
+ return 0;
+}
+
+SEC("uretprobe.s//proc/self/exe:trigger_func3")
+int handle_uretprobe_byname3_sleepable(struct pt_regs *ctx)
+{
+ if (verify_sleepable_user_copy())
+ uretprobe_byname3_sleepable_res = 11;
+ return 0;
+}
+
+SEC("uretprobe//proc/self/exe:trigger_func3")
+int handle_uretprobe_byname3(struct pt_regs *ctx)
+{
+ uretprobe_byname3_res = 12;
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
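
The comment on handle_kprobe_sleepable refers to setup performed by the test harness. A sketch of that userspace side, assuming libbpf's bpf_program__flags()/bpf_program__set_flags() and the generated skeleton field name:

/* Hypothetical load-time tweak: force the kprobe program sleepable so
 * that the subsequent attach is expected to fail (kprobes must not
 * sleep).
 */
struct bpf_program *prog = skel->progs.handle_kprobe_sleepable;

bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_SLEEPABLE);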
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_enum64val.c b/tools/testing/selftests/bpf/progs/test_core_reloc_enum64val.c
new file mode 100644
index 000000000000..63147fbfae6e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_enum64val.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ char in[256];
+ char out[256];
+ bool skip;
+} data = {};
+
+enum named_unsigned_enum64 {
+ UNSIGNED_ENUM64_VAL1 = 0x1ffffffffULL,
+ UNSIGNED_ENUM64_VAL2 = 0x2ffffffffULL,
+ UNSIGNED_ENUM64_VAL3 = 0x3ffffffffULL,
+};
+
+enum named_signed_enum64 {
+ SIGNED_ENUM64_VAL1 = 0x1ffffffffLL,
+ SIGNED_ENUM64_VAL2 = -2,
+ SIGNED_ENUM64_VAL3 = 0x3ffffffffLL,
+};
+
+struct core_reloc_enum64val_output {
+ bool unsigned_val1_exists;
+ bool unsigned_val2_exists;
+ bool unsigned_val3_exists;
+ bool signed_val1_exists;
+ bool signed_val2_exists;
+ bool signed_val3_exists;
+
+ long unsigned_val1;
+ long unsigned_val2;
+ long signed_val1;
+ long signed_val2;
+};
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_enum64val(void *ctx)
+{
+#if __clang_major__ >= 15
+ struct core_reloc_enum64val_output *out = (void *)&data.out;
+ enum named_unsigned_enum64 named_unsigned = 0;
+ enum named_signed_enum64 named_signed = 0;
+
+ out->unsigned_val1_exists = bpf_core_enum_value_exists(named_unsigned, UNSIGNED_ENUM64_VAL1);
+ out->unsigned_val2_exists = bpf_core_enum_value_exists(enum named_unsigned_enum64, UNSIGNED_ENUM64_VAL2);
+ out->unsigned_val3_exists = bpf_core_enum_value_exists(enum named_unsigned_enum64, UNSIGNED_ENUM64_VAL3);
+ out->signed_val1_exists = bpf_core_enum_value_exists(named_signed, SIGNED_ENUM64_VAL1);
+ out->signed_val2_exists = bpf_core_enum_value_exists(enum named_signed_enum64, SIGNED_ENUM64_VAL2);
+ out->signed_val3_exists = bpf_core_enum_value_exists(enum named_signed_enum64, SIGNED_ENUM64_VAL3);
+
+ out->unsigned_val1 = bpf_core_enum_value(named_unsigned, UNSIGNED_ENUM64_VAL1);
+ out->unsigned_val2 = bpf_core_enum_value(named_unsigned, UNSIGNED_ENUM64_VAL2);
+ out->signed_val1 = bpf_core_enum_value(named_signed, SIGNED_ENUM64_VAL1);
+ out->signed_val2 = bpf_core_enum_value(named_signed, SIGNED_ENUM64_VAL2);
+	/* {UNSIGNED,SIGNED}_ENUM64_VAL3 values are optional */
+
+#else
+ data.skip = true;
+#endif
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_tc_dtime.c b/tools/testing/selftests/bpf/progs/test_tc_dtime.c
index 06f300d06dbd..b596479a9ebe 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_dtime.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_dtime.c
@@ -11,6 +11,8 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include <sys/socket.h>
@@ -115,6 +117,19 @@ static bool bpf_fwd(void)
return test < TCP_IP4_RT_FWD;
}
+static __u8 get_proto(void)
+{
+ switch (test) {
+ case UDP_IP4:
+ case UDP_IP6:
+ case UDP_IP4_RT_FWD:
+ case UDP_IP6_RT_FWD:
+ return IPPROTO_UDP;
+ default:
+ return IPPROTO_TCP;
+ }
+}
+
/* -1: parse error: TC_ACT_SHOT
* 0: not testing traffic: TC_ACT_OK
* >0: first byte is the inet_proto, second byte has the netns
@@ -122,11 +137,16 @@ static bool bpf_fwd(void)
*/
static int skb_get_type(struct __sk_buff *skb)
{
+ __u16 dst_ns_port = __bpf_htons(50000 + test);
void *data_end = ctx_ptr(skb->data_end);
void *data = ctx_ptr(skb->data);
__u8 inet_proto = 0, ns = 0;
struct ipv6hdr *ip6h;
+ __u16 sport, dport;
struct iphdr *iph;
+ struct tcphdr *th;
+ struct udphdr *uh;
+ void *trans;
switch (skb->protocol) {
case __bpf_htons(ETH_P_IP):
@@ -138,6 +158,7 @@ static int skb_get_type(struct __sk_buff *skb)
else if (iph->saddr == ip4_dst)
ns = DST_NS;
inet_proto = iph->protocol;
+ trans = iph + 1;
break;
case __bpf_htons(ETH_P_IPV6):
ip6h = data + sizeof(struct ethhdr);
@@ -148,15 +169,43 @@ static int skb_get_type(struct __sk_buff *skb)
else if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_dst))
ns = DST_NS;
inet_proto = ip6h->nexthdr;
+ trans = ip6h + 1;
break;
default:
return 0;
}
- if ((inet_proto != IPPROTO_TCP && inet_proto != IPPROTO_UDP) || !ns)
+	/* Ignore the skb if it is not from src_ns/dst_ns, or if it is not
+	 * the IPPROTO under test.
+	 */
+ if (!ns || inet_proto != get_proto())
return 0;
- return (ns << 8 | inet_proto);
+ switch (inet_proto) {
+ case IPPROTO_TCP:
+ th = trans;
+ if (th + 1 > data_end)
+ return -1;
+ sport = th->source;
+ dport = th->dest;
+ break;
+ case IPPROTO_UDP:
+ uh = trans;
+ if (uh + 1 > data_end)
+ return -1;
+ sport = uh->source;
+ dport = uh->dest;
+ break;
+ default:
+ return 0;
+ }
+
+	/* The skb is the traffic under test */
+ if ((ns == SRC_NS && dport == dst_ns_port) ||
+ (ns == DST_NS && sport == dst_ns_port))
+ return (ns << 8 | inet_proto);
+
+ return 0;
}
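
skb_get_type() packs its verdict into a single int, the namespace in the second byte and the L4 protocol in the first, as the comment above the function states. A caller-side decode sketch:

int type = skb_get_type(skb);

if (type > 0) {
	__u8 ns = type >> 8;		/* SRC_NS or DST_NS */
	__u8 inet_proto = type & 0xff;	/* IPPROTO_TCP or IPPROTO_UDP */
}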
/* format: direction@iface@netns
diff --git a/tools/testing/selftests/bpf/progs/test_varlen.c b/tools/testing/selftests/bpf/progs/test_varlen.c
index 913acdffd90f..3987ff174f1f 100644
--- a/tools/testing/selftests/bpf/progs/test_varlen.c
+++ b/tools/testing/selftests/bpf/progs/test_varlen.c
@@ -41,20 +41,20 @@ int handler64_unsigned(void *regs)
{
int pid = bpf_get_current_pid_tgid() >> 32;
void *payload = payload1;
- u64 len;
+ long len;
/* ignore irrelevant invocations */
if (test_pid != pid || !capture)
return 0;
len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
- if (len <= MAX_LEN) {
+ if (len >= 0) {
payload += len;
payload1_len1 = len;
}
len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
- if (len <= MAX_LEN) {
+ if (len >= 0) {
payload += len;
payload1_len2 = len;
}
@@ -123,7 +123,7 @@ int handler32_signed(void *regs)
{
int pid = bpf_get_current_pid_tgid() >> 32;
void *payload = payload4;
- int len;
+ long len;
/* ignore irrelevant invocations */
if (test_pid != pid || !capture)
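
bpf_probe_read_kernel_str() returns a long: on success, the copied length including the trailing NUL (1..size); on failure, a negative errno. Declaring `len` as long lets the new `len >= 0` check classify errors directly, which a u64 (or a truncating int) cannot. A minimal sketch of the pattern (dst/unsafe_ptr are illustrative names):

long len = bpf_probe_read_kernel_str(dst, sizeof(dst), unsafe_ptr);

if (len < 0)
	return 0;	/* read failed */
/* here 1 <= len <= sizeof(dst) */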
diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
new file mode 100644
index 000000000000..9fd62e94b5e6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
@@ -0,0 +1,833 @@
+// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <asm/errno.h>
+
+#define TC_ACT_OK 0
+#define TC_ACT_SHOT 2
+
+#define NSEC_PER_SEC 1000000000L
+
+#define ETH_ALEN 6
+#define ETH_P_IP 0x0800
+#define ETH_P_IPV6 0x86DD
+
+#define tcp_flag_word(tp) (((union tcp_word_hdr *)(tp))->words[3])
+
+#define IP_DF 0x4000
+#define IP_MF 0x2000
+#define IP_OFFSET 0x1fff
+
+#define NEXTHDR_TCP 6
+
+#define TCPOPT_NOP 1
+#define TCPOPT_EOL 0
+#define TCPOPT_MSS 2
+#define TCPOPT_WINDOW 3
+#define TCPOPT_SACK_PERM 4
+#define TCPOPT_TIMESTAMP 8
+
+#define TCPOLEN_MSS 4
+#define TCPOLEN_WINDOW 3
+#define TCPOLEN_SACK_PERM 2
+#define TCPOLEN_TIMESTAMP 10
+
+#define TCP_TS_HZ 1000
+#define TS_OPT_WSCALE_MASK 0xf
+#define TS_OPT_SACK (1 << 4)
+#define TS_OPT_ECN (1 << 5)
+#define TSBITS 6
+#define TSMASK (((__u32)1 << TSBITS) - 1)
+#define TCP_MAX_WSCALE 14U
+
+#define IPV4_MAXLEN 60
+#define TCP_MAXLEN 60
+
+#define DEFAULT_MSS4 1460
+#define DEFAULT_MSS6 1440
+#define DEFAULT_WSCALE 7
+#define DEFAULT_TTL 64
+#define MAX_ALLOWED_PORTS 8
+
+#define swap(a, b) \
+ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+
+#define __get_unaligned_t(type, ptr) ({ \
+ const struct { type x; } __attribute__((__packed__)) *__pptr = (typeof(__pptr))(ptr); \
+ __pptr->x; \
+})
+
+#define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr))
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u64);
+ __uint(max_entries, 2);
+} values SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u16);
+ __uint(max_entries, MAX_ALLOWED_PORTS);
+} allowed_ports SEC(".maps");
+
+extern struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx,
+ struct bpf_sock_tuple *bpf_tuple,
+ __u32 len_tuple,
+ struct bpf_ct_opts *opts,
+ __u32 len_opts) __ksym;
+
+extern struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *skb_ctx,
+ struct bpf_sock_tuple *bpf_tuple,
+ u32 len_tuple,
+ struct bpf_ct_opts *opts,
+ u32 len_opts) __ksym;
+
+extern void bpf_ct_release(struct nf_conn *ct) __ksym;
+
+static __always_inline void swap_eth_addr(__u8 *a, __u8 *b)
+{
+ __u8 tmp[ETH_ALEN];
+
+ __builtin_memcpy(tmp, a, ETH_ALEN);
+ __builtin_memcpy(a, b, ETH_ALEN);
+ __builtin_memcpy(b, tmp, ETH_ALEN);
+}
+
+static __always_inline __u16 csum_fold(__u32 csum)
+{
+ csum = (csum & 0xffff) + (csum >> 16);
+ csum = (csum & 0xffff) + (csum >> 16);
+ return (__u16)~csum;
+}
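+
+/* Worked example: csum_fold(0x11112222) sums the halves as 0x1111 +
+ * 0x2222 = 0x3333 (no carry on either pass) and returns ~0x3333 =
+ * 0xcccc.
+ */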
+
+static __always_inline __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
+ __u32 len, __u8 proto,
+ __u32 csum)
+{
+ __u64 s = csum;
+
+ s += (__u32)saddr;
+ s += (__u32)daddr;
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ s += proto + len;
+#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ s += (proto + len) << 8;
+#else
+#error Unknown endian
+#endif
+ s = (s & 0xffffffff) + (s >> 32);
+ s = (s & 0xffffffff) + (s >> 32);
+
+ return csum_fold((__u32)s);
+}
+
+static __always_inline __u16 csum_ipv6_magic(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u32 len, __u8 proto, __u32 csum)
+{
+ __u64 sum = csum;
+ int i;
+
+#pragma unroll
+ for (i = 0; i < 4; i++)
+ sum += (__u32)saddr->in6_u.u6_addr32[i];
+
+#pragma unroll
+ for (i = 0; i < 4; i++)
+ sum += (__u32)daddr->in6_u.u6_addr32[i];
+
+ /* Don't combine additions to avoid 32-bit overflow. */
+ sum += bpf_htonl(len);
+ sum += bpf_htonl(proto);
+
+ sum = (sum & 0xffffffff) + (sum >> 32);
+ sum = (sum & 0xffffffff) + (sum >> 32);
+
+ return csum_fold((__u32)sum);
+}
+
+static __always_inline __u64 tcp_clock_ns(void)
+{
+ return bpf_ktime_get_ns();
+}
+
+static __always_inline __u32 tcp_ns_to_ts(__u64 ns)
+{
+ return ns / (NSEC_PER_SEC / TCP_TS_HZ);
+}
+
+static __always_inline __u32 tcp_time_stamp_raw(void)
+{
+ return tcp_ns_to_ts(tcp_clock_ns());
+}
+
+struct tcpopt_context {
+ __u8 *ptr;
+ __u8 *end;
+ void *data_end;
+ __be32 *tsecr;
+ __u8 wscale;
+ bool option_timestamp;
+ bool option_sack;
+};
+
+static int tscookie_tcpopt_parse(struct tcpopt_context *ctx)
+{
+ __u8 opcode, opsize;
+
+ if (ctx->ptr >= ctx->end)
+ return 1;
+ if (ctx->ptr >= ctx->data_end)
+ return 1;
+
+ opcode = ctx->ptr[0];
+
+ if (opcode == TCPOPT_EOL)
+ return 1;
+ if (opcode == TCPOPT_NOP) {
+ ++ctx->ptr;
+ return 0;
+ }
+
+ if (ctx->ptr + 1 >= ctx->end)
+ return 1;
+ if (ctx->ptr + 1 >= ctx->data_end)
+ return 1;
+ opsize = ctx->ptr[1];
+ if (opsize < 2)
+ return 1;
+
+ if (ctx->ptr + opsize > ctx->end)
+ return 1;
+
+ switch (opcode) {
+ case TCPOPT_WINDOW:
+ if (opsize == TCPOLEN_WINDOW && ctx->ptr + TCPOLEN_WINDOW <= ctx->data_end)
+ ctx->wscale = ctx->ptr[2] < TCP_MAX_WSCALE ? ctx->ptr[2] : TCP_MAX_WSCALE;
+ break;
+ case TCPOPT_TIMESTAMP:
+ if (opsize == TCPOLEN_TIMESTAMP && ctx->ptr + TCPOLEN_TIMESTAMP <= ctx->data_end) {
+ ctx->option_timestamp = true;
+ /* Client's tsval becomes our tsecr. */
+ *ctx->tsecr = get_unaligned((__be32 *)(ctx->ptr + 2));
+ }
+ break;
+ case TCPOPT_SACK_PERM:
+ if (opsize == TCPOLEN_SACK_PERM)
+ ctx->option_sack = true;
+ break;
+ }
+
+ ctx->ptr += opsize;
+
+ return 0;
+}
+
+static int tscookie_tcpopt_parse_batch(__u32 index, void *context)
+{
+ int i;
+
+ for (i = 0; i < 7; i++)
+ if (tscookie_tcpopt_parse(context))
+ return 1;
+ return 0;
+}
+
+static __always_inline bool tscookie_init(struct tcphdr *tcp_header,
+ __u16 tcp_len, __be32 *tsval,
+ __be32 *tsecr, void *data_end)
+{
+ struct tcpopt_context loop_ctx = {
+ .ptr = (__u8 *)(tcp_header + 1),
+ .end = (__u8 *)tcp_header + tcp_len,
+ .data_end = data_end,
+ .tsecr = tsecr,
+ .wscale = TS_OPT_WSCALE_MASK,
+ .option_timestamp = false,
+ .option_sack = false,
+ };
+ u32 cookie;
+
+ bpf_loop(6, tscookie_tcpopt_parse_batch, &loop_ctx, 0);
+
+ if (!loop_ctx.option_timestamp)
+ return false;
+
+ cookie = tcp_time_stamp_raw() & ~TSMASK;
+ cookie |= loop_ctx.wscale & TS_OPT_WSCALE_MASK;
+ if (loop_ctx.option_sack)
+ cookie |= TS_OPT_SACK;
+ if (tcp_header->ece && tcp_header->cwr)
+ cookie |= TS_OPT_ECN;
+ *tsval = bpf_htonl(cookie);
+
+ return true;
+}
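+
+/* Resulting tsval layout, per the masks above: bits 31..6 carry the
+ * coarse timestamp, bit 5 ECN, bit 4 SACK permitted, bits 3..0 the
+ * client's window scale. When the peer echoes it back in tsecr, it
+ * can be decoded as (illustrative):
+ *
+ *	__u32 ts = bpf_ntohl(tsecr);
+ *	__u8 wscale = ts & TS_OPT_WSCALE_MASK;
+ *	bool sack_ok = ts & TS_OPT_SACK;
+ *	bool ecn_ok = ts & TS_OPT_ECN;
+ */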
+
+static __always_inline void values_get_tcpipopts(__u16 *mss, __u8 *wscale,
+ __u8 *ttl, bool ipv6)
+{
+ __u32 key = 0;
+ __u64 *value;
+
+ value = bpf_map_lookup_elem(&values, &key);
+ if (value && *value != 0) {
+ if (ipv6)
+ *mss = (*value >> 32) & 0xffff;
+ else
+ *mss = *value & 0xffff;
+ *wscale = (*value >> 16) & 0xf;
+ *ttl = (*value >> 24) & 0xff;
+ return;
+ }
+
+ *mss = ipv6 ? DEFAULT_MSS6 : DEFAULT_MSS4;
+ *wscale = DEFAULT_WSCALE;
+ *ttl = DEFAULT_TTL;
+}
+
+static __always_inline void values_inc_synacks(void)
+{
+ __u32 key = 1;
+ __u32 *value;
+
+ value = bpf_map_lookup_elem(&values, &key);
+ if (value)
+ __sync_fetch_and_add(value, 1);
+}
+
+static __always_inline bool check_port_allowed(__u16 port)
+{
+ __u32 i;
+
+ for (i = 0; i < MAX_ALLOWED_PORTS; i++) {
+ __u32 key = i;
+ __u16 *value;
+
+ value = bpf_map_lookup_elem(&allowed_ports, &key);
+
+ if (!value)
+ break;
+ /* 0 is a terminator value. Check it first to avoid matching on
+ * a forbidden port == 0 and returning true.
+ */
+ if (*value == 0)
+ break;
+
+ if (*value == port)
+ return true;
+ }
+
+ return false;
+}
+
+struct header_pointers {
+ struct ethhdr *eth;
+ struct iphdr *ipv4;
+ struct ipv6hdr *ipv6;
+ struct tcphdr *tcp;
+ __u16 tcp_len;
+};
+
+static __always_inline int tcp_dissect(void *data, void *data_end,
+ struct header_pointers *hdr)
+{
+ hdr->eth = data;
+ if (hdr->eth + 1 > data_end)
+ return XDP_DROP;
+
+ switch (bpf_ntohs(hdr->eth->h_proto)) {
+ case ETH_P_IP:
+ hdr->ipv6 = NULL;
+
+ hdr->ipv4 = (void *)hdr->eth + sizeof(*hdr->eth);
+ if (hdr->ipv4 + 1 > data_end)
+ return XDP_DROP;
+ if (hdr->ipv4->ihl * 4 < sizeof(*hdr->ipv4))
+ return XDP_DROP;
+ if (hdr->ipv4->version != 4)
+ return XDP_DROP;
+
+ if (hdr->ipv4->protocol != IPPROTO_TCP)
+ return XDP_PASS;
+
+ hdr->tcp = (void *)hdr->ipv4 + hdr->ipv4->ihl * 4;
+ break;
+ case ETH_P_IPV6:
+ hdr->ipv4 = NULL;
+
+ hdr->ipv6 = (void *)hdr->eth + sizeof(*hdr->eth);
+ if (hdr->ipv6 + 1 > data_end)
+ return XDP_DROP;
+ if (hdr->ipv6->version != 6)
+ return XDP_DROP;
+
+ /* XXX: Extension headers are not supported and could circumvent
+ * XDP SYN flood protection.
+ */
+ if (hdr->ipv6->nexthdr != NEXTHDR_TCP)
+ return XDP_PASS;
+
+ hdr->tcp = (void *)hdr->ipv6 + sizeof(*hdr->ipv6);
+ break;
+ default:
+ /* XXX: VLANs will circumvent XDP SYN flood protection. */
+ return XDP_PASS;
+ }
+
+ if (hdr->tcp + 1 > data_end)
+ return XDP_DROP;
+ hdr->tcp_len = hdr->tcp->doff * 4;
+ if (hdr->tcp_len < sizeof(*hdr->tcp))
+ return XDP_DROP;
+
+ return XDP_TX;
+}
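+
+/* Note: XDP verdict codes are reused even when this logic runs from TC;
+ * syncookie_tc() at the bottom translates them into TC_ACT_* and
+ * bpf_redirect().
+ */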
+
+static __always_inline int tcp_lookup(void *ctx, struct header_pointers *hdr, bool xdp)
+{
+ struct bpf_ct_opts ct_lookup_opts = {
+ .netns_id = BPF_F_CURRENT_NETNS,
+ .l4proto = IPPROTO_TCP,
+ };
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+ __u32 tup_size;
+
+ if (hdr->ipv4) {
+ /* TCP doesn't normally use fragments, and XDP can't reassemble
+ * them.
+ */
+ if ((hdr->ipv4->frag_off & bpf_htons(IP_DF | IP_MF | IP_OFFSET)) != bpf_htons(IP_DF))
+ return XDP_DROP;
+
+ tup.ipv4.saddr = hdr->ipv4->saddr;
+ tup.ipv4.daddr = hdr->ipv4->daddr;
+ tup.ipv4.sport = hdr->tcp->source;
+ tup.ipv4.dport = hdr->tcp->dest;
+ tup_size = sizeof(tup.ipv4);
+ } else if (hdr->ipv6) {
+ __builtin_memcpy(tup.ipv6.saddr, &hdr->ipv6->saddr, sizeof(tup.ipv6.saddr));
+ __builtin_memcpy(tup.ipv6.daddr, &hdr->ipv6->daddr, sizeof(tup.ipv6.daddr));
+ tup.ipv6.sport = hdr->tcp->source;
+ tup.ipv6.dport = hdr->tcp->dest;
+ tup_size = sizeof(tup.ipv6);
+ } else {
+ /* The verifier can't track that either ipv4 or ipv6 is not
+ * NULL.
+ */
+ return XDP_ABORTED;
+ }
+ if (xdp)
+ ct = bpf_xdp_ct_lookup(ctx, &tup, tup_size, &ct_lookup_opts, sizeof(ct_lookup_opts));
+ else
+ ct = bpf_skb_ct_lookup(ctx, &tup, tup_size, &ct_lookup_opts, sizeof(ct_lookup_opts));
+ if (ct) {
+ unsigned long status = ct->status;
+
+ bpf_ct_release(ct);
+		if (status & IPS_CONFIRMED)
+ return XDP_PASS;
+ } else if (ct_lookup_opts.error != -ENOENT) {
+ return XDP_ABORTED;
+ }
+
+	/* error == -ENOENT || !(status & IPS_CONFIRMED) */
+ return XDP_TX;
+}
+
+static __always_inline __u8 tcp_mkoptions(__be32 *buf, __be32 *tsopt, __u16 mss,
+ __u8 wscale)
+{
+ __be32 *start = buf;
+
+ *buf++ = bpf_htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
+
+ if (!tsopt)
+ return buf - start;
+
+ if (tsopt[0] & bpf_htonl(1 << 4))
+ *buf++ = bpf_htonl((TCPOPT_SACK_PERM << 24) |
+ (TCPOLEN_SACK_PERM << 16) |
+ (TCPOPT_TIMESTAMP << 8) |
+ TCPOLEN_TIMESTAMP);
+ else
+ *buf++ = bpf_htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_TIMESTAMP << 8) |
+ TCPOLEN_TIMESTAMP);
+ *buf++ = tsopt[0];
+ *buf++ = tsopt[1];
+
+ if ((tsopt[0] & bpf_htonl(0xf)) != bpf_htonl(0xf))
+ *buf++ = bpf_htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_WINDOW << 16) |
+ (TCPOLEN_WINDOW << 8) |
+ wscale);
+
+ return buf - start;
+}
+
+static __always_inline void tcp_gen_synack(struct tcphdr *tcp_header,
+ __u32 cookie, __be32 *tsopt,
+ __u16 mss, __u8 wscale)
+{
+ void *tcp_options;
+
+ tcp_flag_word(tcp_header) = TCP_FLAG_SYN | TCP_FLAG_ACK;
+ if (tsopt && (tsopt[0] & bpf_htonl(1 << 5)))
+ tcp_flag_word(tcp_header) |= TCP_FLAG_ECE;
+ tcp_header->doff = 5; /* doff is part of tcp_flag_word. */
+ swap(tcp_header->source, tcp_header->dest);
+ tcp_header->ack_seq = bpf_htonl(bpf_ntohl(tcp_header->seq) + 1);
+ tcp_header->seq = bpf_htonl(cookie);
+ tcp_header->window = 0;
+ tcp_header->urg_ptr = 0;
+ tcp_header->check = 0; /* Calculate checksum later. */
+
+ tcp_options = (void *)(tcp_header + 1);
+ tcp_header->doff += tcp_mkoptions(tcp_options, tsopt, mss, wscale);
+}
+
+static __always_inline void tcpv4_gen_synack(struct header_pointers *hdr,
+ __u32 cookie, __be32 *tsopt)
+{
+ __u8 wscale;
+ __u16 mss;
+ __u8 ttl;
+
+ values_get_tcpipopts(&mss, &wscale, &ttl, false);
+
+ swap_eth_addr(hdr->eth->h_source, hdr->eth->h_dest);
+
+ swap(hdr->ipv4->saddr, hdr->ipv4->daddr);
+ hdr->ipv4->check = 0; /* Calculate checksum later. */
+ hdr->ipv4->tos = 0;
+ hdr->ipv4->id = 0;
+ hdr->ipv4->ttl = ttl;
+
+ tcp_gen_synack(hdr->tcp, cookie, tsopt, mss, wscale);
+
+ hdr->tcp_len = hdr->tcp->doff * 4;
+ hdr->ipv4->tot_len = bpf_htons(sizeof(*hdr->ipv4) + hdr->tcp_len);
+}
+
+static __always_inline void tcpv6_gen_synack(struct header_pointers *hdr,
+ __u32 cookie, __be32 *tsopt)
+{
+ __u8 wscale;
+ __u16 mss;
+ __u8 ttl;
+
+ values_get_tcpipopts(&mss, &wscale, &ttl, true);
+
+ swap_eth_addr(hdr->eth->h_source, hdr->eth->h_dest);
+
+ swap(hdr->ipv6->saddr, hdr->ipv6->daddr);
+ *(__be32 *)hdr->ipv6 = bpf_htonl(0x60000000);
+ hdr->ipv6->hop_limit = ttl;
+
+ tcp_gen_synack(hdr->tcp, cookie, tsopt, mss, wscale);
+
+ hdr->tcp_len = hdr->tcp->doff * 4;
+ hdr->ipv6->payload_len = bpf_htons(hdr->tcp_len);
+}
+
+static __always_inline int syncookie_handle_syn(struct header_pointers *hdr,
+ void *ctx,
+ void *data, void *data_end,
+ bool xdp)
+{
+ __u32 old_pkt_size, new_pkt_size;
+ /* Unlike clang 10, clang 11 and 12 generate code that doesn't pass the
+ * BPF verifier if tsopt is not volatile. Volatile forces it to store
+ * the pointer value and use it directly, otherwise tcp_mkoptions is
+ * (mis)compiled like this:
+ * if (!tsopt)
+ * return buf - start;
+ * reg = stored_return_value_of_tscookie_init;
+ * if (reg)
+ * tsopt = tsopt_buf;
+ * else
+ * tsopt = NULL;
+ * ...
+ * *buf++ = tsopt[1];
+ * It creates a dead branch where tsopt is assigned NULL, but the
+ * verifier can't prove it's dead and blocks the program.
+ */
+ __be32 * volatile tsopt = NULL;
+ __be32 tsopt_buf[2] = {};
+ __u16 ip_len;
+ __u32 cookie;
+ __s64 value;
+
+ /* Checksum is not yet verified, but both checksum failure and TCP
+ * header checks return XDP_DROP, so the order doesn't matter.
+ */
+ if (hdr->tcp->fin || hdr->tcp->rst)
+ return XDP_DROP;
+
+ /* Issue SYN cookies on allowed ports, drop SYN packets on blocked
+ * ports.
+ */
+ if (!check_port_allowed(bpf_ntohs(hdr->tcp->dest)))
+ return XDP_DROP;
+
+ if (hdr->ipv4) {
+ /* Check the IPv4 and TCP checksums before creating a SYNACK. */
+ value = bpf_csum_diff(0, 0, (void *)hdr->ipv4, hdr->ipv4->ihl * 4, 0);
+ if (value < 0)
+ return XDP_ABORTED;
+ if (csum_fold(value) != 0)
+ return XDP_DROP; /* Bad IPv4 checksum. */
+
+ value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0);
+ if (value < 0)
+ return XDP_ABORTED;
+ if (csum_tcpudp_magic(hdr->ipv4->saddr, hdr->ipv4->daddr,
+ hdr->tcp_len, IPPROTO_TCP, value) != 0)
+ return XDP_DROP; /* Bad TCP checksum. */
+
+ ip_len = sizeof(*hdr->ipv4);
+
+ value = bpf_tcp_raw_gen_syncookie_ipv4(hdr->ipv4, hdr->tcp,
+ hdr->tcp_len);
+ } else if (hdr->ipv6) {
+ /* Check the TCP checksum before creating a SYNACK. */
+ value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0);
+ if (value < 0)
+ return XDP_ABORTED;
+ if (csum_ipv6_magic(&hdr->ipv6->saddr, &hdr->ipv6->daddr,
+ hdr->tcp_len, IPPROTO_TCP, value) != 0)
+ return XDP_DROP; /* Bad TCP checksum. */
+
+ ip_len = sizeof(*hdr->ipv6);
+
+ value = bpf_tcp_raw_gen_syncookie_ipv6(hdr->ipv6, hdr->tcp,
+ hdr->tcp_len);
+ } else {
+ return XDP_ABORTED;
+ }
+
+ if (value < 0)
+ return XDP_ABORTED;
+ cookie = (__u32)value;
+
+ if (tscookie_init((void *)hdr->tcp, hdr->tcp_len,
+ &tsopt_buf[0], &tsopt_buf[1], data_end))
+ tsopt = tsopt_buf;
+
+ /* Check that there is enough space for a SYNACK. It also covers
+ * the check that the destination of the __builtin_memmove below
+ * doesn't overflow.
+ */
+ if (data + sizeof(*hdr->eth) + ip_len + TCP_MAXLEN > data_end)
+ return XDP_ABORTED;
+
+ if (hdr->ipv4) {
+ if (hdr->ipv4->ihl * 4 > sizeof(*hdr->ipv4)) {
+ struct tcphdr *new_tcp_header;
+
+ new_tcp_header = data + sizeof(*hdr->eth) + sizeof(*hdr->ipv4);
+ __builtin_memmove(new_tcp_header, hdr->tcp, sizeof(*hdr->tcp));
+ hdr->tcp = new_tcp_header;
+
+ hdr->ipv4->ihl = sizeof(*hdr->ipv4) / 4;
+ }
+
+ tcpv4_gen_synack(hdr, cookie, tsopt);
+ } else if (hdr->ipv6) {
+ tcpv6_gen_synack(hdr, cookie, tsopt);
+ } else {
+ return XDP_ABORTED;
+ }
+
+ /* Recalculate checksums. */
+ hdr->tcp->check = 0;
+ value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0);
+ if (value < 0)
+ return XDP_ABORTED;
+ if (hdr->ipv4) {
+ hdr->tcp->check = csum_tcpudp_magic(hdr->ipv4->saddr,
+ hdr->ipv4->daddr,
+ hdr->tcp_len,
+ IPPROTO_TCP,
+ value);
+
+ hdr->ipv4->check = 0;
+ value = bpf_csum_diff(0, 0, (void *)hdr->ipv4, sizeof(*hdr->ipv4), 0);
+ if (value < 0)
+ return XDP_ABORTED;
+ hdr->ipv4->check = csum_fold(value);
+ } else if (hdr->ipv6) {
+ hdr->tcp->check = csum_ipv6_magic(&hdr->ipv6->saddr,
+ &hdr->ipv6->daddr,
+ hdr->tcp_len,
+ IPPROTO_TCP,
+ value);
+ } else {
+ return XDP_ABORTED;
+ }
+
+ /* Set the new packet size. */
+ old_pkt_size = data_end - data;
+ new_pkt_size = sizeof(*hdr->eth) + ip_len + hdr->tcp->doff * 4;
+ if (xdp) {
+ if (bpf_xdp_adjust_tail(ctx, new_pkt_size - old_pkt_size))
+ return XDP_ABORTED;
+ } else {
+ if (bpf_skb_change_tail(ctx, new_pkt_size, 0))
+ return XDP_ABORTED;
+ }
+
+ values_inc_synacks();
+
+ return XDP_TX;
+}
+
+static __always_inline int syncookie_handle_ack(struct header_pointers *hdr)
+{
+ int err;
+
+ if (hdr->tcp->rst)
+ return XDP_DROP;
+
+ if (hdr->ipv4)
+ err = bpf_tcp_raw_check_syncookie_ipv4(hdr->ipv4, hdr->tcp);
+ else if (hdr->ipv6)
+ err = bpf_tcp_raw_check_syncookie_ipv6(hdr->ipv6, hdr->tcp);
+ else
+ return XDP_ABORTED;
+ if (err)
+ return XDP_DROP;
+
+ return XDP_PASS;
+}
+
+static __always_inline int syncookie_part1(void *ctx, void *data, void *data_end,
+ struct header_pointers *hdr, bool xdp)
+{
+ struct bpf_ct_opts ct_lookup_opts = {
+ .netns_id = BPF_F_CURRENT_NETNS,
+ .l4proto = IPPROTO_TCP,
+ };
+ int ret;
+
+ ret = tcp_dissect(data, data_end, hdr);
+ if (ret != XDP_TX)
+ return ret;
+
+ ret = tcp_lookup(ctx, hdr, xdp);
+ if (ret != XDP_TX)
+ return ret;
+
+ /* Packet is TCP and doesn't belong to an established connection. */
+
+ if ((hdr->tcp->syn ^ hdr->tcp->ack) != 1)
+ return XDP_DROP;
+
+ /* Grow the TCP header to TCP_MAXLEN to be able to pass any hdr->tcp_len
+ * to bpf_tcp_raw_gen_syncookie_ipv{4,6} and pass the verifier.
+ */
+ if (xdp) {
+ if (bpf_xdp_adjust_tail(ctx, TCP_MAXLEN - hdr->tcp_len))
+ return XDP_ABORTED;
+ } else {
+ /* Without volatile the verifier throws this error:
+ * R9 32-bit pointer arithmetic prohibited
+ */
+ volatile u64 old_len = data_end - data;
+
+ if (bpf_skb_change_tail(ctx, old_len + TCP_MAXLEN - hdr->tcp_len, 0))
+ return XDP_ABORTED;
+ }
+
+ return XDP_TX;
+}
+
+static __always_inline int syncookie_part2(void *ctx, void *data, void *data_end,
+ struct header_pointers *hdr, bool xdp)
+{
+ if (hdr->ipv4) {
+ hdr->eth = data;
+ hdr->ipv4 = (void *)hdr->eth + sizeof(*hdr->eth);
+ /* IPV4_MAXLEN is needed when calculating checksum.
+ * At least sizeof(struct iphdr) is needed here to access ihl.
+ */
+ if ((void *)hdr->ipv4 + IPV4_MAXLEN > data_end)
+ return XDP_ABORTED;
+ hdr->tcp = (void *)hdr->ipv4 + hdr->ipv4->ihl * 4;
+ } else if (hdr->ipv6) {
+ hdr->eth = data;
+ hdr->ipv6 = (void *)hdr->eth + sizeof(*hdr->eth);
+ hdr->tcp = (void *)hdr->ipv6 + sizeof(*hdr->ipv6);
+ } else {
+ return XDP_ABORTED;
+ }
+
+ if ((void *)hdr->tcp + TCP_MAXLEN > data_end)
+ return XDP_ABORTED;
+
+ /* We run out of registers, tcp_len gets spilled to the stack, and the
+ * verifier forgets its min and max values checked above in tcp_dissect.
+ */
+ hdr->tcp_len = hdr->tcp->doff * 4;
+ if (hdr->tcp_len < sizeof(*hdr->tcp))
+ return XDP_ABORTED;
+
+ return hdr->tcp->syn ? syncookie_handle_syn(hdr, ctx, data, data_end, xdp) :
+ syncookie_handle_ack(hdr);
+}
+
+SEC("xdp")
+int syncookie_xdp(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct header_pointers hdr;
+ int ret;
+
+ ret = syncookie_part1(ctx, data, data_end, &hdr, true);
+ if (ret != XDP_TX)
+ return ret;
+
+ data_end = (void *)(long)ctx->data_end;
+ data = (void *)(long)ctx->data;
+
+ return syncookie_part2(ctx, data, data_end, &hdr, true);
+}
+
+SEC("tc")
+int syncookie_tc(struct __sk_buff *skb)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ void *data = (void *)(long)skb->data;
+ struct header_pointers hdr;
+ int ret;
+
+ ret = syncookie_part1(skb, data, data_end, &hdr, false);
+ if (ret != XDP_TX)
+ return ret == XDP_PASS ? TC_ACT_OK : TC_ACT_SHOT;
+
+ data_end = (void *)(long)skb->data_end;
+ data = (void *)(long)skb->data;
+
+ ret = syncookie_part2(skb, data, data_end, &hdr, false);
+ switch (ret) {
+ case XDP_PASS:
+ return TC_ACT_OK;
+ case XDP_TX:
+ return bpf_redirect(skb->ifindex, 0);
+ default:
+ return TC_ACT_SHOT;
+ }
+}
+
+char _license[] SEC("license") = "GPL";
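
The split into syncookie_part1() and syncookie_part2() above is forced by the
tail-resizing helpers: bpf_xdp_adjust_tail() and bpf_skb_change_tail()
invalidate every pointer previously derived from the packet, so the entry
points above reload ctx->data and ctx->data_end between the two parts. A
minimal sketch of that reload pattern, assuming a standalone XDP program (the
program body is illustrative; only the helper and the context fields are
real):

	#include <linux/bpf.h>
	#include <linux/if_ether.h>
	#include <bpf/bpf_helpers.h>

	SEC("xdp")
	int adjust_tail_pattern(struct xdp_md *ctx)
	{
		void *data = (void *)(long)ctx->data;
		void *data_end = (void *)(long)ctx->data_end;

		/* Validate whatever is needed before the resize. */
		if (data + sizeof(struct ethhdr) > data_end)
			return XDP_DROP;

		/* Grow the packet tail by 4 bytes. */
		if (bpf_xdp_adjust_tail(ctx, 4))
			return XDP_ABORTED;

		/* The old data/data_end are now invalid; reload from ctx. */
		data = (void *)(long)ctx->data;
		data_end = (void *)(long)ctx->data_end;

		/* Re-validate before touching the packet again. */
		if (data + sizeof(struct ethhdr) > data_end)
			return XDP_DROP;

		return XDP_PASS;
	}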
diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
index c0e7acd698ed..e443e6542cb9 100755
--- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py
+++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
@@ -58,7 +58,7 @@ class BlockParser(object):
class ArrayParser(BlockParser):
"""
- A parser for extracting dicionaries of values from some BPF-related arrays.
+ A parser for extracting a set of values from some BPF-related arrays.
@reader: a pointer to the open file to parse
@array_name: name of the array to parse
"""
@@ -66,7 +66,7 @@ class ArrayParser(BlockParser):
def __init__(self, reader, array_name):
self.array_name = array_name
- self.start_marker = re.compile(f'(static )?const char \* const {self.array_name}\[.*\] = {{\n')
+ self.start_marker = re.compile(f'(static )?const bool {self.array_name}\[.*\] = {{\n')
super().__init__(reader)
def search_block(self):
@@ -80,15 +80,15 @@ class ArrayParser(BlockParser):
Parse a block and return data as a set. Items to extract must be
on separate lines in the file.
"""
- pattern = re.compile('\[(BPF_\w*)\]\s*= "(.*)",?$')
- entries = {}
+ pattern = re.compile('\[(BPF_\w*)\]\s*= (true|false),?$')
+ entries = set()
while True:
line = self.reader.readline()
if line == '' or re.match(self.end_marker, line):
break
capture = pattern.search(line)
if capture:
- entries[capture.group(1)] = capture.group(2)
+ entries |= {capture.group(1)}
return entries
class InlineListParser(BlockParser):
@@ -115,7 +115,7 @@ class InlineListParser(BlockParser):
class FileExtractor(object):
"""
A generic reader for extracting data from a given file. This class contains
- several helper methods that wrap arround parser objects to extract values
+ several helper methods that wrap around parser objects to extract values
from different structures.
This class does not offer a way to set a filename, which is expected to be
defined in children classes.
@@ -139,21 +139,19 @@ class FileExtractor(object):
def get_types_from_array(self, array_name):
"""
- Search for and parse an array associating names to BPF_* enum members,
- for example:
+ Search for and parse a list of allowed BPF_* enum members, for example:
- const char * const prog_type_name[] = {
- [BPF_PROG_TYPE_UNSPEC] = "unspec",
- [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
- [BPF_PROG_TYPE_KPROBE] = "kprobe",
+ const bool prog_type_name[] = {
+ [BPF_PROG_TYPE_UNSPEC] = true,
+ [BPF_PROG_TYPE_SOCKET_FILTER] = true,
+ [BPF_PROG_TYPE_KPROBE] = true,
};
- Return a dictionary with the enum member names as keys and the
- associated names as values, for example:
+ Return a set of the enum members, for example:
- {'BPF_PROG_TYPE_UNSPEC': 'unspec',
- 'BPF_PROG_TYPE_SOCKET_FILTER': 'socket_filter',
- 'BPF_PROG_TYPE_KPROBE': 'kprobe'}
+ {'BPF_PROG_TYPE_UNSPEC',
+ 'BPF_PROG_TYPE_SOCKET_FILTER',
+ 'BPF_PROG_TYPE_KPROBE'}
@array_name: name of the array to parse
"""
@@ -186,6 +184,27 @@ class FileExtractor(object):
parser.search_block(start_marker)
return parser.parse(pattern, end_marker)
+ def make_enum_map(self, names, enum_prefix):
+ """
+ Take a set of BPF_* enum variant names, such as returned by get_enum,
+ and generate a textual representation for each of them by removing a
+ provided prefix (which every name is assumed to carry) and lowercasing
+ the remainder; e.g. 'BPF_CGROUP_INET_INGRESS' becomes
+ 'cgroup_inet_ingress' for the prefix 'BPF_'. Return a dict mapping
+ from name to textual representation.
+
+ @names: a set of enum variant names, e.g. as retrieved by get_enum
+ @enum_prefix: the prefix to remove from each of the variants to infer
+ the textual representation
+ """
+ mapping = {}
+ for name in names:
+ if not name.startswith(enum_prefix):
+ raise Exception(f"enum variant {name} does not start with {enum_prefix}")
+ text = name[len(enum_prefix):].lower()
+ mapping[name] = text
+
+ return mapping
+
def __get_description_list(self, start_marker, pattern, end_marker):
parser = InlineListParser(self.reader)
parser.search_block(start_marker)
@@ -333,11 +352,9 @@ class ProgFileExtractor(SourceFileExtractor):
"""
filename = os.path.join(BPFTOOL_DIR, 'prog.c')
- def get_prog_types(self):
- return self.get_types_from_array('prog_type_name')
-
def get_attach_types(self):
- return self.get_types_from_array('attach_type_strings')
+ types = self.get_types_from_array('attach_types')
+ return self.make_enum_map(types, 'BPF_')
def get_prog_attach_help(self):
return self.get_help_list('ATTACH_TYPE')
@@ -348,9 +365,6 @@ class MapFileExtractor(SourceFileExtractor):
"""
filename = os.path.join(BPFTOOL_DIR, 'map.c')
- def get_map_types(self):
- return self.get_types_from_array('map_type_name')
-
def get_map_help(self):
return self.get_help_list('TYPE')
@@ -363,30 +377,6 @@ class CgroupFileExtractor(SourceFileExtractor):
def get_prog_attach_help(self):
return self.get_help_list('ATTACH_TYPE')
-class CommonFileExtractor(SourceFileExtractor):
- """
- An extractor for bpftool's common.c.
- """
- filename = os.path.join(BPFTOOL_DIR, 'common.c')
-
- def __init__(self):
- super().__init__()
- self.attach_types = {}
-
- def get_attach_types(self):
- if not self.attach_types:
- self.attach_types = self.get_types_from_array('attach_type_name')
- return self.attach_types
-
- def get_cgroup_attach_types(self):
- if not self.attach_types:
- self.get_attach_types()
- cgroup_types = {}
- for (key, value) in self.attach_types.items():
- if key.find('BPF_CGROUP') != -1:
- cgroup_types[key] = value
- return cgroup_types
-
class GenericSourceExtractor(SourceFileExtractor):
"""
An extractor for generic source code files.
@@ -403,14 +393,28 @@ class BpfHeaderExtractor(FileExtractor):
"""
filename = os.path.join(INCLUDE_DIR, 'uapi/linux/bpf.h')
+ def __init__(self):
+ super().__init__()
+ self.attach_types = {}
+
def get_prog_types(self):
return self.get_enum('bpf_prog_type')
- def get_map_types(self):
- return self.get_enum('bpf_map_type')
+ def get_map_type_map(self):
+ names = self.get_enum('bpf_map_type')
+ return self.make_enum_map(names, 'BPF_MAP_TYPE_')
- def get_attach_types(self):
- return self.get_enum('bpf_attach_type')
+ def get_attach_type_map(self):
+ if not self.attach_types:
+ names = self.get_enum('bpf_attach_type')
+ self.attach_types = self.make_enum_map(names, 'BPF_')
+ return self.attach_types
+
+ def get_cgroup_attach_type_map(self):
+ if not self.attach_types:
+ self.get_attach_type_map()
+ return {name: text for name, text in self.attach_types.items()
+ if name.startswith('BPF_CGROUP')}
class ManPageExtractor(FileExtractor):
"""
@@ -495,21 +499,12 @@ def main():
""")
args = argParser.parse_args()
- # Map types (enum)
-
bpf_info = BpfHeaderExtractor()
- ref = bpf_info.get_map_types()
-
- map_info = MapFileExtractor()
- source_map_items = map_info.get_map_types()
- map_types_enum = set(source_map_items.keys())
-
- verify(ref, map_types_enum,
- f'Comparing BPF header (enum bpf_map_type) and {MapFileExtractor.filename} (map_type_name):')
# Map types (names)
- source_map_types = set(source_map_items.values())
+ map_info = MapFileExtractor()
+ source_map_types = set(bpf_info.get_map_type_map().values())
source_map_types.discard('unspec')
help_map_types = map_info.get_map_help()
@@ -525,37 +520,17 @@ def main():
bashcomp_map_types = bashcomp_info.get_map_types()
verify(source_map_types, help_map_types,
- f'Comparing {MapFileExtractor.filename} (map_type_name) and {MapFileExtractor.filename} (do_help() TYPE):')
+ f'Comparing {BpfHeaderExtractor.filename} (bpf_map_type) and {MapFileExtractor.filename} (do_help() TYPE):')
verify(source_map_types, man_map_types,
- f'Comparing {MapFileExtractor.filename} (map_type_name) and {ManMapExtractor.filename} (TYPE):')
+ f'Comparing {BpfHeaderExtractor.filename} (bpf_map_type) and {ManMapExtractor.filename} (TYPE):')
verify(help_map_options, man_map_options,
f'Comparing {MapFileExtractor.filename} (do_help() OPTIONS) and {ManMapExtractor.filename} (OPTIONS):')
verify(source_map_types, bashcomp_map_types,
- f'Comparing {MapFileExtractor.filename} (map_type_name) and {BashcompExtractor.filename} (BPFTOOL_MAP_CREATE_TYPES):')
-
- # Program types (enum)
-
- ref = bpf_info.get_prog_types()
-
- prog_info = ProgFileExtractor()
- prog_types = set(prog_info.get_prog_types().keys())
-
- verify(ref, prog_types,
- f'Comparing BPF header (enum bpf_prog_type) and {ProgFileExtractor.filename} (prog_type_name):')
-
- # Attach types (enum)
-
- ref = bpf_info.get_attach_types()
- bpf_info.close()
-
- common_info = CommonFileExtractor()
- attach_types = common_info.get_attach_types()
-
- verify(ref, attach_types,
- f'Comparing BPF header (enum bpf_attach_type) and {CommonFileExtractor.filename} (attach_type_name):')
+ f'Comparing {BpfHeaderExtractor.filename} (bpf_map_type) and {BashcompExtractor.filename} (BPFTOOL_MAP_CREATE_TYPES):')
# Attach types (names)
+ prog_info = ProgFileExtractor()
source_prog_attach_types = set(prog_info.get_attach_types().values())
help_prog_attach_types = prog_info.get_prog_attach_help()
@@ -571,18 +546,17 @@ def main():
bashcomp_prog_attach_types = bashcomp_info.get_prog_attach_types()
verify(source_prog_attach_types, help_prog_attach_types,
- f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ProgFileExtractor.filename} (do_help() ATTACH_TYPE):')
+ f'Comparing {ProgFileExtractor.filename} (bpf_attach_type) and {ProgFileExtractor.filename} (do_help() ATTACH_TYPE):')
verify(source_prog_attach_types, man_prog_attach_types,
- f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ManProgExtractor.filename} (ATTACH_TYPE):')
+ f'Comparing {ProgFileExtractor.filename} (bpf_attach_type) and {ManProgExtractor.filename} (ATTACH_TYPE):')
verify(help_prog_options, man_prog_options,
f'Comparing {ProgFileExtractor.filename} (do_help() OPTIONS) and {ManProgExtractor.filename} (OPTIONS):')
verify(source_prog_attach_types, bashcomp_prog_attach_types,
- f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_PROG_ATTACH_TYPES):')
+ f'Comparing {ProgFileExtractor.filename} (bpf_attach_type) and {BashcompExtractor.filename} (BPFTOOL_PROG_ATTACH_TYPES):')
# Cgroup attach types
-
- source_cgroup_attach_types = set(common_info.get_cgroup_attach_types().values())
- common_info.close()
+ source_cgroup_attach_types = set(bpf_info.get_cgroup_attach_type_map().values())
+ bpf_info.close()
cgroup_info = CgroupFileExtractor()
help_cgroup_attach_types = cgroup_info.get_prog_attach_help()
@@ -598,13 +572,13 @@ def main():
bashcomp_info.close()
verify(source_cgroup_attach_types, help_cgroup_attach_types,
- f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {CgroupFileExtractor.filename} (do_help() ATTACH_TYPE):')
+ f'Comparing {BpfHeaderExtractor.filename} (bpf_attach_type) and {CgroupFileExtractor.filename} (do_help() ATTACH_TYPE):')
verify(source_cgroup_attach_types, man_cgroup_attach_types,
- f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {ManCgroupExtractor.filename} (ATTACH_TYPE):')
+ f'Comparing {BpfHeaderExtractor.filename} (bpf_attach_type) and {ManCgroupExtractor.filename} (ATTACH_TYPE):')
verify(help_cgroup_options, man_cgroup_options,
f'Comparing {CgroupFileExtractor.filename} (do_help() OPTIONS) and {ManCgroupExtractor.filename} (OPTIONS):')
verify(source_cgroup_attach_types, bashcomp_cgroup_attach_types,
- f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_CGROUP_ATTACH_TYPES):')
+ f'Comparing {BpfHeaderExtractor.filename} (bpf_attach_type) and {BashcompExtractor.filename} (BPFTOOL_CGROUP_ATTACH_TYPES):')
# Options for remaining commands
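
The parser updates above track bpftool's switch from string-lookup arrays to
arrays of booleans indexed by the kernel enums, with the textual names now
derived from the enum members themselves via make_enum_map(). The new
start_marker and pattern regexps match declarations of the following shape
(the array name comes from ProgFileExtractor above; the entries shown are
assumed for illustration):

	static const bool attach_types[] = {
		[BPF_SK_SKB_STREAM_PARSER] = true,
		[BPF_SK_SKB_STREAM_VERDICT] = true,
		[BPF_SK_SKB_VERDICT] = true,
		[BPF_SK_MSG_VERDICT] = true,
		[BPF_FLOW_DISSECTOR] = true,
	};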
diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h
index 128989bed8b7..38782bd47fdc 100644
--- a/tools/testing/selftests/bpf/test_btf.h
+++ b/tools/testing/selftests/bpf/test_btf.h
@@ -39,6 +39,7 @@
#define BTF_MEMBER_ENC(name, type, bits_offset) \
(name), (type), (bits_offset)
#define BTF_ENUM_ENC(name, val) (name), (val)
+#define BTF_ENUM64_ENC(name, val_lo32, val_hi32) (name), (val_lo32), (val_hi32)
#define BTF_MEMBER_OFFSET(bitfield_size, bits_offset) \
((bitfield_size) << 24 | (bits_offset))
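
BTF_ENUM64_ENC() mirrors struct btf_enum64, which stores a 64-bit enumerator
value as two 32-bit halves, so value = ((__u64)val_hi32 << 32) | val_lo32. A
hypothetical use, encoding the value 0x0123456789abcdef:

	BTF_ENUM64_ENC(NAME_TBD, 0x89abcdef, 0x01234567),

where NAME_TBD stands in for a string table offset, as elsewhere in the BTF
tests.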
diff --git a/tools/testing/selftests/bpf/test_xdping.sh b/tools/testing/selftests/bpf/test_xdping.sh
index c2f0ddb45531..c3d82e0a7378 100755
--- a/tools/testing/selftests/bpf/test_xdping.sh
+++ b/tools/testing/selftests/bpf/test_xdping.sh
@@ -95,5 +95,9 @@ for server_args in "" "-I veth0 -s -S" ; do
test "$client_args" "$server_args"
done
+# Test drv mode
+test "-I veth1 -N" "-I veth0 -s -N"
+test "-I veth1 -N -c 10" "-I veth0 -s -N"
+
echo "OK. All tests passed"
exit 0
diff --git a/tools/testing/selftests/bpf/xdp_synproxy.c b/tools/testing/selftests/bpf/xdp_synproxy.c
new file mode 100644
index 000000000000..d874ddfb39c4
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdp_synproxy.c
@@ -0,0 +1,466 @@
+// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <stdnoreturn.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <net/if.h>
+#include <linux/if_link.h>
+#include <linux/limits.h>
+
+static unsigned int ifindex;
+static __u32 attached_prog_id;
+static bool attached_tc;
+
+static void noreturn cleanup(int sig)
+{
+ LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
+ int prog_fd;
+ int err;
+
+ if (attached_prog_id == 0)
+ exit(0);
+
+ if (attached_tc) {
+ LIBBPF_OPTS(bpf_tc_hook, hook,
+ .ifindex = ifindex,
+ .attach_point = BPF_TC_INGRESS);
+
+ err = bpf_tc_hook_destroy(&hook);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_tc_hook_destroy: %s\n", strerror(-err));
+ fprintf(stderr, "Failed to destroy the TC hook\n");
+ exit(1);
+ }
+ exit(0);
+ }
+
+ prog_fd = bpf_prog_get_fd_by_id(attached_prog_id);
+ if (prog_fd < 0) {
+ fprintf(stderr, "Error: bpf_prog_get_fd_by_id: %s\n", strerror(-prog_fd));
+ err = bpf_xdp_attach(ifindex, -1, 0, NULL);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_set_link_xdp_fd: %s\n", strerror(-err));
+ fprintf(stderr, "Failed to detach XDP program\n");
+ exit(1);
+ }
+ } else {
+ opts.old_prog_fd = prog_fd;
+ err = bpf_xdp_attach(ifindex, -1, XDP_FLAGS_REPLACE, &opts);
+ close(prog_fd);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_set_link_xdp_fd_opts: %s\n", strerror(-err));
+ /* Not an error if already replaced by someone else. */
+ if (err != -EEXIST) {
+ fprintf(stderr, "Failed to detach XDP program\n");
+ exit(1);
+ }
+ }
+ }
+ exit(0);
+}
+
+static noreturn void usage(const char *progname)
+{
+ fprintf(stderr, "Usage: %s [--iface <iface>|--prog <prog_id>] [--mss4 <mss ipv4> --mss6 <mss ipv6> --wscale <wscale> --ttl <ttl>] [--ports <port1>,<port2>,...] [--single] [--tc]\n",
+ progname);
+ exit(1);
+}
+
+static unsigned long parse_arg_ul(const char *progname, const char *arg, unsigned long limit)
+{
+ unsigned long res;
+ char *endptr;
+
+ errno = 0;
+ res = strtoul(arg, &endptr, 10);
+ if (errno != 0 || *endptr != '\0' || arg[0] == '\0' || res > limit)
+ usage(progname);
+
+ return res;
+}
+
+static void parse_options(int argc, char *argv[], unsigned int *ifindex, __u32 *prog_id,
+ __u64 *tcpipopts, char **ports, bool *single, bool *tc)
+{
+ static struct option long_options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "iface", required_argument, NULL, 'i' },
+ { "prog", required_argument, NULL, 'x' },
+ { "mss4", required_argument, NULL, 4 },
+ { "mss6", required_argument, NULL, 6 },
+ { "wscale", required_argument, NULL, 'w' },
+ { "ttl", required_argument, NULL, 't' },
+ { "ports", required_argument, NULL, 'p' },
+ { "single", no_argument, NULL, 's' },
+ { "tc", no_argument, NULL, 'c' },
+ { NULL, 0, NULL, 0 },
+ };
+ unsigned long mss4, mss6, wscale, ttl;
+ unsigned int tcpipopts_mask = 0;
+
+ if (argc < 2)
+ usage(argv[0]);
+
+ *ifindex = 0;
+ *prog_id = 0;
+ *tcpipopts = 0;
+ *ports = NULL;
+ *single = false;
+ *tc = false;
+
+ while (true) {
+ int opt;
+
+ opt = getopt_long(argc, argv, "", long_options, NULL);
+ if (opt == -1)
+ break;
+
+ switch (opt) {
+ case 'h':
+ usage(argv[0]);
+ break;
+ case 'i':
+ *ifindex = if_nametoindex(optarg);
+ if (*ifindex == 0)
+ usage(argv[0]);
+ break;
+ case 'x':
+ *prog_id = parse_arg_ul(argv[0], optarg, UINT32_MAX);
+ if (*prog_id == 0)
+ usage(argv[0]);
+ break;
+ case 4:
+ mss4 = parse_arg_ul(argv[0], optarg, UINT16_MAX);
+ tcpipopts_mask |= 1 << 0;
+ break;
+ case 6:
+ mss6 = parse_arg_ul(argv[0], optarg, UINT16_MAX);
+ tcpipopts_mask |= 1 << 1;
+ break;
+ case 'w':
+ wscale = parse_arg_ul(argv[0], optarg, 14);
+ tcpipopts_mask |= 1 << 2;
+ break;
+ case 't':
+ ttl = parse_arg_ul(argv[0], optarg, UINT8_MAX);
+ tcpipopts_mask |= 1 << 3;
+ break;
+ case 'p':
+ *ports = optarg;
+ break;
+ case 's':
+ *single = true;
+ break;
+ case 'c':
+ *tc = true;
+ break;
+ default:
+ usage(argv[0]);
+ }
+ }
+ if (optind < argc)
+ usage(argv[0]);
+
+ if (tcpipopts_mask == 0xf) {
+ if (mss4 == 0 || mss6 == 0 || wscale == 0 || ttl == 0)
+ usage(argv[0]);
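+ /* Pack all four options into a single value for the "values" map:
+  * bits 47:32 mss6, bits 31:24 ttl, bits 23:16 wscale, bits 15:0 mss4.
+  */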
+ *tcpipopts = (mss6 << 32) | (ttl << 24) | (wscale << 16) | mss4;
+ } else if (tcpipopts_mask != 0) {
+ usage(argv[0]);
+ }
+
+ if (*ifindex != 0 && *prog_id != 0)
+ usage(argv[0]);
+ if (*ifindex == 0 && *prog_id == 0)
+ usage(argv[0]);
+}
+
+static int syncookie_attach(const char *argv0, unsigned int ifindex, bool tc)
+{
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ char xdp_filename[PATH_MAX];
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int prog_fd;
+ int err;
+
+ snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv0);
+ obj = bpf_object__open_file(xdp_filename, NULL);
+ err = libbpf_get_error(obj);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_object__open_file: %s\n", strerror(-err));
+ return err;
+ }
+
+ err = bpf_object__load(obj);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_object__open_file: %s\n", strerror(-err));
+ return err;
+ }
+
+ prog = bpf_object__find_program_by_name(obj, tc ? "syncookie_tc" : "syncookie_xdp");
+ if (!prog) {
+ fprintf(stderr, "Error: bpf_object__find_program_by_name: program was not found\n");
+ return -ENOENT;
+ }
+
+ prog_fd = bpf_program__fd(prog);
+
+ err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_obj_get_info_by_fd: %s\n", strerror(-err));
+ goto out;
+ }
+ attached_tc = tc;
+ attached_prog_id = info.id;
+ signal(SIGINT, cleanup);
+ signal(SIGTERM, cleanup);
+ if (tc) {
+ LIBBPF_OPTS(bpf_tc_hook, hook,
+ .ifindex = ifindex,
+ .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, opts,
+ .handle = 1,
+ .priority = 1,
+ .prog_fd = prog_fd);
+
+ err = bpf_tc_hook_create(&hook);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_tc_hook_create: %s\n",
+ strerror(-err));
+ goto fail;
+ }
+ err = bpf_tc_attach(&hook, &opts);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_tc_attach: %s\n",
+ strerror(-err));
+ goto fail;
+ }
+
+ } else {
+ err = bpf_xdp_attach(ifindex, prog_fd,
+ XDP_FLAGS_UPDATE_IF_NOEXIST, NULL);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_set_link_xdp_fd: %s\n",
+ strerror(-err));
+ goto fail;
+ }
+ }
+ err = 0;
+out:
+ bpf_object__close(obj);
+ return err;
+fail:
+ signal(SIGINT, SIG_DFL);
+ signal(SIGTERM, SIG_DFL);
+ attached_prog_id = 0;
+ goto out;
+}
+
+static int syncookie_open_bpf_maps(__u32 prog_id, int *values_map_fd, int *ports_map_fd)
+{
+ struct bpf_prog_info prog_info;
+ __u32 map_ids[8];
+ __u32 info_len;
+ int prog_fd;
+ int err;
+ int i;
+
+ *values_map_fd = -1;
+ *ports_map_fd = -1;
+
+ prog_fd = bpf_prog_get_fd_by_id(prog_id);
+ if (prog_fd < 0) {
+ fprintf(stderr, "Error: bpf_prog_get_fd_by_id: %s\n", strerror(-prog_fd));
+ return prog_fd;
+ }
+
+ prog_info = (struct bpf_prog_info) {
+ .nr_map_ids = 8,
+ .map_ids = (__u64)map_ids,
+ };
+ info_len = sizeof(prog_info);
+
+ err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len);
+ if (err != 0) {
+ fprintf(stderr, "Error: bpf_obj_get_info_by_fd: %s\n", strerror(-err));
+ goto out;
+ }
+
+ if (prog_info.nr_map_ids < 2) {
+ fprintf(stderr, "Error: Found %u BPF maps, expected at least 2\n",
+ prog_info.nr_map_ids);
+ err = -ENOENT;
+ goto out;
+ }
+
+ for (i = 0; i < prog_info.nr_map_ids; i++) {
+ struct bpf_map_info map_info = {};
+ int map_fd;
+
+ err = bpf_map_get_fd_by_id(map_ids[i]);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_map_get_fd_by_id: %s\n", strerror(-err));
+ goto err_close_map_fds;
+ }
+ map_fd = err;
+
+ info_len = sizeof(map_info);
+ err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
+ if (err != 0) {
+ fprintf(stderr, "Error: bpf_obj_get_info_by_fd: %s\n", strerror(-err));
+ close(map_fd);
+ goto err_close_map_fds;
+ }
+ if (strcmp(map_info.name, "values") == 0) {
+ *values_map_fd = map_fd;
+ continue;
+ }
+ if (strcmp(map_info.name, "allowed_ports") == 0) {
+ *ports_map_fd = map_fd;
+ continue;
+ }
+ close(map_fd);
+ }
+
+ if (*values_map_fd != -1 && *ports_map_fd != -1) {
+ err = 0;
+ goto out;
+ }
+
+ err = -ENOENT;
+
+err_close_map_fds:
+ if (*values_map_fd != -1)
+ close(*values_map_fd);
+ if (*ports_map_fd != -1)
+ close(*ports_map_fd);
+ *values_map_fd = -1;
+ *ports_map_fd = -1;
+
+out:
+ close(prog_fd);
+ return err;
+}
+
+int main(int argc, char *argv[])
+{
+ int values_map_fd, ports_map_fd;
+ __u64 tcpipopts;
+ bool firstiter;
+ __u64 prevcnt;
+ __u32 prog_id;
+ char *ports;
+ bool single;
+ int err = 0;
+ bool tc;
+
+ parse_options(argc, argv, &ifindex, &prog_id, &tcpipopts, &ports,
+ &single, &tc);
+
+ if (prog_id == 0) {
+ if (!tc) {
+ err = bpf_xdp_query_id(ifindex, 0, &prog_id);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_get_link_xdp_id: %s\n",
+ strerror(-err));
+ goto out;
+ }
+ }
+ if (prog_id == 0) {
+ err = syncookie_attach(argv[0], ifindex, tc);
+ if (err < 0)
+ goto out;
+ prog_id = attached_prog_id;
+ }
+ }
+
+ err = syncookie_open_bpf_maps(prog_id, &values_map_fd, &ports_map_fd);
+ if (err < 0)
+ goto out;
+
+ if (ports) {
+ __u16 port_last = 0;
+ __u32 port_idx = 0;
+ char *p = ports;
+
+ fprintf(stderr, "Replacing allowed ports\n");
+
+ while (p && *p != '\0') {
+ char *token = strsep(&p, ",");
+ __u16 port;
+
+ port = parse_arg_ul(argv[0], token, UINT16_MAX);
+ err = bpf_map_update_elem(ports_map_fd, &port_idx, &port, BPF_ANY);
+ if (err != 0) {
+ fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err));
+ fprintf(stderr, "Failed to add port %u (index %u)\n",
+ port, port_idx);
+ goto out_close_maps;
+ }
+ fprintf(stderr, "Added port %u\n", port);
+ port_idx++;
+ }
+ err = bpf_map_update_elem(ports_map_fd, &port_idx, &port_last, BPF_ANY);
+ if (err != 0) {
+ fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err));
+ fprintf(stderr, "Failed to add the terminator value 0 (index %u)\n",
+ port_idx);
+ goto out_close_maps;
+ }
+ }
+
+ if (tcpipopts) {
+ __u32 key = 0;
+
+ fprintf(stderr, "Replacing TCP/IP options\n");
+
+ err = bpf_map_update_elem(values_map_fd, &key, &tcpipopts, BPF_ANY);
+ if (err != 0) {
+ fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err));
+ goto out_close_maps;
+ }
+ }
+
+ if ((ports || tcpipopts) && attached_prog_id == 0 && !single)
+ goto out_close_maps;
+
+ prevcnt = 0;
+ firstiter = true;
+ while (true) {
+ __u32 key = 1;
+ __u64 value;
+
+ err = bpf_map_lookup_elem(values_map_fd, &key, &value);
+ if (err != 0) {
+ fprintf(stderr, "Error: bpf_map_lookup_elem: %s\n", strerror(-err));
+ goto out_close_maps;
+ }
+ if (firstiter) {
+ prevcnt = value;
+ firstiter = false;
+ }
+ if (single) {
+ printf("Total SYNACKs generated: %llu\n", value);
+ break;
+ }
+ printf("SYNACKs generated: %llu (total %llu)\n", value - prevcnt, value);
+ prevcnt = value;
+ sleep(1);
+ }
+
+out_close_maps:
+ close(values_map_fd);
+ close(ports_map_fd);
+out:
+ return err == 0 ? 0 : 1;
+}
diff --git a/tools/testing/selftests/dma/Makefile b/tools/testing/selftests/dma/Makefile
index aa8e8b5b3864..cd8c5ece1cba 100644
--- a/tools/testing/selftests/dma/Makefile
+++ b/tools/testing/selftests/dma/Makefile
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS += -I../../../../usr/include/
+CFLAGS += -I../../../../include/
TEST_GEN_PROGS := dma_map_benchmark
diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c
index c3b3c09e995e..5c997f17fcbd 100644
--- a/tools/testing/selftests/dma/dma_map_benchmark.c
+++ b/tools/testing/selftests/dma/dma_map_benchmark.c
@@ -10,8 +10,8 @@
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
-#include <linux/map_benchmark.h>
#include <linux/types.h>
+#include <linux/map_benchmark.h>
#define NSEC_PER_MSEC 1000000L
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh
new file mode 100644
index 000000000000..a43a9926e690
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh
@@ -0,0 +1,107 @@
+# SPDX-License-Identifier: GPL-2.0
+
+RIF_COUNTER_NUM_NETIFS=2
+
+rif_counter_addr4()
+{
+ local i=$1; shift
+ local p=$1; shift
+
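+ # One /30 per index i: the third octet advances every 64 subnets
+ # (i / 64), the fourth steps in blocks of four (4 * i % 256), and p
+ # picks the host within the /30.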
+ printf 192.0.%d.%d $((i / 64)) $(((4 * i % 256) + p))
+}
+
+rif_counter_addr4pfx()
+{
+ rif_counter_addr4 $@
+ printf /30
+}
+
+rif_counter_h1_create()
+{
+ simple_if_init $h1
+}
+
+rif_counter_h1_destroy()
+{
+ simple_if_fini $h1
+}
+
+rif_counter_h2_create()
+{
+ simple_if_init $h2
+}
+
+rif_counter_h2_destroy()
+{
+ simple_if_fini $h2
+}
+
+rif_counter_setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+
+ vrf_prepare
+
+ rif_counter_h1_create
+ rif_counter_h2_create
+}
+
+rif_counter_cleanup()
+{
+ local count=$1; shift
+
+ pre_cleanup
+
+ for ((i = 1; i <= count; i++)); do
+ vlan_destroy $h2 $i
+ done
+
+ rif_counter_h2_destroy
+ rif_counter_h1_destroy
+
+ vrf_cleanup
+
+ if [[ -v RIF_COUNTER_BATCH_FILE ]]; then
+ rm -f $RIF_COUNTER_BATCH_FILE
+ fi
+}
+
+
+rif_counter_test()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
+ RIF_COUNTER_BATCH_FILE="$(mktemp)"
+
+ for ((i = 1; i <= count; i++)); do
+ vlan_create $h2 $i v$h2 $(rif_counter_addr4pfx $i 2)
+ done
+ for ((i = 1; i <= count; i++)); do
+ cat >> $RIF_COUNTER_BATCH_FILE <<-EOF
+ stats set dev $h2.$i l3_stats on
+ EOF
+ done
+
+ ip -b $RIF_COUNTER_BATCH_FILE
+ check_err_fail $should_fail $? "RIF counter enablement"
+}
+
+rif_counter_traffic_test()
+{
+ local count=$1; shift
+ local i
+
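+ # Inject one packet into a logarithmic sample of the VLAN uppers
+ # (-Q sets the VLAN tag) so their RIF counters can be checked below.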
+ for ((i = count; i > 0; i /= 2)); do
+ $MZ $h1 -Q $i -c 1 -d 20msec -p 100 -a own -b $(mac_get $h2) \
+ -A $(rif_counter_addr4 $i 1) \
+ -B $(rif_counter_addr4 $i 2) \
+ -q -t udp sp=54321,dp=12345
+ done
+ for ((i = count; i > 0; i /= 2)); do
+ busywait "$TC_HIT_TIMEOUT" until_counter_is "== 1" \
+ hw_stats_get l3_stats $h2.$i rx packets > /dev/null
+ check_err $? "Traffic not seen at RIF $h2.$i"
+ done
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
index e9f65bd2e299..688338bbeb97 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
@@ -25,7 +25,16 @@ cleanup()
trap cleanup EXIT
-ALL_TESTS="router tc_flower mirror_gre tc_police port rif_mac_profile"
+ALL_TESTS="
+ router
+ tc_flower
+ mirror_gre
+ tc_police
+ port
+ rif_mac_profile
+ rif_counter
+"
+
for current_test in ${TESTS:-$ALL_TESTS}; do
RET_FIN=0
source ${current_test}_scale.sh
@@ -36,16 +45,32 @@ for current_test in ${TESTS:-$ALL_TESTS}; do
for should_fail in 0 1; do
RET=0
target=$(${current_test}_get_target "$should_fail")
+ if ((target == 0)); then
+ log_test_skip "'$current_test' should_fail=$should_fail test"
+ continue
+ fi
+
${current_test}_setup_prepare
setup_wait $num_netifs
+ # Update target in case occupancy of a certain resource changed
+ # following the test setup.
+ target=$(${current_test}_get_target "$should_fail")
${current_test}_test "$target" "$should_fail"
- ${current_test}_cleanup
- devlink_reload
if [[ "$should_fail" -eq 0 ]]; then
log_test "'$current_test' $target"
+
+ if ((!RET)); then
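+ # Run the optional traffic check if the sourced
+ # ${current_test}_scale.sh defines one.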
+ tt=${current_test}_traffic_test
+ if [[ $(type -t $tt) == "function" ]]; then
+ $tt "$target"
+ log_test "'$current_test' $target traffic test"
+ fi
+ fi
else
log_test "'$current_test' overflow $target"
fi
+ ${current_test}_cleanup $target
+ devlink_reload
RET_FIN=$(( RET_FIN || RET ))
done
done
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh
new file mode 120000
index 000000000000..1f5752e8ffc0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh
@@ -0,0 +1 @@
+../spectrum/rif_counter_scale.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
index efd798a85931..4444bbace1a9 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
@@ -4,17 +4,22 @@ source ../tc_flower_scale.sh
tc_flower_get_target()
{
local should_fail=$1; shift
+ local max_cnts
# The driver associates a counter with each tc filter, which means the
# number of supported filters is bounded by the number of available
# counters.
- # Currently, the driver supports 30K (30,720) flow counters and six of
- # these are used for multicast routing.
- local target=30714
+ max_cnts=$(devlink_resource_size_get counters flow)
+
+ # Remove already allocated counters.
+ ((max_cnts -= $(devlink_resource_occ_get counters flow)))
+
+ # Each rule uses two counters, for packets and bytes.
+ ((max_cnts /= 2))
if ((! should_fail)); then
- echo $target
+ echo $max_cnts
else
- echo $((target + 1))
+ echo $((max_cnts + 1))
fi
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
index dea33dc93790..95d9f710a630 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
@@ -22,7 +22,16 @@ cleanup()
devlink_sp_read_kvd_defaults
trap cleanup EXIT
-ALL_TESTS="router tc_flower mirror_gre tc_police port rif_mac_profile"
+ALL_TESTS="
+ router
+ tc_flower
+ mirror_gre
+ tc_police
+ port
+ rif_mac_profile
+ rif_counter
+"
+
for current_test in ${TESTS:-$ALL_TESTS}; do
RET_FIN=0
source ${current_test}_scale.sh
@@ -41,15 +50,31 @@ for current_test in ${TESTS:-$ALL_TESTS}; do
for should_fail in 0 1; do
RET=0
target=$(${current_test}_get_target "$should_fail")
+ if ((target == 0)); then
+ log_test_skip "'$current_test' [$profile] should_fail=$should_fail test"
+ continue
+ fi
${current_test}_setup_prepare
setup_wait $num_netifs
+ # Update target in case occupancy of a certain resource
+ # changed following the test setup.
+ target=$(${current_test}_get_target "$should_fail")
${current_test}_test "$target" "$should_fail"
- ${current_test}_cleanup
if [[ "$should_fail" -eq 0 ]]; then
log_test "'$current_test' [$profile] $target"
+
+ if ((!RET)); then
+ tt=${current_test}_traffic_test
+ if [[ $(type -t $tt) == "function" ]]
+ then
+ $tt "$target"
+ log_test "'$current_test' [$profile] $target traffic test"
+ fi
+ fi
else
log_test "'$current_test' [$profile] overflow $target"
fi
+ ${current_test}_cleanup $target
RET_FIN=$(( RET_FIN || RET ))
done
done
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh
new file mode 100644
index 000000000000..d44536276e8a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../rif_counter_scale.sh
+
+rif_counter_get_target()
+{
+ local should_fail=$1; shift
+ local max_cnts
+ local max_rifs
+ local target
+
+ max_rifs=$(devlink_resource_size_get rifs)
+ max_cnts=$(devlink_resource_size_get counters rif)
+
+ # Remove already allocated RIFs.
+ ((max_rifs -= $(devlink_resource_occ_get rifs)))
+
+ # Each RIF uses an ingress and an egress counter; at 10 KVD slots per
+ # counter that is 20 slots per RIF.
+ ((max_cnts /= 20))
+
+ # Pointless to run the overflow test if we don't have enough RIFs to
+ # host all the counters.
+ if ((max_cnts > max_rifs && should_fail)); then
+ echo 0
+ return
+ fi
+
+ target=$((max_rifs < max_cnts ? max_rifs : max_cnts))
+
+ if ((! should_fail)); then
+ echo $target
+ else
+ echo $((target + 1))
+ fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
index aa74be9f47c8..d3d9e60d6ddf 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
@@ -77,6 +77,7 @@ tc_flower_rules_create()
filter add dev $h2 ingress \
prot ipv6 \
pref 1000 \
+ handle 42$i \
flower $tcflags dst_ip $(tc_flower_addr $i) \
action drop
EOF
@@ -121,3 +122,19 @@ tc_flower_test()
tcflags="skip_sw"
__tc_flower_test $count $should_fail
}
+
+tc_flower_traffic_test()
+{
+ local count=$1; shift
+ local i
+
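+ # Send one packet to a logarithmic sample of the rules (installed
+ # above with handle 42$i) and verify each sampled rule matched once.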
+ for ((i = count - 1; i > 0; i /= 2)); do
+ $MZ -6 $h1 -c 1 -d 20msec -p 100 -a own -b $(mac_get $h2) \
+ -A $(tc_flower_addr 0) -B $(tc_flower_addr $i) \
+ -q -t udp sp=54321,dp=12345
+ done
+ for ((i = count - 1; i > 0; i /= 2)); do
+ tc_check_packets "dev $h2 ingress" 42$i 1
+ check_err $? "Traffic not seen at rule #$i"
+ done
+}
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 81470a99ed1c..22423c871ed6 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -37,11 +37,38 @@ ifeq ($(ARCH),riscv)
UNAME_M := riscv
endif
-LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/rbtree.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c
-LIBKVM_x86_64 = lib/x86_64/apic.c lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
-LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c lib/aarch64/handlers.S lib/aarch64/spinlock.c lib/aarch64/gic.c lib/aarch64/gic_v3.c lib/aarch64/vgic.c
-LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
-LIBKVM_riscv = lib/riscv/processor.c lib/riscv/ucall.c
+LIBKVM += lib/assert.c
+LIBKVM += lib/elf.c
+LIBKVM += lib/guest_modes.c
+LIBKVM += lib/io.c
+LIBKVM += lib/kvm_util.c
+LIBKVM += lib/perf_test_util.c
+LIBKVM += lib/rbtree.c
+LIBKVM += lib/sparsebit.c
+LIBKVM += lib/test_util.c
+
+LIBKVM_x86_64 += lib/x86_64/apic.c
+LIBKVM_x86_64 += lib/x86_64/handlers.S
+LIBKVM_x86_64 += lib/x86_64/perf_test_util.c
+LIBKVM_x86_64 += lib/x86_64/processor.c
+LIBKVM_x86_64 += lib/x86_64/svm.c
+LIBKVM_x86_64 += lib/x86_64/ucall.c
+LIBKVM_x86_64 += lib/x86_64/vmx.c
+
+LIBKVM_aarch64 += lib/aarch64/gic.c
+LIBKVM_aarch64 += lib/aarch64/gic_v3.c
+LIBKVM_aarch64 += lib/aarch64/handlers.S
+LIBKVM_aarch64 += lib/aarch64/processor.c
+LIBKVM_aarch64 += lib/aarch64/spinlock.c
+LIBKVM_aarch64 += lib/aarch64/ucall.c
+LIBKVM_aarch64 += lib/aarch64/vgic.c
+
+LIBKVM_s390x += lib/s390x/diag318_test_handler.c
+LIBKVM_s390x += lib/s390x/processor.c
+LIBKVM_s390x += lib/s390x/ucall.c
+
+LIBKVM_riscv += lib/riscv/processor.c
+LIBKVM_riscv += lib/riscv/ucall.c
TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test
TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
@@ -173,12 +200,13 @@ LDFLAGS += -pthread $(no-pie-option) $(pgste-option)
# $(TEST_GEN_PROGS) starts with $(OUTPUT)/
include ../lib.mk
-STATIC_LIBS := $(OUTPUT)/libkvm.a
LIBKVM_C := $(filter %.c,$(LIBKVM))
LIBKVM_S := $(filter %.S,$(LIBKVM))
LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
-EXTRA_CLEAN += $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(STATIC_LIBS) cscope.*
+LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)
+
+EXTRA_CLEAN += $(LIBKVM_OBJS) cscope.*
x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ))))
$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c
@@ -187,13 +215,8 @@ $(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c
$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S
$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
-LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)
-$(OUTPUT)/libkvm.a: $(LIBKVM_OBJS)
- $(AR) crs $@ $^
-
x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
-all: $(STATIC_LIBS)
-$(TEST_GEN_PROGS): $(STATIC_LIBS)
+$(TEST_GEN_PROGS): $(LIBKVM_OBJS)
cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib ..
cscope:
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 7b47ae4f952e..d60a34cdfaee 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -336,8 +336,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
static void help(char *name)
{
puts("");
- printf("usage: %s [-h] [-i iterations] [-p offset] [-g]"
- "[-m mode] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]"
+ printf("usage: %s [-h] [-i iterations] [-p offset] [-g] "
+ "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]"
"[-x memslots]\n", name);
puts("");
printf(" -i: specify iteration counts (default: %"PRIu64")\n",
@@ -351,6 +351,7 @@ static void help(char *name)
printf(" -p: specify guest physical test memory offset\n"
" Warning: a low offset can conflict with the loaded test code.\n");
guest_modes_help();
+ printf(" -n: Run the vCPUs in nested mode (L2)\n");
printf(" -b: specify the size of the memory region which should be\n"
" dirtied by each vCPU. e.g. 10M or 3G.\n"
" (default: 1G)\n");
@@ -387,7 +388,7 @@ int main(int argc, char *argv[])
guest_modes_append_default();
- while ((opt = getopt(argc, argv, "ghi:p:m:b:f:v:os:x:")) != -1) {
+ while ((opt = getopt(argc, argv, "ghi:p:m:nb:f:v:os:x:")) != -1) {
switch (opt) {
case 'g':
dirty_log_manual_caps = 0;
@@ -401,6 +402,9 @@ int main(int argc, char *argv[])
case 'm':
guest_modes_cmdline(optarg);
break;
+ case 'n':
+ perf_test_args.nested = true;
+ break;
case 'b':
guest_percpu_mem_size = parse_size(optarg);
break;
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index a86f953d8d36..d822cb670f1c 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -30,10 +30,15 @@ struct perf_test_vcpu_args {
struct perf_test_args {
struct kvm_vm *vm;
+ /* The starting address and size of the guest test region. */
uint64_t gpa;
+ uint64_t size;
uint64_t guest_page_size;
int wr_fract;
+ /* Run vCPUs in L2 instead of L1, if the architecture supports it. */
+ bool nested;
+
struct perf_test_vcpu_args vcpu_args[KVM_MAX_VCPUS];
};
@@ -49,5 +54,9 @@ void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract);
void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *));
void perf_test_join_vcpu_threads(int vcpus);
+void perf_test_guest_code(uint32_t vcpu_id);
+
+uint64_t perf_test_nested_pages(int nr_vcpus);
+void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus);
#endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index d0d51adec76e..6ce185449259 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -482,13 +482,23 @@ void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
void vm_xsave_req_perm(int bit);
-enum x86_page_size {
- X86_PAGE_SIZE_4K = 0,
- X86_PAGE_SIZE_2M,
- X86_PAGE_SIZE_1G,
+enum pg_level {
+ PG_LEVEL_NONE,
+ PG_LEVEL_4K,
+ PG_LEVEL_2M,
+ PG_LEVEL_1G,
+ PG_LEVEL_512G,
+ PG_LEVEL_NUM
};
-void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- enum x86_page_size page_size);
+
+#define PG_LEVEL_SHIFT(_level) ((_level - 1) * 9 + 12)
+#define PG_LEVEL_SIZE(_level) (1ull << PG_LEVEL_SHIFT(_level))
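+/* e.g. PG_LEVEL_SHIFT(PG_LEVEL_2M) = (2 - 1) * 9 + 12 = 21, i.e. 2M pages. */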
+
+#define PG_SIZE_4K PG_LEVEL_SIZE(PG_LEVEL_4K)
+#define PG_SIZE_2M PG_LEVEL_SIZE(PG_LEVEL_2M)
+#define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G)
+
+void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level);
/*
* Basic CPU control in CR0
@@ -505,9 +515,6 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
#define X86_CR0_CD (1UL<<30) /* Cache Disable */
#define X86_CR0_PG (1UL<<31) /* Paging */
-/* VMX_EPT_VPID_CAP bits */
-#define VMX_EPT_VPID_CAP_AD_BITS (1ULL << 21)
-
#define XSTATE_XTILE_CFG_BIT 17
#define XSTATE_XTILE_DATA_BIT 18
diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
index 583ceb0d1457..cc3604f8f1d3 100644
--- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
@@ -96,6 +96,9 @@
#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f
#define VMX_MISC_SAVE_EFER_LMA 0x00000020
+#define VMX_EPT_VPID_CAP_1G_PAGES 0x00020000
+#define VMX_EPT_VPID_CAP_AD_BITS 0x00200000
+
#define EXIT_REASON_FAILED_VMENTRY 0x80000000
#define EXIT_REASON_EXCEPTION_NMI 0
#define EXIT_REASON_EXTERNAL_INTERRUPT 1
@@ -606,6 +609,7 @@ bool load_vmcs(struct vmx_pages *vmx);
bool nested_vmx_supported(void);
void nested_vmx_check_supported(void);
+bool ept_1g_pages_supported(void);
void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
uint64_t nested_paddr, uint64_t paddr);
@@ -613,6 +617,8 @@ void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
uint64_t nested_paddr, uint64_t paddr, uint64_t size);
void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t memslot);
+void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t addr, uint64_t size);
void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t eptp_memslot);
void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm);
diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
index 722df3a28791..f989ff91f022 100644
--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
@@ -40,7 +40,7 @@ static bool all_vcpu_threads_running;
* Continuously write to the first 8 bytes of each page in the
* specified region.
*/
-static void guest_code(uint32_t vcpu_id)
+void perf_test_guest_code(uint32_t vcpu_id)
{
struct perf_test_args *pta = &perf_test_args;
struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_id];
@@ -108,8 +108,9 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
{
struct perf_test_args *pta = &perf_test_args;
struct kvm_vm *vm;
- uint64_t guest_num_pages;
+ uint64_t guest_num_pages, slot0_pages = DEFAULT_GUEST_PHY_PAGES;
uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src);
+ uint64_t region_end_gfn;
int i;
pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
@@ -135,33 +136,53 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
slots);
/*
+ * If using nested, allocate extra pages for the nested page tables and
+ * in-memory data structures.
+ */
+ if (pta->nested)
+ slot0_pages += perf_test_nested_pages(vcpus);
+
+ /*
* Pass guest_num_pages to populate the page tables for test memory.
* The memory is also added to memslot 0, but that's a benign side
* effect as KVM allows aliasing HVAs in memslots.
*/
- vm = vm_create_with_vcpus(mode, vcpus, DEFAULT_GUEST_PHY_PAGES,
- guest_num_pages, 0, guest_code, NULL);
+ vm = vm_create_with_vcpus(mode, vcpus, slot0_pages, guest_num_pages, 0,
+ perf_test_guest_code, NULL);
pta->vm = vm;
+ /* Put the test region at the top of guest physical memory. */
+ region_end_gfn = vm_get_max_gfn(vm) + 1;
+
+#ifdef __x86_64__
+ /*
+ * When running vCPUs in L2, restrict the test region to 48 bits to
+ * avoid needing 5-level page tables to identity map L2.
+ */
+ if (pta->nested)
+ region_end_gfn = min(region_end_gfn, (1UL << 48) / pta->guest_page_size);
+#endif
/*
* If there should be more memory in the guest test region than there
* can be pages in the guest, it will definitely cause problems.
*/
- TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
+ TEST_ASSERT(guest_num_pages < region_end_gfn,
"Requested more guest memory than address space allows.\n"
" guest pages: %" PRIx64 " max gfn: %" PRIx64
" vcpus: %d wss: %" PRIx64 "]\n",
- guest_num_pages, vm_get_max_gfn(vm), vcpus,
+ guest_num_pages, region_end_gfn - 1, vcpus,
vcpu_memory_bytes);
- pta->gpa = (vm_get_max_gfn(vm) - guest_num_pages) * pta->guest_page_size;
+ pta->gpa = (region_end_gfn - guest_num_pages) * pta->guest_page_size;
pta->gpa = align_down(pta->gpa, backing_src_pagesz);
#ifdef __s390x__
/* Align to 1M (segment size) */
pta->gpa = align_down(pta->gpa, 1 << 20);
#endif
- pr_info("guest physical test memory offset: 0x%lx\n", pta->gpa);
+ pta->size = guest_num_pages * pta->guest_page_size;
+ pr_info("guest physical test memory: [0x%lx, 0x%lx)\n",
+ pta->gpa, pta->gpa + pta->size);
/* Add extra memory slots for testing */
for (i = 0; i < slots; i++) {
@@ -178,6 +199,11 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
perf_test_setup_vcpus(vm, vcpus, vcpu_memory_bytes, partition_vcpu_memory_access);
+ if (pta->nested) {
+ pr_info("Configuring vCPUs to run in L2 (nested).\n");
+ perf_test_setup_nested(vm, vcpus);
+ }
+
ucall_init(vm, NULL);
/* Export the shared variables to the guest. */
@@ -198,6 +224,17 @@ void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract)
sync_global_to_guest(vm, perf_test_args);
}
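+/*
+ * Weak defaults for architectures without nested support; x86_64 overrides
+ * both in lib/x86_64/perf_test_util.c.
+ */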
+uint64_t __weak perf_test_nested_pages(int nr_vcpus)
+{
+ return 0;
+}
+
+void __weak perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus)
+{
+ pr_info("%s() not support on this architecture, skipping.\n", __func__);
+ exit(KSFT_SKIP);
+}
+
static void *vcpu_thread_main(void *data)
{
struct vcpu_thread *vcpu = data;
diff --git a/tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c b/tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c
new file mode 100644
index 000000000000..e258524435a0
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * x86_64-specific extensions to perf_test_util.c.
+ *
+ * Copyright (C) 2022, Google, Inc.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "perf_test_util.h"
+#include "../kvm_util_internal.h"
+#include "processor.h"
+#include "vmx.h"
+
+void perf_test_l2_guest_code(uint64_t vcpu_id)
+{
+ perf_test_guest_code(vcpu_id);
+ vmcall();
+}
+
+extern char perf_test_l2_guest_entry[];
+__asm__(
+"perf_test_l2_guest_entry:"
+" mov (%rsp), %rdi;"
+" call perf_test_l2_guest_code;"
+" ud2;"
+);
+
+static void perf_test_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ unsigned long *rsp;
+
+ GUEST_ASSERT(vmx->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+ GUEST_ASSERT(load_vmcs(vmx));
+ GUEST_ASSERT(ept_1g_pages_supported());
+
+ rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1];
+ *rsp = vcpu_id;
+ prepare_vmcs(vmx, perf_test_l2_guest_entry, rsp);
+
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ GUEST_DONE();
+}
+
+uint64_t perf_test_nested_pages(int nr_vcpus)
+{
+ /*
+ * 513 page tables is enough to identity-map 256 TiB of L2 with 1G
+ * pages and 4-level paging: one PML4 table plus 512 PDPTs, each of
+ * whose 512 entries maps 1 GiB (512 * 512 GiB = 256 TiB). On top of
+ * that, a few pages per-vCPU are needed for data structures such as
+ * the VMCS.
+ */
+ return 513 + 10 * nr_vcpus;
+}
+
+void perf_test_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm)
+{
+ uint64_t start, end;
+
+ prepare_eptp(vmx, vm, 0);
+
+ /*
+ * Identity map the first 4G and the test region with 1G pages so that
+ * KVM can shadow the EPT12 with the maximum huge page size supported
+ * by the backing source.
+ */
+ nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL);
+
+ start = align_down(perf_test_args.gpa, PG_SIZE_1G);
+ end = align_up(perf_test_args.gpa + perf_test_args.size, PG_SIZE_1G);
+ nested_identity_map_1g(vmx, vm, start, end - start);
+}
+
+void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus)
+{
+ struct vmx_pages *vmx, *vmx0 = NULL;
+ struct kvm_regs regs;
+ vm_vaddr_t vmx_gva;
+ int vcpu_id;
+
+ nested_vmx_check_supported();
+
+ for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+ vmx = vcpu_alloc_vmx(vm, &vmx_gva);
+
+ if (vcpu_id == 0) {
+ perf_test_setup_ept(vmx, vm);
+ vmx0 = vmx;
+ } else {
+ /* Share the same EPT table across all vCPUs. */
+ vmx->eptp = vmx0->eptp;
+ vmx->eptp_hva = vmx0->eptp_hva;
+ vmx->eptp_gpa = vmx0->eptp_gpa;
+ }
+
+ /*
+ * Override the vCPU to run perf_test_l1_guest_code() which will
+ * bounce it into L2 before calling perf_test_guest_code().
+ */
+ vcpu_regs_get(vm, vcpu_id, &regs);
+ regs.rip = (unsigned long) perf_test_l1_guest_code;
+ vcpu_regs_set(vm, vcpu_id, &regs);
+ vcpu_args_set(vm, vcpu_id, 2, vmx_gva, vcpu_id);
+ }
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 33ea5e9955d9..ead7011ee8f6 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -158,7 +158,7 @@ static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr,
int level)
{
uint64_t *page_table = addr_gpa2hva(vm, pt_pfn << vm->page_shift);
- int index = vaddr >> (vm->page_shift + level * 9) & 0x1ffu;
+ int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
return &page_table[index];
}
@@ -167,14 +167,14 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
uint64_t pt_pfn,
uint64_t vaddr,
uint64_t paddr,
- int level,
- enum x86_page_size page_size)
+ int current_level,
+ int target_level)
{
- uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
+ uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, current_level);
if (!(*pte & PTE_PRESENT_MASK)) {
*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
- if (level == page_size)
+ if (current_level == target_level)
*pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK);
else
*pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
@@ -184,20 +184,19 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
* a hugepage at this level, and that there isn't a hugepage at
* this level.
*/
- TEST_ASSERT(level != page_size,
+ TEST_ASSERT(current_level != target_level,
"Cannot create hugepage at level: %u, vaddr: 0x%lx\n",
- page_size, vaddr);
+ current_level, vaddr);
TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
"Cannot create page table at level: %u, vaddr: 0x%lx\n",
- level, vaddr);
+ current_level, vaddr);
}
return pte;
}
-void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- enum x86_page_size page_size)
+void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
{
- const uint64_t pg_size = 1ull << ((page_size * 9) + 12);
+ const uint64_t pg_size = PG_LEVEL_SIZE(level);
uint64_t *pml4e, *pdpe, *pde;
uint64_t *pte;
@@ -222,20 +221,20 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
* early if a hugepage was created.
*/
pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift,
- vaddr, paddr, 3, page_size);
+ vaddr, paddr, PG_LEVEL_512G, level);
if (*pml4e & PTE_LARGE_MASK)
return;
- pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, 2, page_size);
+ pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, PG_LEVEL_1G, level);
if (*pdpe & PTE_LARGE_MASK)
return;
- pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, 1, page_size);
+ pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, PG_LEVEL_2M, level);
if (*pde & PTE_LARGE_MASK)
return;
/* Fill in page table entry. */
- pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, 0);
+ pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, PG_LEVEL_4K);
TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
"PTE already present for 4k page at vaddr: 0x%lx\n", vaddr);
*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
@@ -243,7 +242,7 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
{
- __virt_pg_map(vm, vaddr, paddr, X86_PAGE_SIZE_4K);
+ __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K);
}
static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index d089d8b850b5..b77a01d0a271 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -198,6 +198,16 @@ bool load_vmcs(struct vmx_pages *vmx)
return true;
}
+static bool ept_vpid_cap_supported(uint64_t mask)
+{
+ return rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & mask;
+}
+
+bool ept_1g_pages_supported(void)
+{
+ return ept_vpid_cap_supported(VMX_EPT_VPID_CAP_1G_PAGES);
+}
+
/*
* Initialize the control fields to the most basic settings possible.
*/
@@ -215,7 +225,7 @@ static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
struct eptPageTablePointer eptp = {
.memory_type = VMX_BASIC_MEM_TYPE_WB,
.page_walk_length = 3, /* + 1 */
- .ad_enabled = !!(rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & VMX_EPT_VPID_CAP_AD_BITS),
+ .ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS),
.address = vmx->eptp_gpa >> PAGE_SHIFT_4K,
};
@@ -392,80 +402,93 @@ void nested_vmx_check_supported(void)
}
}
-void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr)
+static void nested_create_pte(struct kvm_vm *vm,
+ struct eptPageTableEntry *pte,
+ uint64_t nested_paddr,
+ uint64_t paddr,
+ int current_level,
+ int target_level)
+{
+ if (!pte->readable) {
+ pte->writable = true;
+ pte->readable = true;
+ pte->executable = true;
+ pte->page_size = (current_level == target_level);
+ if (pte->page_size)
+ pte->address = paddr >> vm->page_shift;
+ else
+ pte->address = vm_alloc_page_table(vm) >> vm->page_shift;
+ } else {
+ /*
+ * Entry already present. Assert that the caller doesn't want
+ * a hugepage at this level, and that there isn't a hugepage at
+ * this level.
+ */
+ TEST_ASSERT(current_level != target_level,
+ "Cannot create hugepage at level: %u, nested_paddr: 0x%lx\n",
+ current_level, nested_paddr);
+ TEST_ASSERT(!pte->page_size,
+ "Cannot create page table at level: %u, nested_paddr: 0x%lx\n",
+ current_level, nested_paddr);
+ }
+}
+
+
+void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr, int target_level)
{
- uint16_t index[4];
- struct eptPageTableEntry *pml4e;
+ const uint64_t page_size = PG_LEVEL_SIZE(target_level);
+ struct eptPageTableEntry *pt = vmx->eptp_hva, *pte;
+ uint16_t index;
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
- TEST_ASSERT((nested_paddr % vm->page_size) == 0,
+ TEST_ASSERT((nested_paddr >> 48) == 0,
+ "Nested physical address 0x%lx requires 5-level paging",
+ nested_paddr);
+ TEST_ASSERT((nested_paddr % page_size) == 0,
"Nested physical address not on page boundary,\n"
- " nested_paddr: 0x%lx vm->page_size: 0x%x",
- nested_paddr, vm->page_size);
+ " nested_paddr: 0x%lx page_size: 0x%lx",
+ nested_paddr, page_size);
TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn,
"Physical address beyond beyond maximum supported,\n"
" nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
paddr, vm->max_gfn, vm->page_size);
- TEST_ASSERT((paddr % vm->page_size) == 0,
+ TEST_ASSERT((paddr % page_size) == 0,
"Physical address not on page boundary,\n"
- " paddr: 0x%lx vm->page_size: 0x%x",
- paddr, vm->page_size);
+ " paddr: 0x%lx page_size: 0x%lx",
+ paddr, page_size);
TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
"Physical address beyond beyond maximum supported,\n"
" paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
paddr, vm->max_gfn, vm->page_size);
- index[0] = (nested_paddr >> 12) & 0x1ffu;
- index[1] = (nested_paddr >> 21) & 0x1ffu;
- index[2] = (nested_paddr >> 30) & 0x1ffu;
- index[3] = (nested_paddr >> 39) & 0x1ffu;
-
- /* Allocate page directory pointer table if not present. */
- pml4e = vmx->eptp_hva;
- if (!pml4e[index[3]].readable) {
- pml4e[index[3]].address = vm_alloc_page_table(vm) >> vm->page_shift;
- pml4e[index[3]].writable = true;
- pml4e[index[3]].readable = true;
- pml4e[index[3]].executable = true;
- }
+ for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) {
+ index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
+ pte = &pt[index];
- /* Allocate page directory table if not present. */
- struct eptPageTableEntry *pdpe;
- pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
- if (!pdpe[index[2]].readable) {
- pdpe[index[2]].address = vm_alloc_page_table(vm) >> vm->page_shift;
- pdpe[index[2]].writable = true;
- pdpe[index[2]].readable = true;
- pdpe[index[2]].executable = true;
- }
+ nested_create_pte(vm, pte, nested_paddr, paddr, level, target_level);
- /* Allocate page table if not present. */
- struct eptPageTableEntry *pde;
- pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
- if (!pde[index[1]].readable) {
- pde[index[1]].address = vm_alloc_page_table(vm) >> vm->page_shift;
- pde[index[1]].writable = true;
- pde[index[1]].readable = true;
- pde[index[1]].executable = true;
- }
+ if (pte->page_size)
+ break;
- /* Fill in page table entry. */
- struct eptPageTableEntry *pte;
- pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
- pte[index[0]].address = paddr >> vm->page_shift;
- pte[index[0]].writable = true;
- pte[index[0]].readable = true;
- pte[index[0]].executable = true;
+ pt = addr_gpa2hva(vm, pte->address * vm->page_size);
+ }
/*
* For now mark these as accessed and dirty because the only
* testcase we have needs that. Can be reconsidered later.
*/
- pte[index[0]].accessed = true;
- pte[index[0]].dirty = true;
+ pte->accessed = true;
+ pte->dirty = true;
+}
+
+void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr)
+{
+ __nested_pg_map(vmx, vm, nested_paddr, paddr, PG_LEVEL_4K);
}
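
The rewritten walk replaces the hand-built index[] array with per-level shifts. A compilable sketch of that index arithmetic, assuming the selftest convention that PG_LEVEL_4K == 1 and PG_LEVEL_SHIFT(level) == (level - 1) * 9 + 12:

#include <stdint.h>
#include <stdio.h>

/* Assumed to mirror the selftest's PG_LEVEL_* enum. */
enum { PG_LEVEL_4K = 1, PG_LEVEL_2M, PG_LEVEL_1G, PG_LEVEL_512G };

static int pg_level_shift(int level)
{
	return (level - 1) * 9 + 12;
}

int main(void)
{
	uint64_t nested_paddr = 0x40201000;
	int level;

	/* Same order as __nested_pg_map(): walk from the root down. */
	for (level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--)
		printf("level %d: shift %2d, index %u\n", level,
		       pg_level_shift(level),
		       (unsigned)((nested_paddr >> pg_level_shift(level)) & 0x1ffu));
	return 0;
}
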
/*
@@ -476,7 +499,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
* nested_paddr - Nested guest physical address to map
* paddr - VM Physical Address
* size - The size of the range to map
- * eptp_memslot - Memory region slot for new virtual translation tables
+ * level - The level at which to map the range
*
* Output Args: None
*
@@ -485,22 +508,29 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
* Within the VM given by vm, creates nested guest translations that map
* the page range starting at nested_paddr onto the page range starting
* at paddr.
*/
-void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint64_t size)
+void __nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr, uint64_t size,
+ int level)
{
- size_t page_size = vm->page_size;
+ size_t page_size = PG_LEVEL_SIZE(level);
size_t npages = size / page_size;
TEST_ASSERT(nested_paddr + size > nested_paddr, "Nested paddr overflow");
TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
while (npages--) {
- nested_pg_map(vmx, vm, nested_paddr, paddr);
+ __nested_pg_map(vmx, vm, nested_paddr, paddr, level);
nested_paddr += page_size;
paddr += page_size;
}
}
+void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr, uint64_t size)
+{
+ __nested_map(vmx, vm, nested_paddr, paddr, size, PG_LEVEL_4K);
+}
+
/* Prepare an identity extended page table that maps all the
* physical pages in VM.
*/
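
The __nested_map() loop above simply chops the range into level-sized pages and maps each chunk, so both addresses and the size must be aligned to the chosen level. A hedged sketch of that chunking, detached from the EPT plumbing (pg_level_size() is an assumption standing in for PG_LEVEL_SIZE()):

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Assumption standing in for the selftest's PG_LEVEL_SIZE(). */
static uint64_t pg_level_size(int level)
{
	return 1ULL << ((level - 1) * 9 + 12);
}

static void map_range(uint64_t nested_paddr, uint64_t paddr,
		      uint64_t size, int level)
{
	uint64_t page_size = pg_level_size(level);
	uint64_t npages = size / page_size;

	/* Both addresses and the size must be level-aligned. */
	assert(!(nested_paddr % page_size) && !(paddr % page_size));
	assert(!(size % page_size));
	while (npages--) {
		printf("map 0x%" PRIx64 " -> 0x%" PRIx64 "\n",
		       nested_paddr, paddr);
		nested_paddr += page_size;
		paddr += page_size;
	}
}

int main(void)
{
	map_range(0, 0, 2ULL << 30, 3);	/* two 1GiB mappings */
	return 0;
}
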
@@ -525,6 +555,13 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
}
}
+/* Identity map a region with 1GiB pages. */
+void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t addr, uint64_t size)
+{
+ __nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G);
+}
+
void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t eptp_memslot)
{
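
Putting the pieces together, a test that wants 1GiB EPT mappings would gate on the capability and then identity-map its region. A hedged usage sketch, not taken from this patch; it assumes a VM and vmx pages already created through the usual vcpu_alloc_vmx() path, with print_skip() and KSFT_SKIP coming from the selftest harness:

	/* Illustrative only: vmx and vm are assumed to exist already. */
	if (!ept_1g_pages_supported()) {
		print_skip("1GiB EPT pages not supported");
		exit(KSFT_SKIP);
	}
	prepare_eptp(vmx, vm, 0);
	/* Identity map the first 4GiB of guest physical memory. */
	nested_identity_map_1g(vmx, vm, 0, 4ULL << 30);
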
diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/max_guest_memory_test.c
index 3875c4b23a04..15f046e19cb2 100644
--- a/tools/testing/selftests/kvm/max_guest_memory_test.c
+++ b/tools/testing/selftests/kvm/max_guest_memory_test.c
@@ -244,7 +244,7 @@ int main(int argc, char *argv[])
#ifdef __x86_64__
/* Identity map memory in the guest using 1GiB pages. */
for (i = 0; i < slot_size; i += size_1gb)
- __virt_pg_map(vm, gpa + i, gpa + i, X86_PAGE_SIZE_1G);
+ __virt_pg_map(vm, gpa + i, gpa + i, PG_LEVEL_1G);
#else
for (i = 0; i < slot_size; i += vm_get_page_size(vm))
virt_pg_map(vm, gpa + i, gpa + i);
diff --git a/tools/testing/selftests/kvm/x86_64/mmu_role_test.c b/tools/testing/selftests/kvm/x86_64/mmu_role_test.c
index da2325fcad87..bdecd532f935 100644
--- a/tools/testing/selftests/kvm/x86_64/mmu_role_test.c
+++ b/tools/testing/selftests/kvm/x86_64/mmu_role_test.c
@@ -35,7 +35,7 @@ static void mmu_role_test(u32 *cpuid_reg, u32 evil_cpuid_val)
run = vcpu_state(vm, VCPU_ID);
/* Map a 1GiB page without a backing memslot. */
- __virt_pg_map(vm, MMIO_GPA, MMIO_GPA, X86_PAGE_SIZE_1G);
+ __virt_pg_map(vm, MMIO_GPA, MMIO_GPA, PG_LEVEL_1G);
r = _vcpu_run(vm, VCPU_ID);
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 2a2d240cdc1b..1a5cc3cd97ec 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -7,10 +7,31 @@ else ifneq ($(filter -%,$(LLVM)),)
LLVM_SUFFIX := $(LLVM)
endif
-CC := $(LLVM_PREFIX)clang$(LLVM_SUFFIX)
+CLANG_TARGET_FLAGS_arm := arm-linux-gnueabi
+CLANG_TARGET_FLAGS_arm64 := aarch64-linux-gnu
+CLANG_TARGET_FLAGS_hexagon := hexagon-linux-musl
+CLANG_TARGET_FLAGS_m68k := m68k-linux-gnu
+CLANG_TARGET_FLAGS_mips := mipsel-linux-gnu
+CLANG_TARGET_FLAGS_powerpc := powerpc64le-linux-gnu
+CLANG_TARGET_FLAGS_riscv := riscv64-linux-gnu
+CLANG_TARGET_FLAGS_s390 := s390x-linux-gnu
+CLANG_TARGET_FLAGS_x86 := x86_64-linux-gnu
+CLANG_TARGET_FLAGS := $(CLANG_TARGET_FLAGS_$(ARCH))
+
+ifeq ($(CROSS_COMPILE),)
+ifeq ($(CLANG_TARGET_FLAGS),)
$(error Specify CROSS_COMPILE or add '--target=' option to lib.mk)
+else
+CLANG_FLAGS += --target=$(CLANG_TARGET_FLAGS)
+endif # CLANG_TARGET_FLAGS
+else
+CLANG_FLAGS += --target=$(notdir $(CROSS_COMPILE:%-=%))
+endif # CROSS_COMPILE
+
+CC := $(LLVM_PREFIX)clang$(LLVM_SUFFIX) $(CLANG_FLAGS) -fintegrated-as
else
CC := $(CROSS_COMPILE)gcc
-endif
+endif # LLVM
ifeq (0,$(MAKELEVEL))
ifeq ($(OUTPUT),)
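
When CROSS_COMPILE is set, the clang target is derived from the toolchain prefix: $(CROSS_COMPILE:%-=%) strips one trailing '-' and $(notdir ...) drops the directory part. The same transformation written out in C, as a sanity check of what the Make functions compute (the example path is illustrative):

#include <stdio.h>
#include <string.h>

int main(void)
{
	char cross[] = "/opt/cross/bin/aarch64-linux-gnu-";	/* example value */
	size_t len = strlen(cross);
	const char *slash, *triple;

	if (len && cross[len - 1] == '-')
		cross[len - 1] = '\0';		/* $(CROSS_COMPILE:%-=%) */

	slash = strrchr(cross, '/');
	triple = slash ? slash + 1 : cross;	/* $(notdir ...) */

	printf("--target=%s\n", triple);	/* --target=aarch64-linux-gnu */
	return 0;
}
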
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index b984f8c8d523..a29f79618934 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -37,4 +37,3 @@ gro
ioam6_parser
toeplitz
cmsg_sender
-bind_bhash_test
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 464df13831f2..7ea54af55490 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -59,7 +59,6 @@ TEST_GEN_FILES += toeplitz
TEST_GEN_FILES += cmsg_sender
TEST_GEN_FILES += stress_reuseport_listen
TEST_PROGS += test_vxlan_vnifiltering.sh
-TEST_GEN_FILES += bind_bhash_test
TEST_FILES := settings
@@ -70,5 +69,4 @@ include bpf/Makefile
$(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread
-$(OUTPUT)/bind_bhash_test: LDLIBS += -lpthread
$(OUTPUT)/tcp_inq: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/net/bind_bhash_test.c b/tools/testing/selftests/net/bind_bhash_test.c
deleted file mode 100644
index 252e73754e76..000000000000
--- a/tools/testing/selftests/net/bind_bhash_test.c
+++ /dev/null
@@ -1,119 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * This times how long it takes to bind to a port when the port already
- * has multiple sockets in its bhash table.
- *
- * In the setup(), we populate the port's bhash table with
- * MAX_THREADS * MAX_CONNECTIONS number of entries.
- */
-
-#include <unistd.h>
-#include <stdio.h>
-#include <netdb.h>
-#include <pthread.h>
-
-#define MAX_THREADS 600
-#define MAX_CONNECTIONS 40
-
-static const char *bind_addr = "::1";
-static const char *port;
-
-static int fd_array[MAX_THREADS][MAX_CONNECTIONS];
-
-static int bind_socket(int opt, const char *addr)
-{
- struct addrinfo *res, hint = {};
- int sock_fd, reuse = 1, err;
-
- sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
- if (sock_fd < 0) {
- perror("socket fd err");
- return -1;
- }
-
- hint.ai_family = AF_INET6;
- hint.ai_socktype = SOCK_STREAM;
-
- err = getaddrinfo(addr, port, &hint, &res);
- if (err) {
- perror("getaddrinfo failed");
- return -1;
- }
-
- if (opt) {
- err = setsockopt(sock_fd, SOL_SOCKET, opt, &reuse, sizeof(reuse));
- if (err) {
- perror("setsockopt failed");
- return -1;
- }
- }
-
- err = bind(sock_fd, res->ai_addr, res->ai_addrlen);
- if (err) {
- perror("failed to bind to port");
- return -1;
- }
-
- return sock_fd;
-}
-
-static void *setup(void *arg)
-{
- int sock_fd, i;
- int *array = (int *)arg;
-
- for (i = 0; i < MAX_CONNECTIONS; i++) {
- sock_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, bind_addr);
- if (sock_fd < 0)
- return NULL;
- array[i] = sock_fd;
- }
-
- return NULL;
-}
-
-int main(int argc, const char *argv[])
-{
- int listener_fd, sock_fd, i, j;
- pthread_t tid[MAX_THREADS];
- clock_t begin, end;
-
- if (argc != 2) {
- printf("Usage: listener <port>\n");
- return -1;
- }
-
- port = argv[1];
-
- listener_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, bind_addr);
- if (listen(listener_fd, 100) < 0) {
- perror("listen failed");
- return -1;
- }
-
- /* Set up threads to populate the bhash table entry for the port */
- for (i = 0; i < MAX_THREADS; i++)
- pthread_create(&tid[i], NULL, setup, fd_array[i]);
-
- for (i = 0; i < MAX_THREADS; i++)
- pthread_join(tid[i], NULL);
-
- begin = clock();
-
- /* Bind to the same port on a different address */
- sock_fd = bind_socket(0, "2001:0db8:0:f101::1");
-
- end = clock();
-
- printf("time spent = %f\n", (double)(end - begin) / CLOCKS_PER_SEC);
-
- /* clean up */
- close(sock_fd);
- close(listener_fd);
- for (i = 0; i < MAX_THREADS; i++) {
- for (j = 0; i < MAX_THREADS; i++)
- close(fd_array[i][j]);
- }
-
- return 0;
-}
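
For reference, the timing idiom the removed benchmark relied on, as a standalone sketch. clock() reports processor time (user plus system), not wall-clock time, which is worth keeping in mind when reading numbers it produced:

#include <stdio.h>
#include <time.h>

int main(void)
{
	clock_t begin, end;

	begin = clock();
	/* ... operation under test, e.g. a single bind() ... */
	end = clock();

	printf("cpu time = %f s\n", (double)(end - begin) / CLOCKS_PER_SEC);
	return 0;
}
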
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index 54701c8b0cd7..03b586760164 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -70,6 +70,10 @@ NSB_LO_IP6=2001:db8:2::2
NL_IP=172.17.1.1
NL_IP6=2001:db8:4::1
+# multicast and broadcast addresses
+MCAST_IP=224.0.0.1
+BCAST_IP=255.255.255.255
+
MD5_PW=abc123
MD5_WRONG_PW=abc1234
@@ -308,6 +312,9 @@ addr2str()
127.0.0.1) echo "loopback";;
::1) echo "IPv6 loopback";;
+ ${BCAST_IP}) echo "broadcast";;
+ ${MCAST_IP}) echo "multicast";;
+
${NSA_IP}) echo "ns-A IP";;
${NSA_IP6}) echo "ns-A IPv6";;
${NSA_LO_IP}) echo "ns-A loopback IP";;
@@ -1793,12 +1800,33 @@ ipv4_addr_bind_novrf()
done
#
- # raw socket with nonlocal bind
+ # tests for nonlocal bind
#
a=${NL_IP}
log_start
- run_cmd nettest -s -R -P icmp -f -l ${a} -I ${NSA_DEV} -b
- log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address after device bind"
+ run_cmd nettest -s -R -f -l ${a} -b
+ log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address"
+
+ log_start
+ run_cmd nettest -s -f -l ${a} -b
+ log_test_addr ${a} $? 0 "TCP socket bind to nonlocal address"
+
+ log_start
+ run_cmd nettest -s -D -P icmp -f -l ${a} -b
+ log_test_addr ${a} $? 0 "ICMP socket bind to nonlocal address"
+
+ #
+ # check that ICMP sockets cannot bind to broadcast and multicast addresses
+ #
+ a=${BCAST_IP}
+ log_start
+ run_cmd nettest -s -D -P icmp -l ${a} -b
+ log_test_addr ${a} $? 1 "ICMP socket bind to broadcast address"
+
+ a=${MCAST_IP}
+ log_start
+ run_cmd nettest -s -D -P icmp -l ${a} -b
+ log_test_addr ${a} $? 1 "ICMP socket bind to multicast address"
#
# tcp sockets
@@ -1850,13 +1878,34 @@ ipv4_addr_bind_vrf()
log_test_addr ${a} $? 1 "Raw socket bind to out of scope address after VRF bind"
#
- # raw socket with nonlocal bind
+ # tests for nonlocal bind
#
a=${NL_IP}
log_start
- run_cmd nettest -s -R -P icmp -f -l ${a} -I ${VRF} -b
+ run_cmd nettest -s -R -f -l ${a} -I ${VRF} -b
log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address after VRF bind"
+ log_start
+ run_cmd nettest -s -f -l ${a} -I ${VRF} -b
+ log_test_addr ${a} $? 0 "TCP socket bind to nonlocal address after VRF bind"
+
+ log_start
+ run_cmd nettest -s -D -P icmp -f -l ${a} -I ${VRF} -b
+ log_test_addr ${a} $? 0 "ICMP socket bind to nonlocal address after VRF bind"
+
+ #
+ # check that ICMP sockets cannot bind to broadcast and multicast addresses
+ #
+ a=${BCAST_IP}
+ log_start
+ run_cmd nettest -s -D -P icmp -l ${a} -I ${VRF} -b
+ log_test_addr ${a} $? 1 "ICMP socket bind to broadcast address after VRF bind"
+
+ a=${MCAST_IP}
+ log_start
+ run_cmd nettest -s -D -P icmp -l ${a} -I ${VRF} -b
+ log_test_addr ${a} $? 1 "ICMP socket bind to multicast address after VRF bind"
+
#
# tcp sockets
#
@@ -1889,10 +1938,12 @@ ipv4_addr_bind()
log_subsection "No VRF"
setup
+ set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
ipv4_addr_bind_novrf
log_subsection "With VRF"
setup "yes"
+ set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
ipv4_addr_bind_vrf
}
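
The new cases exercise unprivileged ICMP (ping) sockets, which is why the sysctl above widens net.ipv4.ping_group_range before each subsection. A standalone C sketch of the bind the tests expect to fail; the address matches the script's MCAST_IP, everything else is illustrative:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP);
	struct sockaddr_in sa = { .sin_family = AF_INET };

	if (fd < 0) {
		/* ping_group_range must cover this process's group */
		perror("socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP)");
		return 1;
	}
	inet_pton(AF_INET, "224.0.0.1", &sa.sin_addr);
	if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0)
		perror("bind to multicast (expected to fail)");
	close(fd);
	return 0;
}
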
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
index 28d568c48a73..91e431cd919e 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
@@ -141,12 +141,13 @@ switch_create()
ip link set dev $swp4 up
ip link add name br1 type bridge vlan_filtering 1
- ip link set dev br1 up
- __addr_add_del br1 add 192.0.2.129/32
- ip -4 route add 192.0.2.130/32 dev br1
team_create lag loadbalance $swp3 $swp4
ip link set dev lag master br1
+
+ ip link set dev br1 up
+ __addr_add_del br1 add 192.0.2.129/32
+ ip -4 route add 192.0.2.130/32 dev br1
}
switch_destroy()
diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/netfilter/nft_concat_range.sh
index b35010cc7f6a..a6991877e50c 100755
--- a/tools/testing/selftests/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/netfilter/nft_concat_range.sh
@@ -31,7 +31,7 @@ BUGS="flush_remove_add reload"
# List of possible paths to pktgen script from kernel tree for performance tests
PKTGEN_SCRIPT_PATHS="
- ../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
+ ../../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
pktgen/pktgen_bench_xmit_mode_netif_receive.sh"
# Definition of set types:
diff --git a/tools/testing/selftests/tc-testing/.gitignore b/tools/testing/selftests/tc-testing/.gitignore
index d52f65de23b4..9fe1cef72728 100644
--- a/tools/testing/selftests/tc-testing/.gitignore
+++ b/tools/testing/selftests/tc-testing/.gitignore
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
__pycache__/
*.pyc
-plugins/
*.xml
*.tap
tdc_config_local.py
diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c
index 6bb36ca71cb5..a309876d832f 100644
--- a/tools/testing/selftests/vm/gup_test.c
+++ b/tools/testing/selftests/vm/gup_test.c
@@ -209,7 +209,7 @@ int main(int argc, char **argv)
if (write)
gup.gup_flags |= FOLL_WRITE;
- gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
+ gup_fd = open(GUP_TEST_FILE, O_RDWR);
if (gup_fd == -1) {
switch (errno) {
case EACCES:
@@ -224,7 +224,7 @@ int main(int argc, char **argv)
printf("check if CONFIG_GUP_TEST is enabled in kernel config\n");
break;
default:
- perror("failed to open /sys/kernel/debug/gup_test");
+ perror("failed to open " GUP_TEST_FILE);
break;
}
exit(KSFT_SKIP);
diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c
index 2fcf24312da8..f5e4e0bbd081 100644
--- a/tools/testing/selftests/vm/ksm_tests.c
+++ b/tools/testing/selftests/vm/ksm_tests.c
@@ -54,6 +54,7 @@ static int ksm_write_sysfs(const char *file_path, unsigned long val)
}
if (fprintf(f, "%lu", val) < 0) {
perror("fprintf");
+ fclose(f);
return 1;
}
fclose(f);
@@ -72,6 +73,7 @@ static int ksm_read_sysfs(const char *file_path, unsigned long *val)
}
if (fscanf(f, "%lu", val) != 1) {
perror("fscanf");
+ fclose(f);
return 1;
}
fclose(f);
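
Both fixes close the same leak: the early-return error paths dropped the FILE handle. The kernel's usual single-exit idiom avoids the duplicated fclose() entirely; a sketch of ksm_read_sysfs() rewritten that way (the name matches the function patched above, the body is illustrative):

#include <stdio.h>

static int ksm_read_sysfs(const char *file_path, unsigned long *val)
{
	FILE *f = fopen(file_path, "r");
	int ret = 1;

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fscanf(f, "%lu", val) != 1) {
		perror("fscanf");
		goto out;
	}
	ret = 0;
out:
	fclose(f);
	return ret;
}

int main(void)
{
	unsigned long val;

	if (!ksm_read_sysfs("/sys/kernel/mm/ksm/run", &val))
		printf("ksm/run = %lu\n", val);
	return 0;
}
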
diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
index bca07b93eeb0..7d1b80988d8a 100644
--- a/tools/testing/selftests/wireguard/qemu/Makefile
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
@@ -64,8 +64,8 @@ QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
else
-QEMU_MACHINE := -cpu cortex-a53 -machine virt
-CFLAGS += -march=armv8-a -mtune=cortex-a53
+QEMU_MACHINE := -cpu max -machine virt
+CFLAGS += -march=armv8-a
endif
else ifeq ($(ARCH),aarch64_be)
CHOST := aarch64_be-linux-musl
@@ -76,8 +76,8 @@ QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
else
-QEMU_MACHINE := -cpu cortex-a53 -machine virt
-CFLAGS += -march=armv8-a -mtune=cortex-a53
+QEMU_MACHINE := -cpu max -machine virt
+CFLAGS += -march=armv8-a
endif
else ifeq ($(ARCH),arm)
CHOST := arm-linux-musleabi
@@ -88,8 +88,8 @@ QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
else
-QEMU_MACHINE := -cpu cortex-a15 -machine virt
-CFLAGS += -march=armv7-a -mtune=cortex-a15 -mabi=aapcs-linux
+QEMU_MACHINE := -cpu max -machine virt
+CFLAGS += -march=armv7-a -mabi=aapcs-linux
endif
else ifeq ($(ARCH),armeb)
CHOST := armeb-linux-musleabi
@@ -100,8 +100,8 @@ QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
else
-QEMU_MACHINE := -cpu cortex-a15 -machine virt
-CFLAGS += -march=armv7-a -mabi=aapcs-linux # We don't pass -mtune=cortex-a15 due to a compiler bug on big endian.
+QEMU_MACHINE := -cpu max -machine virt
+CFLAGS += -march=armv7-a -mabi=aapcs-linux
LDFLAGS += -Wl,--be8
endif
else ifeq ($(ARCH),x86_64)
@@ -112,8 +112,7 @@ KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host -machine q35,accel=kvm
else
-QEMU_MACHINE := -cpu Skylake-Server -machine q35
-CFLAGS += -march=skylake-avx512
+QEMU_MACHINE := -cpu max -machine q35
endif
else ifeq ($(ARCH),i686)
CHOST := i686-linux-musl
@@ -123,8 +122,7 @@ KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
ifeq ($(subst x86_64,i686,$(HOST_ARCH)),$(ARCH))
QEMU_MACHINE := -cpu host -machine q35,accel=kvm
else
-QEMU_MACHINE := -cpu coreduo -machine q35
-CFLAGS += -march=prescott
+QEMU_MACHINE := -cpu max -machine q35
endif
else ifeq ($(ARCH),mips64)
CHOST := mips64-linux-musl
@@ -182,7 +180,7 @@ KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host,accel=kvm -machine pseries
else
-QEMU_MACHINE := -machine pseries
+QEMU_MACHINE := -machine pseries -device spapr-rng,rng=rng -object rng-random,id=rng
endif
else ifeq ($(ARCH),powerpc64le)
CHOST := powerpc64le-linux-musl
@@ -192,7 +190,7 @@ KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host,accel=kvm -machine pseries
else
-QEMU_MACHINE := -machine pseries
+QEMU_MACHINE := -machine pseries -device spapr-rng,rng=rng -object rng-random,id=rng
endif
else ifeq ($(ARCH),powerpc)
CHOST := powerpc-linux-musl
@@ -247,7 +245,7 @@ QEMU_VPORT_RESULT := virtio-serial-ccw
ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host,accel=kvm -machine s390-ccw-virtio -append $(KERNEL_CMDLINE)
else
-QEMU_MACHINE := -machine s390-ccw-virtio -append $(KERNEL_CMDLINE)
+QEMU_MACHINE := -cpu max -machine s390-ccw-virtio -append $(KERNEL_CMDLINE)
endif
else
$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64, powerpc64le, powerpc, m68k, riscv64, riscv32, s390x)
diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c
index 2a0f48fac925..c9e128436546 100644
--- a/tools/testing/selftests/wireguard/qemu/init.c
+++ b/tools/testing/selftests/wireguard/qemu/init.c
@@ -21,6 +21,7 @@
#include <sys/utsname.h>
#include <sys/sendfile.h>
#include <sys/sysmacros.h>
+#include <sys/random.h>
#include <linux/random.h>
#include <linux/version.h>
@@ -58,6 +59,8 @@ static void seed_rng(void)
{
int bits = 256, fd;
+ if (!getrandom(NULL, 0, GRND_NONBLOCK))
+ return;
pretty_message("[+] Fake seeding RNG...");
fd = open("/dev/random", O_WRONLY);
if (fd < 0)
diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config
index a9b5a520a1d2..bad88f4b0a03 100644
--- a/tools/testing/selftests/wireguard/qemu/kernel.config
+++ b/tools/testing/selftests/wireguard/qemu/kernel.config
@@ -31,6 +31,7 @@ CONFIG_TTY=y
CONFIG_BINFMT_ELF=y
CONFIG_BINFMT_SCRIPT=y
CONFIG_VDSO=y
+CONFIG_STRICT_KERNEL_RWX=y
CONFIG_VIRTUALIZATION=y
CONFIG_HYPERVISOR_GUEST=y
CONFIG_PARAVIRT=y
@@ -65,6 +66,8 @@ CONFIG_PROC_FS=y
CONFIG_PROC_SYSCTL=y
CONFIG_SYSFS=y
CONFIG_TMPFS=y
+CONFIG_RANDOM_TRUST_CPU=y
+CONFIG_RANDOM_TRUST_BOOTLOADER=y
CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15
CONFIG_LOG_BUF_SHIFT=18
CONFIG_PRINTK_TIME=y
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 44c47670447a..a49df8988cd6 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3328,9 +3328,11 @@ bool kvm_vcpu_block(struct kvm_vcpu *vcpu)
vcpu->stat.generic.blocking = 1;
+ preempt_disable();
kvm_arch_vcpu_blocking(vcpu);
-
prepare_to_rcuwait(wait);
+ preempt_enable();
+
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
@@ -3340,9 +3342,11 @@ bool kvm_vcpu_block(struct kvm_vcpu *vcpu)
waited = true;
schedule();
}
- finish_rcuwait(wait);
+ preempt_disable();
+ finish_rcuwait(wait);
kvm_arch_vcpu_unblocking(vcpu);
+ preempt_enable();
vcpu->stat.generic.blocking = 0;