summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-05-26 14:38:57 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-05-26 14:38:57 -0700
commit3349ada3cffdbe4579872a004360daa31938f683 (patch)
treee1bb25ee685df1ced55c85343ac620b8a3af3cd2
parentd8cb068359f6210d790828714081d4ccb47014ff (diff)
parent8682a5749a3d2b416b57709115c0351b50c8efcb (diff)
Merge tag 'powerpc-6.16-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Madhavan Srinivasan: - Support for dynamic preemption - Migrate powerpc boards GPIO driver to new setter API - Added new PMU for KVM host-wide measurement - Enhancement to htmdump driver to support more functions - Added character device for couple RTAS supported APIs - Minor fixes and cleanup Thanks to Amit Machhiwal, Athira Rajeev, Bagas Sanjaya, Bartosz Golaszewski, Christophe Leroy, Eddie James, Gaurav Batra, Gautam Menghani, Geert Uytterhoeven, Haren Myneni, Hari Bathini, Jiri Slaby (SUSE), Linus Walleij, Michal Suchanek, Naveen N Rao (AMD), Nilay Shroff, Ricardo B. Marlière, Ritesh Harjani (IBM), Sathvika Vasireddy, Shrikanth Hegde, Stephen Rothwell, Sourabh Jain, Thorsten Blum, Vaibhav Jain, Venkat Rao Bagalkote, and Viktor Malik. * tag 'powerpc-6.16-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (52 commits) MAINTAINERS: powerpc: Remove myself as a reviewer powerpc/iommu: Use str_disabled_enabled() helper powerpc/powermac: Use str_enabled_disabled() and str_on_off() helpers powerpc/mm/fault: Use str_write_read() helper function powerpc: Replace strcpy() with strscpy() in proc_ppc64_init() powerpc/pseries/iommu: Fix kmemleak in TCE table userspace view powerpc/kernel: Fix ppc_save_regs inclusion in build powerpc: Transliterate author name and remove FIXME powerpc/pseries/htmdump: Include header file to get is_kvm_guest() definition KVM: PPC: Book3S HV: Fix IRQ map warnings with XICS on pSeries KVM Guest powerpc/8xx: Reduce alignment constraint for kernel memory powerpc/boot: Fix build with gcc 15 powerpc/pseries/htmdump: Add documentation for H_HTM debugfs interface powerpc/pseries/htmdump: Add htm capabilities support to htmdump module powerpc/pseries/htmdump: Add htm flags support to htmdump module powerpc/pseries/htmdump: Add htm setup support to htmdump module powerpc/pseries/htmdump: Add htm info support to htmdump module powerpc/pseries/htmdump: Add htm status support to htmdump module powerpc/pseries/htmdump: Add htm start support to htmdump module powerpc/pseries/htmdump: Add htm configure support to htmdump module ...
-rw-r--r--Documentation/arch/powerpc/htm.rst104
-rw-r--r--Documentation/arch/powerpc/kvm-nested.rst40
-rw-r--r--Documentation/userspace-api/ioctl/ioctl-number.rst6
-rw-r--r--MAINTAINERS1
-rw-r--r--arch/powerpc/Kconfig11
-rw-r--r--arch/powerpc/boot/Makefile1
-rw-r--r--arch/powerpc/boot/rs6000.h6
-rw-r--r--arch/powerpc/include/asm/guest-state-buffer.h35
-rw-r--r--arch/powerpc/include/asm/hvcall.h13
-rw-r--r--arch/powerpc/include/asm/plpar_wrappers.h20
-rw-r--r--arch/powerpc/include/asm/preempt.h16
-rw-r--r--arch/powerpc/include/asm/rtas.h4
-rw-r--r--arch/powerpc/include/uapi/asm/papr-indices.h41
-rw-r--r--arch/powerpc/include/uapi/asm/papr-physical-attestation.h31
-rw-r--r--arch/powerpc/include/uapi/asm/papr-platform-dump.h16
-rw-r--r--arch/powerpc/kernel/Makefile2
-rw-r--r--arch/powerpc/kernel/fadump.c6
-rw-r--r--arch/powerpc/kernel/interrupt.c6
-rw-r--r--arch/powerpc/kernel/iommu.c5
-rw-r--r--arch/powerpc/kernel/proc_powerpc.c3
-rw-r--r--arch/powerpc/kernel/process.c8
-rw-r--r--arch/powerpc/kernel/rtas.c8
-rw-r--r--arch/powerpc/kernel/trace/ftrace_entry.S2
-rw-r--r--arch/powerpc/kexec/crash.c5
-rw-r--r--arch/powerpc/kvm/Kconfig13
-rw-r--r--arch/powerpc/kvm/book3s_hv.c20
-rw-r--r--arch/powerpc/kvm/book3s_hv_nestedv2.c6
-rw-r--r--arch/powerpc/kvm/guest-state-buffer.c39
-rw-r--r--arch/powerpc/kvm/test-guest-state-buffer.c214
-rw-r--r--arch/powerpc/kvm/timing.h4
-rw-r--r--arch/powerpc/lib/vmx-helper.c2
-rw-r--r--arch/powerpc/mm/fault.c5
-rw-r--r--arch/powerpc/mm/nohash/8xx.c32
-rw-r--r--arch/powerpc/net/bpf_jit.h20
-rw-r--r--arch/powerpc/net/bpf_jit_comp.c33
-rw-r--r--arch/powerpc/net/bpf_jit_comp32.c6
-rw-r--r--arch/powerpc/net/bpf_jit_comp64.c15
-rw-r--r--arch/powerpc/perf/Makefile2
-rw-r--r--arch/powerpc/perf/kvm-hv-pmu.c435
-rw-r--r--arch/powerpc/platforms/44x/gpio.c7
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_gpt.c6
-rw-r--r--arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c13
-rw-r--r--arch/powerpc/platforms/8xx/cpm1.c12
-rw-r--r--arch/powerpc/platforms/powermac/setup.c4
-rw-r--r--arch/powerpc/platforms/powermac/time.c3
-rw-r--r--arch/powerpc/platforms/ps3/device-init.c3
-rw-r--r--arch/powerpc/platforms/pseries/Makefile3
-rw-r--r--arch/powerpc/platforms/pseries/htmdump.c395
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c2
-rw-r--r--arch/powerpc/platforms/pseries/msi.c7
-rw-r--r--arch/powerpc/platforms/pseries/papr-indices.c488
-rw-r--r--arch/powerpc/platforms/pseries/papr-phy-attest.c288
-rw-r--r--arch/powerpc/platforms/pseries/papr-platform-dump.c411
-rw-r--r--arch/powerpc/platforms/pseries/papr-rtas-common.c311
-rw-r--r--arch/powerpc/platforms/pseries/papr-rtas-common.h61
-rw-r--r--arch/powerpc/platforms/pseries/papr-vpd.c352
-rw-r--r--arch/powerpc/sysdev/cpm_common.c6
-rw-r--r--arch/powerpc/sysdev/mpic.c7
-rw-r--r--arch/powerpc/xmon/xmon.c2
59 files changed, 3146 insertions, 471 deletions
diff --git a/Documentation/arch/powerpc/htm.rst b/Documentation/arch/powerpc/htm.rst
new file mode 100644
index 000000000000..fcb4eb6306b1
--- /dev/null
+++ b/Documentation/arch/powerpc/htm.rst
@@ -0,0 +1,104 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. _htm:
+
+===================================
+HTM (Hardware Trace Macro)
+===================================
+
+Athira Rajeev, 2 Mar 2025
+
+.. contents::
+ :depth: 3
+
+
+Basic overview
+==============
+
+H_HTM is used as an interface for executing Hardware Trace Macro (HTM)
+functions, including setup, configuration, control and dumping of the HTM data.
+For using HTM, it is required to setup HTM buffers and HTM operations can
+be controlled using the H_HTM hcall. The hcall can be invoked for any core/chip
+of the system from within a partition itself. To use this feature, a debugfs
+folder called "htmdump" is present under /sys/kernel/debug/powerpc.
+
+
+HTM debugfs example usage
+=========================
+
+.. code-block:: sh
+
+ # ls /sys/kernel/debug/powerpc/htmdump/
+ coreindexonchip htmcaps htmconfigure htmflags htminfo htmsetup
+ htmstart htmstatus htmtype nodalchipindex nodeindex trace
+
+Details on each file:
+
+* nodeindex, nodalchipindex, coreindexonchip specifies which partition to configure the HTM for.
+* htmtype: specifies the type of HTM. Supported target is hardwareTarget.
+* trace: is to read the HTM data.
+* htmconfigure: Configure/Deconfigure the HTM. Writing 1 to the file will configure the trace, writing 0 to the file will do deconfigure.
+* htmstart: start/Stop the HTM. Writing 1 to the file will start the tracing, writing 0 to the file will stop the tracing.
+* htmstatus: get the status of HTM. This is needed to understand the HTM state after each operation.
+* htmsetup: set the HTM buffer size. Size of HTM buffer is in power of 2
+* htminfo: provides the system processor configuration details. This is needed to understand the appropriate values for nodeindex, nodalchipindex, coreindexonchip.
+* htmcaps : provides the HTM capabilities like minimum/maximum buffer size, what kind of tracing the HTM supports etc.
+* htmflags : allows to pass flags to hcall. Currently supports controlling the wrapping of HTM buffer.
+
+To see the system processor configuration details:
+
+.. code-block:: sh
+
+ # cat /sys/kernel/debug/powerpc/htmdump/htminfo > htminfo_file
+
+The result can be interpreted using hexdump.
+
+To collect HTM traces for a partition represented by nodeindex as
+zero, nodalchipindex as 1 and coreindexonchip as 12
+
+.. code-block:: sh
+
+ # cd /sys/kernel/debug/powerpc/htmdump/
+ # echo 2 > htmtype
+ # echo 33 > htmsetup ( sets 8GB memory for HTM buffer, number is size in power of 2 )
+
+This requires a CEC reboot to get the HTM buffers allocated.
+
+.. code-block:: sh
+
+ # cd /sys/kernel/debug/powerpc/htmdump/
+ # echo 2 > htmtype
+ # echo 0 > nodeindex
+ # echo 1 > nodalchipindex
+ # echo 12 > coreindexonchip
+ # echo 1 > htmflags # to set noWrap for HTM buffers
+ # echo 1 > htmconfigure # Configure the HTM
+ # echo 1 > htmstart # Start the HTM
+ # echo 0 > htmstart # Stop the HTM
+ # echo 0 > htmconfigure # Deconfigure the HTM
+ # cat htmstatus # Dump the status of HTM entries as data
+
+Above will set the htmtype and core details, followed by executing respective HTM operation.
+
+Read the HTM trace data
+========================
+
+After starting the trace collection, run the workload
+of interest. Stop the trace collection after required period
+of time, and read the trace file.
+
+.. code-block:: sh
+
+ # cat /sys/kernel/debug/powerpc/htmdump/trace > trace_file
+
+This trace file will contain the relevant instruction traces
+collected during the workload execution. And can be used as
+input file for trace decoders to understand data.
+
+Benefits of using HTM debugfs interface
+=======================================
+
+It is now possible to collect traces for a particular core/chip
+from within any partition of the system and decode it. Through
+this enablement, a small partition can be dedicated to collect the
+trace data and analyze to provide important information for Performance
+analysis, Software tuning, or Hardware debug.
diff --git a/Documentation/arch/powerpc/kvm-nested.rst b/Documentation/arch/powerpc/kvm-nested.rst
index 5defd13cc6c1..574592505604 100644
--- a/Documentation/arch/powerpc/kvm-nested.rst
+++ b/Documentation/arch/powerpc/kvm-nested.rst
@@ -208,13 +208,9 @@ associated values for each ID in the GSB::
flags:
Bit 0: getGuestWideState: Request state of the Guest instead
of an individual VCPU.
- Bit 1: takeOwnershipOfVcpuState Indicate the L1 is taking
- over ownership of the VCPU state and that the L0 can free
- the storage holding the state. The VCPU state will need to
- be returned to the Hypervisor via H_GUEST_SET_STATE prior
- to H_GUEST_RUN_VCPU being called for this VCPU. The data
- returned in the dataBuffer is in a Hypervisor internal
- format.
+ Bit 1: getHostWideState: Request stats of the Host. This causes
+ the guestId and vcpuId parameters to be ignored and attempting
+ to get the VCPU/Guest state will cause an error.
Bits 2-63: Reserved
guestId: ID obtained from H_GUEST_CREATE
vcpuId: ID of the vCPU pass to H_GUEST_CREATE_VCPU
@@ -406,9 +402,10 @@ the partition like the timebase offset and partition scoped page
table information.
+--------+-------+----+--------+----------------------------------+
-| ID | Size | RW | Thread | Details |
-| | Bytes | | Guest | |
-| | | | Scope | |
+| ID | Size | RW |(H)ost | Details |
+| | Bytes | |(G)uest | |
+| | | |(T)hread| |
+| | | |Scope | |
+========+=======+====+========+==================================+
| 0x0000 | | RW | TG | NOP element |
+--------+-------+----+--------+----------------------------------+
@@ -434,6 +431,29 @@ table information.
| | | | |- 0x8 Table size. |
+--------+-------+----+--------+----------------------------------+
| 0x0007-| | | | Reserved |
+| 0x07FF | | | | |
++--------+-------+----+--------+----------------------------------+
+| 0x0800 | 0x08 | R | H | Current usage in bytes of the |
+| | | | | L0's Guest Management Space |
+| | | | | for an L1-Lpar. |
++--------+-------+----+--------+----------------------------------+
+| 0x0801 | 0x08 | R | H | Max bytes available in the |
+| | | | | L0's Guest Management Space for |
+| | | | | an L1-Lpar |
++--------+-------+----+--------+----------------------------------+
+| 0x0802 | 0x08 | R | H | Current usage in bytes of the |
+| | | | | L0's Guest Page Table Management |
+| | | | | Space for an L1-Lpar |
++--------+-------+----+--------+----------------------------------+
+| 0x0803 | 0x08 | R | H | Max bytes available in the L0's |
+| | | | | Guest Page Table Management |
+| | | | | Space for an L1-Lpar |
++--------+-------+----+--------+----------------------------------+
+| 0x0804 | 0x08 | R | H | Cumulative Reclaimed bytes from |
+| | | | | L0 Guest's Page Table Management |
+| | | | | Space due to overcommit |
++--------+-------+----+--------+----------------------------------+
+| 0x0805-| | | | Reserved |
| 0x0BFF | | | | |
+--------+-------+----+--------+----------------------------------+
| 0x0C00 | 0x10 | RW | T |Run vCPU Input Buffer: |
diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 7a1409ecc238..fee5c4731501 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -366,6 +366,12 @@ Code Seq# Include File Comments
<mailto:linuxppc-dev>
0xB2 01-02 arch/powerpc/include/uapi/asm/papr-sysparm.h powerpc/pseries system parameter API
<mailto:linuxppc-dev>
+0xB2 03-05 arch/powerpc/include/uapi/asm/papr-indices.h powerpc/pseries indices API
+ <mailto:linuxppc-dev>
+0xB2 06-07 arch/powerpc/include/uapi/asm/papr-platform-dump.h powerpc/pseries Platform Dump API
+ <mailto:linuxppc-dev>
+0xB2 08 powerpc/include/uapi/asm/papr-physical-attestation.h powerpc/pseries Physical Attestation API
+ <mailto:linuxppc-dev>
0xB3 00 linux/mmc/ioctl.h
0xB4 00-0F linux/gpio.h <mailto:linux-gpio@vger.kernel.org>
0xB5 00-0F uapi/linux/rpmsg.h <mailto:linux-remoteproc@vger.kernel.org>
diff --git a/MAINTAINERS b/MAINTAINERS
index d4a2c7d4d14a..4661dc6e7523 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13665,7 +13665,6 @@ M: Madhavan Srinivasan <maddy@linux.ibm.com>
M: Michael Ellerman <mpe@ellerman.id.au>
R: Nicholas Piggin <npiggin@gmail.com>
R: Christophe Leroy <christophe.leroy@csgroup.eu>
-R: Naveen N Rao <naveen@kernel.org>
L: linuxppc-dev@lists.ozlabs.org
S: Supported
W: https://github.com/linuxppc/wiki/wiki
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 6722625a406a..c3e0cc83f120 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -277,6 +277,7 @@ config PPC
select HAVE_PERF_EVENTS_NMI if PPC64
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_PREEMPT_DYNAMIC_KEY
select HAVE_RETHOOK if KPROBES
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE
@@ -894,7 +895,7 @@ config DATA_SHIFT
int "Data shift" if DATA_SHIFT_BOOL
default 24 if STRICT_KERNEL_RWX && PPC64
range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32
- range 19 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_8xx
+ range 14 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_8xx
range 20 24 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_85xx
default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32
default 18 if (DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32
@@ -907,10 +908,10 @@ config DATA_SHIFT
On Book3S 32 (603+), DBATs are used to map kernel text and rodata RO.
Smaller is the alignment, greater is the number of necessary DBATs.
- On 8xx, large pages (512kb or 8M) are used to map kernel linear
- memory. Aligning to 8M reduces TLB misses as only 8M pages are used
- in that case. If PIN_TLB is selected, it must be aligned to 8M as
- 8M pages will be pinned.
+ On 8xx, large pages (16kb or 512kb or 8M) are used to map kernel
+ linear memory. Aligning to 8M reduces TLB misses as only 8M pages
+ are used in that case. If PIN_TLB is selected, it must be aligned
+ to 8M as 8M pages will be pinned.
config ARCH_FORCE_MAX_ORDER
int "Order of maximal physically contiguous allocations"
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 184d0680e661..a7ab087d412c 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -70,6 +70,7 @@ BOOTCPPFLAGS := -nostdinc $(LINUXINCLUDE)
BOOTCPPFLAGS += -isystem $(shell $(BOOTCC) -print-file-name=include)
BOOTCFLAGS := $(BOOTTARGETFLAGS) \
+ -std=gnu11 \
-Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
-fno-strict-aliasing -O2 \
-msoft-float -mno-altivec -mno-vsx \
diff --git a/arch/powerpc/boot/rs6000.h b/arch/powerpc/boot/rs6000.h
index a9d879155ef9..16df8f3c43f1 100644
--- a/arch/powerpc/boot/rs6000.h
+++ b/arch/powerpc/boot/rs6000.h
@@ -1,11 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* IBM RS/6000 "XCOFF" file definitions for BFD.
Copyright (C) 1990, 1991 Free Software Foundation, Inc.
- FIXME: Can someone provide a transliteration of this name into ASCII?
- Using the following chars caused a compiler warning on HIUX (so I replaced
- them with octal escapes), and isn't useful without an understanding of what
- character set it is.
- Written by Mimi Ph\373\364ng-Th\345o V\365 of IBM
+ Written by Mimi Phuong-Thao Vo of IBM
and John Gilmore of Cygnus Support. */
/********************** FILE HEADER **********************/
diff --git a/arch/powerpc/include/asm/guest-state-buffer.h b/arch/powerpc/include/asm/guest-state-buffer.h
index d107abe1468f..acd61eb36d59 100644
--- a/arch/powerpc/include/asm/guest-state-buffer.h
+++ b/arch/powerpc/include/asm/guest-state-buffer.h
@@ -28,6 +28,21 @@
/* Process Table Info */
#define KVMPPC_GSID_PROCESS_TABLE 0x0006
+/* Guest Management Heap Size */
+#define KVMPPC_GSID_L0_GUEST_HEAP 0x0800
+
+/* Guest Management Heap Max Size */
+#define KVMPPC_GSID_L0_GUEST_HEAP_MAX 0x0801
+
+/* Guest Pagetable Size */
+#define KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE 0x0802
+
+/* Guest Pagetable Max Size */
+#define KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX 0x0803
+
+/* Guest Pagetable Reclaim in bytes */
+#define KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM 0x0804
+
/* H_GUEST_RUN_VCPU input buffer Info */
#define KVMPPC_GSID_RUN_INPUT 0x0C00
/* H_GUEST_RUN_VCPU output buffer Info */
@@ -106,6 +121,11 @@
#define KVMPPC_GSE_GUESTWIDE_COUNT \
(KVMPPC_GSE_GUESTWIDE_END - KVMPPC_GSE_GUESTWIDE_START + 1)
+#define KVMPPC_GSE_HOSTWIDE_START KVMPPC_GSID_L0_GUEST_HEAP
+#define KVMPPC_GSE_HOSTWIDE_END KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM
+#define KVMPPC_GSE_HOSTWIDE_COUNT \
+ (KVMPPC_GSE_HOSTWIDE_END - KVMPPC_GSE_HOSTWIDE_START + 1)
+
#define KVMPPC_GSE_META_START KVMPPC_GSID_RUN_INPUT
#define KVMPPC_GSE_META_END KVMPPC_GSID_VPA
#define KVMPPC_GSE_META_COUNT (KVMPPC_GSE_META_END - KVMPPC_GSE_META_START + 1)
@@ -130,7 +150,8 @@
(KVMPPC_GSE_INTR_REGS_END - KVMPPC_GSE_INTR_REGS_START + 1)
#define KVMPPC_GSE_IDEN_COUNT \
- (KVMPPC_GSE_GUESTWIDE_COUNT + KVMPPC_GSE_META_COUNT + \
+ (KVMPPC_GSE_HOSTWIDE_COUNT + \
+ KVMPPC_GSE_GUESTWIDE_COUNT + KVMPPC_GSE_META_COUNT + \
KVMPPC_GSE_DW_REGS_COUNT + KVMPPC_GSE_W_REGS_COUNT + \
KVMPPC_GSE_VSRS_COUNT + KVMPPC_GSE_INTR_REGS_COUNT)
@@ -139,10 +160,11 @@
*/
enum {
KVMPPC_GS_CLASS_GUESTWIDE = 0x01,
- KVMPPC_GS_CLASS_META = 0x02,
- KVMPPC_GS_CLASS_DWORD_REG = 0x04,
- KVMPPC_GS_CLASS_WORD_REG = 0x08,
- KVMPPC_GS_CLASS_VECTOR = 0x10,
+ KVMPPC_GS_CLASS_HOSTWIDE = 0x02,
+ KVMPPC_GS_CLASS_META = 0x04,
+ KVMPPC_GS_CLASS_DWORD_REG = 0x08,
+ KVMPPC_GS_CLASS_WORD_REG = 0x10,
+ KVMPPC_GS_CLASS_VECTOR = 0x18,
KVMPPC_GS_CLASS_INTR = 0x20,
};
@@ -164,6 +186,7 @@ enum {
*/
enum {
KVMPPC_GS_FLAGS_WIDE = 0x01,
+ KVMPPC_GS_FLAGS_HOST_WIDE = 0x02,
};
/**
@@ -287,7 +310,7 @@ struct kvmppc_gs_msg_ops {
* struct kvmppc_gs_msg - a guest state message
* @bitmap: the guest state ids that should be included
* @ops: modify message behavior for reading and writing to buffers
- * @flags: guest wide or thread wide
+ * @flags: host wide, guest wide or thread wide
* @data: location where buffer data will be written to or from.
*
* A guest state message is allows flexibility in sending in receiving data
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index eeef13db2770..6df6dbbe1e7c 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -490,14 +490,15 @@
#define H_RPTI_PAGE_ALL (-1UL)
/* Flags for H_GUEST_{S,G}_STATE */
-#define H_GUEST_FLAGS_WIDE (1UL<<(63-0))
+#define H_GUEST_FLAGS_WIDE (1UL << (63 - 0))
+#define H_GUEST_FLAGS_HOST_WIDE (1UL << (63 - 1))
/* Flag values used for H_{S,G}SET_GUEST_CAPABILITIES */
-#define H_GUEST_CAP_COPY_MEM (1UL<<(63-0))
-#define H_GUEST_CAP_POWER9 (1UL<<(63-1))
-#define H_GUEST_CAP_POWER10 (1UL<<(63-2))
-#define H_GUEST_CAP_POWER11 (1UL<<(63-3))
-#define H_GUEST_CAP_BITMAP2 (1UL<<(63-63))
+#define H_GUEST_CAP_COPY_MEM (1UL << (63 - 0))
+#define H_GUEST_CAP_POWER9 (1UL << (63 - 1))
+#define H_GUEST_CAP_POWER10 (1UL << (63 - 2))
+#define H_GUEST_CAP_POWER11 (1UL << (63 - 3))
+#define H_GUEST_CAP_BITMAP2 (1UL << (63 - 63))
/*
* Defines for H_HTM - Macros for hardware trace macro (HTM) function.
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index 91be7b885944..f2b6cc4341bb 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -65,6 +65,14 @@ static inline long register_dtl(unsigned long cpu, unsigned long vpa)
return vpa_call(H_VPA_REG_DTL, cpu, vpa);
}
+/*
+ * Invokes H_HTM hcall with parameters passed from htm_hcall_wrapper.
+ * flags: Set to hardwareTarget.
+ * target: Specifies target using node index, nodal chip index and core index.
+ * operation : action to perform ie configure, start, stop, deconfigure, trace
+ * based on the HTM type.
+ * param1, param2, param3: parameters for each action.
+ */
static inline long htm_call(unsigned long flags, unsigned long target,
unsigned long operation, unsigned long param1,
unsigned long param2, unsigned long param3)
@@ -73,17 +81,17 @@ static inline long htm_call(unsigned long flags, unsigned long target,
param1, param2, param3);
}
-static inline long htm_get_dump_hardware(unsigned long nodeindex,
+static inline long htm_hcall_wrapper(unsigned long flags, unsigned long nodeindex,
unsigned long nodalchipindex, unsigned long coreindexonchip,
- unsigned long type, unsigned long addr, unsigned long size,
- unsigned long offset)
+ unsigned long type, unsigned long htm_op, unsigned long param1, unsigned long param2,
+ unsigned long param3)
{
- return htm_call(H_HTM_FLAGS_HARDWARE_TARGET,
+ return htm_call(H_HTM_FLAGS_HARDWARE_TARGET | flags,
H_HTM_TARGET_NODE_INDEX(nodeindex) |
H_HTM_TARGET_NODAL_CHIP_INDEX(nodalchipindex) |
H_HTM_TARGET_CORE_INDEX_ON_CHIP(coreindexonchip),
- H_HTM_OP(H_HTM_OP_DUMP_DATA) | H_HTM_TYPE(type),
- addr, size, offset);
+ H_HTM_OP(htm_op) | H_HTM_TYPE(type),
+ param1, param2, param3);
}
extern void vpa_init(int cpu);
diff --git a/arch/powerpc/include/asm/preempt.h b/arch/powerpc/include/asm/preempt.h
new file mode 100644
index 000000000000..000e2b9681f3
--- /dev/null
+++ b/arch/powerpc/include/asm/preempt.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_POWERPC_PREEMPT_H
+#define __ASM_POWERPC_PREEMPT_H
+
+#include <asm-generic/preempt.h>
+
+#if defined(CONFIG_PREEMPT_DYNAMIC)
+#include <linux/jump_label.h>
+DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
+#define need_irq_preemption() \
+ (static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
+#else
+#define need_irq_preemption() (IS_ENABLED(CONFIG_PREEMPTION))
+#endif
+
+#endif /* __ASM_POWERPC_PREEMPT_H */
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 04406162fc5a..75fa0293c508 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -515,6 +515,10 @@ extern char rtas_data_buf[RTAS_DATA_BUF_SIZE];
extern unsigned long rtas_rmo_buf;
extern struct mutex rtas_ibm_get_vpd_lock;
+extern struct mutex rtas_ibm_get_indices_lock;
+extern struct mutex rtas_ibm_set_dynamic_indicator_lock;
+extern struct mutex rtas_ibm_get_dynamic_sensor_state_lock;
+extern struct mutex rtas_ibm_physical_attestation_lock;
#define GLOBAL_INTERRUPT_QUEUE 9005
diff --git a/arch/powerpc/include/uapi/asm/papr-indices.h b/arch/powerpc/include/uapi/asm/papr-indices.h
new file mode 100644
index 000000000000..c2999d89d52a
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-indices.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_INDICES_H_
+#define _UAPI_PAPR_INDICES_H_
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+#include <asm/papr-miscdev.h>
+
+#define LOC_CODE_SIZE 80
+#define RTAS_GET_INDICES_BUF_SIZE SZ_4K
+
+struct papr_indices_io_block {
+ union {
+ struct {
+ __u8 is_sensor; /* 0 for indicator and 1 for sensor */
+ __u32 indice_type;
+ } indices;
+ struct {
+ __u32 token; /* Sensor or indicator token */
+ __u32 state; /* get / set state */
+ /*
+ * PAPR+ 12.3.2.4 Converged Location Code Rules - Length
+ * Restrictions. 79 characters plus null.
+ */
+ char location_code_str[LOC_CODE_SIZE]; /* location code */
+ } dynamic_param;
+ };
+};
+
+/*
+ * ioctls for /dev/papr-indices.
+ * PAPR_INDICES_IOC_GET: Returns a get-indices handle fd to read data
+ * PAPR_DYNAMIC_SENSOR_IOC_GET: Gets the state of the input sensor
+ * PAPR_DYNAMIC_INDICATOR_IOC_SET: Sets the new state for the input indicator
+ */
+#define PAPR_INDICES_IOC_GET _IOW(PAPR_MISCDEV_IOC_ID, 3, struct papr_indices_io_block)
+#define PAPR_DYNAMIC_SENSOR_IOC_GET _IOWR(PAPR_MISCDEV_IOC_ID, 4, struct papr_indices_io_block)
+#define PAPR_DYNAMIC_INDICATOR_IOC_SET _IOW(PAPR_MISCDEV_IOC_ID, 5, struct papr_indices_io_block)
+
+
+#endif /* _UAPI_PAPR_INDICES_H_ */
diff --git a/arch/powerpc/include/uapi/asm/papr-physical-attestation.h b/arch/powerpc/include/uapi/asm/papr-physical-attestation.h
new file mode 100644
index 000000000000..ea746837bb9a
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-physical-attestation.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_PHYSICAL_ATTESTATION_H_
+#define _UAPI_PAPR_PHYSICAL_ATTESTATION_H_
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+#include <asm/papr-miscdev.h>
+
+#define PAPR_PHYATTEST_MAX_INPUT 4084 /* Max 4K buffer: 4K-12 */
+
+/*
+ * Defined in PAPR 2.13+ 21.6 Attestation Command Structures.
+ * User space pass this struct and the max size should be 4K.
+ */
+struct papr_phy_attest_io_block {
+ __u8 version;
+ __u8 command;
+ __u8 TCG_major_ver;
+ __u8 TCG_minor_ver;
+ __be32 length;
+ __be32 correlator;
+ __u8 payload[PAPR_PHYATTEST_MAX_INPUT];
+};
+
+/*
+ * ioctl for /dev/papr-physical-attestation. Returns a attestation
+ * command fd handle
+ */
+#define PAPR_PHY_ATTEST_IOC_HANDLE _IOW(PAPR_MISCDEV_IOC_ID, 8, struct papr_phy_attest_io_block)
+
+#endif /* _UAPI_PAPR_PHYSICAL_ATTESTATION_H_ */
diff --git a/arch/powerpc/include/uapi/asm/papr-platform-dump.h b/arch/powerpc/include/uapi/asm/papr-platform-dump.h
new file mode 100644
index 000000000000..8a1c060e89a9
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-platform-dump.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_PLATFORM_DUMP_H_
+#define _UAPI_PAPR_PLATFORM_DUMP_H_
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+#include <asm/papr-miscdev.h>
+
+/*
+ * ioctl for /dev/papr-platform-dump. Returns a platform-dump handle fd
+ * corresponding to dump tag.
+ */
+#define PAPR_PLATFORM_DUMP_IOC_CREATE_HANDLE _IOW(PAPR_MISCDEV_IOC_ID, 6, __u64)
+#define PAPR_PLATFORM_DUMP_IOC_INVALIDATE _IOW(PAPR_MISCDEV_IOC_ID, 7, __u64)
+
+#endif /* _UAPI_PAPR_PLATFORM_DUMP_H_ */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 6ac621155ec3..9d1ab3971694 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -160,9 +160,7 @@ endif
obj64-$(CONFIG_PPC_TRANSACTIONAL_MEM) += tm.o
-ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC_CORE)(CONFIG_PPC_BOOK3S),)
obj-y += ppc_save_regs.o
-endif
obj-$(CONFIG_EPAPR_PARAVIRT) += epapr_paravirt.o epapr_hcalls.o
obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index df16c7f547ab..8ca49e40c473 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -290,10 +290,8 @@ static void __init fadump_show_config(void)
if (!fw_dump.fadump_supported)
return;
- pr_debug("Fadump enabled : %s\n",
- (fw_dump.fadump_enabled ? "yes" : "no"));
- pr_debug("Dump Active : %s\n",
- (fw_dump.dump_active ? "yes" : "no"));
+ pr_debug("Fadump enabled : %s\n", str_yes_no(fw_dump.fadump_enabled));
+ pr_debug("Dump Active : %s\n", str_yes_no(fw_dump.dump_active));
pr_debug("Dump section sizes:\n");
pr_debug(" CPU state data size: %lx\n", fw_dump.cpu_state_data_size);
pr_debug(" HPTE region size : %lx\n", fw_dump.hpte_region_size);
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 8f4acc55407b..e0c681d0b076 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -25,6 +25,10 @@
unsigned long global_dbcr0[NR_CPUS];
#endif
+#if defined(CONFIG_PREEMPT_DYNAMIC)
+DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
+#endif
+
#ifdef CONFIG_PPC_BOOK3S_64
DEFINE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant);
static inline bool exit_must_hard_disable(void)
@@ -396,7 +400,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
/* Returning to a kernel context with local irqs enabled. */
WARN_ON_ONCE(!(regs->msr & MSR_EE));
again:
- if (IS_ENABLED(CONFIG_PREEMPTION)) {
+ if (need_irq_preemption()) {
/* Return to preemptible kernel context */
if (unlikely(read_thread_flags() & _TIF_NEED_RESCHED)) {
if (preempt_count() == 0)
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 0ebae6e4c19d..244eb4857e7f 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -16,6 +16,7 @@
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/string.h>
+#include <linux/string_choices.h>
#include <linux/dma-mapping.h>
#include <linux/bitmap.h>
#include <linux/iommu-helper.h>
@@ -769,8 +770,8 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid,
iommu_table_clear(tbl);
if (!welcomed) {
- printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
- novmerge ? "disabled" : "enabled");
+ pr_info("IOMMU table initialized, virtual merging %s\n",
+ str_disabled_enabled(novmerge));
welcomed = 1;
}
diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c
index 3816a2bf2b84..d083b4517065 100644
--- a/arch/powerpc/kernel/proc_powerpc.c
+++ b/arch/powerpc/kernel/proc_powerpc.c
@@ -9,6 +9,7 @@
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/of.h>
+#include <linux/string.h>
#include <asm/machdep.h>
#include <asm/vdso_datapage.h>
@@ -56,7 +57,7 @@ static int __init proc_ppc64_init(void)
{
struct proc_dir_entry *pde;
- strcpy((char *)systemcfg->eye_catcher, "SYSTEMCFG:PPC64");
+ strscpy(systemcfg->eye_catcher, "SYSTEMCFG:PPC64");
systemcfg->version.major = SYSTEMCFG_MAJOR;
systemcfg->version.minor = SYSTEMCFG_MINOR;
systemcfg->processor = mfspr(SPRN_PVR);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index ef91f71e07c4..855e09886503 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1000,7 +1000,7 @@ static inline void tm_reclaim_task(struct task_struct *tsk)
WARN_ON(tm_suspend_disabled);
- TM_DEBUG("--- tm_reclaim on pid %d (NIP=%lx, "
+ TM_DEBUG("---- tm_reclaim on pid %d (NIP=%lx, "
"ccr=%lx, msr=%lx, trap=%lx)\n",
tsk->pid, thr->regs->nip,
thr->regs->ccr, thr->regs->msr,
@@ -1008,7 +1008,7 @@ static inline void tm_reclaim_task(struct task_struct *tsk)
tm_reclaim_thread(thr, TM_CAUSE_RESCHED);
- TM_DEBUG("--- tm_reclaim on pid %d complete\n",
+ TM_DEBUG("---- tm_reclaim on pid %d complete\n",
tsk->pid);
out_and_saveregs:
@@ -2367,14 +2367,14 @@ void __no_sanitize_address show_stack(struct task_struct *tsk,
(sp + STACK_INT_FRAME_REGS);
lr = regs->link;
- printk("%s--- interrupt: %lx at %pS\n",
+ printk("%s---- interrupt: %lx at %pS\n",
loglvl, regs->trap, (void *)regs->nip);
// Detect the case of an empty pt_regs at the very base
// of the stack and suppress showing it in full.
if (!empty_user_regs(regs, tsk)) {
__show_regs(regs);
- printk("%s--- interrupt: %lx\n", loglvl, regs->trap);
+ printk("%s---- interrupt: %lx\n", loglvl, regs->trap);
}
firstframe = 1;
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index d7a738f1858d..e61245c4468e 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -92,12 +92,12 @@ struct rtas_function {
* Per-function locks for sequence-based RTAS functions.
*/
static DEFINE_MUTEX(rtas_ibm_activate_firmware_lock);
-static DEFINE_MUTEX(rtas_ibm_get_dynamic_sensor_state_lock);
-static DEFINE_MUTEX(rtas_ibm_get_indices_lock);
static DEFINE_MUTEX(rtas_ibm_lpar_perftools_lock);
-static DEFINE_MUTEX(rtas_ibm_physical_attestation_lock);
-static DEFINE_MUTEX(rtas_ibm_set_dynamic_indicator_lock);
+DEFINE_MUTEX(rtas_ibm_physical_attestation_lock);
DEFINE_MUTEX(rtas_ibm_get_vpd_lock);
+DEFINE_MUTEX(rtas_ibm_get_indices_lock);
+DEFINE_MUTEX(rtas_ibm_set_dynamic_indicator_lock);
+DEFINE_MUTEX(rtas_ibm_get_dynamic_sensor_state_lock);
static struct rtas_function rtas_function_table[] __ro_after_init = {
[RTAS_FNIDX__CHECK_EXCEPTION] = {
diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S
index 2c1b24100eca..3565c67fc638 100644
--- a/arch/powerpc/kernel/trace/ftrace_entry.S
+++ b/arch/powerpc/kernel/trace/ftrace_entry.S
@@ -212,10 +212,10 @@
bne- 1f
mr r3, r15
+1: mtlr r3
.if \allregs == 0
REST_GPR(15, r1)
.endif
-1: mtlr r3
#endif
/* Restore gprs */
diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c
index 9ac3266e4965..a325c1c02f96 100644
--- a/arch/powerpc/kexec/crash.c
+++ b/arch/powerpc/kexec/crash.c
@@ -359,7 +359,10 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
if (TRAP(regs) == INTERRUPT_SYSTEM_RESET)
is_via_system_reset = 1;
- crash_smp_send_stop();
+ if (IS_ENABLED(CONFIG_SMP))
+ crash_smp_send_stop();
+ else
+ crash_kexec_prepare();
crash_save_cpu(regs, crashing_cpu);
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index dbfdc126bf14..2f2702c867f7 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -83,6 +83,7 @@ config KVM_BOOK3S_64_HV
depends on KVM_BOOK3S_64 && PPC_POWERNV
select KVM_BOOK3S_HV_POSSIBLE
select KVM_GENERIC_MMU_NOTIFIER
+ select KVM_BOOK3S_HV_PMU
select CMA
help
Support running unmodified book3s_64 guest kernels in
@@ -171,6 +172,18 @@ config KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND
those buggy L1s which saves the L2 state, at the cost of performance
in all nested-capable guest entry/exit.
+config KVM_BOOK3S_HV_PMU
+ tristate "Hypervisor Perf events for KVM Book3s-HV"
+ depends on KVM_BOOK3S_64_HV
+ help
+ Enable Book3s-HV Hypervisor Perf events PMU named 'kvm-hv'. These
+ Perf events give an overview of hypervisor performance overall
+ instead of a specific guests. Currently the PMU reports
+ L0-Hypervisor stats on a kvm-hv enabled PSeries LPAR like:
+ * Total/Used Guest-Heap
+ * Total/Used Guest Page-table Memory
+ * Total amount of Guest Page-table Memory reclaimed
+
config KVM_BOOKE_HV
bool
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 19f4d298dd17..7667563fb9ff 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -6541,10 +6541,6 @@ static struct kvmppc_ops kvm_ops_hv = {
.fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv,
.arch_vm_ioctl = kvm_arch_vm_ioctl_hv,
.hcall_implemented = kvmppc_hcall_impl_hv,
-#ifdef CONFIG_KVM_XICS
- .irq_bypass_add_producer = kvmppc_irq_bypass_add_producer_hv,
- .irq_bypass_del_producer = kvmppc_irq_bypass_del_producer_hv,
-#endif
.configure_mmu = kvmhv_configure_mmu,
.get_rmmu_info = kvmhv_get_rmmu_info,
.set_smt_mode = kvmhv_set_smt_mode,
@@ -6662,6 +6658,22 @@ static int kvmppc_book3s_init_hv(void)
return r;
}
+#if defined(CONFIG_KVM_XICS)
+ /*
+ * IRQ bypass is supported only for interrupts whose EOI operations are
+ * handled via OPAL calls. Therefore, register IRQ bypass handlers
+ * exclusively for PowerNV KVM when booted with 'xive=off', indicating
+ * the use of the emulated XICS interrupt controller.
+ */
+ if (!kvmhv_on_pseries()) {
+ pr_info("KVM-HV: Enabling IRQ bypass\n");
+ kvm_ops_hv.irq_bypass_add_producer =
+ kvmppc_irq_bypass_add_producer_hv;
+ kvm_ops_hv.irq_bypass_del_producer =
+ kvmppc_irq_bypass_del_producer_hv;
+ }
+#endif
+
kvm_ops_hv.owner = THIS_MODULE;
kvmppc_hv_ops = &kvm_ops_hv;
diff --git a/arch/powerpc/kvm/book3s_hv_nestedv2.c b/arch/powerpc/kvm/book3s_hv_nestedv2.c
index e5c7ce1fb761..87691cf86cae 100644
--- a/arch/powerpc/kvm/book3s_hv_nestedv2.c
+++ b/arch/powerpc/kvm/book3s_hv_nestedv2.c
@@ -123,6 +123,12 @@ static size_t gs_msg_ops_vcpu_get_size(struct kvmppc_gs_msg *gsm)
case KVMPPC_GSID_PROCESS_TABLE:
case KVMPPC_GSID_RUN_INPUT:
case KVMPPC_GSID_RUN_OUTPUT:
+ /* Host wide counters */
+ case KVMPPC_GSID_L0_GUEST_HEAP:
+ case KVMPPC_GSID_L0_GUEST_HEAP_MAX:
+ case KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE:
+ case KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX:
+ case KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM:
break;
default:
size += kvmppc_gse_total_size(kvmppc_gsid_size(iden));
diff --git a/arch/powerpc/kvm/guest-state-buffer.c b/arch/powerpc/kvm/guest-state-buffer.c
index b80dbc58621f..871cf60ddeb6 100644
--- a/arch/powerpc/kvm/guest-state-buffer.c
+++ b/arch/powerpc/kvm/guest-state-buffer.c
@@ -92,6 +92,10 @@ static int kvmppc_gsid_class(u16 iden)
(iden <= KVMPPC_GSE_GUESTWIDE_END))
return KVMPPC_GS_CLASS_GUESTWIDE;
+ if ((iden >= KVMPPC_GSE_HOSTWIDE_START) &&
+ (iden <= KVMPPC_GSE_HOSTWIDE_END))
+ return KVMPPC_GS_CLASS_HOSTWIDE;
+
if ((iden >= KVMPPC_GSE_META_START) && (iden <= KVMPPC_GSE_META_END))
return KVMPPC_GS_CLASS_META;
@@ -118,6 +122,21 @@ static int kvmppc_gsid_type(u16 iden)
int type = -1;
switch (kvmppc_gsid_class(iden)) {
+ case KVMPPC_GS_CLASS_HOSTWIDE:
+ switch (iden) {
+ case KVMPPC_GSID_L0_GUEST_HEAP:
+ fallthrough;
+ case KVMPPC_GSID_L0_GUEST_HEAP_MAX:
+ fallthrough;
+ case KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE:
+ fallthrough;
+ case KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX:
+ fallthrough;
+ case KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM:
+ type = KVMPPC_GSE_BE64;
+ break;
+ }
+ break;
case KVMPPC_GS_CLASS_GUESTWIDE:
switch (iden) {
case KVMPPC_GSID_HOST_STATE_SIZE:
@@ -187,6 +206,9 @@ unsigned long kvmppc_gsid_flags(u16 iden)
case KVMPPC_GS_CLASS_GUESTWIDE:
flags = KVMPPC_GS_FLAGS_WIDE;
break;
+ case KVMPPC_GS_CLASS_HOSTWIDE:
+ flags = KVMPPC_GS_FLAGS_HOST_WIDE;
+ break;
case KVMPPC_GS_CLASS_META:
case KVMPPC_GS_CLASS_DWORD_REG:
case KVMPPC_GS_CLASS_WORD_REG:
@@ -310,6 +332,13 @@ static inline int kvmppc_gse_flatten_iden(u16 iden)
bit += KVMPPC_GSE_GUESTWIDE_COUNT;
+ if (class == KVMPPC_GS_CLASS_HOSTWIDE) {
+ bit += iden - KVMPPC_GSE_HOSTWIDE_START;
+ return bit;
+ }
+
+ bit += KVMPPC_GSE_HOSTWIDE_COUNT;
+
if (class == KVMPPC_GS_CLASS_META) {
bit += iden - KVMPPC_GSE_META_START;
return bit;
@@ -356,6 +385,12 @@ static inline u16 kvmppc_gse_unflatten_iden(int bit)
}
bit -= KVMPPC_GSE_GUESTWIDE_COUNT;
+ if (bit < KVMPPC_GSE_HOSTWIDE_COUNT) {
+ iden = KVMPPC_GSE_HOSTWIDE_START + bit;
+ return iden;
+ }
+ bit -= KVMPPC_GSE_HOSTWIDE_COUNT;
+
if (bit < KVMPPC_GSE_META_COUNT) {
iden = KVMPPC_GSE_META_START + bit;
return iden;
@@ -588,6 +623,8 @@ int kvmppc_gsb_send(struct kvmppc_gs_buff *gsb, unsigned long flags)
if (flags & KVMPPC_GS_FLAGS_WIDE)
hflags |= H_GUEST_FLAGS_WIDE;
+ if (flags & KVMPPC_GS_FLAGS_HOST_WIDE)
+ hflags |= H_GUEST_FLAGS_HOST_WIDE;
rc = plpar_guest_set_state(hflags, gsb->guest_id, gsb->vcpu_id,
__pa(gsb->hdr), gsb->capacity, &i);
@@ -613,6 +650,8 @@ int kvmppc_gsb_recv(struct kvmppc_gs_buff *gsb, unsigned long flags)
if (flags & KVMPPC_GS_FLAGS_WIDE)
hflags |= H_GUEST_FLAGS_WIDE;
+ if (flags & KVMPPC_GS_FLAGS_HOST_WIDE)
+ hflags |= H_GUEST_FLAGS_HOST_WIDE;
rc = plpar_guest_get_state(hflags, gsb->guest_id, gsb->vcpu_id,
__pa(gsb->hdr), gsb->capacity, &i);
diff --git a/arch/powerpc/kvm/test-guest-state-buffer.c b/arch/powerpc/kvm/test-guest-state-buffer.c
index bfd225329a18..5ccca306997a 100644
--- a/arch/powerpc/kvm/test-guest-state-buffer.c
+++ b/arch/powerpc/kvm/test-guest-state-buffer.c
@@ -5,6 +5,7 @@
#include <kunit/test.h>
#include <asm/guest-state-buffer.h>
+#include <asm/kvm_ppc.h>
static void test_creating_buffer(struct kunit *test)
{
@@ -141,6 +142,16 @@ static void test_gs_bitmap(struct kunit *test)
i++;
}
+ for (u16 iden = KVMPPC_GSID_L0_GUEST_HEAP;
+ iden <= KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM; iden++) {
+ kvmppc_gsbm_set(&gsbm, iden);
+ kvmppc_gsbm_set(&gsbm1, iden);
+ KUNIT_EXPECT_TRUE(test, kvmppc_gsbm_test(&gsbm, iden));
+ kvmppc_gsbm_clear(&gsbm, iden);
+ KUNIT_EXPECT_FALSE(test, kvmppc_gsbm_test(&gsbm, iden));
+ i++;
+ }
+
for (u16 iden = KVMPPC_GSID_RUN_INPUT; iden <= KVMPPC_GSID_VPA;
iden++) {
kvmppc_gsbm_set(&gsbm, iden);
@@ -309,12 +320,215 @@ static void test_gs_msg(struct kunit *test)
kvmppc_gsm_free(gsm);
}
+/* Test data struct for hostwide/L0 counters */
+struct kvmppc_gs_msg_test_hostwide_data {
+ u64 guest_heap;
+ u64 guest_heap_max;
+ u64 guest_pgtable_size;
+ u64 guest_pgtable_size_max;
+ u64 guest_pgtable_reclaim;
+};
+
+static size_t test_hostwide_get_size(struct kvmppc_gs_msg *gsm)
+
+{
+ size_t size = 0;
+ u16 ids[] = {
+ KVMPPC_GSID_L0_GUEST_HEAP,
+ KVMPPC_GSID_L0_GUEST_HEAP_MAX,
+ KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE,
+ KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX,
+ KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM
+ };
+
+ for (int i = 0; i < ARRAY_SIZE(ids); i++)
+ size += kvmppc_gse_total_size(kvmppc_gsid_size(ids[i]));
+ return size;
+}
+
+static int test_hostwide_fill_info(struct kvmppc_gs_buff *gsb,
+ struct kvmppc_gs_msg *gsm)
+{
+ struct kvmppc_gs_msg_test_hostwide_data *data = gsm->data;
+
+ if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_HEAP))
+ kvmppc_gse_put_u64(gsb, KVMPPC_GSID_L0_GUEST_HEAP,
+ data->guest_heap);
+ if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_HEAP_MAX))
+ kvmppc_gse_put_u64(gsb, KVMPPC_GSID_L0_GUEST_HEAP_MAX,
+ data->guest_heap_max);
+ if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE))
+ kvmppc_gse_put_u64(gsb, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE,
+ data->guest_pgtable_size);
+ if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX))
+ kvmppc_gse_put_u64(gsb, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX,
+ data->guest_pgtable_size_max);
+ if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM))
+ kvmppc_gse_put_u64(gsb, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM,
+ data->guest_pgtable_reclaim);
+
+ return 0;
+}
+
+static int test_hostwide_refresh_info(struct kvmppc_gs_msg *gsm,
+ struct kvmppc_gs_buff *gsb)
+{
+ struct kvmppc_gs_parser gsp = { 0 };
+ struct kvmppc_gs_msg_test_hostwide_data *data = gsm->data;
+ struct kvmppc_gs_elem *gse;
+ int rc;
+
+ rc = kvmppc_gse_parse(&gsp, gsb);
+ if (rc < 0)
+ return rc;
+
+ gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_HEAP);
+ if (gse)
+ data->guest_heap = kvmppc_gse_get_u64(gse);
+
+ gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_HEAP_MAX);
+ if (gse)
+ data->guest_heap_max = kvmppc_gse_get_u64(gse);
+
+ gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE);
+ if (gse)
+ data->guest_pgtable_size = kvmppc_gse_get_u64(gse);
+
+ gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX);
+ if (gse)
+ data->guest_pgtable_size_max = kvmppc_gse_get_u64(gse);
+
+ gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM);
+ if (gse)
+ data->guest_pgtable_reclaim = kvmppc_gse_get_u64(gse);
+
+ return 0;
+}
+
+static struct kvmppc_gs_msg_ops gs_msg_test_hostwide_ops = {
+ .get_size = test_hostwide_get_size,
+ .fill_info = test_hostwide_fill_info,
+ .refresh_info = test_hostwide_refresh_info,
+};
+
+static void test_gs_hostwide_msg(struct kunit *test)
+{
+ struct kvmppc_gs_msg_test_hostwide_data test_data = {
+ .guest_heap = 0xdeadbeef,
+ .guest_heap_max = ~0ULL,
+ .guest_pgtable_size = 0xff,
+ .guest_pgtable_size_max = 0xffffff,
+ .guest_pgtable_reclaim = 0xdeadbeef,
+ };
+ struct kvmppc_gs_msg *gsm;
+ struct kvmppc_gs_buff *gsb;
+
+ gsm = kvmppc_gsm_new(&gs_msg_test_hostwide_ops, &test_data, GSM_SEND,
+ GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gsm);
+
+ gsb = kvmppc_gsb_new(kvmppc_gsm_size(gsm), 0, 0, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gsb);
+
+ kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_HEAP);
+ kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_HEAP_MAX);
+ kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE);
+ kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX);
+ kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM);
+
+ kvmppc_gsm_fill_info(gsm, gsb);
+
+ memset(&test_data, 0, sizeof(test_data));
+
+ kvmppc_gsm_refresh_info(gsm, gsb);
+ KUNIT_EXPECT_EQ(test, test_data.guest_heap, 0xdeadbeef);
+ KUNIT_EXPECT_EQ(test, test_data.guest_heap_max, ~0ULL);
+ KUNIT_EXPECT_EQ(test, test_data.guest_pgtable_size, 0xff);
+ KUNIT_EXPECT_EQ(test, test_data.guest_pgtable_size_max, 0xffffff);
+ KUNIT_EXPECT_EQ(test, test_data.guest_pgtable_reclaim, 0xdeadbeef);
+
+ kvmppc_gsm_free(gsm);
+}
+
+/* Test if the H_GUEST_GET_STATE for hostwide counters works */
+static void test_gs_hostwide_counters(struct kunit *test)
+{
+ struct kvmppc_gs_msg_test_hostwide_data test_data;
+ struct kvmppc_gs_parser gsp = { 0 };
+
+ struct kvmppc_gs_msg *gsm;
+ struct kvmppc_gs_buff *gsb;
+ struct kvmppc_gs_elem *gse;
+ int rc;
+
+ if (!kvmhv_on_pseries())
+ kunit_skip(test, "This test need a kmv-hv guest");
+
+ gsm = kvmppc_gsm_new(&gs_msg_test_hostwide_ops, &test_data, GSM_SEND,
+ GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gsm);
+
+ gsb = kvmppc_gsb_new(kvmppc_gsm_size(gsm), 0, 0, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gsb);
+
+ kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_HEAP);
+
+ kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_HEAP_MAX);
+
+ kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE);
+
+ kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX);
+
+ kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM);
+
+ kvmppc_gsm_fill_info(gsm, gsb);
+
+ /* With HOST_WIDE flags guestid and vcpuid will be ignored */
+ rc = kvmppc_gsb_recv(gsb, KVMPPC_GS_FLAGS_HOST_WIDE);
+ KUNIT_ASSERT_EQ(test, rc, 0);
+
+ /* Parse the guest state buffer is successful */
+ rc = kvmppc_gse_parse(&gsp, gsb);
+ KUNIT_ASSERT_EQ(test, rc, 0);
+
+ /* Parse the GSB and get the counters */
+ gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_HEAP);
+ KUNIT_ASSERT_NOT_NULL_MSG(test, gse, "L0 Heap counter missing");
+ kunit_info(test, "Guest Heap Size=%llu bytes",
+ kvmppc_gse_get_u64(gse));
+
+ gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_HEAP_MAX);
+ KUNIT_ASSERT_NOT_NULL_MSG(test, gse, "L0 Heap counter max missing");
+ kunit_info(test, "Guest Heap Size Max=%llu bytes",
+ kvmppc_gse_get_u64(gse));
+
+ gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE);
+ KUNIT_ASSERT_NOT_NULL_MSG(test, gse, "L0 page-table size missing");
+ kunit_info(test, "Guest Page-table Size=%llu bytes",
+ kvmppc_gse_get_u64(gse));
+
+ gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX);
+ KUNIT_ASSERT_NOT_NULL_MSG(test, gse, "L0 page-table size-max missing");
+ kunit_info(test, "Guest Page-table Size Max=%llu bytes",
+ kvmppc_gse_get_u64(gse));
+
+ gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM);
+ KUNIT_ASSERT_NOT_NULL_MSG(test, gse, "L0 page-table reclaim size missing");
+ kunit_info(test, "Guest Page-table Reclaim Size=%llu bytes",
+ kvmppc_gse_get_u64(gse));
+
+ kvmppc_gsm_free(gsm);
+ kvmppc_gsb_free(gsb);
+}
+
static struct kunit_case guest_state_buffer_testcases[] = {
KUNIT_CASE(test_creating_buffer),
KUNIT_CASE(test_adding_element),
KUNIT_CASE(test_gs_bitmap),
KUNIT_CASE(test_gs_parsing),
KUNIT_CASE(test_gs_msg),
+ KUNIT_CASE(test_gs_hostwide_msg),
+ KUNIT_CASE(test_gs_hostwide_counters),
{}
};
diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
index 45817ab82bb4..14b0e23f601f 100644
--- a/arch/powerpc/kvm/timing.h
+++ b/arch/powerpc/kvm/timing.h
@@ -38,11 +38,7 @@ static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {}
static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type)
{
/* type has to be known at build time for optimization */
-
- /* The BUILD_BUG_ON below breaks in funny ways, commented out
- * for now ... -BenH
BUILD_BUG_ON(!__builtin_constant_p(type));
- */
switch (type) {
case EXT_INTR_EXITS:
vcpu->stat.ext_intr_exits++;
diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c
index 58ed6bd613a6..54340912398f 100644
--- a/arch/powerpc/lib/vmx-helper.c
+++ b/arch/powerpc/lib/vmx-helper.c
@@ -45,7 +45,7 @@ int exit_vmx_usercopy(void)
* set and we are preemptible. The hack here is to schedule a
* decrementer to fire here and reschedule for us if necessary.
*/
- if (IS_ENABLED(CONFIG_PREEMPTION) && need_resched())
+ if (need_irq_preemption() && need_resched())
set_dec(1);
return 0;
}
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index c156fe0d53c3..806c74e0d5ab 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -17,6 +17,7 @@
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
+#include <linux/string_choices.h>
#include <linux/types.h>
#include <linux/pagemap.h>
#include <linux/ptrace.h>
@@ -218,7 +219,7 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
// Read/write fault blocked by KUAP is bad, it can never succeed.
if (bad_kuap_fault(regs, address, is_write)) {
pr_crit_ratelimited("Kernel attempted to %s user page (%lx) - exploit attempt? (uid: %d)\n",
- is_write ? "write" : "read", address,
+ str_write_read(is_write), address,
from_kuid(&init_user_ns, current_uid()));
// Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad
@@ -625,7 +626,7 @@ static void __bad_page_fault(struct pt_regs *regs, int sig)
case INTERRUPT_DATA_STORAGE:
case INTERRUPT_H_DATA_STORAGE:
pr_alert("BUG: %s on %s at 0x%08lx\n", msg,
- is_write ? "write" : "read", regs->dar);
+ str_write_read(is_write), regs->dar);
break;
case INTERRUPT_DATA_SEGMENT:
pr_alert("BUG: %s at 0x%08lx\n", msg, regs->dar);
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index 8b54f12d1889..ab1505cf42bf 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -54,20 +54,13 @@ static int __ref __early_map_kernel_hugepage(unsigned long va, phys_addr_t pa,
{
pmd_t *pmdp = pmd_off_k(va);
pte_t *ptep;
-
- if (WARN_ON(psize != MMU_PAGE_512K && psize != MMU_PAGE_8M))
- return -EINVAL;
+ unsigned int shift = mmu_psize_to_shift(psize);
if (new) {
if (WARN_ON(slab_is_available()))
return -EINVAL;
- if (psize == MMU_PAGE_512K) {
- ptep = early_pte_alloc_kernel(pmdp, va);
- /* The PTE should never be already present */
- if (WARN_ON(pte_present(*ptep) && pgprot_val(prot)))
- return -EINVAL;
- } else {
+ if (psize == MMU_PAGE_8M) {
if (WARN_ON(!pmd_none(*pmdp) || !pmd_none(*(pmdp + 1))))
return -EINVAL;
@@ -78,20 +71,25 @@ static int __ref __early_map_kernel_hugepage(unsigned long va, phys_addr_t pa,
pmd_populate_kernel(&init_mm, pmdp + 1, ptep);
ptep = (pte_t *)pmdp;
+ } else {
+ ptep = early_pte_alloc_kernel(pmdp, va);
+ /* The PTE should never be already present */
+ if (WARN_ON(pte_present(*ptep) && pgprot_val(prot)))
+ return -EINVAL;
}
} else {
- if (psize == MMU_PAGE_512K)
- ptep = pte_offset_kernel(pmdp, va);
- else
+ if (psize == MMU_PAGE_8M)
ptep = (pte_t *)pmdp;
+ else
+ ptep = pte_offset_kernel(pmdp, va);
}
if (WARN_ON(!ptep))
return -ENOMEM;
set_huge_pte_at(&init_mm, va, ptep,
- pte_mkhuge(pfn_pte(pa >> PAGE_SHIFT, prot)),
- 1UL << mmu_psize_to_shift(psize));
+ arch_make_huge_pte(pfn_pte(pa >> PAGE_SHIFT, prot), shift, 0),
+ 1UL << shift);
return 0;
}
@@ -123,14 +121,18 @@ static int mmu_mapin_ram_chunk(unsigned long offset, unsigned long top,
unsigned long p = offset;
int err = 0;
- WARN_ON(!IS_ALIGNED(offset, SZ_512K) || !IS_ALIGNED(top, SZ_512K));
+ WARN_ON(!IS_ALIGNED(offset, SZ_16K) || !IS_ALIGNED(top, SZ_16K));
+ for (; p < ALIGN(p, SZ_512K) && p < top && !err; p += SZ_16K, v += SZ_16K)
+ err = __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_16K, new);
for (; p < ALIGN(p, SZ_8M) && p < top && !err; p += SZ_512K, v += SZ_512K)
err = __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new);
for (; p < ALIGN_DOWN(top, SZ_8M) && p < top && !err; p += SZ_8M, v += SZ_8M)
err = __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_8M, new);
for (; p < ALIGN_DOWN(top, SZ_512K) && p < top && !err; p += SZ_512K, v += SZ_512K)
err = __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new);
+ for (; p < ALIGN_DOWN(top, SZ_16K) && p < top && !err; p += SZ_16K, v += SZ_16K)
+ err = __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_16K, new);
if (!new)
flush_tlb_kernel_range(PAGE_OFFSET + v, PAGE_OFFSET + top);
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 6beacaec63d3..4c26912c2e3c 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -51,8 +51,16 @@
EMIT(PPC_INST_BRANCH_COND | (((cond) & 0x3ff) << 16) | (offset & 0xfffc)); \
} while (0)
-/* Sign-extended 32-bit immediate load */
+/*
+ * Sign-extended 32-bit immediate load
+ *
+ * If this is a dummy pass (!image), account for
+ * maximum possible instructions.
+ */
#define PPC_LI32(d, i) do { \
+ if (!image) \
+ ctx->idx += 2; \
+ else { \
if ((int)(uintptr_t)(i) >= -32768 && \
(int)(uintptr_t)(i) < 32768) \
EMIT(PPC_RAW_LI(d, i)); \
@@ -60,10 +68,15 @@
EMIT(PPC_RAW_LIS(d, IMM_H(i))); \
if (IMM_L(i)) \
EMIT(PPC_RAW_ORI(d, d, IMM_L(i))); \
- } } while(0)
+ } \
+ } } while (0)
#ifdef CONFIG_PPC64
+/* If dummy pass (!image), account for maximum possible instructions */
#define PPC_LI64(d, i) do { \
+ if (!image) \
+ ctx->idx += 5; \
+ else { \
if ((long)(i) >= -2147483648 && \
(long)(i) < 2147483648) \
PPC_LI32(d, i); \
@@ -84,7 +97,8 @@
if ((uintptr_t)(i) & 0x000000000000ffffULL) \
EMIT(PPC_RAW_ORI(d, d, (uintptr_t)(i) & \
0xffff)); \
- } } while (0)
+ } \
+ } } while (0)
#define PPC_LI_ADDR PPC_LI64
#ifndef CONFIG_PPC_KERNEL_PCREL
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 2991bb171a9b..c0684733e9d6 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -504,10 +504,11 @@ static int invoke_bpf_prog(u32 *image, u32 *ro_image, struct codegen_context *ct
EMIT(PPC_RAW_ADDI(_R3, _R1, regs_off));
if (!p->jited)
PPC_LI_ADDR(_R4, (unsigned long)p->insnsi);
- if (!create_branch(&branch_insn, (u32 *)&ro_image[ctx->idx], (unsigned long)p->bpf_func,
- BRANCH_SET_LINK)) {
- if (image)
- image[ctx->idx] = ppc_inst_val(branch_insn);
+ /* Account for max possible instructions during dummy pass for size calculation */
+ if (image && !create_branch(&branch_insn, (u32 *)&ro_image[ctx->idx],
+ (unsigned long)p->bpf_func,
+ BRANCH_SET_LINK)) {
+ image[ctx->idx] = ppc_inst_val(branch_insn);
ctx->idx++;
} else {
EMIT(PPC_RAW_LL(_R12, _R25, offsetof(struct bpf_prog, bpf_func)));
@@ -889,7 +890,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
bpf_trampoline_restore_tail_call_cnt(image, ctx, func_frame_offset, r4_off);
/* Reserve space to patch branch instruction to skip fexit progs */
- im->ip_after_call = &((u32 *)ro_image)[ctx->idx];
+ if (ro_image) /* image is NULL for dummy pass */
+ im->ip_after_call = &((u32 *)ro_image)[ctx->idx];
EMIT(PPC_RAW_NOP());
}
@@ -912,7 +914,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
}
if (flags & BPF_TRAMP_F_CALL_ORIG) {
- im->ip_epilogue = &((u32 *)ro_image)[ctx->idx];
+ if (ro_image) /* image is NULL for dummy pass */
+ im->ip_epilogue = &((u32 *)ro_image)[ctx->idx];
PPC_LI_ADDR(_R3, im);
ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx,
(unsigned long)__bpf_tramp_exit);
@@ -973,25 +976,9 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
struct bpf_tramp_links *tlinks, void *func_addr)
{
struct bpf_tramp_image im;
- void *image;
int ret;
- /*
- * Allocate a temporary buffer for __arch_prepare_bpf_trampoline().
- * This will NOT cause fragmentation in direct map, as we do not
- * call set_memory_*() on this buffer.
- *
- * We cannot use kvmalloc here, because we need image to be in
- * module memory range.
- */
- image = bpf_jit_alloc_exec(PAGE_SIZE);
- if (!image)
- return -ENOMEM;
-
- ret = __arch_prepare_bpf_trampoline(&im, image, image + PAGE_SIZE, image,
- m, flags, tlinks, func_addr);
- bpf_jit_free_exec(image);
-
+ ret = __arch_prepare_bpf_trampoline(&im, NULL, NULL, NULL, m, flags, tlinks, func_addr);
return ret;
}
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index c4db278dae36..0aace304dfe1 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -313,7 +313,6 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
u64 func_addr;
u32 true_cond;
u32 tmp_idx;
- int j;
if (i && (BPF_CLASS(code) == BPF_ALU64 || BPF_CLASS(code) == BPF_ALU) &&
(BPF_CLASS(prevcode) == BPF_ALU64 || BPF_CLASS(prevcode) == BPF_ALU) &&
@@ -1099,13 +1098,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
* 16 byte instruction that uses two 'struct bpf_insn'
*/
case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
- tmp_idx = ctx->idx;
PPC_LI32(dst_reg_h, (u32)insn[i + 1].imm);
PPC_LI32(dst_reg, (u32)insn[i].imm);
- /* padding to allow full 4 instructions for later patching */
- if (!image)
- for (j = ctx->idx - tmp_idx; j < 4; j++)
- EMIT(PPC_RAW_NOP());
/* Adjust for two bpf instructions */
addrs[++i] = ctx->idx * 4;
break;
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 233703b06d7c..5daa77aee7f7 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -227,7 +227,14 @@ int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context *
#ifdef CONFIG_PPC_KERNEL_PCREL
reladdr = func_addr - local_paca->kernelbase;
- if (reladdr < (long)SZ_8G && reladdr >= -(long)SZ_8G) {
+ /*
+ * If fimage is NULL (the initial pass to find image size),
+ * account for the maximum no. of instructions possible.
+ */
+ if (!fimage) {
+ ctx->idx += 7;
+ return 0;
+ } else if (reladdr < (long)SZ_8G && reladdr >= -(long)SZ_8G) {
EMIT(PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernelbase)));
/* Align for subsequent prefix instruction */
if (!IS_ALIGNED((unsigned long)fimage + CTX_NIA(ctx), 8))
@@ -412,7 +419,6 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
u64 imm64;
u32 true_cond;
u32 tmp_idx;
- int j;
/*
* addrs[] maps a BPF bytecode address into a real offset from
@@ -1046,12 +1052,7 @@ emit_clear:
case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
imm64 = ((u64)(u32) insn[i].imm) |
(((u64)(u32) insn[i+1].imm) << 32);
- tmp_idx = ctx->idx;
PPC_LI64(dst_reg, imm64);
- /* padding to allow full 5 instructions for later patching */
- if (!image)
- for (j = ctx->idx - tmp_idx; j < 5; j++)
- EMIT(PPC_RAW_NOP());
/* Adjust for two bpf instructions */
addrs[++i] = ctx->idx * 4;
break;
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index ac2cf58d62db..7f53fcb7495a 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -18,6 +18,8 @@ obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o
obj-$(CONFIG_VPA_PMU) += vpa-pmu.o
+obj-$(CONFIG_KVM_BOOK3S_HV_PMU) += kvm-hv-pmu.o
+
obj-$(CONFIG_PPC_8xx) += 8xx-pmu.o
obj-$(CONFIG_PPC64) += $(obj64-y)
diff --git a/arch/powerpc/perf/kvm-hv-pmu.c b/arch/powerpc/perf/kvm-hv-pmu.c
new file mode 100644
index 000000000000..ae264c9080ef
--- /dev/null
+++ b/arch/powerpc/perf/kvm-hv-pmu.c
@@ -0,0 +1,435 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Description: PMUs specific to running nested KVM-HV guests
+ * on Book3S processors (specifically POWER9 and later).
+ */
+
+#define pr_fmt(fmt) "kvmppc-pmu: " fmt
+
+#include "asm-generic/local64.h"
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/ratelimit.h>
+#include <linux/kvm_host.h>
+#include <linux/gfp_types.h>
+#include <linux/pgtable.h>
+#include <linux/perf_event.h>
+#include <linux/spinlock_types.h>
+#include <linux/spinlock.h>
+
+#include <asm/types.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu.h>
+#include <asm/pgalloc.h>
+#include <asm/pte-walk.h>
+#include <asm/reg.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/firmware.h>
+
+#include "asm/guest-state-buffer.h"
+
+enum kvmppc_pmu_eventid {
+ KVMPPC_EVENT_HOST_HEAP,
+ KVMPPC_EVENT_HOST_HEAP_MAX,
+ KVMPPC_EVENT_HOST_PGTABLE,
+ KVMPPC_EVENT_HOST_PGTABLE_MAX,
+ KVMPPC_EVENT_HOST_PGTABLE_RECLAIM,
+ KVMPPC_EVENT_MAX,
+};
+
+#define KVMPPC_PMU_EVENT_ATTR(_name, _id) \
+ PMU_EVENT_ATTR_ID(_name, kvmppc_events_sysfs_show, _id)
+
+static ssize_t kvmppc_events_sysfs_show(struct device *dev,
+ struct device_attribute *attr,
+ char *page)
+{
+ struct perf_pmu_events_attr *pmu_attr;
+
+ pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+ return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
+/* Holds the hostwide stats */
+static struct kvmppc_hostwide_stats {
+ u64 guest_heap;
+ u64 guest_heap_max;
+ u64 guest_pgtable_size;
+ u64 guest_pgtable_size_max;
+ u64 guest_pgtable_reclaim;
+} l0_stats;
+
+/* Protect access to l0_stats */
+static DEFINE_SPINLOCK(lock_l0_stats);
+
+/* GSB related structs needed to talk to L0 */
+static struct kvmppc_gs_msg *gsm_l0_stats;
+static struct kvmppc_gs_buff *gsb_l0_stats;
+static struct kvmppc_gs_parser gsp_l0_stats;
+
+static struct attribute *kvmppc_pmu_events_attr[] = {
+ KVMPPC_PMU_EVENT_ATTR(host_heap, KVMPPC_EVENT_HOST_HEAP),
+ KVMPPC_PMU_EVENT_ATTR(host_heap_max, KVMPPC_EVENT_HOST_HEAP_MAX),
+ KVMPPC_PMU_EVENT_ATTR(host_pagetable, KVMPPC_EVENT_HOST_PGTABLE),
+ KVMPPC_PMU_EVENT_ATTR(host_pagetable_max, KVMPPC_EVENT_HOST_PGTABLE_MAX),
+ KVMPPC_PMU_EVENT_ATTR(host_pagetable_reclaim, KVMPPC_EVENT_HOST_PGTABLE_RECLAIM),
+ NULL,
+};
+
+static const struct attribute_group kvmppc_pmu_events_group = {
+ .name = "events",
+ .attrs = kvmppc_pmu_events_attr,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-5");
+static struct attribute *kvmppc_pmu_format_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group kvmppc_pmu_format_group = {
+ .name = "format",
+ .attrs = kvmppc_pmu_format_attr,
+};
+
+static const struct attribute_group *kvmppc_pmu_attr_groups[] = {
+ &kvmppc_pmu_events_group,
+ &kvmppc_pmu_format_group,
+ NULL,
+};
+
+/*
+ * Issue the hcall to get the L0-host stats.
+ * Should be called with l0-stat lock held
+ */
+static int kvmppc_update_l0_stats(void)
+{
+ int rc;
+
+ /* With HOST_WIDE flags guestid and vcpuid will be ignored */
+ rc = kvmppc_gsb_recv(gsb_l0_stats, KVMPPC_GS_FLAGS_HOST_WIDE);
+ if (rc)
+ goto out;
+
+ /* Parse the guest state buffer is successful */
+ rc = kvmppc_gse_parse(&gsp_l0_stats, gsb_l0_stats);
+ if (rc)
+ goto out;
+
+ /* Update the l0 returned stats*/
+ memset(&l0_stats, 0, sizeof(l0_stats));
+ rc = kvmppc_gsm_refresh_info(gsm_l0_stats, gsb_l0_stats);
+
+out:
+ return rc;
+}
+
+/* Update the value of the given perf_event */
+static int kvmppc_pmu_event_update(struct perf_event *event)
+{
+ int rc;
+ u64 curr_val, prev_val;
+ unsigned long flags;
+ unsigned int config = event->attr.config;
+
+ /* Ensure no one else is modifying the l0_stats */
+ spin_lock_irqsave(&lock_l0_stats, flags);
+
+ rc = kvmppc_update_l0_stats();
+ if (!rc) {
+ switch (config) {
+ case KVMPPC_EVENT_HOST_HEAP:
+ curr_val = l0_stats.guest_heap;
+ break;
+ case KVMPPC_EVENT_HOST_HEAP_MAX:
+ curr_val = l0_stats.guest_heap_max;
+ break;
+ case KVMPPC_EVENT_HOST_PGTABLE:
+ curr_val = l0_stats.guest_pgtable_size;
+ break;
+ case KVMPPC_EVENT_HOST_PGTABLE_MAX:
+ curr_val = l0_stats.guest_pgtable_size_max;
+ break;
+ case KVMPPC_EVENT_HOST_PGTABLE_RECLAIM:
+ curr_val = l0_stats.guest_pgtable_reclaim;
+ break;
+ default:
+ rc = -ENOENT;
+ break;
+ }
+ }
+
+ spin_unlock_irqrestore(&lock_l0_stats, flags);
+
+ /* If no error than update the perf event */
+ if (!rc) {
+ prev_val = local64_xchg(&event->hw.prev_count, curr_val);
+ if (curr_val > prev_val)
+ local64_add(curr_val - prev_val, &event->count);
+ }
+
+ return rc;
+}
+
+static int kvmppc_pmu_event_init(struct perf_event *event)
+{
+ unsigned int config = event->attr.config;
+
+ pr_debug("%s: Event(%p) id=%llu cpu=%x on_cpu=%x config=%u",
+ __func__, event, event->id, event->cpu,
+ event->oncpu, config);
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ if (config >= KVMPPC_EVENT_MAX)
+ return -EINVAL;
+
+ local64_set(&event->hw.prev_count, 0);
+ local64_set(&event->count, 0);
+
+ return 0;
+}
+
+static void kvmppc_pmu_del(struct perf_event *event, int flags)
+{
+ kvmppc_pmu_event_update(event);
+}
+
+static int kvmppc_pmu_add(struct perf_event *event, int flags)
+{
+ if (flags & PERF_EF_START)
+ return kvmppc_pmu_event_update(event);
+ return 0;
+}
+
+static void kvmppc_pmu_read(struct perf_event *event)
+{
+ kvmppc_pmu_event_update(event);
+}
+
+/* Return the size of the needed guest state buffer */
+static size_t hostwide_get_size(struct kvmppc_gs_msg *gsm)
+
+{
+ size_t size = 0;
+ const u16 ids[] = {
+ KVMPPC_GSID_L0_GUEST_HEAP,
+ KVMPPC_GSID_L0_GUEST_HEAP_MAX,
+ KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE,
+ KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX,
+ KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM
+ };
+
+ for (int i = 0; i < ARRAY_SIZE(ids); i++)
+ size += kvmppc_gse_total_size(kvmppc_gsid_size(ids[i]));
+ return size;
+}
+
+/* Populate the request guest state buffer */
+static int hostwide_fill_info(struct kvmppc_gs_buff *gsb,
+ struct kvmppc_gs_msg *gsm)
+{
+ int rc = 0;
+ struct kvmppc_hostwide_stats *stats = gsm->data;
+
+ /*
+ * It doesn't matter what values are put into request buffer as
+ * they are going to be overwritten anyways. But for the sake of
+ * testcode and symmetry contents of existing stats are put
+ * populated into the request guest state buffer.
+ */
+ if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_HEAP))
+ rc = kvmppc_gse_put_u64(gsb,
+ KVMPPC_GSID_L0_GUEST_HEAP,
+ stats->guest_heap);
+
+ if (!rc && kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_HEAP_MAX))
+ rc = kvmppc_gse_put_u64(gsb,
+ KVMPPC_GSID_L0_GUEST_HEAP_MAX,
+ stats->guest_heap_max);
+
+ if (!rc && kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE))
+ rc = kvmppc_gse_put_u64(gsb,
+ KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE,
+ stats->guest_pgtable_size);
+ if (!rc &&
+ kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX))
+ rc = kvmppc_gse_put_u64(gsb,
+ KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX,
+ stats->guest_pgtable_size_max);
+ if (!rc &&
+ kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM))
+ rc = kvmppc_gse_put_u64(gsb,
+ KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM,
+ stats->guest_pgtable_reclaim);
+
+ return rc;
+}
+
+/* Parse and update the host wide stats from returned gsb */
+static int hostwide_refresh_info(struct kvmppc_gs_msg *gsm,
+ struct kvmppc_gs_buff *gsb)
+{
+ struct kvmppc_gs_parser gsp = { 0 };
+ struct kvmppc_hostwide_stats *stats = gsm->data;
+ struct kvmppc_gs_elem *gse;
+ int rc;
+
+ rc = kvmppc_gse_parse(&gsp, gsb);
+ if (rc < 0)
+ return rc;
+
+ gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_HEAP);
+ if (gse)
+ stats->guest_heap = kvmppc_gse_get_u64(gse);
+
+ gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_HEAP_MAX);
+ if (gse)
+ stats->guest_heap_max = kvmppc_gse_get_u64(gse);
+
+ gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE);
+ if (gse)
+ stats->guest_pgtable_size = kvmppc_gse_get_u64(gse);
+
+ gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX);
+ if (gse)
+ stats->guest_pgtable_size_max = kvmppc_gse_get_u64(gse);
+
+ gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM);
+ if (gse)
+ stats->guest_pgtable_reclaim = kvmppc_gse_get_u64(gse);
+
+ return 0;
+}
+
+/* gsb-message ops for setting up/parsing */
+static struct kvmppc_gs_msg_ops gsb_ops_l0_stats = {
+ .get_size = hostwide_get_size,
+ .fill_info = hostwide_fill_info,
+ .refresh_info = hostwide_refresh_info,
+};
+
+static int kvmppc_init_hostwide(void)
+{
+ int rc = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&lock_l0_stats, flags);
+
+ /* already registered ? */
+ if (gsm_l0_stats) {
+ rc = 0;
+ goto out;
+ }
+
+ /* setup the Guest state message/buffer to talk to L0 */
+ gsm_l0_stats = kvmppc_gsm_new(&gsb_ops_l0_stats, &l0_stats,
+ GSM_SEND, GFP_KERNEL);
+ if (!gsm_l0_stats) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ /* Populate the Idents */
+ kvmppc_gsm_include(gsm_l0_stats, KVMPPC_GSID_L0_GUEST_HEAP);
+ kvmppc_gsm_include(gsm_l0_stats, KVMPPC_GSID_L0_GUEST_HEAP_MAX);
+ kvmppc_gsm_include(gsm_l0_stats, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE);
+ kvmppc_gsm_include(gsm_l0_stats, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX);
+ kvmppc_gsm_include(gsm_l0_stats, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM);
+
+ /* allocate GSB. Guest/Vcpu Id is ignored */
+ gsb_l0_stats = kvmppc_gsb_new(kvmppc_gsm_size(gsm_l0_stats), 0, 0,
+ GFP_KERNEL);
+ if (!gsb_l0_stats) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ /* ask the ops to fill in the info */
+ rc = kvmppc_gsm_fill_info(gsm_l0_stats, gsb_l0_stats);
+
+out:
+ if (rc) {
+ if (gsm_l0_stats)
+ kvmppc_gsm_free(gsm_l0_stats);
+ if (gsb_l0_stats)
+ kvmppc_gsb_free(gsb_l0_stats);
+ gsm_l0_stats = NULL;
+ gsb_l0_stats = NULL;
+ }
+ spin_unlock_irqrestore(&lock_l0_stats, flags);
+ return rc;
+}
+
+static void kvmppc_cleanup_hostwide(void)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&lock_l0_stats, flags);
+
+ if (gsm_l0_stats)
+ kvmppc_gsm_free(gsm_l0_stats);
+ if (gsb_l0_stats)
+ kvmppc_gsb_free(gsb_l0_stats);
+ gsm_l0_stats = NULL;
+ gsb_l0_stats = NULL;
+
+ spin_unlock_irqrestore(&lock_l0_stats, flags);
+}
+
+/* L1 wide counters PMU */
+static struct pmu kvmppc_pmu = {
+ .module = THIS_MODULE,
+ .task_ctx_nr = perf_sw_context,
+ .name = "kvm-hv",
+ .event_init = kvmppc_pmu_event_init,
+ .add = kvmppc_pmu_add,
+ .del = kvmppc_pmu_del,
+ .read = kvmppc_pmu_read,
+ .attr_groups = kvmppc_pmu_attr_groups,
+ .type = -1,
+ .scope = PERF_PMU_SCOPE_SYS_WIDE,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
+};
+
+static int __init kvmppc_register_pmu(void)
+{
+ int rc = -EOPNOTSUPP;
+
+ /* only support events for nestedv2 right now */
+ if (kvmhv_is_nestedv2()) {
+ rc = kvmppc_init_hostwide();
+ if (rc)
+ goto out;
+
+ /* Register the pmu */
+ rc = perf_pmu_register(&kvmppc_pmu, kvmppc_pmu.name, -1);
+ if (rc)
+ goto out;
+
+ pr_info("Registered kvm-hv pmu");
+ }
+
+out:
+ return rc;
+}
+
+static void __exit kvmppc_unregister_pmu(void)
+{
+ if (kvmhv_is_nestedv2()) {
+ kvmppc_cleanup_hostwide();
+
+ if (kvmppc_pmu.type != -1)
+ perf_pmu_unregister(&kvmppc_pmu);
+
+ pr_info("kvmhv_pmu unregistered.\n");
+ }
+}
+
+module_init(kvmppc_register_pmu);
+module_exit(kvmppc_unregister_pmu);
+MODULE_DESCRIPTION("KVM PPC Book3s-hv PMU");
+MODULE_AUTHOR("Vaibhav Jain <vaibhav@linux.ibm.com>");
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/platforms/44x/gpio.c b/arch/powerpc/platforms/44x/gpio.c
index e5f2319e5cbe..d540e261d85a 100644
--- a/arch/powerpc/platforms/44x/gpio.c
+++ b/arch/powerpc/platforms/44x/gpio.c
@@ -75,8 +75,7 @@ __ppc4xx_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
clrbits32(&regs->or, GPIO_MASK(gpio));
}
-static void
-ppc4xx_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
+static int ppc4xx_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
{
struct ppc4xx_gpio_chip *chip = gpiochip_get_data(gc);
unsigned long flags;
@@ -88,6 +87,8 @@ ppc4xx_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
spin_unlock_irqrestore(&chip->lock, flags);
pr_debug("%s: gpio: %d val: %d\n", __func__, gpio, val);
+
+ return 0;
}
static int ppc4xx_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
@@ -179,7 +180,7 @@ static int __init ppc4xx_add_gpiochips(void)
gc->direction_input = ppc4xx_gpio_dir_in;
gc->direction_output = ppc4xx_gpio_dir_out;
gc->get = ppc4xx_gpio_get;
- gc->set = ppc4xx_gpio_set;
+ gc->set_rv = ppc4xx_gpio_set;
ret = of_mm_gpiochip_add_data(np, mm_gc, ppc4xx_gc);
if (ret)
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
index 1ea591ec6083..c96af6b0eab4 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -280,7 +280,7 @@ static int mpc52xx_gpt_gpio_get(struct gpio_chip *gc, unsigned int gpio)
return (in_be32(&gpt->regs->status) >> 8) & 1;
}
-static void
+static int
mpc52xx_gpt_gpio_set(struct gpio_chip *gc, unsigned int gpio, int v)
{
struct mpc52xx_gpt_priv *gpt = gpiochip_get_data(gc);
@@ -293,6 +293,8 @@ mpc52xx_gpt_gpio_set(struct gpio_chip *gc, unsigned int gpio, int v)
raw_spin_lock_irqsave(&gpt->lock, flags);
clrsetbits_be32(&gpt->regs->mode, MPC52xx_GPT_MODE_GPIO_MASK, r);
raw_spin_unlock_irqrestore(&gpt->lock, flags);
+
+ return 0;
}
static int mpc52xx_gpt_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
@@ -334,7 +336,7 @@ static void mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt)
gpt->gc.direction_input = mpc52xx_gpt_gpio_dir_in;
gpt->gc.direction_output = mpc52xx_gpt_gpio_dir_out;
gpt->gc.get = mpc52xx_gpt_gpio_get;
- gpt->gc.set = mpc52xx_gpt_gpio_set;
+ gpt->gc.set_rv = mpc52xx_gpt_gpio_set;
gpt->gc.base = -1;
gpt->gc.parent = gpt->dev;
diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
index 4d8fa9ed1a67..6e37dfc6c5c9 100644
--- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
+++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
@@ -92,10 +92,11 @@ static void mcu_power_off(void)
mutex_unlock(&mcu->lock);
}
-static void mcu_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
+static int mcu_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
{
struct mcu *mcu = gpiochip_get_data(gc);
u8 bit = 1 << (4 + gpio);
+ int ret;
mutex_lock(&mcu->lock);
if (val)
@@ -103,14 +104,16 @@ static void mcu_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
else
mcu->reg_ctrl |= bit;
- i2c_smbus_write_byte_data(mcu->client, MCU_REG_CTRL, mcu->reg_ctrl);
+ ret = i2c_smbus_write_byte_data(mcu->client, MCU_REG_CTRL,
+ mcu->reg_ctrl);
mutex_unlock(&mcu->lock);
+
+ return ret;
}
static int mcu_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
{
- mcu_gpio_set(gc, gpio, val);
- return 0;
+ return mcu_gpio_set(gc, gpio, val);
}
static int mcu_gpiochip_add(struct mcu *mcu)
@@ -123,7 +126,7 @@ static int mcu_gpiochip_add(struct mcu *mcu)
gc->can_sleep = 1;
gc->ngpio = MCU_NUM_GPIO;
gc->base = -1;
- gc->set = mcu_gpio_set;
+ gc->set_rv = mcu_gpio_set;
gc->direction_output = mcu_gpio_dir_out;
gc->parent = dev;
diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c
index 1dc095ad48fc..7462c221115c 100644
--- a/arch/powerpc/platforms/8xx/cpm1.c
+++ b/arch/powerpc/platforms/8xx/cpm1.c
@@ -417,7 +417,7 @@ static void __cpm1_gpio16_set(struct cpm1_gpio16_chip *cpm1_gc, u16 pin_mask, in
out_be16(&iop->dat, cpm1_gc->cpdata);
}
-static void cpm1_gpio16_set(struct gpio_chip *gc, unsigned int gpio, int value)
+static int cpm1_gpio16_set(struct gpio_chip *gc, unsigned int gpio, int value)
{
struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(gc);
unsigned long flags;
@@ -428,6 +428,8 @@ static void cpm1_gpio16_set(struct gpio_chip *gc, unsigned int gpio, int value)
__cpm1_gpio16_set(cpm1_gc, pin_mask, value);
spin_unlock_irqrestore(&cpm1_gc->lock, flags);
+
+ return 0;
}
static int cpm1_gpio16_to_irq(struct gpio_chip *gc, unsigned int gpio)
@@ -497,7 +499,7 @@ int cpm1_gpiochip_add16(struct device *dev)
gc->direction_input = cpm1_gpio16_dir_in;
gc->direction_output = cpm1_gpio16_dir_out;
gc->get = cpm1_gpio16_get;
- gc->set = cpm1_gpio16_set;
+ gc->set_rv = cpm1_gpio16_set;
gc->to_irq = cpm1_gpio16_to_irq;
gc->parent = dev;
gc->owner = THIS_MODULE;
@@ -554,7 +556,7 @@ static void __cpm1_gpio32_set(struct cpm1_gpio32_chip *cpm1_gc, u32 pin_mask, in
out_be32(&iop->dat, cpm1_gc->cpdata);
}
-static void cpm1_gpio32_set(struct gpio_chip *gc, unsigned int gpio, int value)
+static int cpm1_gpio32_set(struct gpio_chip *gc, unsigned int gpio, int value)
{
struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(gc);
unsigned long flags;
@@ -565,6 +567,8 @@ static void cpm1_gpio32_set(struct gpio_chip *gc, unsigned int gpio, int value)
__cpm1_gpio32_set(cpm1_gc, pin_mask, value);
spin_unlock_irqrestore(&cpm1_gc->lock, flags);
+
+ return 0;
}
static int cpm1_gpio32_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
@@ -618,7 +622,7 @@ int cpm1_gpiochip_add32(struct device *dev)
gc->direction_input = cpm1_gpio32_dir_in;
gc->direction_output = cpm1_gpio32_dir_out;
gc->get = cpm1_gpio32_get;
- gc->set = cpm1_gpio32_set;
+ gc->set_rv = cpm1_gpio32_set;
gc->parent = dev;
gc->owner = THIS_MODULE;
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 6de1cd5d8a58..e119ced05d10 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -45,6 +45,7 @@
#include <linux/root_dev.h>
#include <linux/bitops.h>
#include <linux/suspend.h>
+#include <linux/string_choices.h>
#include <linux/of.h>
#include <linux/of_platform.h>
@@ -238,8 +239,7 @@ static void __init l2cr_init(void)
_set_L2CR(0);
_set_L2CR(*l2cr);
pr_info("L2CR overridden (0x%x), backside cache is %s\n",
- *l2cr, ((*l2cr) & 0x80000000) ?
- "enabled" : "disabled");
+ *l2cr, str_enabled_disabled((*l2cr) & 0x80000000));
}
of_node_put(np);
break;
diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c
index 8633891b7aa5..b4426a35aca3 100644
--- a/arch/powerpc/platforms/powermac/time.c
+++ b/arch/powerpc/platforms/powermac/time.c
@@ -15,6 +15,7 @@
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/string.h>
+#include <linux/string_choices.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/time.h>
@@ -77,7 +78,7 @@ long __init pmac_time_init(void)
delta |= 0xFF000000UL;
dst = ((pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0x8) & 0x80) != 0);
printk("GMT Delta read from XPRAM: %d minutes, DST: %s\n", delta/60,
- dst ? "on" : "off");
+ str_on_off(dst));
#endif
return delta;
}
diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c
index 61722133eb2d..22d91ac424dd 100644
--- a/arch/powerpc/platforms/ps3/device-init.c
+++ b/arch/powerpc/platforms/ps3/device-init.c
@@ -14,6 +14,7 @@
#include <linux/slab.h>
#include <linux/reboot.h>
#include <linux/rcuwait.h>
+#include <linux/string_choices.h>
#include <asm/firmware.h>
#include <asm/lv1call.h>
@@ -724,7 +725,7 @@ static irqreturn_t ps3_notification_interrupt(int irq, void *data)
static int ps3_notification_read_write(struct ps3_notification_device *dev,
u64 lpar, int write)
{
- const char *op = write ? "write" : "read";
+ const char *op = str_write_read(write);
unsigned long flags;
int res;
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 3f3e3492e436..57222678bb3f 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -3,7 +3,8 @@ ccflags-$(CONFIG_PPC_PSERIES_DEBUG) += -DDEBUG
obj-y := lpar.o hvCall.o nvram.o reconfig.o \
of_helpers.o rtas-work-area.o papr-sysparm.o \
- papr-vpd.o \
+ papr-rtas-common.o papr-vpd.o papr-indices.o \
+ papr-platform-dump.o papr-phy-attest.o \
setup.o iommu.o event_sources.o ras.o \
firmware.o power.o dlpar.o mobility.o rng.o \
pci.o pci_dlpar.o eeh_pseries.o msi.o \
diff --git a/arch/powerpc/platforms/pseries/htmdump.c b/arch/powerpc/platforms/pseries/htmdump.c
index 57fc1700f604..742ec52c9d4d 100644
--- a/arch/powerpc/platforms/pseries/htmdump.c
+++ b/arch/powerpc/platforms/pseries/htmdump.c
@@ -10,28 +10,40 @@
#include <asm/io.h>
#include <asm/machdep.h>
#include <asm/plpar_wrappers.h>
+#include <asm/kvm_guest.h>
static void *htm_buf;
+static void *htm_status_buf;
+static void *htm_info_buf;
+static void *htm_caps_buf;
static u32 nodeindex;
static u32 nodalchipindex;
static u32 coreindexonchip;
static u32 htmtype;
+static u32 htmconfigure;
+static u32 htmstart;
+static u32 htmsetup;
+static u64 htmflags;
+
static struct dentry *htmdump_debugfs_dir;
+#define HTM_ENABLE 1
+#define HTM_DISABLE 0
+#define HTM_NOWRAP 1
+#define HTM_WRAP 0
-static ssize_t htmdump_read(struct file *filp, char __user *ubuf,
- size_t count, loff_t *ppos)
+/*
+ * Check the return code for H_HTM hcall.
+ * Return non-zero value (1) if either H_PARTIAL or H_SUCCESS
+ * is returned. For other return codes:
+ * Return zero if H_NOT_AVAILABLE.
+ * Return -EBUSY if hcall return busy.
+ * Return -EINVAL if any parameter or operation is not valid.
+ * Return -EPERM if HTM Virtualization Engine Technology code
+ * is not applied.
+ * Return -EIO if the HTM state is not valid.
+ */
+static ssize_t htm_return_check(long rc)
{
- void *htm_buf = filp->private_data;
- unsigned long page, read_size, available;
- loff_t offset;
- long rc;
-
- page = ALIGN_DOWN(*ppos, PAGE_SIZE);
- offset = (*ppos) % PAGE_SIZE;
-
- rc = htm_get_dump_hardware(nodeindex, nodalchipindex, coreindexonchip,
- htmtype, virt_to_phys(htm_buf), PAGE_SIZE, page);
-
switch (rc) {
case H_SUCCESS:
/* H_PARTIAL for the case where all available data can't be
@@ -65,6 +77,38 @@ static ssize_t htmdump_read(struct file *filp, char __user *ubuf,
return -EPERM;
}
+ /*
+ * Return 1 for H_SUCCESS/H_PARTIAL
+ */
+ return 1;
+}
+
+static ssize_t htmdump_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ void *htm_buf = filp->private_data;
+ unsigned long page, read_size, available;
+ loff_t offset;
+ long rc, ret;
+
+ page = ALIGN_DOWN(*ppos, PAGE_SIZE);
+ offset = (*ppos) % PAGE_SIZE;
+
+ /*
+ * Invoke H_HTM call with:
+ * - operation as htm dump (H_HTM_OP_DUMP_DATA)
+ * - last three values are address, size and offset
+ */
+ rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip,
+ htmtype, H_HTM_OP_DUMP_DATA, virt_to_phys(htm_buf),
+ PAGE_SIZE, page);
+
+ ret = htm_return_check(rc);
+ if (ret <= 0) {
+ pr_debug("H_HTM hcall failed for op: H_HTM_OP_DUMP_DATA, returning %ld\n", ret);
+ return ret;
+ }
+
available = PAGE_SIZE;
read_size = min(count, available);
*ppos += read_size;
@@ -77,6 +121,292 @@ static const struct file_operations htmdump_fops = {
.open = simple_open,
};
+static int htmconfigure_set(void *data, u64 val)
+{
+ long rc, ret;
+ unsigned long param1 = -1, param2 = -1;
+
+ /*
+ * value as 1 : configure HTM.
+ * value as 0 : deconfigure HTM. Return -EINVAL for
+ * other values.
+ */
+ if (val == HTM_ENABLE) {
+ /*
+ * Invoke H_HTM call with:
+ * - operation as htm configure (H_HTM_OP_CONFIGURE)
+ * - If htmflags is set, param1 and param2 will be -1
+ * which is an indicator to use default htm mode reg mask
+ * and htm mode reg value.
+ * - last three values are unused, hence set to zero
+ */
+ if (!htmflags) {
+ param1 = 0;
+ param2 = 0;
+ }
+
+ rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip,
+ htmtype, H_HTM_OP_CONFIGURE, param1, param2, 0);
+ } else if (val == HTM_DISABLE) {
+ /*
+ * Invoke H_HTM call with:
+ * - operation as htm deconfigure (H_HTM_OP_DECONFIGURE)
+ * - last three values are unused, hence set to zero
+ */
+ rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip,
+ htmtype, H_HTM_OP_DECONFIGURE, 0, 0, 0);
+ } else
+ return -EINVAL;
+
+ ret = htm_return_check(rc);
+ if (ret <= 0) {
+ pr_debug("H_HTM hcall failed, returning %ld\n", ret);
+ return ret;
+ }
+
+ /* Set htmconfigure if operation succeeds */
+ htmconfigure = val;
+
+ return 0;
+}
+
+static int htmconfigure_get(void *data, u64 *val)
+{
+ *val = htmconfigure;
+ return 0;
+}
+
+static int htmstart_set(void *data, u64 val)
+{
+ long rc, ret;
+
+ /*
+ * value as 1: start HTM
+ * value as 0: stop HTM
+ * Return -EINVAL for other values.
+ */
+ if (val == HTM_ENABLE) {
+ /*
+ * Invoke H_HTM call with:
+ * - operation as htm start (H_HTM_OP_START)
+ * - last three values are unused, hence set to zero
+ */
+ rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip,
+ htmtype, H_HTM_OP_START, 0, 0, 0);
+
+ } else if (val == HTM_DISABLE) {
+ /*
+ * Invoke H_HTM call with:
+ * - operation as htm stop (H_HTM_OP_STOP)
+ * - last three values are unused, hence set to zero
+ */
+ rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip,
+ htmtype, H_HTM_OP_STOP, 0, 0, 0);
+ } else
+ return -EINVAL;
+
+ ret = htm_return_check(rc);
+ if (ret <= 0) {
+ pr_debug("H_HTM hcall failed, returning %ld\n", ret);
+ return ret;
+ }
+
+ /* Set htmstart if H_HTM_OP_START/H_HTM_OP_STOP operation succeeds */
+ htmstart = val;
+
+ return 0;
+}
+
+static int htmstart_get(void *data, u64 *val)
+{
+ *val = htmstart;
+ return 0;
+}
+
+static ssize_t htmstatus_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ void *htm_status_buf = filp->private_data;
+ long rc, ret;
+ u64 *num_entries;
+ u64 to_copy;
+ int htmstatus_flag;
+
+ /*
+ * Invoke H_HTM call with:
+ * - operation as htm status (H_HTM_OP_STATUS)
+ * - last three values as addr, size and offset
+ */
+ rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip,
+ htmtype, H_HTM_OP_STATUS, virt_to_phys(htm_status_buf),
+ PAGE_SIZE, 0);
+
+ ret = htm_return_check(rc);
+ if (ret <= 0) {
+ pr_debug("H_HTM hcall failed for op: H_HTM_OP_STATUS, returning %ld\n", ret);
+ return ret;
+ }
+
+ /*
+ * HTM status buffer, start of buffer + 0x10 gives the
+ * number of HTM entries in the buffer. Each nest htm status
+ * entry is 0x6 bytes where each core htm status entry is
+ * 0x8 bytes.
+ * So total count to copy is:
+ * 32 bytes (for first 7 fields) + (number of HTM entries * entry size)
+ */
+ num_entries = htm_status_buf + 0x10;
+ if (htmtype == 0x2)
+ htmstatus_flag = 0x8;
+ else
+ htmstatus_flag = 0x6;
+ to_copy = 32 + (be64_to_cpu(*num_entries) * htmstatus_flag);
+ return simple_read_from_buffer(ubuf, count, ppos, htm_status_buf, to_copy);
+}
+
+static const struct file_operations htmstatus_fops = {
+ .llseek = NULL,
+ .read = htmstatus_read,
+ .open = simple_open,
+};
+
+static ssize_t htminfo_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ void *htm_info_buf = filp->private_data;
+ long rc, ret;
+ u64 *num_entries;
+ u64 to_copy;
+
+ /*
+ * Invoke H_HTM call with:
+ * - operation as htm status (H_HTM_OP_STATUS)
+ * - last three values as addr, size and offset
+ */
+ rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip,
+ htmtype, H_HTM_OP_DUMP_SYSPROC_CONF, virt_to_phys(htm_info_buf),
+ PAGE_SIZE, 0);
+
+ ret = htm_return_check(rc);
+ if (ret <= 0) {
+ pr_debug("H_HTM hcall failed for op: H_HTM_OP_DUMP_SYSPROC_CONF, returning %ld\n", ret);
+ return ret;
+ }
+
+ /*
+ * HTM status buffer, start of buffer + 0x10 gives the
+ * number of HTM entries in the buffer. Each entry of processor
+ * is 16 bytes.
+ *
+ * So total count to copy is:
+ * 32 bytes (for first 5 fields) + (number of HTM entries * entry size)
+ */
+ num_entries = htm_info_buf + 0x10;
+ to_copy = 32 + (be64_to_cpu(*num_entries) * 16);
+ return simple_read_from_buffer(ubuf, count, ppos, htm_info_buf, to_copy);
+}
+
+static ssize_t htmcaps_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ void *htm_caps_buf = filp->private_data;
+ long rc, ret;
+
+ /*
+ * Invoke H_HTM call with:
+ * - operation as htm capabilities (H_HTM_OP_CAPABILITIES)
+ * - last three values as addr, size (0x80 for Capabilities Output Buffer
+ * and zero
+ */
+ rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip,
+ htmtype, H_HTM_OP_CAPABILITIES, virt_to_phys(htm_caps_buf),
+ 0x80, 0);
+
+ ret = htm_return_check(rc);
+ if (ret <= 0) {
+ pr_debug("H_HTM hcall failed for op: H_HTM_OP_CAPABILITIES, returning %ld\n", ret);
+ return ret;
+ }
+
+ return simple_read_from_buffer(ubuf, count, ppos, htm_caps_buf, 0x80);
+}
+
+static const struct file_operations htminfo_fops = {
+ .llseek = NULL,
+ .read = htminfo_read,
+ .open = simple_open,
+};
+
+static const struct file_operations htmcaps_fops = {
+ .llseek = NULL,
+ .read = htmcaps_read,
+ .open = simple_open,
+};
+
+static int htmsetup_set(void *data, u64 val)
+{
+ long rc, ret;
+
+ /*
+ * Input value: HTM buffer size in the power of 2
+ * example: hex value 0x21 ( decimal: 33 ) is for
+ * 8GB
+ * Invoke H_HTM call with:
+ * - operation as htm start (H_HTM_OP_SETUP)
+ * - parameter 1 set to input value.
+ * - last two values are unused, hence set to zero
+ */
+ rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip,
+ htmtype, H_HTM_OP_SETUP, val, 0, 0);
+
+ ret = htm_return_check(rc);
+ if (ret <= 0) {
+ pr_debug("H_HTM hcall failed for op: H_HTM_OP_SETUP, returning %ld\n", ret);
+ return ret;
+ }
+
+ /* Set htmsetup if H_HTM_OP_SETUP operation succeeds */
+ htmsetup = val;
+
+ return 0;
+}
+
+static int htmsetup_get(void *data, u64 *val)
+{
+ *val = htmsetup;
+ return 0;
+}
+
+static int htmflags_set(void *data, u64 val)
+{
+ /*
+ * Input value:
+ * Currently supported flag value is to enable/disable
+ * HTM buffer wrap. wrap is used along with "configure"
+ * to prevent HTM buffer from wrapping.
+ * Writing 1 will set noWrap while configuring HTM
+ */
+ if (val == HTM_NOWRAP)
+ htmflags = H_HTM_FLAGS_NOWRAP;
+ else if (val == HTM_WRAP)
+ htmflags = 0;
+ else
+ return -EINVAL;
+
+ return 0;
+}
+
+static int htmflags_get(void *data, u64 *val)
+{
+ *val = htmflags;
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(htmconfigure_fops, htmconfigure_get, htmconfigure_set, "%llu\n");
+DEFINE_SIMPLE_ATTRIBUTE(htmstart_fops, htmstart_get, htmstart_set, "%llu\n");
+DEFINE_SIMPLE_ATTRIBUTE(htmsetup_fops, htmsetup_get, htmsetup_set, "%llu\n");
+DEFINE_SIMPLE_ATTRIBUTE(htmflags_fops, htmflags_get, htmflags_set, "%llu\n");
+
static int htmdump_init_debugfs(void)
{
htm_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
@@ -98,11 +428,50 @@ static int htmdump_init_debugfs(void)
htmdump_debugfs_dir, &htmtype);
debugfs_create_file("trace", 0400, htmdump_debugfs_dir, htm_buf, &htmdump_fops);
+ /*
+ * Debugfs interface files to control HTM operations:
+ */
+ debugfs_create_file("htmconfigure", 0600, htmdump_debugfs_dir, NULL, &htmconfigure_fops);
+ debugfs_create_file("htmstart", 0600, htmdump_debugfs_dir, NULL, &htmstart_fops);
+ debugfs_create_file("htmsetup", 0600, htmdump_debugfs_dir, NULL, &htmsetup_fops);
+ debugfs_create_file("htmflags", 0600, htmdump_debugfs_dir, NULL, &htmflags_fops);
+
+ /* Debugfs interface file to present status of HTM */
+ htm_status_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!htm_status_buf) {
+ pr_err("Failed to allocate htmstatus buf\n");
+ return -ENOMEM;
+ }
+
+ /* Debugfs interface file to present System Processor Configuration */
+ htm_info_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!htm_info_buf) {
+ pr_err("Failed to allocate htm info buf\n");
+ return -ENOMEM;
+ }
+
+ /* Debugfs interface file to present HTM capabilities */
+ htm_caps_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!htm_caps_buf) {
+ pr_err("Failed to allocate htm caps buf\n");
+ return -ENOMEM;
+ }
+
+ debugfs_create_file("htmstatus", 0400, htmdump_debugfs_dir, htm_status_buf, &htmstatus_fops);
+ debugfs_create_file("htminfo", 0400, htmdump_debugfs_dir, htm_info_buf, &htminfo_fops);
+ debugfs_create_file("htmcaps", 0400, htmdump_debugfs_dir, htm_caps_buf, &htmcaps_fops);
+
return 0;
}
static int __init htmdump_init(void)
{
+ /* Disable on kvm guest */
+ if (is_kvm_guest()) {
+ pr_info("htmdump not supported inside KVM guest\n");
+ return -EOPNOTSUPP;
+ }
+
if (htmdump_init_debugfs())
return -ENOMEM;
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index d6ebc19fb99c..eec333dd2e59 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -197,7 +197,7 @@ static void tce_iommu_userspace_view_free(struct iommu_table *tbl)
static void tce_free_pSeries(struct iommu_table *tbl)
{
- if (!tbl->it_userspace)
+ if (tbl->it_userspace)
tce_iommu_userspace_view_free(tbl);
}
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index f9d80111c322..957c0c03d259 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -525,7 +525,12 @@ static struct msi_domain_info pseries_msi_domain_info = {
static void pseries_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
{
- __pci_read_msi_msg(irq_data_get_msi_desc(data), msg);
+ struct pci_dev *dev = msi_desc_to_pci_dev(irq_data_get_msi_desc(data));
+
+ if (dev->current_state == PCI_D0)
+ __pci_read_msi_msg(irq_data_get_msi_desc(data), msg);
+ else
+ get_cached_msi_msg(data->irq, msg);
}
static struct irq_chip pseries_msi_irq_chip = {
diff --git a/arch/powerpc/platforms/pseries/papr-indices.c b/arch/powerpc/platforms/pseries/papr-indices.c
new file mode 100644
index 000000000000..3c7545591c45
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/papr-indices.c
@@ -0,0 +1,488 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define pr_fmt(fmt) "papr-indices: " fmt
+
+#include <linux/build_bug.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/lockdep.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/signal.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/string_helpers.h>
+#include <linux/uaccess.h>
+#include <asm/machdep.h>
+#include <asm/rtas-work-area.h>
+#include <asm/rtas.h>
+#include <uapi/asm/papr-indices.h>
+#include "papr-rtas-common.h"
+
+/*
+ * Function-specific return values for ibm,set-dynamic-indicator and
+ * ibm,get-dynamic-sensor-state RTAS calls.
+ * PAPR+ v2.13 7.3.18 and 7.3.19.
+ */
+#define RTAS_IBM_DYNAMIC_INDICE_NO_INDICATOR -3
+
+/**
+ * struct rtas_get_indices_params - Parameters (in and out) for
+ * ibm,get-indices.
+ * @is_sensor: In: Caller-provided whether sensor or indicator.
+ * @indice_type:In: Caller-provided indice (sensor or indicator) token
+ * @work_area: In: Caller-provided work area buffer for results.
+ * @next: In: Sequence number. Out: Next sequence number.
+ * @status: Out: RTAS call status.
+ */
+struct rtas_get_indices_params {
+ u8 is_sensor;
+ u32 indice_type;
+ struct rtas_work_area *work_area;
+ u32 next;
+ s32 status;
+};
+
+/*
+ * rtas_ibm_get_indices() - Call ibm,get-indices to fill a work area buffer.
+ * @params: See &struct rtas_ibm_get_indices_params.
+ *
+ * Calls ibm,get-indices until it errors or successfully deposits data
+ * into the supplied work area. Handles RTAS retry statuses. Maps RTAS
+ * error statuses to reasonable errno values.
+ *
+ * The caller is expected to invoke rtas_ibm_get_indices() multiple times
+ * to retrieve all indices data for the provided indice type. Only one
+ * sequence should be in progress at any time; starting a new sequence
+ * will disrupt any sequence already in progress. Serialization of
+ * indices retrieval sequences is the responsibility of the caller.
+ *
+ * The caller should inspect @params.status to determine whether more
+ * calls are needed to complete the sequence.
+ *
+ * Context: May sleep.
+ * Return: -ve on error, 0 otherwise.
+ */
+static int rtas_ibm_get_indices(struct rtas_get_indices_params *params)
+{
+ struct rtas_work_area *work_area = params->work_area;
+ const s32 token = rtas_function_token(RTAS_FN_IBM_GET_INDICES);
+ u32 rets;
+ s32 fwrc;
+ int ret;
+
+ if (token == RTAS_UNKNOWN_SERVICE)
+ return -ENOENT;
+
+ lockdep_assert_held(&rtas_ibm_get_indices_lock);
+
+ do {
+ fwrc = rtas_call(token, 5, 2, &rets, params->is_sensor,
+ params->indice_type,
+ rtas_work_area_phys(work_area),
+ rtas_work_area_size(work_area),
+ params->next);
+ } while (rtas_busy_delay(fwrc));
+
+ switch (fwrc) {
+ case RTAS_HARDWARE_ERROR:
+ ret = -EIO;
+ break;
+ case RTAS_INVALID_PARAMETER: /* Indicator type is not supported */
+ ret = -EINVAL;
+ break;
+ case RTAS_SEQ_START_OVER:
+ ret = -EAGAIN;
+ pr_info_ratelimited("Indices changed during retrieval, retrying\n");
+ params->next = 1;
+ break;
+ case RTAS_SEQ_MORE_DATA:
+ params->next = rets;
+ ret = 0;
+ break;
+ case RTAS_SEQ_COMPLETE:
+ params->next = 0;
+ ret = 0;
+ break;
+ default:
+ ret = -EIO;
+ pr_err_ratelimited("unexpected ibm,get-indices status %d\n", fwrc);
+ break;
+ }
+
+ params->status = fwrc;
+ return ret;
+}
+
+/*
+ * Internal indices sequence APIs. A sequence is a series of calls to
+ * ibm,get-indices for a given location code. The sequence ends when
+ * an error is encountered or all indices for the input has been
+ * returned.
+ */
+
+/*
+ * indices_sequence_begin() - Begin a indices retrieval sequence.
+ *
+ * Context: May sleep.
+ */
+static void indices_sequence_begin(struct papr_rtas_sequence *seq)
+{
+ struct rtas_get_indices_params *param;
+
+ param = (struct rtas_get_indices_params *)seq->params;
+ /*
+ * We could allocate the work area before acquiring the
+ * function lock, but that would allow concurrent requests to
+ * exhaust the limited work area pool for no benefit. So
+ * allocate the work area under the lock.
+ */
+ mutex_lock(&rtas_ibm_get_indices_lock);
+ param->work_area = rtas_work_area_alloc(RTAS_GET_INDICES_BUF_SIZE);
+ param->next = 1;
+ param->status = 0;
+}
+
+/*
+ * indices_sequence_end() - Finalize a indices retrieval sequence.
+ *
+ * Releases resources obtained by indices_sequence_begin().
+ */
+static void indices_sequence_end(struct papr_rtas_sequence *seq)
+{
+ struct rtas_get_indices_params *param;
+
+ param = (struct rtas_get_indices_params *)seq->params;
+ rtas_work_area_free(param->work_area);
+ mutex_unlock(&rtas_ibm_get_indices_lock);
+}
+
+/*
+ * Work function to be passed to papr_rtas_blob_generate().
+ *
+ * ibm,get-indices RTAS call fills the work area with the certain
+ * format but does not return the bytes written in the buffer. So
+ * instead of kernel parsing this work area to determine the buffer
+ * length, copy the complete work area (RTAS_GET_INDICES_BUF_SIZE)
+ * to the blob and let the user space to obtain the data.
+ * Means RTAS_GET_INDICES_BUF_SIZE data will be returned for each
+ * read().
+ */
+
+static const char *indices_sequence_fill_work_area(struct papr_rtas_sequence *seq,
+ size_t *len)
+{
+ struct rtas_get_indices_params *p;
+ bool init_state;
+
+ p = (struct rtas_get_indices_params *)seq->params;
+ init_state = (p->next == 1) ? true : false;
+
+ if (papr_rtas_sequence_should_stop(seq, p->status, init_state))
+ return NULL;
+ if (papr_rtas_sequence_set_err(seq, rtas_ibm_get_indices(p)))
+ return NULL;
+
+ *len = RTAS_GET_INDICES_BUF_SIZE;
+ return rtas_work_area_raw_buf(p->work_area);
+}
+
+/*
+ * papr_indices_handle_read - returns indices blob data to the user space
+ *
+ * ibm,get-indices RTAS call fills the work area with the certian
+ * format but does not return the bytes written in the buffer and
+ * copied RTAS_GET_INDICES_BUF_SIZE data to the blob for each RTAS
+ * call. So send RTAS_GET_INDICES_BUF_SIZE buffer to the user space
+ * for each read().
+ */
+static ssize_t papr_indices_handle_read(struct file *file,
+ char __user *buf, size_t size, loff_t *off)
+{
+ const struct papr_rtas_blob *blob = file->private_data;
+
+ /* we should not instantiate a handle without any data attached. */
+ if (!papr_rtas_blob_has_data(blob)) {
+ pr_err_once("handle without data\n");
+ return -EIO;
+ }
+
+ if (size < RTAS_GET_INDICES_BUF_SIZE) {
+ pr_err_once("Invalid buffer length %ld, expect %d\n",
+ size, RTAS_GET_INDICES_BUF_SIZE);
+ return -EINVAL;
+ } else if (size > RTAS_GET_INDICES_BUF_SIZE)
+ size = RTAS_GET_INDICES_BUF_SIZE;
+
+ return simple_read_from_buffer(buf, size, off, blob->data, blob->len);
+}
+
+static const struct file_operations papr_indices_handle_ops = {
+ .read = papr_indices_handle_read,
+ .llseek = papr_rtas_common_handle_seek,
+ .release = papr_rtas_common_handle_release,
+};
+
+/*
+ * papr_indices_create_handle() - Create a fd-based handle for reading
+ * indices data
+ * @ubuf: Input parameters to RTAS call such as whether sensor or indicator
+ * and indice type in user memory
+ *
+ * Handler for PAPR_INDICES_IOC_GET ioctl command. Validates @ubuf
+ * and instantiates an immutable indices "blob" for it. The blob is
+ * attached to a file descriptor for reading by user space. The memory
+ * backing the blob is freed when the file is released.
+ *
+ * The entire requested indices is retrieved by this call and all
+ * necessary RTAS interactions are performed before returning the fd
+ * to user space. This keeps the read handler simple and ensures that
+ * the kernel can prevent interleaving of ibm,get-indices call sequences.
+ *
+ * Return: The installed fd number if successful, -ve errno otherwise.
+ */
+static long papr_indices_create_handle(struct papr_indices_io_block __user *ubuf)
+{
+ struct papr_rtas_sequence seq = {};
+ struct rtas_get_indices_params params = {};
+ int fd;
+
+ if (get_user(params.is_sensor, &ubuf->indices.is_sensor))
+ return -EFAULT;
+
+ if (get_user(params.indice_type, &ubuf->indices.indice_type))
+ return -EFAULT;
+
+ seq = (struct papr_rtas_sequence) {
+ .begin = indices_sequence_begin,
+ .end = indices_sequence_end,
+ .work = indices_sequence_fill_work_area,
+ };
+
+ seq.params = &params;
+ fd = papr_rtas_setup_file_interface(&seq,
+ &papr_indices_handle_ops, "[papr-indices]");
+
+ return fd;
+}
+
+/*
+ * Create work area with the input parameters. This function is used
+ * for both ibm,set-dynamic-indicator and ibm,get-dynamic-sensor-state
+ * RTAS Calls.
+ */
+static struct rtas_work_area *
+papr_dynamic_indice_buf_from_user(struct papr_indices_io_block __user *ubuf,
+ struct papr_indices_io_block *kbuf)
+{
+ struct rtas_work_area *work_area;
+ u32 length;
+ __be32 len_be;
+
+ if (copy_from_user(kbuf, ubuf, sizeof(*kbuf)))
+ return ERR_PTR(-EFAULT);
+
+
+ if (!string_is_terminated(kbuf->dynamic_param.location_code_str,
+ ARRAY_SIZE(kbuf->dynamic_param.location_code_str)))
+ return ERR_PTR(-EINVAL);
+
+ /*
+ * The input data in the work area should be as follows:
+ * - 32-bit integer length of the location code string,
+ * including NULL.
+ * - Location code string, NULL terminated, identifying the
+ * token (sensor or indicator).
+ * PAPR 2.13 - R1–7.3.18–5 ibm,set-dynamic-indicator
+ * - R1–7.3.19–5 ibm,get-dynamic-sensor-state
+ */
+ /*
+ * Length that user space passed should also include NULL
+ * terminator.
+ */
+ length = strlen(kbuf->dynamic_param.location_code_str) + 1;
+ if (length > LOC_CODE_SIZE)
+ return ERR_PTR(-EINVAL);
+
+ len_be = cpu_to_be32(length);
+
+ work_area = rtas_work_area_alloc(LOC_CODE_SIZE + sizeof(u32));
+ memcpy(rtas_work_area_raw_buf(work_area), &len_be, sizeof(u32));
+ memcpy((rtas_work_area_raw_buf(work_area) + sizeof(u32)),
+ &kbuf->dynamic_param.location_code_str, length);
+
+ return work_area;
+}
+
+/**
+ * papr_dynamic_indicator_ioc_set - ibm,set-dynamic-indicator RTAS Call
+ * PAPR 2.13 7.3.18
+ *
+ * @ubuf: Input parameters to RTAS call such as indicator token and
+ * new state.
+ *
+ * Returns success or -errno.
+ */
+static long papr_dynamic_indicator_ioc_set(struct papr_indices_io_block __user *ubuf)
+{
+ struct papr_indices_io_block kbuf;
+ struct rtas_work_area *work_area;
+ s32 fwrc, token, ret;
+
+ token = rtas_function_token(RTAS_FN_IBM_SET_DYNAMIC_INDICATOR);
+ if (token == RTAS_UNKNOWN_SERVICE)
+ return -ENOENT;
+
+ mutex_lock(&rtas_ibm_set_dynamic_indicator_lock);
+ work_area = papr_dynamic_indice_buf_from_user(ubuf, &kbuf);
+ if (IS_ERR(work_area)) {
+ ret = PTR_ERR(work_area);
+ goto out;
+ }
+
+ do {
+ fwrc = rtas_call(token, 3, 1, NULL,
+ kbuf.dynamic_param.token,
+ kbuf.dynamic_param.state,
+ rtas_work_area_phys(work_area));
+ } while (rtas_busy_delay(fwrc));
+
+ rtas_work_area_free(work_area);
+
+ switch (fwrc) {
+ case RTAS_SUCCESS:
+ ret = 0;
+ break;
+ case RTAS_IBM_DYNAMIC_INDICE_NO_INDICATOR: /* No such indicator */
+ ret = -EOPNOTSUPP;
+ break;
+ default:
+ pr_err("unexpected ibm,set-dynamic-indicator result %d\n",
+ fwrc);
+ fallthrough;
+ case RTAS_HARDWARE_ERROR: /* Hardware/platform error */
+ ret = -EIO;
+ break;
+ }
+
+out:
+ mutex_unlock(&rtas_ibm_set_dynamic_indicator_lock);
+ return ret;
+}
+
+/**
+ * papr_dynamic_sensor_ioc_get - ibm,get-dynamic-sensor-state RTAS Call
+ * PAPR 2.13 7.3.19
+ *
+ * @ubuf: Input parameters to RTAS call such as sensor token
+ * Copies the state in user space buffer.
+ *
+ *
+ * Returns success or -errno.
+ */
+
+static long papr_dynamic_sensor_ioc_get(struct papr_indices_io_block __user *ubuf)
+{
+ struct papr_indices_io_block kbuf;
+ struct rtas_work_area *work_area;
+ s32 fwrc, token, ret;
+ u32 rets;
+
+ token = rtas_function_token(RTAS_FN_IBM_GET_DYNAMIC_SENSOR_STATE);
+ if (token == RTAS_UNKNOWN_SERVICE)
+ return -ENOENT;
+
+ mutex_lock(&rtas_ibm_get_dynamic_sensor_state_lock);
+ work_area = papr_dynamic_indice_buf_from_user(ubuf, &kbuf);
+ if (IS_ERR(work_area)) {
+ ret = PTR_ERR(work_area);
+ goto out;
+ }
+
+ do {
+ fwrc = rtas_call(token, 2, 2, &rets,
+ kbuf.dynamic_param.token,
+ rtas_work_area_phys(work_area));
+ } while (rtas_busy_delay(fwrc));
+
+ rtas_work_area_free(work_area);
+
+ switch (fwrc) {
+ case RTAS_SUCCESS:
+ if (put_user(rets, &ubuf->dynamic_param.state))
+ ret = -EFAULT;
+ else
+ ret = 0;
+ break;
+ case RTAS_IBM_DYNAMIC_INDICE_NO_INDICATOR: /* No such indicator */
+ ret = -EOPNOTSUPP;
+ break;
+ default:
+ pr_err("unexpected ibm,get-dynamic-sensor result %d\n",
+ fwrc);
+ fallthrough;
+ case RTAS_HARDWARE_ERROR: /* Hardware/platform error */
+ ret = -EIO;
+ break;
+ }
+
+out:
+ mutex_unlock(&rtas_ibm_get_dynamic_sensor_state_lock);
+ return ret;
+}
+
+/*
+ * Top-level ioctl handler for /dev/papr-indices.
+ */
+static long papr_indices_dev_ioctl(struct file *filp, unsigned int ioctl,
+ unsigned long arg)
+{
+ void __user *argp = (__force void __user *)arg;
+ long ret;
+
+ switch (ioctl) {
+ case PAPR_INDICES_IOC_GET:
+ ret = papr_indices_create_handle(argp);
+ break;
+ case PAPR_DYNAMIC_SENSOR_IOC_GET:
+ ret = papr_dynamic_sensor_ioc_get(argp);
+ break;
+ case PAPR_DYNAMIC_INDICATOR_IOC_SET:
+ if (filp->f_mode & FMODE_WRITE)
+ ret = papr_dynamic_indicator_ioc_set(argp);
+ else
+ ret = -EBADF;
+ break;
+ default:
+ ret = -ENOIOCTLCMD;
+ break;
+ }
+
+ return ret;
+}
+
+static const struct file_operations papr_indices_ops = {
+ .unlocked_ioctl = papr_indices_dev_ioctl,
+};
+
+static struct miscdevice papr_indices_dev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "papr-indices",
+ .fops = &papr_indices_ops,
+};
+
+static __init int papr_indices_init(void)
+{
+ if (!rtas_function_implemented(RTAS_FN_IBM_GET_INDICES))
+ return -ENODEV;
+
+ if (!rtas_function_implemented(RTAS_FN_IBM_SET_DYNAMIC_INDICATOR))
+ return -ENODEV;
+
+ if (!rtas_function_implemented(RTAS_FN_IBM_GET_DYNAMIC_SENSOR_STATE))
+ return -ENODEV;
+
+ return misc_register(&papr_indices_dev);
+}
+machine_device_initcall(pseries, papr_indices_init);
diff --git a/arch/powerpc/platforms/pseries/papr-phy-attest.c b/arch/powerpc/platforms/pseries/papr-phy-attest.c
new file mode 100644
index 000000000000..1907f2411567
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/papr-phy-attest.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define pr_fmt(fmt) "papr-phy-attest: " fmt
+
+#include <linux/build_bug.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/lockdep.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/signal.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/string_helpers.h>
+#include <linux/uaccess.h>
+#include <asm/machdep.h>
+#include <asm/rtas-work-area.h>
+#include <asm/rtas.h>
+#include <uapi/asm/papr-physical-attestation.h>
+#include "papr-rtas-common.h"
+
+/**
+ * struct rtas_phy_attest_params - Parameters (in and out) for
+ * ibm,physical-attestation.
+ *
+ * @cmd: In: Caller-provided attestation command buffer. Must be
+ * RTAS-addressable.
+ * @work_area: In: Caller-provided work area buffer for attestation
+ * command structure
+ * Out: Caller-provided work area buffer for the response
+ * @cmd_len: In: Caller-provided attestation command structure
+ * length
+ * @sequence: In: Sequence number. Out: Next sequence number.
+ * @written: Out: Bytes written by ibm,physical-attestation to
+ * @work_area.
+ * @status: Out: RTAS call status.
+ */
+struct rtas_phy_attest_params {
+ struct papr_phy_attest_io_block cmd;
+ struct rtas_work_area *work_area;
+ u32 cmd_len;
+ u32 sequence;
+ u32 written;
+ s32 status;
+};
+
+/**
+ * rtas_physical_attestation() - Call ibm,physical-attestation to
+ * fill a work area buffer.
+ * @params: See &struct rtas_phy_attest_params.
+ *
+ * Calls ibm,physical-attestation until it errors or successfully
+ * deposits data into the supplied work area. Handles RTAS retry
+ * statuses. Maps RTAS error statuses to reasonable errno values.
+ *
+ * The caller is expected to invoke rtas_physical_attestation()
+ * multiple times to retrieve all the data for the provided
+ * attestation command. Only one sequence should be in progress at
+ * any time; starting a new sequence will disrupt any sequence
+ * already in progress. Serialization of attestation retrieval
+ * sequences is the responsibility of the caller.
+ *
+ * The caller should inspect @params.status to determine whether more
+ * calls are needed to complete the sequence.
+ *
+ * Context: May sleep.
+ * Return: -ve on error, 0 otherwise.
+ */
+static int rtas_physical_attestation(struct rtas_phy_attest_params *params)
+{
+ struct rtas_work_area *work_area;
+ s32 fwrc, token;
+ u32 rets[2];
+ int ret;
+
+ work_area = params->work_area;
+ token = rtas_function_token(RTAS_FN_IBM_PHYSICAL_ATTESTATION);
+ if (token == RTAS_UNKNOWN_SERVICE)
+ return -ENOENT;
+
+ lockdep_assert_held(&rtas_ibm_physical_attestation_lock);
+
+ do {
+ fwrc = rtas_call(token, 3, 3, rets,
+ rtas_work_area_phys(work_area),
+ params->cmd_len,
+ params->sequence);
+ } while (rtas_busy_delay(fwrc));
+
+ switch (fwrc) {
+ case RTAS_HARDWARE_ERROR:
+ ret = -EIO;
+ break;
+ case RTAS_INVALID_PARAMETER:
+ ret = -EINVAL;
+ break;
+ case RTAS_SEQ_MORE_DATA:
+ params->sequence = rets[0];
+ fallthrough;
+ case RTAS_SEQ_COMPLETE:
+ params->written = rets[1];
+ /*
+ * Kernel or firmware bug, do not continue.
+ */
+ if (WARN(params->written > rtas_work_area_size(work_area),
+ "possible write beyond end of work area"))
+ ret = -EFAULT;
+ else
+ ret = 0;
+ break;
+ default:
+ ret = -EIO;
+ pr_err_ratelimited("unexpected ibm,get-phy_attest status %d\n", fwrc);
+ break;
+ }
+
+ params->status = fwrc;
+ return ret;
+}
+
+/*
+ * Internal physical-attestation sequence APIs. A physical-attestation
+ * sequence is a series of calls to get ibm,physical-attestation
+ * for a given attestation command. The sequence ends when an error
+ * is encountered or all data for the attestation command has been
+ * returned.
+ */
+
+/**
+ * phy_attest_sequence_begin() - Begin a response data for attestation
+ * command retrieval sequence.
+ * @seq: user specified parameters for RTAS call from seq struct.
+ *
+ * Context: May sleep.
+ */
+static void phy_attest_sequence_begin(struct papr_rtas_sequence *seq)
+{
+ struct rtas_phy_attest_params *param;
+
+ /*
+ * We could allocate the work area before acquiring the
+ * function lock, but that would allow concurrent requests to
+ * exhaust the limited work area pool for no benefit. So
+ * allocate the work area under the lock.
+ */
+ mutex_lock(&rtas_ibm_physical_attestation_lock);
+ param = (struct rtas_phy_attest_params *)seq->params;
+ param->work_area = rtas_work_area_alloc(SZ_4K);
+ memcpy(rtas_work_area_raw_buf(param->work_area), &param->cmd,
+ param->cmd_len);
+ param->sequence = 1;
+ param->status = 0;
+}
+
+/**
+ * phy_attest_sequence_end() - Finalize a attestation command
+ * response retrieval sequence.
+ * @seq: Sequence state.
+ *
+ * Releases resources obtained by phy_attest_sequence_begin().
+ */
+static void phy_attest_sequence_end(struct papr_rtas_sequence *seq)
+{
+ struct rtas_phy_attest_params *param;
+
+ param = (struct rtas_phy_attest_params *)seq->params;
+ rtas_work_area_free(param->work_area);
+ mutex_unlock(&rtas_ibm_physical_attestation_lock);
+ kfree(param);
+}
+
+/*
+ * Generator function to be passed to papr_rtas_blob_generate().
+ */
+static const char *phy_attest_sequence_fill_work_area(struct papr_rtas_sequence *seq,
+ size_t *len)
+{
+ struct rtas_phy_attest_params *p;
+ bool init_state;
+
+ p = (struct rtas_phy_attest_params *)seq->params;
+ init_state = (p->written == 0) ? true : false;
+
+ if (papr_rtas_sequence_should_stop(seq, p->status, init_state))
+ return NULL;
+ if (papr_rtas_sequence_set_err(seq, rtas_physical_attestation(p)))
+ return NULL;
+ *len = p->written;
+ return rtas_work_area_raw_buf(p->work_area);
+}
+
+static const struct file_operations papr_phy_attest_handle_ops = {
+ .read = papr_rtas_common_handle_read,
+ .llseek = papr_rtas_common_handle_seek,
+ .release = papr_rtas_common_handle_release,
+};
+
+/**
+ * papr_phy_attest_create_handle() - Create a fd-based handle for
+ * reading the response for the given attestation command.
+ * @ulc: Attestation command in user memory; defines the scope of
+ * data for the attestation command to retrieve.
+ *
+ * Handler for PAPR_PHYSICAL_ATTESTATION_IOC_CREATE_HANDLE ioctl
+ * command. Validates @ulc and instantiates an immutable response
+ * "blob" for attestation command. The blob is attached to a file
+ * descriptor for reading by user space. The memory backing the blob
+ * is freed when the file is released.
+ *
+ * The entire requested response buffer for the attestation command
+ * retrieved by this call and all necessary RTAS interactions are
+ * performed before returning the fd to user space. This keeps the
+ * read handler simple and ensures that kernel can prevent
+ * interleaving ibm,physical-attestation call sequences.
+ *
+ * Return: The installed fd number if successful, -ve errno otherwise.
+ */
+static long papr_phy_attest_create_handle(struct papr_phy_attest_io_block __user *ulc)
+{
+ struct rtas_phy_attest_params *params;
+ struct papr_rtas_sequence seq = {};
+ int fd;
+
+ /*
+ * Freed in phy_attest_sequence_end().
+ */
+ params = kzalloc(sizeof(*params), GFP_KERNEL_ACCOUNT);
+ if (!params)
+ return -ENOMEM;
+
+ if (copy_from_user(&params->cmd, ulc,
+ sizeof(struct papr_phy_attest_io_block)))
+ return -EFAULT;
+
+ params->cmd_len = be32_to_cpu(params->cmd.length);
+ seq = (struct papr_rtas_sequence) {
+ .begin = phy_attest_sequence_begin,
+ .end = phy_attest_sequence_end,
+ .work = phy_attest_sequence_fill_work_area,
+ };
+
+ seq.params = (void *)params;
+
+ fd = papr_rtas_setup_file_interface(&seq,
+ &papr_phy_attest_handle_ops,
+ "[papr-physical-attestation]");
+
+ return fd;
+}
+
+/*
+ * Top-level ioctl handler for /dev/papr-physical-attestation.
+ */
+static long papr_phy_attest_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
+{
+ void __user *argp = (__force void __user *)arg;
+ long ret;
+
+ switch (ioctl) {
+ case PAPR_PHY_ATTEST_IOC_HANDLE:
+ ret = papr_phy_attest_create_handle(argp);
+ break;
+ default:
+ ret = -ENOIOCTLCMD;
+ break;
+ }
+ return ret;
+}
+
+static const struct file_operations papr_phy_attest_ops = {
+ .unlocked_ioctl = papr_phy_attest_dev_ioctl,
+};
+
+static struct miscdevice papr_phy_attest_dev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "papr-physical-attestation",
+ .fops = &papr_phy_attest_ops,
+};
+
+static __init int papr_phy_attest_init(void)
+{
+ if (!rtas_function_implemented(RTAS_FN_IBM_PHYSICAL_ATTESTATION))
+ return -ENODEV;
+
+ return misc_register(&papr_phy_attest_dev);
+}
+machine_device_initcall(pseries, papr_phy_attest_init);
diff --git a/arch/powerpc/platforms/pseries/papr-platform-dump.c b/arch/powerpc/platforms/pseries/papr-platform-dump.c
new file mode 100644
index 000000000000..f8d55eccdb6b
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/papr-platform-dump.c
@@ -0,0 +1,411 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define pr_fmt(fmt) "papr-platform-dump: " fmt
+
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <asm/machdep.h>
+#include <asm/rtas-work-area.h>
+#include <asm/rtas.h>
+#include <uapi/asm/papr-platform-dump.h>
+
+/*
+ * Function-specific return values for ibm,platform-dump, derived from
+ * PAPR+ v2.13 7.3.3.4.1 "ibm,platform-dump RTAS Call".
+ */
+#define RTAS_IBM_PLATFORM_DUMP_COMPLETE 0 /* Complete dump retrieved. */
+#define RTAS_IBM_PLATFORM_DUMP_CONTINUE 1 /* Continue dump */
+#define RTAS_NOT_AUTHORIZED -9002 /* Not Authorized */
+
+#define RTAS_IBM_PLATFORM_DUMP_START 2 /* Linux status to start dump */
+
+/**
+ * struct ibm_platform_dump_params - Parameters (in and out) for
+ * ibm,platform-dump
+ * @work_area: In: work area buffer for results.
+ * @buf_length: In: work area buffer length in bytes
+ * @dump_tag_hi: In: Most-significant 32 bits of a Dump_Tag representing
+ * an id of the dump being processed.
+ * @dump_tag_lo: In: Least-significant 32 bits of a Dump_Tag representing
+ * an id of the dump being processed.
+ * @sequence_hi: In: Sequence number in most-significant 32 bits.
+ * Out: Next sequence number in most-significant 32 bits.
+ * @sequence_lo: In: Sequence number in Least-significant 32 bits
+ * Out: Next sequence number in Least-significant 32 bits.
+ * @bytes_ret_hi: Out: Bytes written in most-significant 32 bits.
+ * @bytes_ret_lo: Out: Bytes written in Least-significant 32 bits.
+ * @status: Out: RTAS call status.
+ * @list: Maintain the list of dumps are in progress. Can
+ * retrieve multiple dumps with different dump IDs at
+ * the same time but not with the same dump ID. This list
+ * is used to determine whether the dump for the same ID
+ * is in progress.
+ */
+struct ibm_platform_dump_params {
+ struct rtas_work_area *work_area;
+ u32 buf_length;
+ u32 dump_tag_hi;
+ u32 dump_tag_lo;
+ u32 sequence_hi;
+ u32 sequence_lo;
+ u32 bytes_ret_hi;
+ u32 bytes_ret_lo;
+ s32 status;
+ struct list_head list;
+};
+
+/*
+ * Multiple dumps with different dump IDs can be retrieved at the same
+ * time, but not with dame dump ID. platform_dump_list_mutex and
+ * platform_dump_list are used to prevent this behavior.
+ */
+static DEFINE_MUTEX(platform_dump_list_mutex);
+static LIST_HEAD(platform_dump_list);
+
+/**
+ * rtas_ibm_platform_dump() - Call ibm,platform-dump to fill a work area
+ * buffer.
+ * @params: See &struct ibm_platform_dump_params.
+ * @buf_addr: Address of dump buffer (work_area)
+ * @buf_length: Length of the buffer in bytes (min. 1024)
+ *
+ * Calls ibm,platform-dump until it errors or successfully deposits data
+ * into the supplied work area. Handles RTAS retry statuses. Maps RTAS
+ * error statuses to reasonable errno values.
+ *
+ * Can request multiple dumps with different dump IDs at the same time,
+ * but not with the same dump ID which is prevented with the check in
+ * the ioctl code (papr_platform_dump_create_handle()).
+ *
+ * The caller should inspect @params.status to determine whether more
+ * calls are needed to complete the sequence.
+ *
+ * Context: May sleep.
+ * Return: -ve on error, 0 for dump complete and 1 for continue dump
+ */
+static int rtas_ibm_platform_dump(struct ibm_platform_dump_params *params,
+ phys_addr_t buf_addr, u32 buf_length)
+{
+ u32 rets[4];
+ s32 fwrc;
+ int ret = 0;
+
+ do {
+ fwrc = rtas_call(rtas_function_token(RTAS_FN_IBM_PLATFORM_DUMP),
+ 6, 5,
+ rets,
+ params->dump_tag_hi,
+ params->dump_tag_lo,
+ params->sequence_hi,
+ params->sequence_lo,
+ buf_addr,
+ buf_length);
+ } while (rtas_busy_delay(fwrc));
+
+ switch (fwrc) {
+ case RTAS_HARDWARE_ERROR:
+ ret = -EIO;
+ break;
+ case RTAS_NOT_AUTHORIZED:
+ ret = -EPERM;
+ break;
+ case RTAS_IBM_PLATFORM_DUMP_CONTINUE:
+ case RTAS_IBM_PLATFORM_DUMP_COMPLETE:
+ params->sequence_hi = rets[0];
+ params->sequence_lo = rets[1];
+ params->bytes_ret_hi = rets[2];
+ params->bytes_ret_lo = rets[3];
+ break;
+ default:
+ ret = -EIO;
+ pr_err_ratelimited("unexpected ibm,platform-dump status %d\n",
+ fwrc);
+ break;
+ }
+
+ params->status = fwrc;
+ return ret;
+}
+
+/*
+ * Platform dump is used with multiple RTAS calls to retrieve the
+ * complete dump for the provided dump ID. Once the complete dump is
+ * retrieved, the hypervisor returns dump complete status (0) for the
+ * last RTAS call and expects the caller issues one more call with
+ * NULL buffer to invalidate the dump so that the hypervisor can remove
+ * the dump.
+ *
+ * After the specific dump is invalidated in the hypervisor, expect the
+ * dump complete status for the new sequence - the user space initiates
+ * new request for the same dump ID.
+ */
+static ssize_t papr_platform_dump_handle_read(struct file *file,
+ char __user *buf, size_t size, loff_t *off)
+{
+ struct ibm_platform_dump_params *params = file->private_data;
+ u64 total_bytes;
+ s32 fwrc;
+
+ /*
+ * Dump already completed with the previous read calls.
+ * In case if the user space issues further reads, returns
+ * -EINVAL.
+ */
+ if (!params->buf_length) {
+ pr_warn_once("Platform dump completed for dump ID %llu\n",
+ (u64) (((u64)params->dump_tag_hi << 32) |
+ params->dump_tag_lo));
+ return -EINVAL;
+ }
+
+ /*
+ * The hypervisor returns status 0 if no more data available to
+ * download. The dump will be invalidated with ioctl (see below).
+ */
+ if (params->status == RTAS_IBM_PLATFORM_DUMP_COMPLETE) {
+ params->buf_length = 0;
+ /*
+ * Returns 0 to the user space so that user
+ * space read stops.
+ */
+ return 0;
+ }
+
+ if (size < SZ_1K) {
+ pr_err_once("Buffer length should be minimum 1024 bytes\n");
+ return -EINVAL;
+ } else if (size > params->buf_length) {
+ /*
+ * Allocate 4K work area. So if the user requests > 4K,
+ * resize the buffer length.
+ */
+ size = params->buf_length;
+ }
+
+ fwrc = rtas_ibm_platform_dump(params,
+ rtas_work_area_phys(params->work_area),
+ size);
+ if (fwrc < 0)
+ return fwrc;
+
+ total_bytes = (u64) (((u64)params->bytes_ret_hi << 32) |
+ params->bytes_ret_lo);
+
+ /*
+ * Kernel or firmware bug, do not continue.
+ */
+ if (WARN(total_bytes > size, "possible write beyond end of work area"))
+ return -EFAULT;
+
+ if (copy_to_user(buf, rtas_work_area_raw_buf(params->work_area),
+ total_bytes))
+ return -EFAULT;
+
+ return total_bytes;
+}
+
+static int papr_platform_dump_handle_release(struct inode *inode,
+ struct file *file)
+{
+ struct ibm_platform_dump_params *params = file->private_data;
+
+ if (params->work_area)
+ rtas_work_area_free(params->work_area);
+
+ mutex_lock(&platform_dump_list_mutex);
+ list_del(&params->list);
+ mutex_unlock(&platform_dump_list_mutex);
+
+ kfree(params);
+ file->private_data = NULL;
+ return 0;
+}
+
+/*
+ * This ioctl is used to invalidate the dump assuming the user space
+ * issue this ioctl after obtain the complete dump.
+ * Issue the last RTAS call with NULL buffer to invalidate the dump
+ * which means dump will be freed in the hypervisor.
+ */
+static long papr_platform_dump_invalidate_ioctl(struct file *file,
+ unsigned int ioctl, unsigned long arg)
+{
+ struct ibm_platform_dump_params *params;
+ u64 __user *argp = (void __user *)arg;
+ u64 param_dump_tag, dump_tag;
+
+ if (ioctl != PAPR_PLATFORM_DUMP_IOC_INVALIDATE)
+ return -ENOIOCTLCMD;
+
+ if (get_user(dump_tag, argp))
+ return -EFAULT;
+
+ /*
+ * private_data is freeded during release(), so should not
+ * happen.
+ */
+ if (!file->private_data) {
+ pr_err("No valid FD to invalidate dump for the ID(%llu)\n",
+ dump_tag);
+ return -EINVAL;
+ }
+
+ params = file->private_data;
+ param_dump_tag = (u64) (((u64)params->dump_tag_hi << 32) |
+ params->dump_tag_lo);
+ if (dump_tag != param_dump_tag) {
+ pr_err("Invalid dump ID(%llu) to invalidate dump\n",
+ dump_tag);
+ return -EINVAL;
+ }
+
+ if (params->status != RTAS_IBM_PLATFORM_DUMP_COMPLETE) {
+ pr_err("Platform dump is not complete, but requested "
+ "to invalidate dump for ID(%llu)\n",
+ dump_tag);
+ return -EINPROGRESS;
+ }
+
+ return rtas_ibm_platform_dump(params, 0, 0);
+}
+
+static const struct file_operations papr_platform_dump_handle_ops = {
+ .read = papr_platform_dump_handle_read,
+ .release = papr_platform_dump_handle_release,
+ .unlocked_ioctl = papr_platform_dump_invalidate_ioctl,
+};
+
+/**
+ * papr_platform_dump_create_handle() - Create a fd-based handle for
+ * reading platform dump
+ *
+ * Handler for PAPR_PLATFORM_DUMP_IOC_CREATE_HANDLE ioctl command
+ * Allocates RTAS parameter struct and work area and attached to the
+ * file descriptor for reading by user space with the multiple RTAS
+ * calls until the dump is completed. This memory allocation is freed
+ * when the file is released.
+ *
+ * Multiple dump requests with different IDs are allowed at the same
+ * time, but not with the same dump ID. So if the user space is
+ * already opened file descriptor for the specific dump ID, return
+ * -EALREADY for the next request.
+ *
+ * @dump_tag: Dump ID for the dump requested to retrieve from the
+ * hypervisor
+ *
+ * Return: The installed fd number if successful, -ve errno otherwise.
+ */
+static long papr_platform_dump_create_handle(u64 dump_tag)
+{
+ struct ibm_platform_dump_params *params;
+ u64 param_dump_tag;
+ struct file *file;
+ long err;
+ int fd;
+
+ /*
+ * Return failure if the user space is already opened FD for
+ * the specific dump ID. This check will prevent multiple dump
+ * requests for the same dump ID at the same time. Generally
+ * should not expect this, but in case.
+ */
+ list_for_each_entry(params, &platform_dump_list, list) {
+ param_dump_tag = (u64) (((u64)params->dump_tag_hi << 32) |
+ params->dump_tag_lo);
+ if (dump_tag == param_dump_tag) {
+ pr_err("Platform dump for ID(%llu) is already in progress\n",
+ dump_tag);
+ return -EALREADY;
+ }
+ }
+
+ params = kzalloc(sizeof(struct ibm_platform_dump_params),
+ GFP_KERNEL_ACCOUNT);
+ if (!params)
+ return -ENOMEM;
+
+ params->work_area = rtas_work_area_alloc(SZ_4K);
+ params->buf_length = SZ_4K;
+ params->dump_tag_hi = (u32)(dump_tag >> 32);
+ params->dump_tag_lo = (u32)(dump_tag & 0x00000000ffffffffULL);
+ params->status = RTAS_IBM_PLATFORM_DUMP_START;
+
+ fd = get_unused_fd_flags(O_RDONLY | O_CLOEXEC);
+ if (fd < 0) {
+ err = fd;
+ goto free_area;
+ }
+
+ file = anon_inode_getfile_fmode("[papr-platform-dump]",
+ &papr_platform_dump_handle_ops,
+ (void *)params, O_RDONLY,
+ FMODE_LSEEK | FMODE_PREAD);
+ if (IS_ERR(file)) {
+ err = PTR_ERR(file);
+ goto put_fd;
+ }
+
+ fd_install(fd, file);
+
+ list_add(&params->list, &platform_dump_list);
+
+ pr_info("%s (%d) initiated platform dump for dump tag %llu\n",
+ current->comm, current->pid, dump_tag);
+ return fd;
+put_fd:
+ put_unused_fd(fd);
+free_area:
+ rtas_work_area_free(params->work_area);
+ kfree(params);
+ return err;
+}
+
+/*
+ * Top-level ioctl handler for /dev/papr-platform-dump.
+ */
+static long papr_platform_dump_dev_ioctl(struct file *filp,
+ unsigned int ioctl,
+ unsigned long arg)
+{
+ u64 __user *argp = (void __user *)arg;
+ u64 dump_tag;
+ long ret;
+
+ if (get_user(dump_tag, argp))
+ return -EFAULT;
+
+ switch (ioctl) {
+ case PAPR_PLATFORM_DUMP_IOC_CREATE_HANDLE:
+ mutex_lock(&platform_dump_list_mutex);
+ ret = papr_platform_dump_create_handle(dump_tag);
+ mutex_unlock(&platform_dump_list_mutex);
+ break;
+ default:
+ ret = -ENOIOCTLCMD;
+ break;
+ }
+ return ret;
+}
+
+static const struct file_operations papr_platform_dump_ops = {
+ .unlocked_ioctl = papr_platform_dump_dev_ioctl,
+};
+
+static struct miscdevice papr_platform_dump_dev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "papr-platform-dump",
+ .fops = &papr_platform_dump_ops,
+};
+
+static __init int papr_platform_dump_init(void)
+{
+ if (!rtas_function_implemented(RTAS_FN_IBM_PLATFORM_DUMP))
+ return -ENODEV;
+
+ return misc_register(&papr_platform_dump_dev);
+}
+machine_device_initcall(pseries, papr_platform_dump_init);
diff --git a/arch/powerpc/platforms/pseries/papr-rtas-common.c b/arch/powerpc/platforms/pseries/papr-rtas-common.c
new file mode 100644
index 000000000000..33c606e3378a
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/papr-rtas-common.c
@@ -0,0 +1,311 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define pr_fmt(fmt) "papr-common: " fmt
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/anon_inodes.h>
+#include <linux/sched/signal.h>
+#include "papr-rtas-common.h"
+
+/*
+ * Sequence based RTAS HCALL has to issue multiple times to retrieve
+ * complete data from the hypervisor. For some of these RTAS calls,
+ * the OS should not interleave calls with different input until the
+ * sequence is completed. So data is collected for these calls during
+ * ioctl handle and export to user space with read() handle.
+ * This file provides common functions needed for such sequence based
+ * RTAS calls Ex: ibm,get-vpd and ibm,get-indices.
+ */
+
+bool papr_rtas_blob_has_data(const struct papr_rtas_blob *blob)
+{
+ return blob->data && blob->len;
+}
+
+void papr_rtas_blob_free(const struct papr_rtas_blob *blob)
+{
+ if (blob) {
+ kvfree(blob->data);
+ kfree(blob);
+ }
+}
+
+/**
+ * papr_rtas_blob_extend() - Append data to a &struct papr_rtas_blob.
+ * @blob: The blob to extend.
+ * @data: The new data to append to @blob.
+ * @len: The length of @data.
+ *
+ * Context: May sleep.
+ * Return: -ENOMEM on allocation failure, 0 otherwise.
+ */
+static int papr_rtas_blob_extend(struct papr_rtas_blob *blob,
+ const char *data, size_t len)
+{
+ const size_t new_len = blob->len + len;
+ const size_t old_len = blob->len;
+ const char *old_ptr = blob->data;
+ char *new_ptr;
+
+ new_ptr = kvrealloc(old_ptr, new_len, GFP_KERNEL_ACCOUNT);
+ if (!new_ptr)
+ return -ENOMEM;
+
+ memcpy(&new_ptr[old_len], data, len);
+ blob->data = new_ptr;
+ blob->len = new_len;
+ return 0;
+}
+
+/**
+ * papr_rtas_blob_generate() - Construct a new &struct papr_rtas_blob.
+ * @seq: work function of the caller that is called to obtain
+ * data with the caller RTAS call.
+ *
+ * The @work callback is invoked until it returns NULL. @seq is
+ * passed to @work in its first argument on each call. When
+ * @work returns data, it should store the data length in its
+ * second argument.
+ *
+ * Context: May sleep.
+ * Return: A completely populated &struct papr_rtas_blob, or NULL on error.
+ */
+static const struct papr_rtas_blob *
+papr_rtas_blob_generate(struct papr_rtas_sequence *seq)
+{
+ struct papr_rtas_blob *blob;
+ const char *buf;
+ size_t len;
+ int err = 0;
+
+ blob = kzalloc(sizeof(*blob), GFP_KERNEL_ACCOUNT);
+ if (!blob)
+ return NULL;
+
+ if (!seq->work)
+ return ERR_PTR(-EINVAL);
+
+
+ while (err == 0 && (buf = seq->work(seq, &len)))
+ err = papr_rtas_blob_extend(blob, buf, len);
+
+ if (err != 0 || !papr_rtas_blob_has_data(blob))
+ goto free_blob;
+
+ return blob;
+free_blob:
+ papr_rtas_blob_free(blob);
+ return NULL;
+}
+
+int papr_rtas_sequence_set_err(struct papr_rtas_sequence *seq, int err)
+{
+ /* Preserve the first error recorded. */
+ if (seq->error == 0)
+ seq->error = err;
+
+ return seq->error;
+}
+
+/*
+ * Higher-level retrieval code below. These functions use the
+ * papr_rtas_blob_* and sequence_* APIs defined above to create fd-based
+ * handles for consumption by user space.
+ */
+
+/**
+ * papr_rtas_run_sequence() - Run a single retrieval sequence.
+ * @seq: Functions of the caller to complete the sequence
+ *
+ * Context: May sleep. Holds a mutex and an RTAS work area for its
+ * duration. Typically performs multiple sleepable slab
+ * allocations.
+ *
+ * Return: A populated &struct papr_rtas_blob on success. Encoded error
+ * pointer otherwise.
+ */
+static const struct papr_rtas_blob *papr_rtas_run_sequence(struct papr_rtas_sequence *seq)
+{
+ const struct papr_rtas_blob *blob;
+
+ if (seq->begin)
+ seq->begin(seq);
+
+ blob = papr_rtas_blob_generate(seq);
+ if (!blob)
+ papr_rtas_sequence_set_err(seq, -ENOMEM);
+
+ if (seq->end)
+ seq->end(seq);
+
+
+ if (seq->error) {
+ papr_rtas_blob_free(blob);
+ return ERR_PTR(seq->error);
+ }
+
+ return blob;
+}
+
+/**
+ * papr_rtas_retrieve() - Return the data blob that is exposed to
+ * user space.
+ * @seq: RTAS call specific functions to be invoked until the
+ * sequence is completed.
+ *
+ * Run sequences against @param until a blob is successfully
+ * instantiated, or a hard error is encountered, or a fatal signal is
+ * pending.
+ *
+ * Context: May sleep.
+ * Return: A fully populated data blob when successful. Encoded error
+ * pointer otherwise.
+ */
+const struct papr_rtas_blob *papr_rtas_retrieve(struct papr_rtas_sequence *seq)
+{
+ const struct papr_rtas_blob *blob;
+
+ /*
+ * EAGAIN means the sequence returns error with a -4 (data
+ * changed and need to start the sequence) status from RTAS calls
+ * and we should attempt a new sequence. PAPR+ (v2.13 R1–7.3.20–5
+ * - ibm,get-vpd, R1–7.3.17–6 - ibm,get-indices) indicates that
+ * this should be a transient condition, not something that
+ * happens continuously. But we'll stop trying on a fatal signal.
+ */
+ do {
+ blob = papr_rtas_run_sequence(seq);
+ if (!IS_ERR(blob)) /* Success. */
+ break;
+ if (PTR_ERR(blob) != -EAGAIN) /* Hard error. */
+ break;
+ cond_resched();
+ } while (!fatal_signal_pending(current));
+
+ return blob;
+}
+
+/**
+ * papr_rtas_setup_file_interface - Complete the sequence and obtain
+ * the data and export to user space with fd-based handles. Then the
+ * user spave gets the data with read() handle.
+ * @seq: RTAS call specific functions to get the data.
+ * @fops: RTAS call specific file operations such as read().
+ * @name: RTAS call specific char device node.
+ *
+ * Return: FD handle for consumption by user space
+ */
+long papr_rtas_setup_file_interface(struct papr_rtas_sequence *seq,
+ const struct file_operations *fops,
+ char *name)
+{
+ const struct papr_rtas_blob *blob;
+ struct file *file;
+ long ret;
+ int fd;
+
+ blob = papr_rtas_retrieve(seq);
+ if (IS_ERR(blob))
+ return PTR_ERR(blob);
+
+ fd = get_unused_fd_flags(O_RDONLY | O_CLOEXEC);
+ if (fd < 0) {
+ ret = fd;
+ goto free_blob;
+ }
+
+ file = anon_inode_getfile_fmode(name, fops, (void *)blob,
+ O_RDONLY, FMODE_LSEEK | FMODE_PREAD);
+ if (IS_ERR(file)) {
+ ret = PTR_ERR(file);
+ goto put_fd;
+ }
+
+ fd_install(fd, file);
+ return fd;
+
+put_fd:
+ put_unused_fd(fd);
+free_blob:
+ papr_rtas_blob_free(blob);
+ return ret;
+}
+
+/*
+ * papr_rtas_sequence_should_stop() - Determine whether RTAS retrieval
+ * sequence should continue.
+ *
+ * Examines the sequence error state and outputs of the last call to
+ * the specific RTAS to determine whether the sequence in progress
+ * should continue or stop.
+ *
+ * Return: True if the sequence has encountered an error or if all data
+ * for this sequence has been retrieved. False otherwise.
+ */
+bool papr_rtas_sequence_should_stop(const struct papr_rtas_sequence *seq,
+ s32 status, bool init_state)
+{
+ bool done;
+
+ if (seq->error)
+ return true;
+
+ switch (status) {
+ case RTAS_SEQ_COMPLETE:
+ if (init_state)
+ done = false; /* Initial state. */
+ else
+ done = true; /* All data consumed. */
+ break;
+ case RTAS_SEQ_MORE_DATA:
+ done = false; /* More data available. */
+ break;
+ default:
+ done = true; /* Error encountered. */
+ break;
+ }
+
+ return done;
+}
+
+/*
+ * User space read to retrieve data for the corresponding RTAS call.
+ * papr_rtas_blob is filled with the data using the corresponding RTAS
+ * call sequence API.
+ */
+ssize_t papr_rtas_common_handle_read(struct file *file,
+ char __user *buf, size_t size, loff_t *off)
+{
+ const struct papr_rtas_blob *blob = file->private_data;
+
+ /* We should not instantiate a handle without any data attached. */
+ if (!papr_rtas_blob_has_data(blob)) {
+ pr_err_once("handle without data\n");
+ return -EIO;
+ }
+
+ return simple_read_from_buffer(buf, size, off, blob->data, blob->len);
+}
+
+int papr_rtas_common_handle_release(struct inode *inode,
+ struct file *file)
+{
+ const struct papr_rtas_blob *blob = file->private_data;
+
+ papr_rtas_blob_free(blob);
+
+ return 0;
+}
+
+loff_t papr_rtas_common_handle_seek(struct file *file, loff_t off,
+ int whence)
+{
+ const struct papr_rtas_blob *blob = file->private_data;
+
+ return fixed_size_llseek(file, off, whence, blob->len);
+}
diff --git a/arch/powerpc/platforms/pseries/papr-rtas-common.h b/arch/powerpc/platforms/pseries/papr-rtas-common.h
new file mode 100644
index 000000000000..4ceabcaf4905
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/papr-rtas-common.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_POWERPC_PAPR_RTAS_COMMON_H
+#define _ASM_POWERPC_PAPR_RTAS_COMMON_H
+
+#include <linux/types.h>
+
+/*
+ * Return codes for sequence based RTAS calls.
+ * Not listed under PAPR+ v2.13 7.2.8: "Return Codes".
+ * But defined in the specific section of each RTAS call.
+ */
+#define RTAS_SEQ_COMPLETE 0 /* All data has been retrieved. */
+#define RTAS_SEQ_MORE_DATA 1 /* More data is available */
+#define RTAS_SEQ_START_OVER -4 /* Data changed, restart call sequence. */
+
+/*
+ * Internal "blob" APIs for accumulating RTAS call results into
+ * an immutable buffer to be attached to a file descriptor.
+ */
+struct papr_rtas_blob {
+ const char *data;
+ size_t len;
+};
+
+/**
+ * struct papr_sequence - State for managing a sequence of RTAS calls.
+ * @error: Shall be zero as long as the sequence has not encountered an error,
+ * -ve errno otherwise. Use papr_rtas_sequence_set_err() to update.
+ * @params: Parameter block to pass to rtas_*() calls.
+ * @begin: Work area allocation and initialize the needed parameter
+ * values passed to RTAS call
+ * @end: Free the allocated work area
+ * @work: Obtain data with RTAS call and invoke it until the sequence is
+ * completed.
+ *
+ */
+struct papr_rtas_sequence {
+ int error;
+ void *params;
+ void (*begin)(struct papr_rtas_sequence *seq);
+ void (*end)(struct papr_rtas_sequence *seq);
+ const char *(*work)(struct papr_rtas_sequence *seq, size_t *len);
+};
+
+extern bool papr_rtas_blob_has_data(const struct papr_rtas_blob *blob);
+extern void papr_rtas_blob_free(const struct papr_rtas_blob *blob);
+extern int papr_rtas_sequence_set_err(struct papr_rtas_sequence *seq,
+ int err);
+extern const struct papr_rtas_blob *papr_rtas_retrieve(struct papr_rtas_sequence *seq);
+extern long papr_rtas_setup_file_interface(struct papr_rtas_sequence *seq,
+ const struct file_operations *fops, char *name);
+extern bool papr_rtas_sequence_should_stop(const struct papr_rtas_sequence *seq,
+ s32 status, bool init_state);
+extern ssize_t papr_rtas_common_handle_read(struct file *file,
+ char __user *buf, size_t size, loff_t *off);
+extern int papr_rtas_common_handle_release(struct inode *inode,
+ struct file *file);
+extern loff_t papr_rtas_common_handle_seek(struct file *file, loff_t off,
+ int whence);
+#endif /* _ASM_POWERPC_PAPR_RTAS_COMMON_H */
+
diff --git a/arch/powerpc/platforms/pseries/papr-vpd.c b/arch/powerpc/platforms/pseries/papr-vpd.c
index c86950d7105a..f38c188fc4a1 100644
--- a/arch/powerpc/platforms/pseries/papr-vpd.c
+++ b/arch/powerpc/platforms/pseries/papr-vpd.c
@@ -2,7 +2,6 @@
#define pr_fmt(fmt) "papr-vpd: " fmt
-#include <linux/anon_inodes.h>
#include <linux/build_bug.h>
#include <linux/file.h>
#include <linux/fs.h>
@@ -20,14 +19,7 @@
#include <asm/rtas-work-area.h>
#include <asm/rtas.h>
#include <uapi/asm/papr-vpd.h>
-
-/*
- * Function-specific return values for ibm,get-vpd, derived from PAPR+
- * v2.13 7.3.20 "ibm,get-vpd RTAS Call".
- */
-#define RTAS_IBM_GET_VPD_COMPLETE 0 /* All VPD has been retrieved. */
-#define RTAS_IBM_GET_VPD_MORE_DATA 1 /* More VPD is available. */
-#define RTAS_IBM_GET_VPD_START_OVER -4 /* VPD changed, restart call sequence. */
+#include "papr-rtas-common.h"
/**
* struct rtas_ibm_get_vpd_params - Parameters (in and out) for ibm,get-vpd.
@@ -91,13 +83,14 @@ static int rtas_ibm_get_vpd(struct rtas_ibm_get_vpd_params *params)
case RTAS_INVALID_PARAMETER:
ret = -EINVAL;
break;
- case RTAS_IBM_GET_VPD_START_OVER:
+ case RTAS_SEQ_START_OVER:
ret = -EAGAIN;
+ pr_info_ratelimited("VPD changed during retrieval, retrying\n");
break;
- case RTAS_IBM_GET_VPD_MORE_DATA:
+ case RTAS_SEQ_MORE_DATA:
params->sequence = rets[0];
fallthrough;
- case RTAS_IBM_GET_VPD_COMPLETE:
+ case RTAS_SEQ_COMPLETE:
params->written = rets[1];
/*
* Kernel or firmware bug, do not continue.
@@ -119,91 +112,6 @@ static int rtas_ibm_get_vpd(struct rtas_ibm_get_vpd_params *params)
}
/*
- * Internal VPD "blob" APIs for accumulating ibm,get-vpd results into
- * an immutable buffer to be attached to a file descriptor.
- */
-struct vpd_blob {
- const char *data;
- size_t len;
-};
-
-static bool vpd_blob_has_data(const struct vpd_blob *blob)
-{
- return blob->data && blob->len;
-}
-
-static void vpd_blob_free(const struct vpd_blob *blob)
-{
- if (blob) {
- kvfree(blob->data);
- kfree(blob);
- }
-}
-
-/**
- * vpd_blob_extend() - Append data to a &struct vpd_blob.
- * @blob: The blob to extend.
- * @data: The new data to append to @blob.
- * @len: The length of @data.
- *
- * Context: May sleep.
- * Return: -ENOMEM on allocation failure, 0 otherwise.
- */
-static int vpd_blob_extend(struct vpd_blob *blob, const char *data, size_t len)
-{
- const size_t new_len = blob->len + len;
- const size_t old_len = blob->len;
- const char *old_ptr = blob->data;
- char *new_ptr;
-
- new_ptr = kvrealloc(old_ptr, new_len, GFP_KERNEL_ACCOUNT);
- if (!new_ptr)
- return -ENOMEM;
-
- memcpy(&new_ptr[old_len], data, len);
- blob->data = new_ptr;
- blob->len = new_len;
- return 0;
-}
-
-/**
- * vpd_blob_generate() - Construct a new &struct vpd_blob.
- * @generator: Function that supplies the blob data.
- * @arg: Context pointer supplied by caller, passed to @generator.
- *
- * The @generator callback is invoked until it returns NULL. @arg is
- * passed to @generator in its first argument on each call. When
- * @generator returns data, it should store the data length in its
- * second argument.
- *
- * Context: May sleep.
- * Return: A completely populated &struct vpd_blob, or NULL on error.
- */
-static const struct vpd_blob *
-vpd_blob_generate(const char * (*generator)(void *, size_t *), void *arg)
-{
- struct vpd_blob *blob;
- const char *buf;
- size_t len;
- int err = 0;
-
- blob = kzalloc(sizeof(*blob), GFP_KERNEL_ACCOUNT);
- if (!blob)
- return NULL;
-
- while (err == 0 && (buf = generator(arg, &len)))
- err = vpd_blob_extend(blob, buf, len);
-
- if (err != 0 || !vpd_blob_has_data(blob))
- goto free_blob;
-
- return blob;
-free_blob:
- vpd_blob_free(blob);
- return NULL;
-}
-
-/*
* Internal VPD sequence APIs. A VPD sequence is a series of calls to
* ibm,get-vpd for a given location code. The sequence ends when an
* error is encountered or all VPD for the location code has been
@@ -211,30 +119,14 @@ free_blob:
*/
/**
- * struct vpd_sequence - State for managing a VPD sequence.
- * @error: Shall be zero as long as the sequence has not encountered an error,
- * -ve errno otherwise. Use vpd_sequence_set_err() to update this.
- * @params: Parameter block to pass to rtas_ibm_get_vpd().
- */
-struct vpd_sequence {
- int error;
- struct rtas_ibm_get_vpd_params params;
-};
-
-/**
* vpd_sequence_begin() - Begin a VPD retrieval sequence.
- * @seq: Uninitialized sequence state.
- * @loc_code: Location code that defines the scope of the VPD to return.
- *
- * Initializes @seq with the resources necessary to carry out a VPD
- * sequence. Callers must pass @seq to vpd_sequence_end() regardless
- * of whether the sequence succeeds.
+ * @seq: vpd call parameters from sequence struct
*
* Context: May sleep.
*/
-static void vpd_sequence_begin(struct vpd_sequence *seq,
- const struct papr_location_code *loc_code)
+static void vpd_sequence_begin(struct papr_rtas_sequence *seq)
{
+ struct rtas_ibm_get_vpd_params *vpd_params;
/*
* Use a static data structure for the location code passed to
* RTAS to ensure it's in the RMA and avoid a separate work
@@ -242,6 +134,7 @@ static void vpd_sequence_begin(struct vpd_sequence *seq,
*/
static struct papr_location_code static_loc_code;
+ vpd_params = (struct rtas_ibm_get_vpd_params *)seq->params;
/*
* We could allocate the work area before acquiring the
* function lock, but that would allow concurrent requests to
@@ -249,14 +142,12 @@ static void vpd_sequence_begin(struct vpd_sequence *seq,
* allocate the work area under the lock.
*/
mutex_lock(&rtas_ibm_get_vpd_lock);
- static_loc_code = *loc_code;
- *seq = (struct vpd_sequence) {
- .params = {
- .work_area = rtas_work_area_alloc(SZ_4K),
- .loc_code = &static_loc_code,
- .sequence = 1,
- },
- };
+ static_loc_code = *(struct papr_location_code *)vpd_params->loc_code;
+ vpd_params = (struct rtas_ibm_get_vpd_params *)seq->params;
+ vpd_params->work_area = rtas_work_area_alloc(SZ_4K);
+ vpd_params->loc_code = &static_loc_code;
+ vpd_params->sequence = 1;
+ vpd_params->status = 0;
}
/**
@@ -265,180 +156,39 @@ static void vpd_sequence_begin(struct vpd_sequence *seq,
*
* Releases resources obtained by vpd_sequence_begin().
*/
-static void vpd_sequence_end(struct vpd_sequence *seq)
+static void vpd_sequence_end(struct papr_rtas_sequence *seq)
{
- rtas_work_area_free(seq->params.work_area);
- mutex_unlock(&rtas_ibm_get_vpd_lock);
-}
-
-/**
- * vpd_sequence_should_stop() - Determine whether a VPD retrieval sequence
- * should continue.
- * @seq: VPD sequence state.
- *
- * Examines the sequence error state and outputs of the last call to
- * ibm,get-vpd to determine whether the sequence in progress should
- * continue or stop.
- *
- * Return: True if the sequence has encountered an error or if all VPD for
- * this sequence has been retrieved. False otherwise.
- */
-static bool vpd_sequence_should_stop(const struct vpd_sequence *seq)
-{
- bool done;
-
- if (seq->error)
- return true;
+ struct rtas_ibm_get_vpd_params *vpd_params;
- switch (seq->params.status) {
- case 0:
- if (seq->params.written == 0)
- done = false; /* Initial state. */
- else
- done = true; /* All data consumed. */
- break;
- case 1:
- done = false; /* More data available. */
- break;
- default:
- done = true; /* Error encountered. */
- break;
- }
-
- return done;
-}
-
-static int vpd_sequence_set_err(struct vpd_sequence *seq, int err)
-{
- /* Preserve the first error recorded. */
- if (seq->error == 0)
- seq->error = err;
-
- return seq->error;
+ vpd_params = (struct rtas_ibm_get_vpd_params *)seq->params;
+ rtas_work_area_free(vpd_params->work_area);
+ mutex_unlock(&rtas_ibm_get_vpd_lock);
}
/*
- * Generator function to be passed to vpd_blob_generate().
+ * Generator function to be passed to papr_rtas_blob_generate().
*/
-static const char *vpd_sequence_fill_work_area(void *arg, size_t *len)
+static const char *vpd_sequence_fill_work_area(struct papr_rtas_sequence *seq,
+ size_t *len)
{
- struct vpd_sequence *seq = arg;
- struct rtas_ibm_get_vpd_params *p = &seq->params;
+ struct rtas_ibm_get_vpd_params *p;
+ bool init_state;
- if (vpd_sequence_should_stop(seq))
+ p = (struct rtas_ibm_get_vpd_params *)seq->params;
+ init_state = (p->written == 0) ? true : false;
+
+ if (papr_rtas_sequence_should_stop(seq, p->status, init_state))
return NULL;
- if (vpd_sequence_set_err(seq, rtas_ibm_get_vpd(p)))
+ if (papr_rtas_sequence_set_err(seq, rtas_ibm_get_vpd(p)))
return NULL;
*len = p->written;
return rtas_work_area_raw_buf(p->work_area);
}
-/*
- * Higher-level VPD retrieval code below. These functions use the
- * vpd_blob_* and vpd_sequence_* APIs defined above to create fd-based
- * VPD handles for consumption by user space.
- */
-
-/**
- * papr_vpd_run_sequence() - Run a single VPD retrieval sequence.
- * @loc_code: Location code that defines the scope of VPD to return.
- *
- * Context: May sleep. Holds a mutex and an RTAS work area for its
- * duration. Typically performs multiple sleepable slab
- * allocations.
- *
- * Return: A populated &struct vpd_blob on success. Encoded error
- * pointer otherwise.
- */
-static const struct vpd_blob *papr_vpd_run_sequence(const struct papr_location_code *loc_code)
-{
- const struct vpd_blob *blob;
- struct vpd_sequence seq;
-
- vpd_sequence_begin(&seq, loc_code);
- blob = vpd_blob_generate(vpd_sequence_fill_work_area, &seq);
- if (!blob)
- vpd_sequence_set_err(&seq, -ENOMEM);
- vpd_sequence_end(&seq);
-
- if (seq.error) {
- vpd_blob_free(blob);
- return ERR_PTR(seq.error);
- }
-
- return blob;
-}
-
-/**
- * papr_vpd_retrieve() - Return the VPD for a location code.
- * @loc_code: Location code that defines the scope of VPD to return.
- *
- * Run VPD sequences against @loc_code until a blob is successfully
- * instantiated, or a hard error is encountered, or a fatal signal is
- * pending.
- *
- * Context: May sleep.
- * Return: A fully populated VPD blob when successful. Encoded error
- * pointer otherwise.
- */
-static const struct vpd_blob *papr_vpd_retrieve(const struct papr_location_code *loc_code)
-{
- const struct vpd_blob *blob;
-
- /*
- * EAGAIN means the sequence errored with a -4 (VPD changed)
- * status from ibm,get-vpd, and we should attempt a new
- * sequence. PAPR+ v2.13 R1–7.3.20–5 indicates that this
- * should be a transient condition, not something that happens
- * continuously. But we'll stop trying on a fatal signal.
- */
- do {
- blob = papr_vpd_run_sequence(loc_code);
- if (!IS_ERR(blob)) /* Success. */
- break;
- if (PTR_ERR(blob) != -EAGAIN) /* Hard error. */
- break;
- pr_info_ratelimited("VPD changed during retrieval, retrying\n");
- cond_resched();
- } while (!fatal_signal_pending(current));
-
- return blob;
-}
-
-static ssize_t papr_vpd_handle_read(struct file *file, char __user *buf, size_t size, loff_t *off)
-{
- const struct vpd_blob *blob = file->private_data;
-
- /* bug: we should not instantiate a handle without any data attached. */
- if (!vpd_blob_has_data(blob)) {
- pr_err_once("handle without data\n");
- return -EIO;
- }
-
- return simple_read_from_buffer(buf, size, off, blob->data, blob->len);
-}
-
-static int papr_vpd_handle_release(struct inode *inode, struct file *file)
-{
- const struct vpd_blob *blob = file->private_data;
-
- vpd_blob_free(blob);
-
- return 0;
-}
-
-static loff_t papr_vpd_handle_seek(struct file *file, loff_t off, int whence)
-{
- const struct vpd_blob *blob = file->private_data;
-
- return fixed_size_llseek(file, off, whence, blob->len);
-}
-
-
static const struct file_operations papr_vpd_handle_ops = {
- .read = papr_vpd_handle_read,
- .llseek = papr_vpd_handle_seek,
- .release = papr_vpd_handle_release,
+ .read = papr_rtas_common_handle_read,
+ .llseek = papr_rtas_common_handle_seek,
+ .release = papr_rtas_common_handle_release,
};
/**
@@ -460,10 +210,9 @@ static const struct file_operations papr_vpd_handle_ops = {
*/
static long papr_vpd_create_handle(struct papr_location_code __user *ulc)
{
+ struct rtas_ibm_get_vpd_params vpd_params = {};
+ struct papr_rtas_sequence seq = {};
struct papr_location_code klc;
- const struct vpd_blob *blob;
- struct file *file;
- long err;
int fd;
if (copy_from_user(&klc, ulc, sizeof(klc)))
@@ -472,30 +221,19 @@ static long papr_vpd_create_handle(struct papr_location_code __user *ulc)
if (!string_is_terminated(klc.str, ARRAY_SIZE(klc.str)))
return -EINVAL;
- blob = papr_vpd_retrieve(&klc);
- if (IS_ERR(blob))
- return PTR_ERR(blob);
+ seq = (struct papr_rtas_sequence) {
+ .begin = vpd_sequence_begin,
+ .end = vpd_sequence_end,
+ .work = vpd_sequence_fill_work_area,
+ };
- fd = get_unused_fd_flags(O_RDONLY | O_CLOEXEC);
- if (fd < 0) {
- err = fd;
- goto free_blob;
- }
+ vpd_params.loc_code = &klc;
+ seq.params = (void *)&vpd_params;
+
+ fd = papr_rtas_setup_file_interface(&seq, &papr_vpd_handle_ops,
+ "[papr-vpd]");
- file = anon_inode_getfile_fmode("[papr-vpd]", &papr_vpd_handle_ops,
- (void *)blob, O_RDONLY,
- FMODE_LSEEK | FMODE_PREAD);
- if (IS_ERR(file)) {
- err = PTR_ERR(file);
- goto put_fd;
- }
- fd_install(fd, file);
return fd;
-put_fd:
- put_unused_fd(fd);
-free_blob:
- vpd_blob_free(blob);
- return err;
}
/*
diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c
index 47db732981a8..e22fc638dbc7 100644
--- a/arch/powerpc/sysdev/cpm_common.c
+++ b/arch/powerpc/sysdev/cpm_common.c
@@ -138,7 +138,7 @@ static void __cpm2_gpio32_set(struct of_mm_gpio_chip *mm_gc, u32 pin_mask,
out_be32(&iop->dat, cpm2_gc->cpdata);
}
-static void cpm2_gpio32_set(struct gpio_chip *gc, unsigned int gpio, int value)
+static int cpm2_gpio32_set(struct gpio_chip *gc, unsigned int gpio, int value)
{
struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
struct cpm2_gpio32_chip *cpm2_gc = gpiochip_get_data(gc);
@@ -150,6 +150,8 @@ static void cpm2_gpio32_set(struct gpio_chip *gc, unsigned int gpio, int value)
__cpm2_gpio32_set(mm_gc, pin_mask, value);
spin_unlock_irqrestore(&cpm2_gc->lock, flags);
+
+ return 0;
}
static int cpm2_gpio32_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
@@ -208,7 +210,7 @@ int cpm2_gpiochip_add32(struct device *dev)
gc->direction_input = cpm2_gpio32_dir_in;
gc->direction_output = cpm2_gpio32_dir_out;
gc->get = cpm2_gpio32_get;
- gc->set = cpm2_gpio32_set;
+ gc->set_rv = cpm2_gpio32_set;
gc->parent = dev;
gc->owner = THIS_MODULE;
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 4afbab83a2e2..c706a08e9955 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -27,6 +27,7 @@
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/slab.h>
+#include <linux/string_choices.h>
#include <linux/syscore_ops.h>
#include <linux/ratelimit.h>
#include <linux/pgtable.h>
@@ -474,9 +475,9 @@ static void __init mpic_scan_ht_msi(struct mpic *mpic, u8 __iomem *devbase,
addr = addr | ((u64)readl(base + HT_MSI_ADDR_HI) << 32);
}
- printk(KERN_DEBUG "mpic: - HT:%02x.%x %s MSI mapping found @ 0x%llx\n",
- PCI_SLOT(devfn), PCI_FUNC(devfn),
- flags & HT_MSI_FLAGS_ENABLE ? "enabled" : "disabled", addr);
+ pr_debug("mpic: - HT:%02x.%x %s MSI mapping found @ 0x%llx\n",
+ PCI_SLOT(devfn), PCI_FUNC(devfn),
+ str_enabled_disabled(flags & HT_MSI_FLAGS_ENABLE), addr);
if (!(flags & HT_MSI_FLAGS_ENABLE))
writeb(flags | HT_MSI_FLAGS_ENABLE, base + HT_MSI_FLAGS);
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 88abffa8b54c..cb3a3244ae6f 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1770,7 +1770,7 @@ static void xmon_show_stack(unsigned long sp, unsigned long lr,
sp + STACK_INT_FRAME_REGS);
break;
}
- printf("--- Exception: %lx %s at ", regs.trap,
+ printf("---- Exception: %lx %s at ", regs.trap,
getvecname(TRAP(&regs)));
pc = regs.nip;
lr = regs.link;