 Documentation/devicetree/bindings/arm/psci.txt |  37
 Documentation/virtual/kvm/api.txt              |  17
 arch/arm/include/asm/kvm_host.h                |   2
 arch/arm/include/asm/kvm_psci.h                |   6
 arch/arm/include/asm/psci.h                    |   7
 arch/arm/include/uapi/asm/kvm.h                |  10
 arch/arm/kernel/psci.c                         | 196
 arch/arm/kernel/psci_smp.c                     |  33
 arch/arm/kvm/arm.c                             |   1
 arch/arm/kvm/handle_exit.c                     |  10
 arch/arm/kvm/psci.c                            | 235
 arch/arm64/include/asm/cpu_ops.h               |   2
 arch/arm64/include/asm/cputype.h               |   1
 arch/arm64/include/asm/kvm_host.h              |   2
 arch/arm64/include/asm/kvm_psci.h              |   6
 arch/arm64/include/asm/psci.h                  |   2
 arch/arm64/include/uapi/asm/kvm.h              |  13
 arch/arm64/kernel/psci.c                       | 231
 arch/arm64/kernel/smp.c                        |  22
 arch/arm64/kvm/guest.c                         |   2
 arch/arm64/kvm/handle_exit.c                   |  10
 arch/arm64/kvm/sys_regs_generic_v8.c           |   2
 arch/mips/Kconfig                              |  12
 arch/mips/include/asm/kvm_host.h               | 183
 arch/mips/include/uapi/asm/kvm.h               |  35
 arch/mips/kvm/kvm_locore.S                     |  32
 arch/mips/kvm/kvm_mips.c                       | 145
 arch/mips/kvm/kvm_mips_dyntrans.c              |  15
 arch/mips/kvm/kvm_mips_emul.c                  | 557
 arch/mips/kvm/kvm_tlb.c                        |  77
 arch/mips/kvm/kvm_trap_emul.c                  |  86
 arch/mips/mm/cache.c                           |   1
 arch/mips/mti-malta/malta-time.c               |  14
 arch/s390/include/asm/kvm_host.h               |   1
 arch/s390/kvm/gaccess.c                        |  57
 arch/s390/kvm/gaccess.h                        |   5
 arch/s390/kvm/interrupt.c                      |   1
 arch/s390/kvm/kvm-s390.c                       |   6
 arch/s390/kvm/priv.c                           |  56
 arch/s390/kvm/sigp.c                           |  56
 arch/x86/kvm/emulate.c                         |  13
 arch/x86/kvm/lapic.c                           |  62
 include/uapi/linux/Kbuild                      |   1
 include/uapi/linux/kvm.h                       |  11
 include/uapi/linux/psci.h                      |  90
 45 files changed, 1919 insertions(+), 444 deletions(-)
diff --git a/Documentation/devicetree/bindings/arm/psci.txt b/Documentation/devicetree/bindings/arm/psci.txt
index 433afe9cb590..b4a58f39223c 100644
--- a/Documentation/devicetree/bindings/arm/psci.txt
+++ b/Documentation/devicetree/bindings/arm/psci.txt
@@ -21,7 +21,15 @@ to #0.
Main node required properties:
- - compatible : Must be "arm,psci"
+ - compatible : should contain at least one of:
+
+ * "arm,psci" : for implementations complying with PSCI versions prior to
+ 0.2. For these cases function IDs must be provided.
+
+ * "arm,psci-0.2" : for implementations complying with PSCI 0.2. Function
+ IDs are not required and should be ignored by an OS with PSCI 0.2
+ support, but are permitted to be present for compatibility with
+ existing software when "arm,psci" is later in the compatible list.
- method : The method of calling the PSCI firmware. Permitted
values are:
@@ -45,6 +53,8 @@ Main node optional properties:
Example:
+Case 1: PSCI v0.1 only.
+
psci {
compatible = "arm,psci";
method = "smc";
@@ -53,3 +63,28 @@ Example:
cpu_on = <0x95c10002>;
migrate = <0x95c10003>;
};
+
+
+Case 2: PSCI v0.2 only.
+
+ psci {
+ compatible = "arm,psci-0.2";
+ method = "smc";
+ };
+
+Case 3: PSCI v0.2 and PSCI v0.1.
+
+ A DTB may provide IDs for use by kernels without PSCI 0.2 support,
+ enabling firmware and hypervisors to support existing and new kernels.
+ These IDs will be ignored by kernels with PSCI 0.2 support, which will
+ use the standard PSCI 0.2 IDs exclusively.
+
+ psci {
+ compatible = "arm,psci-0.2", "arm,psci";
+ method = "hvc";
+
+ cpu_on = < arbitrary value >;
+ cpu_off = < arbitrary value >;
+
+ ...
+ };
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 6b3029016e9c..6ff3a7742989 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2384,6 +2384,8 @@ Possible features:
Depends on KVM_CAP_ARM_PSCI.
- KVM_ARM_VCPU_EL1_32BIT: Starts the CPU in a 32bit mode.
Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only).
+ - KVM_ARM_VCPU_PSCI_0_2: Emulate PSCI v0.2 for the CPU.
+ Depends on KVM_CAP_ARM_PSCI_0_2.
4.83 KVM_ARM_PREFERRED_TARGET
@@ -2746,6 +2748,21 @@ It gets triggered whenever both KVM_CAP_PPC_EPR are enabled and an
external interrupt has just been delivered into the guest. User space
should put the acknowledged interrupt vector into the 'epr' field.
+ /* KVM_EXIT_SYSTEM_EVENT */
+ struct {
+#define KVM_SYSTEM_EVENT_SHUTDOWN 1
+#define KVM_SYSTEM_EVENT_RESET 2
+ __u32 type;
+ __u64 flags;
+ } system_event;
+
+If exit_reason is KVM_EXIT_SYSTEM_EVENT then the vcpu has triggered
+a system-level event using some architecture-specific mechanism (a
+hypercall or some special instruction). On ARM/ARM64 this is triggered
+by an HVC-instruction-based PSCI call from the vcpu. The 'type' field
+describes the system-level event type. The 'flags' field describes
+architecture-specific flags for the system-level event.
+
/* Fix the size of the union. */
char padding[256];
};
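
As a rough sketch of how a userspace VMM might consume this exit (the
KVM_RUN loop, vcpu_fd and the mmap'd kvm_run mapping are assumed; the
helper name is illustrative, not part of this patch):

	#include <linux/kvm.h>
	#include <stdlib.h>
	#include <sys/ioctl.h>

	/* Sketch only: run one vcpu iteration and react to system events. */
	static void run_vcpu_once(int vcpu_fd, struct kvm_run *run)
	{
		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
			return;

		if (run->exit_reason == KVM_EXIT_SYSTEM_EVENT) {
			if (run->system_event.type == KVM_SYSTEM_EVENT_SHUTDOWN)
				exit(0);	/* guest requested power-off */
			/* KVM_SYSTEM_EVENT_RESET: tear the VM down and re-init */
		}
	}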
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 09af14999c9b..193ceaf01bfd 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -36,7 +36,7 @@
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#define KVM_HAVE_ONE_REG
-#define KVM_VCPU_MAX_FEATURES 1
+#define KVM_VCPU_MAX_FEATURES 2
#include <kvm/arm_vgic.h>
diff --git a/arch/arm/include/asm/kvm_psci.h b/arch/arm/include/asm/kvm_psci.h
index 9a83d98bf170..6bda945d31fa 100644
--- a/arch/arm/include/asm/kvm_psci.h
+++ b/arch/arm/include/asm/kvm_psci.h
@@ -18,6 +18,10 @@
#ifndef __ARM_KVM_PSCI_H__
#define __ARM_KVM_PSCI_H__
-bool kvm_psci_call(struct kvm_vcpu *vcpu);
+#define KVM_ARM_PSCI_0_1 1
+#define KVM_ARM_PSCI_0_2 2
+
+int kvm_psci_version(struct kvm_vcpu *vcpu);
+int kvm_psci_call(struct kvm_vcpu *vcpu);
#endif /* __ARM_KVM_PSCI_H__ */
diff --git a/arch/arm/include/asm/psci.h b/arch/arm/include/asm/psci.h
index c4ae171850f8..c25ef3ec6d1f 100644
--- a/arch/arm/include/asm/psci.h
+++ b/arch/arm/include/asm/psci.h
@@ -29,16 +29,19 @@ struct psci_operations {
int (*cpu_off)(struct psci_power_state state);
int (*cpu_on)(unsigned long cpuid, unsigned long entry_point);
int (*migrate)(unsigned long cpuid);
+ int (*affinity_info)(unsigned long target_affinity,
+ unsigned long lowest_affinity_level);
+ int (*migrate_info_type)(void);
};
extern struct psci_operations psci_ops;
extern struct smp_operations psci_smp_ops;
#ifdef CONFIG_ARM_PSCI
-void psci_init(void);
+int psci_init(void);
bool psci_smp_available(void);
#else
-static inline void psci_init(void) { }
+static inline int psci_init(void) { return 0; }
static inline bool psci_smp_available(void) { return false; }
#endif
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index ef0c8785ba16..e6ebdd3471e5 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -20,6 +20,7 @@
#define __ARM_KVM_H__
#include <linux/types.h>
+#include <linux/psci.h>
#include <asm/ptrace.h>
#define __KVM_HAVE_GUEST_DEBUG
@@ -83,6 +84,7 @@ struct kvm_regs {
#define KVM_VGIC_V2_CPU_SIZE 0x2000
#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */
+#define KVM_ARM_VCPU_PSCI_0_2 1 /* CPU uses PSCI v0.2 */
struct kvm_vcpu_init {
__u32 target;
@@ -201,9 +203,9 @@ struct kvm_arch_memory_slot {
#define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2)
#define KVM_PSCI_FN_MIGRATE KVM_PSCI_FN(3)
-#define KVM_PSCI_RET_SUCCESS 0
-#define KVM_PSCI_RET_NI ((unsigned long)-1)
-#define KVM_PSCI_RET_INVAL ((unsigned long)-2)
-#define KVM_PSCI_RET_DENIED ((unsigned long)-3)
+#define KVM_PSCI_RET_SUCCESS PSCI_RET_SUCCESS
+#define KVM_PSCI_RET_NI PSCI_RET_NOT_SUPPORTED
+#define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS
+#define KVM_PSCI_RET_DENIED PSCI_RET_DENIED
#endif /* __ARM_KVM_H__ */
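
A sketch of how userspace might opt a vcpu into the new feature bit
(this assumes KVM_CAP_ARM_PSCI_0_2 was confirmed via KVM_CHECK_EXTENSION;
the helper name and target selection are illustrative):

	#include <linux/kvm.h>
	#include <string.h>
	#include <sys/ioctl.h>

	/* Sketch: request PSCI v0.2 emulation for a vcpu at init time. */
	static int vcpu_enable_psci_0_2(int vcpu_fd, __u32 target)
	{
		struct kvm_vcpu_init init;

		memset(&init, 0, sizeof(init));
		init.target = target;	/* e.g. from KVM_ARM_PREFERRED_TARGET */
		init.features[0] = 1 << KVM_ARM_VCPU_PSCI_0_2;
		return ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init);
	}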
diff --git a/arch/arm/kernel/psci.c b/arch/arm/kernel/psci.c
index 46931880093d..f73891b6b730 100644
--- a/arch/arm/kernel/psci.c
+++ b/arch/arm/kernel/psci.c
@@ -17,63 +17,58 @@
#include <linux/init.h>
#include <linux/of.h>
+#include <linux/reboot.h>
+#include <linux/pm.h>
+#include <uapi/linux/psci.h>
#include <asm/compiler.h>
#include <asm/errno.h>
#include <asm/opcodes-sec.h>
#include <asm/opcodes-virt.h>
#include <asm/psci.h>
+#include <asm/system_misc.h>
struct psci_operations psci_ops;
static int (*invoke_psci_fn)(u32, u32, u32, u32);
+typedef int (*psci_initcall_t)(const struct device_node *);
enum psci_function {
PSCI_FN_CPU_SUSPEND,
PSCI_FN_CPU_ON,
PSCI_FN_CPU_OFF,
PSCI_FN_MIGRATE,
+ PSCI_FN_AFFINITY_INFO,
+ PSCI_FN_MIGRATE_INFO_TYPE,
PSCI_FN_MAX,
};
static u32 psci_function_id[PSCI_FN_MAX];
-#define PSCI_RET_SUCCESS 0
-#define PSCI_RET_EOPNOTSUPP -1
-#define PSCI_RET_EINVAL -2
-#define PSCI_RET_EPERM -3
-
static int psci_to_linux_errno(int errno)
{
switch (errno) {
case PSCI_RET_SUCCESS:
return 0;
- case PSCI_RET_EOPNOTSUPP:
+ case PSCI_RET_NOT_SUPPORTED:
return -EOPNOTSUPP;
- case PSCI_RET_EINVAL:
+ case PSCI_RET_INVALID_PARAMS:
return -EINVAL;
- case PSCI_RET_EPERM:
+ case PSCI_RET_DENIED:
return -EPERM;
};
return -EINVAL;
}
-#define PSCI_POWER_STATE_ID_MASK 0xffff
-#define PSCI_POWER_STATE_ID_SHIFT 0
-#define PSCI_POWER_STATE_TYPE_MASK 0x1
-#define PSCI_POWER_STATE_TYPE_SHIFT 16
-#define PSCI_POWER_STATE_AFFL_MASK 0x3
-#define PSCI_POWER_STATE_AFFL_SHIFT 24
-
static u32 psci_power_state_pack(struct psci_power_state state)
{
- return ((state.id & PSCI_POWER_STATE_ID_MASK)
- << PSCI_POWER_STATE_ID_SHIFT) |
- ((state.type & PSCI_POWER_STATE_TYPE_MASK)
- << PSCI_POWER_STATE_TYPE_SHIFT) |
- ((state.affinity_level & PSCI_POWER_STATE_AFFL_MASK)
- << PSCI_POWER_STATE_AFFL_SHIFT);
+ return ((state.id << PSCI_0_2_POWER_STATE_ID_SHIFT)
+ & PSCI_0_2_POWER_STATE_ID_MASK) |
+ ((state.type << PSCI_0_2_POWER_STATE_TYPE_SHIFT)
+ & PSCI_0_2_POWER_STATE_TYPE_MASK) |
+ ((state.affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT)
+ & PSCI_0_2_POWER_STATE_AFFL_MASK);
}
/*
@@ -110,6 +105,14 @@ static noinline int __invoke_psci_fn_smc(u32 function_id, u32 arg0, u32 arg1,
return function_id;
}
+static int psci_get_version(void)
+{
+ int err;
+
+ err = invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0);
+ return err;
+}
+
static int psci_cpu_suspend(struct psci_power_state state,
unsigned long entry_point)
{
@@ -153,26 +156,36 @@ static int psci_migrate(unsigned long cpuid)
return psci_to_linux_errno(err);
}
-static const struct of_device_id psci_of_match[] __initconst = {
- { .compatible = "arm,psci", },
- {},
-};
+static int psci_affinity_info(unsigned long target_affinity,
+ unsigned long lowest_affinity_level)
+{
+ int err;
+ u32 fn;
+
+ fn = psci_function_id[PSCI_FN_AFFINITY_INFO];
+ err = invoke_psci_fn(fn, target_affinity, lowest_affinity_level, 0);
+ return err;
+}
-void __init psci_init(void)
+static int psci_migrate_info_type(void)
{
- struct device_node *np;
- const char *method;
- u32 id;
+ int err;
+ u32 fn;
- np = of_find_matching_node(NULL, psci_of_match);
- if (!np)
- return;
+ fn = psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE];
+ err = invoke_psci_fn(fn, 0, 0, 0);
+ return err;
+}
+
+static int get_set_conduit_method(struct device_node *np)
+{
+ const char *method;
- pr_info("probing function IDs from device-tree\n");
+ pr_info("probing for conduit method from DT.\n");
if (of_property_read_string(np, "method", &method)) {
- pr_warning("missing \"method\" property\n");
- goto out_put_node;
+ pr_warn("missing \"method\" property\n");
+ return -ENXIO;
}
if (!strcmp("hvc", method)) {
@@ -180,10 +193,99 @@ void __init psci_init(void)
} else if (!strcmp("smc", method)) {
invoke_psci_fn = __invoke_psci_fn_smc;
} else {
- pr_warning("invalid \"method\" property: %s\n", method);
+ pr_warn("invalid \"method\" property: %s\n", method);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd)
+{
+ invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0);
+}
+
+static void psci_sys_poweroff(void)
+{
+ invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0);
+}
+
+/*
+ * PSCI Function IDs for v0.2+ are well defined so use
+ * standard values.
+ */
+static int psci_0_2_init(struct device_node *np)
+{
+ int err, ver;
+
+ err = get_set_conduit_method(np);
+
+ if (err)
+ goto out_put_node;
+
+ ver = psci_get_version();
+
+ if (ver == PSCI_RET_NOT_SUPPORTED) {
+ /* PSCI v0.2 mandates implementation of PSCI_ID_VERSION. */
+ pr_err("PSCI firmware does not comply with the v0.2 spec.\n");
+ err = -EOPNOTSUPP;
goto out_put_node;
+ } else {
+ pr_info("PSCIv%d.%d detected in firmware.\n",
+ PSCI_VERSION_MAJOR(ver),
+ PSCI_VERSION_MINOR(ver));
+
+ if (PSCI_VERSION_MAJOR(ver) == 0 &&
+ PSCI_VERSION_MINOR(ver) < 2) {
+ err = -EINVAL;
+ pr_err("Conflicting PSCI version detected.\n");
+ goto out_put_node;
+ }
}
+ pr_info("Using standard PSCI v0.2 function IDs\n");
+ psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN_CPU_SUSPEND;
+ psci_ops.cpu_suspend = psci_cpu_suspend;
+
+ psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF;
+ psci_ops.cpu_off = psci_cpu_off;
+
+ psci_function_id[PSCI_FN_CPU_ON] = PSCI_0_2_FN_CPU_ON;
+ psci_ops.cpu_on = psci_cpu_on;
+
+ psci_function_id[PSCI_FN_MIGRATE] = PSCI_0_2_FN_MIGRATE;
+ psci_ops.migrate = psci_migrate;
+
+ psci_function_id[PSCI_FN_AFFINITY_INFO] = PSCI_0_2_FN_AFFINITY_INFO;
+ psci_ops.affinity_info = psci_affinity_info;
+
+ psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE] =
+ PSCI_0_2_FN_MIGRATE_INFO_TYPE;
+ psci_ops.migrate_info_type = psci_migrate_info_type;
+
+ arm_pm_restart = psci_sys_reset;
+
+ pm_power_off = psci_sys_poweroff;
+
+out_put_node:
+ of_node_put(np);
+ return err;
+}
+
+/*
+ * PSCI < v0.2 gets PSCI Function IDs via DT.
+ */
+static int psci_0_1_init(struct device_node *np)
+{
+ u32 id;
+ int err;
+
+ err = get_set_conduit_method(np);
+
+ if (err)
+ goto out_put_node;
+
+ pr_info("Using PSCI v0.1 Function IDs from DT\n");
+
if (!of_property_read_u32(np, "cpu_suspend", &id)) {
psci_function_id[PSCI_FN_CPU_SUSPEND] = id;
psci_ops.cpu_suspend = psci_cpu_suspend;
@@ -206,5 +308,25 @@ void __init psci_init(void)
out_put_node:
of_node_put(np);
- return;
+ return err;
+}
+
+static const struct of_device_id psci_of_match[] __initconst = {
+ { .compatible = "arm,psci", .data = psci_0_1_init},
+ { .compatible = "arm,psci-0.2", .data = psci_0_2_init},
+ {},
+};
+
+int __init psci_init(void)
+{
+ struct device_node *np;
+ const struct of_device_id *matched_np;
+ psci_initcall_t init_fn;
+
+ np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np);
+ if (!np)
+ return -ENODEV;
+
+ init_fn = (psci_initcall_t)matched_np->data;
+ return init_fn(np);
}
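
For reference, psci_0_2_init()'s version check relies on the bit layout
that PSCI_VERSION_MAJOR()/PSCI_VERSION_MINOR() decode; those macros live
in the new uapi/linux/psci.h, which this diff lists but does not show.
A minimal sketch of the split, assuming the layout documented later in
this patch (bits[31:16] = major version, bits[15:0] = minor version):

	/* Sketch: decode the value returned by PSCI_0_2_FN_PSCI_VERSION. */
	static inline int psci_major(int ver)
	{
		return (ver >> 16) & 0xffff;
	}

	static inline int psci_minor(int ver)
	{
		return ver & 0xffff;
	}

	/* A firmware returning 0x00000002 therefore reports PSCI v0.2. */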
diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c
index 570a48cc3d64..28a1db4da704 100644
--- a/arch/arm/kernel/psci_smp.c
+++ b/arch/arm/kernel/psci_smp.c
@@ -16,6 +16,8 @@
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/of.h>
+#include <linux/delay.h>
+#include <uapi/linux/psci.h>
#include <asm/psci.h>
#include <asm/smp_plat.h>
@@ -66,6 +68,36 @@ void __ref psci_cpu_die(unsigned int cpu)
/* We should never return */
panic("psci: cpu %d failed to shutdown\n", cpu);
}
+
+int __ref psci_cpu_kill(unsigned int cpu)
+{
+ int err, i;
+
+ if (!psci_ops.affinity_info)
+ return 1;
+ /*
+ * cpu_kill could race with cpu_die and we can
+ * potentially end up declaring this cpu undead
+ * while it is dying. So, try again a few times.
+ */
+
+ for (i = 0; i < 10; i++) {
+ err = psci_ops.affinity_info(cpu_logical_map(cpu), 0);
+ if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) {
+ pr_info("CPU%d killed.\n", cpu);
+ return 1;
+ }
+
+ msleep(10);
+ pr_info("Retrying again to check for CPU kill\n");
+ }
+
+ pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n",
+ cpu, err);
+ /* Make platform_cpu_kill() fail. */
+ return 0;
+}
+
#endif
bool __init psci_smp_available(void)
@@ -78,5 +110,6 @@ struct smp_operations __initdata psci_smp_ops = {
.smp_boot_secondary = psci_boot_secondary,
#ifdef CONFIG_HOTPLUG_CPU
.cpu_die = psci_cpu_die,
+ .cpu_kill = psci_cpu_kill,
#endif
};
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index f0e50a0f3a65..3c82b37c0f9e 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -197,6 +197,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
case KVM_CAP_ONE_REG:
case KVM_CAP_ARM_PSCI:
+ case KVM_CAP_ARM_PSCI_0_2:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index 0de91fc6de0f..4c979d466cc1 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -38,14 +38,18 @@ static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
+ int ret;
+
trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
kvm_vcpu_hvc_get_imm(vcpu));
- if (kvm_psci_call(vcpu))
+ ret = kvm_psci_call(vcpu);
+ if (ret < 0) {
+ kvm_inject_undefined(vcpu);
return 1;
+ }
- kvm_inject_undefined(vcpu);
- return 1;
+ return ret;
}
static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 448f60e8d23c..09cf37737ee2 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -27,6 +27,36 @@
* as described in ARM document number ARM DEN 0022A.
*/
+#define AFFINITY_MASK(level) ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1)
+
+static unsigned long psci_affinity_mask(unsigned long affinity_level)
+{
+ if (affinity_level <= 3)
+ return MPIDR_HWID_BITMASK & AFFINITY_MASK(affinity_level);
+
+ return 0;
+}
+
+static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
+{
+ /*
+ * NOTE: For simplicity, we make VCPU suspend emulation the same as
+ * WFI (Wait-for-interrupt) emulation.
+ *
+ * This means for KVM the wakeup events are interrupts and
+ * this is consistent with intended use of StateID as described
+ * in section 5.4.1 of PSCI v0.2 specification (ARM DEN 0022A).
+ *
+ * Further, we also treat a power-down request the same as a
+ * stand-by request, as per section 5.4.2 clause 3 of PSCI v0.2
+ * specification (ARM DEN 0022A). This means all suspend states
+ * for KVM will preserve the register state.
+ */
+ kvm_vcpu_block(vcpu);
+
+ return PSCI_RET_SUCCESS;
+}
+
static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
{
vcpu->arch.pause = true;
@@ -38,6 +68,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
struct kvm_vcpu *vcpu = NULL, *tmp;
wait_queue_head_t *wq;
unsigned long cpu_id;
+ unsigned long context_id;
unsigned long mpidr;
phys_addr_t target_pc;
int i;
@@ -58,10 +89,17 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
* Make sure the caller requested a valid CPU and that the CPU is
* turned off.
*/
- if (!vcpu || !vcpu->arch.pause)
- return KVM_PSCI_RET_INVAL;
+ if (!vcpu)
+ return PSCI_RET_INVALID_PARAMS;
+ if (!vcpu->arch.pause) {
+ if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1)
+ return PSCI_RET_ALREADY_ON;
+ else
+ return PSCI_RET_INVALID_PARAMS;
+ }
target_pc = *vcpu_reg(source_vcpu, 2);
+ context_id = *vcpu_reg(source_vcpu, 3);
kvm_reset_vcpu(vcpu);
@@ -76,26 +114,160 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
kvm_vcpu_set_be(vcpu);
*vcpu_pc(vcpu) = target_pc;
+ /*
+ * NOTE: We always update r0 (or x0) because for PSCI v0.1
+ * the general purpose registers are undefined upon CPU_ON.
+ */
+ *vcpu_reg(vcpu, 0) = context_id;
vcpu->arch.pause = false;
smp_mb(); /* Make sure the above is visible */
wq = kvm_arch_vcpu_wq(vcpu);
wake_up_interruptible(wq);
- return KVM_PSCI_RET_SUCCESS;
+ return PSCI_RET_SUCCESS;
}
-/**
- * kvm_psci_call - handle PSCI call if r0 value is in range
- * @vcpu: Pointer to the VCPU struct
- *
- * Handle PSCI calls from guests through traps from HVC instructions.
- * The calling convention is similar to SMC calls to the secure world where
- * the function number is placed in r0 and this function returns true if the
- * function number specified in r0 is withing the PSCI range, and false
- * otherwise.
- */
-bool kvm_psci_call(struct kvm_vcpu *vcpu)
+static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
+{
+ int i;
+ unsigned long mpidr;
+ unsigned long target_affinity;
+ unsigned long target_affinity_mask;
+ unsigned long lowest_affinity_level;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_vcpu *tmp;
+
+ target_affinity = *vcpu_reg(vcpu, 1);
+ lowest_affinity_level = *vcpu_reg(vcpu, 2);
+
+ /* Determine target affinity mask */
+ target_affinity_mask = psci_affinity_mask(lowest_affinity_level);
+ if (!target_affinity_mask)
+ return PSCI_RET_INVALID_PARAMS;
+
+ /* Ignore other bits of target affinity */
+ target_affinity &= target_affinity_mask;
+
+ /*
+ * If one or more VCPU matching target affinity are running
+ * then ON else OFF
+ */
+ kvm_for_each_vcpu(i, tmp, kvm) {
+ mpidr = kvm_vcpu_get_mpidr(tmp);
+ if (((mpidr & target_affinity_mask) == target_affinity) &&
+ !tmp->arch.pause) {
+ return PSCI_0_2_AFFINITY_LEVEL_ON;
+ }
+ }
+
+ return PSCI_0_2_AFFINITY_LEVEL_OFF;
+}
+
+static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
+{
+ memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
+ vcpu->run->system_event.type = type;
+ vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+}
+
+static void kvm_psci_system_off(struct kvm_vcpu *vcpu)
+{
+ kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN);
+}
+
+static void kvm_psci_system_reset(struct kvm_vcpu *vcpu)
+{
+ kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET);
+}
+
+int kvm_psci_version(struct kvm_vcpu *vcpu)
+{
+ if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features))
+ return KVM_ARM_PSCI_0_2;
+
+ return KVM_ARM_PSCI_0_1;
+}
+
+static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
+{
+ int ret = 1;
+ unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
+ unsigned long val;
+
+ switch (psci_fn) {
+ case PSCI_0_2_FN_PSCI_VERSION:
+ /*
+ * Bits[31:16] = Major Version = 0
+ * Bits[15:0] = Minor Version = 2
+ */
+ val = 2;
+ break;
+ case PSCI_0_2_FN_CPU_SUSPEND:
+ case PSCI_0_2_FN64_CPU_SUSPEND:
+ val = kvm_psci_vcpu_suspend(vcpu);
+ break;
+ case PSCI_0_2_FN_CPU_OFF:
+ kvm_psci_vcpu_off(vcpu);
+ val = PSCI_RET_SUCCESS;
+ break;
+ case PSCI_0_2_FN_CPU_ON:
+ case PSCI_0_2_FN64_CPU_ON:
+ val = kvm_psci_vcpu_on(vcpu);
+ break;
+ case PSCI_0_2_FN_AFFINITY_INFO:
+ case PSCI_0_2_FN64_AFFINITY_INFO:
+ val = kvm_psci_vcpu_affinity_info(vcpu);
+ break;
+ case PSCI_0_2_FN_MIGRATE:
+ case PSCI_0_2_FN64_MIGRATE:
+ val = PSCI_RET_NOT_SUPPORTED;
+ break;
+ case PSCI_0_2_FN_MIGRATE_INFO_TYPE:
+ /*
+ * Trusted OS is MP hence does not require migration
+ * or
+ * Trusted OS is not present
+ */
+ val = PSCI_0_2_TOS_MP;
+ break;
+ case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU:
+ case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU:
+ val = PSCI_RET_NOT_SUPPORTED;
+ break;
+ case PSCI_0_2_FN_SYSTEM_OFF:
+ kvm_psci_system_off(vcpu);
+ /*
+ * We shouldn't be going back to the guest VCPU after
+ * receiving a SYSTEM_OFF request.
+ *
+ * If user space accidentally/deliberately resumes the
+ * guest VCPU after a SYSTEM_OFF request then the guest
+ * VCPU should see an internal failure from the PSCI return
+ * value. To achieve this, we preload r0 (or x0) with
+ * PSCI return value INTERNAL_FAILURE.
+ */
+ val = PSCI_RET_INTERNAL_FAILURE;
+ ret = 0;
+ break;
+ case PSCI_0_2_FN_SYSTEM_RESET:
+ kvm_psci_system_reset(vcpu);
+ /*
+ * Same reason as SYSTEM_OFF for preloading r0 (or x0)
+ * with PSCI return value INTERNAL_FAILURE.
+ */
+ val = PSCI_RET_INTERNAL_FAILURE;
+ ret = 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ *vcpu_reg(vcpu, 0) = val;
+ return ret;
+}
+
+static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
{
unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
unsigned long val;
@@ -103,20 +275,45 @@ bool kvm_psci_call(struct kvm_vcpu *vcpu)
switch (psci_fn) {
case KVM_PSCI_FN_CPU_OFF:
kvm_psci_vcpu_off(vcpu);
- val = KVM_PSCI_RET_SUCCESS;
+ val = PSCI_RET_SUCCESS;
break;
case KVM_PSCI_FN_CPU_ON:
val = kvm_psci_vcpu_on(vcpu);
break;
case KVM_PSCI_FN_CPU_SUSPEND:
case KVM_PSCI_FN_MIGRATE:
- val = KVM_PSCI_RET_NI;
+ val = PSCI_RET_NOT_SUPPORTED;
break;
-
default:
- return false;
+ return -EINVAL;
}
*vcpu_reg(vcpu, 0) = val;
- return true;
+ return 1;
+}
+
+/**
+ * kvm_psci_call - handle PSCI call if r0 value is in range
+ * @vcpu: Pointer to the VCPU struct
+ *
+ * Handle PSCI calls from guests through traps from HVC instructions.
+ * The calling convention is similar to SMC calls to the secure world
+ * where the function number is placed in r0.
+ *
+ * This function returns: > 0 (success), 0 (success but exit to user
+ * space), and < 0 (errors)
+ *
+ * Errors:
+ * -EINVAL: Unrecognized PSCI function
+ */
+int kvm_psci_call(struct kvm_vcpu *vcpu)
+{
+ switch (kvm_psci_version(vcpu)) {
+ case KVM_ARM_PSCI_0_2:
+ return kvm_psci_0_2_call(vcpu);
+ case KVM_ARM_PSCI_0_1:
+ return kvm_psci_0_1_call(vcpu);
+ default:
+ return -EINVAL;
+ };
}
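
The calling convention emulated above matches what a guest issues: the
function ID goes in r0 (x0 on arm64), arguments in r1-r3, and the
conduit is HVC. A sketch of a 32-bit guest invoking CPU_ON against this
emulation (the 0x84000003 function ID is an assumption taken from the
PSCI 0.2 SMC32 encoding; the real constant lives in uapi/linux/psci.h,
which this diff does not show):

	/* Sketch: guest-side PSCI 0.2 CPU_ON via the HVC conduit. */
	static int guest_psci_cpu_on(unsigned long target_mpidr,
				     unsigned long entry, unsigned long ctx)
	{
		register unsigned long r0 asm("r0") = 0x84000003; /* CPU_ON */
		register unsigned long r1 asm("r1") = target_mpidr;
		register unsigned long r2 asm("r2") = entry;	/* secondary pc */
		register unsigned long r3 asm("r3") = ctx;	/* new vcpu's r0 */

		asm volatile(".arch_extension virt\n\thvc #0"
			     : "+r" (r0)
			     : "r" (r1), "r" (r2), "r" (r3)
			     : "memory");
		return r0;	/* PSCI_RET_* status */
	}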
diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h
index 152413076503..d7b4b38a8e86 100644
--- a/arch/arm64/include/asm/cpu_ops.h
+++ b/arch/arm64/include/asm/cpu_ops.h
@@ -39,6 +39,7 @@ struct device_node;
* from the cpu to be killed.
* @cpu_die: Makes a cpu leave the kernel. Must not fail. Called from the
* cpu being killed.
+ * @cpu_kill: Ensures a cpu has left the kernel. Called from another cpu.
* @cpu_suspend: Suspends a cpu and saves the required context. May fail owing
* to wrong parameters or error conditions. Called from the
* CPU being suspended. Must be called with IRQs disabled.
@@ -52,6 +53,7 @@ struct cpu_operations {
#ifdef CONFIG_HOTPLUG_CPU
int (*cpu_disable)(unsigned int cpu);
void (*cpu_die)(unsigned int cpu);
+ int (*cpu_kill)(unsigned int cpu);
#endif
#ifdef CONFIG_ARM64_CPU_SUSPEND
int (*cpu_suspend)(unsigned long);
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index c404fb0df3a6..27f54a7cc81b 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -41,6 +41,7 @@
#define ARM_CPU_PART_AEM_V8 0xD0F0
#define ARM_CPU_PART_FOUNDATION 0xD000
+#define ARM_CPU_PART_CORTEX_A53 0xD030
#define ARM_CPU_PART_CORTEX_A57 0xD070
#define APM_CPU_PART_POTENZA 0x0000
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 0a1d69751562..92242ce06309 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -39,7 +39,7 @@
#include <kvm/arm_vgic.h>
#include <kvm/arm_arch_timer.h>
-#define KVM_VCPU_MAX_FEATURES 2
+#define KVM_VCPU_MAX_FEATURES 3
struct kvm_vcpu;
int kvm_target_cpu(void);
diff --git a/arch/arm64/include/asm/kvm_psci.h b/arch/arm64/include/asm/kvm_psci.h
index e301a4816355..bc39e557c56c 100644
--- a/arch/arm64/include/asm/kvm_psci.h
+++ b/arch/arm64/include/asm/kvm_psci.h
@@ -18,6 +18,10 @@
#ifndef __ARM64_KVM_PSCI_H__
#define __ARM64_KVM_PSCI_H__
-bool kvm_psci_call(struct kvm_vcpu *vcpu);
+#define KVM_ARM_PSCI_0_1 1
+#define KVM_ARM_PSCI_0_2 2
+
+int kvm_psci_version(struct kvm_vcpu *vcpu);
+int kvm_psci_call(struct kvm_vcpu *vcpu);
#endif /* __ARM64_KVM_PSCI_H__ */
diff --git a/arch/arm64/include/asm/psci.h b/arch/arm64/include/asm/psci.h
index d15ab8b46336..e5312ea0ec1a 100644
--- a/arch/arm64/include/asm/psci.h
+++ b/arch/arm64/include/asm/psci.h
@@ -14,6 +14,6 @@
#ifndef __ASM_PSCI_H
#define __ASM_PSCI_H
-void psci_init(void);
+int psci_init(void);
#endif /* __ASM_PSCI_H */
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index eaf54a30bedc..e633ff8cdec8 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -31,6 +31,7 @@
#define KVM_NR_SPSR 5
#ifndef __ASSEMBLY__
+#include <linux/psci.h>
#include <asm/types.h>
#include <asm/ptrace.h>
@@ -56,8 +57,9 @@ struct kvm_regs {
#define KVM_ARM_TARGET_FOUNDATION_V8 1
#define KVM_ARM_TARGET_CORTEX_A57 2
#define KVM_ARM_TARGET_XGENE_POTENZA 3
+#define KVM_ARM_TARGET_CORTEX_A53 4
-#define KVM_ARM_NUM_TARGETS 4
+#define KVM_ARM_NUM_TARGETS 5
/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
#define KVM_ARM_DEVICE_TYPE_SHIFT 0
@@ -77,6 +79,7 @@ struct kvm_regs {
#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */
#define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */
+#define KVM_ARM_VCPU_PSCI_0_2 2 /* CPU uses PSCI v0.2 */
struct kvm_vcpu_init {
__u32 target;
@@ -186,10 +189,10 @@ struct kvm_arch_memory_slot {
#define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2)
#define KVM_PSCI_FN_MIGRATE KVM_PSCI_FN(3)
-#define KVM_PSCI_RET_SUCCESS 0
-#define KVM_PSCI_RET_NI ((unsigned long)-1)
-#define KVM_PSCI_RET_INVAL ((unsigned long)-2)
-#define KVM_PSCI_RET_DENIED ((unsigned long)-3)
+#define KVM_PSCI_RET_SUCCESS PSCI_RET_SUCCESS
+#define KVM_PSCI_RET_NI PSCI_RET_NOT_SUPPORTED
+#define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS
+#define KVM_PSCI_RET_DENIED PSCI_RET_DENIED
#endif
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index ea4828a4aa96..9e9798f91172 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -18,12 +18,17 @@
#include <linux/init.h>
#include <linux/of.h>
#include <linux/smp.h>
+#include <linux/reboot.h>
+#include <linux/pm.h>
+#include <linux/delay.h>
+#include <uapi/linux/psci.h>
#include <asm/compiler.h>
#include <asm/cpu_ops.h>
#include <asm/errno.h>
#include <asm/psci.h>
#include <asm/smp_plat.h>
+#include <asm/system_misc.h>
#define PSCI_POWER_STATE_TYPE_STANDBY 0
#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1
@@ -40,58 +45,52 @@ struct psci_operations {
int (*cpu_off)(struct psci_power_state state);
int (*cpu_on)(unsigned long cpuid, unsigned long entry_point);
int (*migrate)(unsigned long cpuid);
+ int (*affinity_info)(unsigned long target_affinity,
+ unsigned long lowest_affinity_level);
+ int (*migrate_info_type)(void);
};
static struct psci_operations psci_ops;
static int (*invoke_psci_fn)(u64, u64, u64, u64);
+typedef int (*psci_initcall_t)(const struct device_node *);
enum psci_function {
PSCI_FN_CPU_SUSPEND,
PSCI_FN_CPU_ON,
PSCI_FN_CPU_OFF,
PSCI_FN_MIGRATE,
+ PSCI_FN_AFFINITY_INFO,
+ PSCI_FN_MIGRATE_INFO_TYPE,
PSCI_FN_MAX,
};
static u32 psci_function_id[PSCI_FN_MAX];
-#define PSCI_RET_SUCCESS 0
-#define PSCI_RET_EOPNOTSUPP -1
-#define PSCI_RET_EINVAL -2
-#define PSCI_RET_EPERM -3
-
static int psci_to_linux_errno(int errno)
{
switch (errno) {
case PSCI_RET_SUCCESS:
return 0;
- case PSCI_RET_EOPNOTSUPP:
+ case PSCI_RET_NOT_SUPPORTED:
return -EOPNOTSUPP;
- case PSCI_RET_EINVAL:
+ case PSCI_RET_INVALID_PARAMS:
return -EINVAL;
- case PSCI_RET_EPERM:
+ case PSCI_RET_DENIED:
return -EPERM;
};
return -EINVAL;
}
-#define PSCI_POWER_STATE_ID_MASK 0xffff
-#define PSCI_POWER_STATE_ID_SHIFT 0
-#define PSCI_POWER_STATE_TYPE_MASK 0x1
-#define PSCI_POWER_STATE_TYPE_SHIFT 16
-#define PSCI_POWER_STATE_AFFL_MASK 0x3
-#define PSCI_POWER_STATE_AFFL_SHIFT 24
-
static u32 psci_power_state_pack(struct psci_power_state state)
{
- return ((state.id & PSCI_POWER_STATE_ID_MASK)
- << PSCI_POWER_STATE_ID_SHIFT) |
- ((state.type & PSCI_POWER_STATE_TYPE_MASK)
- << PSCI_POWER_STATE_TYPE_SHIFT) |
- ((state.affinity_level & PSCI_POWER_STATE_AFFL_MASK)
- << PSCI_POWER_STATE_AFFL_SHIFT);
+ return ((state.id << PSCI_0_2_POWER_STATE_ID_SHIFT)
+ & PSCI_0_2_POWER_STATE_ID_MASK) |
+ ((state.type << PSCI_0_2_POWER_STATE_TYPE_SHIFT)
+ & PSCI_0_2_POWER_STATE_TYPE_MASK) |
+ ((state.affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT)
+ & PSCI_0_2_POWER_STATE_AFFL_MASK);
}
/*
@@ -128,6 +127,14 @@ static noinline int __invoke_psci_fn_smc(u64 function_id, u64 arg0, u64 arg1,
return function_id;
}
+static int psci_get_version(void)
+{
+ int err;
+
+ err = invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0);
+ return err;
+}
+
static int psci_cpu_suspend(struct psci_power_state state,
unsigned long entry_point)
{
@@ -171,26 +178,36 @@ static int psci_migrate(unsigned long cpuid)
return psci_to_linux_errno(err);
}
-static const struct of_device_id psci_of_match[] __initconst = {
- { .compatible = "arm,psci", },
- {},
-};
+static int psci_affinity_info(unsigned long target_affinity,
+ unsigned long lowest_affinity_level)
+{
+ int err;
+ u32 fn;
+
+ fn = psci_function_id[PSCI_FN_AFFINITY_INFO];
+ err = invoke_psci_fn(fn, target_affinity, lowest_affinity_level, 0);
+ return err;
+}
-void __init psci_init(void)
+static int psci_migrate_info_type(void)
{
- struct device_node *np;
- const char *method;
- u32 id;
+ int err;
+ u32 fn;
- np = of_find_matching_node(NULL, psci_of_match);
- if (!np)
- return;
+ fn = psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE];
+ err = invoke_psci_fn(fn, 0, 0, 0);
+ return err;
+}
- pr_info("probing function IDs from device-tree\n");
+static int get_set_conduit_method(struct device_node *np)
+{
+ const char *method;
+
+ pr_info("probing for conduit method from DT.\n");
if (of_property_read_string(np, "method", &method)) {
- pr_warning("missing \"method\" property\n");
- goto out_put_node;
+ pr_warn("missing \"method\" property\n");
+ return -ENXIO;
}
if (!strcmp("hvc", method)) {
@@ -198,10 +215,99 @@ void __init psci_init(void)
} else if (!strcmp("smc", method)) {
invoke_psci_fn = __invoke_psci_fn_smc;
} else {
- pr_warning("invalid \"method\" property: %s\n", method);
+ pr_warn("invalid \"method\" property: %s\n", method);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd)
+{
+ invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0);
+}
+
+static void psci_sys_poweroff(void)
+{
+ invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0);
+}
+
+/*
+ * PSCI Function IDs for v0.2+ are well defined so use
+ * standard values.
+ */
+static int psci_0_2_init(struct device_node *np)
+{
+ int err, ver;
+
+ err = get_set_conduit_method(np);
+
+ if (err)
+ goto out_put_node;
+
+ ver = psci_get_version();
+
+ if (ver == PSCI_RET_NOT_SUPPORTED) {
+ /* PSCI v0.2 mandates implementation of PSCI_ID_VERSION. */
+ pr_err("PSCI firmware does not comply with the v0.2 spec.\n");
+ err = -EOPNOTSUPP;
goto out_put_node;
+ } else {
+ pr_info("PSCIv%d.%d detected in firmware.\n",
+ PSCI_VERSION_MAJOR(ver),
+ PSCI_VERSION_MINOR(ver));
+
+ if (PSCI_VERSION_MAJOR(ver) == 0 &&
+ PSCI_VERSION_MINOR(ver) < 2) {
+ err = -EINVAL;
+ pr_err("Conflicting PSCI version detected.\n");
+ goto out_put_node;
+ }
}
+ pr_info("Using standard PSCI v0.2 function IDs\n");
+ psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN64_CPU_SUSPEND;
+ psci_ops.cpu_suspend = psci_cpu_suspend;
+
+ psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF;
+ psci_ops.cpu_off = psci_cpu_off;
+
+ psci_function_id[PSCI_FN_CPU_ON] = PSCI_0_2_FN64_CPU_ON;
+ psci_ops.cpu_on = psci_cpu_on;
+
+ psci_function_id[PSCI_FN_MIGRATE] = PSCI_0_2_FN64_MIGRATE;
+ psci_ops.migrate = psci_migrate;
+
+ psci_function_id[PSCI_FN_AFFINITY_INFO] = PSCI_0_2_FN64_AFFINITY_INFO;
+ psci_ops.affinity_info = psci_affinity_info;
+
+ psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE] =
+ PSCI_0_2_FN_MIGRATE_INFO_TYPE;
+ psci_ops.migrate_info_type = psci_migrate_info_type;
+
+ arm_pm_restart = psci_sys_reset;
+
+ pm_power_off = psci_sys_poweroff;
+
+out_put_node:
+ of_node_put(np);
+ return err;
+}
+
+/*
+ * PSCI < v0.2 gets PSCI Function IDs via DT.
+ */
+static int psci_0_1_init(struct device_node *np)
+{
+ u32 id;
+ int err;
+
+ err = get_set_conduit_method(np);
+
+ if (err)
+ goto out_put_node;
+
+ pr_info("Using PSCI v0.1 Function IDs from DT\n");
+
if (!of_property_read_u32(np, "cpu_suspend", &id)) {
psci_function_id[PSCI_FN_CPU_SUSPEND] = id;
psci_ops.cpu_suspend = psci_cpu_suspend;
@@ -224,7 +330,28 @@ void __init psci_init(void)
out_put_node:
of_node_put(np);
- return;
+ return err;
+}
+
+static const struct of_device_id psci_of_match[] __initconst = {
+ { .compatible = "arm,psci", .data = psci_0_1_init},
+ { .compatible = "arm,psci-0.2", .data = psci_0_2_init},
+ {},
+};
+
+int __init psci_init(void)
+{
+ struct device_node *np;
+ const struct of_device_id *matched_np;
+ psci_initcall_t init_fn;
+
+ np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np);
+
+ if (!np)
+ return -ENODEV;
+
+ init_fn = (psci_initcall_t)matched_np->data;
+ return init_fn(np);
}
#ifdef CONFIG_SMP
@@ -277,6 +404,35 @@ static void cpu_psci_cpu_die(unsigned int cpu)
pr_crit("unable to power off CPU%u (%d)\n", cpu, ret);
}
+
+static int cpu_psci_cpu_kill(unsigned int cpu)
+{
+ int err, i;
+
+ if (!psci_ops.affinity_info)
+ return 1;
+ /*
+ * cpu_kill could race with cpu_die and we can
+ * potentially end up declaring this cpu undead
+ * while it is dying. So, try again a few times.
+ */
+
+ for (i = 0; i < 10; i++) {
+ err = psci_ops.affinity_info(cpu_logical_map(cpu), 0);
+ if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) {
+ pr_info("CPU%d killed.\n", cpu);
+ return 1;
+ }
+
+ msleep(10);
+ pr_info("Retrying again to check for CPU kill\n");
+ }
+
+ pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n",
+ cpu, err);
+ /* Make op_cpu_kill() fail. */
+ return 0;
+}
#endif
const struct cpu_operations cpu_psci_ops = {
@@ -287,6 +443,7 @@ const struct cpu_operations cpu_psci_ops = {
#ifdef CONFIG_HOTPLUG_CPU
.cpu_disable = cpu_psci_cpu_disable,
.cpu_die = cpu_psci_cpu_die,
+ .cpu_kill = cpu_psci_cpu_kill,
#endif
};
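
Note the design choice relative to the 32-bit side: arch/arm registers
the SMC32 (PSCI_0_2_FN_*) IDs, while arm64 registers the SMC64
(PSCI_0_2_FN64_*) variants for the calls that pass addresses or MPIDR
values, so those arguments are taken as 64-bit. A sketch of the encoding
difference (values assumed from the SMC calling convention; the real
macros are in the new uapi/linux/psci.h, which this diff does not show):

	/* Sketch: PSCI 0.2 function ID bases under the SMC calling convention. */
	#define FN_BASE		0x84000000U	/* SMC32: 32-bit arguments */
	#define FN64_BASE	0xc4000000U	/* SMC64: 64-bit arguments */

	#define FN_CPU_ON	(FN_BASE + 3)	/* 0x84000003, used by arch/arm */
	#define FN64_CPU_ON	(FN64_BASE + 3)	/* 0xc4000003, used by arch/arm64 */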
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index f0a141dd5655..c3cb160edc69 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -228,6 +228,19 @@ int __cpu_disable(void)
return 0;
}
+static int op_cpu_kill(unsigned int cpu)
+{
+ /*
+ * If we have no means of synchronising with the dying CPU, then assume
+ * that it is really dead. We can only wait for an arbitrary length of
+ * time and hope that it's dead, so let's skip the wait and just hope.
+ */
+ if (!cpu_ops[cpu]->cpu_kill)
+ return 1;
+
+ return cpu_ops[cpu]->cpu_kill(cpu);
+}
+
static DECLARE_COMPLETION(cpu_died);
/*
@@ -241,6 +254,15 @@ void __cpu_die(unsigned int cpu)
return;
}
pr_notice("CPU%u: shutdown\n", cpu);
+
+ /*
+ * Now that the dying CPU is beyond the point of no return w.r.t.
+ * in-kernel synchronisation, try to get the firwmare to help us to
+ * verify that it has really left the kernel before we consider
+ * clobbering anything it might still be using.
+ */
+ if (!op_cpu_kill(cpu))
+ pr_warn("CPU%d may not have shut down cleanly\n", cpu);
}
/*
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 08745578d54d..60b5c31f3c10 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -214,6 +214,8 @@ int __attribute_const__ kvm_target_cpu(void)
return KVM_ARM_TARGET_AEM_V8;
case ARM_CPU_PART_FOUNDATION:
return KVM_ARM_TARGET_FOUNDATION_V8;
+ case ARM_CPU_PART_CORTEX_A53:
+ return KVM_ARM_TARGET_CORTEX_A53;
case ARM_CPU_PART_CORTEX_A57:
return KVM_ARM_TARGET_CORTEX_A57;
};
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 7bc41eab4c64..182415e1a952 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -30,11 +30,15 @@ typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
- if (kvm_psci_call(vcpu))
+ int ret;
+
+ ret = kvm_psci_call(vcpu);
+ if (ret < 0) {
+ kvm_inject_undefined(vcpu);
return 1;
+ }
- kvm_inject_undefined(vcpu);
- return 1;
+ return ret;
}
static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
index 8fe6f76b0edc..475fd2929310 100644
--- a/arch/arm64/kvm/sys_regs_generic_v8.c
+++ b/arch/arm64/kvm/sys_regs_generic_v8.c
@@ -88,6 +88,8 @@ static int __init sys_reg_genericv8_init(void)
&genericv8_target_table);
kvm_register_target_sys_reg_table(KVM_ARM_TARGET_FOUNDATION_V8,
&genericv8_target_table);
+ kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A53,
+ &genericv8_target_table);
kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57,
&genericv8_target_table);
kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA,
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 5cd695f905a1..5e0014e864f3 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1756,14 +1756,14 @@ config KVM_GUEST
help
Select this option if building a guest kernel for KVM (Trap & Emulate) mode
-config KVM_HOST_FREQ
- int "KVM Host Processor Frequency (MHz)"
+config KVM_GUEST_TIMER_FREQ
+ int "Count/Compare Timer Frequency (MHz)"
depends on KVM_GUEST
- default 500
+ default 100
help
- Select this option if building a guest kernel for KVM to skip
- RTC emulation when determining guest CPU Frequency. Instead, the guest
- processor frequency is automatically derived from the host frequency.
+ Set this to non-zero if building a guest kernel for KVM to skip RTC
+ emulation when determining guest CPU Frequency. Instead, the guest's
+ timer frequency is specified directly.
choice
prompt "Kernel page size"
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 060aaa6348d7..b0aa95565752 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -19,6 +19,38 @@
#include <linux/threads.h>
#include <linux/spinlock.h>
+/* MIPS KVM register ids */
+#define MIPS_CP0_32(_R, _S) \
+ (KVM_REG_MIPS | KVM_REG_SIZE_U32 | 0x10000 | (8 * (_R) + (_S)))
+
+#define MIPS_CP0_64(_R, _S) \
+ (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 0x10000 | (8 * (_R) + (_S)))
+
+#define KVM_REG_MIPS_CP0_INDEX MIPS_CP0_32(0, 0)
+#define KVM_REG_MIPS_CP0_ENTRYLO0 MIPS_CP0_64(2, 0)
+#define KVM_REG_MIPS_CP0_ENTRYLO1 MIPS_CP0_64(3, 0)
+#define KVM_REG_MIPS_CP0_CONTEXT MIPS_CP0_64(4, 0)
+#define KVM_REG_MIPS_CP0_USERLOCAL MIPS_CP0_64(4, 2)
+#define KVM_REG_MIPS_CP0_PAGEMASK MIPS_CP0_32(5, 0)
+#define KVM_REG_MIPS_CP0_PAGEGRAIN MIPS_CP0_32(5, 1)
+#define KVM_REG_MIPS_CP0_WIRED MIPS_CP0_32(6, 0)
+#define KVM_REG_MIPS_CP0_HWRENA MIPS_CP0_32(7, 0)
+#define KVM_REG_MIPS_CP0_BADVADDR MIPS_CP0_64(8, 0)
+#define KVM_REG_MIPS_CP0_COUNT MIPS_CP0_32(9, 0)
+#define KVM_REG_MIPS_CP0_ENTRYHI MIPS_CP0_64(10, 0)
+#define KVM_REG_MIPS_CP0_COMPARE MIPS_CP0_32(11, 0)
+#define KVM_REG_MIPS_CP0_STATUS MIPS_CP0_32(12, 0)
+#define KVM_REG_MIPS_CP0_CAUSE MIPS_CP0_32(13, 0)
+#define KVM_REG_MIPS_CP0_EPC MIPS_CP0_64(14, 0)
+#define KVM_REG_MIPS_CP0_EBASE MIPS_CP0_64(15, 1)
+#define KVM_REG_MIPS_CP0_CONFIG MIPS_CP0_32(16, 0)
+#define KVM_REG_MIPS_CP0_CONFIG1 MIPS_CP0_32(16, 1)
+#define KVM_REG_MIPS_CP0_CONFIG2 MIPS_CP0_32(16, 2)
+#define KVM_REG_MIPS_CP0_CONFIG3 MIPS_CP0_32(16, 3)
+#define KVM_REG_MIPS_CP0_CONFIG7 MIPS_CP0_32(16, 7)
+#define KVM_REG_MIPS_CP0_XCONTEXT MIPS_CP0_64(20, 0)
+#define KVM_REG_MIPS_CP0_ERROREPC MIPS_CP0_64(30, 0)
+
#define KVM_MAX_VCPUS 1
#define KVM_USER_MEM_SLOTS 8
@@ -372,8 +404,19 @@ struct kvm_vcpu_arch {
u32 io_gpr; /* GPR used as IO source/target */
- /* Used to calibrate the virutal count register for the guest */
- int32_t host_cp0_count;
+ struct hrtimer comparecount_timer;
+ /* Count timer control KVM register */
+ uint32_t count_ctl;
+ /* Count bias from the raw time */
+ uint32_t count_bias;
+ /* Frequency of timer in Hz */
+ uint32_t count_hz;
+ /* Dynamic nanosecond bias (multiple of count_period) to avoid overflow */
+ s64 count_dyn_bias;
+ /* Resume time */
+ ktime_t count_resume;
+ /* Period of timer tick in ns */
+ u64 count_period;
/* Bitmask of exceptions that are pending */
unsigned long pending_exceptions;
@@ -394,8 +437,6 @@ struct kvm_vcpu_arch {
uint32_t guest_kernel_asid[NR_CPUS];
struct mm_struct guest_kernel_mm, guest_user_mm;
- struct hrtimer comparecount_timer;
-
int last_sched_cpu;
/* WAIT executed */
@@ -410,6 +451,7 @@ struct kvm_vcpu_arch {
#define kvm_read_c0_guest_context(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0])
#define kvm_write_c0_guest_context(cop0, val) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0] = (val))
#define kvm_read_c0_guest_userlocal(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][2])
+#define kvm_write_c0_guest_userlocal(cop0, val) (cop0->reg[MIPS_CP0_TLB_CONTEXT][2] = (val))
#define kvm_read_c0_guest_pagemask(cop0) (cop0->reg[MIPS_CP0_TLB_PG_MASK][0])
#define kvm_write_c0_guest_pagemask(cop0, val) (cop0->reg[MIPS_CP0_TLB_PG_MASK][0] = (val))
#define kvm_read_c0_guest_wired(cop0) (cop0->reg[MIPS_CP0_TLB_WIRED][0])
@@ -449,15 +491,74 @@ struct kvm_vcpu_arch {
#define kvm_read_c0_guest_errorepc(cop0) (cop0->reg[MIPS_CP0_ERROR_PC][0])
#define kvm_write_c0_guest_errorepc(cop0, val) (cop0->reg[MIPS_CP0_ERROR_PC][0] = (val))
+/*
+ * Some of the guest registers may be modified asynchronously (e.g. from a
+ * hrtimer callback in hard irq context) and therefore need stronger atomicity
+ * guarantees than other registers.
+ */
+
+static inline void _kvm_atomic_set_c0_guest_reg(unsigned long *reg,
+ unsigned long val)
+{
+ unsigned long temp;
+ do {
+ __asm__ __volatile__(
+ " .set mips3 \n"
+ " " __LL "%0, %1 \n"
+ " or %0, %2 \n"
+ " " __SC "%0, %1 \n"
+ " .set mips0 \n"
+ : "=&r" (temp), "+m" (*reg)
+ : "r" (val));
+ } while (unlikely(!temp));
+}
+
+static inline void _kvm_atomic_clear_c0_guest_reg(unsigned long *reg,
+ unsigned long val)
+{
+ unsigned long temp;
+ do {
+ __asm__ __volatile__(
+ " .set mips3 \n"
+ " " __LL "%0, %1 \n"
+ " and %0, %2 \n"
+ " " __SC "%0, %1 \n"
+ " .set mips0 \n"
+ : "=&r" (temp), "+m" (*reg)
+ : "r" (~val));
+ } while (unlikely(!temp));
+}
+
+static inline void _kvm_atomic_change_c0_guest_reg(unsigned long *reg,
+ unsigned long change,
+ unsigned long val)
+{
+ unsigned long temp;
+ do {
+ __asm__ __volatile__(
+ " .set mips3 \n"
+ " " __LL "%0, %1 \n"
+ " and %0, %2 \n"
+ " or %0, %3 \n"
+ " " __SC "%0, %1 \n"
+ " .set mips0 \n"
+ : "=&r" (temp), "+m" (*reg)
+ : "r" (~change), "r" (val & change));
+ } while (unlikely(!temp));
+}
+
#define kvm_set_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] |= (val))
#define kvm_clear_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] &= ~(val))
-#define kvm_set_c0_guest_cause(cop0, val) (cop0->reg[MIPS_CP0_CAUSE][0] |= (val))
-#define kvm_clear_c0_guest_cause(cop0, val) (cop0->reg[MIPS_CP0_CAUSE][0] &= ~(val))
+
+/* Cause can be modified asynchronously from hardirq hrtimer callback */
+#define kvm_set_c0_guest_cause(cop0, val) \
+ _kvm_atomic_set_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], val)
+#define kvm_clear_c0_guest_cause(cop0, val) \
+ _kvm_atomic_clear_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], val)
#define kvm_change_c0_guest_cause(cop0, change, val) \
-{ \
- kvm_clear_c0_guest_cause(cop0, change); \
- kvm_set_c0_guest_cause(cop0, ((val) & (change))); \
-}
+ _kvm_atomic_change_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], \
+ change, val)
+
#define kvm_set_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] |= (val))
#define kvm_clear_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] &= ~(val))
#define kvm_change_c0_guest_ebase(cop0, change, val) \
@@ -468,29 +569,33 @@ struct kvm_vcpu_arch {
struct kvm_mips_callbacks {
- int (*handle_cop_unusable) (struct kvm_vcpu *vcpu);
- int (*handle_tlb_mod) (struct kvm_vcpu *vcpu);
- int (*handle_tlb_ld_miss) (struct kvm_vcpu *vcpu);
- int (*handle_tlb_st_miss) (struct kvm_vcpu *vcpu);
- int (*handle_addr_err_st) (struct kvm_vcpu *vcpu);
- int (*handle_addr_err_ld) (struct kvm_vcpu *vcpu);
- int (*handle_syscall) (struct kvm_vcpu *vcpu);
- int (*handle_res_inst) (struct kvm_vcpu *vcpu);
- int (*handle_break) (struct kvm_vcpu *vcpu);
- int (*vm_init) (struct kvm *kvm);
- int (*vcpu_init) (struct kvm_vcpu *vcpu);
- int (*vcpu_setup) (struct kvm_vcpu *vcpu);
- gpa_t(*gva_to_gpa) (gva_t gva);
- void (*queue_timer_int) (struct kvm_vcpu *vcpu);
- void (*dequeue_timer_int) (struct kvm_vcpu *vcpu);
- void (*queue_io_int) (struct kvm_vcpu *vcpu,
- struct kvm_mips_interrupt *irq);
- void (*dequeue_io_int) (struct kvm_vcpu *vcpu,
- struct kvm_mips_interrupt *irq);
- int (*irq_deliver) (struct kvm_vcpu *vcpu, unsigned int priority,
- uint32_t cause);
- int (*irq_clear) (struct kvm_vcpu *vcpu, unsigned int priority,
- uint32_t cause);
+ int (*handle_cop_unusable)(struct kvm_vcpu *vcpu);
+ int (*handle_tlb_mod)(struct kvm_vcpu *vcpu);
+ int (*handle_tlb_ld_miss)(struct kvm_vcpu *vcpu);
+ int (*handle_tlb_st_miss)(struct kvm_vcpu *vcpu);
+ int (*handle_addr_err_st)(struct kvm_vcpu *vcpu);
+ int (*handle_addr_err_ld)(struct kvm_vcpu *vcpu);
+ int (*handle_syscall)(struct kvm_vcpu *vcpu);
+ int (*handle_res_inst)(struct kvm_vcpu *vcpu);
+ int (*handle_break)(struct kvm_vcpu *vcpu);
+ int (*vm_init)(struct kvm *kvm);
+ int (*vcpu_init)(struct kvm_vcpu *vcpu);
+ int (*vcpu_setup)(struct kvm_vcpu *vcpu);
+ gpa_t (*gva_to_gpa)(gva_t gva);
+ void (*queue_timer_int)(struct kvm_vcpu *vcpu);
+ void (*dequeue_timer_int)(struct kvm_vcpu *vcpu);
+ void (*queue_io_int)(struct kvm_vcpu *vcpu,
+ struct kvm_mips_interrupt *irq);
+ void (*dequeue_io_int)(struct kvm_vcpu *vcpu,
+ struct kvm_mips_interrupt *irq);
+ int (*irq_deliver)(struct kvm_vcpu *vcpu, unsigned int priority,
+ uint32_t cause);
+ int (*irq_clear)(struct kvm_vcpu *vcpu, unsigned int priority,
+ uint32_t cause);
+ int (*get_one_reg)(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg, s64 *v);
+ int (*set_one_reg)(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg, s64 v);
};
extern struct kvm_mips_callbacks *kvm_mips_callbacks;
int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks);
@@ -609,7 +714,16 @@ extern enum emulation_result kvm_mips_emulate_bp_exc(unsigned long cause,
extern enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
struct kvm_run *run);
-enum emulation_result kvm_mips_emulate_count(struct kvm_vcpu *vcpu);
+uint32_t kvm_mips_read_count(struct kvm_vcpu *vcpu);
+void kvm_mips_write_count(struct kvm_vcpu *vcpu, uint32_t count);
+void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare);
+void kvm_mips_init_count(struct kvm_vcpu *vcpu);
+int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl);
+int kvm_mips_set_count_resume(struct kvm_vcpu *vcpu, s64 count_resume);
+int kvm_mips_set_count_hz(struct kvm_vcpu *vcpu, s64 count_hz);
+void kvm_mips_count_enable_cause(struct kvm_vcpu *vcpu);
+void kvm_mips_count_disable_cause(struct kvm_vcpu *vcpu);
+enum hrtimer_restart kvm_mips_count_timeout(struct kvm_vcpu *vcpu);
enum emulation_result kvm_mips_check_privilege(unsigned long cause,
uint32_t *opc,
@@ -646,7 +760,6 @@ extern int kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc,
struct kvm_vcpu *vcpu);
/* Misc */
-extern void mips32_SyncICache(unsigned long addr, unsigned long size);
extern int kvm_mips_dump_stats(struct kvm_vcpu *vcpu);
extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm);
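
The new get_one_reg()/set_one_reg() callbacks let each emulation backend
expose registers that need extra handling, such as CP0_Count, which must
be derived from the hrtimer-based timer model rather than read from the
saved register file. An illustrative backend implementation (a sketch,
not this patch's actual trap-and-emulate code):

	/* Sketch: route CP0_Count reads through the timer model. */
	static int demo_get_one_reg(struct kvm_vcpu *vcpu,
				    const struct kvm_one_reg *reg, s64 *v)
	{
		switch (reg->id) {
		case KVM_REG_MIPS_CP0_COUNT:
			*v = kvm_mips_read_count(vcpu);
			return 0;
		default:
			return -EINVAL;	/* let the caller fall back or fail */
		}
	}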
diff --git a/arch/mips/include/uapi/asm/kvm.h b/arch/mips/include/uapi/asm/kvm.h
index f09ff5ae2059..2c04b6d9ff85 100644
--- a/arch/mips/include/uapi/asm/kvm.h
+++ b/arch/mips/include/uapi/asm/kvm.h
@@ -106,6 +106,41 @@ struct kvm_fpu {
#define KVM_REG_MIPS_LO (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 33)
#define KVM_REG_MIPS_PC (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 34)
+/* KVM specific control registers */
+
+/*
+ * CP0_Count control
+ * DC: Set 0: Master disable CP0_Count and set COUNT_RESUME to now
+ * Set 1: Master re-enable CP0_Count with unchanged bias, handling timer
+ * interrupts since COUNT_RESUME
+ * This can be used to freeze the timer to get a consistent snapshot of
+ * the CP0_Count and timer interrupt pending state, while also resuming
+ * safely without losing time or guest timer interrupts.
+ * Other: Reserved, do not change.
+ */
+#define KVM_REG_MIPS_COUNT_CTL (KVM_REG_MIPS | KVM_REG_SIZE_U64 | \
+ 0x20000 | 0)
+#define KVM_REG_MIPS_COUNT_CTL_DC 0x00000001
+
+/*
+ * CP0_Count resume monotonic nanoseconds
+ * The monotonic nanosecond time of the last set of COUNT_CTL.DC (master
+ * disable). Any reads and writes of Count related registers while
+ * COUNT_CTL.DC=1 will appear to occur at this time. When COUNT_CTL.DC is
+ * cleared again (master enable) any timer interrupts since this time will be
+ * emulated.
+ * Modifications to times in the future are rejected.
+ */
+#define KVM_REG_MIPS_COUNT_RESUME (KVM_REG_MIPS | KVM_REG_SIZE_U64 | \
+ 0x20000 | 1)
+/*
+ * CP0_Count rate in Hz
+ * Specifies the rate of the CP0_Count timer in Hz. Modifications occur without
+ * discontinuities in CP0_Count.
+ */
+#define KVM_REG_MIPS_COUNT_HZ (KVM_REG_MIPS | KVM_REG_SIZE_U64 | \
+ 0x20000 | 2)
+
/*
* KVM MIPS specific structures and definitions
*
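
A sketch of the freeze protocol these registers enable, from the
userspace side (KVM_GET_ONE_REG/KVM_SET_ONE_REG with struct kvm_one_reg
are the standard KVM interfaces; the read-modify-write keeps the
reserved COUNT_CTL bits unchanged, and the helper name is illustrative):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	/* Sketch: set COUNT_CTL.DC to freeze CP0_Count for a snapshot. */
	static int vcpu_freeze_count(int vcpu_fd)
	{
		__u64 ctl;
		struct kvm_one_reg reg = {
			.id   = KVM_REG_MIPS_COUNT_CTL,
			.addr = (__u64)(unsigned long)&ctl,
		};

		if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
			return -1;
		ctl |= KVM_REG_MIPS_COUNT_CTL_DC;	/* master disable */
		return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
	}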
diff --git a/arch/mips/kvm/kvm_locore.S b/arch/mips/kvm/kvm_locore.S
index bbace092ad0a..033ac343e72c 100644
--- a/arch/mips/kvm/kvm_locore.S
+++ b/arch/mips/kvm/kvm_locore.S
@@ -611,35 +611,3 @@ MIPSX(exceptions):
.word _C_LABEL(MIPSX(GuestException)) # 29
.word _C_LABEL(MIPSX(GuestException)) # 30
.word _C_LABEL(MIPSX(GuestException)) # 31
-
-
-/* This routine makes changes to the instruction stream effective to the hardware.
- * It should be called after the instruction stream is written.
- * On return, the new instructions are effective.
- * Inputs:
- * a0 = Start address of new instruction stream
- * a1 = Size, in bytes, of new instruction stream
- */
-
-#define HW_SYNCI_Step $1
-LEAF(MIPSX(SyncICache))
- .set push
- .set mips32r2
- beq a1, zero, 20f
- nop
- REG_ADDU a1, a0, a1
- rdhwr v0, HW_SYNCI_Step
- beq v0, zero, 20f
- nop
-10:
- synci 0(a0)
- REG_ADDU a0, a0, v0
- sltu v1, a0, a1
- bne v1, zero, 10b
- nop
- sync
-20:
- jr.hb ra
- nop
- .set pop
-END(MIPSX(SyncICache))
diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c
index da5186fbd77a..cd5e4f568439 100644
--- a/arch/mips/kvm/kvm_mips.c
+++ b/arch/mips/kvm/kvm_mips.c
@@ -61,11 +61,6 @@ static int kvm_mips_reset_vcpu(struct kvm_vcpu *vcpu)
return 0;
}
-gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
-{
- return gfn;
-}
-
/* XXXKYMA: We are simulating a processor that has the WII bit set in Config7, so we
 * are "runnable" if interrupts are pending
 */
@@ -130,8 +125,8 @@ static void kvm_mips_init_vm_percpu(void *arg)
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
if (atomic_inc_return(&kvm_mips_instance) == 1) {
- kvm_info("%s: 1st KVM instance, setup host TLB parameters\n",
- __func__);
+ kvm_debug("%s: 1st KVM instance, setup host TLB parameters\n",
+ __func__);
on_each_cpu(kvm_mips_init_vm_percpu, kvm, 1);
}
@@ -149,9 +144,7 @@ void kvm_mips_free_vcpus(struct kvm *kvm)
if (kvm->arch.guest_pmap[i] != KVM_INVALID_PAGE)
kvm_mips_release_pfn_clean(kvm->arch.guest_pmap[i]);
}
-
- if (kvm->arch.guest_pmap)
- kfree(kvm->arch.guest_pmap);
+ kfree(kvm->arch.guest_pmap);
kvm_for_each_vcpu(i, vcpu, kvm) {
kvm_arch_vcpu_free(vcpu);
@@ -186,8 +179,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
/* If this is the last instance, restore wired count */
if (atomic_dec_return(&kvm_mips_instance) == 0) {
- kvm_info("%s: last KVM instance, restoring TLB parameters\n",
- __func__);
+ kvm_debug("%s: last KVM instance, restoring TLB parameters\n",
+ __func__);
on_each_cpu(kvm_mips_uninit_tlbs, NULL, 1);
}
}
@@ -249,9 +242,8 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
goto out;
}
- kvm_info
- ("Allocated space for Guest PMAP Table (%ld pages) @ %p\n",
- npages, kvm->arch.guest_pmap);
+ kvm_debug("Allocated space for Guest PMAP Table (%ld pages) @ %p\n",
+ npages, kvm->arch.guest_pmap);
/* Now setup the page table */
for (i = 0; i < npages; i++) {
@@ -296,7 +288,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
if (err)
goto out_free_cpu;
- kvm_info("kvm @ %p: create cpu %d at %p\n", kvm, id, vcpu);
+ kvm_debug("kvm @ %p: create cpu %d at %p\n", kvm, id, vcpu);
/* Allocate space for host mode exception handlers that handle
* guest mode exits
@@ -304,7 +296,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
if (cpu_has_veic || cpu_has_vint) {
size = 0x200 + VECTORSPACING * 64;
} else {
- size = 0x200;
+ size = 0x4000;
}
/* Save Linux EBASE */
@@ -316,8 +308,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
err = -ENOMEM;
goto out_free_cpu;
}
- kvm_info("Allocated %d bytes for KVM Exception Handlers @ %p\n",
- ALIGN(size, PAGE_SIZE), gebase);
+ kvm_debug("Allocated %d bytes for KVM Exception Handlers @ %p\n",
+ ALIGN(size, PAGE_SIZE), gebase);
/* Save new ebase */
vcpu->arch.guest_ebase = gebase;
@@ -342,15 +334,16 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
/* General handler, relocate to unmapped space for sanity's sake */
offset = 0x2000;
- kvm_info("Installing KVM Exception handlers @ %p, %#x bytes\n",
- gebase + offset,
- mips32_GuestExceptionEnd - mips32_GuestException);
+ kvm_debug("Installing KVM Exception handlers @ %p, %#x bytes\n",
+ gebase + offset,
+ mips32_GuestExceptionEnd - mips32_GuestException);
memcpy(gebase + offset, mips32_GuestException,
mips32_GuestExceptionEnd - mips32_GuestException);
/* Invalidate the icache for these ranges */
- mips32_SyncICache((unsigned long) gebase, ALIGN(size, PAGE_SIZE));
+ local_flush_icache_range((unsigned long)gebase,
+ (unsigned long)gebase + ALIGN(size, PAGE_SIZE));
/* Allocate comm page for guest kernel, a TLB will be reserved for mapping GVA @ 0xFFFF8000 to this page */
vcpu->arch.kseg0_commpage = kzalloc(PAGE_SIZE << 1, GFP_KERNEL);
@@ -360,14 +353,14 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
goto out_free_gebase;
}
- kvm_info("Allocated COMM page @ %p\n", vcpu->arch.kseg0_commpage);
+ kvm_debug("Allocated COMM page @ %p\n", vcpu->arch.kseg0_commpage);
kvm_mips_commpage_init(vcpu);
/* Init */
vcpu->arch.last_sched_cpu = -1;
/* Start off the timer */
- kvm_mips_emulate_count(vcpu);
+ kvm_mips_init_count(vcpu);
return vcpu;
@@ -389,12 +382,8 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
kvm_mips_dump_stats(vcpu);
- if (vcpu->arch.guest_ebase)
- kfree(vcpu->arch.guest_ebase);
-
- if (vcpu->arch.kseg0_commpage)
- kfree(vcpu->arch.kseg0_commpage);
-
+ kfree(vcpu->arch.guest_ebase);
+ kfree(vcpu->arch.kseg0_commpage);
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -423,11 +412,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
vcpu->mmio_needed = 0;
}
+ local_irq_disable();
/* Check if we have any exceptions/interrupts pending */
kvm_mips_deliver_interrupts(vcpu,
kvm_read_c0_guest_cause(vcpu->arch.cop0));
- local_irq_disable();
kvm_guest_enter();
r = __kvm_mips_vcpu_run(run, vcpu);
@@ -490,36 +479,6 @@ kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
return -ENOIOCTLCMD;
}
-#define MIPS_CP0_32(_R, _S) \
- (KVM_REG_MIPS | KVM_REG_SIZE_U32 | 0x10000 | (8 * (_R) + (_S)))
-
-#define MIPS_CP0_64(_R, _S) \
- (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 0x10000 | (8 * (_R) + (_S)))
-
-#define KVM_REG_MIPS_CP0_INDEX MIPS_CP0_32(0, 0)
-#define KVM_REG_MIPS_CP0_ENTRYLO0 MIPS_CP0_64(2, 0)
-#define KVM_REG_MIPS_CP0_ENTRYLO1 MIPS_CP0_64(3, 0)
-#define KVM_REG_MIPS_CP0_CONTEXT MIPS_CP0_64(4, 0)
-#define KVM_REG_MIPS_CP0_USERLOCAL MIPS_CP0_64(4, 2)
-#define KVM_REG_MIPS_CP0_PAGEMASK MIPS_CP0_32(5, 0)
-#define KVM_REG_MIPS_CP0_PAGEGRAIN MIPS_CP0_32(5, 1)
-#define KVM_REG_MIPS_CP0_WIRED MIPS_CP0_32(6, 0)
-#define KVM_REG_MIPS_CP0_HWRENA MIPS_CP0_32(7, 0)
-#define KVM_REG_MIPS_CP0_BADVADDR MIPS_CP0_64(8, 0)
-#define KVM_REG_MIPS_CP0_COUNT MIPS_CP0_32(9, 0)
-#define KVM_REG_MIPS_CP0_ENTRYHI MIPS_CP0_64(10, 0)
-#define KVM_REG_MIPS_CP0_COMPARE MIPS_CP0_32(11, 0)
-#define KVM_REG_MIPS_CP0_STATUS MIPS_CP0_32(12, 0)
-#define KVM_REG_MIPS_CP0_CAUSE MIPS_CP0_32(13, 0)
-#define KVM_REG_MIPS_CP0_EBASE MIPS_CP0_64(15, 1)
-#define KVM_REG_MIPS_CP0_CONFIG MIPS_CP0_32(16, 0)
-#define KVM_REG_MIPS_CP0_CONFIG1 MIPS_CP0_32(16, 1)
-#define KVM_REG_MIPS_CP0_CONFIG2 MIPS_CP0_32(16, 2)
-#define KVM_REG_MIPS_CP0_CONFIG3 MIPS_CP0_32(16, 3)
-#define KVM_REG_MIPS_CP0_CONFIG7 MIPS_CP0_32(16, 7)
-#define KVM_REG_MIPS_CP0_XCONTEXT MIPS_CP0_64(20, 0)
-#define KVM_REG_MIPS_CP0_ERROREPC MIPS_CP0_64(30, 0)
-
static u64 kvm_mips_get_one_regs[] = {
KVM_REG_MIPS_R0,
KVM_REG_MIPS_R1,
@@ -560,25 +519,34 @@ static u64 kvm_mips_get_one_regs[] = {
KVM_REG_MIPS_CP0_INDEX,
KVM_REG_MIPS_CP0_CONTEXT,
+ KVM_REG_MIPS_CP0_USERLOCAL,
KVM_REG_MIPS_CP0_PAGEMASK,
KVM_REG_MIPS_CP0_WIRED,
+ KVM_REG_MIPS_CP0_HWRENA,
KVM_REG_MIPS_CP0_BADVADDR,
+ KVM_REG_MIPS_CP0_COUNT,
KVM_REG_MIPS_CP0_ENTRYHI,
+ KVM_REG_MIPS_CP0_COMPARE,
KVM_REG_MIPS_CP0_STATUS,
KVM_REG_MIPS_CP0_CAUSE,
- /* EPC set via kvm_regs, et al. */
+ KVM_REG_MIPS_CP0_EPC,
KVM_REG_MIPS_CP0_CONFIG,
KVM_REG_MIPS_CP0_CONFIG1,
KVM_REG_MIPS_CP0_CONFIG2,
KVM_REG_MIPS_CP0_CONFIG3,
KVM_REG_MIPS_CP0_CONFIG7,
- KVM_REG_MIPS_CP0_ERROREPC
+ KVM_REG_MIPS_CP0_ERROREPC,
+
+ KVM_REG_MIPS_COUNT_CTL,
+ KVM_REG_MIPS_COUNT_RESUME,
+ KVM_REG_MIPS_COUNT_HZ,
};
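The new COUNT_* registers are reached through the existing one-reg interface. As a brief illustration (a sketch, not part of the patch; the wrapper is hypothetical and error handling is elided, while the register ID is one of those listed above from the MIPS uapi header):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Read the emulated timer frequency. KVM_REG_MIPS_COUNT_HZ is a
	 * 64-bit register, so a u64 buffer matches the transfer size encoded
	 * in the register ID. Returns the ioctl result: 0 on success. */
	static int read_count_hz(int vcpu_fd, uint64_t *hz)
	{
		struct kvm_one_reg reg = {
			.id   = KVM_REG_MIPS_COUNT_HZ,
			.addr = (uintptr_t)hz,
		};
		return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
	}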
static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
const struct kvm_one_reg *reg)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
+ int ret;
s64 v;
switch (reg->id) {
@@ -601,24 +569,36 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
case KVM_REG_MIPS_CP0_CONTEXT:
v = (long)kvm_read_c0_guest_context(cop0);
break;
+ case KVM_REG_MIPS_CP0_USERLOCAL:
+ v = (long)kvm_read_c0_guest_userlocal(cop0);
+ break;
case KVM_REG_MIPS_CP0_PAGEMASK:
v = (long)kvm_read_c0_guest_pagemask(cop0);
break;
case KVM_REG_MIPS_CP0_WIRED:
v = (long)kvm_read_c0_guest_wired(cop0);
break;
+ case KVM_REG_MIPS_CP0_HWRENA:
+ v = (long)kvm_read_c0_guest_hwrena(cop0);
+ break;
case KVM_REG_MIPS_CP0_BADVADDR:
v = (long)kvm_read_c0_guest_badvaddr(cop0);
break;
case KVM_REG_MIPS_CP0_ENTRYHI:
v = (long)kvm_read_c0_guest_entryhi(cop0);
break;
+ case KVM_REG_MIPS_CP0_COMPARE:
+ v = (long)kvm_read_c0_guest_compare(cop0);
+ break;
case KVM_REG_MIPS_CP0_STATUS:
v = (long)kvm_read_c0_guest_status(cop0);
break;
case KVM_REG_MIPS_CP0_CAUSE:
v = (long)kvm_read_c0_guest_cause(cop0);
break;
+ case KVM_REG_MIPS_CP0_EPC:
+ v = (long)kvm_read_c0_guest_epc(cop0);
+ break;
case KVM_REG_MIPS_CP0_ERROREPC:
v = (long)kvm_read_c0_guest_errorepc(cop0);
break;
@@ -637,6 +617,15 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
case KVM_REG_MIPS_CP0_CONFIG7:
v = (long)kvm_read_c0_guest_config7(cop0);
break;
+ /* registers to be handled specially */
+ case KVM_REG_MIPS_CP0_COUNT:
+ case KVM_REG_MIPS_COUNT_CTL:
+ case KVM_REG_MIPS_COUNT_RESUME:
+ case KVM_REG_MIPS_COUNT_HZ:
+ ret = kvm_mips_callbacks->get_one_reg(vcpu, reg, &v);
+ if (ret)
+ return ret;
+ break;
default:
return -EINVAL;
}
@@ -697,12 +686,18 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
case KVM_REG_MIPS_CP0_CONTEXT:
kvm_write_c0_guest_context(cop0, v);
break;
+ case KVM_REG_MIPS_CP0_USERLOCAL:
+ kvm_write_c0_guest_userlocal(cop0, v);
+ break;
case KVM_REG_MIPS_CP0_PAGEMASK:
kvm_write_c0_guest_pagemask(cop0, v);
break;
case KVM_REG_MIPS_CP0_WIRED:
kvm_write_c0_guest_wired(cop0, v);
break;
+ case KVM_REG_MIPS_CP0_HWRENA:
+ kvm_write_c0_guest_hwrena(cop0, v);
+ break;
case KVM_REG_MIPS_CP0_BADVADDR:
kvm_write_c0_guest_badvaddr(cop0, v);
break;
@@ -712,12 +707,20 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
case KVM_REG_MIPS_CP0_STATUS:
kvm_write_c0_guest_status(cop0, v);
break;
- case KVM_REG_MIPS_CP0_CAUSE:
- kvm_write_c0_guest_cause(cop0, v);
+ case KVM_REG_MIPS_CP0_EPC:
+ kvm_write_c0_guest_epc(cop0, v);
break;
case KVM_REG_MIPS_CP0_ERROREPC:
kvm_write_c0_guest_errorepc(cop0, v);
break;
+ /* registers to be handled specially */
+ case KVM_REG_MIPS_CP0_COUNT:
+ case KVM_REG_MIPS_CP0_COMPARE:
+ case KVM_REG_MIPS_CP0_CAUSE:
+ case KVM_REG_MIPS_COUNT_CTL:
+ case KVM_REG_MIPS_COUNT_RESUME:
+ case KVM_REG_MIPS_COUNT_HZ:
+ return kvm_mips_callbacks->set_one_reg(vcpu, reg, v);
default:
return -EINVAL;
}
@@ -920,7 +923,7 @@ int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu)
return -1;
printk("VCPU Register Dump:\n");
- printk("\tpc = 0x%08lx\n", vcpu->arch.pc);;
+ printk("\tpc = 0x%08lx\n", vcpu->arch.pc);
printk("\texceptions: %08lx\n", vcpu->arch.pending_exceptions);
for (i = 0; i < 32; i += 4) {
@@ -969,7 +972,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
return 0;
}
-void kvm_mips_comparecount_func(unsigned long data)
+static void kvm_mips_comparecount_func(unsigned long data)
{
struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
@@ -984,15 +987,13 @@ void kvm_mips_comparecount_func(unsigned long data)
/*
* low level hrtimer wake routine.
*/
-enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer)
+static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer)
{
struct kvm_vcpu *vcpu;
vcpu = container_of(timer, struct kvm_vcpu, arch.comparecount_timer);
kvm_mips_comparecount_func((unsigned long) vcpu);
- hrtimer_forward_now(&vcpu->arch.comparecount_timer,
- ktime_set(0, MS_TO_NS(10)));
- return HRTIMER_RESTART;
+ return kvm_mips_count_timeout(vcpu);
}
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
diff --git a/arch/mips/kvm/kvm_mips_dyntrans.c b/arch/mips/kvm/kvm_mips_dyntrans.c
index 96528e2d1ea6..b80e41d858fd 100644
--- a/arch/mips/kvm/kvm_mips_dyntrans.c
+++ b/arch/mips/kvm/kvm_mips_dyntrans.c
@@ -16,6 +16,7 @@
#include <linux/vmalloc.h>
#include <linux/fs.h>
#include <linux/bootmem.h>
+#include <asm/cacheflush.h>
#include "kvm_mips_comm.h"
@@ -40,7 +41,7 @@ kvm_mips_trans_cache_index(uint32_t inst, uint32_t *opc,
CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa
(vcpu, (unsigned long) opc));
memcpy((void *)kseg0_opc, (void *)&synci_inst, sizeof(uint32_t));
- mips32_SyncICache(kseg0_opc, 32);
+ local_flush_icache_range(kseg0_opc, kseg0_opc + 32);
return result;
}
@@ -66,7 +67,7 @@ kvm_mips_trans_cache_va(uint32_t inst, uint32_t *opc,
CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa
(vcpu, (unsigned long) opc));
memcpy((void *)kseg0_opc, (void *)&synci_inst, sizeof(uint32_t));
- mips32_SyncICache(kseg0_opc, 32);
+ local_flush_icache_range(kseg0_opc, kseg0_opc + 32);
return result;
}
@@ -99,11 +100,12 @@ kvm_mips_trans_mfc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu)
CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa
(vcpu, (unsigned long) opc));
memcpy((void *)kseg0_opc, (void *)&mfc0_inst, sizeof(uint32_t));
- mips32_SyncICache(kseg0_opc, 32);
+ local_flush_icache_range(kseg0_opc, kseg0_opc + 32);
} else if (KVM_GUEST_KSEGX((unsigned long) opc) == KVM_GUEST_KSEG23) {
local_irq_save(flags);
memcpy((void *)opc, (void *)&mfc0_inst, sizeof(uint32_t));
- mips32_SyncICache((unsigned long) opc, 32);
+ local_flush_icache_range((unsigned long)opc,
+ (unsigned long)opc + 32);
local_irq_restore(flags);
} else {
kvm_err("%s: Invalid address: %p\n", __func__, opc);
@@ -134,11 +136,12 @@ kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu)
CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa
(vcpu, (unsigned long) opc));
memcpy((void *)kseg0_opc, (void *)&mtc0_inst, sizeof(uint32_t));
- mips32_SyncICache(kseg0_opc, 32);
+ local_flush_icache_range(kseg0_opc, kseg0_opc + 32);
} else if (KVM_GUEST_KSEGX((unsigned long) opc) == KVM_GUEST_KSEG23) {
local_irq_save(flags);
memcpy((void *)opc, (void *)&mtc0_inst, sizeof(uint32_t));
- mips32_SyncICache((unsigned long) opc, 32);
+ local_flush_icache_range((unsigned long)opc,
+ (unsigned long)opc + 32);
local_irq_restore(flags);
} else {
kvm_err("%s: Invalid address: %p\n", __func__, opc);
diff --git a/arch/mips/kvm/kvm_mips_emul.c b/arch/mips/kvm/kvm_mips_emul.c
index e3fec99941a7..8d4840090082 100644
--- a/arch/mips/kvm/kvm_mips_emul.c
+++ b/arch/mips/kvm/kvm_mips_emul.c
@@ -11,6 +11,7 @@
#include <linux/errno.h>
#include <linux/err.h>
+#include <linux/ktime.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
@@ -228,25 +229,520 @@ enum emulation_result update_pc(struct kvm_vcpu *vcpu, uint32_t cause)
return er;
}
-/* Everytime the compare register is written to, we need to decide when to fire
- * the timer that represents timer ticks to the GUEST.
+/**
+ * kvm_mips_count_disabled() - Find whether the CP0_Count timer is disabled.
+ * @vcpu: Virtual CPU.
*
+ * Returns: 1 if the CP0_Count timer is disabled by either the guest
+ * CP0_Cause.DC bit or the count_ctl.DC bit.
+ * 0 otherwise (in which case the CP0_Count timer is running).
*/
-enum emulation_result kvm_mips_emulate_count(struct kvm_vcpu *vcpu)
+static inline int kvm_mips_count_disabled(struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
- enum emulation_result er = EMULATE_DONE;
+ return (vcpu->arch.count_ctl & KVM_REG_MIPS_COUNT_CTL_DC) ||
+ (kvm_read_c0_guest_cause(cop0) & CAUSEF_DC);
+}
+
+/**
+ * kvm_mips_ktime_to_count() - Scale ktime_t to a 32-bit count.
+ *
+ * Caches the dynamic nanosecond bias in vcpu->arch.count_dyn_bias.
+ *
+ * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running).
+ */
+static uint32_t kvm_mips_ktime_to_count(struct kvm_vcpu *vcpu, ktime_t now)
+{
+ s64 now_ns, periods;
+ u64 delta;
+
+ now_ns = ktime_to_ns(now);
+ delta = now_ns + vcpu->arch.count_dyn_bias;
+
+ if (delta >= vcpu->arch.count_period) {
+ /* If delta is out of safe range the bias needs adjusting */
+ periods = div64_s64(now_ns, vcpu->arch.count_period);
+ vcpu->arch.count_dyn_bias = -periods * vcpu->arch.count_period;
+ /* Recalculate delta with new bias */
+ delta = now_ns + vcpu->arch.count_dyn_bias;
+ }
+
+ /*
+ * We've ensured that:
+ * delta < count_period
+ *
+ * Therefore the intermediate delta*count_hz will never overflow since
+ * at the boundary condition:
+ * delta = count_period
+ * delta = NSEC_PER_SEC * 2^32 / count_hz
+ * delta * count_hz = NSEC_PER_SEC * 2^32
+ */
+ return div_u64(delta * vcpu->arch.count_hz, NSEC_PER_SEC);
+}
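A worked example of the scaling (not from the patch), at the default count_hz of 10^8: count_period = 10^9 ns * 2^32 / 10^8 Hz = 42949672960 ns, so the count wraps roughly every 42.95 s. For now_ns = 10^11 ns (100 s of monotonic time): periods = 2, count_dyn_bias = -85899345920, delta = 14100654080 ns, and the function returns 14100654080 * 10^8 / 10^9 = 1410065408 = 10^10 mod 2^32, exactly what a real 100 MHz counter would hold after 100 s.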
+
+/**
+ * kvm_mips_count_time() - Get effective current time.
+ * @vcpu: Virtual CPU.
+ *
+ * Get effective monotonic ktime. This is usually a straightforward ktime_get(),
+ * except when the master disable bit is set in count_ctl, in which case it is
+ * count_resume, i.e. the time that the count was disabled.
+ *
+ * Returns: Effective monotonic ktime for CP0_Count.
+ */
+static inline ktime_t kvm_mips_count_time(struct kvm_vcpu *vcpu)
+{
+ if (unlikely(vcpu->arch.count_ctl & KVM_REG_MIPS_COUNT_CTL_DC))
+ return vcpu->arch.count_resume;
+
+ return ktime_get();
+}
+
+/**
+ * kvm_mips_read_count_running() - Read the current count value as if running.
+ * @vcpu: Virtual CPU.
+ * @now: Kernel time to read CP0_Count at.
+ *
+ * Returns the current guest CP0_Count register at time @now, and handles the
+ * case where the timer interrupt is pending but hasn't been handled yet.
+ *
+ * Returns: The current value of the guest CP0_Count register.
+ */
+static uint32_t kvm_mips_read_count_running(struct kvm_vcpu *vcpu, ktime_t now)
+{
+ ktime_t expires;
+ int running;
+
+ /* Is the hrtimer pending? */
+ expires = hrtimer_get_expires(&vcpu->arch.comparecount_timer);
+ if (ktime_compare(now, expires) >= 0) {
+ /*
+ * Cancel it while we handle it so there's no chance of
+ * interference with the timeout handler.
+ */
+ running = hrtimer_cancel(&vcpu->arch.comparecount_timer);
+
+ /* Nothing should be waiting on the timeout */
+ kvm_mips_callbacks->queue_timer_int(vcpu);
+
+ /*
+ * Restart the timer if it was running based on the expiry time
+ * we read, so that we don't push it back 2 periods.
+ */
+ if (running) {
+ expires = ktime_add_ns(expires,
+ vcpu->arch.count_period);
+ hrtimer_start(&vcpu->arch.comparecount_timer, expires,
+ HRTIMER_MODE_ABS);
+ }
+ }
+
+ /* Return the biased and scaled guest CP0_Count */
+ return vcpu->arch.count_bias + kvm_mips_ktime_to_count(vcpu, now);
+}
+
+/**
+ * kvm_mips_read_count() - Read the current count value.
+ * @vcpu: Virtual CPU.
+ *
+ * Read the current guest CP0_Count value, taking into account whether the timer
+ * is stopped.
+ *
+ * Returns: The current guest CP0_Count value.
+ */
+uint32_t kvm_mips_read_count(struct kvm_vcpu *vcpu)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+
+ /* If count disabled just read static copy of count */
+ if (kvm_mips_count_disabled(vcpu))
+ return kvm_read_c0_guest_count(cop0);
+
+ return kvm_mips_read_count_running(vcpu, ktime_get());
+}
+
+/**
+ * kvm_mips_freeze_hrtimer() - Safely stop the hrtimer.
+ * @vcpu: Virtual CPU.
+ * @count: Output pointer for CP0_Count value at point of freeze.
+ *
+ * Freeze the hrtimer safely and return both the ktime and the CP0_Count value
+ * at the point it was frozen. It is guaranteed that any pending interrupts at
+ * the point it was frozen are handled, and none after that point.
+ *
+ * This is useful where the time/CP0_Count is needed in the calculation of the
+ * new parameters.
+ *
+ * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running).
+ *
+ * Returns: The ktime at the point of freeze.
+ */
+static ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu,
+ uint32_t *count)
+{
+ ktime_t now;
+
+ /* stop hrtimer before finding time */
+ hrtimer_cancel(&vcpu->arch.comparecount_timer);
+ now = ktime_get();
+
+ /* find count at this point and handle pending hrtimer */
+ *count = kvm_mips_read_count_running(vcpu, now);
+
+ return now;
+}
+
- /* If COUNT is enabled */
- if (!(kvm_read_c0_guest_cause(cop0) & CAUSEF_DC)) {
- hrtimer_try_to_cancel(&vcpu->arch.comparecount_timer);
- hrtimer_start(&vcpu->arch.comparecount_timer,
- ktime_set(0, MS_TO_NS(10)), HRTIMER_MODE_REL);
+/**
+ * kvm_mips_resume_hrtimer() - Resume hrtimer, updating expiry.
+ * @vcpu: Virtual CPU.
+ * @now: ktime at point of resume.
+ * @count: CP0_Count at point of resume.
+ *
+ * Resumes the timer and updates the timer expiry based on @now and @count.
+ * This can be used in conjunction with kvm_mips_freeze_hrtimer() when timer
+ * parameters need to be changed.
+ *
+ * It is guaranteed that a timer interrupt immediately after resume will be
+ * handled, but not if CP0_Compare is exactly at @count. That case is already
+ * handled by kvm_mips_freeze_hrtimer().
+ *
+ * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running).
+ */
+static void kvm_mips_resume_hrtimer(struct kvm_vcpu *vcpu,
+ ktime_t now, uint32_t count)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+ uint32_t compare;
+ u64 delta;
+ ktime_t expire;
+
+ /* Calculate timeout (wrap 0 to 2^32) */
+ compare = kvm_read_c0_guest_compare(cop0);
+ delta = (u64)(uint32_t)(compare - count - 1) + 1;
+ delta = div_u64(delta * NSEC_PER_SEC, vcpu->arch.count_hz);
+ expire = ktime_add_ns(now, delta);
+
+ /* Update hrtimer to use new timeout */
+ hrtimer_cancel(&vcpu->arch.comparecount_timer);
+ hrtimer_start(&vcpu->arch.comparecount_timer, expire, HRTIMER_MODE_ABS);
+}
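Note the shape of the timeout calculation: (u64)(uint32_t)(compare - count - 1) + 1 maps a distance of zero onto a full period instead of an immediate expiry. With compare == count the u32 subtraction yields 0xffffffff, so delta becomes 2^32 ticks (about 42.95 s at 100 MHz), whereas compare == count + 1 gives a single tick.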
+
+/**
+ * kvm_mips_update_hrtimer() - Update next expiry time of hrtimer.
+ * @vcpu: Virtual CPU.
+ *
+ * Recalculates and updates the expiry time of the hrtimer. This can be used
+ * after timer parameters have been altered in a way that does not depend on
+ * the time at which the change occurs (when it does, kvm_mips_freeze_hrtimer()
+ * and kvm_mips_resume_hrtimer() are used directly).
+ *
+ * It is guaranteed that no timer interrupts will be lost in the process.
+ *
+ * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running).
+ */
+static void kvm_mips_update_hrtimer(struct kvm_vcpu *vcpu)
+{
+ ktime_t now;
+ uint32_t count;
+
+ /*
+ * freeze_hrtimer takes care of timer interrupts <= count, and
+ * resume_hrtimer takes care of timer interrupts > count.
+ */
+ now = kvm_mips_freeze_hrtimer(vcpu, &count);
+ kvm_mips_resume_hrtimer(vcpu, now, count);
+}
+
+/**
+ * kvm_mips_write_count() - Modify the count and update timer.
+ * @vcpu: Virtual CPU.
+ * @count: Guest CP0_Count value to set.
+ *
+ * Sets the CP0_Count value and updates the timer accordingly.
+ */
+void kvm_mips_write_count(struct kvm_vcpu *vcpu, uint32_t count)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+ ktime_t now;
+
+ /* Calculate bias */
+ now = kvm_mips_count_time(vcpu);
+ vcpu->arch.count_bias = count - kvm_mips_ktime_to_count(vcpu, now);
+
+ if (kvm_mips_count_disabled(vcpu))
+ /* The timer's disabled, adjust the static count */
+ kvm_write_c0_guest_count(cop0, count);
+ else
+ /* Update timeout */
+ kvm_mips_resume_hrtimer(vcpu, now, count);
+}
+
+/**
+ * kvm_mips_init_count() - Initialise timer.
+ * @vcpu: Virtual CPU.
+ *
+ * Initialise the timer to a sensible frequency, namely 100 MHz, zero it, and set
+ * it going if it's enabled.
+ */
+void kvm_mips_init_count(struct kvm_vcpu *vcpu)
+{
+ /* 100 MHz */
+ vcpu->arch.count_hz = 100*1000*1000;
+ vcpu->arch.count_period = div_u64((u64)NSEC_PER_SEC << 32,
+ vcpu->arch.count_hz);
+ vcpu->arch.count_dyn_bias = 0;
+
+ /* Starting at 0 */
+ kvm_mips_write_count(vcpu, 0);
+}
+
+/**
+ * kvm_mips_set_count_hz() - Update the frequency of the timer.
+ * @vcpu: Virtual CPU.
+ * @count_hz: Frequency of CP0_Count timer in Hz.
+ *
+ * Change the frequency of the CP0_Count timer. This is done atomically so that
+ * CP0_Count is continuous and no timer interrupt is lost.
+ *
+ * Returns: -EINVAL if @count_hz is out of range.
+ * 0 on success.
+ */
+int kvm_mips_set_count_hz(struct kvm_vcpu *vcpu, s64 count_hz)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+ int dc;
+ ktime_t now;
+ u32 count;
+
+ /* ensure the frequency is in a sensible range... */
+ if (count_hz <= 0 || count_hz > NSEC_PER_SEC)
+ return -EINVAL;
+ /* ... and has actually changed */
+ if (vcpu->arch.count_hz == count_hz)
+ return 0;
+
+ /* Safely freeze timer so we can keep it continuous */
+ dc = kvm_mips_count_disabled(vcpu);
+ if (dc) {
+ now = kvm_mips_count_time(vcpu);
+ count = kvm_read_c0_guest_count(cop0);
} else {
- hrtimer_try_to_cancel(&vcpu->arch.comparecount_timer);
+ now = kvm_mips_freeze_hrtimer(vcpu, &count);
}
- return er;
+ /* Update the frequency */
+ vcpu->arch.count_hz = count_hz;
+ vcpu->arch.count_period = div_u64((u64)NSEC_PER_SEC << 32, count_hz);
+ vcpu->arch.count_dyn_bias = 0;
+
+ /* Calculate adjusted bias so dynamic count is unchanged */
+ vcpu->arch.count_bias = count - kvm_mips_ktime_to_count(vcpu, now);
+
+ /* Update and resume hrtimer */
+ if (!dc)
+ kvm_mips_resume_hrtimer(vcpu, now, count);
+ return 0;
+}
+
+/**
+ * kvm_mips_write_compare() - Modify compare and update timer.
+ * @vcpu: Virtual CPU.
+ * @compare: New CP0_Compare value.
+ *
+ * Update CP0_Compare to a new value and update the timeout.
+ */
+void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+
+ /* if unchanged, must just be an ack */
+ if (kvm_read_c0_guest_compare(cop0) == compare)
+ return;
+
+ /* Update compare */
+ kvm_write_c0_guest_compare(cop0, compare);
+
+ /* Update timeout if count enabled */
+ if (!kvm_mips_count_disabled(vcpu))
+ kvm_mips_update_hrtimer(vcpu);
+}
+
+/**
+ * kvm_mips_count_disable() - Disable count.
+ * @vcpu: Virtual CPU.
+ *
+ * Disable the CP0_Count timer. A timer interrupt on or before the final stop
+ * time will be handled but not after.
+ *
+ * Assumes CP0_Count was previously enabled but now Guest.CP0_Cause.DC or
+ * count_ctl.DC has been set (count disabled).
+ *
+ * Returns: The time that the timer was stopped.
+ */
+static ktime_t kvm_mips_count_disable(struct kvm_vcpu *vcpu)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+ uint32_t count;
+ ktime_t now;
+
+ /* Stop hrtimer */
+ hrtimer_cancel(&vcpu->arch.comparecount_timer);
+
+ /* Set the static count from the dynamic count, handling pending TI */
+ now = ktime_get();
+ count = kvm_mips_read_count_running(vcpu, now);
+ kvm_write_c0_guest_count(cop0, count);
+
+ return now;
+}
+
+/**
+ * kvm_mips_count_disable_cause() - Disable count using CP0_Cause.DC.
+ * @vcpu: Virtual CPU.
+ *
+ * Disable the CP0_Count timer and set CP0_Cause.DC. A timer interrupt on or
+ * before the final stop time will be handled if the timer isn't disabled by
+ * count_ctl.DC, but not after.
+ *
+ * Assumes CP0_Cause.DC is clear (count enabled).
+ */
+void kvm_mips_count_disable_cause(struct kvm_vcpu *vcpu)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+
+ kvm_set_c0_guest_cause(cop0, CAUSEF_DC);
+ if (!(vcpu->arch.count_ctl & KVM_REG_MIPS_COUNT_CTL_DC))
+ kvm_mips_count_disable(vcpu);
+}
+
+/**
+ * kvm_mips_count_enable_cause() - Enable count using CP0_Cause.DC.
+ * @vcpu: Virtual CPU.
+ *
+ * Enable the CP0_Count timer and clear CP0_Cause.DC. A timer interrupt after
+ * the start time will be handled if the timer isn't disabled by count_ctl.DC,
+ * potentially before even returning, so the caller should be careful with
+ * ordering of CP0_Cause modifications so as not to lose it.
+ *
+ * Assumes CP0_Cause.DC is set (count disabled).
+ */
+void kvm_mips_count_enable_cause(struct kvm_vcpu *vcpu)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+ uint32_t count;
+
+ kvm_clear_c0_guest_cause(cop0, CAUSEF_DC);
+
+ /*
+ * Set the dynamic count to match the static count.
+ * This starts the hrtimer if count_ctl.DC allows it.
+ * Otherwise it conveniently updates the biases.
+ */
+ count = kvm_read_c0_guest_count(cop0);
+ kvm_mips_write_count(vcpu, count);
+}
+
+/**
+ * kvm_mips_set_count_ctl() - Update the count control KVM register.
+ * @vcpu: Virtual CPU.
+ * @count_ctl: Count control register new value.
+ *
+ * Set the count control KVM register. The timer is updated accordingly.
+ *
+ * Returns: -EINVAL if reserved bits are set.
+ * 0 on success.
+ */
+int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+ s64 changed = count_ctl ^ vcpu->arch.count_ctl;
+ s64 delta;
+ ktime_t expire, now;
+ uint32_t count, compare;
+
+ /* Only allow defined bits to be changed */
+ if (changed & ~(s64)(KVM_REG_MIPS_COUNT_CTL_DC))
+ return -EINVAL;
+
+ /* Apply new value */
+ vcpu->arch.count_ctl = count_ctl;
+
+ /* Master CP0_Count disable */
+ if (changed & KVM_REG_MIPS_COUNT_CTL_DC) {
+ /* Is CP0_Cause.DC already disabling CP0_Count? */
+ if (kvm_read_c0_guest_cause(cop0) & CAUSEF_DC) {
+ if (count_ctl & KVM_REG_MIPS_COUNT_CTL_DC)
+ /* Just record the current time */
+ vcpu->arch.count_resume = ktime_get();
+ } else if (count_ctl & KVM_REG_MIPS_COUNT_CTL_DC) {
+ /* disable timer and record current time */
+ vcpu->arch.count_resume = kvm_mips_count_disable(vcpu);
+ } else {
+ /*
+ * Calculate timeout relative to static count at resume
+ * time (wrap 0 to 2^32).
+ */
+ count = kvm_read_c0_guest_count(cop0);
+ compare = kvm_read_c0_guest_compare(cop0);
+ delta = (u64)(uint32_t)(compare - count - 1) + 1;
+ delta = div_u64(delta * NSEC_PER_SEC,
+ vcpu->arch.count_hz);
+ expire = ktime_add_ns(vcpu->arch.count_resume, delta);
+
+ /* Handle pending interrupt */
+ now = ktime_get();
+ if (ktime_compare(now, expire) >= 0)
+ /* Nothing should be waiting on the timeout */
+ kvm_mips_callbacks->queue_timer_int(vcpu);
+
+ /* Resume hrtimer without changing bias */
+ count = kvm_mips_read_count_running(vcpu, now);
+ kvm_mips_resume_hrtimer(vcpu, now, count);
+ }
+ }
+
+ return 0;
+}
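The master disable bit gives userspace an atomic point for saving and restoring timer state. A possible flow, sketched with hypothetical kvm_set_u64()/kvm_get_u32()/kvm_set_u32() wrappers around KVM_SET_ONE_REG/KVM_GET_ONE_REG (in the spirit of the read_count_hz() example earlier):

	/* Freeze the count so it stops moving; an interrupt due at or before
	 * the stop time is still delivered before the static copy is taken. */
	static void save_guest_timer(int vcpu_fd, uint32_t *count)
	{
		kvm_set_u64(vcpu_fd, KVM_REG_MIPS_COUNT_CTL,
			    KVM_REG_MIPS_COUNT_CTL_DC);
		kvm_get_u32(vcpu_fd, KVM_REG_MIPS_CP0_COUNT, count);
	}

	static void restore_guest_timer(int vcpu_fd, uint32_t count)
	{
		kvm_set_u32(vcpu_fd, KVM_REG_MIPS_CP0_COUNT, count);
		/* Clearing count_ctl.DC resumes the count from this moment. */
		kvm_set_u64(vcpu_fd, KVM_REG_MIPS_COUNT_CTL, 0);
	}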
+
+/**
+ * kvm_mips_set_count_resume() - Update the count resume KVM register.
+ * @vcpu: Virtual CPU.
+ * @count_resume: Count resume register new value.
+ *
+ * Set the count resume KVM register.
+ *
+ * Returns: -EINVAL if out of valid range (0..now).
+ * 0 on success.
+ */
+int kvm_mips_set_count_resume(struct kvm_vcpu *vcpu, s64 count_resume)
+{
+ /*
+ * It doesn't make sense for the resume time to be in the future, as it
+ * would be possible for the next interrupt to be more than a full
+ * period in the future.
+ */
+ if (count_resume < 0 || count_resume > ktime_to_ns(ktime_get()))
+ return -EINVAL;
+
+ vcpu->arch.count_resume = ns_to_ktime(count_resume);
+ return 0;
+}
+
+/**
+ * kvm_mips_count_timeout() - Push timer forward on timeout.
+ * @vcpu: Virtual CPU.
+ *
+ * Handle an hrtimer event by pushing the hrtimer forward one count period.
+ *
+ * Returns: The hrtimer_restart value to return to the hrtimer subsystem.
+ */
+enum hrtimer_restart kvm_mips_count_timeout(struct kvm_vcpu *vcpu)
+{
+ /* Add the Count period to the current expiry time */
+ hrtimer_add_expires_ns(&vcpu->arch.comparecount_timer,
+ vcpu->arch.count_period);
+ return HRTIMER_RESTART;
}
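Compared with the old handler, which unconditionally re-armed the timer 10 ms ahead, pushing the expiry forward by count_period ties the interrupt cadence to the emulated 2^32-tick wrap of CP0_Count relative to CP0_Compare.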
enum emulation_result kvm_mips_emul_eret(struct kvm_vcpu *vcpu)
@@ -471,8 +967,7 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause,
#endif
/* Get reg */
if ((rd == MIPS_CP0_COUNT) && (sel == 0)) {
- /* XXXKYMA: Run the Guest count register @ 1/4 the rate of the host */
- vcpu->arch.gprs[rt] = (read_c0_count() >> 2);
+ vcpu->arch.gprs[rt] = kvm_mips_read_count(vcpu);
} else if ((rd == MIPS_CP0_ERRCTL) && (sel == 0)) {
vcpu->arch.gprs[rt] = 0x0;
#ifdef CONFIG_KVM_MIPS_DYN_TRANS
@@ -539,10 +1034,7 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause,
}
/* Are we writing to COUNT */
else if ((rd == MIPS_CP0_COUNT) && (sel == 0)) {
- /* Linux doesn't seem to write into COUNT, we throw an error
- * if we notice a write to COUNT
- */
- /*er = EMULATE_FAIL; */
+ kvm_mips_write_count(vcpu, vcpu->arch.gprs[rt]);
goto done;
} else if ((rd == MIPS_CP0_COMPARE) && (sel == 0)) {
kvm_debug("[%#x] MTCz, COMPARE %#lx <- %#lx\n",
@@ -552,8 +1044,8 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause,
/* If we are writing to COMPARE */
/* Clear pending timer interrupt, if any */
kvm_mips_callbacks->dequeue_timer_int(vcpu);
- kvm_write_c0_guest_compare(cop0,
- vcpu->arch.gprs[rt]);
+ kvm_mips_write_compare(vcpu,
+ vcpu->arch.gprs[rt]);
} else if ((rd == MIPS_CP0_STATUS) && (sel == 0)) {
kvm_write_c0_guest_status(cop0,
vcpu->arch.gprs[rt]);
@@ -564,6 +1056,20 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause,
#ifdef CONFIG_KVM_MIPS_DYN_TRANS
kvm_mips_trans_mtc0(inst, opc, vcpu);
#endif
+ } else if ((rd == MIPS_CP0_CAUSE) && (sel == 0)) {
+ uint32_t old_cause, new_cause;
+ old_cause = kvm_read_c0_guest_cause(cop0);
+ new_cause = vcpu->arch.gprs[rt];
+ /* Update R/W bits */
+ kvm_change_c0_guest_cause(cop0, 0x08800300,
+ new_cause);
+ /* DC bit enabling/disabling timer? */
+ if ((old_cause ^ new_cause) & CAUSEF_DC) {
+ if (new_cause & CAUSEF_DC)
+ kvm_mips_count_disable_cause(vcpu);
+ else
+ kvm_mips_count_enable_cause(vcpu);
+ }
} else {
cop0->reg[rd][sel] = vcpu->arch.gprs[rt];
#ifdef CONFIG_KVM_MIPS_DYN_TRANS
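For reference, the mask 0x08800300 passed to kvm_change_c0_guest_cause() covers CP0_Cause.DC (bit 27), Cause.IV (bit 23) and the software interrupt pending bits IP1..IP0 (bits 9:8); the remaining Cause bits stay read-only to the guest.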
@@ -887,7 +1393,7 @@ int kvm_mips_sync_icache(unsigned long va, struct kvm_vcpu *vcpu)
printk("%s: va: %#lx, unmapped: %#x\n", __func__, va, CKSEG0ADDR(pa));
- mips32_SyncICache(CKSEG0ADDR(pa), 32);
+ local_flush_icache_range(CKSEG0ADDR(pa), CKSEG0ADDR(pa) + 32);
return 0;
}
@@ -1325,8 +1831,12 @@ kvm_mips_handle_tlbmod(unsigned long cause, uint32_t *opc,
struct kvm_run *run, struct kvm_vcpu *vcpu)
{
enum emulation_result er = EMULATE_DONE;
-
#ifdef DEBUG
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+ unsigned long entryhi = (vcpu->arch.host_cp0_badvaddr & VPN2_MASK) |
+ (kvm_read_c0_guest_entryhi(cop0) & ASID_MASK);
+ int index;
+
/*
* If address not in the guest TLB, then we are in trouble
*/
@@ -1553,8 +2063,7 @@ kvm_mips_handle_ri(unsigned long cause, uint32_t *opc,
current_cpu_data.icache.linesz);
break;
case 2: /* Read count register */
- printk("RDHWR: Cont register\n");
- arch->gprs[rt] = kvm_read_c0_guest_count(cop0);
+ arch->gprs[rt] = kvm_mips_read_count(vcpu);
break;
case 3: /* Count register resolution */
switch (current_cpu_data.cputype) {
@@ -1810,11 +2319,9 @@ kvm_mips_handle_tlbmiss(unsigned long cause, uint32_t *opc,
er = EMULATE_FAIL;
}
} else {
-#ifdef DEBUG
kvm_debug
("Injecting hi: %#lx, lo0: %#lx, lo1: %#lx into shadow host TLB\n",
tlb->tlb_hi, tlb->tlb_lo0, tlb->tlb_lo1);
-#endif
/* OK we have a Guest TLB entry, now inject it into the shadow host TLB */
kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, NULL,
NULL);
diff --git a/arch/mips/kvm/kvm_tlb.c b/arch/mips/kvm/kvm_tlb.c
index 50ab9c4d4a5d..8a5a700ad8de 100644
--- a/arch/mips/kvm/kvm_tlb.c
+++ b/arch/mips/kvm/kvm_tlb.c
@@ -222,26 +222,19 @@ kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi,
return -1;
}
- if (idx < 0) {
- idx = read_c0_random() % current_cpu_data.tlbsize;
- write_c0_index(idx);
- mtc0_tlbw_hazard();
- }
write_c0_entrylo0(entrylo0);
write_c0_entrylo1(entrylo1);
mtc0_tlbw_hazard();
- tlb_write_indexed();
+ if (idx < 0)
+ tlb_write_random();
+ else
+ tlb_write_indexed();
tlbw_use_hazard();
-#ifdef DEBUG
- if (debug) {
- kvm_debug("@ %#lx idx: %2d [entryhi(R): %#lx] "
- "entrylo0(R): 0x%08lx, entrylo1(R): 0x%08lx\n",
- vcpu->arch.pc, idx, read_c0_entryhi(),
- read_c0_entrylo0(), read_c0_entrylo1());
- }
-#endif
+ kvm_debug("@ %#lx idx: %2d [entryhi(R): %#lx] entrylo0(R): 0x%08lx, entrylo1(R): 0x%08lx\n",
+ vcpu->arch.pc, idx, read_c0_entryhi(),
+ read_c0_entrylo0(), read_c0_entrylo1());
/* Flush D-cache */
if (flush_dcache_mask) {
@@ -348,11 +341,9 @@ int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr,
mtc0_tlbw_hazard();
tlbw_use_hazard();
-#ifdef DEBUG
kvm_debug ("@ %#lx idx: %2d [entryhi(R): %#lx] entrylo0 (R): 0x%08lx, entrylo1(R): 0x%08lx\n",
vcpu->arch.pc, read_c0_index(), read_c0_entryhi(),
read_c0_entrylo0(), read_c0_entrylo1());
-#endif
/* Restore old ASID */
write_c0_entryhi(old_entryhi);
@@ -400,10 +391,8 @@ kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | (0x3 << 3) |
(tlb->tlb_lo1 & MIPS3_PG_D) | (tlb->tlb_lo1 & MIPS3_PG_V);
-#ifdef DEBUG
kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc,
tlb->tlb_lo0, tlb->tlb_lo1);
-#endif
return kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1,
tlb->tlb_mask);
@@ -424,10 +413,8 @@ int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi)
}
}
-#ifdef DEBUG
kvm_debug("%s: entryhi: %#lx, index: %d lo0: %#lx, lo1: %#lx\n",
__func__, entryhi, index, tlb[i].tlb_lo0, tlb[i].tlb_lo1);
-#endif
return index;
}
@@ -461,9 +448,7 @@ int kvm_mips_host_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long vaddr)
local_irq_restore(flags);
-#ifdef DEBUG
kvm_debug("Host TLB lookup, %#lx, idx: %2d\n", vaddr, idx);
-#endif
return idx;
}
@@ -508,12 +493,9 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va)
local_irq_restore(flags);
-#ifdef DEBUG
- if (idx > 0) {
+ if (idx > 0)
kvm_debug("%s: Invalidated entryhi %#lx @ idx %d\n", __func__,
- (va & VPN2_MASK) | (vcpu->arch.asid_map[va & ASID_MASK] & ASID_MASK), idx);
- }
-#endif
+ (va & VPN2_MASK) | kvm_mips_get_user_asid(vcpu), idx);
return 0;
}
@@ -658,15 +640,30 @@ void kvm_local_flush_tlb_all(void)
local_irq_restore(flags);
}
+/**
+ * kvm_mips_migrate_count() - Migrate timer.
+ * @vcpu: Virtual CPU.
+ *
+ * Migrate CP0_Count hrtimer to the current CPU by cancelling and restarting it
+ * if it was running prior to being cancelled.
+ *
+ * Must be called when the VCPU is migrated to a different CPU to ensure that
+ * timer expiry during guest execution interrupts the guest and causes the
+ * interrupt to be delivered in a timely manner.
+ */
+static void kvm_mips_migrate_count(struct kvm_vcpu *vcpu)
+{
+ if (hrtimer_cancel(&vcpu->arch.comparecount_timer))
+ hrtimer_restart(&vcpu->arch.comparecount_timer);
+}
+
/* Restore ASID once we are scheduled back after preemption */
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
unsigned long flags;
int newasid = 0;
-#ifdef DEBUG
kvm_debug("%s: vcpu %p, cpu: %d\n", __func__, vcpu, cpu);
-#endif
/* Allocate new kernel and user ASIDs if needed */
@@ -682,17 +679,23 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
vcpu->arch.guest_user_mm.context.asid[cpu];
newasid++;
- kvm_info("[%d]: cpu_context: %#lx\n", cpu,
- cpu_context(cpu, current->mm));
- kvm_info("[%d]: Allocated new ASID for Guest Kernel: %#x\n",
- cpu, vcpu->arch.guest_kernel_asid[cpu]);
- kvm_info("[%d]: Allocated new ASID for Guest User: %#x\n", cpu,
- vcpu->arch.guest_user_asid[cpu]);
+ kvm_debug("[%d]: cpu_context: %#lx\n", cpu,
+ cpu_context(cpu, current->mm));
+ kvm_debug("[%d]: Allocated new ASID for Guest Kernel: %#x\n",
+ cpu, vcpu->arch.guest_kernel_asid[cpu]);
+ kvm_debug("[%d]: Allocated new ASID for Guest User: %#x\n", cpu,
+ vcpu->arch.guest_user_asid[cpu]);
}
if (vcpu->arch.last_sched_cpu != cpu) {
- kvm_info("[%d->%d]KVM VCPU[%d] switch\n",
- vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id);
+ kvm_debug("[%d->%d]KVM VCPU[%d] switch\n",
+ vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id);
+ /*
+ * Migrate the timer interrupt to the current CPU so that it
+ * always interrupts the guest and synchronously triggers a
+ * guest timer interrupt.
+ */
+ kvm_mips_migrate_count(vcpu);
}
if (!newasid) {
diff --git a/arch/mips/kvm/kvm_trap_emul.c b/arch/mips/kvm/kvm_trap_emul.c
index 30d725321db1..693f952b2fbb 100644
--- a/arch/mips/kvm/kvm_trap_emul.c
+++ b/arch/mips/kvm/kvm_trap_emul.c
@@ -32,9 +32,7 @@ static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva)
gpa = KVM_INVALID_ADDR;
}
-#ifdef DEBUG
kvm_debug("%s: gva %#lx, gpa: %#llx\n", __func__, gva, gpa);
-#endif
return gpa;
}
@@ -85,11 +83,9 @@ static int kvm_trap_emul_handle_tlb_mod(struct kvm_vcpu *vcpu)
if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0
|| KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) {
-#ifdef DEBUG
kvm_debug
("USER/KSEG23 ADDR TLB MOD fault: cause %#lx, PC: %p, BadVaddr: %#lx\n",
cause, opc, badvaddr);
-#endif
er = kvm_mips_handle_tlbmod(cause, opc, run, vcpu);
if (er == EMULATE_DONE)
@@ -138,11 +134,9 @@ static int kvm_trap_emul_handle_tlb_st_miss(struct kvm_vcpu *vcpu)
}
} else if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0
|| KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) {
-#ifdef DEBUG
kvm_debug
("USER ADDR TLB LD fault: cause %#lx, PC: %p, BadVaddr: %#lx\n",
cause, opc, badvaddr);
-#endif
er = kvm_mips_handle_tlbmiss(cause, opc, run, vcpu);
if (er == EMULATE_DONE)
ret = RESUME_GUEST;
@@ -188,10 +182,8 @@ static int kvm_trap_emul_handle_tlb_ld_miss(struct kvm_vcpu *vcpu)
}
} else if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0
|| KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) {
-#ifdef DEBUG
kvm_debug("USER ADDR TLB ST fault: PC: %#lx, BadVaddr: %#lx\n",
vcpu->arch.pc, badvaddr);
-#endif
/* User Address (UA) fault, this could happen if
* (1) TLB entry not present/valid in both Guest and shadow host TLBs, in this
@@ -236,9 +228,7 @@ static int kvm_trap_emul_handle_addr_err_st(struct kvm_vcpu *vcpu)
if (KVM_GUEST_KERNEL_MODE(vcpu)
&& (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1)) {
-#ifdef DEBUG
kvm_debug("Emulate Store to MMIO space\n");
-#endif
er = kvm_mips_emulate_inst(cause, opc, run, vcpu);
if (er == EMULATE_FAIL) {
printk("Emulate Store to MMIO space failed\n");
@@ -268,9 +258,7 @@ static int kvm_trap_emul_handle_addr_err_ld(struct kvm_vcpu *vcpu)
int ret = RESUME_GUEST;
if (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1) {
-#ifdef DEBUG
kvm_debug("Emulate Load from MMIO space @ %#lx\n", badvaddr);
-#endif
er = kvm_mips_emulate_inst(cause, opc, run, vcpu);
if (er == EMULATE_FAIL) {
printk("Emulate Load from MMIO space failed\n");
@@ -401,6 +389,78 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu)
return 0;
}
+static int kvm_trap_emul_get_one_reg(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg,
+ s64 *v)
+{
+ switch (reg->id) {
+ case KVM_REG_MIPS_CP0_COUNT:
+ *v = kvm_mips_read_count(vcpu);
+ break;
+ case KVM_REG_MIPS_COUNT_CTL:
+ *v = vcpu->arch.count_ctl;
+ break;
+ case KVM_REG_MIPS_COUNT_RESUME:
+ *v = ktime_to_ns(vcpu->arch.count_resume);
+ break;
+ case KVM_REG_MIPS_COUNT_HZ:
+ *v = vcpu->arch.count_hz;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg,
+ s64 v)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+ int ret = 0;
+
+ switch (reg->id) {
+ case KVM_REG_MIPS_CP0_COUNT:
+ kvm_mips_write_count(vcpu, v);
+ break;
+ case KVM_REG_MIPS_CP0_COMPARE:
+ kvm_mips_write_compare(vcpu, v);
+ break;
+ case KVM_REG_MIPS_CP0_CAUSE:
+ /*
+ * If the timer is being stopped or started (DC bit), the change must
+ * appear atomic with changes to the interrupt pending bits (TI, IRQ5);
+ * a timer interrupt should not fire in between.
+ */
+ if ((kvm_read_c0_guest_cause(cop0) ^ v) & CAUSEF_DC) {
+ if (v & CAUSEF_DC) {
+ /* disable timer first */
+ kvm_mips_count_disable_cause(vcpu);
+ kvm_change_c0_guest_cause(cop0, ~CAUSEF_DC, v);
+ } else {
+ /* enable timer last */
+ kvm_change_c0_guest_cause(cop0, ~CAUSEF_DC, v);
+ kvm_mips_count_enable_cause(vcpu);
+ }
+ } else {
+ kvm_write_c0_guest_cause(cop0, v);
+ }
+ break;
+ case KVM_REG_MIPS_COUNT_CTL:
+ ret = kvm_mips_set_count_ctl(vcpu, v);
+ break;
+ case KVM_REG_MIPS_COUNT_RESUME:
+ ret = kvm_mips_set_count_resume(vcpu, v);
+ break;
+ case KVM_REG_MIPS_COUNT_HZ:
+ ret = kvm_mips_set_count_hz(vcpu, v);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return ret;
+}
+
static struct kvm_mips_callbacks kvm_trap_emul_callbacks = {
/* exit handlers */
.handle_cop_unusable = kvm_trap_emul_handle_cop_unusable,
@@ -423,6 +483,8 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = {
.dequeue_io_int = kvm_mips_dequeue_io_int_cb,
.irq_deliver = kvm_mips_irq_deliver_cb,
.irq_clear = kvm_mips_irq_clear_cb,
+ .get_one_reg = kvm_trap_emul_get_one_reg,
+ .set_one_reg = kvm_trap_emul_set_one_reg,
};
int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks)
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index e422b38d3113..5aaa5c799731 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -30,6 +30,7 @@ void (*flush_cache_page)(struct vm_area_struct *vma, unsigned long page,
unsigned long pfn);
void (*flush_icache_range)(unsigned long start, unsigned long end);
void (*local_flush_icache_range)(unsigned long start, unsigned long end);
+EXPORT_SYMBOL_GPL(local_flush_icache_range);
void (*__flush_cache_vmap)(void);
void (*__flush_cache_vunmap)(void);
diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c
index 319009912142..3778a359f3ad 100644
--- a/arch/mips/mti-malta/malta-time.c
+++ b/arch/mips/mti-malta/malta-time.c
@@ -74,18 +74,8 @@ static void __init estimate_frequencies(void)
unsigned int giccount = 0, gicstart = 0;
#endif
-#if defined (CONFIG_KVM_GUEST) && defined (CONFIG_KVM_HOST_FREQ)
- unsigned int prid = read_c0_prid() & (PRID_COMP_MASK | PRID_IMP_MASK);
-
- /*
- * XXXKYMA: hardwire the CPU frequency to Host Freq/4
- */
- count = (CONFIG_KVM_HOST_FREQ * 1000000) >> 3;
- if ((prid != (PRID_COMP_MIPS | PRID_IMP_20KC)) &&
- (prid != (PRID_COMP_MIPS | PRID_IMP_25KF)))
- count *= 2;
-
- mips_hpt_frequency = count;
+#if defined(CONFIG_KVM_GUEST) && CONFIG_KVM_GUEST_TIMER_FREQ
+ mips_hpt_frequency = CONFIG_KVM_GUEST_TIMER_FREQ * 1000000;
return;
#endif
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index a27f5007062a..4181d7baabba 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -110,6 +110,7 @@ struct kvm_s390_sie_block {
#define ICTL_ISKE 0x00004000
#define ICTL_SSKE 0x00002000
#define ICTL_RRBE 0x00001000
+#define ICTL_TPROT 0x00000200
__u32 ictl; /* 0x0048 */
__u32 eca; /* 0x004c */
#define ICPT_INST 0x04
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index db608c3f9303..4653ac6e182b 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -292,7 +292,7 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
wake_up(&vcpu->kvm->arch.ipte_wq);
}
-static void ipte_lock(struct kvm_vcpu *vcpu)
+void ipte_lock(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.sie_block->eca & 1)
ipte_lock_siif(vcpu);
@@ -300,7 +300,7 @@ static void ipte_lock(struct kvm_vcpu *vcpu)
ipte_lock_simple(vcpu);
}
-static void ipte_unlock(struct kvm_vcpu *vcpu)
+void ipte_unlock(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.sie_block->eca & 1)
ipte_unlock_siif(vcpu);
@@ -645,6 +645,59 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
}
/**
+ * guest_translate_address - translate guest logical into guest absolute address
+ *
+ * Parameter semantics are the same as the ones from guest_translate.
+ * The memory contents at the guest address are not changed.
+ *
+ * Note: The IPTE lock is not taken during this function, so the caller
+ * has to take it where required.
+ */
+int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
+ unsigned long *gpa, int write)
+{
+ struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
+ psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ struct trans_exc_code_bits *tec;
+ union asce asce;
+ int rc;
+
+ /* Access register mode is not supported yet. */
+ if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG)
+ return -EOPNOTSUPP;
+
+ gva = kvm_s390_logical_to_effective(vcpu, gva);
+ memset(pgm, 0, sizeof(*pgm));
+ tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+ tec->as = psw_bits(*psw).as;
+ tec->fsi = write ? FSI_STORE : FSI_FETCH;
+ tec->addr = gva >> PAGE_SHIFT;
+ if (is_low_address(gva) && low_address_protection_enabled(vcpu)) {
+ if (write) {
+ rc = pgm->code = PGM_PROTECTION;
+ return rc;
+ }
+ }
+
+ asce.val = get_vcpu_asce(vcpu);
+ if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */
+ rc = guest_translate(vcpu, gva, gpa, write);
+ if (rc > 0) {
+ if (rc == PGM_PROTECTION)
+ tec->b61 = 1;
+ pgm->code = rc;
+ }
+ } else {
+ rc = 0;
+ *gpa = kvm_s390_real_to_abs(vcpu, gva);
+ if (kvm_is_error_gpa(vcpu->kvm, *gpa))
+ rc = pgm->code = PGM_ADDRESSING;
+ }
+
+ return rc;
+}
+
+/**
* kvm_s390_check_low_addr_protection - check for low-address protection
* @ga: Guest address
*
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index a07ee08ac478..0149cf15058a 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -155,6 +155,9 @@ int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
return kvm_read_guest(vcpu->kvm, gpa, data, len);
}
+int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
+ unsigned long *gpa, int write);
+
int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
unsigned long len, int write);
@@ -324,6 +327,8 @@ int read_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
return access_guest_real(vcpu, gra, data, len, 0);
}
+void ipte_lock(struct kvm_vcpu *vcpu);
+void ipte_unlock(struct kvm_vcpu *vcpu);
int ipte_lock_held(struct kvm_vcpu *vcpu);
int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga);
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index bf0d9bc15bcd..90c8de22a2a0 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -442,7 +442,6 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw),
&vcpu->arch.sie_block->gpsw,
sizeof(psw_t));
- kvm_s390_vcpu_start(vcpu);
break;
case KVM_S390_PROGRAM_INT:
VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index e519860c6031..43e191b25789 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -637,7 +637,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
if (sclp_has_siif())
vcpu->arch.sie_block->eca |= 1;
vcpu->arch.sie_block->fac = (int) (long) vfacilities;
- vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
+ vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE |
+ ICTL_TPROT;
+
if (kvm_s390_cmma_enabled(vcpu->kvm)) {
rc = kvm_s390_vcpu_setup_cmma(vcpu);
if (rc)
@@ -950,7 +952,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
vcpu->guest_debug = 0;
kvm_s390_clear_bp_data(vcpu);
- if (vcpu->guest_debug & ~VALID_GUESTDBG_FLAGS)
+ if (dbg->control & ~VALID_GUESTDBG_FLAGS)
return -EINVAL;
if (dbg->control & KVM_GUESTDBG_ENABLE) {
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 6296159ac883..f89c1cd67751 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -930,8 +930,9 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
static int handle_tprot(struct kvm_vcpu *vcpu)
{
u64 address1, address2;
- struct vm_area_struct *vma;
- unsigned long user_address;
+ unsigned long hva, gpa;
+ int ret = 0, cc = 0;
+ bool writable;
vcpu->stat.instruction_tprot++;
@@ -942,32 +943,41 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
/* we only handle the Linux memory detection case:
* access key == 0
- * guest DAT == off
* everything else goes to userspace. */
if (address2 & 0xf0)
return -EOPNOTSUPP;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
- return -EOPNOTSUPP;
-
- down_read(&current->mm->mmap_sem);
- user_address = __gmap_translate(address1, vcpu->arch.gmap);
- if (IS_ERR_VALUE(user_address))
- goto out_inject;
- vma = find_vma(current->mm, user_address);
- if (!vma)
- goto out_inject;
- vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
- if (!(vma->vm_flags & VM_WRITE) && (vma->vm_flags & VM_READ))
- vcpu->arch.sie_block->gpsw.mask |= (1ul << 44);
- if (!(vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_READ))
- vcpu->arch.sie_block->gpsw.mask |= (2ul << 44);
-
- up_read(&current->mm->mmap_sem);
- return 0;
+ ipte_lock(vcpu);
+ ret = guest_translate_address(vcpu, address1, &gpa, 1);
+ if (ret == PGM_PROTECTION) {
+ /* Write protected? Try again with read-only... */
+ cc = 1;
+ ret = guest_translate_address(vcpu, address1, &gpa, 0);
+ }
+ if (ret) {
+ if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) {
+ ret = kvm_s390_inject_program_int(vcpu, ret);
+ } else if (ret > 0) {
+ /* Translation not available */
+ kvm_s390_set_psw_cc(vcpu, 3);
+ ret = 0;
+ }
+ goto out_unlock;
+ }
-out_inject:
- up_read(&current->mm->mmap_sem);
- return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ hva = gfn_to_hva_prot(vcpu->kvm, gpa_to_gfn(gpa), &writable);
+ if (kvm_is_error_hva(hva)) {
+ ret = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ } else {
+ if (!writable)
+ cc = 1; /* Write not permitted ==> read-only */
+ kvm_s390_set_psw_cc(vcpu, cc);
+ /* Note: CC2 only occurs for storage keys (not supported yet) */
+ }
+out_unlock:
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
+ ipte_unlock(vcpu);
+ return ret;
}
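For reference, the TEST PROTECTION condition codes set above follow the architecture: cc 0 means fetching and storing are permitted, cc 1 fetching only, cc 2 neither (only reachable with storage keys, which are not supported here, hence the note), and cc 3 translation not available.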
int kvm_s390_handle_e5(struct kvm_vcpu *vcpu)
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index d0341d2e54b1..43079a48cc98 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -54,33 +54,23 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
{
- struct kvm_s390_local_interrupt *li;
- struct kvm_s390_interrupt_info *inti;
+ struct kvm_s390_interrupt s390int = {
+ .type = KVM_S390_INT_EMERGENCY,
+ .parm = vcpu->vcpu_id,
+ };
struct kvm_vcpu *dst_vcpu = NULL;
+ int rc = 0;
if (cpu_addr < KVM_MAX_VCPUS)
dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
if (!dst_vcpu)
return SIGP_CC_NOT_OPERATIONAL;
- inti = kzalloc(sizeof(*inti), GFP_KERNEL);
- if (!inti)
- return -ENOMEM;
-
- inti->type = KVM_S390_INT_EMERGENCY;
- inti->emerg.code = vcpu->vcpu_id;
+ rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int);
+ if (!rc)
+ VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
- li = &dst_vcpu->arch.local_int;
- spin_lock_bh(&li->lock);
- list_add_tail(&inti->list, &li->list);
- atomic_set(&li->active, 1);
- atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
- if (waitqueue_active(li->wq))
- wake_up_interruptible(li->wq);
- spin_unlock_bh(&li->lock);
- VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
-
- return SIGP_CC_ORDER_CODE_ACCEPTED;
+ return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
}
static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr,
@@ -116,33 +106,23 @@ static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr,
static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
{
- struct kvm_s390_local_interrupt *li;
- struct kvm_s390_interrupt_info *inti;
+ struct kvm_s390_interrupt s390int = {
+ .type = KVM_S390_INT_EXTERNAL_CALL,
+ .parm = vcpu->vcpu_id,
+ };
struct kvm_vcpu *dst_vcpu = NULL;
+ int rc;
if (cpu_addr < KVM_MAX_VCPUS)
dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
if (!dst_vcpu)
return SIGP_CC_NOT_OPERATIONAL;
- inti = kzalloc(sizeof(*inti), GFP_KERNEL);
- if (!inti)
- return -ENOMEM;
-
- inti->type = KVM_S390_INT_EXTERNAL_CALL;
- inti->extcall.code = vcpu->vcpu_id;
-
- li = &dst_vcpu->arch.local_int;
- spin_lock_bh(&li->lock);
- list_add_tail(&inti->list, &li->list);
- atomic_set(&li->active, 1);
- atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
- if (waitqueue_active(li->wq))
- wake_up_interruptible(li->wq);
- spin_unlock_bh(&li->lock);
- VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr);
+ rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int);
+ if (!rc)
+ VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr);
- return SIGP_CC_ORDER_CODE_ACCEPTED;
+ return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
}
static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 2fa7ab069817..e4e833d3d7d7 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -161,6 +161,7 @@
#define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */
#define NoWrite ((u64)1 << 45) /* No writeback */
#define SrcWrite ((u64)1 << 46) /* Write back src operand */
+#define NoMod ((u64)1 << 47) /* Mod field is ignored */
#define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
@@ -1077,7 +1078,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
ctxt->modrm_rm |= (ctxt->modrm & 0x07);
ctxt->modrm_seg = VCPU_SREG_DS;
- if (ctxt->modrm_mod == 3) {
+ if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
op->type = OP_REG;
op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
@@ -3877,10 +3878,12 @@ static const struct opcode twobyte_table[256] = {
N, N, N, N, N, N, N, N,
D(ImplicitOps | ModRM), N, N, N, N, N, N, D(ImplicitOps | ModRM),
/* 0x20 - 0x2F */
- DIP(ModRM | DstMem | Priv | Op3264, cr_read, check_cr_read),
- DIP(ModRM | DstMem | Priv | Op3264, dr_read, check_dr_read),
- IIP(ModRM | SrcMem | Priv | Op3264, em_cr_write, cr_write, check_cr_write),
- IIP(ModRM | SrcMem | Priv | Op3264, em_dr_write, dr_write, check_dr_write),
+ DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read),
+ DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
+ IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_cr_write, cr_write,
+ check_cr_write),
+ IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_dr_write, dr_write,
+ check_dr_write),
N, N, N, N,
GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 9736529ade08..006911858174 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -360,6 +360,8 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
{
+ /* Note that we never get here with APIC virtualization enabled. */
+
if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
++apic->isr_count;
BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
@@ -371,12 +373,48 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
apic->highest_isr_cache = vec;
}
+static inline int apic_find_highest_isr(struct kvm_lapic *apic)
+{
+ int result;
+
+ /*
+ * Note that isr_count is always 1, and highest_isr_cache
+ * is always -1, with APIC virtualization enabled.
+ */
+ if (!apic->isr_count)
+ return -1;
+ if (likely(apic->highest_isr_cache != -1))
+ return apic->highest_isr_cache;
+
+ result = find_highest_vector(apic->regs + APIC_ISR);
+ ASSERT(result == -1 || result >= 16);
+
+ return result;
+}
+
static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
{
- if (__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
+ struct kvm_vcpu *vcpu;
+ if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
+ return;
+
+ vcpu = apic->vcpu;
+
+ /*
+ * We do get here with APIC virtualization enabled if the guest
+ * uses the Hyper-V APIC enlightenment. In this case we may need
+ * to trigger a new interrupt delivery by writing the SVI field;
+ * on the other hand isr_count and highest_isr_cache are unused
+ * and must be left alone.
+ */
+ if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
+ kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
+ apic_find_highest_isr(apic));
+ else {
--apic->isr_count;
- BUG_ON(apic->isr_count < 0);
- apic->highest_isr_cache = -1;
+ BUG_ON(apic->isr_count < 0);
+ apic->highest_isr_cache = -1;
+ }
}
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
@@ -456,22 +494,6 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}
-static inline int apic_find_highest_isr(struct kvm_lapic *apic)
-{
- int result;
-
- /* Note that isr_count is always 1 with vid enabled */
- if (!apic->isr_count)
- return -1;
- if (likely(apic->highest_isr_cache != -1))
- return apic->highest_isr_cache;
-
- result = find_highest_vector(apic->regs + APIC_ISR);
- ASSERT(result == -1 || result >= 16);
-
- return result;
-}
-
void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr)
{
struct kvm_lapic *apic = vcpu->arch.apic;
@@ -1605,6 +1627,8 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
int vector = kvm_apic_has_interrupt(vcpu);
struct kvm_lapic *apic = vcpu->arch.apic;
+ /* Note that we never get here with APIC virtualization enabled. */
+
if (vector == -1)
return -1;
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 6929571b79b0..24e9033f8b3f 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -317,6 +317,7 @@ header-y += ppp-ioctl.h
header-y += ppp_defs.h
header-y += pps.h
header-y += prctl.h
+header-y += psci.h
header-y += ptp_clock.h
header-y += ptrace.h
header-y += qnx4_fs.h
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 16c923de85e7..e11d8f170a62 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -171,6 +171,7 @@ struct kvm_pit_config {
#define KVM_EXIT_WATCHDOG 21
#define KVM_EXIT_S390_TSCH 22
#define KVM_EXIT_EPR 23
+#define KVM_EXIT_SYSTEM_EVENT 24
/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@@ -301,6 +302,13 @@ struct kvm_run {
struct {
__u32 epr;
} epr;
+ /* KVM_EXIT_SYSTEM_EVENT */
+ struct {
+#define KVM_SYSTEM_EVENT_SHUTDOWN 1
+#define KVM_SYSTEM_EVENT_RESET 2
+ __u32 type;
+ __u64 flags;
+ } system_event;
/* Fix the size of the union. */
char padding[256];
};
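A VMM consumes the new exit roughly as follows (a sketch; shutdown_vm() and reset_vm() are hypothetical stand-ins for whatever the VMM actually does on power-off and reboot requests):

	#include <linux/kvm.h>

	int shutdown_vm(void);	/* hypothetical VMM hooks */
	int reset_vm(void);

	/* Dispatch after ioctl(vcpu_fd, KVM_RUN, 0) returns; run points at
	 * the mmap()ed vcpu communication page. */
	static int handle_system_event(struct kvm_run *run)
	{
		if (run->exit_reason != KVM_EXIT_SYSTEM_EVENT)
			return 0;

		switch (run->system_event.type) {
		case KVM_SYSTEM_EVENT_SHUTDOWN:
			return shutdown_vm();	/* e.g. guest PSCI SYSTEM_OFF */
		case KVM_SYSTEM_EVENT_RESET:
			return reset_vm();	/* e.g. guest PSCI SYSTEM_RESET */
		default:
			return -1;
		}
	}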
@@ -748,7 +756,8 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_S390_IRQCHIP 99
#define KVM_CAP_IOEVENTFD_NO_LENGTH 100
#define KVM_CAP_VM_ATTRIBUTES 101
-#define KVM_CAP_PPC_FIXUP_HCALL 102
+#define KVM_CAP_ARM_PSCI_0_2 102
+#define KVM_CAP_PPC_FIXUP_HCALL 103
#ifdef KVM_CAP_IRQ_ROUTING
diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h
new file mode 100644
index 000000000000..310d83e0a91b
--- /dev/null
+++ b/include/uapi/linux/psci.h
@@ -0,0 +1,90 @@
+/*
+ * ARM Power State and Coordination Interface (PSCI) header
+ *
+ * This header holds common PSCI defines and macros shared
+ * by: ARM kernel, ARM64 kernel, KVM ARM/ARM64 and user space.
+ *
+ * Copyright (C) 2014 Linaro Ltd.
+ * Author: Anup Patel <anup.patel@linaro.org>
+ */
+
+#ifndef _UAPI_LINUX_PSCI_H
+#define _UAPI_LINUX_PSCI_H
+
+/*
+ * PSCI v0.1 interface
+ *
+ * The PSCI v0.1 function numbers are implementation defined.
+ *
+ * Only PSCI return values such as: SUCCESS, NOT_SUPPORTED,
+ * INVALID_PARAMS, and DENIED defined below are applicable
+ * to PSCI v0.1.
+ */
+
+/* PSCI v0.2 interface */
+#define PSCI_0_2_FN_BASE 0x84000000
+#define PSCI_0_2_FN(n) (PSCI_0_2_FN_BASE + (n))
+#define PSCI_0_2_64BIT 0x40000000
+#define PSCI_0_2_FN64_BASE \
+ (PSCI_0_2_FN_BASE + PSCI_0_2_64BIT)
+#define PSCI_0_2_FN64(n) (PSCI_0_2_FN64_BASE + (n))
+
+#define PSCI_0_2_FN_PSCI_VERSION PSCI_0_2_FN(0)
+#define PSCI_0_2_FN_CPU_SUSPEND PSCI_0_2_FN(1)
+#define PSCI_0_2_FN_CPU_OFF PSCI_0_2_FN(2)
+#define PSCI_0_2_FN_CPU_ON PSCI_0_2_FN(3)
+#define PSCI_0_2_FN_AFFINITY_INFO PSCI_0_2_FN(4)
+#define PSCI_0_2_FN_MIGRATE PSCI_0_2_FN(5)
+#define PSCI_0_2_FN_MIGRATE_INFO_TYPE PSCI_0_2_FN(6)
+#define PSCI_0_2_FN_MIGRATE_INFO_UP_CPU PSCI_0_2_FN(7)
+#define PSCI_0_2_FN_SYSTEM_OFF PSCI_0_2_FN(8)
+#define PSCI_0_2_FN_SYSTEM_RESET PSCI_0_2_FN(9)
+
+#define PSCI_0_2_FN64_CPU_SUSPEND PSCI_0_2_FN64(1)
+#define PSCI_0_2_FN64_CPU_ON PSCI_0_2_FN64(3)
+#define PSCI_0_2_FN64_AFFINITY_INFO PSCI_0_2_FN64(4)
+#define PSCI_0_2_FN64_MIGRATE PSCI_0_2_FN64(5)
+#define PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU PSCI_0_2_FN64(7)
+
+/* PSCI v0.2 power state encoding for CPU_SUSPEND function */
+#define PSCI_0_2_POWER_STATE_ID_MASK 0xffff
+#define PSCI_0_2_POWER_STATE_ID_SHIFT 0
+#define PSCI_0_2_POWER_STATE_TYPE_SHIFT 16
+#define PSCI_0_2_POWER_STATE_TYPE_MASK \
+ (0x1 << PSCI_0_2_POWER_STATE_TYPE_SHIFT)
+#define PSCI_0_2_POWER_STATE_AFFL_SHIFT 24
+#define PSCI_0_2_POWER_STATE_AFFL_MASK \
+ (0x3 << PSCI_0_2_POWER_STATE_AFFL_SHIFT)
+
+/* PSCI v0.2 affinity level state returned by AFFINITY_INFO */
+#define PSCI_0_2_AFFINITY_LEVEL_ON 0
+#define PSCI_0_2_AFFINITY_LEVEL_OFF 1
+#define PSCI_0_2_AFFINITY_LEVEL_ON_PENDING 2
+
+/* PSCI v0.2 multicore support in Trusted OS returned by MIGRATE_INFO_TYPE */
+#define PSCI_0_2_TOS_UP_MIGRATE 0
+#define PSCI_0_2_TOS_UP_NO_MIGRATE 1
+#define PSCI_0_2_TOS_MP 2
+
+/* PSCI version decoding (independent of PSCI version) */
+#define PSCI_VERSION_MAJOR_SHIFT 16
+#define PSCI_VERSION_MINOR_MASK \
+ ((1U << PSCI_VERSION_MAJOR_SHIFT) - 1)
+#define PSCI_VERSION_MAJOR_MASK ~PSCI_VERSION_MINOR_MASK
+#define PSCI_VERSION_MAJOR(ver) \
+ (((ver) & PSCI_VERSION_MAJOR_MASK) >> PSCI_VERSION_MAJOR_SHIFT)
+#define PSCI_VERSION_MINOR(ver) \
+ ((ver) & PSCI_VERSION_MINOR_MASK)
+
+/* PSCI return values (inclusive of all PSCI versions) */
+#define PSCI_RET_SUCCESS 0
+#define PSCI_RET_NOT_SUPPORTED -1
+#define PSCI_RET_INVALID_PARAMS -2
+#define PSCI_RET_DENIED -3
+#define PSCI_RET_ALREADY_ON -4
+#define PSCI_RET_ON_PENDING -5
+#define PSCI_RET_INTERNAL_FAILURE -6
+#define PSCI_RET_NOT_PRESENT -7
+#define PSCI_RET_DISABLED -8
+
+#endif /* _UAPI_LINUX_PSCI_H */
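As a quick check of the version macros: PSCI_VERSION_MAJOR(0x00000002) is 0 and PSCI_VERSION_MINOR(0x00000002) is 2, so firmware returning 0x00000002 from PSCI_VERSION implements PSCI v0.2, while a hypothetical v1.0 would return 0x00010000.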