diff options
45 files changed, 1919 insertions, 444 deletions
diff --git a/Documentation/devicetree/bindings/arm/psci.txt b/Documentation/devicetree/bindings/arm/psci.txt index 433afe9cb590..b4a58f39223c 100644 --- a/Documentation/devicetree/bindings/arm/psci.txt +++ b/Documentation/devicetree/bindings/arm/psci.txt @@ -21,7 +21,15 @@ to #0. Main node required properties: - - compatible : Must be "arm,psci" + - compatible : should contain at least one of: + + * "arm,psci" : for implementations complying to PSCI versions prior to + 0.2. For these cases function IDs must be provided. + + * "arm,psci-0.2" : for implementations complying to PSCI 0.2. Function + IDs are not required and should be ignored by an OS with PSCI 0.2 + support, but are permitted to be present for compatibility with + existing software when "arm,psci" is later in the compatible list. - method : The method of calling the PSCI firmware. Permitted values are: @@ -45,6 +53,8 @@ Main node optional properties: Example: +Case 1: PSCI v0.1 only. + psci { compatible = "arm,psci"; method = "smc"; @@ -53,3 +63,28 @@ Example: cpu_on = <0x95c10002>; migrate = <0x95c10003>; }; + + +Case 2: PSCI v0.2 only + + psci { + compatible = "arm,psci-0.2"; + method = "smc"; + }; + +Case 3: PSCI v0.2 and PSCI v0.1. + + A DTB may provide IDs for use by kernels without PSCI 0.2 support, + enabling firmware and hypervisors to support existing and new kernels. + These IDs will be ignored by kernels with PSCI 0.2 support, which will + use the standard PSCI 0.2 IDs exclusively. + + psci { + compatible = "arm,psci-0.2", "arm,psci"; + method = "hvc"; + + cpu_on = < arbitrary value >; + cpu_off = < arbitrary value >; + + ... + }; diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 6b3029016e9c..6ff3a7742989 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2384,6 +2384,8 @@ Possible features: Depends on KVM_CAP_ARM_PSCI. - KVM_ARM_VCPU_EL1_32BIT: Starts the CPU in a 32bit mode. Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only). + - KVM_ARM_VCPU_PSCI_0_2: Emulate PSCI v0.2 for the CPU. + Depends on KVM_CAP_ARM_PSCI_0_2. 4.83 KVM_ARM_PREFERRED_TARGET @@ -2746,6 +2748,21 @@ It gets triggered whenever both KVM_CAP_PPC_EPR are enabled and an external interrupt has just been delivered into the guest. User space should put the acknowledged interrupt vector into the 'epr' field. + /* KVM_EXIT_SYSTEM_EVENT */ + struct { +#define KVM_SYSTEM_EVENT_SHUTDOWN 1 +#define KVM_SYSTEM_EVENT_RESET 2 + __u32 type; + __u64 flags; + } system_event; + +If exit_reason is KVM_EXIT_SYSTEM_EVENT then the vcpu has triggered +a system-level event using some architecture specific mechanism (hypercall +or some special instruction). In case of ARM/ARM64, this is triggered using +HVC instruction based PSCI call from the vcpu. The 'type' field describes +the system-level event type. The 'flags' field describes architecture +specific flags for the system-level event. + /* Fix the size of the union. */ char padding[256]; }; diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 09af14999c9b..193ceaf01bfd 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -36,7 +36,7 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_HAVE_ONE_REG -#define KVM_VCPU_MAX_FEATURES 1 +#define KVM_VCPU_MAX_FEATURES 2 #include <kvm/arm_vgic.h> diff --git a/arch/arm/include/asm/kvm_psci.h b/arch/arm/include/asm/kvm_psci.h index 9a83d98bf170..6bda945d31fa 100644 --- a/arch/arm/include/asm/kvm_psci.h +++ b/arch/arm/include/asm/kvm_psci.h @@ -18,6 +18,10 @@ #ifndef __ARM_KVM_PSCI_H__ #define __ARM_KVM_PSCI_H__ -bool kvm_psci_call(struct kvm_vcpu *vcpu); +#define KVM_ARM_PSCI_0_1 1 +#define KVM_ARM_PSCI_0_2 2 + +int kvm_psci_version(struct kvm_vcpu *vcpu); +int kvm_psci_call(struct kvm_vcpu *vcpu); #endif /* __ARM_KVM_PSCI_H__ */ diff --git a/arch/arm/include/asm/psci.h b/arch/arm/include/asm/psci.h index c4ae171850f8..c25ef3ec6d1f 100644 --- a/arch/arm/include/asm/psci.h +++ b/arch/arm/include/asm/psci.h @@ -29,16 +29,19 @@ struct psci_operations { int (*cpu_off)(struct psci_power_state state); int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); int (*migrate)(unsigned long cpuid); + int (*affinity_info)(unsigned long target_affinity, + unsigned long lowest_affinity_level); + int (*migrate_info_type)(void); }; extern struct psci_operations psci_ops; extern struct smp_operations psci_smp_ops; #ifdef CONFIG_ARM_PSCI -void psci_init(void); +int psci_init(void); bool psci_smp_available(void); #else -static inline void psci_init(void) { } +static inline int psci_init(void) { return 0; } static inline bool psci_smp_available(void) { return false; } #endif diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index ef0c8785ba16..e6ebdd3471e5 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -20,6 +20,7 @@ #define __ARM_KVM_H__ #include <linux/types.h> +#include <linux/psci.h> #include <asm/ptrace.h> #define __KVM_HAVE_GUEST_DEBUG @@ -83,6 +84,7 @@ struct kvm_regs { #define KVM_VGIC_V2_CPU_SIZE 0x2000 #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ +#define KVM_ARM_VCPU_PSCI_0_2 1 /* CPU uses PSCI v0.2 */ struct kvm_vcpu_init { __u32 target; @@ -201,9 +203,9 @@ struct kvm_arch_memory_slot { #define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2) #define KVM_PSCI_FN_MIGRATE KVM_PSCI_FN(3) -#define KVM_PSCI_RET_SUCCESS 0 -#define KVM_PSCI_RET_NI ((unsigned long)-1) -#define KVM_PSCI_RET_INVAL ((unsigned long)-2) -#define KVM_PSCI_RET_DENIED ((unsigned long)-3) +#define KVM_PSCI_RET_SUCCESS PSCI_RET_SUCCESS +#define KVM_PSCI_RET_NI PSCI_RET_NOT_SUPPORTED +#define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS +#define KVM_PSCI_RET_DENIED PSCI_RET_DENIED #endif /* __ARM_KVM_H__ */ diff --git a/arch/arm/kernel/psci.c b/arch/arm/kernel/psci.c index 46931880093d..f73891b6b730 100644 --- a/arch/arm/kernel/psci.c +++ b/arch/arm/kernel/psci.c @@ -17,63 +17,58 @@ #include <linux/init.h> #include <linux/of.h> +#include <linux/reboot.h> +#include <linux/pm.h> +#include <uapi/linux/psci.h> #include <asm/compiler.h> #include <asm/errno.h> #include <asm/opcodes-sec.h> #include <asm/opcodes-virt.h> #include <asm/psci.h> +#include <asm/system_misc.h> struct psci_operations psci_ops; static int (*invoke_psci_fn)(u32, u32, u32, u32); +typedef int (*psci_initcall_t)(const struct device_node *); enum psci_function { PSCI_FN_CPU_SUSPEND, PSCI_FN_CPU_ON, PSCI_FN_CPU_OFF, PSCI_FN_MIGRATE, + PSCI_FN_AFFINITY_INFO, + PSCI_FN_MIGRATE_INFO_TYPE, PSCI_FN_MAX, }; static u32 psci_function_id[PSCI_FN_MAX]; -#define PSCI_RET_SUCCESS 0 -#define PSCI_RET_EOPNOTSUPP -1 -#define PSCI_RET_EINVAL -2 -#define PSCI_RET_EPERM -3 - static int psci_to_linux_errno(int errno) { switch (errno) { case PSCI_RET_SUCCESS: return 0; - case PSCI_RET_EOPNOTSUPP: + case PSCI_RET_NOT_SUPPORTED: return -EOPNOTSUPP; - case PSCI_RET_EINVAL: + case PSCI_RET_INVALID_PARAMS: return -EINVAL; - case PSCI_RET_EPERM: + case PSCI_RET_DENIED: return -EPERM; }; return -EINVAL; } -#define PSCI_POWER_STATE_ID_MASK 0xffff -#define PSCI_POWER_STATE_ID_SHIFT 0 -#define PSCI_POWER_STATE_TYPE_MASK 0x1 -#define PSCI_POWER_STATE_TYPE_SHIFT 16 -#define PSCI_POWER_STATE_AFFL_MASK 0x3 -#define PSCI_POWER_STATE_AFFL_SHIFT 24 - static u32 psci_power_state_pack(struct psci_power_state state) { - return ((state.id & PSCI_POWER_STATE_ID_MASK) - << PSCI_POWER_STATE_ID_SHIFT) | - ((state.type & PSCI_POWER_STATE_TYPE_MASK) - << PSCI_POWER_STATE_TYPE_SHIFT) | - ((state.affinity_level & PSCI_POWER_STATE_AFFL_MASK) - << PSCI_POWER_STATE_AFFL_SHIFT); + return ((state.id << PSCI_0_2_POWER_STATE_ID_SHIFT) + & PSCI_0_2_POWER_STATE_ID_MASK) | + ((state.type << PSCI_0_2_POWER_STATE_TYPE_SHIFT) + & PSCI_0_2_POWER_STATE_TYPE_MASK) | + ((state.affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT) + & PSCI_0_2_POWER_STATE_AFFL_MASK); } /* @@ -110,6 +105,14 @@ static noinline int __invoke_psci_fn_smc(u32 function_id, u32 arg0, u32 arg1, return function_id; } +static int psci_get_version(void) +{ + int err; + + err = invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0); + return err; +} + static int psci_cpu_suspend(struct psci_power_state state, unsigned long entry_point) { @@ -153,26 +156,36 @@ static int psci_migrate(unsigned long cpuid) return psci_to_linux_errno(err); } -static const struct of_device_id psci_of_match[] __initconst = { - { .compatible = "arm,psci", }, - {}, -}; +static int psci_affinity_info(unsigned long target_affinity, + unsigned long lowest_affinity_level) +{ + int err; + u32 fn; + + fn = psci_function_id[PSCI_FN_AFFINITY_INFO]; + err = invoke_psci_fn(fn, target_affinity, lowest_affinity_level, 0); + return err; +} -void __init psci_init(void) +static int psci_migrate_info_type(void) { - struct device_node *np; - const char *method; - u32 id; + int err; + u32 fn; - np = of_find_matching_node(NULL, psci_of_match); - if (!np) - return; + fn = psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE]; + err = invoke_psci_fn(fn, 0, 0, 0); + return err; +} + +static int get_set_conduit_method(struct device_node *np) +{ + const char *method; - pr_info("probing function IDs from device-tree\n"); + pr_info("probing for conduit method from DT.\n"); if (of_property_read_string(np, "method", &method)) { - pr_warning("missing \"method\" property\n"); - goto out_put_node; + pr_warn("missing \"method\" property\n"); + return -ENXIO; } if (!strcmp("hvc", method)) { @@ -180,10 +193,99 @@ void __init psci_init(void) } else if (!strcmp("smc", method)) { invoke_psci_fn = __invoke_psci_fn_smc; } else { - pr_warning("invalid \"method\" property: %s\n", method); + pr_warn("invalid \"method\" property: %s\n", method); + return -EINVAL; + } + return 0; +} + +static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd) +{ + invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0); +} + +static void psci_sys_poweroff(void) +{ + invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0); +} + +/* + * PSCI Function IDs for v0.2+ are well defined so use + * standard values. + */ +static int psci_0_2_init(struct device_node *np) +{ + int err, ver; + + err = get_set_conduit_method(np); + + if (err) + goto out_put_node; + + ver = psci_get_version(); + + if (ver == PSCI_RET_NOT_SUPPORTED) { + /* PSCI v0.2 mandates implementation of PSCI_ID_VERSION. */ + pr_err("PSCI firmware does not comply with the v0.2 spec.\n"); + err = -EOPNOTSUPP; goto out_put_node; + } else { + pr_info("PSCIv%d.%d detected in firmware.\n", + PSCI_VERSION_MAJOR(ver), + PSCI_VERSION_MINOR(ver)); + + if (PSCI_VERSION_MAJOR(ver) == 0 && + PSCI_VERSION_MINOR(ver) < 2) { + err = -EINVAL; + pr_err("Conflicting PSCI version detected.\n"); + goto out_put_node; + } } + pr_info("Using standard PSCI v0.2 function IDs\n"); + psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN_CPU_SUSPEND; + psci_ops.cpu_suspend = psci_cpu_suspend; + + psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF; + psci_ops.cpu_off = psci_cpu_off; + + psci_function_id[PSCI_FN_CPU_ON] = PSCI_0_2_FN_CPU_ON; + psci_ops.cpu_on = psci_cpu_on; + + psci_function_id[PSCI_FN_MIGRATE] = PSCI_0_2_FN_MIGRATE; + psci_ops.migrate = psci_migrate; + + psci_function_id[PSCI_FN_AFFINITY_INFO] = PSCI_0_2_FN_AFFINITY_INFO; + psci_ops.affinity_info = psci_affinity_info; + + psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE] = + PSCI_0_2_FN_MIGRATE_INFO_TYPE; + psci_ops.migrate_info_type = psci_migrate_info_type; + + arm_pm_restart = psci_sys_reset; + + pm_power_off = psci_sys_poweroff; + +out_put_node: + of_node_put(np); + return err; +} + +/* + * PSCI < v0.2 get PSCI Function IDs via DT. + */ +static int psci_0_1_init(struct device_node *np) +{ + u32 id; + int err; + + err = get_set_conduit_method(np); + + if (err) + goto out_put_node; + + pr_info("Using PSCI v0.1 Function IDs from DT\n"); + if (!of_property_read_u32(np, "cpu_suspend", &id)) { psci_function_id[PSCI_FN_CPU_SUSPEND] = id; psci_ops.cpu_suspend = psci_cpu_suspend; @@ -206,5 +308,25 @@ void __init psci_init(void) out_put_node: of_node_put(np); - return; + return err; +} + +static const struct of_device_id psci_of_match[] __initconst = { + { .compatible = "arm,psci", .data = psci_0_1_init}, + { .compatible = "arm,psci-0.2", .data = psci_0_2_init}, + {}, +}; + +int __init psci_init(void) +{ + struct device_node *np; + const struct of_device_id *matched_np; + psci_initcall_t init_fn; + + np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np); + if (!np) + return -ENODEV; + + init_fn = (psci_initcall_t)matched_np->data; + return init_fn(np); } diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c index 570a48cc3d64..28a1db4da704 100644 --- a/arch/arm/kernel/psci_smp.c +++ b/arch/arm/kernel/psci_smp.c @@ -16,6 +16,8 @@ #include <linux/init.h> #include <linux/smp.h> #include <linux/of.h> +#include <linux/delay.h> +#include <uapi/linux/psci.h> #include <asm/psci.h> #include <asm/smp_plat.h> @@ -66,6 +68,36 @@ void __ref psci_cpu_die(unsigned int cpu) /* We should never return */ panic("psci: cpu %d failed to shutdown\n", cpu); } + +int __ref psci_cpu_kill(unsigned int cpu) +{ + int err, i; + + if (!psci_ops.affinity_info) + return 1; + /* + * cpu_kill could race with cpu_die and we can + * potentially end up declaring this cpu undead + * while it is dying. So, try again a few times. + */ + + for (i = 0; i < 10; i++) { + err = psci_ops.affinity_info(cpu_logical_map(cpu), 0); + if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) { + pr_info("CPU%d killed.\n", cpu); + return 1; + } + + msleep(10); + pr_info("Retrying again to check for CPU kill\n"); + } + + pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n", + cpu, err); + /* Make platform_cpu_kill() fail. */ + return 0; +} + #endif bool __init psci_smp_available(void) @@ -78,5 +110,6 @@ struct smp_operations __initdata psci_smp_ops = { .smp_boot_secondary = psci_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU .cpu_die = psci_cpu_die, + .cpu_kill = psci_cpu_kill, #endif }; diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index f0e50a0f3a65..3c82b37c0f9e 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -197,6 +197,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: case KVM_CAP_ONE_REG: case KVM_CAP_ARM_PSCI: + case KVM_CAP_ARM_PSCI_0_2: r = 1; break; case KVM_CAP_COALESCED_MMIO: diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 0de91fc6de0f..4c979d466cc1 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -38,14 +38,18 @@ static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) { + int ret; + trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0), kvm_vcpu_hvc_get_imm(vcpu)); - if (kvm_psci_call(vcpu)) + ret = kvm_psci_call(vcpu); + if (ret < 0) { + kvm_inject_undefined(vcpu); return 1; + } - kvm_inject_undefined(vcpu); - return 1; + return ret; } static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 448f60e8d23c..09cf37737ee2 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -27,6 +27,36 @@ * as described in ARM document number ARM DEN 0022A. */ +#define AFFINITY_MASK(level) ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1) + +static unsigned long psci_affinity_mask(unsigned long affinity_level) +{ + if (affinity_level <= 3) + return MPIDR_HWID_BITMASK & AFFINITY_MASK(affinity_level); + + return 0; +} + +static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu) +{ + /* + * NOTE: For simplicity, we make VCPU suspend emulation to be + * same-as WFI (Wait-for-interrupt) emulation. + * + * This means for KVM the wakeup events are interrupts and + * this is consistent with intended use of StateID as described + * in section 5.4.1 of PSCI v0.2 specification (ARM DEN 0022A). + * + * Further, we also treat power-down request to be same as + * stand-by request as-per section 5.4.2 clause 3 of PSCI v0.2 + * specification (ARM DEN 0022A). This means all suspend states + * for KVM will preserve the register state. + */ + kvm_vcpu_block(vcpu); + + return PSCI_RET_SUCCESS; +} + static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) { vcpu->arch.pause = true; @@ -38,6 +68,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) struct kvm_vcpu *vcpu = NULL, *tmp; wait_queue_head_t *wq; unsigned long cpu_id; + unsigned long context_id; unsigned long mpidr; phys_addr_t target_pc; int i; @@ -58,10 +89,17 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) * Make sure the caller requested a valid CPU and that the CPU is * turned off. */ - if (!vcpu || !vcpu->arch.pause) - return KVM_PSCI_RET_INVAL; + if (!vcpu) + return PSCI_RET_INVALID_PARAMS; + if (!vcpu->arch.pause) { + if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1) + return PSCI_RET_ALREADY_ON; + else + return PSCI_RET_INVALID_PARAMS; + } target_pc = *vcpu_reg(source_vcpu, 2); + context_id = *vcpu_reg(source_vcpu, 3); kvm_reset_vcpu(vcpu); @@ -76,26 +114,160 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) kvm_vcpu_set_be(vcpu); *vcpu_pc(vcpu) = target_pc; + /* + * NOTE: We always update r0 (or x0) because for PSCI v0.1 + * the general puspose registers are undefined upon CPU_ON. + */ + *vcpu_reg(vcpu, 0) = context_id; vcpu->arch.pause = false; smp_mb(); /* Make sure the above is visible */ wq = kvm_arch_vcpu_wq(vcpu); wake_up_interruptible(wq); - return KVM_PSCI_RET_SUCCESS; + return PSCI_RET_SUCCESS; } -/** - * kvm_psci_call - handle PSCI call if r0 value is in range - * @vcpu: Pointer to the VCPU struct - * - * Handle PSCI calls from guests through traps from HVC instructions. - * The calling convention is similar to SMC calls to the secure world where - * the function number is placed in r0 and this function returns true if the - * function number specified in r0 is withing the PSCI range, and false - * otherwise. - */ -bool kvm_psci_call(struct kvm_vcpu *vcpu) +static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) +{ + int i; + unsigned long mpidr; + unsigned long target_affinity; + unsigned long target_affinity_mask; + unsigned long lowest_affinity_level; + struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu *tmp; + + target_affinity = *vcpu_reg(vcpu, 1); + lowest_affinity_level = *vcpu_reg(vcpu, 2); + + /* Determine target affinity mask */ + target_affinity_mask = psci_affinity_mask(lowest_affinity_level); + if (!target_affinity_mask) + return PSCI_RET_INVALID_PARAMS; + + /* Ignore other bits of target affinity */ + target_affinity &= target_affinity_mask; + + /* + * If one or more VCPU matching target affinity are running + * then ON else OFF + */ + kvm_for_each_vcpu(i, tmp, kvm) { + mpidr = kvm_vcpu_get_mpidr(tmp); + if (((mpidr & target_affinity_mask) == target_affinity) && + !tmp->arch.pause) { + return PSCI_0_2_AFFINITY_LEVEL_ON; + } + } + + return PSCI_0_2_AFFINITY_LEVEL_OFF; +} + +static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type) +{ + memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); + vcpu->run->system_event.type = type; + vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; +} + +static void kvm_psci_system_off(struct kvm_vcpu *vcpu) +{ + kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN); +} + +static void kvm_psci_system_reset(struct kvm_vcpu *vcpu) +{ + kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET); +} + +int kvm_psci_version(struct kvm_vcpu *vcpu) +{ + if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features)) + return KVM_ARM_PSCI_0_2; + + return KVM_ARM_PSCI_0_1; +} + +static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) +{ + int ret = 1; + unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0); + unsigned long val; + + switch (psci_fn) { + case PSCI_0_2_FN_PSCI_VERSION: + /* + * Bits[31:16] = Major Version = 0 + * Bits[15:0] = Minor Version = 2 + */ + val = 2; + break; + case PSCI_0_2_FN_CPU_SUSPEND: + case PSCI_0_2_FN64_CPU_SUSPEND: + val = kvm_psci_vcpu_suspend(vcpu); + break; + case PSCI_0_2_FN_CPU_OFF: + kvm_psci_vcpu_off(vcpu); + val = PSCI_RET_SUCCESS; + break; + case PSCI_0_2_FN_CPU_ON: + case PSCI_0_2_FN64_CPU_ON: + val = kvm_psci_vcpu_on(vcpu); + break; + case PSCI_0_2_FN_AFFINITY_INFO: + case PSCI_0_2_FN64_AFFINITY_INFO: + val = kvm_psci_vcpu_affinity_info(vcpu); + break; + case PSCI_0_2_FN_MIGRATE: + case PSCI_0_2_FN64_MIGRATE: + val = PSCI_RET_NOT_SUPPORTED; + break; + case PSCI_0_2_FN_MIGRATE_INFO_TYPE: + /* + * Trusted OS is MP hence does not require migration + * or + * Trusted OS is not present + */ + val = PSCI_0_2_TOS_MP; + break; + case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU: + case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU: + val = PSCI_RET_NOT_SUPPORTED; + break; + case PSCI_0_2_FN_SYSTEM_OFF: + kvm_psci_system_off(vcpu); + /* + * We should'nt be going back to guest VCPU after + * receiving SYSTEM_OFF request. + * + * If user space accidently/deliberately resumes + * guest VCPU after SYSTEM_OFF request then guest + * VCPU should see internal failure from PSCI return + * value. To achieve this, we preload r0 (or x0) with + * PSCI return value INTERNAL_FAILURE. + */ + val = PSCI_RET_INTERNAL_FAILURE; + ret = 0; + break; + case PSCI_0_2_FN_SYSTEM_RESET: + kvm_psci_system_reset(vcpu); + /* + * Same reason as SYSTEM_OFF for preloading r0 (or x0) + * with PSCI return value INTERNAL_FAILURE. + */ + val = PSCI_RET_INTERNAL_FAILURE; + ret = 0; + break; + default: + return -EINVAL; + } + + *vcpu_reg(vcpu, 0) = val; + return ret; +} + +static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) { unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0); unsigned long val; @@ -103,20 +275,45 @@ bool kvm_psci_call(struct kvm_vcpu *vcpu) switch (psci_fn) { case KVM_PSCI_FN_CPU_OFF: kvm_psci_vcpu_off(vcpu); - val = KVM_PSCI_RET_SUCCESS; + val = PSCI_RET_SUCCESS; break; case KVM_PSCI_FN_CPU_ON: val = kvm_psci_vcpu_on(vcpu); break; case KVM_PSCI_FN_CPU_SUSPEND: case KVM_PSCI_FN_MIGRATE: - val = KVM_PSCI_RET_NI; + val = PSCI_RET_NOT_SUPPORTED; break; - default: - return false; + return -EINVAL; } *vcpu_reg(vcpu, 0) = val; - return true; + return 1; +} + +/** + * kvm_psci_call - handle PSCI call if r0 value is in range + * @vcpu: Pointer to the VCPU struct + * + * Handle PSCI calls from guests through traps from HVC instructions. + * The calling convention is similar to SMC calls to the secure world + * where the function number is placed in r0. + * + * This function returns: > 0 (success), 0 (success but exit to user + * space), and < 0 (errors) + * + * Errors: + * -EINVAL: Unrecognized PSCI function + */ +int kvm_psci_call(struct kvm_vcpu *vcpu) +{ + switch (kvm_psci_version(vcpu)) { + case KVM_ARM_PSCI_0_2: + return kvm_psci_0_2_call(vcpu); + case KVM_ARM_PSCI_0_1: + return kvm_psci_0_1_call(vcpu); + default: + return -EINVAL; + }; } diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h index 152413076503..d7b4b38a8e86 100644 --- a/arch/arm64/include/asm/cpu_ops.h +++ b/arch/arm64/include/asm/cpu_ops.h @@ -39,6 +39,7 @@ struct device_node; * from the cpu to be killed. * @cpu_die: Makes a cpu leave the kernel. Must not fail. Called from the * cpu being killed. + * @cpu_kill: Ensures a cpu has left the kernel. Called from another cpu. * @cpu_suspend: Suspends a cpu and saves the required context. May fail owing * to wrong parameters or error conditions. Called from the * CPU being suspended. Must be called with IRQs disabled. @@ -52,6 +53,7 @@ struct cpu_operations { #ifdef CONFIG_HOTPLUG_CPU int (*cpu_disable)(unsigned int cpu); void (*cpu_die)(unsigned int cpu); + int (*cpu_kill)(unsigned int cpu); #endif #ifdef CONFIG_ARM64_CPU_SUSPEND int (*cpu_suspend)(unsigned long); diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index c404fb0df3a6..27f54a7cc81b 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -41,6 +41,7 @@ #define ARM_CPU_PART_AEM_V8 0xD0F0 #define ARM_CPU_PART_FOUNDATION 0xD000 +#define ARM_CPU_PART_CORTEX_A53 0xD030 #define ARM_CPU_PART_CORTEX_A57 0xD070 #define APM_CPU_PART_POTENZA 0x0000 diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 0a1d69751562..92242ce06309 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -39,7 +39,7 @@ #include <kvm/arm_vgic.h> #include <kvm/arm_arch_timer.h> -#define KVM_VCPU_MAX_FEATURES 2 +#define KVM_VCPU_MAX_FEATURES 3 struct kvm_vcpu; int kvm_target_cpu(void); diff --git a/arch/arm64/include/asm/kvm_psci.h b/arch/arm64/include/asm/kvm_psci.h index e301a4816355..bc39e557c56c 100644 --- a/arch/arm64/include/asm/kvm_psci.h +++ b/arch/arm64/include/asm/kvm_psci.h @@ -18,6 +18,10 @@ #ifndef __ARM64_KVM_PSCI_H__ #define __ARM64_KVM_PSCI_H__ -bool kvm_psci_call(struct kvm_vcpu *vcpu); +#define KVM_ARM_PSCI_0_1 1 +#define KVM_ARM_PSCI_0_2 2 + +int kvm_psci_version(struct kvm_vcpu *vcpu); +int kvm_psci_call(struct kvm_vcpu *vcpu); #endif /* __ARM64_KVM_PSCI_H__ */ diff --git a/arch/arm64/include/asm/psci.h b/arch/arm64/include/asm/psci.h index d15ab8b46336..e5312ea0ec1a 100644 --- a/arch/arm64/include/asm/psci.h +++ b/arch/arm64/include/asm/psci.h @@ -14,6 +14,6 @@ #ifndef __ASM_PSCI_H #define __ASM_PSCI_H -void psci_init(void); +int psci_init(void); #endif /* __ASM_PSCI_H */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index eaf54a30bedc..e633ff8cdec8 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -31,6 +31,7 @@ #define KVM_NR_SPSR 5 #ifndef __ASSEMBLY__ +#include <linux/psci.h> #include <asm/types.h> #include <asm/ptrace.h> @@ -56,8 +57,9 @@ struct kvm_regs { #define KVM_ARM_TARGET_FOUNDATION_V8 1 #define KVM_ARM_TARGET_CORTEX_A57 2 #define KVM_ARM_TARGET_XGENE_POTENZA 3 +#define KVM_ARM_TARGET_CORTEX_A53 4 -#define KVM_ARM_NUM_TARGETS 4 +#define KVM_ARM_NUM_TARGETS 5 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ #define KVM_ARM_DEVICE_TYPE_SHIFT 0 @@ -77,6 +79,7 @@ struct kvm_regs { #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ #define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */ +#define KVM_ARM_VCPU_PSCI_0_2 2 /* CPU uses PSCI v0.2 */ struct kvm_vcpu_init { __u32 target; @@ -186,10 +189,10 @@ struct kvm_arch_memory_slot { #define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2) #define KVM_PSCI_FN_MIGRATE KVM_PSCI_FN(3) -#define KVM_PSCI_RET_SUCCESS 0 -#define KVM_PSCI_RET_NI ((unsigned long)-1) -#define KVM_PSCI_RET_INVAL ((unsigned long)-2) -#define KVM_PSCI_RET_DENIED ((unsigned long)-3) +#define KVM_PSCI_RET_SUCCESS PSCI_RET_SUCCESS +#define KVM_PSCI_RET_NI PSCI_RET_NOT_SUPPORTED +#define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS +#define KVM_PSCI_RET_DENIED PSCI_RET_DENIED #endif diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index ea4828a4aa96..9e9798f91172 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -18,12 +18,17 @@ #include <linux/init.h> #include <linux/of.h> #include <linux/smp.h> +#include <linux/reboot.h> +#include <linux/pm.h> +#include <linux/delay.h> +#include <uapi/linux/psci.h> #include <asm/compiler.h> #include <asm/cpu_ops.h> #include <asm/errno.h> #include <asm/psci.h> #include <asm/smp_plat.h> +#include <asm/system_misc.h> #define PSCI_POWER_STATE_TYPE_STANDBY 0 #define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 @@ -40,58 +45,52 @@ struct psci_operations { int (*cpu_off)(struct psci_power_state state); int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); int (*migrate)(unsigned long cpuid); + int (*affinity_info)(unsigned long target_affinity, + unsigned long lowest_affinity_level); + int (*migrate_info_type)(void); }; static struct psci_operations psci_ops; static int (*invoke_psci_fn)(u64, u64, u64, u64); +typedef int (*psci_initcall_t)(const struct device_node *); enum psci_function { PSCI_FN_CPU_SUSPEND, PSCI_FN_CPU_ON, PSCI_FN_CPU_OFF, PSCI_FN_MIGRATE, + PSCI_FN_AFFINITY_INFO, + PSCI_FN_MIGRATE_INFO_TYPE, PSCI_FN_MAX, }; static u32 psci_function_id[PSCI_FN_MAX]; -#define PSCI_RET_SUCCESS 0 -#define PSCI_RET_EOPNOTSUPP -1 -#define PSCI_RET_EINVAL -2 -#define PSCI_RET_EPERM -3 - static int psci_to_linux_errno(int errno) { switch (errno) { case PSCI_RET_SUCCESS: return 0; - case PSCI_RET_EOPNOTSUPP: + case PSCI_RET_NOT_SUPPORTED: return -EOPNOTSUPP; - case PSCI_RET_EINVAL: + case PSCI_RET_INVALID_PARAMS: return -EINVAL; - case PSCI_RET_EPERM: + case PSCI_RET_DENIED: return -EPERM; }; return -EINVAL; } -#define PSCI_POWER_STATE_ID_MASK 0xffff -#define PSCI_POWER_STATE_ID_SHIFT 0 -#define PSCI_POWER_STATE_TYPE_MASK 0x1 -#define PSCI_POWER_STATE_TYPE_SHIFT 16 -#define PSCI_POWER_STATE_AFFL_MASK 0x3 -#define PSCI_POWER_STATE_AFFL_SHIFT 24 - static u32 psci_power_state_pack(struct psci_power_state state) { - return ((state.id & PSCI_POWER_STATE_ID_MASK) - << PSCI_POWER_STATE_ID_SHIFT) | - ((state.type & PSCI_POWER_STATE_TYPE_MASK) - << PSCI_POWER_STATE_TYPE_SHIFT) | - ((state.affinity_level & PSCI_POWER_STATE_AFFL_MASK) - << PSCI_POWER_STATE_AFFL_SHIFT); + return ((state.id << PSCI_0_2_POWER_STATE_ID_SHIFT) + & PSCI_0_2_POWER_STATE_ID_MASK) | + ((state.type << PSCI_0_2_POWER_STATE_TYPE_SHIFT) + & PSCI_0_2_POWER_STATE_TYPE_MASK) | + ((state.affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT) + & PSCI_0_2_POWER_STATE_AFFL_MASK); } /* @@ -128,6 +127,14 @@ static noinline int __invoke_psci_fn_smc(u64 function_id, u64 arg0, u64 arg1, return function_id; } +static int psci_get_version(void) +{ + int err; + + err = invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0); + return err; +} + static int psci_cpu_suspend(struct psci_power_state state, unsigned long entry_point) { @@ -171,26 +178,36 @@ static int psci_migrate(unsigned long cpuid) return psci_to_linux_errno(err); } -static const struct of_device_id psci_of_match[] __initconst = { - { .compatible = "arm,psci", }, - {}, -}; +static int psci_affinity_info(unsigned long target_affinity, + unsigned long lowest_affinity_level) +{ + int err; + u32 fn; + + fn = psci_function_id[PSCI_FN_AFFINITY_INFO]; + err = invoke_psci_fn(fn, target_affinity, lowest_affinity_level, 0); + return err; +} -void __init psci_init(void) +static int psci_migrate_info_type(void) { - struct device_node *np; - const char *method; - u32 id; + int err; + u32 fn; - np = of_find_matching_node(NULL, psci_of_match); - if (!np) - return; + fn = psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE]; + err = invoke_psci_fn(fn, 0, 0, 0); + return err; +} - pr_info("probing function IDs from device-tree\n"); +static int get_set_conduit_method(struct device_node *np) +{ + const char *method; + + pr_info("probing for conduit method from DT.\n"); if (of_property_read_string(np, "method", &method)) { - pr_warning("missing \"method\" property\n"); - goto out_put_node; + pr_warn("missing \"method\" property\n"); + return -ENXIO; } if (!strcmp("hvc", method)) { @@ -198,10 +215,99 @@ void __init psci_init(void) } else if (!strcmp("smc", method)) { invoke_psci_fn = __invoke_psci_fn_smc; } else { - pr_warning("invalid \"method\" property: %s\n", method); + pr_warn("invalid \"method\" property: %s\n", method); + return -EINVAL; + } + return 0; +} + +static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd) +{ + invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0); +} + +static void psci_sys_poweroff(void) +{ + invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0); +} + +/* + * PSCI Function IDs for v0.2+ are well defined so use + * standard values. + */ +static int psci_0_2_init(struct device_node *np) +{ + int err, ver; + + err = get_set_conduit_method(np); + + if (err) + goto out_put_node; + + ver = psci_get_version(); + + if (ver == PSCI_RET_NOT_SUPPORTED) { + /* PSCI v0.2 mandates implementation of PSCI_ID_VERSION. */ + pr_err("PSCI firmware does not comply with the v0.2 spec.\n"); + err = -EOPNOTSUPP; goto out_put_node; + } else { + pr_info("PSCIv%d.%d detected in firmware.\n", + PSCI_VERSION_MAJOR(ver), + PSCI_VERSION_MINOR(ver)); + + if (PSCI_VERSION_MAJOR(ver) == 0 && + PSCI_VERSION_MINOR(ver) < 2) { + err = -EINVAL; + pr_err("Conflicting PSCI version detected.\n"); + goto out_put_node; + } } + pr_info("Using standard PSCI v0.2 function IDs\n"); + psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN64_CPU_SUSPEND; + psci_ops.cpu_suspend = psci_cpu_suspend; + + psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF; + psci_ops.cpu_off = psci_cpu_off; + + psci_function_id[PSCI_FN_CPU_ON] = PSCI_0_2_FN64_CPU_ON; + psci_ops.cpu_on = psci_cpu_on; + + psci_function_id[PSCI_FN_MIGRATE] = PSCI_0_2_FN64_MIGRATE; + psci_ops.migrate = psci_migrate; + + psci_function_id[PSCI_FN_AFFINITY_INFO] = PSCI_0_2_FN64_AFFINITY_INFO; + psci_ops.affinity_info = psci_affinity_info; + + psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE] = + PSCI_0_2_FN_MIGRATE_INFO_TYPE; + psci_ops.migrate_info_type = psci_migrate_info_type; + + arm_pm_restart = psci_sys_reset; + + pm_power_off = psci_sys_poweroff; + +out_put_node: + of_node_put(np); + return err; +} + +/* + * PSCI < v0.2 get PSCI Function IDs via DT. + */ +static int psci_0_1_init(struct device_node *np) +{ + u32 id; + int err; + + err = get_set_conduit_method(np); + + if (err) + goto out_put_node; + + pr_info("Using PSCI v0.1 Function IDs from DT\n"); + if (!of_property_read_u32(np, "cpu_suspend", &id)) { psci_function_id[PSCI_FN_CPU_SUSPEND] = id; psci_ops.cpu_suspend = psci_cpu_suspend; @@ -224,7 +330,28 @@ void __init psci_init(void) out_put_node: of_node_put(np); - return; + return err; +} + +static const struct of_device_id psci_of_match[] __initconst = { + { .compatible = "arm,psci", .data = psci_0_1_init}, + { .compatible = "arm,psci-0.2", .data = psci_0_2_init}, + {}, +}; + +int __init psci_init(void) +{ + struct device_node *np; + const struct of_device_id *matched_np; + psci_initcall_t init_fn; + + np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np); + + if (!np) + return -ENODEV; + + init_fn = (psci_initcall_t)matched_np->data; + return init_fn(np); } #ifdef CONFIG_SMP @@ -277,6 +404,35 @@ static void cpu_psci_cpu_die(unsigned int cpu) pr_crit("unable to power off CPU%u (%d)\n", cpu, ret); } + +static int cpu_psci_cpu_kill(unsigned int cpu) +{ + int err, i; + + if (!psci_ops.affinity_info) + return 1; + /* + * cpu_kill could race with cpu_die and we can + * potentially end up declaring this cpu undead + * while it is dying. So, try again a few times. + */ + + for (i = 0; i < 10; i++) { + err = psci_ops.affinity_info(cpu_logical_map(cpu), 0); + if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) { + pr_info("CPU%d killed.\n", cpu); + return 1; + } + + msleep(10); + pr_info("Retrying again to check for CPU kill\n"); + } + + pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n", + cpu, err); + /* Make op_cpu_kill() fail. */ + return 0; +} #endif const struct cpu_operations cpu_psci_ops = { @@ -287,6 +443,7 @@ const struct cpu_operations cpu_psci_ops = { #ifdef CONFIG_HOTPLUG_CPU .cpu_disable = cpu_psci_cpu_disable, .cpu_die = cpu_psci_cpu_die, + .cpu_kill = cpu_psci_cpu_kill, #endif }; diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index f0a141dd5655..c3cb160edc69 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -228,6 +228,19 @@ int __cpu_disable(void) return 0; } +static int op_cpu_kill(unsigned int cpu) +{ + /* + * If we have no means of synchronising with the dying CPU, then assume + * that it is really dead. We can only wait for an arbitrary length of + * time and hope that it's dead, so let's skip the wait and just hope. + */ + if (!cpu_ops[cpu]->cpu_kill) + return 1; + + return cpu_ops[cpu]->cpu_kill(cpu); +} + static DECLARE_COMPLETION(cpu_died); /* @@ -241,6 +254,15 @@ void __cpu_die(unsigned int cpu) return; } pr_notice("CPU%u: shutdown\n", cpu); + + /* + * Now that the dying CPU is beyond the point of no return w.r.t. + * in-kernel synchronisation, try to get the firwmare to help us to + * verify that it has really left the kernel before we consider + * clobbering anything it might still be using. + */ + if (!op_cpu_kill(cpu)) + pr_warn("CPU%d may not have shut down cleanly\n", cpu); } /* diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 08745578d54d..60b5c31f3c10 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -214,6 +214,8 @@ int __attribute_const__ kvm_target_cpu(void) return KVM_ARM_TARGET_AEM_V8; case ARM_CPU_PART_FOUNDATION: return KVM_ARM_TARGET_FOUNDATION_V8; + case ARM_CPU_PART_CORTEX_A53: + return KVM_ARM_TARGET_CORTEX_A53; case ARM_CPU_PART_CORTEX_A57: return KVM_ARM_TARGET_CORTEX_A57; }; diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 7bc41eab4c64..182415e1a952 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -30,11 +30,15 @@ typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - if (kvm_psci_call(vcpu)) + int ret; + + ret = kvm_psci_call(vcpu); + if (ret < 0) { + kvm_inject_undefined(vcpu); return 1; + } - kvm_inject_undefined(vcpu); - return 1; + return ret; } static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c index 8fe6f76b0edc..475fd2929310 100644 --- a/arch/arm64/kvm/sys_regs_generic_v8.c +++ b/arch/arm64/kvm/sys_regs_generic_v8.c @@ -88,6 +88,8 @@ static int __init sys_reg_genericv8_init(void) &genericv8_target_table); kvm_register_target_sys_reg_table(KVM_ARM_TARGET_FOUNDATION_V8, &genericv8_target_table); + kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A53, + &genericv8_target_table); kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57, &genericv8_target_table); kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA, diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 5cd695f905a1..5e0014e864f3 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1756,14 +1756,14 @@ config KVM_GUEST help Select this option if building a guest kernel for KVM (Trap & Emulate) mode -config KVM_HOST_FREQ - int "KVM Host Processor Frequency (MHz)" +config KVM_GUEST_TIMER_FREQ + int "Count/Compare Timer Frequency (MHz)" depends on KVM_GUEST - default 500 + default 100 help - Select this option if building a guest kernel for KVM to skip - RTC emulation when determining guest CPU Frequency. Instead, the guest - processor frequency is automatically derived from the host frequency. + Set this to non-zero if building a guest kernel for KVM to skip RTC + emulation when determining guest CPU Frequency. Instead, the guest's + timer frequency is specified directly. choice prompt "Kernel page size" diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 060aaa6348d7..b0aa95565752 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -19,6 +19,38 @@ #include <linux/threads.h> #include <linux/spinlock.h> +/* MIPS KVM register ids */ +#define MIPS_CP0_32(_R, _S) \ + (KVM_REG_MIPS | KVM_REG_SIZE_U32 | 0x10000 | (8 * (_R) + (_S))) + +#define MIPS_CP0_64(_R, _S) \ + (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 0x10000 | (8 * (_R) + (_S))) + +#define KVM_REG_MIPS_CP0_INDEX MIPS_CP0_32(0, 0) +#define KVM_REG_MIPS_CP0_ENTRYLO0 MIPS_CP0_64(2, 0) +#define KVM_REG_MIPS_CP0_ENTRYLO1 MIPS_CP0_64(3, 0) +#define KVM_REG_MIPS_CP0_CONTEXT MIPS_CP0_64(4, 0) +#define KVM_REG_MIPS_CP0_USERLOCAL MIPS_CP0_64(4, 2) +#define KVM_REG_MIPS_CP0_PAGEMASK MIPS_CP0_32(5, 0) +#define KVM_REG_MIPS_CP0_PAGEGRAIN MIPS_CP0_32(5, 1) +#define KVM_REG_MIPS_CP0_WIRED MIPS_CP0_32(6, 0) +#define KVM_REG_MIPS_CP0_HWRENA MIPS_CP0_32(7, 0) +#define KVM_REG_MIPS_CP0_BADVADDR MIPS_CP0_64(8, 0) +#define KVM_REG_MIPS_CP0_COUNT MIPS_CP0_32(9, 0) +#define KVM_REG_MIPS_CP0_ENTRYHI MIPS_CP0_64(10, 0) +#define KVM_REG_MIPS_CP0_COMPARE MIPS_CP0_32(11, 0) +#define KVM_REG_MIPS_CP0_STATUS MIPS_CP0_32(12, 0) +#define KVM_REG_MIPS_CP0_CAUSE MIPS_CP0_32(13, 0) +#define KVM_REG_MIPS_CP0_EPC MIPS_CP0_64(14, 0) +#define KVM_REG_MIPS_CP0_EBASE MIPS_CP0_64(15, 1) +#define KVM_REG_MIPS_CP0_CONFIG MIPS_CP0_32(16, 0) +#define KVM_REG_MIPS_CP0_CONFIG1 MIPS_CP0_32(16, 1) +#define KVM_REG_MIPS_CP0_CONFIG2 MIPS_CP0_32(16, 2) +#define KVM_REG_MIPS_CP0_CONFIG3 MIPS_CP0_32(16, 3) +#define KVM_REG_MIPS_CP0_CONFIG7 MIPS_CP0_32(16, 7) +#define KVM_REG_MIPS_CP0_XCONTEXT MIPS_CP0_64(20, 0) +#define KVM_REG_MIPS_CP0_ERROREPC MIPS_CP0_64(30, 0) + #define KVM_MAX_VCPUS 1 #define KVM_USER_MEM_SLOTS 8 @@ -372,8 +404,19 @@ struct kvm_vcpu_arch { u32 io_gpr; /* GPR used as IO source/target */ - /* Used to calibrate the virutal count register for the guest */ - int32_t host_cp0_count; + struct hrtimer comparecount_timer; + /* Count timer control KVM register */ + uint32_t count_ctl; + /* Count bias from the raw time */ + uint32_t count_bias; + /* Frequency of timer in Hz */ + uint32_t count_hz; + /* Dynamic nanosecond bias (multiple of count_period) to avoid overflow */ + s64 count_dyn_bias; + /* Resume time */ + ktime_t count_resume; + /* Period of timer tick in ns */ + u64 count_period; /* Bitmask of exceptions that are pending */ unsigned long pending_exceptions; @@ -394,8 +437,6 @@ struct kvm_vcpu_arch { uint32_t guest_kernel_asid[NR_CPUS]; struct mm_struct guest_kernel_mm, guest_user_mm; - struct hrtimer comparecount_timer; - int last_sched_cpu; /* WAIT executed */ @@ -410,6 +451,7 @@ struct kvm_vcpu_arch { #define kvm_read_c0_guest_context(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0]) #define kvm_write_c0_guest_context(cop0, val) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0] = (val)) #define kvm_read_c0_guest_userlocal(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][2]) +#define kvm_write_c0_guest_userlocal(cop0, val) (cop0->reg[MIPS_CP0_TLB_CONTEXT][2] = (val)) #define kvm_read_c0_guest_pagemask(cop0) (cop0->reg[MIPS_CP0_TLB_PG_MASK][0]) #define kvm_write_c0_guest_pagemask(cop0, val) (cop0->reg[MIPS_CP0_TLB_PG_MASK][0] = (val)) #define kvm_read_c0_guest_wired(cop0) (cop0->reg[MIPS_CP0_TLB_WIRED][0]) @@ -449,15 +491,74 @@ struct kvm_vcpu_arch { #define kvm_read_c0_guest_errorepc(cop0) (cop0->reg[MIPS_CP0_ERROR_PC][0]) #define kvm_write_c0_guest_errorepc(cop0, val) (cop0->reg[MIPS_CP0_ERROR_PC][0] = (val)) +/* + * Some of the guest registers may be modified asynchronously (e.g. from a + * hrtimer callback in hard irq context) and therefore need stronger atomicity + * guarantees than other registers. + */ + +static inline void _kvm_atomic_set_c0_guest_reg(unsigned long *reg, + unsigned long val) +{ + unsigned long temp; + do { + __asm__ __volatile__( + " .set mips3 \n" + " " __LL "%0, %1 \n" + " or %0, %2 \n" + " " __SC "%0, %1 \n" + " .set mips0 \n" + : "=&r" (temp), "+m" (*reg) + : "r" (val)); + } while (unlikely(!temp)); +} + +static inline void _kvm_atomic_clear_c0_guest_reg(unsigned long *reg, + unsigned long val) +{ + unsigned long temp; + do { + __asm__ __volatile__( + " .set mips3 \n" + " " __LL "%0, %1 \n" + " and %0, %2 \n" + " " __SC "%0, %1 \n" + " .set mips0 \n" + : "=&r" (temp), "+m" (*reg) + : "r" (~val)); + } while (unlikely(!temp)); +} + +static inline void _kvm_atomic_change_c0_guest_reg(unsigned long *reg, + unsigned long change, + unsigned long val) +{ + unsigned long temp; + do { + __asm__ __volatile__( + " .set mips3 \n" + " " __LL "%0, %1 \n" + " and %0, %2 \n" + " or %0, %3 \n" + " " __SC "%0, %1 \n" + " .set mips0 \n" + : "=&r" (temp), "+m" (*reg) + : "r" (~change), "r" (val & change)); + } while (unlikely(!temp)); +} + #define kvm_set_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] |= (val)) #define kvm_clear_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] &= ~(val)) -#define kvm_set_c0_guest_cause(cop0, val) (cop0->reg[MIPS_CP0_CAUSE][0] |= (val)) -#define kvm_clear_c0_guest_cause(cop0, val) (cop0->reg[MIPS_CP0_CAUSE][0] &= ~(val)) + +/* Cause can be modified asynchronously from hardirq hrtimer callback */ +#define kvm_set_c0_guest_cause(cop0, val) \ + _kvm_atomic_set_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], val) +#define kvm_clear_c0_guest_cause(cop0, val) \ + _kvm_atomic_clear_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], val) #define kvm_change_c0_guest_cause(cop0, change, val) \ -{ \ - kvm_clear_c0_guest_cause(cop0, change); \ - kvm_set_c0_guest_cause(cop0, ((val) & (change))); \ -} + _kvm_atomic_change_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], \ + change, val) + #define kvm_set_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] |= (val)) #define kvm_clear_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] &= ~(val)) #define kvm_change_c0_guest_ebase(cop0, change, val) \ @@ -468,29 +569,33 @@ struct kvm_vcpu_arch { struct kvm_mips_callbacks { - int (*handle_cop_unusable) (struct kvm_vcpu *vcpu); - int (*handle_tlb_mod) (struct kvm_vcpu *vcpu); - int (*handle_tlb_ld_miss) (struct kvm_vcpu *vcpu); - int (*handle_tlb_st_miss) (struct kvm_vcpu *vcpu); - int (*handle_addr_err_st) (struct kvm_vcpu *vcpu); - int (*handle_addr_err_ld) (struct kvm_vcpu *vcpu); - int (*handle_syscall) (struct kvm_vcpu *vcpu); - int (*handle_res_inst) (struct kvm_vcpu *vcpu); - int (*handle_break) (struct kvm_vcpu *vcpu); - int (*vm_init) (struct kvm *kvm); - int (*vcpu_init) (struct kvm_vcpu *vcpu); - int (*vcpu_setup) (struct kvm_vcpu *vcpu); - gpa_t(*gva_to_gpa) (gva_t gva); - void (*queue_timer_int) (struct kvm_vcpu *vcpu); - void (*dequeue_timer_int) (struct kvm_vcpu *vcpu); - void (*queue_io_int) (struct kvm_vcpu *vcpu, - struct kvm_mips_interrupt *irq); - void (*dequeue_io_int) (struct kvm_vcpu *vcpu, - struct kvm_mips_interrupt *irq); - int (*irq_deliver) (struct kvm_vcpu *vcpu, unsigned int priority, - uint32_t cause); - int (*irq_clear) (struct kvm_vcpu *vcpu, unsigned int priority, - uint32_t cause); + int (*handle_cop_unusable)(struct kvm_vcpu *vcpu); + int (*handle_tlb_mod)(struct kvm_vcpu *vcpu); + int (*handle_tlb_ld_miss)(struct kvm_vcpu *vcpu); + int (*handle_tlb_st_miss)(struct kvm_vcpu *vcpu); + int (*handle_addr_err_st)(struct kvm_vcpu *vcpu); + int (*handle_addr_err_ld)(struct kvm_vcpu *vcpu); + int (*handle_syscall)(struct kvm_vcpu *vcpu); + int (*handle_res_inst)(struct kvm_vcpu *vcpu); + int (*handle_break)(struct kvm_vcpu *vcpu); + int (*vm_init)(struct kvm *kvm); + int (*vcpu_init)(struct kvm_vcpu *vcpu); + int (*vcpu_setup)(struct kvm_vcpu *vcpu); + gpa_t (*gva_to_gpa)(gva_t gva); + void (*queue_timer_int)(struct kvm_vcpu *vcpu); + void (*dequeue_timer_int)(struct kvm_vcpu *vcpu); + void (*queue_io_int)(struct kvm_vcpu *vcpu, + struct kvm_mips_interrupt *irq); + void (*dequeue_io_int)(struct kvm_vcpu *vcpu, + struct kvm_mips_interrupt *irq); + int (*irq_deliver)(struct kvm_vcpu *vcpu, unsigned int priority, + uint32_t cause); + int (*irq_clear)(struct kvm_vcpu *vcpu, unsigned int priority, + uint32_t cause); + int (*get_one_reg)(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg, s64 *v); + int (*set_one_reg)(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg, s64 v); }; extern struct kvm_mips_callbacks *kvm_mips_callbacks; int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks); @@ -609,7 +714,16 @@ extern enum emulation_result kvm_mips_emulate_bp_exc(unsigned long cause, extern enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run); -enum emulation_result kvm_mips_emulate_count(struct kvm_vcpu *vcpu); +uint32_t kvm_mips_read_count(struct kvm_vcpu *vcpu); +void kvm_mips_write_count(struct kvm_vcpu *vcpu, uint32_t count); +void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare); +void kvm_mips_init_count(struct kvm_vcpu *vcpu); +int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl); +int kvm_mips_set_count_resume(struct kvm_vcpu *vcpu, s64 count_resume); +int kvm_mips_set_count_hz(struct kvm_vcpu *vcpu, s64 count_hz); +void kvm_mips_count_enable_cause(struct kvm_vcpu *vcpu); +void kvm_mips_count_disable_cause(struct kvm_vcpu *vcpu); +enum hrtimer_restart kvm_mips_count_timeout(struct kvm_vcpu *vcpu); enum emulation_result kvm_mips_check_privilege(unsigned long cause, uint32_t *opc, @@ -646,7 +760,6 @@ extern int kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu); /* Misc */ -extern void mips32_SyncICache(unsigned long addr, unsigned long size); extern int kvm_mips_dump_stats(struct kvm_vcpu *vcpu); extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm); diff --git a/arch/mips/include/uapi/asm/kvm.h b/arch/mips/include/uapi/asm/kvm.h index f09ff5ae2059..2c04b6d9ff85 100644 --- a/arch/mips/include/uapi/asm/kvm.h +++ b/arch/mips/include/uapi/asm/kvm.h @@ -106,6 +106,41 @@ struct kvm_fpu { #define KVM_REG_MIPS_LO (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 33) #define KVM_REG_MIPS_PC (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 34) +/* KVM specific control registers */ + +/* + * CP0_Count control + * DC: Set 0: Master disable CP0_Count and set COUNT_RESUME to now + * Set 1: Master re-enable CP0_Count with unchanged bias, handling timer + * interrupts since COUNT_RESUME + * This can be used to freeze the timer to get a consistent snapshot of + * the CP0_Count and timer interrupt pending state, while also resuming + * safely without losing time or guest timer interrupts. + * Other: Reserved, do not change. + */ +#define KVM_REG_MIPS_COUNT_CTL (KVM_REG_MIPS | KVM_REG_SIZE_U64 | \ + 0x20000 | 0) +#define KVM_REG_MIPS_COUNT_CTL_DC 0x00000001 + +/* + * CP0_Count resume monotonic nanoseconds + * The monotonic nanosecond time of the last set of COUNT_CTL.DC (master + * disable). Any reads and writes of Count related registers while + * COUNT_CTL.DC=1 will appear to occur at this time. When COUNT_CTL.DC is + * cleared again (master enable) any timer interrupts since this time will be + * emulated. + * Modifications to times in the future are rejected. + */ +#define KVM_REG_MIPS_COUNT_RESUME (KVM_REG_MIPS | KVM_REG_SIZE_U64 | \ + 0x20000 | 1) +/* + * CP0_Count rate in Hz + * Specifies the rate of the CP0_Count timer in Hz. Modifications occur without + * discontinuities in CP0_Count. + */ +#define KVM_REG_MIPS_COUNT_HZ (KVM_REG_MIPS | KVM_REG_SIZE_U64 | \ + 0x20000 | 2) + /* * KVM MIPS specific structures and definitions * diff --git a/arch/mips/kvm/kvm_locore.S b/arch/mips/kvm/kvm_locore.S index bbace092ad0a..033ac343e72c 100644 --- a/arch/mips/kvm/kvm_locore.S +++ b/arch/mips/kvm/kvm_locore.S @@ -611,35 +611,3 @@ MIPSX(exceptions): .word _C_LABEL(MIPSX(GuestException)) # 29 .word _C_LABEL(MIPSX(GuestException)) # 30 .word _C_LABEL(MIPSX(GuestException)) # 31 - - -/* This routine makes changes to the instruction stream effective to the hardware. - * It should be called after the instruction stream is written. - * On return, the new instructions are effective. - * Inputs: - * a0 = Start address of new instruction stream - * a1 = Size, in bytes, of new instruction stream - */ - -#define HW_SYNCI_Step $1 -LEAF(MIPSX(SyncICache)) - .set push - .set mips32r2 - beq a1, zero, 20f - nop - REG_ADDU a1, a0, a1 - rdhwr v0, HW_SYNCI_Step - beq v0, zero, 20f - nop -10: - synci 0(a0) - REG_ADDU a0, a0, v0 - sltu v1, a0, a1 - bne v1, zero, 10b - nop - sync -20: - jr.hb ra - nop - .set pop -END(MIPSX(SyncICache)) diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c index da5186fbd77a..cd5e4f568439 100644 --- a/arch/mips/kvm/kvm_mips.c +++ b/arch/mips/kvm/kvm_mips.c @@ -61,11 +61,6 @@ static int kvm_mips_reset_vcpu(struct kvm_vcpu *vcpu) return 0; } -gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) -{ - return gfn; -} - /* XXXKYMA: We are simulatoring a processor that has the WII bit set in Config7, so we * are "runnable" if interrupts are pending */ @@ -130,8 +125,8 @@ static void kvm_mips_init_vm_percpu(void *arg) int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { if (atomic_inc_return(&kvm_mips_instance) == 1) { - kvm_info("%s: 1st KVM instance, setup host TLB parameters\n", - __func__); + kvm_debug("%s: 1st KVM instance, setup host TLB parameters\n", + __func__); on_each_cpu(kvm_mips_init_vm_percpu, kvm, 1); } @@ -149,9 +144,7 @@ void kvm_mips_free_vcpus(struct kvm *kvm) if (kvm->arch.guest_pmap[i] != KVM_INVALID_PAGE) kvm_mips_release_pfn_clean(kvm->arch.guest_pmap[i]); } - - if (kvm->arch.guest_pmap) - kfree(kvm->arch.guest_pmap); + kfree(kvm->arch.guest_pmap); kvm_for_each_vcpu(i, vcpu, kvm) { kvm_arch_vcpu_free(vcpu); @@ -186,8 +179,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) /* If this is the last instance, restore wired count */ if (atomic_dec_return(&kvm_mips_instance) == 0) { - kvm_info("%s: last KVM instance, restoring TLB parameters\n", - __func__); + kvm_debug("%s: last KVM instance, restoring TLB parameters\n", + __func__); on_each_cpu(kvm_mips_uninit_tlbs, NULL, 1); } } @@ -249,9 +242,8 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, goto out; } - kvm_info - ("Allocated space for Guest PMAP Table (%ld pages) @ %p\n", - npages, kvm->arch.guest_pmap); + kvm_debug("Allocated space for Guest PMAP Table (%ld pages) @ %p\n", + npages, kvm->arch.guest_pmap); /* Now setup the page table */ for (i = 0; i < npages; i++) { @@ -296,7 +288,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) if (err) goto out_free_cpu; - kvm_info("kvm @ %p: create cpu %d at %p\n", kvm, id, vcpu); + kvm_debug("kvm @ %p: create cpu %d at %p\n", kvm, id, vcpu); /* Allocate space for host mode exception handlers that handle * guest mode exits @@ -304,7 +296,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) if (cpu_has_veic || cpu_has_vint) { size = 0x200 + VECTORSPACING * 64; } else { - size = 0x200; + size = 0x4000; } /* Save Linux EBASE */ @@ -316,8 +308,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) err = -ENOMEM; goto out_free_cpu; } - kvm_info("Allocated %d bytes for KVM Exception Handlers @ %p\n", - ALIGN(size, PAGE_SIZE), gebase); + kvm_debug("Allocated %d bytes for KVM Exception Handlers @ %p\n", + ALIGN(size, PAGE_SIZE), gebase); /* Save new ebase */ vcpu->arch.guest_ebase = gebase; @@ -342,15 +334,16 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) /* General handler, relocate to unmapped space for sanity's sake */ offset = 0x2000; - kvm_info("Installing KVM Exception handlers @ %p, %#x bytes\n", - gebase + offset, - mips32_GuestExceptionEnd - mips32_GuestException); + kvm_debug("Installing KVM Exception handlers @ %p, %#x bytes\n", + gebase + offset, + mips32_GuestExceptionEnd - mips32_GuestException); memcpy(gebase + offset, mips32_GuestException, mips32_GuestExceptionEnd - mips32_GuestException); /* Invalidate the icache for these ranges */ - mips32_SyncICache((unsigned long) gebase, ALIGN(size, PAGE_SIZE)); + local_flush_icache_range((unsigned long)gebase, + (unsigned long)gebase + ALIGN(size, PAGE_SIZE)); /* Allocate comm page for guest kernel, a TLB will be reserved for mapping GVA @ 0xFFFF8000 to this page */ vcpu->arch.kseg0_commpage = kzalloc(PAGE_SIZE << 1, GFP_KERNEL); @@ -360,14 +353,14 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) goto out_free_gebase; } - kvm_info("Allocated COMM page @ %p\n", vcpu->arch.kseg0_commpage); + kvm_debug("Allocated COMM page @ %p\n", vcpu->arch.kseg0_commpage); kvm_mips_commpage_init(vcpu); /* Init */ vcpu->arch.last_sched_cpu = -1; /* Start off the timer */ - kvm_mips_emulate_count(vcpu); + kvm_mips_init_count(vcpu); return vcpu; @@ -389,12 +382,8 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) kvm_mips_dump_stats(vcpu); - if (vcpu->arch.guest_ebase) - kfree(vcpu->arch.guest_ebase); - - if (vcpu->arch.kseg0_commpage) - kfree(vcpu->arch.kseg0_commpage); - + kfree(vcpu->arch.guest_ebase); + kfree(vcpu->arch.kseg0_commpage); } void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) @@ -423,11 +412,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) vcpu->mmio_needed = 0; } + local_irq_disable(); /* Check if we have any exceptions/interrupts pending */ kvm_mips_deliver_interrupts(vcpu, kvm_read_c0_guest_cause(vcpu->arch.cop0)); - local_irq_disable(); kvm_guest_enter(); r = __kvm_mips_vcpu_run(run, vcpu); @@ -490,36 +479,6 @@ kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, return -ENOIOCTLCMD; } -#define MIPS_CP0_32(_R, _S) \ - (KVM_REG_MIPS | KVM_REG_SIZE_U32 | 0x10000 | (8 * (_R) + (_S))) - -#define MIPS_CP0_64(_R, _S) \ - (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 0x10000 | (8 * (_R) + (_S))) - -#define KVM_REG_MIPS_CP0_INDEX MIPS_CP0_32(0, 0) -#define KVM_REG_MIPS_CP0_ENTRYLO0 MIPS_CP0_64(2, 0) -#define KVM_REG_MIPS_CP0_ENTRYLO1 MIPS_CP0_64(3, 0) -#define KVM_REG_MIPS_CP0_CONTEXT MIPS_CP0_64(4, 0) -#define KVM_REG_MIPS_CP0_USERLOCAL MIPS_CP0_64(4, 2) -#define KVM_REG_MIPS_CP0_PAGEMASK MIPS_CP0_32(5, 0) -#define KVM_REG_MIPS_CP0_PAGEGRAIN MIPS_CP0_32(5, 1) -#define KVM_REG_MIPS_CP0_WIRED MIPS_CP0_32(6, 0) -#define KVM_REG_MIPS_CP0_HWRENA MIPS_CP0_32(7, 0) -#define KVM_REG_MIPS_CP0_BADVADDR MIPS_CP0_64(8, 0) -#define KVM_REG_MIPS_CP0_COUNT MIPS_CP0_32(9, 0) -#define KVM_REG_MIPS_CP0_ENTRYHI MIPS_CP0_64(10, 0) -#define KVM_REG_MIPS_CP0_COMPARE MIPS_CP0_32(11, 0) -#define KVM_REG_MIPS_CP0_STATUS MIPS_CP0_32(12, 0) -#define KVM_REG_MIPS_CP0_CAUSE MIPS_CP0_32(13, 0) -#define KVM_REG_MIPS_CP0_EBASE MIPS_CP0_64(15, 1) -#define KVM_REG_MIPS_CP0_CONFIG MIPS_CP0_32(16, 0) -#define KVM_REG_MIPS_CP0_CONFIG1 MIPS_CP0_32(16, 1) -#define KVM_REG_MIPS_CP0_CONFIG2 MIPS_CP0_32(16, 2) -#define KVM_REG_MIPS_CP0_CONFIG3 MIPS_CP0_32(16, 3) -#define KVM_REG_MIPS_CP0_CONFIG7 MIPS_CP0_32(16, 7) -#define KVM_REG_MIPS_CP0_XCONTEXT MIPS_CP0_64(20, 0) -#define KVM_REG_MIPS_CP0_ERROREPC MIPS_CP0_64(30, 0) - static u64 kvm_mips_get_one_regs[] = { KVM_REG_MIPS_R0, KVM_REG_MIPS_R1, @@ -560,25 +519,34 @@ static u64 kvm_mips_get_one_regs[] = { KVM_REG_MIPS_CP0_INDEX, KVM_REG_MIPS_CP0_CONTEXT, + KVM_REG_MIPS_CP0_USERLOCAL, KVM_REG_MIPS_CP0_PAGEMASK, KVM_REG_MIPS_CP0_WIRED, + KVM_REG_MIPS_CP0_HWRENA, KVM_REG_MIPS_CP0_BADVADDR, + KVM_REG_MIPS_CP0_COUNT, KVM_REG_MIPS_CP0_ENTRYHI, + KVM_REG_MIPS_CP0_COMPARE, KVM_REG_MIPS_CP0_STATUS, KVM_REG_MIPS_CP0_CAUSE, - /* EPC set via kvm_regs, et al. */ + KVM_REG_MIPS_CP0_EPC, KVM_REG_MIPS_CP0_CONFIG, KVM_REG_MIPS_CP0_CONFIG1, KVM_REG_MIPS_CP0_CONFIG2, KVM_REG_MIPS_CP0_CONFIG3, KVM_REG_MIPS_CP0_CONFIG7, - KVM_REG_MIPS_CP0_ERROREPC + KVM_REG_MIPS_CP0_ERROREPC, + + KVM_REG_MIPS_COUNT_CTL, + KVM_REG_MIPS_COUNT_RESUME, + KVM_REG_MIPS_COUNT_HZ, }; static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { struct mips_coproc *cop0 = vcpu->arch.cop0; + int ret; s64 v; switch (reg->id) { @@ -601,24 +569,36 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, case KVM_REG_MIPS_CP0_CONTEXT: v = (long)kvm_read_c0_guest_context(cop0); break; + case KVM_REG_MIPS_CP0_USERLOCAL: + v = (long)kvm_read_c0_guest_userlocal(cop0); + break; case KVM_REG_MIPS_CP0_PAGEMASK: v = (long)kvm_read_c0_guest_pagemask(cop0); break; case KVM_REG_MIPS_CP0_WIRED: v = (long)kvm_read_c0_guest_wired(cop0); break; + case KVM_REG_MIPS_CP0_HWRENA: + v = (long)kvm_read_c0_guest_hwrena(cop0); + break; case KVM_REG_MIPS_CP0_BADVADDR: v = (long)kvm_read_c0_guest_badvaddr(cop0); break; case KVM_REG_MIPS_CP0_ENTRYHI: v = (long)kvm_read_c0_guest_entryhi(cop0); break; + case KVM_REG_MIPS_CP0_COMPARE: + v = (long)kvm_read_c0_guest_compare(cop0); + break; case KVM_REG_MIPS_CP0_STATUS: v = (long)kvm_read_c0_guest_status(cop0); break; case KVM_REG_MIPS_CP0_CAUSE: v = (long)kvm_read_c0_guest_cause(cop0); break; + case KVM_REG_MIPS_CP0_EPC: + v = (long)kvm_read_c0_guest_epc(cop0); + break; case KVM_REG_MIPS_CP0_ERROREPC: v = (long)kvm_read_c0_guest_errorepc(cop0); break; @@ -637,6 +617,15 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, case KVM_REG_MIPS_CP0_CONFIG7: v = (long)kvm_read_c0_guest_config7(cop0); break; + /* registers to be handled specially */ + case KVM_REG_MIPS_CP0_COUNT: + case KVM_REG_MIPS_COUNT_CTL: + case KVM_REG_MIPS_COUNT_RESUME: + case KVM_REG_MIPS_COUNT_HZ: + ret = kvm_mips_callbacks->get_one_reg(vcpu, reg, &v); + if (ret) + return ret; + break; default: return -EINVAL; } @@ -697,12 +686,18 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, case KVM_REG_MIPS_CP0_CONTEXT: kvm_write_c0_guest_context(cop0, v); break; + case KVM_REG_MIPS_CP0_USERLOCAL: + kvm_write_c0_guest_userlocal(cop0, v); + break; case KVM_REG_MIPS_CP0_PAGEMASK: kvm_write_c0_guest_pagemask(cop0, v); break; case KVM_REG_MIPS_CP0_WIRED: kvm_write_c0_guest_wired(cop0, v); break; + case KVM_REG_MIPS_CP0_HWRENA: + kvm_write_c0_guest_hwrena(cop0, v); + break; case KVM_REG_MIPS_CP0_BADVADDR: kvm_write_c0_guest_badvaddr(cop0, v); break; @@ -712,12 +707,20 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, case KVM_REG_MIPS_CP0_STATUS: kvm_write_c0_guest_status(cop0, v); break; - case KVM_REG_MIPS_CP0_CAUSE: - kvm_write_c0_guest_cause(cop0, v); + case KVM_REG_MIPS_CP0_EPC: + kvm_write_c0_guest_epc(cop0, v); break; case KVM_REG_MIPS_CP0_ERROREPC: kvm_write_c0_guest_errorepc(cop0, v); break; + /* registers to be handled specially */ + case KVM_REG_MIPS_CP0_COUNT: + case KVM_REG_MIPS_CP0_COMPARE: + case KVM_REG_MIPS_CP0_CAUSE: + case KVM_REG_MIPS_COUNT_CTL: + case KVM_REG_MIPS_COUNT_RESUME: + case KVM_REG_MIPS_COUNT_HZ: + return kvm_mips_callbacks->set_one_reg(vcpu, reg, v); default: return -EINVAL; } @@ -920,7 +923,7 @@ int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu) return -1; printk("VCPU Register Dump:\n"); - printk("\tpc = 0x%08lx\n", vcpu->arch.pc);; + printk("\tpc = 0x%08lx\n", vcpu->arch.pc); printk("\texceptions: %08lx\n", vcpu->arch.pending_exceptions); for (i = 0; i < 32; i += 4) { @@ -969,7 +972,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return 0; } -void kvm_mips_comparecount_func(unsigned long data) +static void kvm_mips_comparecount_func(unsigned long data) { struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; @@ -984,15 +987,13 @@ void kvm_mips_comparecount_func(unsigned long data) /* * low level hrtimer wake routine. */ -enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer) +static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer) { struct kvm_vcpu *vcpu; vcpu = container_of(timer, struct kvm_vcpu, arch.comparecount_timer); kvm_mips_comparecount_func((unsigned long) vcpu); - hrtimer_forward_now(&vcpu->arch.comparecount_timer, - ktime_set(0, MS_TO_NS(10))); - return HRTIMER_RESTART; + return kvm_mips_count_timeout(vcpu); } int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) diff --git a/arch/mips/kvm/kvm_mips_dyntrans.c b/arch/mips/kvm/kvm_mips_dyntrans.c index 96528e2d1ea6..b80e41d858fd 100644 --- a/arch/mips/kvm/kvm_mips_dyntrans.c +++ b/arch/mips/kvm/kvm_mips_dyntrans.c @@ -16,6 +16,7 @@ #include <linux/vmalloc.h> #include <linux/fs.h> #include <linux/bootmem.h> +#include <asm/cacheflush.h> #include "kvm_mips_comm.h" @@ -40,7 +41,7 @@ kvm_mips_trans_cache_index(uint32_t inst, uint32_t *opc, CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa (vcpu, (unsigned long) opc)); memcpy((void *)kseg0_opc, (void *)&synci_inst, sizeof(uint32_t)); - mips32_SyncICache(kseg0_opc, 32); + local_flush_icache_range(kseg0_opc, kseg0_opc + 32); return result; } @@ -66,7 +67,7 @@ kvm_mips_trans_cache_va(uint32_t inst, uint32_t *opc, CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa (vcpu, (unsigned long) opc)); memcpy((void *)kseg0_opc, (void *)&synci_inst, sizeof(uint32_t)); - mips32_SyncICache(kseg0_opc, 32); + local_flush_icache_range(kseg0_opc, kseg0_opc + 32); return result; } @@ -99,11 +100,12 @@ kvm_mips_trans_mfc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu) CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa (vcpu, (unsigned long) opc)); memcpy((void *)kseg0_opc, (void *)&mfc0_inst, sizeof(uint32_t)); - mips32_SyncICache(kseg0_opc, 32); + local_flush_icache_range(kseg0_opc, kseg0_opc + 32); } else if (KVM_GUEST_KSEGX((unsigned long) opc) == KVM_GUEST_KSEG23) { local_irq_save(flags); memcpy((void *)opc, (void *)&mfc0_inst, sizeof(uint32_t)); - mips32_SyncICache((unsigned long) opc, 32); + local_flush_icache_range((unsigned long)opc, + (unsigned long)opc + 32); local_irq_restore(flags); } else { kvm_err("%s: Invalid address: %p\n", __func__, opc); @@ -134,11 +136,12 @@ kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu) CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa (vcpu, (unsigned long) opc)); memcpy((void *)kseg0_opc, (void *)&mtc0_inst, sizeof(uint32_t)); - mips32_SyncICache(kseg0_opc, 32); + local_flush_icache_range(kseg0_opc, kseg0_opc + 32); } else if (KVM_GUEST_KSEGX((unsigned long) opc) == KVM_GUEST_KSEG23) { local_irq_save(flags); memcpy((void *)opc, (void *)&mtc0_inst, sizeof(uint32_t)); - mips32_SyncICache((unsigned long) opc, 32); + local_flush_icache_range((unsigned long)opc, + (unsigned long)opc + 32); local_irq_restore(flags); } else { kvm_err("%s: Invalid address: %p\n", __func__, opc); diff --git a/arch/mips/kvm/kvm_mips_emul.c b/arch/mips/kvm/kvm_mips_emul.c index e3fec99941a7..8d4840090082 100644 --- a/arch/mips/kvm/kvm_mips_emul.c +++ b/arch/mips/kvm/kvm_mips_emul.c @@ -11,6 +11,7 @@ #include <linux/errno.h> #include <linux/err.h> +#include <linux/ktime.h> #include <linux/kvm_host.h> #include <linux/module.h> #include <linux/vmalloc.h> @@ -228,25 +229,520 @@ enum emulation_result update_pc(struct kvm_vcpu *vcpu, uint32_t cause) return er; } -/* Everytime the compare register is written to, we need to decide when to fire - * the timer that represents timer ticks to the GUEST. +/** + * kvm_mips_count_disabled() - Find whether the CP0_Count timer is disabled. + * @vcpu: Virtual CPU. * + * Returns: 1 if the CP0_Count timer is disabled by either the guest + * CP0_Cause.DC bit or the count_ctl.DC bit. + * 0 otherwise (in which case CP0_Count timer is running). */ -enum emulation_result kvm_mips_emulate_count(struct kvm_vcpu *vcpu) +static inline int kvm_mips_count_disabled(struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; - enum emulation_result er = EMULATE_DONE; + return (vcpu->arch.count_ctl & KVM_REG_MIPS_COUNT_CTL_DC) || + (kvm_read_c0_guest_cause(cop0) & CAUSEF_DC); +} + +/** + * kvm_mips_ktime_to_count() - Scale ktime_t to a 32-bit count. + * + * Caches the dynamic nanosecond bias in vcpu->arch.count_dyn_bias. + * + * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running). + */ +static uint32_t kvm_mips_ktime_to_count(struct kvm_vcpu *vcpu, ktime_t now) +{ + s64 now_ns, periods; + u64 delta; + + now_ns = ktime_to_ns(now); + delta = now_ns + vcpu->arch.count_dyn_bias; + + if (delta >= vcpu->arch.count_period) { + /* If delta is out of safe range the bias needs adjusting */ + periods = div64_s64(now_ns, vcpu->arch.count_period); + vcpu->arch.count_dyn_bias = -periods * vcpu->arch.count_period; + /* Recalculate delta with new bias */ + delta = now_ns + vcpu->arch.count_dyn_bias; + } + + /* + * We've ensured that: + * delta < count_period + * + * Therefore the intermediate delta*count_hz will never overflow since + * at the boundary condition: + * delta = count_period + * delta = NSEC_PER_SEC * 2^32 / count_hz + * delta * count_hz = NSEC_PER_SEC * 2^32 + */ + return div_u64(delta * vcpu->arch.count_hz, NSEC_PER_SEC); +} + +/** + * kvm_mips_count_time() - Get effective current time. + * @vcpu: Virtual CPU. + * + * Get effective monotonic ktime. This is usually a straightforward ktime_get(), + * except when the master disable bit is set in count_ctl, in which case it is + * count_resume, i.e. the time that the count was disabled. + * + * Returns: Effective monotonic ktime for CP0_Count. + */ +static inline ktime_t kvm_mips_count_time(struct kvm_vcpu *vcpu) +{ + if (unlikely(vcpu->arch.count_ctl & KVM_REG_MIPS_COUNT_CTL_DC)) + return vcpu->arch.count_resume; + + return ktime_get(); +} + +/** + * kvm_mips_read_count_running() - Read the current count value as if running. + * @vcpu: Virtual CPU. + * @now: Kernel time to read CP0_Count at. + * + * Returns the current guest CP0_Count register at time @now and handles if the + * timer interrupt is pending and hasn't been handled yet. + * + * Returns: The current value of the guest CP0_Count register. + */ +static uint32_t kvm_mips_read_count_running(struct kvm_vcpu *vcpu, ktime_t now) +{ + ktime_t expires; + int running; + + /* Is the hrtimer pending? */ + expires = hrtimer_get_expires(&vcpu->arch.comparecount_timer); + if (ktime_compare(now, expires) >= 0) { + /* + * Cancel it while we handle it so there's no chance of + * interference with the timeout handler. + */ + running = hrtimer_cancel(&vcpu->arch.comparecount_timer); + + /* Nothing should be waiting on the timeout */ + kvm_mips_callbacks->queue_timer_int(vcpu); + + /* + * Restart the timer if it was running based on the expiry time + * we read, so that we don't push it back 2 periods. + */ + if (running) { + expires = ktime_add_ns(expires, + vcpu->arch.count_period); + hrtimer_start(&vcpu->arch.comparecount_timer, expires, + HRTIMER_MODE_ABS); + } + } + + /* Return the biased and scaled guest CP0_Count */ + return vcpu->arch.count_bias + kvm_mips_ktime_to_count(vcpu, now); +} + +/** + * kvm_mips_read_count() - Read the current count value. + * @vcpu: Virtual CPU. + * + * Read the current guest CP0_Count value, taking into account whether the timer + * is stopped. + * + * Returns: The current guest CP0_Count value. + */ +uint32_t kvm_mips_read_count(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + + /* If count disabled just read static copy of count */ + if (kvm_mips_count_disabled(vcpu)) + return kvm_read_c0_guest_count(cop0); + + return kvm_mips_read_count_running(vcpu, ktime_get()); +} + +/** + * kvm_mips_freeze_hrtimer() - Safely stop the hrtimer. + * @vcpu: Virtual CPU. + * @count: Output pointer for CP0_Count value at point of freeze. + * + * Freeze the hrtimer safely and return both the ktime and the CP0_Count value + * at the point it was frozen. It is guaranteed that any pending interrupts at + * the point it was frozen are handled, and none after that point. + * + * This is useful where the time/CP0_Count is needed in the calculation of the + * new parameters. + * + * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running). + * + * Returns: The ktime at the point of freeze. + */ +static ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu, + uint32_t *count) +{ + ktime_t now; + + /* stop hrtimer before finding time */ + hrtimer_cancel(&vcpu->arch.comparecount_timer); + now = ktime_get(); + + /* find count at this point and handle pending hrtimer */ + *count = kvm_mips_read_count_running(vcpu, now); + + return now; +} + - /* If COUNT is enabled */ - if (!(kvm_read_c0_guest_cause(cop0) & CAUSEF_DC)) { - hrtimer_try_to_cancel(&vcpu->arch.comparecount_timer); - hrtimer_start(&vcpu->arch.comparecount_timer, - ktime_set(0, MS_TO_NS(10)), HRTIMER_MODE_REL); +/** + * kvm_mips_resume_hrtimer() - Resume hrtimer, updating expiry. + * @vcpu: Virtual CPU. + * @now: ktime at point of resume. + * @count: CP0_Count at point of resume. + * + * Resumes the timer and updates the timer expiry based on @now and @count. + * This can be used in conjunction with kvm_mips_freeze_timer() when timer + * parameters need to be changed. + * + * It is guaranteed that a timer interrupt immediately after resume will be + * handled, but not if CP_Compare is exactly at @count. That case is already + * handled by kvm_mips_freeze_timer(). + * + * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running). + */ +static void kvm_mips_resume_hrtimer(struct kvm_vcpu *vcpu, + ktime_t now, uint32_t count) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + uint32_t compare; + u64 delta; + ktime_t expire; + + /* Calculate timeout (wrap 0 to 2^32) */ + compare = kvm_read_c0_guest_compare(cop0); + delta = (u64)(uint32_t)(compare - count - 1) + 1; + delta = div_u64(delta * NSEC_PER_SEC, vcpu->arch.count_hz); + expire = ktime_add_ns(now, delta); + + /* Update hrtimer to use new timeout */ + hrtimer_cancel(&vcpu->arch.comparecount_timer); + hrtimer_start(&vcpu->arch.comparecount_timer, expire, HRTIMER_MODE_ABS); +} + +/** + * kvm_mips_update_hrtimer() - Update next expiry time of hrtimer. + * @vcpu: Virtual CPU. + * + * Recalculates and updates the expiry time of the hrtimer. This can be used + * after timer parameters have been altered which do not depend on the time that + * the change occurs (in those cases kvm_mips_freeze_hrtimer() and + * kvm_mips_resume_hrtimer() are used directly). + * + * It is guaranteed that no timer interrupts will be lost in the process. + * + * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running). + */ +static void kvm_mips_update_hrtimer(struct kvm_vcpu *vcpu) +{ + ktime_t now; + uint32_t count; + + /* + * freeze_hrtimer takes care of a timer interrupts <= count, and + * resume_hrtimer the hrtimer takes care of a timer interrupts > count. + */ + now = kvm_mips_freeze_hrtimer(vcpu, &count); + kvm_mips_resume_hrtimer(vcpu, now, count); +} + +/** + * kvm_mips_write_count() - Modify the count and update timer. + * @vcpu: Virtual CPU. + * @count: Guest CP0_Count value to set. + * + * Sets the CP0_Count value and updates the timer accordingly. + */ +void kvm_mips_write_count(struct kvm_vcpu *vcpu, uint32_t count) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + ktime_t now; + + /* Calculate bias */ + now = kvm_mips_count_time(vcpu); + vcpu->arch.count_bias = count - kvm_mips_ktime_to_count(vcpu, now); + + if (kvm_mips_count_disabled(vcpu)) + /* The timer's disabled, adjust the static count */ + kvm_write_c0_guest_count(cop0, count); + else + /* Update timeout */ + kvm_mips_resume_hrtimer(vcpu, now, count); +} + +/** + * kvm_mips_init_count() - Initialise timer. + * @vcpu: Virtual CPU. + * + * Initialise the timer to a sensible frequency, namely 100MHz, zero it, and set + * it going if it's enabled. + */ +void kvm_mips_init_count(struct kvm_vcpu *vcpu) +{ + /* 100 MHz */ + vcpu->arch.count_hz = 100*1000*1000; + vcpu->arch.count_period = div_u64((u64)NSEC_PER_SEC << 32, + vcpu->arch.count_hz); + vcpu->arch.count_dyn_bias = 0; + + /* Starting at 0 */ + kvm_mips_write_count(vcpu, 0); +} + +/** + * kvm_mips_set_count_hz() - Update the frequency of the timer. + * @vcpu: Virtual CPU. + * @count_hz: Frequency of CP0_Count timer in Hz. + * + * Change the frequency of the CP0_Count timer. This is done atomically so that + * CP0_Count is continuous and no timer interrupt is lost. + * + * Returns: -EINVAL if @count_hz is out of range. + * 0 on success. + */ +int kvm_mips_set_count_hz(struct kvm_vcpu *vcpu, s64 count_hz) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + int dc; + ktime_t now; + u32 count; + + /* ensure the frequency is in a sensible range... */ + if (count_hz <= 0 || count_hz > NSEC_PER_SEC) + return -EINVAL; + /* ... and has actually changed */ + if (vcpu->arch.count_hz == count_hz) + return 0; + + /* Safely freeze timer so we can keep it continuous */ + dc = kvm_mips_count_disabled(vcpu); + if (dc) { + now = kvm_mips_count_time(vcpu); + count = kvm_read_c0_guest_count(cop0); } else { - hrtimer_try_to_cancel(&vcpu->arch.comparecount_timer); + now = kvm_mips_freeze_hrtimer(vcpu, &count); } - return er; + /* Update the frequency */ + vcpu->arch.count_hz = count_hz; + vcpu->arch.count_period = div_u64((u64)NSEC_PER_SEC << 32, count_hz); + vcpu->arch.count_dyn_bias = 0; + + /* Calculate adjusted bias so dynamic count is unchanged */ + vcpu->arch.count_bias = count - kvm_mips_ktime_to_count(vcpu, now); + + /* Update and resume hrtimer */ + if (!dc) + kvm_mips_resume_hrtimer(vcpu, now, count); + return 0; +} + +/** + * kvm_mips_write_compare() - Modify compare and update timer. + * @vcpu: Virtual CPU. + * @compare: New CP0_Compare value. + * + * Update CP0_Compare to a new value and update the timeout. + */ +void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + + /* if unchanged, must just be an ack */ + if (kvm_read_c0_guest_compare(cop0) == compare) + return; + + /* Update compare */ + kvm_write_c0_guest_compare(cop0, compare); + + /* Update timeout if count enabled */ + if (!kvm_mips_count_disabled(vcpu)) + kvm_mips_update_hrtimer(vcpu); +} + +/** + * kvm_mips_count_disable() - Disable count. + * @vcpu: Virtual CPU. + * + * Disable the CP0_Count timer. A timer interrupt on or before the final stop + * time will be handled but not after. + * + * Assumes CP0_Count was previously enabled but now Guest.CP0_Cause.DC or + * count_ctl.DC has been set (count disabled). + * + * Returns: The time that the timer was stopped. + */ +static ktime_t kvm_mips_count_disable(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + uint32_t count; + ktime_t now; + + /* Stop hrtimer */ + hrtimer_cancel(&vcpu->arch.comparecount_timer); + + /* Set the static count from the dynamic count, handling pending TI */ + now = ktime_get(); + count = kvm_mips_read_count_running(vcpu, now); + kvm_write_c0_guest_count(cop0, count); + + return now; +} + +/** + * kvm_mips_count_disable_cause() - Disable count using CP0_Cause.DC. + * @vcpu: Virtual CPU. + * + * Disable the CP0_Count timer and set CP0_Cause.DC. A timer interrupt on or + * before the final stop time will be handled if the timer isn't disabled by + * count_ctl.DC, but not after. + * + * Assumes CP0_Cause.DC is clear (count enabled). + */ +void kvm_mips_count_disable_cause(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + + kvm_set_c0_guest_cause(cop0, CAUSEF_DC); + if (!(vcpu->arch.count_ctl & KVM_REG_MIPS_COUNT_CTL_DC)) + kvm_mips_count_disable(vcpu); +} + +/** + * kvm_mips_count_enable_cause() - Enable count using CP0_Cause.DC. + * @vcpu: Virtual CPU. + * + * Enable the CP0_Count timer and clear CP0_Cause.DC. A timer interrupt after + * the start time will be handled if the timer isn't disabled by count_ctl.DC, + * potentially before even returning, so the caller should be careful with + * ordering of CP0_Cause modifications so as not to lose it. + * + * Assumes CP0_Cause.DC is set (count disabled). + */ +void kvm_mips_count_enable_cause(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + uint32_t count; + + kvm_clear_c0_guest_cause(cop0, CAUSEF_DC); + + /* + * Set the dynamic count to match the static count. + * This starts the hrtimer if count_ctl.DC allows it. + * Otherwise it conveniently updates the biases. + */ + count = kvm_read_c0_guest_count(cop0); + kvm_mips_write_count(vcpu, count); +} + +/** + * kvm_mips_set_count_ctl() - Update the count control KVM register. + * @vcpu: Virtual CPU. + * @count_ctl: Count control register new value. + * + * Set the count control KVM register. The timer is updated accordingly. + * + * Returns: -EINVAL if reserved bits are set. + * 0 on success. + */ +int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + s64 changed = count_ctl ^ vcpu->arch.count_ctl; + s64 delta; + ktime_t expire, now; + uint32_t count, compare; + + /* Only allow defined bits to be changed */ + if (changed & ~(s64)(KVM_REG_MIPS_COUNT_CTL_DC)) + return -EINVAL; + + /* Apply new value */ + vcpu->arch.count_ctl = count_ctl; + + /* Master CP0_Count disable */ + if (changed & KVM_REG_MIPS_COUNT_CTL_DC) { + /* Is CP0_Cause.DC already disabling CP0_Count? */ + if (kvm_read_c0_guest_cause(cop0) & CAUSEF_DC) { + if (count_ctl & KVM_REG_MIPS_COUNT_CTL_DC) + /* Just record the current time */ + vcpu->arch.count_resume = ktime_get(); + } else if (count_ctl & KVM_REG_MIPS_COUNT_CTL_DC) { + /* disable timer and record current time */ + vcpu->arch.count_resume = kvm_mips_count_disable(vcpu); + } else { + /* + * Calculate timeout relative to static count at resume + * time (wrap 0 to 2^32). + */ + count = kvm_read_c0_guest_count(cop0); + compare = kvm_read_c0_guest_compare(cop0); + delta = (u64)(uint32_t)(compare - count - 1) + 1; + delta = div_u64(delta * NSEC_PER_SEC, + vcpu->arch.count_hz); + expire = ktime_add_ns(vcpu->arch.count_resume, delta); + + /* Handle pending interrupt */ + now = ktime_get(); + if (ktime_compare(now, expire) >= 0) + /* Nothing should be waiting on the timeout */ + kvm_mips_callbacks->queue_timer_int(vcpu); + + /* Resume hrtimer without changing bias */ + count = kvm_mips_read_count_running(vcpu, now); + kvm_mips_resume_hrtimer(vcpu, now, count); + } + } + + return 0; +} + +/** + * kvm_mips_set_count_resume() - Update the count resume KVM register. + * @vcpu: Virtual CPU. + * @count_resume: Count resume register new value. + * + * Set the count resume KVM register. + * + * Returns: -EINVAL if out of valid range (0..now). + * 0 on success. + */ +int kvm_mips_set_count_resume(struct kvm_vcpu *vcpu, s64 count_resume) +{ + /* + * It doesn't make sense for the resume time to be in the future, as it + * would be possible for the next interrupt to be more than a full + * period in the future. + */ + if (count_resume < 0 || count_resume > ktime_to_ns(ktime_get())) + return -EINVAL; + + vcpu->arch.count_resume = ns_to_ktime(count_resume); + return 0; +} + +/** + * kvm_mips_count_timeout() - Push timer forward on timeout. + * @vcpu: Virtual CPU. + * + * Handle an hrtimer event by push the hrtimer forward a period. + * + * Returns: The hrtimer_restart value to return to the hrtimer subsystem. + */ +enum hrtimer_restart kvm_mips_count_timeout(struct kvm_vcpu *vcpu) +{ + /* Add the Count period to the current expiry time */ + hrtimer_add_expires_ns(&vcpu->arch.comparecount_timer, + vcpu->arch.count_period); + return HRTIMER_RESTART; } enum emulation_result kvm_mips_emul_eret(struct kvm_vcpu *vcpu) @@ -471,8 +967,7 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause, #endif /* Get reg */ if ((rd == MIPS_CP0_COUNT) && (sel == 0)) { - /* XXXKYMA: Run the Guest count register @ 1/4 the rate of the host */ - vcpu->arch.gprs[rt] = (read_c0_count() >> 2); + vcpu->arch.gprs[rt] = kvm_mips_read_count(vcpu); } else if ((rd == MIPS_CP0_ERRCTL) && (sel == 0)) { vcpu->arch.gprs[rt] = 0x0; #ifdef CONFIG_KVM_MIPS_DYN_TRANS @@ -539,10 +1034,7 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause, } /* Are we writing to COUNT */ else if ((rd == MIPS_CP0_COUNT) && (sel == 0)) { - /* Linux doesn't seem to write into COUNT, we throw an error - * if we notice a write to COUNT - */ - /*er = EMULATE_FAIL; */ + kvm_mips_write_count(vcpu, vcpu->arch.gprs[rt]); goto done; } else if ((rd == MIPS_CP0_COMPARE) && (sel == 0)) { kvm_debug("[%#x] MTCz, COMPARE %#lx <- %#lx\n", @@ -552,8 +1044,8 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause, /* If we are writing to COMPARE */ /* Clear pending timer interrupt, if any */ kvm_mips_callbacks->dequeue_timer_int(vcpu); - kvm_write_c0_guest_compare(cop0, - vcpu->arch.gprs[rt]); + kvm_mips_write_compare(vcpu, + vcpu->arch.gprs[rt]); } else if ((rd == MIPS_CP0_STATUS) && (sel == 0)) { kvm_write_c0_guest_status(cop0, vcpu->arch.gprs[rt]); @@ -564,6 +1056,20 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause, #ifdef CONFIG_KVM_MIPS_DYN_TRANS kvm_mips_trans_mtc0(inst, opc, vcpu); #endif + } else if ((rd == MIPS_CP0_CAUSE) && (sel == 0)) { + uint32_t old_cause, new_cause; + old_cause = kvm_read_c0_guest_cause(cop0); + new_cause = vcpu->arch.gprs[rt]; + /* Update R/W bits */ + kvm_change_c0_guest_cause(cop0, 0x08800300, + new_cause); + /* DC bit enabling/disabling timer? */ + if ((old_cause ^ new_cause) & CAUSEF_DC) { + if (new_cause & CAUSEF_DC) + kvm_mips_count_disable_cause(vcpu); + else + kvm_mips_count_enable_cause(vcpu); + } } else { cop0->reg[rd][sel] = vcpu->arch.gprs[rt]; #ifdef CONFIG_KVM_MIPS_DYN_TRANS @@ -887,7 +1393,7 @@ int kvm_mips_sync_icache(unsigned long va, struct kvm_vcpu *vcpu) printk("%s: va: %#lx, unmapped: %#x\n", __func__, va, CKSEG0ADDR(pa)); - mips32_SyncICache(CKSEG0ADDR(pa), 32); + local_flush_icache_range(CKSEG0ADDR(pa), 32); return 0; } @@ -1325,8 +1831,12 @@ kvm_mips_handle_tlbmod(unsigned long cause, uint32_t *opc, struct kvm_run *run, struct kvm_vcpu *vcpu) { enum emulation_result er = EMULATE_DONE; - #ifdef DEBUG + struct mips_coproc *cop0 = vcpu->arch.cop0; + unsigned long entryhi = (vcpu->arch.host_cp0_badvaddr & VPN2_MASK) | + (kvm_read_c0_guest_entryhi(cop0) & ASID_MASK); + int index; + /* * If address not in the guest TLB, then we are in trouble */ @@ -1553,8 +2063,7 @@ kvm_mips_handle_ri(unsigned long cause, uint32_t *opc, current_cpu_data.icache.linesz); break; case 2: /* Read count register */ - printk("RDHWR: Cont register\n"); - arch->gprs[rt] = kvm_read_c0_guest_count(cop0); + arch->gprs[rt] = kvm_mips_read_count(vcpu); break; case 3: /* Count register resolution */ switch (current_cpu_data.cputype) { @@ -1810,11 +2319,9 @@ kvm_mips_handle_tlbmiss(unsigned long cause, uint32_t *opc, er = EMULATE_FAIL; } } else { -#ifdef DEBUG kvm_debug ("Injecting hi: %#lx, lo0: %#lx, lo1: %#lx into shadow host TLB\n", tlb->tlb_hi, tlb->tlb_lo0, tlb->tlb_lo1); -#endif /* OK we have a Guest TLB entry, now inject it into the shadow host TLB */ kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, NULL, NULL); diff --git a/arch/mips/kvm/kvm_tlb.c b/arch/mips/kvm/kvm_tlb.c index 50ab9c4d4a5d..8a5a700ad8de 100644 --- a/arch/mips/kvm/kvm_tlb.c +++ b/arch/mips/kvm/kvm_tlb.c @@ -222,26 +222,19 @@ kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi, return -1; } - if (idx < 0) { - idx = read_c0_random() % current_cpu_data.tlbsize; - write_c0_index(idx); - mtc0_tlbw_hazard(); - } write_c0_entrylo0(entrylo0); write_c0_entrylo1(entrylo1); mtc0_tlbw_hazard(); - tlb_write_indexed(); + if (idx < 0) + tlb_write_random(); + else + tlb_write_indexed(); tlbw_use_hazard(); -#ifdef DEBUG - if (debug) { - kvm_debug("@ %#lx idx: %2d [entryhi(R): %#lx] " - "entrylo0(R): 0x%08lx, entrylo1(R): 0x%08lx\n", - vcpu->arch.pc, idx, read_c0_entryhi(), - read_c0_entrylo0(), read_c0_entrylo1()); - } -#endif + kvm_debug("@ %#lx idx: %2d [entryhi(R): %#lx] entrylo0(R): 0x%08lx, entrylo1(R): 0x%08lx\n", + vcpu->arch.pc, idx, read_c0_entryhi(), + read_c0_entrylo0(), read_c0_entrylo1()); /* Flush D-cache */ if (flush_dcache_mask) { @@ -348,11 +341,9 @@ int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr, mtc0_tlbw_hazard(); tlbw_use_hazard(); -#ifdef DEBUG kvm_debug ("@ %#lx idx: %2d [entryhi(R): %#lx] entrylo0 (R): 0x%08lx, entrylo1(R): 0x%08lx\n", vcpu->arch.pc, read_c0_index(), read_c0_entryhi(), read_c0_entrylo0(), read_c0_entrylo1()); -#endif /* Restore old ASID */ write_c0_entryhi(old_entryhi); @@ -400,10 +391,8 @@ kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | (0x3 << 3) | (tlb->tlb_lo1 & MIPS3_PG_D) | (tlb->tlb_lo1 & MIPS3_PG_V); -#ifdef DEBUG kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc, tlb->tlb_lo0, tlb->tlb_lo1); -#endif return kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1, tlb->tlb_mask); @@ -424,10 +413,8 @@ int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi) } } -#ifdef DEBUG kvm_debug("%s: entryhi: %#lx, index: %d lo0: %#lx, lo1: %#lx\n", __func__, entryhi, index, tlb[i].tlb_lo0, tlb[i].tlb_lo1); -#endif return index; } @@ -461,9 +448,7 @@ int kvm_mips_host_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long vaddr) local_irq_restore(flags); -#ifdef DEBUG kvm_debug("Host TLB lookup, %#lx, idx: %2d\n", vaddr, idx); -#endif return idx; } @@ -508,12 +493,9 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va) local_irq_restore(flags); -#ifdef DEBUG - if (idx > 0) { + if (idx > 0) kvm_debug("%s: Invalidated entryhi %#lx @ idx %d\n", __func__, - (va & VPN2_MASK) | (vcpu->arch.asid_map[va & ASID_MASK] & ASID_MASK), idx); - } -#endif + (va & VPN2_MASK) | kvm_mips_get_user_asid(vcpu), idx); return 0; } @@ -658,15 +640,30 @@ void kvm_local_flush_tlb_all(void) local_irq_restore(flags); } +/** + * kvm_mips_migrate_count() - Migrate timer. + * @vcpu: Virtual CPU. + * + * Migrate CP0_Count hrtimer to the current CPU by cancelling and restarting it + * if it was running prior to being cancelled. + * + * Must be called when the VCPU is migrated to a different CPU to ensure that + * timer expiry during guest execution interrupts the guest and causes the + * interrupt to be delivered in a timely manner. + */ +static void kvm_mips_migrate_count(struct kvm_vcpu *vcpu) +{ + if (hrtimer_cancel(&vcpu->arch.comparecount_timer)) + hrtimer_restart(&vcpu->arch.comparecount_timer); +} + /* Restore ASID once we are scheduled back after preemption */ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { unsigned long flags; int newasid = 0; -#ifdef DEBUG kvm_debug("%s: vcpu %p, cpu: %d\n", __func__, vcpu, cpu); -#endif /* Alocate new kernel and user ASIDs if needed */ @@ -682,17 +679,23 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu->arch.guest_user_mm.context.asid[cpu]; newasid++; - kvm_info("[%d]: cpu_context: %#lx\n", cpu, - cpu_context(cpu, current->mm)); - kvm_info("[%d]: Allocated new ASID for Guest Kernel: %#x\n", - cpu, vcpu->arch.guest_kernel_asid[cpu]); - kvm_info("[%d]: Allocated new ASID for Guest User: %#x\n", cpu, - vcpu->arch.guest_user_asid[cpu]); + kvm_debug("[%d]: cpu_context: %#lx\n", cpu, + cpu_context(cpu, current->mm)); + kvm_debug("[%d]: Allocated new ASID for Guest Kernel: %#x\n", + cpu, vcpu->arch.guest_kernel_asid[cpu]); + kvm_debug("[%d]: Allocated new ASID for Guest User: %#x\n", cpu, + vcpu->arch.guest_user_asid[cpu]); } if (vcpu->arch.last_sched_cpu != cpu) { - kvm_info("[%d->%d]KVM VCPU[%d] switch\n", - vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id); + kvm_debug("[%d->%d]KVM VCPU[%d] switch\n", + vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id); + /* + * Migrate the timer interrupt to the current CPU so that it + * always interrupts the guest and synchronously triggers a + * guest timer interrupt. + */ + kvm_mips_migrate_count(vcpu); } if (!newasid) { diff --git a/arch/mips/kvm/kvm_trap_emul.c b/arch/mips/kvm/kvm_trap_emul.c index 30d725321db1..693f952b2fbb 100644 --- a/arch/mips/kvm/kvm_trap_emul.c +++ b/arch/mips/kvm/kvm_trap_emul.c @@ -32,9 +32,7 @@ static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva) gpa = KVM_INVALID_ADDR; } -#ifdef DEBUG kvm_debug("%s: gva %#lx, gpa: %#llx\n", __func__, gva, gpa); -#endif return gpa; } @@ -85,11 +83,9 @@ static int kvm_trap_emul_handle_tlb_mod(struct kvm_vcpu *vcpu) if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0 || KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) { -#ifdef DEBUG kvm_debug ("USER/KSEG23 ADDR TLB MOD fault: cause %#lx, PC: %p, BadVaddr: %#lx\n", cause, opc, badvaddr); -#endif er = kvm_mips_handle_tlbmod(cause, opc, run, vcpu); if (er == EMULATE_DONE) @@ -138,11 +134,9 @@ static int kvm_trap_emul_handle_tlb_st_miss(struct kvm_vcpu *vcpu) } } else if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0 || KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) { -#ifdef DEBUG kvm_debug ("USER ADDR TLB LD fault: cause %#lx, PC: %p, BadVaddr: %#lx\n", cause, opc, badvaddr); -#endif er = kvm_mips_handle_tlbmiss(cause, opc, run, vcpu); if (er == EMULATE_DONE) ret = RESUME_GUEST; @@ -188,10 +182,8 @@ static int kvm_trap_emul_handle_tlb_ld_miss(struct kvm_vcpu *vcpu) } } else if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0 || KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) { -#ifdef DEBUG kvm_debug("USER ADDR TLB ST fault: PC: %#lx, BadVaddr: %#lx\n", vcpu->arch.pc, badvaddr); -#endif /* User Address (UA) fault, this could happen if * (1) TLB entry not present/valid in both Guest and shadow host TLBs, in this @@ -236,9 +228,7 @@ static int kvm_trap_emul_handle_addr_err_st(struct kvm_vcpu *vcpu) if (KVM_GUEST_KERNEL_MODE(vcpu) && (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1)) { -#ifdef DEBUG kvm_debug("Emulate Store to MMIO space\n"); -#endif er = kvm_mips_emulate_inst(cause, opc, run, vcpu); if (er == EMULATE_FAIL) { printk("Emulate Store to MMIO space failed\n"); @@ -268,9 +258,7 @@ static int kvm_trap_emul_handle_addr_err_ld(struct kvm_vcpu *vcpu) int ret = RESUME_GUEST; if (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1) { -#ifdef DEBUG kvm_debug("Emulate Load from MMIO space @ %#lx\n", badvaddr); -#endif er = kvm_mips_emulate_inst(cause, opc, run, vcpu); if (er == EMULATE_FAIL) { printk("Emulate Load from MMIO space failed\n"); @@ -401,6 +389,78 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu) return 0; } +static int kvm_trap_emul_get_one_reg(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg, + s64 *v) +{ + switch (reg->id) { + case KVM_REG_MIPS_CP0_COUNT: + *v = kvm_mips_read_count(vcpu); + break; + case KVM_REG_MIPS_COUNT_CTL: + *v = vcpu->arch.count_ctl; + break; + case KVM_REG_MIPS_COUNT_RESUME: + *v = ktime_to_ns(vcpu->arch.count_resume); + break; + case KVM_REG_MIPS_COUNT_HZ: + *v = vcpu->arch.count_hz; + break; + default: + return -EINVAL; + } + return 0; +} + +static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg, + s64 v) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + int ret = 0; + + switch (reg->id) { + case KVM_REG_MIPS_CP0_COUNT: + kvm_mips_write_count(vcpu, v); + break; + case KVM_REG_MIPS_CP0_COMPARE: + kvm_mips_write_compare(vcpu, v); + break; + case KVM_REG_MIPS_CP0_CAUSE: + /* + * If the timer is stopped or started (DC bit) it must look + * atomic with changes to the interrupt pending bits (TI, IRQ5). + * A timer interrupt should not happen in between. + */ + if ((kvm_read_c0_guest_cause(cop0) ^ v) & CAUSEF_DC) { + if (v & CAUSEF_DC) { + /* disable timer first */ + kvm_mips_count_disable_cause(vcpu); + kvm_change_c0_guest_cause(cop0, ~CAUSEF_DC, v); + } else { + /* enable timer last */ + kvm_change_c0_guest_cause(cop0, ~CAUSEF_DC, v); + kvm_mips_count_enable_cause(vcpu); + } + } else { + kvm_write_c0_guest_cause(cop0, v); + } + break; + case KVM_REG_MIPS_COUNT_CTL: + ret = kvm_mips_set_count_ctl(vcpu, v); + break; + case KVM_REG_MIPS_COUNT_RESUME: + ret = kvm_mips_set_count_resume(vcpu, v); + break; + case KVM_REG_MIPS_COUNT_HZ: + ret = kvm_mips_set_count_hz(vcpu, v); + break; + default: + return -EINVAL; + } + return ret; +} + static struct kvm_mips_callbacks kvm_trap_emul_callbacks = { /* exit handlers */ .handle_cop_unusable = kvm_trap_emul_handle_cop_unusable, @@ -423,6 +483,8 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = { .dequeue_io_int = kvm_mips_dequeue_io_int_cb, .irq_deliver = kvm_mips_irq_deliver_cb, .irq_clear = kvm_mips_irq_clear_cb, + .get_one_reg = kvm_trap_emul_get_one_reg, + .set_one_reg = kvm_trap_emul_set_one_reg, }; int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks) diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c index e422b38d3113..5aaa5c799731 100644 --- a/arch/mips/mm/cache.c +++ b/arch/mips/mm/cache.c @@ -30,6 +30,7 @@ void (*flush_cache_page)(struct vm_area_struct *vma, unsigned long page, unsigned long pfn); void (*flush_icache_range)(unsigned long start, unsigned long end); void (*local_flush_icache_range)(unsigned long start, unsigned long end); +EXPORT_SYMBOL_GPL(local_flush_icache_range); void (*__flush_cache_vmap)(void); void (*__flush_cache_vunmap)(void); diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c index 319009912142..3778a359f3ad 100644 --- a/arch/mips/mti-malta/malta-time.c +++ b/arch/mips/mti-malta/malta-time.c @@ -74,18 +74,8 @@ static void __init estimate_frequencies(void) unsigned int giccount = 0, gicstart = 0; #endif -#if defined (CONFIG_KVM_GUEST) && defined (CONFIG_KVM_HOST_FREQ) - unsigned int prid = read_c0_prid() & (PRID_COMP_MASK | PRID_IMP_MASK); - - /* - * XXXKYMA: hardwire the CPU frequency to Host Freq/4 - */ - count = (CONFIG_KVM_HOST_FREQ * 1000000) >> 3; - if ((prid != (PRID_COMP_MIPS | PRID_IMP_20KC)) && - (prid != (PRID_COMP_MIPS | PRID_IMP_25KF))) - count *= 2; - - mips_hpt_frequency = count; +#if defined(CONFIG_KVM_GUEST) && CONFIG_KVM_GUEST_TIMER_FREQ + mips_hpt_frequency = CONFIG_KVM_GUEST_TIMER_FREQ * 1000000; return; #endif diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index a27f5007062a..4181d7baabba 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -110,6 +110,7 @@ struct kvm_s390_sie_block { #define ICTL_ISKE 0x00004000 #define ICTL_SSKE 0x00002000 #define ICTL_RRBE 0x00001000 +#define ICTL_TPROT 0x00000200 __u32 ictl; /* 0x0048 */ __u32 eca; /* 0x004c */ #define ICPT_INST 0x04 diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index db608c3f9303..4653ac6e182b 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -292,7 +292,7 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu) wake_up(&vcpu->kvm->arch.ipte_wq); } -static void ipte_lock(struct kvm_vcpu *vcpu) +void ipte_lock(struct kvm_vcpu *vcpu) { if (vcpu->arch.sie_block->eca & 1) ipte_lock_siif(vcpu); @@ -300,7 +300,7 @@ static void ipte_lock(struct kvm_vcpu *vcpu) ipte_lock_simple(vcpu); } -static void ipte_unlock(struct kvm_vcpu *vcpu) +void ipte_unlock(struct kvm_vcpu *vcpu) { if (vcpu->arch.sie_block->eca & 1) ipte_unlock_siif(vcpu); @@ -645,6 +645,59 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, } /** + * guest_translate_address - translate guest logical into guest absolute address + * + * Parameter semantics are the same as the ones from guest_translate. + * The memory contents at the guest address are not changed. + * + * Note: The IPTE lock is not taken during this function, so the caller + * has to take care of this. + */ +int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, + unsigned long *gpa, int write) +{ + struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; + psw_t *psw = &vcpu->arch.sie_block->gpsw; + struct trans_exc_code_bits *tec; + union asce asce; + int rc; + + /* Access register mode is not supported yet. */ + if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG) + return -EOPNOTSUPP; + + gva = kvm_s390_logical_to_effective(vcpu, gva); + memset(pgm, 0, sizeof(*pgm)); + tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code; + tec->as = psw_bits(*psw).as; + tec->fsi = write ? FSI_STORE : FSI_FETCH; + tec->addr = gva >> PAGE_SHIFT; + if (is_low_address(gva) && low_address_protection_enabled(vcpu)) { + if (write) { + rc = pgm->code = PGM_PROTECTION; + return rc; + } + } + + asce.val = get_vcpu_asce(vcpu); + if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */ + rc = guest_translate(vcpu, gva, gpa, write); + if (rc > 0) { + if (rc == PGM_PROTECTION) + tec->b61 = 1; + pgm->code = rc; + } + } else { + rc = 0; + *gpa = kvm_s390_real_to_abs(vcpu, gva); + if (kvm_is_error_gpa(vcpu->kvm, *gpa)) + rc = pgm->code = PGM_ADDRESSING; + } + + return rc; +} + +/** * kvm_s390_check_low_addr_protection - check for low-address protection * @ga: Guest address * diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index a07ee08ac478..0149cf15058a 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -155,6 +155,9 @@ int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data, return kvm_read_guest(vcpu->kvm, gpa, data, len); } +int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, + unsigned long *gpa, int write); + int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data, unsigned long len, int write); @@ -324,6 +327,8 @@ int read_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data, return access_guest_real(vcpu, gra, data, len, 0); } +void ipte_lock(struct kvm_vcpu *vcpu); +void ipte_unlock(struct kvm_vcpu *vcpu); int ipte_lock_held(struct kvm_vcpu *vcpu); int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga); diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index bf0d9bc15bcd..90c8de22a2a0 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -442,7 +442,6 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - kvm_s390_vcpu_start(vcpu); break; case KVM_S390_PROGRAM_INT: VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x", diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index e519860c6031..43e191b25789 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -637,7 +637,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) if (sclp_has_siif()) vcpu->arch.sie_block->eca |= 1; vcpu->arch.sie_block->fac = (int) (long) vfacilities; - vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; + vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE | + ICTL_TPROT; + if (kvm_s390_cmma_enabled(vcpu->kvm)) { rc = kvm_s390_vcpu_setup_cmma(vcpu); if (rc) @@ -950,7 +952,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, vcpu->guest_debug = 0; kvm_s390_clear_bp_data(vcpu); - if (vcpu->guest_debug & ~VALID_GUESTDBG_FLAGS) + if (dbg->control & ~VALID_GUESTDBG_FLAGS) return -EINVAL; if (dbg->control & KVM_GUESTDBG_ENABLE) { diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 6296159ac883..f89c1cd67751 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -930,8 +930,9 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu) static int handle_tprot(struct kvm_vcpu *vcpu) { u64 address1, address2; - struct vm_area_struct *vma; - unsigned long user_address; + unsigned long hva, gpa; + int ret = 0, cc = 0; + bool writable; vcpu->stat.instruction_tprot++; @@ -942,32 +943,41 @@ static int handle_tprot(struct kvm_vcpu *vcpu) /* we only handle the Linux memory detection case: * access key == 0 - * guest DAT == off * everything else goes to userspace. */ if (address2 & 0xf0) return -EOPNOTSUPP; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) - return -EOPNOTSUPP; - - down_read(¤t->mm->mmap_sem); - user_address = __gmap_translate(address1, vcpu->arch.gmap); - if (IS_ERR_VALUE(user_address)) - goto out_inject; - vma = find_vma(current->mm, user_address); - if (!vma) - goto out_inject; - vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); - if (!(vma->vm_flags & VM_WRITE) && (vma->vm_flags & VM_READ)) - vcpu->arch.sie_block->gpsw.mask |= (1ul << 44); - if (!(vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_READ)) - vcpu->arch.sie_block->gpsw.mask |= (2ul << 44); - - up_read(¤t->mm->mmap_sem); - return 0; + ipte_lock(vcpu); + ret = guest_translate_address(vcpu, address1, &gpa, 1); + if (ret == PGM_PROTECTION) { + /* Write protected? Try again with read-only... */ + cc = 1; + ret = guest_translate_address(vcpu, address1, &gpa, 0); + } + if (ret) { + if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) { + ret = kvm_s390_inject_program_int(vcpu, ret); + } else if (ret > 0) { + /* Translation not available */ + kvm_s390_set_psw_cc(vcpu, 3); + ret = 0; + } + goto out_unlock; + } -out_inject: - up_read(¤t->mm->mmap_sem); - return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + hva = gfn_to_hva_prot(vcpu->kvm, gpa_to_gfn(gpa), &writable); + if (kvm_is_error_hva(hva)) { + ret = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + } else { + if (!writable) + cc = 1; /* Write not permitted ==> read-only */ + kvm_s390_set_psw_cc(vcpu, cc); + /* Note: CC2 only occurs for storage keys (not supported yet) */ + } +out_unlock: + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) + ipte_unlock(vcpu); + return ret; } int kvm_s390_handle_e5(struct kvm_vcpu *vcpu) diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index d0341d2e54b1..43079a48cc98 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -54,33 +54,23 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) { - struct kvm_s390_local_interrupt *li; - struct kvm_s390_interrupt_info *inti; + struct kvm_s390_interrupt s390int = { + .type = KVM_S390_INT_EMERGENCY, + .parm = vcpu->vcpu_id, + }; struct kvm_vcpu *dst_vcpu = NULL; + int rc = 0; if (cpu_addr < KVM_MAX_VCPUS) dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); if (!dst_vcpu) return SIGP_CC_NOT_OPERATIONAL; - inti = kzalloc(sizeof(*inti), GFP_KERNEL); - if (!inti) - return -ENOMEM; - - inti->type = KVM_S390_INT_EMERGENCY; - inti->emerg.code = vcpu->vcpu_id; + rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int); + if (!rc) + VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); - li = &dst_vcpu->arch.local_int; - spin_lock_bh(&li->lock); - list_add_tail(&inti->list, &li->list); - atomic_set(&li->active, 1); - atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - if (waitqueue_active(li->wq)) - wake_up_interruptible(li->wq); - spin_unlock_bh(&li->lock); - VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); - - return SIGP_CC_ORDER_CODE_ACCEPTED; + return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED; } static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr, @@ -116,33 +106,23 @@ static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr, static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) { - struct kvm_s390_local_interrupt *li; - struct kvm_s390_interrupt_info *inti; + struct kvm_s390_interrupt s390int = { + .type = KVM_S390_INT_EXTERNAL_CALL, + .parm = vcpu->vcpu_id, + }; struct kvm_vcpu *dst_vcpu = NULL; + int rc; if (cpu_addr < KVM_MAX_VCPUS) dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); if (!dst_vcpu) return SIGP_CC_NOT_OPERATIONAL; - inti = kzalloc(sizeof(*inti), GFP_KERNEL); - if (!inti) - return -ENOMEM; - - inti->type = KVM_S390_INT_EXTERNAL_CALL; - inti->extcall.code = vcpu->vcpu_id; - - li = &dst_vcpu->arch.local_int; - spin_lock_bh(&li->lock); - list_add_tail(&inti->list, &li->list); - atomic_set(&li->active, 1); - atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - if (waitqueue_active(li->wq)) - wake_up_interruptible(li->wq); - spin_unlock_bh(&li->lock); - VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr); + rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int); + if (!rc) + VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr); - return SIGP_CC_ORDER_CODE_ACCEPTED; + return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED; } static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 2fa7ab069817..e4e833d3d7d7 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -161,6 +161,7 @@ #define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */ #define NoWrite ((u64)1 << 45) /* No writeback */ #define SrcWrite ((u64)1 << 46) /* Write back src operand */ +#define NoMod ((u64)1 << 47) /* Mod field is ignored */ #define DstXacc (DstAccLo | SrcAccHi | SrcWrite) @@ -1077,7 +1078,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, ctxt->modrm_rm |= (ctxt->modrm & 0x07); ctxt->modrm_seg = VCPU_SREG_DS; - if (ctxt->modrm_mod == 3) { + if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) { op->type = OP_REG; op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; op->addr.reg = decode_register(ctxt, ctxt->modrm_rm, @@ -3877,10 +3878,12 @@ static const struct opcode twobyte_table[256] = { N, N, N, N, N, N, N, N, D(ImplicitOps | ModRM), N, N, N, N, N, N, D(ImplicitOps | ModRM), /* 0x20 - 0x2F */ - DIP(ModRM | DstMem | Priv | Op3264, cr_read, check_cr_read), - DIP(ModRM | DstMem | Priv | Op3264, dr_read, check_dr_read), - IIP(ModRM | SrcMem | Priv | Op3264, em_cr_write, cr_write, check_cr_write), - IIP(ModRM | SrcMem | Priv | Op3264, em_dr_write, dr_write, check_dr_write), + DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read), + DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read), + IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_cr_write, cr_write, + check_cr_write), + IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_dr_write, dr_write, + check_dr_write), N, N, N, N, GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29), GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29), diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 9736529ade08..006911858174 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -360,6 +360,8 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) static inline void apic_set_isr(int vec, struct kvm_lapic *apic) { + /* Note that we never get here with APIC virtualization enabled. */ + if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR)) ++apic->isr_count; BUG_ON(apic->isr_count > MAX_APIC_VECTOR); @@ -371,12 +373,48 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic) apic->highest_isr_cache = vec; } +static inline int apic_find_highest_isr(struct kvm_lapic *apic) +{ + int result; + + /* + * Note that isr_count is always 1, and highest_isr_cache + * is always -1, with APIC virtualization enabled. + */ + if (!apic->isr_count) + return -1; + if (likely(apic->highest_isr_cache != -1)) + return apic->highest_isr_cache; + + result = find_highest_vector(apic->regs + APIC_ISR); + ASSERT(result == -1 || result >= 16); + + return result; +} + static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) { - if (__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR)) + struct kvm_vcpu *vcpu; + if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR)) + return; + + vcpu = apic->vcpu; + + /* + * We do get here for APIC virtualization enabled if the guest + * uses the Hyper-V APIC enlightenment. In this case we may need + * to trigger a new interrupt delivery by writing the SVI field; + * on the other hand isr_count and highest_isr_cache are unused + * and must be left alone. + */ + if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) + kvm_x86_ops->hwapic_isr_update(vcpu->kvm, + apic_find_highest_isr(apic)); + else { --apic->isr_count; - BUG_ON(apic->isr_count < 0); - apic->highest_isr_cache = -1; + BUG_ON(apic->isr_count < 0); + apic->highest_isr_cache = -1; + } } int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) @@ -456,22 +494,6 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); } -static inline int apic_find_highest_isr(struct kvm_lapic *apic) -{ - int result; - - /* Note that isr_count is always 1 with vid enabled */ - if (!apic->isr_count) - return -1; - if (likely(apic->highest_isr_cache != -1)) - return apic->highest_isr_cache; - - result = find_highest_vector(apic->regs + APIC_ISR); - ASSERT(result == -1 || result >= 16); - - return result; -} - void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr) { struct kvm_lapic *apic = vcpu->arch.apic; @@ -1605,6 +1627,8 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) int vector = kvm_apic_has_interrupt(vcpu); struct kvm_lapic *apic = vcpu->arch.apic; + /* Note that we never get here with APIC virtualization enabled. */ + if (vector == -1) return -1; diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 6929571b79b0..24e9033f8b3f 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -317,6 +317,7 @@ header-y += ppp-ioctl.h header-y += ppp_defs.h header-y += pps.h header-y += prctl.h +header-y += psci.h header-y += ptp_clock.h header-y += ptrace.h header-y += qnx4_fs.h diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 16c923de85e7..e11d8f170a62 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -171,6 +171,7 @@ struct kvm_pit_config { #define KVM_EXIT_WATCHDOG 21 #define KVM_EXIT_S390_TSCH 22 #define KVM_EXIT_EPR 23 +#define KVM_EXIT_SYSTEM_EVENT 24 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -301,6 +302,13 @@ struct kvm_run { struct { __u32 epr; } epr; + /* KVM_EXIT_SYSTEM_EVENT */ + struct { +#define KVM_SYSTEM_EVENT_SHUTDOWN 1 +#define KVM_SYSTEM_EVENT_RESET 2 + __u32 type; + __u64 flags; + } system_event; /* Fix the size of the union. */ char padding[256]; }; @@ -748,7 +756,8 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_S390_IRQCHIP 99 #define KVM_CAP_IOEVENTFD_NO_LENGTH 100 #define KVM_CAP_VM_ATTRIBUTES 101 -#define KVM_CAP_PPC_FIXUP_HCALL 102 +#define KVM_CAP_ARM_PSCI_0_2 102 +#define KVM_CAP_PPC_FIXUP_HCALL 103 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h new file mode 100644 index 000000000000..310d83e0a91b --- /dev/null +++ b/include/uapi/linux/psci.h @@ -0,0 +1,90 @@ +/* + * ARM Power State and Coordination Interface (PSCI) header + * + * This header holds common PSCI defines and macros shared + * by: ARM kernel, ARM64 kernel, KVM ARM/ARM64 and user space. + * + * Copyright (C) 2014 Linaro Ltd. + * Author: Anup Patel <anup.patel@linaro.org> + */ + +#ifndef _UAPI_LINUX_PSCI_H +#define _UAPI_LINUX_PSCI_H + +/* + * PSCI v0.1 interface + * + * The PSCI v0.1 function numbers are implementation defined. + * + * Only PSCI return values such as: SUCCESS, NOT_SUPPORTED, + * INVALID_PARAMS, and DENIED defined below are applicable + * to PSCI v0.1. + */ + +/* PSCI v0.2 interface */ +#define PSCI_0_2_FN_BASE 0x84000000 +#define PSCI_0_2_FN(n) (PSCI_0_2_FN_BASE + (n)) +#define PSCI_0_2_64BIT 0x40000000 +#define PSCI_0_2_FN64_BASE \ + (PSCI_0_2_FN_BASE + PSCI_0_2_64BIT) +#define PSCI_0_2_FN64(n) (PSCI_0_2_FN64_BASE + (n)) + +#define PSCI_0_2_FN_PSCI_VERSION PSCI_0_2_FN(0) +#define PSCI_0_2_FN_CPU_SUSPEND PSCI_0_2_FN(1) +#define PSCI_0_2_FN_CPU_OFF PSCI_0_2_FN(2) +#define PSCI_0_2_FN_CPU_ON PSCI_0_2_FN(3) +#define PSCI_0_2_FN_AFFINITY_INFO PSCI_0_2_FN(4) +#define PSCI_0_2_FN_MIGRATE PSCI_0_2_FN(5) +#define PSCI_0_2_FN_MIGRATE_INFO_TYPE PSCI_0_2_FN(6) +#define PSCI_0_2_FN_MIGRATE_INFO_UP_CPU PSCI_0_2_FN(7) +#define PSCI_0_2_FN_SYSTEM_OFF PSCI_0_2_FN(8) +#define PSCI_0_2_FN_SYSTEM_RESET PSCI_0_2_FN(9) + +#define PSCI_0_2_FN64_CPU_SUSPEND PSCI_0_2_FN64(1) +#define PSCI_0_2_FN64_CPU_ON PSCI_0_2_FN64(3) +#define PSCI_0_2_FN64_AFFINITY_INFO PSCI_0_2_FN64(4) +#define PSCI_0_2_FN64_MIGRATE PSCI_0_2_FN64(5) +#define PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU PSCI_0_2_FN64(7) + +/* PSCI v0.2 power state encoding for CPU_SUSPEND function */ +#define PSCI_0_2_POWER_STATE_ID_MASK 0xffff +#define PSCI_0_2_POWER_STATE_ID_SHIFT 0 +#define PSCI_0_2_POWER_STATE_TYPE_SHIFT 16 +#define PSCI_0_2_POWER_STATE_TYPE_MASK \ + (0x1 << PSCI_0_2_POWER_STATE_TYPE_SHIFT) +#define PSCI_0_2_POWER_STATE_AFFL_SHIFT 24 +#define PSCI_0_2_POWER_STATE_AFFL_MASK \ + (0x3 << PSCI_0_2_POWER_STATE_AFFL_SHIFT) + +/* PSCI v0.2 affinity level state returned by AFFINITY_INFO */ +#define PSCI_0_2_AFFINITY_LEVEL_ON 0 +#define PSCI_0_2_AFFINITY_LEVEL_OFF 1 +#define PSCI_0_2_AFFINITY_LEVEL_ON_PENDING 2 + +/* PSCI v0.2 multicore support in Trusted OS returned by MIGRATE_INFO_TYPE */ +#define PSCI_0_2_TOS_UP_MIGRATE 0 +#define PSCI_0_2_TOS_UP_NO_MIGRATE 1 +#define PSCI_0_2_TOS_MP 2 + +/* PSCI version decoding (independent of PSCI version) */ +#define PSCI_VERSION_MAJOR_SHIFT 16 +#define PSCI_VERSION_MINOR_MASK \ + ((1U << PSCI_VERSION_MAJOR_SHIFT) - 1) +#define PSCI_VERSION_MAJOR_MASK ~PSCI_VERSION_MINOR_MASK +#define PSCI_VERSION_MAJOR(ver) \ + (((ver) & PSCI_VERSION_MAJOR_MASK) >> PSCI_VERSION_MAJOR_SHIFT) +#define PSCI_VERSION_MINOR(ver) \ + ((ver) & PSCI_VERSION_MINOR_MASK) + +/* PSCI return values (inclusive of all PSCI versions) */ +#define PSCI_RET_SUCCESS 0 +#define PSCI_RET_NOT_SUPPORTED -1 +#define PSCI_RET_INVALID_PARAMS -2 +#define PSCI_RET_DENIED -3 +#define PSCI_RET_ALREADY_ON -4 +#define PSCI_RET_ON_PENDING -5 +#define PSCI_RET_INTERNAL_FAILURE -6 +#define PSCI_RET_NOT_PRESENT -7 +#define PSCI_RET_DISABLED -8 + +#endif /* _UAPI_LINUX_PSCI_H */ |