-rw-r--r--  arch/arm64/kvm/hyp/include/nvhe/gfp.h  1
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/mem_protect.c  13
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/page_alloc.c  15
-rw-r--r--  arch/arm64/kvm/mmu.c  6
-rw-r--r--  arch/x86/include/asm/kvm_host.h  2
-rw-r--r--  arch/x86/include/asm/kvmclock.h  14
-rw-r--r--  arch/x86/kernel/kvmclock.c  13
-rw-r--r--  arch/x86/kvm/cpuid.c  4
-rw-r--r--  arch/x86/kvm/svm/sev.c  2
-rw-r--r--  arch/x86/kvm/x86.c  31
-rw-r--r--  drivers/ptp/ptp_kvm_x86.c  9
-rw-r--r--  tools/testing/selftests/kvm/include/x86_64/processor.h  2
-rw-r--r--  tools/testing/selftests/kvm/rseq_test.c  69
13 files changed, 130 insertions, 51 deletions
diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
index fb0f523d1492..0a048dc06a7d 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
@@ -24,6 +24,7 @@ struct hyp_pool {
/* Allocation */
void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order);
+void hyp_split_page(struct hyp_page *page);
void hyp_get_page(struct hyp_pool *pool, void *addr);
void hyp_put_page(struct hyp_pool *pool, void *addr);
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index bacd493a4eac..34eeb524b686 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -35,7 +35,18 @@ const u8 pkvm_hyp_id = 1;
static void *host_s2_zalloc_pages_exact(size_t size)
{
- return hyp_alloc_pages(&host_s2_pool, get_order(size));
+ void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size));
+
+ hyp_split_page(hyp_virt_to_page(addr));
+
+ /*
+ * The size of concatenated PGDs is always a power-of-two multiple of PAGE_SIZE,
+ * so there should be no need to free any of the tail pages to make the
+ * allocation exact.
+ */
+ WARN_ON(size != (PAGE_SIZE << get_order(size)));
+
+ return addr;
}
static void *host_s2_zalloc_page(void *pool)
diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
index 41fc25bdfb34..0bd7701ad1df 100644
--- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
@@ -152,6 +152,7 @@ static inline void hyp_page_ref_inc(struct hyp_page *p)
static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
{
+ BUG_ON(!p->refcount);
p->refcount--;
return (p->refcount == 0);
}
@@ -193,6 +194,20 @@ void hyp_get_page(struct hyp_pool *pool, void *addr)
hyp_spin_unlock(&pool->lock);
}
+void hyp_split_page(struct hyp_page *p)
+{
+ unsigned short order = p->order;
+ unsigned int i;
+
+ p->order = 0;
+ for (i = 1; i < (1 << order); i++) {
+ struct hyp_page *tail = p + i;
+
+ tail->order = 0;
+ hyp_set_page_refcounted(tail);
+ }
+}
+
void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order)
{
unsigned short i = order;
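The new hyp_split_page() demotes one order-N block into 2^N independently refcounted order-0 pages; host_s2_zalloc_pages_exact() calls it so that, presumably, each page of the concatenated stage-2 PGD can later be hyp_get_page()/hyp_put_page() on its own without underflowing a tail page's refcount (note the new BUG_ON above). A minimal userspace model of just that bookkeeping (toy_page and toy_split_page are illustrative names, not the nVHE code):

	#include <assert.h>
	#include <stdio.h>

	/* Toy stand-in for struct hyp_page: only the order/refcount bookkeeping. */
	struct toy_page {
		unsigned short order;
		unsigned short refcount;
	};

	/* Same idea as hyp_split_page(): demote an order-N head page to 2^N
	 * order-0 pages and give each tail page its own reference. */
	static void toy_split_page(struct toy_page *p)
	{
		unsigned short order = p->order;
		unsigned int i;

		p->order = 0;
		for (i = 1; i < (1u << order); i++) {
			p[i].order = 0;
			p[i].refcount = 1;	/* hyp_set_page_refcounted() equivalent */
		}
	}

	int main(void)
	{
		struct toy_page pages[8] = { { .order = 3, .refcount = 1 } };
		int i;

		toy_split_page(&pages[0]);
		for (i = 0; i < 8; i++)
			assert(pages[i].order == 0 && pages[i].refcount == 1);
		printf("order-3 block split into 8 refcounted order-0 pages\n");
		return 0;
	}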
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 1a94a7ca48f2..69bd1732a299 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1529,8 +1529,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
* when updating the PG_mte_tagged page flag, see
* sanitise_mte_tags for more details.
*/
- if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED)
- return -EINVAL;
+ if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED) {
+ ret = -EINVAL;
+ break;
+ }
if (vma->vm_flags & VM_PFNMAP) {
/* IO region dirty page logging not allowed */
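Switching from an early return to a break in the mmu.c hunk matters because the VMA walk presumably runs with the mmap lock held: breaking out funnels the error through the common exit path that releases the lock, whereas returning from the middle of the loop would leave it held. A generic, self-contained sketch of that pattern (walk_range and map_lock are made-up names, not the arm64 code):

	#include <errno.h>
	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t map_lock = PTHREAD_MUTEX_INITIALIZER;

	/* Walk a range of items under a lock.  On error, break out of the loop
	 * so the single unlock below always runs -- returning from inside the
	 * loop would leave map_lock held. */
	static int walk_range(const int *items, int n)
	{
		int ret = 0;
		int i;

		pthread_mutex_lock(&map_lock);
		for (i = 0; i < n; i++) {
			if (items[i] < 0) {	/* reject invalid entries */
				ret = -EINVAL;
				break;		/* not "return -EINVAL" */
			}
		}
		pthread_mutex_unlock(&map_lock);
		return ret;
	}

	int main(void)
	{
		int good[] = { 1, 2, 3 }, bad[] = { 1, -1, 3 };

		printf("good: %d, bad: %d\n", walk_range(good, 3), walk_range(bad, 3));
		return 0;
	}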
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f8f48a7ec577..70771376e246 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1097,7 +1097,7 @@ struct kvm_arch {
u64 cur_tsc_generation;
int nr_vcpus_matched_tsc;
- spinlock_t pvclock_gtod_sync_lock;
+ raw_spinlock_t pvclock_gtod_sync_lock;
bool use_master_clock;
u64 master_kernel_ns;
u64 master_cycle_now;
diff --git a/arch/x86/include/asm/kvmclock.h b/arch/x86/include/asm/kvmclock.h
index eceea9299097..6c5765192102 100644
--- a/arch/x86/include/asm/kvmclock.h
+++ b/arch/x86/include/asm/kvmclock.h
@@ -2,6 +2,20 @@
#ifndef _ASM_X86_KVM_CLOCK_H
#define _ASM_X86_KVM_CLOCK_H
+#include <linux/percpu.h>
+
extern struct clocksource kvm_clock;
+DECLARE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
+
+static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void)
+{
+ return &this_cpu_read(hv_clock_per_cpu)->pvti;
+}
+
+static inline struct pvclock_vsyscall_time_info *this_cpu_hvclock(void)
+{
+ return this_cpu_read(hv_clock_per_cpu);
+}
+
#endif /* _ASM_X86_KVM_CLOCK_H */
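Exporting hv_clock_per_cpu and the two accessors from kvmclock.h lets other code (the ptp_kvm consumer further down in this diff) read the current CPU's pvclock record. Readers of that record follow the pvclock version protocol: the version field is odd while the host is updating the record, so a snapshot is only valid if the same even version is observed before and after the copy. A self-contained userspace model of that reader loop (toy_pvti and read_system_time are illustrative; the kernel uses its own structures and barrier helpers):

	#include <stdint.h>
	#include <stdio.h>

	/* Minimal stand-in for struct pvclock_vcpu_time_info. */
	struct toy_pvti {
		volatile uint32_t version;	/* odd while being updated */
		uint64_t tsc_timestamp;
		uint64_t system_time;
	};

	/* Reader side of the pvclock version protocol: retry until the snapshot
	 * was taken between two identical, even versions. */
	static uint64_t read_system_time(const struct toy_pvti *pvti)
	{
		uint32_t before, after;
		uint64_t time;

		do {
			before = pvti->version;
			__sync_synchronize();		/* read barrier */
			time = pvti->system_time;
			__sync_synchronize();		/* read barrier */
			after = pvti->version;
		} while ((before & 1) || before != after);

		return time;
	}

	int main(void)
	{
		struct toy_pvti pvti = { .version = 2, .system_time = 123456789 };

		printf("system_time = %llu\n",
		       (unsigned long long)read_system_time(&pvti));
		return 0;
	}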
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index ad273e5861c1..73c74b961d0f 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -49,18 +49,9 @@ early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
static struct pvclock_vsyscall_time_info
hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __bss_decrypted __aligned(PAGE_SIZE);
static struct pvclock_wall_clock wall_clock __bss_decrypted;
-static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
static struct pvclock_vsyscall_time_info *hvclock_mem;
-
-static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void)
-{
- return &this_cpu_read(hv_clock_per_cpu)->pvti;
-}
-
-static inline struct pvclock_vsyscall_time_info *this_cpu_hvclock(void)
-{
- return this_cpu_read(hv_clock_per_cpu);
-}
+DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
+EXPORT_PER_CPU_SYMBOL_GPL(hv_clock_per_cpu);
/*
* The wallclock is the time of day when we booted. Since then, some time may
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index fe03bd978761..751aa85a3001 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -65,8 +65,8 @@ static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
for (i = 0; i < nent; i++) {
e = &entries[i];
- if (e->function == function && (e->index == index ||
- !(e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX)))
+ if (e->function == function &&
+ (!(e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) || e->index == index))
return e;
}
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index c36b5fe4c27c..e672493b5d8d 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2583,7 +2583,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
return -EINVAL;
return kvm_sev_es_string_io(&svm->vcpu, size, port,
- svm->ghcb_sa, svm->ghcb_sa_len, in);
+ svm->ghcb_sa, svm->ghcb_sa_len / size, in);
}
void sev_es_init_vmcb(struct vcpu_svm *svm)
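The sev.c change is an argument-units fix: ghcb_sa_len is the size of the shared buffer in bytes, while the string-I/O helper evidently takes a repetition count, so the value has to be scaled by the element size. For example, a 512-byte buffer driving 2-byte (word) port I/O describes 512 / 2 = 256 repetitions, not 512.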
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index aabd3a2ec1bc..5c82eff19e4a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2542,7 +2542,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
kvm_vcpu_write_tsc_offset(vcpu, offset);
raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
- spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags);
+ raw_spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags);
if (!matched) {
kvm->arch.nr_vcpus_matched_tsc = 0;
} else if (!already_matched) {
@@ -2550,7 +2550,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
}
kvm_track_tsc_matching(vcpu);
- spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags);
+ raw_spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags);
}
static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
@@ -2780,9 +2780,9 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
kvm_make_mclock_inprogress_request(kvm);
/* no guest entries from this point */
- spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+ raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
pvclock_update_vm_gtod_copy(kvm);
- spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+ raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@ -2800,15 +2800,15 @@ u64 get_kvmclock_ns(struct kvm *kvm)
unsigned long flags;
u64 ret;
- spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+ raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
if (!ka->use_master_clock) {
- spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+ raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
return get_kvmclock_base_ns() + ka->kvmclock_offset;
}
hv_clock.tsc_timestamp = ka->master_cycle_now;
hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
- spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+ raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
/* both __this_cpu_read() and rdtsc() should be on the same cpu */
get_cpu();
@@ -2902,13 +2902,13 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
* If the host uses TSC clock, then passthrough TSC as stable
* to the guest.
*/
- spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+ raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
use_master_clock = ka->use_master_clock;
if (use_master_clock) {
host_tsc = ka->master_cycle_now;
kernel_ns = ka->master_kernel_ns;
}
- spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+ raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
/* Keep irq disabled to prevent changes to the clock */
local_irq_save(flags);
@@ -6100,13 +6100,13 @@ set_pit2_out:
* is slightly ahead) here we risk going negative on unsigned
* 'system_time' when 'user_ns.clock' is very small.
*/
- spin_lock_irq(&ka->pvclock_gtod_sync_lock);
+ raw_spin_lock_irq(&ka->pvclock_gtod_sync_lock);
if (kvm->arch.use_master_clock)
now_ns = ka->master_kernel_ns;
else
now_ns = get_kvmclock_base_ns();
ka->kvmclock_offset = user_ns.clock - now_ns;
- spin_unlock_irq(&ka->pvclock_gtod_sync_lock);
+ raw_spin_unlock_irq(&ka->pvclock_gtod_sync_lock);
kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
break;
@@ -8139,9 +8139,9 @@ static void kvm_hyperv_tsc_notifier(void)
list_for_each_entry(kvm, &vm_list, vm_list) {
struct kvm_arch *ka = &kvm->arch;
- spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+ raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
pvclock_update_vm_gtod_copy(kvm);
- spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+ raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
kvm_for_each_vcpu(cpu, vcpu, kvm)
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@ -11182,7 +11182,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
raw_spin_lock_init(&kvm->arch.tsc_write_lock);
mutex_init(&kvm->arch.apic_map_lock);
- spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
+ raw_spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
kvm->arch.kvmclock_offset = -get_kvmclock_base_ns();
pvclock_update_vm_gtod_copy(kvm);
@@ -11392,7 +11392,8 @@ static int memslot_rmap_alloc(struct kvm_memory_slot *slot,
int level = i + 1;
int lpages = __kvm_mmu_slot_lpages(slot, npages, level);
- WARN_ON(slot->arch.rmap[i]);
+ if (slot->arch.rmap[i])
+ continue;
slot->arch.rmap[i] = kvcalloc(lpages, sz, GFP_KERNEL_ACCOUNT);
if (!slot->arch.rmap[i]) {
diff --git a/drivers/ptp/ptp_kvm_x86.c b/drivers/ptp/ptp_kvm_x86.c
index 3dd519dfc473..d0096cd7096a 100644
--- a/drivers/ptp/ptp_kvm_x86.c
+++ b/drivers/ptp/ptp_kvm_x86.c
@@ -15,8 +15,6 @@
#include <linux/ptp_clock_kernel.h>
#include <linux/ptp_kvm.h>
-struct pvclock_vsyscall_time_info *hv_clock;
-
static phys_addr_t clock_pair_gpa;
static struct kvm_clock_pairing clock_pair;
@@ -28,8 +26,7 @@ int kvm_arch_ptp_init(void)
return -ENODEV;
clock_pair_gpa = slow_virt_to_phys(&clock_pair);
- hv_clock = pvclock_get_pvti_cpu0_va();
- if (!hv_clock)
+ if (!pvclock_get_pvti_cpu0_va())
return -ENODEV;
ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING, clock_pair_gpa,
@@ -64,10 +61,8 @@ int kvm_arch_ptp_get_crosststamp(u64 *cycle, struct timespec64 *tspec,
struct pvclock_vcpu_time_info *src;
unsigned int version;
long ret;
- int cpu;
- cpu = smp_processor_id();
- src = &hv_clock[cpu].pvti;
+ src = this_cpu_pvti();
do {
/*
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index eba8bd08293e..05e65ca1c30c 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -315,7 +315,7 @@ static inline void set_xmm(int n, unsigned long val)
#define GET_XMM(__xmm) \
({ \
unsigned long __val; \
- asm volatile("movq %%"#__xmm", %0" : "=r"(__val) : : #__xmm); \
+ asm volatile("movq %%"#__xmm", %0" : "=r"(__val)); \
__val; \
})
diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
index c5e0dd664a7b..4158da0da2bb 100644
--- a/tools/testing/selftests/kvm/rseq_test.c
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@ -10,6 +10,7 @@
#include <signal.h>
#include <syscall.h>
#include <sys/ioctl.h>
+#include <sys/sysinfo.h>
#include <asm/barrier.h>
#include <linux/atomic.h>
#include <linux/rseq.h>
@@ -39,6 +40,7 @@ static __thread volatile struct rseq __rseq = {
static pthread_t migration_thread;
static cpu_set_t possible_mask;
+static int min_cpu, max_cpu;
static bool done;
static atomic_t seq_cnt;
@@ -57,20 +59,37 @@ static void sys_rseq(int flags)
TEST_ASSERT(!r, "rseq failed, errno = %d (%s)", errno, strerror(errno));
}
+static int next_cpu(int cpu)
+{
+ /*
+ * Advance to the next CPU, skipping those that weren't in the original
+ * affinity set. Sadly, there is no CPU_SET_FOR_EACH, and cpu_set_t's
+ * data storage is considered opaque. Note, if this task is pinned
+ * to a small set of discontiguous CPUs, e.g. 2 and 1023, this loop will
+ * burn a lot of cycles and the test will take longer than normal to
+ * complete.
+ */
+ do {
+ cpu++;
+ if (cpu > max_cpu) {
+ cpu = min_cpu;
+ TEST_ASSERT(CPU_ISSET(cpu, &possible_mask),
+ "Min CPU = %d must always be usable", cpu);
+ break;
+ }
+ } while (!CPU_ISSET(cpu, &possible_mask));
+
+ return cpu;
+}
+
static void *migration_worker(void *ign)
{
cpu_set_t allowed_mask;
- int r, i, nr_cpus, cpu;
+ int r, i, cpu;
CPU_ZERO(&allowed_mask);
- nr_cpus = CPU_COUNT(&possible_mask);
-
- for (i = 0; i < NR_TASK_MIGRATIONS; i++) {
- cpu = i % nr_cpus;
- if (!CPU_ISSET(cpu, &possible_mask))
- continue;
-
+ for (i = 0, cpu = min_cpu; i < NR_TASK_MIGRATIONS; i++, cpu = next_cpu(cpu)) {
CPU_SET(cpu, &allowed_mask);
/*
@@ -154,6 +173,36 @@ static void *migration_worker(void *ign)
return NULL;
}
+static int calc_min_max_cpu(void)
+{
+ int i, cnt, nproc;
+
+ if (CPU_COUNT(&possible_mask) < 2)
+ return -EINVAL;
+
+ /*
+ * CPU_SET doesn't provide a FOR_EACH helper; get the min/max CPU that
+ * this task is affined to in order to reduce the time spent querying
+ * unusable CPUs, e.g. if this task is pinned to a small percentage of
+ * total CPUs.
+ */
+ nproc = get_nprocs_conf();
+ min_cpu = -1;
+ max_cpu = -1;
+ cnt = 0;
+
+ for (i = 0; i < nproc; i++) {
+ if (!CPU_ISSET(i, &possible_mask))
+ continue;
+ if (min_cpu == -1)
+ min_cpu = i;
+ max_cpu = i;
+ cnt++;
+ }
+
+ return (cnt < 2) ? -EINVAL : 0;
+}
+
int main(int argc, char *argv[])
{
int r, i, snapshot;
@@ -167,8 +216,8 @@ int main(int argc, char *argv[])
TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno,
strerror(errno));
- if (CPU_COUNT(&possible_mask) < 2) {
- print_skip("Only one CPU, task migration not possible\n");
+ if (calc_min_max_cpu()) {
+ print_skip("Only one usable CPU, task migration not possible");
exit(KSFT_SKIP);
}
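Taken together, calc_min_max_cpu() and next_cpu() give the migration worker a cycle over the usable CPUs that wraps from max_cpu back to min_cpu. A standalone sketch exercising the same walk over a toy affinity mask (the mask and bounds are hard-coded here; the test derives them from sched_getaffinity()):

	#define _GNU_SOURCE
	#include <sched.h>
	#include <stdio.h>

	static cpu_set_t possible_mask;
	static int min_cpu, max_cpu;

	/* Same walk as the test's next_cpu(): advance, skip unset CPUs, wrap to min. */
	static int next_cpu(int cpu)
	{
		do {
			cpu++;
			if (cpu > max_cpu) {
				cpu = min_cpu;
				break;
			}
		} while (!CPU_ISSET(cpu, &possible_mask));

		return cpu;
	}

	int main(void)
	{
		int cpu, i;

		CPU_ZERO(&possible_mask);
		CPU_SET(2, &possible_mask);	/* toy mask: CPUs 2, 5 and 6 */
		CPU_SET(5, &possible_mask);
		CPU_SET(6, &possible_mask);
		min_cpu = 2;
		max_cpu = 6;

		cpu = min_cpu;
		for (i = 0; i < 6; i++) {	/* prints: 2 5 6 2 5 6 */
			printf("%d ", cpu);
			cpu = next_cpu(cpu);
		}
		printf("\n");
		return 0;
	}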