summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- arch/arm/include/asm/kvm_mmu.h 13
-rw-r--r-- arch/arm/kvm/mmu.c 75
-rw-r--r-- arch/arm64/include/asm/kvm_arm.h 5
-rw-r--r-- arch/arm64/include/asm/kvm_mmu.h 48
-rw-r--r-- arch/s390/kvm/kvm-s390.c 1
-rw-r--r-- arch/x86/kvm/i8259.c 1
-rw-r--r-- arch/x86/kvm/vmx.c 11
-rw-r--r-- arch/x86/kvm/x86.c 1
-rw-r--r-- drivers/base/regmap/regcache-rbtree.c 2
-rw-r--r-- drivers/base/regmap/regcache.c 6
-rw-r--r-- drivers/base/regmap/regmap-irq.c 3
-rw-r--r-- drivers/char/virtio_console.c 19
-rw-r--r-- drivers/rpmsg/virtio_rpmsg_bus.c 17
-rw-r--r-- drivers/virtio/virtio_balloon.c 21
-rw-r--r-- drivers/virtio/virtio_mmio.c 90
-rw-r--r-- fs/proc/task_mmu.c 3
-rw-r--r-- include/kvm/arm_vgic.h 1
-rw-r--r-- include/uapi/linux/virtio_blk.h 8
-rw-r--r-- include/uapi/linux/virtio_scsi.h 12
-rw-r--r-- net/9p/trans_virtio.c 24
-rw-r--r-- virt/kvm/arm/vgic-v2.c 8
-rw-r--r-- virt/kvm/arm/vgic-v3.c 8
-rw-r--r-- virt/kvm/arm/vgic.c 22
-rw-r--r-- virt/kvm/kvm_main.c 1
24 files changed, 292 insertions, 108 deletions
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index bf0fe99e8ca9..4cf48c3aca13 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -149,29 +149,28 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
(__boundary - 1 < (end) - 1)? __boundary: (end); \
})
+#define kvm_pgd_index(addr) pgd_index(addr)
+
static inline bool kvm_page_empty(void *ptr)
{
struct page *ptr_page = virt_to_page(ptr);
return page_count(ptr_page) == 1;
}
-
#define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
#define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
#define kvm_pud_table_empty(kvm, pudp) (0)
#define KVM_PREALLOC_LEVEL 0
-static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
+static inline void *kvm_get_hwpgd(struct kvm *kvm)
{
- return 0;
+ return kvm->arch.pgd;
}
-static inline void kvm_free_hwpgd(struct kvm *kvm) { }
-
-static inline void *kvm_get_hwpgd(struct kvm *kvm)
+static inline unsigned int kvm_get_hwpgd_size(void)
{
- return kvm->arch.pgd;
+ return PTRS_PER_S2_PGD * sizeof(pgd_t);
}
struct kvm;
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 3e6859bc3e11..5656d79c5a44 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -290,7 +290,7 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
phys_addr_t addr = start, end = start + size;
phys_addr_t next;
- pgd = pgdp + pgd_index(addr);
+ pgd = pgdp + kvm_pgd_index(addr);
do {
next = kvm_pgd_addr_end(addr, end);
if (!pgd_none(*pgd))
@@ -355,7 +355,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
phys_addr_t next;
pgd_t *pgd;
- pgd = kvm->arch.pgd + pgd_index(addr);
+ pgd = kvm->arch.pgd + kvm_pgd_index(addr);
do {
next = kvm_pgd_addr_end(addr, end);
stage2_flush_puds(kvm, pgd, addr, next);
@@ -632,6 +632,20 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
__phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
}
+/*
+ * Free the HW pgd, one page at a time.
+ *
+ * @hwpgd: start of the region to release. The length handed to
+ * free_pages_exact() is kvm_get_hwpgd_size().
+ */
+static void kvm_free_hwpgd(void *hwpgd)
+{
+ free_pages_exact(hwpgd, kvm_get_hwpgd_size());
+}
+
+/*
+ * Allocate the HW PGD, making sure that each page gets its own refcount.
+ *
+ * The region is kvm_get_hwpgd_size() bytes and is requested zeroed
+ * (__GFP_ZERO). The caller treats a NULL return as allocation failure.
+ */
+static void *kvm_alloc_hwpgd(void)
+{
+ return alloc_pages_exact(kvm_get_hwpgd_size(), GFP_KERNEL | __GFP_ZERO);
+}
+
/**
* kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
* @kvm: The KVM struct pointer for the VM.
@@ -645,15 +659,31 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
*/
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
- int ret;
pgd_t *pgd;
+ void *hwpgd;
if (kvm->arch.pgd != NULL) {
kvm_err("kvm_arch already initialized?\n");
return -EINVAL;
}
+ hwpgd = kvm_alloc_hwpgd();
+ if (!hwpgd)
+ return -ENOMEM;
+
+ /* When the kernel uses more levels of page tables than the
+ * guest, we allocate a fake PGD and pre-populate it to point
+ * to the next-level page table, which will be the real
+ * initial page table pointed to by the VTTBR.
+ *
+ * When KVM_PREALLOC_LEVEL==2, we allocate a single page for
+ * the PMD and the kernel will use folded pud.
+ * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD
+ * pages.
+ */
if (KVM_PREALLOC_LEVEL > 0) {
+ int i;
+
/*
* Allocate fake pgd for the page table manipulation macros to
* work. This is not used by the hardware and we have no
@@ -661,30 +691,32 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
*/
pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
GFP_KERNEL | __GFP_ZERO);
+
+ if (!pgd) {
+ kvm_free_hwpgd(hwpgd);
+ return -ENOMEM;
+ }
+
+ /* Plug the HW PGD into the fake one. */
+ for (i = 0; i < PTRS_PER_S2_PGD; i++) {
+ if (KVM_PREALLOC_LEVEL == 1)
+ pgd_populate(NULL, pgd + i,
+ (pud_t *)hwpgd + i * PTRS_PER_PUD);
+ else if (KVM_PREALLOC_LEVEL == 2)
+ pud_populate(NULL, pud_offset(pgd, 0) + i,
+ (pmd_t *)hwpgd + i * PTRS_PER_PMD);
+ }
} else {
/*
* Allocate actual first-level Stage-2 page table used by the
* hardware for Stage-2 page table walks.
*/
- pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER);
+ pgd = (pgd_t *)hwpgd;
}
- if (!pgd)
- return -ENOMEM;
-
- ret = kvm_prealloc_hwpgd(kvm, pgd);
- if (ret)
- goto out_err;
-
kvm_clean_pgd(pgd);
kvm->arch.pgd = pgd;
return 0;
-out_err:
- if (KVM_PREALLOC_LEVEL > 0)
- kfree(pgd);
- else
- free_pages((unsigned long)pgd, S2_PGD_ORDER);
- return ret;
}
/**
@@ -785,11 +817,10 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
return;
unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
- kvm_free_hwpgd(kvm);
+ kvm_free_hwpgd(kvm_get_hwpgd(kvm));
if (KVM_PREALLOC_LEVEL > 0)
kfree(kvm->arch.pgd);
- else
- free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
+
kvm->arch.pgd = NULL;
}
@@ -799,7 +830,7 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
pgd_t *pgd;
pud_t *pud;
- pgd = kvm->arch.pgd + pgd_index(addr);
+ pgd = kvm->arch.pgd + kvm_pgd_index(addr);
if (WARN_ON(pgd_none(*pgd))) {
if (!cache)
return NULL;
@@ -1089,7 +1120,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
pgd_t *pgd;
phys_addr_t next;
- pgd = kvm->arch.pgd + pgd_index(addr);
+ pgd = kvm->arch.pgd + kvm_pgd_index(addr);
do {
/*
* Release kvm_mmu_lock periodically if the memory region is
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 94674eb7e7bb..54bb4ba97441 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -129,6 +129,9 @@
* 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are
* not known to exist and will break with this configuration.
*
+ * VTCR_EL2.PS is extracted from ID_AA64MMFR0_EL1.PARange at boot time
+ * (see hyp-init.S).
+ *
* Note that when using 4K pages, we concatenate two first level page tables
* together.
*
@@ -138,7 +141,6 @@
#ifdef CONFIG_ARM64_64K_PAGES
/*
* Stage2 translation configuration:
- * 40bits output (PS = 2)
* 40bits input (T0SZ = 24)
* 64kB pages (TG0 = 1)
* 2 level page tables (SL = 1)
@@ -150,7 +152,6 @@
#else
/*
* Stage2 translation configuration:
- * 40bits output (PS = 2)
* 40bits input (T0SZ = 24)
* 4kB pages (TG0 = 0)
* 3 level page tables (SL = 1)
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 6458b5373142..bbfb600fa822 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -158,6 +158,8 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
#define PTRS_PER_S2_PGD (1 << PTRS_PER_S2_PGD_SHIFT)
#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
+#define kvm_pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
+
/*
* If we are concatenating first level stage-2 page tables, we would have less
* than or equal to 16 pointers in the fake PGD, because that's what the
@@ -171,43 +173,6 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
#define KVM_PREALLOC_LEVEL (0)
#endif
-/**
- * kvm_prealloc_hwpgd - allocate inital table for VTTBR
- * @kvm: The KVM struct pointer for the VM.
- * @pgd: The kernel pseudo pgd
- *
- * When the kernel uses more levels of page tables than the guest, we allocate
- * a fake PGD and pre-populate it to point to the next-level page table, which
- * will be the real initial page table pointed to by the VTTBR.
- *
- * When KVM_PREALLOC_LEVEL==2, we allocate a single page for the PMD and
- * the kernel will use folded pud. When KVM_PREALLOC_LEVEL==1, we
- * allocate 2 consecutive PUD pages.
- */
-static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
-{
- unsigned int i;
- unsigned long hwpgd;
-
- if (KVM_PREALLOC_LEVEL == 0)
- return 0;
-
- hwpgd = __get_free_pages(GFP_KERNEL | __GFP_ZERO, PTRS_PER_S2_PGD_SHIFT);
- if (!hwpgd)
- return -ENOMEM;
-
- for (i = 0; i < PTRS_PER_S2_PGD; i++) {
- if (KVM_PREALLOC_LEVEL == 1)
- pgd_populate(NULL, pgd + i,
- (pud_t *)hwpgd + i * PTRS_PER_PUD);
- else if (KVM_PREALLOC_LEVEL == 2)
- pud_populate(NULL, pud_offset(pgd, 0) + i,
- (pmd_t *)hwpgd + i * PTRS_PER_PMD);
- }
-
- return 0;
-}
-
static inline void *kvm_get_hwpgd(struct kvm *kvm)
{
pgd_t *pgd = kvm->arch.pgd;
@@ -224,12 +189,11 @@ static inline void *kvm_get_hwpgd(struct kvm *kvm)
return pmd_offset(pud, 0);
}
-static inline void kvm_free_hwpgd(struct kvm *kvm)
+static inline unsigned int kvm_get_hwpgd_size(void)
{
- if (KVM_PREALLOC_LEVEL > 0) {
- unsigned long hwpgd = (unsigned long)kvm_get_hwpgd(kvm);
- free_pages(hwpgd, PTRS_PER_S2_PGD_SHIFT);
- }
+ if (KVM_PREALLOC_LEVEL > 0)
+ return PTRS_PER_S2_PGD * PAGE_SIZE;
+ return PTRS_PER_S2_PGD * sizeof(pgd_t);
}
static inline bool kvm_page_empty(void *ptr)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index f6579cfde2df..19e17bd7aec0 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -165,7 +165,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ONE_REG:
case KVM_CAP_ENABLE_CAP:
case KVM_CAP_S390_CSS_SUPPORT:
- case KVM_CAP_IRQFD:
case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_ENABLE_CAP_VM:
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index cc31f7c06d3d..9541ba34126b 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -507,6 +507,7 @@ static int picdev_read(struct kvm_pic *s,
return -EOPNOTSUPP;
if (len != 1) {
+ memset(val, 0, len);
pr_pic_unimpl("non byte read\n");
return 0;
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f7b20b417a3a..10a481b7674d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2168,7 +2168,10 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
{
unsigned long *msr_bitmap;
- if (irqchip_in_kernel(vcpu->kvm) && apic_x2apic_mode(vcpu->arch.apic)) {
+ if (is_guest_mode(vcpu))
+ msr_bitmap = vmx_msr_bitmap_nested;
+ else if (irqchip_in_kernel(vcpu->kvm) &&
+ apic_x2apic_mode(vcpu->arch.apic)) {
if (is_long_mode(vcpu))
msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
else
@@ -9218,9 +9221,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
}
if (cpu_has_vmx_msr_bitmap() &&
- exec_control & CPU_BASED_USE_MSR_BITMAPS &&
- nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) {
- vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_nested));
+ exec_control & CPU_BASED_USE_MSR_BITMAPS) {
+ nested_vmx_merge_msr_bitmap(vcpu, vmcs12);
+ /* MSR_BITMAP will be set by following vmx_set_efer. */
} else
exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bd7a70be41b3..32bf19ef3115 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2744,7 +2744,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_USER_NMI:
case KVM_CAP_REINJECT_CONTROL:
case KVM_CAP_IRQ_INJECT_STATUS:
- case KVM_CAP_IRQFD:
case KVM_CAP_IOEVENTFD:
case KVM_CAP_IOEVENTFD_NO_LENGTH:
case KVM_CAP_PIT2:
diff --git a/drivers/base/regmap/regcache-rbtree.c b/drivers/base/regmap/regcache-rbtree.c
index d453a2c98ad0..81751a49d8bf 100644
--- a/drivers/base/regmap/regcache-rbtree.c
+++ b/drivers/base/regmap/regcache-rbtree.c
@@ -307,7 +307,7 @@ static int regcache_rbtree_insert_to_block(struct regmap *map,
if (pos == 0) {
memmove(blk + offset * map->cache_word_size,
blk, rbnode->blklen * map->cache_word_size);
- bitmap_shift_right(present, present, offset, blklen);
+ bitmap_shift_left(present, present, offset, blklen);
}
/* update the rbnode block, its size and the base register */
diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c
index f373c35f9e1d..da84f544c544 100644
--- a/drivers/base/regmap/regcache.c
+++ b/drivers/base/regmap/regcache.c
@@ -608,7 +608,8 @@ static int regcache_sync_block_single(struct regmap *map, void *block,
for (i = start; i < end; i++) {
regtmp = block_base + (i * map->reg_stride);
- if (!regcache_reg_present(cache_present, i))
+ if (!regcache_reg_present(cache_present, i) ||
+ !regmap_writeable(map, regtmp))
continue;
val = regcache_get_val(map, block, i);
@@ -677,7 +678,8 @@ static int regcache_sync_block_raw(struct regmap *map, void *block,
for (i = start; i < end; i++) {
regtmp = block_base + (i * map->reg_stride);
- if (!regcache_reg_present(cache_present, i)) {
+ if (!regcache_reg_present(cache_present, i) ||
+ !regmap_writeable(map, regtmp)) {
ret = regcache_sync_block_raw_flush(map, &data,
base, regtmp);
if (ret != 0)
diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c
index 6299a50a5960..a6c3f75b4b01 100644
--- a/drivers/base/regmap/regmap-irq.c
+++ b/drivers/base/regmap/regmap-irq.c
@@ -499,7 +499,8 @@ int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags,
goto err_alloc;
}
- ret = request_threaded_irq(irq, NULL, regmap_irq_thread, irq_flags,
+ ret = request_threaded_irq(irq, NULL, regmap_irq_thread,
+ irq_flags | IRQF_ONESHOT,
chip->name, d);
if (ret != 0) {
dev_err(map->dev, "Failed to request IRQ %d for %s: %d\n",
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index fae2dbbf5745..72d7028f779b 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -142,6 +142,7 @@ struct ports_device {
* notification
*/
struct work_struct control_work;
+ struct work_struct config_work;
struct list_head ports;
@@ -1837,10 +1838,21 @@ static void config_intr(struct virtio_device *vdev)
portdev = vdev->priv;
+ if (!use_multiport(portdev))
+ schedule_work(&portdev->config_work);
+}
+
+/*
+ * Deferred half of config_intr(): runs from the config_work item that
+ * config_intr() schedules, so the embedding ports_device must be looked
+ * up through the config_work member.
+ */
+static void config_work_handler(struct work_struct *work)
+{
+ struct ports_device *portdev;
+
+ /* @work is &portdev->config_work; using control_work here would yield a bogus pointer. */
+ portdev = container_of(work, struct ports_device, config_work);
if (!use_multiport(portdev)) {
+ struct virtio_device *vdev;
struct port *port;
u16 rows, cols;
+ vdev = portdev->vdev;
virtio_cread(vdev, struct virtio_console_config, cols, &cols);
virtio_cread(vdev, struct virtio_console_config, rows, &rows);
@@ -2040,12 +2052,14 @@ static int virtcons_probe(struct virtio_device *vdev)
virtio_device_ready(portdev->vdev);
+ INIT_WORK(&portdev->config_work, &config_work_handler);
+ INIT_WORK(&portdev->control_work, &control_work_handler);
+
if (multiport) {
unsigned int nr_added_bufs;
spin_lock_init(&portdev->c_ivq_lock);
spin_lock_init(&portdev->c_ovq_lock);
- INIT_WORK(&portdev->control_work, &control_work_handler);
nr_added_bufs = fill_queue(portdev->c_ivq,
&portdev->c_ivq_lock);
@@ -2113,6 +2127,8 @@ static void virtcons_remove(struct virtio_device *vdev)
/* Finish up work that's lined up */
if (use_multiport(portdev))
cancel_work_sync(&portdev->control_work);
+ else
+ cancel_work_sync(&portdev->config_work);
list_for_each_entry_safe(port, port2, &portdev->ports, list)
unplug_port(port);
@@ -2164,6 +2180,7 @@ static int virtcons_freeze(struct virtio_device *vdev)
virtqueue_disable_cb(portdev->c_ivq);
cancel_work_sync(&portdev->control_work);
+ cancel_work_sync(&portdev->config_work);
/*
* Once more: if control_work_handler() was running, it would
* enable the cb as the last step.
diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c
index 92f6af6da699..73354ee27877 100644
--- a/drivers/rpmsg/virtio_rpmsg_bus.c
+++ b/drivers/rpmsg/virtio_rpmsg_bus.c
@@ -951,6 +951,7 @@ static int rpmsg_probe(struct virtio_device *vdev)
void *bufs_va;
int err = 0, i;
size_t total_buf_space;
+ bool notify;
vrp = kzalloc(sizeof(*vrp), GFP_KERNEL);
if (!vrp)
@@ -1030,8 +1031,22 @@ static int rpmsg_probe(struct virtio_device *vdev)
}
}
+ /*
+ * Prepare to kick but don't notify yet - we can't do this before
+ * device is ready.
+ */
+ notify = virtqueue_kick_prepare(vrp->rvq);
+
+ /* From this point on, we can notify and get callbacks. */
+ virtio_device_ready(vdev);
+
/* tell the remote processor it can start sending messages */
- virtqueue_kick(vrp->rvq);
+ /*
+ * this might be concurrent with callbacks, but we are only
+ * doing notify, not a full kick here, so that's ok.
+ */
+ if (notify)
+ virtqueue_notify(vrp->rvq);
dev_info(&vdev->dev, "rpmsg host is online\n");
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 0413157f3b49..6a356e344f82 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -29,6 +29,7 @@
#include <linux/module.h>
#include <linux/balloon_compaction.h>
#include <linux/oom.h>
+#include <linux/wait.h>
/*
* Balloon device works in 4K page units. So each page is pointed to by
@@ -334,17 +335,25 @@ static int virtballoon_oom_notify(struct notifier_block *self,
static int balloon(void *_vballoon)
{
struct virtio_balloon *vb = _vballoon;
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
set_freezable();
while (!kthread_should_stop()) {
s64 diff;
try_to_freeze();
- wait_event_interruptible(vb->config_change,
- (diff = towards_target(vb)) != 0
- || vb->need_stats_update
- || kthread_should_stop()
- || freezing(current));
+
+ add_wait_queue(&vb->config_change, &wait);
+ for (;;) {
+ if ((diff = towards_target(vb)) != 0 ||
+ vb->need_stats_update ||
+ kthread_should_stop() ||
+ freezing(current))
+ break;
+ wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
+ }
+ remove_wait_queue(&vb->config_change, &wait);
+
if (vb->need_stats_update)
stats_handle_request(vb);
if (diff > 0)
@@ -499,6 +508,8 @@ static int virtballoon_probe(struct virtio_device *vdev)
if (err < 0)
goto out_oom_notify;
+ virtio_device_ready(vdev);
+
vb->thread = kthread_run(balloon, vb, "vballoon");
if (IS_ERR(vb->thread)) {
err = PTR_ERR(vb->thread);
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index cad569890908..6010d7ec0a0f 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -156,22 +156,95 @@ static void vm_get(struct virtio_device *vdev, unsigned offset,
void *buf, unsigned len)
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
- u8 *ptr = buf;
- int i;
+ void __iomem *base = vm_dev->base + VIRTIO_MMIO_CONFIG;
+ u8 b;
+ __le16 w;
+ __le32 l;
- for (i = 0; i < len; i++)
- ptr[i] = readb(vm_dev->base + VIRTIO_MMIO_CONFIG + offset + i);
+ if (vm_dev->version == 1) {
+ u8 *ptr = buf;
+ int i;
+
+ for (i = 0; i < len; i++)
+ ptr[i] = readb(base + offset + i);
+ return;
+ }
+
+ switch (len) {
+ case 1:
+ b = readb(base + offset);
+ memcpy(buf, &b, sizeof b);
+ break;
+ case 2:
+ w = cpu_to_le16(readw(base + offset));
+ memcpy(buf, &w, sizeof w);
+ break;
+ case 4:
+ l = cpu_to_le32(readl(base + offset));
+ memcpy(buf, &l, sizeof l);
+ break;
+ case 8:
+ l = cpu_to_le32(readl(base + offset));
+ memcpy(buf, &l, sizeof l);
+ l = cpu_to_le32(ioread32(base + offset + sizeof l));
+ memcpy(buf + sizeof l, &l, sizeof l);
+ break;
+ default:
+ BUG();
+ }
}
static void vm_set(struct virtio_device *vdev, unsigned offset,
const void *buf, unsigned len)
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
- const u8 *ptr = buf;
- int i;
+ void __iomem *base = vm_dev->base + VIRTIO_MMIO_CONFIG;
+ u8 b;
+ __le16 w;
+ __le32 l;
- for (i = 0; i < len; i++)
- writeb(ptr[i], vm_dev->base + VIRTIO_MMIO_CONFIG + offset + i);
+ if (vm_dev->version == 1) {
+ const u8 *ptr = buf;
+ int i;
+ /* Version 1: write the buffer out one byte at a time. */
+ for (i = 0; i < len; i++)
+ writeb(ptr[i], base + offset + i);
+
+ return;
+ }
+ /*
+ * Other versions: a single access of exactly the requested width.
+ * The buffer holds little-endian data, converted with le*_to_cpu()
+ * before the write. 8 bytes go out as two 32-bit writes, lower
+ * offset first; any other length hits BUG().
+ */
+ switch (len) {
+ case 1:
+ memcpy(&b, buf, sizeof b);
+ writeb(b, base + offset);
+ break;
+ case 2:
+ memcpy(&w, buf, sizeof w);
+ writew(le16_to_cpu(w), base + offset);
+ break;
+ case 4:
+ memcpy(&l, buf, sizeof l);
+ writel(le32_to_cpu(l), base + offset);
+ break;
+ case 8:
+ memcpy(&l, buf, sizeof l);
+ writel(le32_to_cpu(l), base + offset);
+ memcpy(&l, buf + sizeof l, sizeof l);
+ writel(le32_to_cpu(l), base + offset + sizeof l);
+ break;
+ default:
+ BUG();
+ }
+}
+
+/*
+ * Report the config generation: always 0 for a version 1 device,
+ * otherwise the value read from VIRTIO_MMIO_CONFIG_GENERATION.
+ */
+static u32 vm_generation(struct virtio_device *vdev)
+{
+ struct virtio_mmio_device *mmio = to_virtio_mmio_device(vdev);
+
+ /* Version 1 devices: report a constant generation of 0. */
+ if (mmio->version == 1)
+ return 0;
+
+ return readl(mmio->base + VIRTIO_MMIO_CONFIG_GENERATION);
}
static u8 vm_get_status(struct virtio_device *vdev)
@@ -440,6 +513,7 @@ static const char *vm_bus_name(struct virtio_device *vdev)
static const struct virtio_config_ops virtio_mmio_config_ops = {
.get = vm_get,
.set = vm_set,
+ .generation = vm_generation,
.get_status = vm_get_status,
.set_status = vm_set_status,
.reset = vm_reset,
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 956b75d61809..6dee68d013ff 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1325,6 +1325,9 @@ out:
static int pagemap_open(struct inode *inode, struct file *file)
{
+ /* do not disclose physical addresses: attack vector */
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
pr_warn_once("Bits 55-60 of /proc/PID/pagemap entries are about "
"to stop being page-shift some time soon. See the "
"linux/Documentation/vm/pagemap.txt for details.\n");
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 7c55dd5dd2c9..66203b268984 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -114,6 +114,7 @@ struct vgic_ops {
void (*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr);
u64 (*get_elrsr)(const struct kvm_vcpu *vcpu);
u64 (*get_eisr)(const struct kvm_vcpu *vcpu);
+ void (*clear_eisr)(struct kvm_vcpu *vcpu);
u32 (*get_interrupt_status)(const struct kvm_vcpu *vcpu);
void (*enable_underflow)(struct kvm_vcpu *vcpu);
void (*disable_underflow)(struct kvm_vcpu *vcpu);
diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
index 3c53eec4ae22..19c66fcbab8a 100644
--- a/include/uapi/linux/virtio_blk.h
+++ b/include/uapi/linux/virtio_blk.h
@@ -60,7 +60,7 @@ struct virtio_blk_config {
__u32 size_max;
/* The maximum number of segments (if VIRTIO_BLK_F_SEG_MAX) */
__u32 seg_max;
- /* geometry the device (if VIRTIO_BLK_F_GEOMETRY) */
+ /* geometry of the device (if VIRTIO_BLK_F_GEOMETRY) */
struct virtio_blk_geometry {
__u16 cylinders;
__u8 heads;
@@ -119,7 +119,11 @@ struct virtio_blk_config {
#define VIRTIO_BLK_T_BARRIER 0x80000000
#endif /* !VIRTIO_BLK_NO_LEGACY */
-/* This is the first element of the read scatter-gather list. */
+/*
+ * This comes first in the read scatter-gather list.
+ * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated,
+ * this is the first element of the read scatter-gather list.
+ */
struct virtio_blk_outhdr {
/* VIRTIO_BLK_T* */
__virtio32 type;
diff --git a/include/uapi/linux/virtio_scsi.h b/include/uapi/linux/virtio_scsi.h
index 42b9370771b0..cc18ef8825c0 100644
--- a/include/uapi/linux/virtio_scsi.h
+++ b/include/uapi/linux/virtio_scsi.h
@@ -29,8 +29,16 @@
#include <linux/virtio_types.h>
-#define VIRTIO_SCSI_CDB_SIZE 32
-#define VIRTIO_SCSI_SENSE_SIZE 96
+/* Default values of the CDB and sense data size configuration fields */
+#define VIRTIO_SCSI_CDB_DEFAULT_SIZE 32
+#define VIRTIO_SCSI_SENSE_DEFAULT_SIZE 96
+
+#ifndef VIRTIO_SCSI_CDB_SIZE
+#define VIRTIO_SCSI_CDB_SIZE VIRTIO_SCSI_CDB_DEFAULT_SIZE
+#endif
+#ifndef VIRTIO_SCSI_SENSE_SIZE
+#define VIRTIO_SCSI_SENSE_SIZE VIRTIO_SCSI_SENSE_DEFAULT_SIZE
+#endif
/* SCSI command request, followed by data-out */
struct virtio_scsi_cmd_req {
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index d8e376a5f0f1..36a1a739ad68 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -658,14 +658,30 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args)
static void p9_virtio_remove(struct virtio_device *vdev)
{
struct virtio_chan *chan = vdev->priv;
-
- if (chan->inuse)
- p9_virtio_close(chan->client);
- vdev->config->del_vqs(vdev);
+ unsigned long warning_time;
mutex_lock(&virtio_9p_lock);
+
+ /* Remove self from list so we don't get new users. */
list_del(&chan->chan_list);
+ warning_time = jiffies;
+
+ /* Wait for existing users to close. */
+ while (chan->inuse) {
+ mutex_unlock(&virtio_9p_lock);
+ msleep(250);
+ if (time_after(jiffies, warning_time + 10 * HZ)) {
+ dev_emerg(&vdev->dev,
+ "p9_virtio_remove: waiting for device in use.\n");
+ warning_time = jiffies;
+ }
+ mutex_lock(&virtio_9p_lock);
+ }
+
mutex_unlock(&virtio_9p_lock);
+
+ vdev->config->del_vqs(vdev);
+
sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE);
kfree(chan->tag);
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index a0a7b5d1a070..f9b9c7c51372 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -72,6 +72,8 @@ static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
{
if (!(lr_desc.state & LR_STATE_MASK))
vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr);
+ else
+ vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr &= ~(1ULL << lr);
}
static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu)
@@ -84,6 +86,11 @@ static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu)
return vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr;
}
+static void vgic_v2_clear_eisr(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr = 0; /* forget the saved GICv2 EISR value */
+}
+
static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu)
{
u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr;
@@ -148,6 +155,7 @@ static const struct vgic_ops vgic_v2_ops = {
.sync_lr_elrsr = vgic_v2_sync_lr_elrsr,
.get_elrsr = vgic_v2_get_elrsr,
.get_eisr = vgic_v2_get_eisr,
+ .clear_eisr = vgic_v2_clear_eisr,
.get_interrupt_status = vgic_v2_get_interrupt_status,
.enable_underflow = vgic_v2_enable_underflow,
.disable_underflow = vgic_v2_disable_underflow,
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index 3a62d8a9a2c6..dff06021e748 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -104,6 +104,8 @@ static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
{
if (!(lr_desc.state & LR_STATE_MASK))
vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr);
+ else
+ vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr &= ~(1U << lr);
}
static u64 vgic_v3_get_elrsr(const struct kvm_vcpu *vcpu)
@@ -116,6 +118,11 @@ static u64 vgic_v3_get_eisr(const struct kvm_vcpu *vcpu)
return vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr;
}
+static void vgic_v3_clear_eisr(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr = 0; /* forget the saved GICv3 EISR value */
+}
+
static u32 vgic_v3_get_interrupt_status(const struct kvm_vcpu *vcpu)
{
u32 misr = vcpu->arch.vgic_cpu.vgic_v3.vgic_misr;
@@ -192,6 +199,7 @@ static const struct vgic_ops vgic_v3_ops = {
.sync_lr_elrsr = vgic_v3_sync_lr_elrsr,
.get_elrsr = vgic_v3_get_elrsr,
.get_eisr = vgic_v3_get_eisr,
+ .clear_eisr = vgic_v3_clear_eisr,
.get_interrupt_status = vgic_v3_get_interrupt_status,
.enable_underflow = vgic_v3_enable_underflow,
.disable_underflow = vgic_v3_disable_underflow,
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 0cc6ab6005a0..c9f60f524588 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -883,6 +883,11 @@ static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu)
return vgic_ops->get_eisr(vcpu);
}
+static inline void vgic_clear_eisr(struct kvm_vcpu *vcpu)
+{
+ vgic_ops->clear_eisr(vcpu); /* dispatch to the clear_eisr hook of the active vgic_ops */
+}
+
static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu)
{
return vgic_ops->get_interrupt_status(vcpu);
@@ -922,6 +927,7 @@ static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
vgic_set_lr(vcpu, lr_nr, vlr);
clear_bit(lr_nr, vgic_cpu->lr_used);
vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
+ vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
}
/*
@@ -978,6 +984,7 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
vlr.state |= LR_STATE_PENDING;
vgic_set_lr(vcpu, lr, vlr);
+ vgic_sync_lr_elrsr(vcpu, lr, vlr);
return true;
}
}
@@ -999,6 +1006,7 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
vlr.state |= LR_EOI_INT;
vgic_set_lr(vcpu, lr, vlr);
+ vgic_sync_lr_elrsr(vcpu, lr, vlr);
return true;
}
@@ -1136,6 +1144,14 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
if (status & INT_STATUS_UNDERFLOW)
vgic_disable_underflow(vcpu);
+ /*
+ * In the next iterations of the vcpu loop, if we sync the vgic state
+ * after flushing it, but before entering the guest (this happens for
+ * pending signals and vmid rollovers), then make sure we don't pick
+ * up any old maintenance interrupts here.
+ */
+ vgic_clear_eisr(vcpu);
+
return level_pending;
}
@@ -1583,8 +1599,10 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
* emulation. So check this here again. KVM_CREATE_DEVICE does
* the proper checks already.
*/
- if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && !vgic->can_emulate_gicv2)
- return -ENODEV;
+ if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && !vgic->can_emulate_gicv2) {
+ ret = -ENODEV;
+ goto out;
+ }
/*
* Any time a vcpu is run, vcpu_load is called which tries to grab the
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a1093700f3a4..a2214d9609bd 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2492,6 +2492,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
case KVM_CAP_SIGNAL_MSI:
#endif
#ifdef CONFIG_HAVE_KVM_IRQFD
+ case KVM_CAP_IRQFD:
case KVM_CAP_IRQFD_RESAMPLE:
#endif
case KVM_CAP_CHECK_EXTENSION_VM: