summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorweizijie <zijie.wei@linux.alibaba.com>2025-03-03 17:33:35 -0800
committerSean Christopherson <seanjc@google.com>2025-04-24 11:18:36 -0700
commit87e4951e250bbccac47a8822f6f023a1de8b96ec (patch)
tree6ea0cc5b21df00a6161c06a33e3b7846f90e8d7a
parentc2207bbc0c0f4b6ae8dbb73ec26e17aa0c45a3ca (diff)
KVM: x86: Rescan I/O APIC routes after EOI interception for old routing
Rescan I/O APIC routes for a vCPU after handling an intercepted I/O APIC EOI for an IRQ that is not targeting said vCPU, i.e. after handling what's effectively a stale EOI VM-Exit. If a level-triggered IRQ is in-flight when IRQ routing changes, e.g. because the guest changes routing from its IRQ handler, then KVM intercepts EOIs on both the new and old target vCPUs, so that the in-flight IRQ can be de-asserted when it's EOI'd. However, only the EOI for the in-flight IRQ needs to be intercepted, as IRQs on the same vector with the new routing are coincidental, i.e. occur only if the guest is reusing the vector for multiple interrupt sources. If the I/O APIC routes aren't rescanned, KVM will unnecessarily intercept EOIs for the vector and negative impact the vCPU's interrupt performance. Note, both commit db2bdcbbbd32 ("KVM: x86: fix edge EOI and IOAPIC reconfig race") and commit 0fc5a36dd6b3 ("KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC reconfigure race") mentioned this issue, but it was considered a "rare" occurrence thus was not addressed. However in real environments, this issue can happen even in a well-behaved guest. Cc: Kai Huang <kai.huang@intel.com> Co-developed-by: xuyun <xuyun_xy.xy@linux.alibaba.com> Signed-off-by: xuyun <xuyun_xy.xy@linux.alibaba.com> Signed-off-by: weizijie <zijie.wei@linux.alibaba.com> [sean: massage changelog and comments, use int/-1, reset at scan] Reviewed-by: Kai Huang <kai.huang@intel.com> Link: https://lore.kernel.org/r/20250304013335.4155703-4-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc@google.com>
-rw-r--r--arch/x86/include/asm/kvm_host.h1
-rw-r--r--arch/x86/kvm/irq_comm.c16
-rw-r--r--arch/x86/kvm/lapic.c8
-rw-r--r--arch/x86/kvm/x86.c1
4 files changed, 24 insertions, 2 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6c06f3d6e081..89102b65827f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1034,6 +1034,7 @@ struct kvm_vcpu_arch {
int pending_ioapic_eoi;
int pending_external_vector;
+ int highest_stale_pending_ioapic_eoi;
/* be preempted when it's in kernel-mode(cpl=0) */
bool preempted_in_kernel;
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 14590d9c4a37..d6d792b5d1bd 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -412,9 +412,21 @@ void kvm_scan_ioapic_irq(struct kvm_vcpu *vcpu, u32 dest_id, u16 dest_mode,
* level-triggered IRQ. The EOI needs to be intercepted and forwarded
* to I/O APIC emulation so that the IRQ can be de-asserted.
*/
- if (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT, dest_id, dest_mode) ||
- kvm_apic_pending_eoi(vcpu, vector))
+ if (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT, dest_id, dest_mode)) {
__set_bit(vector, ioapic_handled_vectors);
+ } else if (kvm_apic_pending_eoi(vcpu, vector)) {
+ __set_bit(vector, ioapic_handled_vectors);
+
+ /*
+ * Track the highest pending EOI for which the vCPU is NOT the
+ * target in the new routing. Only the EOI for the IRQ that is
+ * in-flight (for the old routing) needs to be intercepted, any
+ * future IRQs that arrive on this vCPU will be coincidental to
+ * the level-triggered routing and don't need to be intercepted.
+ */
+ if ((int)vector > vcpu->arch.highest_stale_pending_ioapic_eoi)
+ vcpu->arch.highest_stale_pending_ioapic_eoi = vector;
+ }
}
void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index c9de81cc27e1..3defbe520aed 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1459,6 +1459,14 @@ static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
if (!kvm_ioapic_handles_vector(apic, vector))
return;
+ /*
+ * If the intercepted EOI is for an IRQ that was pending from previous
+ * routing, then re-scan the I/O APIC routes as EOIs for the IRQ likely
+ * no longer need to be intercepted.
+ */
+ if (apic->vcpu->arch.highest_stale_pending_ioapic_eoi == vector)
+ kvm_make_request(KVM_REQ_SCAN_IOAPIC, apic->vcpu);
+
/* Request a KVM exit to inform the userspace IOAPIC. */
if (irqchip_split(apic->vcpu->kvm)) {
apic->vcpu->arch.pending_ioapic_eoi = vector;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e31dc57e1afa..e15af894f414 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10692,6 +10692,7 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
return;
bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256);
+ vcpu->arch.highest_stale_pending_ioapic_eoi = -1;
kvm_x86_call(sync_pir_to_irr)(vcpu);