summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/virtual/kvm/api.txt287
-rw-r--r--arch/x86/include/asm/kvm_emulate.h32
-rw-r--r--arch/x86/include/asm/kvm_host.h6
-rw-r--r--arch/x86/kvm/emulate.c392
-rw-r--r--arch/x86/kvm/lapic.c4
-rw-r--r--arch/x86/kvm/svm.c57
-rw-r--r--arch/x86/kvm/trace.h6
-rw-r--r--arch/x86/kvm/vmx.c30
-rw-r--r--arch/x86/kvm/x86.c94
9 files changed, 526 insertions, 382 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index a41465bd6a5c..587972ca12c5 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -297,6 +297,15 @@ struct kvm_regs {
__u64 rip, rflags;
};
+/* mips */
+struct kvm_regs {
+ /* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
+ __u64 gpr[32];
+ __u64 hi;
+ __u64 lo;
+ __u64 pc;
+};
+
4.12 KVM_SET_REGS
@@ -378,7 +387,7 @@ struct kvm_translation {
4.16 KVM_INTERRUPT
Capability: basic
-Architectures: x86, ppc
+Architectures: x86, ppc, mips
Type: vcpu ioctl
Parameters: struct kvm_interrupt (in)
Returns: 0 on success, -1 on error
@@ -423,6 +432,11 @@ c) KVM_INTERRUPT_SET_LEVEL
Note that any value for 'irq' other than the ones stated above is invalid
and incurs unexpected behavior.
+MIPS:
+
+Queues an external interrupt to be injected into the virtual CPU. A negative
+interrupt number dequeues the interrupt.
+
4.17 KVM_DEBUG_GUEST
@@ -512,7 +526,7 @@ struct kvm_cpuid {
4.21 KVM_SET_SIGNAL_MASK
Capability: basic
-Architectures: x86
+Architectures: all
Type: vcpu ioctl
Parameters: struct kvm_signal_mask (in)
Returns: 0 on success, -1 on error
@@ -1783,122 +1797,151 @@ and architecture specific registers. Each have their own range of operation
and their own constants and width. To keep track of the implemented
registers, find a list below:
- Arch | Register | Width (bits)
- | |
- PPC | KVM_REG_PPC_HIOR | 64
- PPC | KVM_REG_PPC_IAC1 | 64
- PPC | KVM_REG_PPC_IAC2 | 64
- PPC | KVM_REG_PPC_IAC3 | 64
- PPC | KVM_REG_PPC_IAC4 | 64
- PPC | KVM_REG_PPC_DAC1 | 64
- PPC | KVM_REG_PPC_DAC2 | 64
- PPC | KVM_REG_PPC_DABR | 64
- PPC | KVM_REG_PPC_DSCR | 64
- PPC | KVM_REG_PPC_PURR | 64
- PPC | KVM_REG_PPC_SPURR | 64
- PPC | KVM_REG_PPC_DAR | 64
- PPC | KVM_REG_PPC_DSISR | 32
- PPC | KVM_REG_PPC_AMR | 64
- PPC | KVM_REG_PPC_UAMOR | 64
- PPC | KVM_REG_PPC_MMCR0 | 64
- PPC | KVM_REG_PPC_MMCR1 | 64
- PPC | KVM_REG_PPC_MMCRA | 64
- PPC | KVM_REG_PPC_MMCR2 | 64
- PPC | KVM_REG_PPC_MMCRS | 64
- PPC | KVM_REG_PPC_SIAR | 64
- PPC | KVM_REG_PPC_SDAR | 64
- PPC | KVM_REG_PPC_SIER | 64
- PPC | KVM_REG_PPC_PMC1 | 32
- PPC | KVM_REG_PPC_PMC2 | 32
- PPC | KVM_REG_PPC_PMC3 | 32
- PPC | KVM_REG_PPC_PMC4 | 32
- PPC | KVM_REG_PPC_PMC5 | 32
- PPC | KVM_REG_PPC_PMC6 | 32
- PPC | KVM_REG_PPC_PMC7 | 32
- PPC | KVM_REG_PPC_PMC8 | 32
- PPC | KVM_REG_PPC_FPR0 | 64
+ Arch | Register | Width (bits)
+ | |
+ PPC | KVM_REG_PPC_HIOR | 64
+ PPC | KVM_REG_PPC_IAC1 | 64
+ PPC | KVM_REG_PPC_IAC2 | 64
+ PPC | KVM_REG_PPC_IAC3 | 64
+ PPC | KVM_REG_PPC_IAC4 | 64
+ PPC | KVM_REG_PPC_DAC1 | 64
+ PPC | KVM_REG_PPC_DAC2 | 64
+ PPC | KVM_REG_PPC_DABR | 64
+ PPC | KVM_REG_PPC_DSCR | 64
+ PPC | KVM_REG_PPC_PURR | 64
+ PPC | KVM_REG_PPC_SPURR | 64
+ PPC | KVM_REG_PPC_DAR | 64
+ PPC | KVM_REG_PPC_DSISR | 32
+ PPC | KVM_REG_PPC_AMR | 64
+ PPC | KVM_REG_PPC_UAMOR | 64
+ PPC | KVM_REG_PPC_MMCR0 | 64
+ PPC | KVM_REG_PPC_MMCR1 | 64
+ PPC | KVM_REG_PPC_MMCRA | 64
+ PPC | KVM_REG_PPC_MMCR2 | 64
+ PPC | KVM_REG_PPC_MMCRS | 64
+ PPC | KVM_REG_PPC_SIAR | 64
+ PPC | KVM_REG_PPC_SDAR | 64
+ PPC | KVM_REG_PPC_SIER | 64
+ PPC | KVM_REG_PPC_PMC1 | 32
+ PPC | KVM_REG_PPC_PMC2 | 32
+ PPC | KVM_REG_PPC_PMC3 | 32
+ PPC | KVM_REG_PPC_PMC4 | 32
+ PPC | KVM_REG_PPC_PMC5 | 32
+ PPC | KVM_REG_PPC_PMC6 | 32
+ PPC | KVM_REG_PPC_PMC7 | 32
+ PPC | KVM_REG_PPC_PMC8 | 32
+ PPC | KVM_REG_PPC_FPR0 | 64
...
- PPC | KVM_REG_PPC_FPR31 | 64
- PPC | KVM_REG_PPC_VR0 | 128
+ PPC | KVM_REG_PPC_FPR31 | 64
+ PPC | KVM_REG_PPC_VR0 | 128
...
- PPC | KVM_REG_PPC_VR31 | 128
- PPC | KVM_REG_PPC_VSR0 | 128
+ PPC | KVM_REG_PPC_VR31 | 128
+ PPC | KVM_REG_PPC_VSR0 | 128
...
- PPC | KVM_REG_PPC_VSR31 | 128
- PPC | KVM_REG_PPC_FPSCR | 64
- PPC | KVM_REG_PPC_VSCR | 32
- PPC | KVM_REG_PPC_VPA_ADDR | 64
- PPC | KVM_REG_PPC_VPA_SLB | 128
- PPC | KVM_REG_PPC_VPA_DTL | 128
- PPC | KVM_REG_PPC_EPCR | 32
- PPC | KVM_REG_PPC_EPR | 32
- PPC | KVM_REG_PPC_TCR | 32
- PPC | KVM_REG_PPC_TSR | 32
- PPC | KVM_REG_PPC_OR_TSR | 32
- PPC | KVM_REG_PPC_CLEAR_TSR | 32
- PPC | KVM_REG_PPC_MAS0 | 32
- PPC | KVM_REG_PPC_MAS1 | 32
- PPC | KVM_REG_PPC_MAS2 | 64
- PPC | KVM_REG_PPC_MAS7_3 | 64
- PPC | KVM_REG_PPC_MAS4 | 32
- PPC | KVM_REG_PPC_MAS6 | 32
- PPC | KVM_REG_PPC_MMUCFG | 32
- PPC | KVM_REG_PPC_TLB0CFG | 32
- PPC | KVM_REG_PPC_TLB1CFG | 32
- PPC | KVM_REG_PPC_TLB2CFG | 32
- PPC | KVM_REG_PPC_TLB3CFG | 32
- PPC | KVM_REG_PPC_TLB0PS | 32
- PPC | KVM_REG_PPC_TLB1PS | 32
- PPC | KVM_REG_PPC_TLB2PS | 32
- PPC | KVM_REG_PPC_TLB3PS | 32
- PPC | KVM_REG_PPC_EPTCFG | 32
- PPC | KVM_REG_PPC_ICP_STATE | 64
- PPC | KVM_REG_PPC_TB_OFFSET | 64
- PPC | KVM_REG_PPC_SPMC1 | 32
- PPC | KVM_REG_PPC_SPMC2 | 32
- PPC | KVM_REG_PPC_IAMR | 64
- PPC | KVM_REG_PPC_TFHAR | 64
- PPC | KVM_REG_PPC_TFIAR | 64
- PPC | KVM_REG_PPC_TEXASR | 64
- PPC | KVM_REG_PPC_FSCR | 64
- PPC | KVM_REG_PPC_PSPB | 32
- PPC | KVM_REG_PPC_EBBHR | 64
- PPC | KVM_REG_PPC_EBBRR | 64
- PPC | KVM_REG_PPC_BESCR | 64
- PPC | KVM_REG_PPC_TAR | 64
- PPC | KVM_REG_PPC_DPDES | 64
- PPC | KVM_REG_PPC_DAWR | 64
- PPC | KVM_REG_PPC_DAWRX | 64
- PPC | KVM_REG_PPC_CIABR | 64
- PPC | KVM_REG_PPC_IC | 64
- PPC | KVM_REG_PPC_VTB | 64
- PPC | KVM_REG_PPC_CSIGR | 64
- PPC | KVM_REG_PPC_TACR | 64
- PPC | KVM_REG_PPC_TCSCR | 64
- PPC | KVM_REG_PPC_PID | 64
- PPC | KVM_REG_PPC_ACOP | 64
- PPC | KVM_REG_PPC_VRSAVE | 32
- PPC | KVM_REG_PPC_LPCR | 64
- PPC | KVM_REG_PPC_PPR | 64
- PPC | KVM_REG_PPC_ARCH_COMPAT 32
- PPC | KVM_REG_PPC_DABRX | 32
- PPC | KVM_REG_PPC_WORT | 64
- PPC | KVM_REG_PPC_TM_GPR0 | 64
+ PPC | KVM_REG_PPC_VSR31 | 128
+ PPC | KVM_REG_PPC_FPSCR | 64
+ PPC | KVM_REG_PPC_VSCR | 32
+ PPC | KVM_REG_PPC_VPA_ADDR | 64
+ PPC | KVM_REG_PPC_VPA_SLB | 128
+ PPC | KVM_REG_PPC_VPA_DTL | 128
+ PPC | KVM_REG_PPC_EPCR | 32
+ PPC | KVM_REG_PPC_EPR | 32
+ PPC | KVM_REG_PPC_TCR | 32
+ PPC | KVM_REG_PPC_TSR | 32
+ PPC | KVM_REG_PPC_OR_TSR | 32
+ PPC | KVM_REG_PPC_CLEAR_TSR | 32
+ PPC | KVM_REG_PPC_MAS0 | 32
+ PPC | KVM_REG_PPC_MAS1 | 32
+ PPC | KVM_REG_PPC_MAS2 | 64
+ PPC | KVM_REG_PPC_MAS7_3 | 64
+ PPC | KVM_REG_PPC_MAS4 | 32
+ PPC | KVM_REG_PPC_MAS6 | 32
+ PPC | KVM_REG_PPC_MMUCFG | 32
+ PPC | KVM_REG_PPC_TLB0CFG | 32
+ PPC | KVM_REG_PPC_TLB1CFG | 32
+ PPC | KVM_REG_PPC_TLB2CFG | 32
+ PPC | KVM_REG_PPC_TLB3CFG | 32
+ PPC | KVM_REG_PPC_TLB0PS | 32
+ PPC | KVM_REG_PPC_TLB1PS | 32
+ PPC | KVM_REG_PPC_TLB2PS | 32
+ PPC | KVM_REG_PPC_TLB3PS | 32
+ PPC | KVM_REG_PPC_EPTCFG | 32
+ PPC | KVM_REG_PPC_ICP_STATE | 64
+ PPC | KVM_REG_PPC_TB_OFFSET | 64
+ PPC | KVM_REG_PPC_SPMC1 | 32
+ PPC | KVM_REG_PPC_SPMC2 | 32
+ PPC | KVM_REG_PPC_IAMR | 64
+ PPC | KVM_REG_PPC_TFHAR | 64
+ PPC | KVM_REG_PPC_TFIAR | 64
+ PPC | KVM_REG_PPC_TEXASR | 64
+ PPC | KVM_REG_PPC_FSCR | 64
+ PPC | KVM_REG_PPC_PSPB | 32
+ PPC | KVM_REG_PPC_EBBHR | 64
+ PPC | KVM_REG_PPC_EBBRR | 64
+ PPC | KVM_REG_PPC_BESCR | 64
+ PPC | KVM_REG_PPC_TAR | 64
+ PPC | KVM_REG_PPC_DPDES | 64
+ PPC | KVM_REG_PPC_DAWR | 64
+ PPC | KVM_REG_PPC_DAWRX | 64
+ PPC | KVM_REG_PPC_CIABR | 64
+ PPC | KVM_REG_PPC_IC | 64
+ PPC | KVM_REG_PPC_VTB | 64
+ PPC | KVM_REG_PPC_CSIGR | 64
+ PPC | KVM_REG_PPC_TACR | 64
+ PPC | KVM_REG_PPC_TCSCR | 64
+ PPC | KVM_REG_PPC_PID | 64
+ PPC | KVM_REG_PPC_ACOP | 64
+ PPC | KVM_REG_PPC_VRSAVE | 32
+ PPC | KVM_REG_PPC_LPCR | 64
+ PPC | KVM_REG_PPC_PPR | 64
+ PPC | KVM_REG_PPC_ARCH_COMPAT | 32
+ PPC | KVM_REG_PPC_DABRX | 32
+ PPC | KVM_REG_PPC_WORT | 64
+ PPC | KVM_REG_PPC_TM_GPR0 | 64
...
- PPC | KVM_REG_PPC_TM_GPR31 | 64
- PPC | KVM_REG_PPC_TM_VSR0 | 128
+ PPC | KVM_REG_PPC_TM_GPR31 | 64
+ PPC | KVM_REG_PPC_TM_VSR0 | 128
...
- PPC | KVM_REG_PPC_TM_VSR63 | 128
- PPC | KVM_REG_PPC_TM_CR | 64
- PPC | KVM_REG_PPC_TM_LR | 64
- PPC | KVM_REG_PPC_TM_CTR | 64
- PPC | KVM_REG_PPC_TM_FPSCR | 64
- PPC | KVM_REG_PPC_TM_AMR | 64
- PPC | KVM_REG_PPC_TM_PPR | 64
- PPC | KVM_REG_PPC_TM_VRSAVE | 64
- PPC | KVM_REG_PPC_TM_VSCR | 32
- PPC | KVM_REG_PPC_TM_DSCR | 64
- PPC | KVM_REG_PPC_TM_TAR | 64
+ PPC | KVM_REG_PPC_TM_VSR63 | 128
+ PPC | KVM_REG_PPC_TM_CR | 64
+ PPC | KVM_REG_PPC_TM_LR | 64
+ PPC | KVM_REG_PPC_TM_CTR | 64
+ PPC | KVM_REG_PPC_TM_FPSCR | 64
+ PPC | KVM_REG_PPC_TM_AMR | 64
+ PPC | KVM_REG_PPC_TM_PPR | 64
+ PPC | KVM_REG_PPC_TM_VRSAVE | 64
+ PPC | KVM_REG_PPC_TM_VSCR | 32
+ PPC | KVM_REG_PPC_TM_DSCR | 64
+ PPC | KVM_REG_PPC_TM_TAR | 64
+ | |
+ MIPS | KVM_REG_MIPS_R0 | 64
+ ...
+ MIPS | KVM_REG_MIPS_R31 | 64
+ MIPS | KVM_REG_MIPS_HI | 64
+ MIPS | KVM_REG_MIPS_LO | 64
+ MIPS | KVM_REG_MIPS_PC | 64
+ MIPS | KVM_REG_MIPS_CP0_INDEX | 32
+ MIPS | KVM_REG_MIPS_CP0_CONTEXT | 64
+ MIPS | KVM_REG_MIPS_CP0_USERLOCAL | 64
+ MIPS | KVM_REG_MIPS_CP0_PAGEMASK | 32
+ MIPS | KVM_REG_MIPS_CP0_WIRED | 32
+ MIPS | KVM_REG_MIPS_CP0_HWRENA | 32
+ MIPS | KVM_REG_MIPS_CP0_BADVADDR | 64
+ MIPS | KVM_REG_MIPS_CP0_COUNT | 32
+ MIPS | KVM_REG_MIPS_CP0_ENTRYHI | 64
+ MIPS | KVM_REG_MIPS_CP0_COMPARE | 32
+ MIPS | KVM_REG_MIPS_CP0_STATUS | 32
+ MIPS | KVM_REG_MIPS_CP0_CAUSE | 32
+ MIPS | KVM_REG_MIPS_CP0_EPC | 64
+ MIPS | KVM_REG_MIPS_CP0_CONFIG | 32
+ MIPS | KVM_REG_MIPS_CP0_CONFIG1 | 32
+ MIPS | KVM_REG_MIPS_CP0_CONFIG2 | 32
+ MIPS | KVM_REG_MIPS_CP0_CONFIG3 | 32
+ MIPS | KVM_REG_MIPS_CP0_CONFIG7 | 32
+ MIPS | KVM_REG_MIPS_CP0_ERROREPC | 64
+ MIPS | KVM_REG_MIPS_COUNT_CTL | 64
+ MIPS | KVM_REG_MIPS_COUNT_RESUME | 64
+ MIPS | KVM_REG_MIPS_COUNT_HZ | 64
ARM registers are mapped using the lower 32 bits. The upper 16 of that
is the register group type, or coprocessor number:
@@ -1937,6 +1980,22 @@ arm64 CCSIDR registers are demultiplexed by CSSELR value:
arm64 system registers have the following id bit patterns:
0x6030 0000 0013 <op0:2> <op1:3> <crn:4> <crm:4> <op2:3>
+
+MIPS registers are mapped using the lower 32 bits. The upper 16 of that is
+the register group type:
+
+MIPS core registers (see above) have the following id bit patterns:
+ 0x7030 0000 0000 <reg:16>
+
+MIPS CP0 registers (see KVM_REG_MIPS_CP0_* above) have the following id bit
+patterns depending on whether they're 32-bit or 64-bit registers:
+ 0x7020 0000 0001 00 <reg:5> <sel:3> (32-bit)
+ 0x7030 0000 0001 00 <reg:5> <sel:3> (64-bit)
+
+MIPS KVM control registers (see above) have the following id bit patterns:
+ 0x7030 0000 0002 <reg:16>
+
+
4.69 KVM_GET_ONE_REG
Capability: KVM_CAP_ONE_REG
@@ -2424,7 +2483,7 @@ in VCPU matching underlying host.
4.84 KVM_GET_REG_LIST
Capability: basic
-Architectures: arm, arm64
+Architectures: arm, arm64, mips
Type: vcpu ioctl
Parameters: struct kvm_reg_list (in/out)
Returns: 0 on success; -1 on error
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index ffa2671a7f2f..eb181178fe0b 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -37,6 +37,7 @@ struct x86_instruction_info {
u8 modrm_reg; /* index of register used */
u8 modrm_rm; /* rm part of modrm */
u64 src_val; /* value of source operand */
+ u64 dst_val; /* value of destination operand */
u8 src_bytes; /* size of source operand */
u8 dst_bytes; /* size of destination operand */
u8 ad_bytes; /* size of src/dst address */
@@ -232,7 +233,7 @@ struct operand {
union {
unsigned long val;
u64 val64;
- char valptr[sizeof(unsigned long) + 2];
+ char valptr[sizeof(sse128_t)];
sse128_t vec_val;
u64 mm_val;
void *data;
@@ -241,8 +242,8 @@ struct operand {
struct fetch_cache {
u8 data[15];
- unsigned long start;
- unsigned long end;
+ u8 *ptr;
+ u8 *end;
};
struct read_cache {
@@ -287,30 +288,36 @@ struct x86_emulate_ctxt {
u8 opcode_len;
u8 b;
u8 intercept;
- u8 lock_prefix;
- u8 rep_prefix;
u8 op_bytes;
u8 ad_bytes;
- u8 rex_prefix;
struct operand src;
struct operand src2;
struct operand dst;
- bool has_seg_override;
- u8 seg_override;
- u64 d;
int (*execute)(struct x86_emulate_ctxt *ctxt);
int (*check_perm)(struct x86_emulate_ctxt *ctxt);
+ /*
+ * The following six fields are cleared together,
+ * the rest are initialized unconditionally in x86_decode_insn
+ * or elsewhere
+ */
+ bool rip_relative;
+ u8 rex_prefix;
+ u8 lock_prefix;
+ u8 rep_prefix;
+ /* bitmaps of registers in _regs[] that can be read */
+ u32 regs_valid;
+ /* bitmaps of registers in _regs[] that have been written */
+ u32 regs_dirty;
/* modrm */
u8 modrm;
u8 modrm_mod;
u8 modrm_reg;
u8 modrm_rm;
u8 modrm_seg;
- bool rip_relative;
+ u8 seg_override;
+ u64 d;
unsigned long _eip;
struct operand memop;
- u32 regs_valid; /* bitmaps of registers in _regs[] that can be read */
- u32 regs_dirty; /* bitmaps of registers in _regs[] that have been written */
/* Fields above regs are cleared together. */
unsigned long _regs[NR_VCPU_REGS];
struct operand *memopp;
@@ -408,6 +415,7 @@ bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt);
#define EMULATION_OK 0
#define EMULATION_RESTART 1
#define EMULATION_INTERCEPTED 2
+void init_decode_cache(struct x86_emulate_ctxt *ctxt);
int x86_emulate_insn(struct x86_emulate_ctxt *ctxt);
int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
u16 tss_selector, int idt_index, int reason,
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 63e020be3da7..b8a4480176b9 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -448,7 +448,7 @@ struct kvm_vcpu_arch {
u64 tsc_offset_adjustment;
u64 this_tsc_nsec;
u64 this_tsc_write;
- u8 this_tsc_generation;
+ u64 this_tsc_generation;
bool tsc_catchup;
bool tsc_always_catchup;
s8 virtual_tsc_shift;
@@ -591,7 +591,7 @@ struct kvm_arch {
u64 cur_tsc_nsec;
u64 cur_tsc_write;
u64 cur_tsc_offset;
- u8 cur_tsc_generation;
+ u64 cur_tsc_generation;
int nr_vcpus_matched_tsc;
spinlock_t pvclock_gtod_sync_lock;
@@ -717,7 +717,7 @@ struct kvm_x86_ops {
int (*handle_exit)(struct kvm_vcpu *vcpu);
void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
- u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
+ u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
void (*patch_hypercall)(struct kvm_vcpu *vcpu,
unsigned char *hypercall_addr);
void (*set_irq)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 84dc4ba0364d..dd074106d0c9 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -162,6 +162,10 @@
#define NoWrite ((u64)1 << 45) /* No writeback */
#define SrcWrite ((u64)1 << 46) /* Write back src operand */
#define NoMod ((u64)1 << 47) /* Mod field is ignored */
+#define Intercept ((u64)1 << 48) /* Has valid intercept field */
+#define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */
+#define NoBigReal ((u64)1 << 50) /* No big real mode */
+#define PrivUD ((u64)1 << 51) /* #UD instead of #GP on CPL > 0 */
#define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
@@ -426,6 +430,7 @@ static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
.modrm_reg = ctxt->modrm_reg,
.modrm_rm = ctxt->modrm_rm,
.src_val = ctxt->src.val64,
+ .dst_val = ctxt->dst.val64,
.src_bytes = ctxt->src.bytes,
.dst_bytes = ctxt->dst.bytes,
.ad_bytes = ctxt->ad_bytes,
@@ -511,12 +516,6 @@ static u32 desc_limit_scaled(struct desc_struct *desc)
return desc->g ? (limit << 12) | 0xfff : limit;
}
-static void set_seg_override(struct x86_emulate_ctxt *ctxt, int seg)
-{
- ctxt->has_seg_override = true;
- ctxt->seg_override = seg;
-}
-
static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
{
if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
@@ -525,14 +524,6 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
return ctxt->ops->get_cached_segment_base(ctxt, seg);
}
-static unsigned seg_override(struct x86_emulate_ctxt *ctxt)
-{
- if (!ctxt->has_seg_override)
- return 0;
-
- return ctxt->seg_override;
-}
-
static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
u32 error, bool valid)
{
@@ -651,7 +642,12 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
if (!fetch && (desc.type & 8) && !(desc.type & 2))
goto bad;
lim = desc_limit_scaled(&desc);
- if ((desc.type & 8) || !(desc.type & 4)) {
+ if ((ctxt->mode == X86EMUL_MODE_REAL) && !fetch &&
+ (ctxt->d & NoBigReal)) {
+ /* la is between zero and 0xffff */
+ if (la > 0xffff || (u32)(la + size - 1) > 0xffff)
+ goto bad;
+ } else if ((desc.type & 8) || !(desc.type & 4)) {
/* expand-up segment */
if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim)
goto bad;
@@ -716,68 +712,71 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
}
/*
- * Fetch the next byte of the instruction being emulated which is pointed to
- * by ctxt->_eip, then increment ctxt->_eip.
- *
- * Also prefetch the remaining bytes of the instruction without crossing page
+ * Prefetch the remaining bytes of the instruction without crossing page
* boundary if they are not in fetch_cache yet.
*/
-static int do_insn_fetch_byte(struct x86_emulate_ctxt *ctxt, u8 *dest)
+static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
{
- struct fetch_cache *fc = &ctxt->fetch;
int rc;
- int size, cur_size;
-
- if (ctxt->_eip == fc->end) {
- unsigned long linear;
- struct segmented_address addr = { .seg = VCPU_SREG_CS,
- .ea = ctxt->_eip };
- cur_size = fc->end - fc->start;
- size = min(15UL - cur_size,
- PAGE_SIZE - offset_in_page(ctxt->_eip));
- rc = __linearize(ctxt, addr, size, false, true, &linear);
- if (unlikely(rc != X86EMUL_CONTINUE))
- return rc;
- rc = ctxt->ops->fetch(ctxt, linear, fc->data + cur_size,
- size, &ctxt->exception);
- if (unlikely(rc != X86EMUL_CONTINUE))
- return rc;
- fc->end += size;
- }
- *dest = fc->data[ctxt->_eip - fc->start];
- ctxt->_eip++;
- return X86EMUL_CONTINUE;
-}
+ unsigned size;
+ unsigned long linear;
+ int cur_size = ctxt->fetch.end - ctxt->fetch.data;
+ struct segmented_address addr = { .seg = VCPU_SREG_CS,
+ .ea = ctxt->eip + cur_size };
+
+ size = 15UL ^ cur_size;
+ rc = __linearize(ctxt, addr, size, false, true, &linear);
+ if (unlikely(rc != X86EMUL_CONTINUE))
+ return rc;
-static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
- void *dest, unsigned size)
-{
- int rc;
+ size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear));
- /* x86 instructions are limited to 15 bytes. */
- if (unlikely(ctxt->_eip + size - ctxt->eip > 15))
+ /*
+ * One instruction can only straddle two pages,
+ * and one has been loaded at the beginning of
+ * x86_decode_insn. So, if not enough bytes
+ * still, we must have hit the 15-byte boundary.
+ */
+ if (unlikely(size < op_size))
return X86EMUL_UNHANDLEABLE;
- while (size--) {
- rc = do_insn_fetch_byte(ctxt, dest++);
- if (rc != X86EMUL_CONTINUE)
- return rc;
- }
+ rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end,
+ size, &ctxt->exception);
+ if (unlikely(rc != X86EMUL_CONTINUE))
+ return rc;
+ ctxt->fetch.end += size;
return X86EMUL_CONTINUE;
}
+static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
+ unsigned size)
+{
+ if (unlikely(ctxt->fetch.end - ctxt->fetch.ptr < size))
+ return __do_insn_fetch_bytes(ctxt, size);
+ else
+ return X86EMUL_CONTINUE;
+}
+
/* Fetch next part of the instruction being emulated. */
#define insn_fetch(_type, _ctxt) \
-({ unsigned long _x; \
- rc = do_insn_fetch(_ctxt, &_x, sizeof(_type)); \
+({ _type _x; \
+ \
+ rc = do_insn_fetch_bytes(_ctxt, sizeof(_type)); \
if (rc != X86EMUL_CONTINUE) \
goto done; \
- (_type)_x; \
+ ctxt->_eip += sizeof(_type); \
+ _x = *(_type __aligned(1) *) ctxt->fetch.ptr; \
+ ctxt->fetch.ptr += sizeof(_type); \
+ _x; \
})
#define insn_fetch_arr(_arr, _size, _ctxt) \
-({ rc = do_insn_fetch(_ctxt, _arr, (_size)); \
+({ \
+ rc = do_insn_fetch_bytes(_ctxt, _size); \
if (rc != X86EMUL_CONTINUE) \
goto done; \
+ ctxt->_eip += (_size); \
+ memcpy(_arr, ctxt->fetch.ptr, _size); \
+ ctxt->fetch.ptr += (_size); \
})
/*
@@ -1063,19 +1062,17 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
struct operand *op)
{
u8 sib;
- int index_reg = 0, base_reg = 0, scale;
+ int index_reg, base_reg, scale;
int rc = X86EMUL_CONTINUE;
ulong modrm_ea = 0;
- if (ctxt->rex_prefix) {
- ctxt->modrm_reg = (ctxt->rex_prefix & 4) << 1; /* REX.R */
- index_reg = (ctxt->rex_prefix & 2) << 2; /* REX.X */
- ctxt->modrm_rm = base_reg = (ctxt->rex_prefix & 1) << 3; /* REG.B */
- }
+ ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */
+ index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */
+ base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */
- ctxt->modrm_mod |= (ctxt->modrm & 0xc0) >> 6;
+ ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
- ctxt->modrm_rm |= (ctxt->modrm & 0x07);
+ ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07);
ctxt->modrm_seg = VCPU_SREG_DS;
if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
@@ -1190,6 +1187,9 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
}
}
op->addr.mem.ea = modrm_ea;
+ if (ctxt->ad_bytes != 8)
+ ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;
+
done:
return rc;
}
@@ -1585,34 +1585,28 @@ static void write_register_operand(struct operand *op)
static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
{
- int rc;
-
switch (op->type) {
case OP_REG:
write_register_operand(op);
break;
case OP_MEM:
if (ctxt->lock_prefix)
- rc = segmented_cmpxchg(ctxt,
+ return segmented_cmpxchg(ctxt,
+ op->addr.mem,
+ &op->orig_val,
+ &op->val,
+ op->bytes);
+ else
+ return segmented_write(ctxt,
op->addr.mem,
- &op->orig_val,
&op->val,
op->bytes);
- else
- rc = segmented_write(ctxt,
- op->addr.mem,
- &op->val,
- op->bytes);
- if (rc != X86EMUL_CONTINUE)
- return rc;
break;
case OP_MEM_STR:
- rc = segmented_write(ctxt,
- op->addr.mem,
- op->data,
- op->bytes * op->count);
- if (rc != X86EMUL_CONTINUE)
- return rc;
+ return segmented_write(ctxt,
+ op->addr.mem,
+ op->data,
+ op->bytes * op->count);
break;
case OP_XMM:
write_sse_reg(ctxt, &op->vec_val, op->addr.xmm);
@@ -2987,7 +2981,7 @@ static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
static int em_mov(struct x86_emulate_ctxt *ctxt)
{
- memcpy(ctxt->dst.valptr, ctxt->src.valptr, ctxt->op_bytes);
+ memcpy(ctxt->dst.valptr, ctxt->src.valptr, sizeof(ctxt->src.valptr));
return X86EMUL_CONTINUE;
}
@@ -3545,9 +3539,9 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
}
#define D(_y) { .flags = (_y) }
-#define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i }
-#define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \
- .check_perm = (_p) }
+#define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i }
+#define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \
+ .intercept = x86_intercept_##_i, .check_perm = (_p) }
#define N D(NotImpl)
#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
@@ -3556,10 +3550,10 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
#define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
#define II(_f, _e, _i) \
- { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i }
+ { .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i }
#define IIP(_f, _e, _i, _p) \
- { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i, \
- .check_perm = (_p) }
+ { .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \
+ .intercept = x86_intercept_##_i, .check_perm = (_p) }
#define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
#define D2bv(_f) D((_f) | ByteOp), D(_f)
@@ -4174,7 +4168,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
op->addr.mem.ea =
register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI));
- op->addr.mem.seg = seg_override(ctxt);
+ op->addr.mem.seg = ctxt->seg_override;
op->val = 0;
op->count = 1;
break;
@@ -4185,7 +4179,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
register_address(ctxt,
reg_read(ctxt, VCPU_REGS_RBX) +
(reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
- op->addr.mem.seg = seg_override(ctxt);
+ op->addr.mem.seg = ctxt->seg_override;
op->val = 0;
break;
case OpImmFAddr:
@@ -4232,16 +4226,22 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
int mode = ctxt->mode;
int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
bool op_prefix = false;
+ bool has_seg_override = false;
struct opcode opcode;
ctxt->memop.type = OP_NONE;
ctxt->memopp = NULL;
ctxt->_eip = ctxt->eip;
- ctxt->fetch.start = ctxt->_eip;
- ctxt->fetch.end = ctxt->fetch.start + insn_len;
+ ctxt->fetch.ptr = ctxt->fetch.data;
+ ctxt->fetch.end = ctxt->fetch.data + insn_len;
ctxt->opcode_len = 1;
if (insn_len > 0)
memcpy(ctxt->fetch.data, insn, insn_len);
+ else {
+ rc = __do_insn_fetch_bytes(ctxt, 1);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+ }
switch (mode) {
case X86EMUL_MODE_REAL:
@@ -4285,11 +4285,13 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
case 0x2e: /* CS override */
case 0x36: /* SS override */
case 0x3e: /* DS override */
- set_seg_override(ctxt, (ctxt->b >> 3) & 3);
+ has_seg_override = true;
+ ctxt->seg_override = (ctxt->b >> 3) & 3;
break;
case 0x64: /* FS override */
case 0x65: /* GS override */
- set_seg_override(ctxt, ctxt->b & 7);
+ has_seg_override = true;
+ ctxt->seg_override = ctxt->b & 7;
break;
case 0x40 ... 0x4f: /* REX */
if (mode != X86EMUL_MODE_PROT64)
@@ -4387,49 +4389,59 @@ done_prefixes:
ctxt->d |= opcode.flags;
}
- ctxt->execute = opcode.u.execute;
- ctxt->check_perm = opcode.check_perm;
- ctxt->intercept = opcode.intercept;
-
/* Unrecognised? */
- if (ctxt->d == 0 || (ctxt->d & NotImpl))
+ if (ctxt->d == 0)
return EMULATION_FAILED;
- if (!(ctxt->d & EmulateOnUD) && ctxt->ud)
- return EMULATION_FAILED;
+ ctxt->execute = opcode.u.execute;
- if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack))
- ctxt->op_bytes = 8;
+ if (unlikely(ctxt->d &
+ (NotImpl|EmulateOnUD|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm))) {
+ /*
+ * These are copied unconditionally here, and checked unconditionally
+ * in x86_emulate_insn.
+ */
+ ctxt->check_perm = opcode.check_perm;
+ ctxt->intercept = opcode.intercept;
+
+ if (ctxt->d & NotImpl)
+ return EMULATION_FAILED;
+
+ if (!(ctxt->d & EmulateOnUD) && ctxt->ud)
+ return EMULATION_FAILED;
- if (ctxt->d & Op3264) {
- if (mode == X86EMUL_MODE_PROT64)
+ if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack))
ctxt->op_bytes = 8;
- else
- ctxt->op_bytes = 4;
- }
- if (ctxt->d & Sse)
- ctxt->op_bytes = 16;
- else if (ctxt->d & Mmx)
- ctxt->op_bytes = 8;
+ if (ctxt->d & Op3264) {
+ if (mode == X86EMUL_MODE_PROT64)
+ ctxt->op_bytes = 8;
+ else
+ ctxt->op_bytes = 4;
+ }
+
+ if (ctxt->d & Sse)
+ ctxt->op_bytes = 16;
+ else if (ctxt->d & Mmx)
+ ctxt->op_bytes = 8;
+ }
/* ModRM and SIB bytes. */
if (ctxt->d & ModRM) {
rc = decode_modrm(ctxt, &ctxt->memop);
- if (!ctxt->has_seg_override)
- set_seg_override(ctxt, ctxt->modrm_seg);
+ if (!has_seg_override) {
+ has_seg_override = true;
+ ctxt->seg_override = ctxt->modrm_seg;
+ }
} else if (ctxt->d & MemAbs)
rc = decode_abs(ctxt, &ctxt->memop);
if (rc != X86EMUL_CONTINUE)
goto done;
- if (!ctxt->has_seg_override)
- set_seg_override(ctxt, VCPU_SREG_DS);
-
- ctxt->memop.addr.mem.seg = seg_override(ctxt);
+ if (!has_seg_override)
+ ctxt->seg_override = VCPU_SREG_DS;
- if (ctxt->memop.type == OP_MEM && ctxt->ad_bytes != 8)
- ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;
+ ctxt->memop.addr.mem.seg = ctxt->seg_override;
/*
* Decode and fetch the source operand: register, memory
@@ -4451,7 +4463,7 @@ done_prefixes:
rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
done:
- if (ctxt->memopp && ctxt->memopp->type == OP_MEM && ctxt->rip_relative)
+ if (ctxt->rip_relative)
ctxt->memopp->addr.mem.ea += ctxt->_eip;
return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
@@ -4526,6 +4538,16 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
return X86EMUL_CONTINUE;
}
+void init_decode_cache(struct x86_emulate_ctxt *ctxt)
+{
+ memset(&ctxt->rip_relative, 0,
+ (void *)&ctxt->modrm - (void *)&ctxt->rip_relative);
+
+ ctxt->io_read.pos = 0;
+ ctxt->io_read.end = 0;
+ ctxt->mem_read.end = 0;
+}
+
int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
{
const struct x86_emulate_ops *ops = ctxt->ops;
@@ -4534,12 +4556,6 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
ctxt->mem_read.pos = 0;
- if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
- (ctxt->d & Undefined)) {
- rc = emulate_ud(ctxt);
- goto done;
- }
-
/* LOCK prefix is allowed only with some instructions */
if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
rc = emulate_ud(ctxt);
@@ -4551,69 +4567,81 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
goto done;
}
- if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
- || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
- rc = emulate_ud(ctxt);
- goto done;
- }
-
- if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
- rc = emulate_nm(ctxt);
- goto done;
- }
+ if (unlikely(ctxt->d &
+ (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
+ if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
+ (ctxt->d & Undefined)) {
+ rc = emulate_ud(ctxt);
+ goto done;
+ }
- if (ctxt->d & Mmx) {
- rc = flush_pending_x87_faults(ctxt);
- if (rc != X86EMUL_CONTINUE)
+ if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
+ || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
+ rc = emulate_ud(ctxt);
goto done;
- /*
- * Now that we know the fpu is exception safe, we can fetch
- * operands from it.
- */
- fetch_possible_mmx_operand(ctxt, &ctxt->src);
- fetch_possible_mmx_operand(ctxt, &ctxt->src2);
- if (!(ctxt->d & Mov))
- fetch_possible_mmx_operand(ctxt, &ctxt->dst);
- }
+ }
- if (unlikely(ctxt->guest_mode) && ctxt->intercept) {
- rc = emulator_check_intercept(ctxt, ctxt->intercept,
- X86_ICPT_PRE_EXCEPT);
- if (rc != X86EMUL_CONTINUE)
+ if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
+ rc = emulate_nm(ctxt);
goto done;
- }
+ }
- /* Privileged instruction can be executed only in CPL=0 */
- if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
- rc = emulate_gp(ctxt, 0);
- goto done;
- }
+ if (ctxt->d & Mmx) {
+ rc = flush_pending_x87_faults(ctxt);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
+ /*
+ * Now that we know the fpu is exception safe, we can fetch
+ * operands from it.
+ */
+ fetch_possible_mmx_operand(ctxt, &ctxt->src);
+ fetch_possible_mmx_operand(ctxt, &ctxt->src2);
+ if (!(ctxt->d & Mov))
+ fetch_possible_mmx_operand(ctxt, &ctxt->dst);
+ }
- /* Instruction can only be executed in protected mode */
- if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
- rc = emulate_ud(ctxt);
- goto done;
- }
+ if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) {
+ rc = emulator_check_intercept(ctxt, ctxt->intercept,
+ X86_ICPT_PRE_EXCEPT);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
+ }
- /* Do instruction specific permission checks */
- if (ctxt->check_perm) {
- rc = ctxt->check_perm(ctxt);
- if (rc != X86EMUL_CONTINUE)
+ /* Privileged instruction can be executed only in CPL=0 */
+ if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
+ if (ctxt->d & PrivUD)
+ rc = emulate_ud(ctxt);
+ else
+ rc = emulate_gp(ctxt, 0);
goto done;
- }
+ }
- if (unlikely(ctxt->guest_mode) && ctxt->intercept) {
- rc = emulator_check_intercept(ctxt, ctxt->intercept,
- X86_ICPT_POST_EXCEPT);
- if (rc != X86EMUL_CONTINUE)
+ /* Instruction can only be executed in protected mode */
+ if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
+ rc = emulate_ud(ctxt);
goto done;
- }
+ }
- if (ctxt->rep_prefix && (ctxt->d & String)) {
- /* All REP prefixes have the same first termination condition */
- if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
- ctxt->eip = ctxt->_eip;
- goto done;
+ /* Do instruction specific permission checks */
+ if (ctxt->d & CheckPerm) {
+ rc = ctxt->check_perm(ctxt);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
+ }
+
+ if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) {
+ rc = emulator_check_intercept(ctxt, ctxt->intercept,
+ X86_ICPT_POST_EXCEPT);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
+ }
+
+ if (ctxt->rep_prefix && (ctxt->d & String)) {
+ /* All REP prefixes have the same first termination condition */
+ if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
+ ctxt->eip = ctxt->_eip;
+ goto done;
+ }
}
}
@@ -4647,7 +4675,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
special_insn:
- if (unlikely(ctxt->guest_mode) && ctxt->intercept) {
+ if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) {
rc = emulator_check_intercept(ctxt, ctxt->intercept,
X86_ICPT_POST_MEMACCESS);
if (rc != X86EMUL_CONTINUE)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 006911858174..3855103f71fd 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1451,7 +1451,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
vcpu->arch.apic_arb_prio = 0;
vcpu->arch.apic_attention = 0;
- apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
+ apic_debug("%s: vcpu=%p, id=%d, base_msr="
"0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
vcpu, kvm_apic_id(apic),
vcpu->arch.apic_base, apic->base_address);
@@ -1895,7 +1895,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
/* evaluate pending_events before reading the vector */
smp_rmb();
sipi_vector = apic->sipi_vector;
- pr_debug("vcpu %d received sipi with vector # %x\n",
+ apic_debug("vcpu %d received sipi with vector # %x\n",
vcpu->vcpu_id, sipi_vector);
kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b5e994ad0135..ddf742768ecf 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -486,14 +486,14 @@ static int is_external_interrupt(u32 info)
return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
}
-static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
+static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
u32 ret = 0;
if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
- ret |= KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS;
- return ret & mask;
+ ret = KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS;
+ return ret;
}
static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
@@ -1415,7 +1415,16 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1;
var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
- var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
+
+ /*
+ * AMD CPUs circa 2014 track the G bit for all segments except CS.
+ * However, the SVM spec states that the G bit is not observed by the
+ * CPU, and some VMware virtual CPUs drop the G bit for all segments.
+ * So let's synthesize a legal G bit for all segments, this helps
+ * running KVM nested. It also helps cross-vendor migration, because
+ * Intel's vmentry has a check on the 'G' bit.
+ */
+ var->g = s->limit > 0xfffff;
/*
* AMD's VMCB does not have an explicit unusable field, so emulate it
@@ -1424,14 +1433,6 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
var->unusable = !var->present || (var->type == 0);
switch (seg) {
- case VCPU_SREG_CS:
- /*
- * SVM always stores 0 for the 'G' bit in the CS selector in
- * the VMCB on a VMEXIT. This hurts cross-vendor migration:
- * Intel's VMENTRY has a check on the 'G' bit.
- */
- var->g = s->limit > 0xfffff;
- break;
case VCPU_SREG_TR:
/*
* Work around a bug where the busy flag in the tr selector
@@ -2116,22 +2117,27 @@ static void nested_svm_unmap(struct page *page)
static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
{
- unsigned port;
- u8 val, bit;
+ unsigned port, size, iopm_len;
+ u16 val, mask;
+ u8 start_bit;
u64 gpa;
if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
return NESTED_EXIT_HOST;
port = svm->vmcb->control.exit_info_1 >> 16;
+ size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >>
+ SVM_IOIO_SIZE_SHIFT;
gpa = svm->nested.vmcb_iopm + (port / 8);
- bit = port % 8;
- val = 0;
+ start_bit = port % 8;
+ iopm_len = (start_bit + size > 8) ? 2 : 1;
+ mask = (0xf >> (4 - size)) << start_bit;
+ val = 0;
- if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, 1))
- val &= (1 << bit);
+ if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, iopm_len))
+ return NESTED_EXIT_DONE;
- return val ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
+ return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
}
static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
@@ -4205,7 +4211,8 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
if (info->intercept == x86_intercept_cr_write)
icpt_info.exit_code += info->modrm_reg;
- if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0)
+ if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0 ||
+ info->intercept == x86_intercept_clts)
break;
intercept = svm->nested.intercept;
@@ -4250,14 +4257,14 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
u64 exit_info;
u32 bytes;
- exit_info = (vcpu->arch.regs[VCPU_REGS_RDX] & 0xffff) << 16;
-
if (info->intercept == x86_intercept_in ||
info->intercept == x86_intercept_ins) {
- exit_info |= SVM_IOIO_TYPE_MASK;
- bytes = info->src_bytes;
- } else {
+ exit_info = ((info->src_val & 0xffff) << 16) |
+ SVM_IOIO_TYPE_MASK;
bytes = info->dst_bytes;
+ } else {
+ exit_info = (info->dst_val & 0xffff) << 16;
+ bytes = info->src_bytes;
}
if (info->intercept == x86_intercept_outs ||
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 33574c95220d..e850a7d332be 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -721,10 +721,10 @@ TRACE_EVENT(kvm_emulate_insn,
),
TP_fast_assign(
- __entry->rip = vcpu->arch.emulate_ctxt.fetch.start;
__entry->csbase = kvm_x86_ops->get_segment_base(vcpu, VCPU_SREG_CS);
- __entry->len = vcpu->arch.emulate_ctxt._eip
- - vcpu->arch.emulate_ctxt.fetch.start;
+ __entry->len = vcpu->arch.emulate_ctxt.fetch.ptr
+ - vcpu->arch.emulate_ctxt.fetch.data;
+ __entry->rip = vcpu->arch.emulate_ctxt._eip - __entry->len;
memcpy(__entry->insn,
vcpu->arch.emulate_ctxt.fetch.data,
15);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8748c2e19ed6..7534a9f67cc8 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1943,7 +1943,7 @@ static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
vmcs_writel(GUEST_RFLAGS, rflags);
}
-static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
+static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
{
u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
int ret = 0;
@@ -1953,7 +1953,7 @@ static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
if (interruptibility & GUEST_INTR_STATE_MOV_SS)
ret |= KVM_X86_SHADOW_INT_MOV_SS;
- return ret & mask;
+ return ret;
}
static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
@@ -3672,7 +3672,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var));
out:
- vmx->emulation_required |= emulation_required(vcpu);
+ vmx->emulation_required = emulation_required(vcpu);
}
static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
@@ -5640,7 +5640,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING;
- while (!guest_state_valid(vcpu) && count-- != 0) {
+ while (vmx->emulation_required && count-- != 0) {
if (intr_window_requested && vmx_interrupt_allowed(vcpu))
return handle_interrupt_window(&vmx->vcpu);
@@ -5674,7 +5674,6 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
schedule();
}
- vmx->emulation_required = emulation_required(vcpu);
out:
return ret;
}
@@ -7131,7 +7130,26 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
if (max_irr == -1)
return;
- vmx_set_rvi(max_irr);
+ /*
+ * If a vmexit is needed, vmx_check_nested_events handles it.
+ */
+ if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
+ return;
+
+ if (!is_guest_mode(vcpu)) {
+ vmx_set_rvi(max_irr);
+ return;
+ }
+
+ /*
+ * Fall back to pre-APICv interrupt injection since L2
+ * is run without virtual interrupt delivery.
+ */
+ if (!kvm_event_needs_reinjection(vcpu) &&
+ vmx_interrupt_allowed(vcpu)) {
+ kvm_queue_interrupt(vcpu, max_irr, false);
+ vmx_inject_irq(vcpu);
+ }
}
static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5a8691b0ed76..f750b69ca443 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -87,6 +87,7 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static void process_nmi(struct kvm_vcpu *vcpu);
+static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
struct kvm_x86_ops *kvm_x86_ops;
EXPORT_SYMBOL_GPL(kvm_x86_ops);
@@ -1215,6 +1216,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
unsigned long flags;
s64 usdiff;
bool matched;
+ bool already_matched;
u64 data = msr->data;
raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
@@ -1279,6 +1281,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
}
matched = true;
+ already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation);
} else {
/*
* We split periods of matched TSC writes into generations.
@@ -1294,7 +1297,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
kvm->arch.cur_tsc_write = data;
kvm->arch.cur_tsc_offset = offset;
matched = false;
- pr_debug("kvm: new tsc generation %u, clock %llu\n",
+ pr_debug("kvm: new tsc generation %llu, clock %llu\n",
kvm->arch.cur_tsc_generation, data);
}
@@ -1319,10 +1322,11 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
- if (matched)
- kvm->arch.nr_vcpus_matched_tsc++;
- else
+ if (!matched) {
kvm->arch.nr_vcpus_matched_tsc = 0;
+ } else if (!already_matched) {
+ kvm->arch.nr_vcpus_matched_tsc++;
+ }
kvm_track_tsc_matching(vcpu);
spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
@@ -2032,6 +2036,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
data &= ~(u64)0x40; /* ignore flush filter disable */
data &= ~(u64)0x100; /* ignore ignne emulation enable */
data &= ~(u64)0x8; /* ignore TLB cache disable */
+ data &= ~(u64)0x40000; /* ignore Mc status write enable */
if (data != 0) {
vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
data);
@@ -2974,9 +2979,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft;
events->interrupt.nr = vcpu->arch.interrupt.nr;
events->interrupt.soft = 0;
- events->interrupt.shadow =
- kvm_x86_ops->get_interrupt_shadow(vcpu,
- KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI);
+ events->interrupt.shadow = kvm_x86_ops->get_interrupt_shadow(vcpu);
events->nmi.injected = vcpu->arch.nmi_injected;
events->nmi.pending = vcpu->arch.nmi_pending != 0;
@@ -4082,7 +4085,8 @@ static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
if (gpa == UNMAPPED_GVA)
return X86EMUL_PROPAGATE_FAULT;
- ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);
+ ret = kvm_read_guest_page(vcpu->kvm, gpa >> PAGE_SHIFT, data,
+ offset, toread);
if (ret < 0) {
r = X86EMUL_IO_NEEDED;
goto out;
@@ -4103,10 +4107,24 @@ static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
{
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+ unsigned offset;
+ int ret;
- return kvm_read_guest_virt_helper(addr, val, bytes, vcpu,
- access | PFERR_FETCH_MASK,
- exception);
+ /* Inline kvm_read_guest_virt_helper for speed. */
+ gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access|PFERR_FETCH_MASK,
+ exception);
+ if (unlikely(gpa == UNMAPPED_GVA))
+ return X86EMUL_PROPAGATE_FAULT;
+
+ offset = addr & (PAGE_SIZE-1);
+ if (WARN_ON(offset + bytes > PAGE_SIZE))
+ bytes = (unsigned)PAGE_SIZE - offset;
+ ret = kvm_read_guest_page(vcpu->kvm, gpa >> PAGE_SHIFT, val,
+ offset, bytes);
+ if (unlikely(ret < 0))
+ return X86EMUL_IO_NEEDED;
+
+ return X86EMUL_CONTINUE;
}
int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
@@ -4856,7 +4874,7 @@ static const struct x86_emulate_ops emulate_ops = {
static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
{
- u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask);
+ u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu);
/*
* an sti; sti; sequence only disable interrupts for the first
* instruction. So, if the last instruction, be it emulated or
@@ -4864,8 +4882,13 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
* means that the last instruction is an sti. We should not
* leave the flag on in this case. The same goes for mov ss
*/
- if (!(int_shadow & mask))
+ if (int_shadow & mask)
+ mask = 0;
+ if (unlikely(int_shadow || mask)) {
kvm_x86_ops->set_interrupt_shadow(vcpu, mask);
+ if (!mask)
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ }
}
static void inject_emulated_exception(struct kvm_vcpu *vcpu)
@@ -4880,19 +4903,6 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu)
kvm_queue_exception(vcpu, ctxt->exception.vector);
}
-static void init_decode_cache(struct x86_emulate_ctxt *ctxt)
-{
- memset(&ctxt->opcode_len, 0,
- (void *)&ctxt->_regs - (void *)&ctxt->opcode_len);
-
- ctxt->fetch.start = 0;
- ctxt->fetch.end = 0;
- ctxt->io_read.pos = 0;
- ctxt->io_read.end = 0;
- ctxt->mem_read.pos = 0;
- ctxt->mem_read.end = 0;
-}
-
static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
{
struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
@@ -5091,20 +5101,18 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
return dr6;
}
-static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, int *r)
+static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r)
{
struct kvm_run *kvm_run = vcpu->run;
/*
- * Use the "raw" value to see if TF was passed to the processor.
- * Note that the new value of the flags has not been saved yet.
+ * rflags is the old, "raw" value of the flags. The new value has
+ * not been saved yet.
*
* This is correct even for TF set by the guest, because "the
* processor will not generate this exception after the instruction
* that sets the TF flag".
*/
- unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
-
if (unlikely(rflags & X86_EFLAGS_TF)) {
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1;
@@ -5271,13 +5279,22 @@ restart:
r = EMULATE_DONE;
if (writeback) {
+ unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
toggle_interruptibility(vcpu, ctxt->interruptibility);
- kvm_make_request(KVM_REQ_EVENT, vcpu);
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
kvm_rip_write(vcpu, ctxt->eip);
if (r == EMULATE_DONE)
- kvm_vcpu_check_singlestep(vcpu, &r);
- kvm_set_rflags(vcpu, ctxt->eflags);
+ kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+ __kvm_set_rflags(vcpu, ctxt->eflags);
+
+ /*
+ * For STI, interrupts are shadowed; so KVM_REQ_EVENT will
+ * do nothing, and it will be requested again as soon as
+ * the shadow expires. But we still need to check here,
+ * because POPF has no interrupt shadow.
+ */
+ if (unlikely((ctxt->eflags & ~rflags) & X86_EFLAGS_IF))
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
} else
vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
@@ -6842,6 +6859,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
atomic_set(&vcpu->arch.nmi_queued, 0);
vcpu->arch.nmi_pending = 0;
vcpu->arch.nmi_injected = false;
+ kvm_clear_interrupt_queue(vcpu);
+ kvm_clear_exception_queue(vcpu);
memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
vcpu->arch.dr6 = DR6_FIXED_1;
@@ -7400,12 +7419,17 @@ unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_get_rflags);
-void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
+static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
rflags |= X86_EFLAGS_TF;
kvm_x86_ops->set_rflags(vcpu, rflags);
+}
+
+void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
+{
+ __kvm_set_rflags(vcpu, rflags);
kvm_make_request(KVM_REQ_EVENT, vcpu);
}
EXPORT_SYMBOL_GPL(kvm_set_rflags);