From 23ef731e4365196bfc186358eb4f6265d60ab352 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 30 Nov 2021 21:49:30 +0300 Subject: [PATCH 1/5] x86/insn-eval: Handle insn_get_opcode() failure is_string_insn() calls insn_get_opcode() that can fail, but does not handle the failure. is_string_insn() interface does not allow to communicate an error to the caller. Push insn_get_opcode() to the only non-static user of is_string_insn() and fail it early if insn_get_opcode() fails. [ dhansen: fix tabs-versus-spaces breakage ] Signed-off-by: Kirill A. Shutemov Signed-off-by: Dave Hansen Tested-by: Joerg Roedel Acked-by: Tom Lendacky Link: https://lkml.kernel.org/r/20211130184933.31005-2-kirill.shutemov@linux.intel.com --- arch/x86/lib/insn-eval.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c index eb3ccffb9b9d..1c51e8a415e2 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -37,8 +37,6 @@ enum reg_type { */ static bool is_string_insn(struct insn *insn) { - insn_get_opcode(insn); - /* All string instructions have a 1-byte opcode. */ if (insn->opcode.nbytes != 1) return false; @@ -1405,6 +1403,9 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs) if (!insn || !regs) return (void __user *)-1L; + if (insn_get_opcode(insn)) + return (void __user *)-1L; + switch (insn->addr_bytes) { case 2: return get_addr_ref_16(insn, regs); From d5ec1877df6da6a14bbc353aff50689528930dd2 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 30 Nov 2021 21:49:31 +0300 Subject: [PATCH 2/5] x86/insn-eval: Introduce insn_get_modrm_reg_ptr() The helper returns a pointer to the register indicated by ModRM byte. It's going to replace vc_insn_get_reg() in the SEV MMIO implementation. TDX MMIO implementation will also use it. Signed-off-by: Kirill A. Shutemov Signed-off-by: Dave Hansen Reviewed-by: Andi Kleen Reviewed-by: Tony Luck Tested-by: Joerg Roedel Acked-by: Tom Lendacky Link: https://lkml.kernel.org/r/20211130184933.31005-3-kirill.shutemov@linux.intel.com --- arch/x86/include/asm/insn-eval.h | 1 + arch/x86/lib/insn-eval.c | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h index 4ec3613551e3..33ae4040c7ee 100644 --- a/arch/x86/include/asm/insn-eval.h +++ b/arch/x86/include/asm/insn-eval.h @@ -19,6 +19,7 @@ bool insn_has_rep_prefix(struct insn *insn); void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs); int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs); int insn_get_modrm_reg_off(struct insn *insn, struct pt_regs *regs); +unsigned long *insn_get_modrm_reg_ptr(struct insn *insn, struct pt_regs *regs); unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx); int insn_get_code_seg_params(struct pt_regs *regs); int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip); diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c index 1c51e8a415e2..8df6236e441c 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -848,6 +848,26 @@ int insn_get_modrm_reg_off(struct insn *insn, struct pt_regs *regs) return get_reg_offset(insn, regs, REG_TYPE_REG); } +/** + * insn_get_modrm_reg_ptr() - Obtain register pointer based on ModRM byte + * @insn: Instruction containing the ModRM byte + * @regs: Register values as seen when entering kernel mode + * + * Returns: + * + * The register indicated by the reg part of the ModRM byte. + * The register is obtained as a pointer within pt_regs. + */ +unsigned long *insn_get_modrm_reg_ptr(struct insn *insn, struct pt_regs *regs) +{ + int offset; + + offset = insn_get_modrm_reg_off(insn, regs); + if (offset < 0) + return NULL; + return (void *)regs + offset; +} + /** * get_seg_base_limit() - obtain base address and limit of a segment * @insn: Instruction. Must be valid. From 70a81f99e45b15b3e77f70720742545e1c7541da Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 30 Nov 2021 21:49:32 +0300 Subject: [PATCH 3/5] x86/insn-eval: Introduce insn_decode_mmio() In preparation for sharing MMIO instruction decode between SEV-ES and TDX, factor out the common decode into a new insn_decode_mmio() helper. For regular virtual machine, MMIO is handled by the VMM and KVM emulates instructions that caused MMIO. But, this model doesn't work for a secure VMs (like SEV or TDX) as VMM doesn't have access to the guest memory and register state. So, for TDX or SEV VMM needs assistance in handling MMIO. It induces exception in the guest. Guest has to decode the instruction and handle it on its own. The code is based on the current SEV MMIO implementation. Signed-off-by: Kirill A. Shutemov Signed-off-by: Dave Hansen Reviewed-by: Andi Kleen Reviewed-by: Tony Luck Tested-by: Joerg Roedel Acked-by: Tom Lendacky Link: https://lkml.kernel.org/r/20211130184933.31005-4-kirill.shutemov@linux.intel.com --- arch/x86/include/asm/insn-eval.h | 12 +++++ arch/x86/lib/insn-eval.c | 84 ++++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+) diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h index 33ae4040c7ee..43785ee363f1 100644 --- a/arch/x86/include/asm/insn-eval.h +++ b/arch/x86/include/asm/insn-eval.h @@ -30,4 +30,16 @@ int insn_fetch_from_user_inatomic(struct pt_regs *regs, bool insn_decode_from_regs(struct insn *insn, struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE], int buf_size); +enum mmio_type { + MMIO_DECODE_FAILED, + MMIO_WRITE, + MMIO_WRITE_IMM, + MMIO_READ, + MMIO_READ_ZERO_EXTEND, + MMIO_READ_SIGN_EXTEND, + MMIO_MOVS, +}; + +enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes); + #endif /* _ASM_X86_INSN_EVAL_H */ diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c index 8df6236e441c..53e57ef5925c 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -1560,3 +1560,87 @@ bool insn_decode_from_regs(struct insn *insn, struct pt_regs *regs, return true; } + +/** + * insn_decode_mmio() - Decode a MMIO instruction + * @insn: Structure to store decoded instruction + * @bytes: Returns size of memory operand + * + * Decodes instruction that used for Memory-mapped I/O. + * + * Returns: + * + * Type of the instruction. Size of the memory operand is stored in + * @bytes. If decode failed, MMIO_DECODE_FAILED returned. + */ +enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes) +{ + enum mmio_type type = MMIO_DECODE_FAILED; + + *bytes = 0; + + if (insn_get_opcode(insn)) + return MMIO_DECODE_FAILED; + + switch (insn->opcode.bytes[0]) { + case 0x88: /* MOV m8,r8 */ + *bytes = 1; + fallthrough; + case 0x89: /* MOV m16/m32/m64, r16/m32/m64 */ + if (!*bytes) + *bytes = insn->opnd_bytes; + type = MMIO_WRITE; + break; + + case 0xc6: /* MOV m8, imm8 */ + *bytes = 1; + fallthrough; + case 0xc7: /* MOV m16/m32/m64, imm16/imm32/imm64 */ + if (!*bytes) + *bytes = insn->opnd_bytes; + type = MMIO_WRITE_IMM; + break; + + case 0x8a: /* MOV r8, m8 */ + *bytes = 1; + fallthrough; + case 0x8b: /* MOV r16/r32/r64, m16/m32/m64 */ + if (!*bytes) + *bytes = insn->opnd_bytes; + type = MMIO_READ; + break; + + case 0xa4: /* MOVS m8, m8 */ + *bytes = 1; + fallthrough; + case 0xa5: /* MOVS m16/m32/m64, m16/m32/m64 */ + if (!*bytes) + *bytes = insn->opnd_bytes; + type = MMIO_MOVS; + break; + + case 0x0f: /* Two-byte instruction */ + switch (insn->opcode.bytes[1]) { + case 0xb6: /* MOVZX r16/r32/r64, m8 */ + *bytes = 1; + fallthrough; + case 0xb7: /* MOVZX r32/r64, m16 */ + if (!*bytes) + *bytes = 2; + type = MMIO_READ_ZERO_EXTEND; + break; + + case 0xbe: /* MOVSX r16/r32/r64, m8 */ + *bytes = 1; + fallthrough; + case 0xbf: /* MOVSX r32/r64, m16 */ + if (!*bytes) + *bytes = 2; + type = MMIO_READ_SIGN_EXTEND; + break; + } + break; + } + + return type; +} From c494eb366dbfc5095c0f159bbb5c6d4ec3ed37ec Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 30 Nov 2021 21:49:33 +0300 Subject: [PATCH 4/5] x86/sev-es: Use insn_decode_mmio() for MMIO implementation Switch SEV implementation to insn_decode_mmio(). The helper is going to be used by TDX too. No functional changes. Signed-off-by: Kirill A. Shutemov Signed-off-by: Dave Hansen Reviewed-by: Andi Kleen Reviewed-by: Tony Luck Tested-by: Joerg Roedel Acked-by: Tom Lendacky Link: https://lkml.kernel.org/r/20211130184933.31005-5-kirill.shutemov@linux.intel.com --- arch/x86/kernel/sev.c | 174 ++++++++++-------------------------------- 1 file changed, 42 insertions(+), 132 deletions(-) diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 74f0ec955384..09c06c025626 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -776,22 +776,6 @@ static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt) do_early_exception(ctxt->regs, trapnr); } -static long *vc_insn_get_reg(struct es_em_ctxt *ctxt) -{ - long *reg_array; - int offset; - - reg_array = (long *)ctxt->regs; - offset = insn_get_modrm_reg_off(&ctxt->insn, ctxt->regs); - - if (offset < 0) - return NULL; - - offset /= sizeof(long); - - return reg_array + offset; -} - static long *vc_insn_get_rm(struct es_em_ctxt *ctxt) { long *reg_array; @@ -839,76 +823,6 @@ static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt, return sev_es_ghcb_hv_call(ghcb, true, ctxt, exit_code, exit_info_1, exit_info_2); } -static enum es_result vc_handle_mmio_twobyte_ops(struct ghcb *ghcb, - struct es_em_ctxt *ctxt) -{ - struct insn *insn = &ctxt->insn; - unsigned int bytes = 0; - enum es_result ret; - int sign_byte; - long *reg_data; - - switch (insn->opcode.bytes[1]) { - /* MMIO Read w/ zero-extension */ - case 0xb6: - bytes = 1; - fallthrough; - case 0xb7: - if (!bytes) - bytes = 2; - - ret = vc_do_mmio(ghcb, ctxt, bytes, true); - if (ret) - break; - - /* Zero extend based on operand size */ - reg_data = vc_insn_get_reg(ctxt); - if (!reg_data) - return ES_DECODE_FAILED; - - memset(reg_data, 0, insn->opnd_bytes); - - memcpy(reg_data, ghcb->shared_buffer, bytes); - break; - - /* MMIO Read w/ sign-extension */ - case 0xbe: - bytes = 1; - fallthrough; - case 0xbf: - if (!bytes) - bytes = 2; - - ret = vc_do_mmio(ghcb, ctxt, bytes, true); - if (ret) - break; - - /* Sign extend based on operand size */ - reg_data = vc_insn_get_reg(ctxt); - if (!reg_data) - return ES_DECODE_FAILED; - - if (bytes == 1) { - u8 *val = (u8 *)ghcb->shared_buffer; - - sign_byte = (*val & 0x80) ? 0xff : 0x00; - } else { - u16 *val = (u16 *)ghcb->shared_buffer; - - sign_byte = (*val & 0x8000) ? 0xff : 0x00; - } - memset(reg_data, sign_byte, insn->opnd_bytes); - - memcpy(reg_data, ghcb->shared_buffer, bytes); - break; - - default: - ret = ES_UNSUPPORTED; - } - - return ret; -} - /* * The MOVS instruction has two memory operands, which raises the * problem that it is not known whether the access to the source or the @@ -976,83 +890,79 @@ static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt, return ES_RETRY; } -static enum es_result vc_handle_mmio(struct ghcb *ghcb, - struct es_em_ctxt *ctxt) +static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) { struct insn *insn = &ctxt->insn; unsigned int bytes = 0; + enum mmio_type mmio; enum es_result ret; + u8 sign_byte; long *reg_data; - switch (insn->opcode.bytes[0]) { - /* MMIO Write */ - case 0x88: - bytes = 1; - fallthrough; - case 0x89: - if (!bytes) - bytes = insn->opnd_bytes; + mmio = insn_decode_mmio(insn, &bytes); + if (mmio == MMIO_DECODE_FAILED) + return ES_DECODE_FAILED; - reg_data = vc_insn_get_reg(ctxt); + if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS) { + reg_data = insn_get_modrm_reg_ptr(insn, ctxt->regs); if (!reg_data) return ES_DECODE_FAILED; + } + switch (mmio) { + case MMIO_WRITE: memcpy(ghcb->shared_buffer, reg_data, bytes); - ret = vc_do_mmio(ghcb, ctxt, bytes, false); break; - - case 0xc6: - bytes = 1; - fallthrough; - case 0xc7: - if (!bytes) - bytes = insn->opnd_bytes; - + case MMIO_WRITE_IMM: memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes); - ret = vc_do_mmio(ghcb, ctxt, bytes, false); break; - - /* MMIO Read */ - case 0x8a: - bytes = 1; - fallthrough; - case 0x8b: - if (!bytes) - bytes = insn->opnd_bytes; - + case MMIO_READ: ret = vc_do_mmio(ghcb, ctxt, bytes, true); if (ret) break; - reg_data = vc_insn_get_reg(ctxt); - if (!reg_data) - return ES_DECODE_FAILED; - /* Zero-extend for 32-bit operation */ if (bytes == 4) *reg_data = 0; memcpy(reg_data, ghcb->shared_buffer, bytes); break; + case MMIO_READ_ZERO_EXTEND: + ret = vc_do_mmio(ghcb, ctxt, bytes, true); + if (ret) + break; - /* MOVS instruction */ - case 0xa4: - bytes = 1; - fallthrough; - case 0xa5: - if (!bytes) - bytes = insn->opnd_bytes; - - ret = vc_handle_mmio_movs(ctxt, bytes); + /* Zero extend based on operand size */ + memset(reg_data, 0, insn->opnd_bytes); + memcpy(reg_data, ghcb->shared_buffer, bytes); break; - /* Two-Byte Opcodes */ - case 0x0f: - ret = vc_handle_mmio_twobyte_ops(ghcb, ctxt); + case MMIO_READ_SIGN_EXTEND: + ret = vc_do_mmio(ghcb, ctxt, bytes, true); + if (ret) + break; + + if (bytes == 1) { + u8 *val = (u8 *)ghcb->shared_buffer; + + sign_byte = (*val & 0x80) ? 0xff : 0x00; + } else { + u16 *val = (u16 *)ghcb->shared_buffer; + + sign_byte = (*val & 0x8000) ? 0xff : 0x00; + } + + /* Sign extend based on operand size */ + memset(reg_data, sign_byte, insn->opnd_bytes); + memcpy(reg_data, ghcb->shared_buffer, bytes); + break; + case MMIO_MOVS: + ret = vc_handle_mmio_movs(ctxt, bytes); break; default: ret = ES_UNSUPPORTED; + break; } return ret; From 4d5cff69fbddbbefef2903faa48263cc5d3ca382 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 15 Dec 2021 17:56:11 +0100 Subject: [PATCH 5/5] x86/mtrr: Remove the mtrr_bp_init() stub Add an IS_ENABLED() check in setup_arch() and call pat_disable() directly if MTRRs are not supported. This allows to remove the include in , which pull in lowlevel x86 headers that should not be included for UML builds and will cause build warnings with a following patch. Signed-off-by: Christoph Hellwig Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20211215165612.554426-2-hch@lst.de --- arch/x86/include/asm/mtrr.h | 8 +------- arch/x86/kernel/setup.c | 7 ++++++- arch/x86/kvm/mmu/spte.c | 1 + 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index 829df26fd7a3..76d726074c16 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -24,8 +24,8 @@ #define _ASM_X86_MTRR_H #include -#include +void mtrr_bp_init(void); /* * The following functions are for use by other drivers that cannot use @@ -43,7 +43,6 @@ extern int mtrr_del(int reg, unsigned long base, unsigned long size); extern int mtrr_del_page(int reg, unsigned long base, unsigned long size); extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi); extern void mtrr_ap_init(void); -extern void mtrr_bp_init(void); extern void set_mtrr_aps_delayed_init(void); extern void mtrr_aps_init(void); extern void mtrr_bp_restore(void); @@ -84,11 +83,6 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn) static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) { } -static inline void mtrr_bp_init(void) -{ - pat_disable("PAT support disabled because CONFIG_MTRR is disabled in the kernel."); -} - #define mtrr_ap_init() do {} while (0) #define set_mtrr_aps_delayed_init() do {} while (0) #define mtrr_aps_init() do {} while (0) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 6a190c7f4d71..2f5129bd49dc 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -979,7 +980,11 @@ void __init setup_arch(char **cmdline_p) max_pfn = e820__end_of_ram_pfn(); /* update e820 for memory not covered by WB MTRRs */ - mtrr_bp_init(); + if (IS_ENABLED(CONFIG_MTRR)) + mtrr_bp_init(); + else + pat_disable("PAT support disabled because CONFIG_MTRR is disabled in the kernel."); + if (mtrr_trim_uncached_memory(max_pfn)) max_pfn = e820__end_of_ram_pfn(); diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index 0c76c45fdb68..fad546df0bba 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -16,6 +16,7 @@ #include "spte.h" #include +#include #include static bool __read_mostly enable_mmio_caching = true;