From efb339a83368ab25de1a18c0fdff85e01c13a1ea Mon Sep 17 00:00:00 2001 From: Peter Gonda Date: Tue, 7 Mar 2023 20:24:39 +0100 Subject: [PATCH 01/10] crypto: ccp - Name -1 return value as SEV_RET_NO_FW_CALL The PSP can return a "firmware error" code of -1 in circumstances where the PSP has not actually been called. To make this protocol unambiguous, name the value SEV_RET_NO_FW_CALL. [ bp: Massage a bit. ] Signed-off-by: Peter Gonda Signed-off-by: Dionna Glaze Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20221207010210.2563293-2-dionnaglaze@google.com --- Documentation/virt/coco/sev-guest.rst | 4 ++-- drivers/crypto/ccp/sev-dev.c | 8 +++++--- include/uapi/linux/psp-sev.h | 7 +++++++ 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/Documentation/virt/coco/sev-guest.rst b/Documentation/virt/coco/sev-guest.rst index bf593e88cfd9..aa3e4c6a1f90 100644 --- a/Documentation/virt/coco/sev-guest.rst +++ b/Documentation/virt/coco/sev-guest.rst @@ -40,8 +40,8 @@ along with a description: The guest ioctl should be issued on a file descriptor of the /dev/sev-guest device. The ioctl accepts struct snp_user_guest_request. The input and output structure is specified through the req_data and resp_data field respectively. If the ioctl fails -to execute due to a firmware error, then fw_err code will be set otherwise the -fw_err will be set to 0x00000000000000ff. +to execute due to a firmware error, then fw_err code will be set. Otherwise, fw_err +will be set to 0x00000000ffffffff, i.e., the lower 32-bits are -1. The firmware checks that the message sequence counter is one greater than the guests message sequence counter. 
If guest driver fails to increment message diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index e2f25926eb51..823c67a43c38 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -444,10 +444,10 @@ static int __sev_init_ex_locked(int *error) static int __sev_platform_init_locked(int *error) { + int rc = 0, psp_ret = SEV_RET_NO_FW_CALL; struct psp_device *psp = psp_master; - struct sev_device *sev; - int rc = 0, psp_ret = -1; int (*init_function)(int *error); + struct sev_device *sev; if (!psp || !psp->sev_data) return -ENODEV; @@ -475,9 +475,11 @@ static int __sev_platform_init_locked(int *error) * initialization function should succeed by replacing the state * with a reset state. */ - dev_err(sev->dev, "SEV: retrying INIT command because of SECURE_DATA_INVALID error. Retrying once to reset PSP SEV state."); + dev_err(sev->dev, +"SEV: retrying INIT command because of SECURE_DATA_INVALID error. Retrying once to reset PSP SEV state."); rc = init_function(&psp_ret); } + if (error) *error = psp_ret; diff --git a/include/uapi/linux/psp-sev.h b/include/uapi/linux/psp-sev.h index 91b4c63d5cbf..1c9da485318f 100644 --- a/include/uapi/linux/psp-sev.h +++ b/include/uapi/linux/psp-sev.h @@ -36,6 +36,13 @@ enum { * SEV Firmware status code */ typedef enum { + /* + * This error code is not in the SEV spec. Its purpose is to convey that + * there was an error that prevented the SEV firmware from being called. + * The SEV API error codes are 16 bits, so the -1 value will not overlap + * with possible values from the specification. + */ + SEV_RET_NO_FW_CALL = -1, SEV_RET_SUCCESS = 0, SEV_RET_INVALID_PLATFORM_STATE, SEV_RET_INVALID_GUEST_STATE, From dbf07b544ca12c0ab8bd3fc1ea3509ea713a8bf5 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Tue, 7 Mar 2023 20:24:46 +0100 Subject: [PATCH 02/10] crypto: ccp: Get rid of __sev_platform_init_locked()'s local function pointer Add a wrapper instead. No functional changes. 
Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230307192449.24732-9-bp@alien8.de --- drivers/crypto/ccp/sev-dev.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index 823c67a43c38..e346c00b132a 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -442,11 +442,18 @@ static int __sev_init_ex_locked(int *error) return __sev_do_cmd_locked(SEV_CMD_INIT_EX, &data, error); } +static inline int __sev_do_init_locked(int *psp_ret) +{ + if (sev_init_ex_buffer) + return __sev_init_ex_locked(psp_ret); + else + return __sev_init_locked(psp_ret); +} + static int __sev_platform_init_locked(int *error) { int rc = 0, psp_ret = SEV_RET_NO_FW_CALL; struct psp_device *psp = psp_master; - int (*init_function)(int *error); struct sev_device *sev; if (!psp || !psp->sev_data) @@ -458,15 +465,12 @@ static int __sev_platform_init_locked(int *error) return 0; if (sev_init_ex_buffer) { - init_function = __sev_init_ex_locked; rc = sev_read_init_ex_file(); if (rc) return rc; - } else { - init_function = __sev_init_locked; } - rc = init_function(&psp_ret); + rc = __sev_do_init_locked(&psp_ret); if (rc && psp_ret == SEV_RET_SECURE_DATA_INVALID) { /* * Initialization command returned an integrity check failure @@ -477,7 +481,7 @@ static int __sev_platform_init_locked(int *error) */ dev_err(sev->dev, "SEV: retrying INIT command because of SECURE_DATA_INVALID error. Retrying once to reset PSP SEV state."); - rc = init_function(&psp_ret); + rc = __sev_do_init_locked(&psp_ret); } if (error) From 965006103a14703cc42043bbf9b5e0cdf7a468ad Mon Sep 17 00:00:00 2001 From: Dionna Glaze Date: Tue, 7 Mar 2023 20:24:48 +0100 Subject: [PATCH 03/10] virt/coco/sev-guest: Double-buffer messages The encryption algorithms read and write directly to shared unencrypted memory, which may leak information as well as permit the host to tamper with the message integrity. 
Instead, copy whole messages in or out as needed before doing any computation on them. Fixes: d5af44dde546 ("x86/sev: Provide support for SNP guest request NAEs") Signed-off-by: Dionna Glaze Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230214164638.1189804-3-dionnaglaze@google.com --- drivers/virt/coco/sev-guest/sev-guest.c | 27 +++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index 46f1a8d558b0..0c7b47acba2a 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -46,7 +46,15 @@ struct snp_guest_dev { void *certs_data; struct snp_guest_crypto *crypto; + /* request and response are in unencrypted memory */ struct snp_guest_msg *request, *response; + + /* + * Avoid information leakage by double-buffering shared messages + * in fields that are in regular encrypted memory. + */ + struct snp_guest_msg secret_request, secret_response; + struct snp_secrets_page_layout *layout; struct snp_req_data input; u32 *os_area_msg_seqno; @@ -266,14 +274,17 @@ static int dec_payload(struct snp_guest_dev *snp_dev, struct snp_guest_msg *msg, static int verify_and_dec_payload(struct snp_guest_dev *snp_dev, void *payload, u32 sz) { struct snp_guest_crypto *crypto = snp_dev->crypto; - struct snp_guest_msg *resp = snp_dev->response; - struct snp_guest_msg *req = snp_dev->request; + struct snp_guest_msg *resp = &snp_dev->secret_response; + struct snp_guest_msg *req = &snp_dev->secret_request; struct snp_guest_msg_hdr *req_hdr = &req->hdr; struct snp_guest_msg_hdr *resp_hdr = &resp->hdr; dev_dbg(snp_dev->dev, "response [seqno %lld type %d version %d sz %d]\n", resp_hdr->msg_seqno, resp_hdr->msg_type, resp_hdr->msg_version, resp_hdr->msg_sz); + /* Copy response from shared memory to encrypted memory. 
*/ + memcpy(resp, snp_dev->response, sizeof(*resp)); + /* Verify that the sequence counter is incremented by 1 */ if (unlikely(resp_hdr->msg_seqno != (req_hdr->msg_seqno + 1))) return -EBADMSG; @@ -297,7 +308,7 @@ static int verify_and_dec_payload(struct snp_guest_dev *snp_dev, void *payload, static int enc_payload(struct snp_guest_dev *snp_dev, u64 seqno, int version, u8 type, void *payload, size_t sz) { - struct snp_guest_msg *req = snp_dev->request; + struct snp_guest_msg *req = &snp_dev->secret_request; struct snp_guest_msg_hdr *hdr = &req->hdr; memset(req, 0, sizeof(*req)); @@ -417,13 +428,21 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in if (!seqno) return -EIO; + /* Clear shared memory's response for the host to populate. */ memset(snp_dev->response, 0, sizeof(struct snp_guest_msg)); - /* Encrypt the userspace provided payload */ + /* Encrypt the userspace provided payload in snp_dev->secret_request. */ rc = enc_payload(snp_dev, seqno, msg_ver, type, req_buf, req_sz); if (rc) return rc; + /* + * Write the fully encrypted request to the shared unencrypted + * request page. + */ + memcpy(snp_dev->request, &snp_dev->secret_request, + sizeof(snp_dev->secret_request)); + rc = __handle_guest_request(snp_dev, exit_code, fw_err); if (rc) { if (rc == -EIO && *fw_err == SNP_GUEST_REQ_INVALID_LEN) From 0144e3b85d7b42e8a4cda991c0e81f131897457a Mon Sep 17 00:00:00 2001 From: Dionna Glaze Date: Tue, 7 Mar 2023 20:24:49 +0100 Subject: [PATCH 04/10] x86/sev: Change snp_guest_issue_request()'s fw_err argument The GHCB specification declares that the firmware error value for a guest request will be stored in the lower 32 bits of EXIT_INFO_2. The upper 32 bits are for the VMM's own error code. The fw_err argument to snp_guest_issue_request() is thus a misnomer, and callers will need access to all 64 bits. The type of unsigned long also causes problems, since sw_exit_info2 is u64 (unsigned long long) vs the argument's unsigned long*. 
Change this type for issuing the guest request. Pass the ioctl command struct's error field directly instead of in a local variable, since an incomplete guest request may not set the error code, and uninitialized stack memory would be written back to user space. The firmware might not even be called, so bookend the call with the no firmware call error and clear the error. Since the "fw_err" field is really exitinfo2 split into the upper bits' vmm error code and lower bits' firmware error code, convert the 64 bit value to a union. [ bp: - Massage commit message - adjust code - Fix a build issue as Reported-by: kernel test robot Link: https://lore.kernel.org/oe-kbuild-all/202303070609.vX6wp2Af-lkp@intel.com - print exitinfo2 in hex Tom: - Correct -EIO exit case. ] Signed-off-by: Dionna Glaze Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230214164638.1189804-5-dionnaglaze@google.com Link: https://lore.kernel.org/r/20230307192449.24732-12-bp@alien8.de --- Documentation/virt/coco/sev-guest.rst | 20 ++++--- arch/x86/include/asm/sev-common.h | 4 -- arch/x86/include/asm/sev.h | 10 ++-- arch/x86/kernel/sev.c | 15 +++--- drivers/virt/coco/sev-guest/sev-guest.c | 72 +++++++++++++------------ include/uapi/linux/sev-guest.h | 18 ++++++- 6 files changed, 83 insertions(+), 56 deletions(-) diff --git a/Documentation/virt/coco/sev-guest.rst b/Documentation/virt/coco/sev-guest.rst index aa3e4c6a1f90..68b0d2363af8 100644 --- a/Documentation/virt/coco/sev-guest.rst +++ b/Documentation/virt/coco/sev-guest.rst @@ -37,11 +37,11 @@ along with a description: the return value. General error numbers (-ENOMEM, -EINVAL) are not detailed, but errors with specific meanings are. -The guest ioctl should be issued on a file descriptor of the /dev/sev-guest device. -The ioctl accepts struct snp_user_guest_request. The input and output structure is -specified through the req_data and resp_data field respectively. 
If the ioctl fails -to execute due to a firmware error, then fw_err code will be set. Otherwise, fw_err -will be set to 0x00000000ffffffff, i.e., the lower 32-bits are -1. +The guest ioctl should be issued on a file descriptor of the /dev/sev-guest +device. The ioctl accepts struct snp_user_guest_request. The input and +output structure is specified through the req_data and resp_data field +respectively. If the ioctl fails to execute due to a firmware error, then +the fw_error code will be set, otherwise fw_error will be set to -1. The firmware checks that the message sequence counter is one greater than the guests message sequence counter. If guest driver fails to increment message @@ -57,8 +57,14 @@ counter (e.g. counter overflow), then -EIO will be returned. __u64 req_data; __u64 resp_data; - /* firmware error code on failure (see psp-sev.h) */ - __u64 fw_err; + /* bits[63:32]: VMM error code, bits[31:0] firmware error code (see psp-sev.h) */ + union { + __u64 exitinfo2; + struct { + __u32 fw_error; + __u32 vmm_error; + }; + }; }; 2.1 SNP_GET_REPORT diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index b63be696b776..0759af9b1acf 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -128,10 +128,6 @@ struct snp_psc_desc { struct psc_entry entries[VMGEXIT_PSC_MAX_ENTRY]; } __packed; -/* Guest message request error codes */ -#define SNP_GUEST_REQ_INVALID_LEN BIT_ULL(32) -#define SNP_GUEST_REQ_ERR_BUSY BIT_ULL(33) - #define GHCB_MSR_TERM_REQ 0x100 #define GHCB_MSR_TERM_REASON_SET_POS 12 #define GHCB_MSR_TERM_REASON_SET_MASK 0xf diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index ebc271bb6d8e..13dc2a9d23c1 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -9,6 +9,8 @@ #define __ASM_ENCRYPTED_STATE_H #include +#include + #include #include #include @@ -185,6 +187,9 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) 
return rc; } + +struct snp_guest_request_ioctl; + void setup_ghcb(void); void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned int npages); @@ -196,7 +201,7 @@ void snp_set_memory_private(unsigned long vaddr, unsigned int npages); void snp_set_wakeup_secondary_cpu(void); bool snp_init(struct boot_params *bp); void __init __noreturn snp_abort(void); -int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned long *fw_err); +int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } static inline void sev_es_ist_exit(void) { } @@ -216,8 +221,7 @@ static inline void snp_set_memory_private(unsigned long vaddr, unsigned int npag static inline void snp_set_wakeup_secondary_cpu(void) { } static inline bool snp_init(struct boot_params *bp) { return false; } static inline void snp_abort(void) { } -static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, - unsigned long *fw_err) +static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio) { return -ENOTTY; } diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 3f664ab277c4..b031244d6d2d 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include @@ -2175,7 +2177,7 @@ static int __init init_sev_config(char *str) } __setup("sev=", init_sev_config); -int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned long *fw_err) +int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio) { struct ghcb_state state; struct es_em_ctxt ctxt; @@ -2183,8 +2185,7 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned struct ghcb *ghcb; int ret; - if (!fw_err) - return -EINVAL; + 
rio->exitinfo2 = SEV_RET_NO_FW_CALL; /* * __sev_get_ghcb() needs to run with IRQs disabled because it is using @@ -2209,16 +2210,16 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned if (ret) goto e_put; - *fw_err = ghcb->save.sw_exit_info_2; - switch (*fw_err) { + rio->exitinfo2 = ghcb->save.sw_exit_info_2; + switch (rio->exitinfo2) { case 0: break; - case SNP_GUEST_REQ_ERR_BUSY: + case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_BUSY): ret = -EAGAIN; break; - case SNP_GUEST_REQ_INVALID_LEN: + case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN): /* Number of expected pages are returned in RBX */ if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) { input->data_npages = ghcb_get_rbx(ghcb); diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index 0c7b47acba2a..97dbe715e96a 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -332,11 +332,12 @@ static int enc_payload(struct snp_guest_dev *snp_dev, u64 seqno, int version, u8 return __enc_payload(snp_dev, req, payload, sz); } -static int __handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, __u64 *fw_err) +static int __handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, + struct snp_guest_request_ioctl *rio) { - unsigned long err = 0xff, override_err = 0; unsigned long req_start = jiffies; unsigned int override_npages = 0; + u64 override_err = 0; int rc; retry_request: @@ -346,7 +347,7 @@ retry_request: * sequence number must be incremented or the VMPCK must be deleted to * prevent reuse of the IV. */ - rc = snp_issue_guest_request(exit_code, &snp_dev->input, &err); + rc = snp_issue_guest_request(exit_code, &snp_dev->input, rio); switch (rc) { case -ENOSPC: /* @@ -364,7 +365,7 @@ retry_request: * request buffer size was too small and give the caller the * required buffer size. 
*/ - override_err = SNP_GUEST_REQ_INVALID_LEN; + override_err = SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN); /* * If this call to the firmware succeeds, the sequence number can @@ -377,7 +378,7 @@ retry_request: goto retry_request; /* - * The host may return SNP_GUEST_REQ_ERR_EBUSY if the request has been + * The host may return SNP_GUEST_VMM_ERR_BUSY if the request has been * throttled. Retry in the driver to avoid returning and reusing the * message sequence number on a different message. */ @@ -398,27 +399,29 @@ retry_request: */ snp_inc_msg_seqno(snp_dev); - if (fw_err) - *fw_err = override_err ?: err; + if (override_err) { + rio->exitinfo2 = override_err; + + /* + * If an extended guest request was issued and the supplied certificate + * buffer was not large enough, a standard guest request was issued to + * prevent IV reuse. If the standard request was successful, return -EIO + * back to the caller as would have originally been returned. + */ + if (!rc && override_err == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN)) + rc = -EIO; + } if (override_npages) snp_dev->input.data_npages = override_npages; - /* - * If an extended guest request was issued and the supplied certificate - * buffer was not large enough, a standard guest request was issued to - * prevent IV reuse. If the standard request was successful, return -EIO - * back to the caller as would have originally been returned. 
- */ - if (!rc && override_err == SNP_GUEST_REQ_INVALID_LEN) - return -EIO; - return rc; } -static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, int msg_ver, - u8 type, void *req_buf, size_t req_sz, void *resp_buf, - u32 resp_sz, __u64 *fw_err) +static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, + struct snp_guest_request_ioctl *rio, u8 type, + void *req_buf, size_t req_sz, void *resp_buf, + u32 resp_sz) { u64 seqno; int rc; @@ -432,7 +435,7 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in memset(snp_dev->response, 0, sizeof(struct snp_guest_msg)); /* Encrypt the userspace provided payload in snp_dev->secret_request. */ - rc = enc_payload(snp_dev, seqno, msg_ver, type, req_buf, req_sz); + rc = enc_payload(snp_dev, seqno, rio->msg_version, type, req_buf, req_sz); if (rc) return rc; @@ -443,12 +446,16 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in memcpy(snp_dev->request, &snp_dev->secret_request, sizeof(snp_dev->secret_request)); - rc = __handle_guest_request(snp_dev, exit_code, fw_err); + rc = __handle_guest_request(snp_dev, exit_code, rio); if (rc) { - if (rc == -EIO && *fw_err == SNP_GUEST_REQ_INVALID_LEN) + if (rc == -EIO && + rio->exitinfo2 == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN)) return rc; - dev_alert(snp_dev->dev, "Detected error from ASP request. rc: %d, fw_err: %llu\n", rc, *fw_err); + dev_alert(snp_dev->dev, + "Detected error from ASP request. 
rc: %d, exitinfo2: 0x%llx\n", + rc, rio->exitinfo2); + snp_disable_vmpck(snp_dev); return rc; } @@ -488,9 +495,9 @@ static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_io if (!resp) return -ENOMEM; - rc = handle_guest_request(snp_dev, SVM_VMGEXIT_GUEST_REQUEST, arg->msg_version, + rc = handle_guest_request(snp_dev, SVM_VMGEXIT_GUEST_REQUEST, arg, SNP_MSG_REPORT_REQ, &req, sizeof(req), resp->data, - resp_len, &arg->fw_err); + resp_len); if (rc) goto e_free; @@ -528,9 +535,8 @@ static int get_derived_key(struct snp_guest_dev *snp_dev, struct snp_guest_reque if (copy_from_user(&req, (void __user *)arg->req_data, sizeof(req))) return -EFAULT; - rc = handle_guest_request(snp_dev, SVM_VMGEXIT_GUEST_REQUEST, arg->msg_version, - SNP_MSG_KEY_REQ, &req, sizeof(req), buf, resp_len, - &arg->fw_err); + rc = handle_guest_request(snp_dev, SVM_VMGEXIT_GUEST_REQUEST, arg, + SNP_MSG_KEY_REQ, &req, sizeof(req), buf, resp_len); if (rc) return rc; @@ -590,12 +596,12 @@ cmd: return -ENOMEM; snp_dev->input.data_npages = npages; - ret = handle_guest_request(snp_dev, SVM_VMGEXIT_EXT_GUEST_REQUEST, arg->msg_version, + ret = handle_guest_request(snp_dev, SVM_VMGEXIT_EXT_GUEST_REQUEST, arg, SNP_MSG_REPORT_REQ, &req.data, - sizeof(req.data), resp->data, resp_len, &arg->fw_err); + sizeof(req.data), resp->data, resp_len); /* If certs length is invalid then copy the returned length */ - if (arg->fw_err == SNP_GUEST_REQ_INVALID_LEN) { + if (arg->vmm_error == SNP_GUEST_VMM_ERR_INVALID_LEN) { req.certs_len = snp_dev->input.data_npages << PAGE_SHIFT; if (copy_to_user((void __user *)arg->req_data, &req, sizeof(req))) @@ -630,7 +636,7 @@ static long snp_guest_ioctl(struct file *file, unsigned int ioctl, unsigned long if (copy_from_user(&input, argp, sizeof(input))) return -EFAULT; - input.fw_err = 0xff; + input.exitinfo2 = 0xff; /* Message version must be non-zero */ if (!input.msg_version) @@ -661,7 +667,7 @@ static long snp_guest_ioctl(struct file *file, unsigned int ioctl, 
unsigned long mutex_unlock(&snp_cmd_mutex); - if (input.fw_err && copy_to_user(argp, &input, sizeof(input))) + if (input.exitinfo2 && copy_to_user(argp, &input, sizeof(input))) return -EFAULT; return ret; diff --git a/include/uapi/linux/sev-guest.h b/include/uapi/linux/sev-guest.h index 256aaeff7e65..2aa39112cf8d 100644 --- a/include/uapi/linux/sev-guest.h +++ b/include/uapi/linux/sev-guest.h @@ -52,8 +52,14 @@ struct snp_guest_request_ioctl { __u64 req_data; __u64 resp_data; - /* firmware error code on failure (see psp-sev.h) */ - __u64 fw_err; + /* bits[63:32]: VMM error code, bits[31:0] firmware error code (see psp-sev.h) */ + union { + __u64 exitinfo2; + struct { + __u32 fw_error; + __u32 vmm_error; + }; + }; }; struct snp_ext_report_req { @@ -77,4 +83,12 @@ struct snp_ext_report_req { /* Get SNP extended report as defined in the GHCB specification version 2. */ #define SNP_GET_EXT_REPORT _IOWR(SNP_GUEST_REQ_IOC_TYPE, 0x2, struct snp_guest_request_ioctl) +/* Guest message request EXIT_INFO_2 constants */ +#define SNP_GUEST_FW_ERR_MASK GENMASK_ULL(31, 0) +#define SNP_GUEST_VMM_ERR_SHIFT 32 +#define SNP_GUEST_VMM_ERR(x) (((u64)x) << SNP_GUEST_VMM_ERR_SHIFT) + +#define SNP_GUEST_VMM_ERR_INVALID_LEN 1 +#define SNP_GUEST_VMM_ERR_BUSY 2 + #endif /* __UAPI_LINUX_SEV_GUEST_H_ */ From 88e378d400fa0544d51cf62037e7774d8a4b4379 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 8 Mar 2023 18:40:02 -0800 Subject: [PATCH 05/10] x86/ioremap: Add hypervisor callback for private MMIO mapping in coco VM Current code always maps MMIO devices as shared (decrypted) in a confidential computing VM. But Hyper-V guest VMs on AMD SEV-SNP with vTOM use a paravisor running in VMPL0 to emulate some devices, such as the IO-APIC and TPM. In such a case, the device must be accessed as private (encrypted) because the paravisor emulates the device at an address below vTOM, where all accesses are encrypted. 
Add a new hypervisor callback to determine if an MMIO address should be mapped private. The callback allows hypervisor-specific code to handle any quirks, the use of a paravisor, etc. in determining whether a mapping must be private. If the callback is not used by a hypervisor, default to returning "false", which is consistent with normal coco VM behavior. Use this callback as another special case to check for when doing ioremap(). Just checking the starting address is sufficient as an ioremap range must be all private or all shared. Also make the callback in early boot IO-APIC mapping code that uses the fixmap. [ bp: Touchups. ] Signed-off-by: Michael Kelley Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/1678329614-3482-2-git-send-email-mikelley@microsoft.com --- arch/x86/include/asm/x86_init.h | 4 ++++ arch/x86/kernel/apic/io_apic.c | 10 ++++++++-- arch/x86/kernel/x86_init.c | 2 ++ arch/x86/mm/ioremap.c | 5 +++++ 4 files changed, 19 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index c1c8c581759d..acc20ae4079d 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -259,11 +259,15 @@ struct x86_legacy_features { * VMMCALL under SEV-ES. Needs to return 'false' * if the checks fail. Called from the #VC * exception handler. + * @is_private_mmio: For CoCo VMs, must map MMIO address as private. + * Used when device is emulated by a paravisor + * layer in the VM context. 
*/ struct x86_hyper_runtime { void (*pin_vcpu)(int cpu); void (*sev_es_hcall_prepare)(struct ghcb *ghcb, struct pt_regs *regs); bool (*sev_es_hcall_finish)(struct ghcb *ghcb, struct pt_regs *regs); + bool (*is_private_mmio)(u64 addr); }; /** diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 1f83b052bb74..146671de9ddc 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -66,6 +66,7 @@ #include #include #include +#include #define for_each_ioapic(idx) \ for ((idx) = 0; (idx) < nr_ioapics; (idx)++) @@ -2680,10 +2681,15 @@ static void io_apic_set_fixmap(enum fixed_addresses idx, phys_addr_t phys) pgprot_t flags = FIXMAP_PAGE_NOCACHE; /* - * Ensure fixmaps for IOAPIC MMIO respect memory encryption pgprot + * Ensure fixmaps for IO-APIC MMIO respect memory encryption pgprot * bits, just like normal ioremap(): */ - flags = pgprot_decrypted(flags); + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) { + if (x86_platform.hyper.is_private_mmio(phys)) + flags = pgprot_encrypted(flags); + else + flags = pgprot_decrypted(flags); + } __set_fixmap(idx, phys, flags); } diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index ef80d361b463..95be3831df73 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -134,6 +134,7 @@ static void enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return false; } static bool enc_tlb_flush_required_noop(bool enc) { return false; } static bool enc_cache_flush_required_noop(void) { return false; } +static bool is_private_mmio_noop(u64 addr) {return false; } struct x86_platform_ops x86_platform __ro_after_init = { .calibrate_cpu = native_calibrate_cpu_early, @@ -149,6 +150,7 @@ struct x86_platform_ops x86_platform __ro_after_init = { .realmode_reserve = reserve_real_mode, .realmode_init = init_real_mode, .hyper.pin_vcpu = x86_op_int_noop, + 
.hyper.is_private_mmio = is_private_mmio_noop, .guest = { .enc_status_change_prepare = enc_status_change_prepare_noop, diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 6453fbaedb08..aa7d279321ea 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -116,6 +116,11 @@ static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *des if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) return; + if (x86_platform.hyper.is_private_mmio(addr)) { + desc->flags |= IORES_MAP_ENCRYPTED; + return; + } + if (!IS_ENABLED(CONFIG_EFI)) return; From 71290be18f2deeae013482bf79cd526df61fcfcd Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 26 Mar 2023 06:51:57 -0700 Subject: [PATCH 06/10] x86/hyperv: Reorder code to facilitate future work Reorder some code to facilitate future work. No functional change. Signed-off-by: Michael Kelley Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tianyu Lan Link: https://lore.kernel.org/r/1679838727-87310-3-git-send-email-mikelley@microsoft.com --- arch/x86/hyperv/ivm.c | 68 +++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 1dbcbd9da74d..f33c67ef1b25 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -235,40 +235,6 @@ void hv_ghcb_msr_read(u64 msr, u64 *value) EXPORT_SYMBOL_GPL(hv_ghcb_msr_read); #endif -enum hv_isolation_type hv_get_isolation_type(void) -{ - if (!(ms_hyperv.priv_high & HV_ISOLATION)) - return HV_ISOLATION_TYPE_NONE; - return FIELD_GET(HV_ISOLATION_TYPE, ms_hyperv.isolation_config_b); -} -EXPORT_SYMBOL_GPL(hv_get_isolation_type); - -/* - * hv_is_isolation_supported - Check system runs in the Hyper-V - * isolation VM. 
- */ -bool hv_is_isolation_supported(void) -{ - if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) - return false; - - if (!hypervisor_is_type(X86_HYPER_MS_HYPERV)) - return false; - - return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE; -} - -DEFINE_STATIC_KEY_FALSE(isolation_type_snp); - -/* - * hv_isolation_type_snp - Check system runs in the AMD SEV-SNP based - * isolation VM. - */ -bool hv_isolation_type_snp(void) -{ - return static_branch_unlikely(&isolation_type_snp); -} - /* * hv_mark_gpa_visibility - Set pages visible to host via hvcall. * @@ -387,3 +353,37 @@ void hv_unmap_memory(void *addr) { vunmap(addr); } + +enum hv_isolation_type hv_get_isolation_type(void) +{ + if (!(ms_hyperv.priv_high & HV_ISOLATION)) + return HV_ISOLATION_TYPE_NONE; + return FIELD_GET(HV_ISOLATION_TYPE, ms_hyperv.isolation_config_b); +} +EXPORT_SYMBOL_GPL(hv_get_isolation_type); + +/* + * hv_is_isolation_supported - Check system runs in the Hyper-V + * isolation VM. + */ +bool hv_is_isolation_supported(void) +{ + if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) + return false; + + if (!hypervisor_is_type(X86_HYPER_MS_HYPERV)) + return false; + + return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE; +} + +DEFINE_STATIC_KEY_FALSE(isolation_type_snp); + +/* + * hv_isolation_type_snp - Check system runs in the AMD SEV-SNP based + * isolation VM. + */ +bool hv_isolation_type_snp(void) +{ + return static_branch_unlikely(&isolation_type_snp); +} From d33ddc92db8a61416473ff3d7f1c621c50733dc0 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 26 Mar 2023 06:51:58 -0700 Subject: [PATCH 07/10] Drivers: hv: Explicitly request decrypted in vmap_pfn() calls Update vmap_pfn() calls to explicitly request that the mapping be for decrypted access to the memory. There's no change in functionality since the PFNs passed to vmap_pfn() are above the shared_gpa_boundary, implicitly producing a decrypted mapping. 
But explicitly requesting "decrypted" allows the code to work before and after changes that cause vmap_pfn() to mask the PFNs to being below the shared_gpa_boundary. Signed-off-by: Michael Kelley Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tianyu Lan Link: https://lore.kernel.org/r/1679838727-87310-4-git-send-email-mikelley@microsoft.com --- arch/x86/hyperv/ivm.c | 2 +- drivers/hv/ring_buffer.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index f33c67ef1b25..5648efb6c73e 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -343,7 +343,7 @@ void *hv_map_memory(void *addr, unsigned long size) pfns[i] = vmalloc_to_pfn(addr + i * PAGE_SIZE) + (ms_hyperv.shared_gpa_boundary >> PAGE_SHIFT); - vaddr = vmap_pfn(pfns, size / PAGE_SIZE, PAGE_KERNEL_IO); + vaddr = vmap_pfn(pfns, size / PAGE_SIZE, pgprot_decrypted(PAGE_KERNEL)); kfree(pfns); return vaddr; diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index c6692fd5ab15..2111e97c3b63 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -211,7 +211,7 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, ring_info->ring_buffer = (struct hv_ring_buffer *) vmap_pfn(pfns_wraparound, page_cnt * 2 - 1, - PAGE_KERNEL); + pgprot_decrypted(PAGE_KERNEL)); kfree(pfns_wraparound); if (!ring_info->ring_buffer) From c7b5254bd802ee3868f1c59333545272dc700d6d Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 8 Mar 2023 18:40:05 -0800 Subject: [PATCH 08/10] x86/mm: Handle decryption/re-encryption of bss_decrypted consistently sme_postprocess_startup() decrypts the bss_decrypted section when sme_me_mask is non-zero. mem_encrypt_free_decrypted_mem() re-encrypts the unused portion based on CC_ATTR_MEM_ENCRYPT. In a Hyper-V guest VM using vTOM, these conditions are not equivalent as sme_me_mask is always zero when using vTOM. 
Consequently, mem_encrypt_free_decrypted_mem() attempts to re-encrypt memory that was never decrypted. So check sme_me_mask in mem_encrypt_free_decrypted_mem() too. Hyper-V guests using vTOM don't need the bss_decrypted section to be decrypted, so skipping the decryption/re-encryption doesn't cause a problem. Signed-off-by: Michael Kelley Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tom Lendacky Link: https://lore.kernel.org/r/1678329614-3482-5-git-send-email-mikelley@microsoft.com --- arch/x86/mm/mem_encrypt_amd.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index 9c4d8dbcb129..e0b51c09109f 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -513,10 +513,14 @@ void __init mem_encrypt_free_decrypted_mem(void) npages = (vaddr_end - vaddr) >> PAGE_SHIFT; /* - * The unused memory range was mapped decrypted, change the encryption - * attribute from decrypted to encrypted before freeing it. + * If the unused memory range was mapped decrypted, change the encryption + * attribute from decrypted to encrypted before freeing it. Base the + * re-encryption on the same condition used for the decryption in + * sme_postprocess_startup(). Higher level abstractions, such as + * CC_ATTR_MEM_ENCRYPT, aren't necessarily equivalent in a Hyper-V VM + * using vTOM, where sme_me_mask is always zero. */ - if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) { + if (sme_me_mask) { r = set_memory_encrypted(vaddr, npages); if (r) { pr_warn("failed to free unused decrypted pages\n"); From e45e761b77bc0739e7e23258c4394013bbb919c7 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 8 Mar 2023 18:40:06 -0800 Subject: [PATCH 09/10] init: Call mem_encrypt_init() after Hyper-V hypercall init is done Full Hyper-V initialization, including support for hypercalls, is done as an apic_post_init callback via late_time_init(). 
mem_encrypt_init() needs to make hypercalls when it marks swiotlb memory as decrypted. But mem_encrypt_init() is currently called a few lines before late_time_init(), so the hypercalls don't work. Fix this by moving mem_encrypt_init() after late_time_init() and related clock initializations. The intervening initializations don't do any I/O that requires the swiotlb, so moving mem_encrypt_init() slightly later has no impact. Signed-off-by: Michael Kelley Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tom Lendacky Link: https://lore.kernel.org/r/1678329614-3482-6-git-send-email-mikelley@microsoft.com --- init/main.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/init/main.c b/init/main.c index 4425d1783d5c..7e9c0ca25643 100644 --- a/init/main.c +++ b/init/main.c @@ -1088,14 +1088,6 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) */ locking_selftest(); - /* - * This needs to be called before any devices perform DMA - * operations that might use the SWIOTLB bounce buffers. It will - * mark the bounce buffers as decrypted so that their usage will - * not cause "plain-text" data to be decrypted when accessed. - */ - mem_encrypt_init(); - #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start && !initrd_below_start_ok && page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) { @@ -1112,6 +1104,17 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) late_time_init(); sched_clock_init(); calibrate_delay(); + + /* + * This needs to be called before any devices perform DMA + * operations that might use the SWIOTLB bounce buffers. It will + * mark the bounce buffers as decrypted so that their usage will + * not cause "plain-text" data to be decrypted when accessed. It + * must be called after late_time_init() so that Hyper-V x86/x64 + * hypercalls work when the SWIOTLB bounce buffers are decrypted. 
+ */ + mem_encrypt_init(); + pid_idr_init(); anon_vma_init(); #ifdef CONFIG_X86 From 812b0597fb4043240724e4c7bed7ba1fe15c0e3f Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 26 Mar 2023 06:52:01 -0700 Subject: [PATCH 10/10] x86/hyperv: Change vTOM handling to use standard coco mechanisms Hyper-V guests on AMD SEV-SNP hardware have the option of using the "virtual Top Of Memory" (vTOM) feature specified by the SEV-SNP architecture. With vTOM, shared vs. private memory accesses are controlled by splitting the guest physical address space into two halves. vTOM is the dividing line where the uppermost bit of the physical address space is set; e.g., with 47 bits of guest physical address space, vTOM is 0x400000000000 (bit 46 is set). Guest physical memory is accessible at two parallel physical addresses -- one below vTOM and one above vTOM. Accesses below vTOM are private (encrypted) while accesses above vTOM are shared (decrypted). In this sense, vTOM is like the GPA.SHARED bit in Intel TDX. Support for Hyper-V guests using vTOM was added to the Linux kernel in two patch sets[1][2]. This support treats the vTOM bit as part of the physical address. For accessing shared (decrypted) memory, these patch sets create a second kernel virtual mapping that maps to physical addresses above vTOM. A better approach is to treat the vTOM bit as a protection flag, not as part of the physical address. This new approach is like the approach for the GPA.SHARED bit in Intel TDX. Rather than creating a second kernel virtual mapping, the existing mapping is updated using recently added coco mechanisms. When memory is changed between private and shared using set_memory_decrypted() and set_memory_encrypted(), the PTEs for the existing kernel mapping are changed to add or remove the vTOM bit in the guest physical address, just as with TDX. The hypercalls to change the memory status on the host side are made using the existing callback mechanism. 
Everything just works, with a minor tweak to map the IO-APIC to use private accesses. To accomplish the switch in approach, the following must be done: * Update Hyper-V initialization to set the cc_mask based on vTOM and do other coco initialization. * Update physical_mask so the vTOM bit is no longer treated as part of the physical address. * Remove CC_VENDOR_HYPERV and merge the associated vTOM functionality under CC_VENDOR_AMD. Update cc_mkenc() and cc_mkdec() to set/clear the vTOM bit as a protection flag. * Code already exists to make hypercalls to inform Hyper-V about pages changing between shared and private. Update this code to run as a callback from __set_memory_enc_pgtable(). * Remove the Hyper-V special case from __set_memory_enc_dec(). * Remove the Hyper-V specific call to swiotlb_update_mem_attributes() since mem_encrypt_init() will now do it. * Add a Hyper-V specific implementation of the is_private_mmio() callback that returns true for the IO-APIC and vTPM MMIO addresses. [1] https://lore.kernel.org/all/20211025122116.264793-1-ltykernel@gmail.com/ [2] https://lore.kernel.org/all/20211213071407.314309-1-ltykernel@gmail.com/ [ bp: Touchups.
] Signed-off-by: Michael Kelley Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/1679838727-87310-7-git-send-email-mikelley@microsoft.com --- arch/x86/coco/core.c | 40 ++++++++++++----- arch/x86/hyperv/hv_init.c | 11 ----- arch/x86/hyperv/ivm.c | 72 +++++++++++++++++++++++++----- arch/x86/include/asm/coco.h | 1 - arch/x86/include/asm/mem_encrypt.h | 1 + arch/x86/include/asm/mshyperv.h | 16 ++++--- arch/x86/kernel/cpu/mshyperv.c | 15 +++---- arch/x86/mm/pat/set_memory.c | 3 -- drivers/hv/vmbus_drv.c | 1 - include/asm-generic/mshyperv.h | 2 + 10 files changed, 111 insertions(+), 51 deletions(-) diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index 49b44f881484..f4f0625691fd 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -29,6 +29,22 @@ static bool intel_cc_platform_has(enum cc_attr attr) } } +/* + * Handle the SEV-SNP vTOM case where sme_me_mask is zero, and + * the other levels of SME/SEV functionality, including C-bit + * based SEV-SNP, are not enabled. + */ +static __maybe_unused bool amd_cc_platform_vtom(enum cc_attr attr) +{ + switch (attr) { + case CC_ATTR_GUEST_MEM_ENCRYPT: + case CC_ATTR_MEM_ENCRYPT: + return true; + default: + return false; + } +} + /* * SME and SEV are very similar but they are not the same, so there are * times that the kernel will need to distinguish between SME and SEV. The @@ -41,9 +57,14 @@ static bool intel_cc_platform_has(enum cc_attr attr) * up under SME the trampoline area cannot be encrypted, whereas under SEV * the trampoline area must be encrypted. 
*/ + static bool amd_cc_platform_has(enum cc_attr attr) { #ifdef CONFIG_AMD_MEM_ENCRYPT + + if (sev_status & MSR_AMD64_SNP_VTOM) + return amd_cc_platform_vtom(attr); + switch (attr) { case CC_ATTR_MEM_ENCRYPT: return sme_me_mask; @@ -76,11 +97,6 @@ static bool amd_cc_platform_has(enum cc_attr attr) #endif } -static bool hyperv_cc_platform_has(enum cc_attr attr) -{ - return attr == CC_ATTR_GUEST_MEM_ENCRYPT; -} - bool cc_platform_has(enum cc_attr attr) { switch (vendor) { @@ -88,8 +104,6 @@ bool cc_platform_has(enum cc_attr attr) return amd_cc_platform_has(attr); case CC_VENDOR_INTEL: return intel_cc_platform_has(attr); - case CC_VENDOR_HYPERV: - return hyperv_cc_platform_has(attr); default: return false; } @@ -103,11 +117,14 @@ u64 cc_mkenc(u64 val) * encryption status of the page. * * - for AMD, bit *set* means the page is encrypted - * - for Intel *clear* means encrypted. + * - for AMD with vTOM and for Intel, *clear* means encrypted */ switch (vendor) { case CC_VENDOR_AMD: - return val | cc_mask; + if (sev_status & MSR_AMD64_SNP_VTOM) + return val & ~cc_mask; + else + return val | cc_mask; case CC_VENDOR_INTEL: return val & ~cc_mask; default: @@ -120,7 +137,10 @@ u64 cc_mkdec(u64 val) /* See comment in cc_mkenc() */ switch (vendor) { case CC_VENDOR_AMD: - return val & ~cc_mask; + if (sev_status & MSR_AMD64_SNP_VTOM) + return val | cc_mask; + else + return val & ~cc_mask; case CC_VENDOR_INTEL: return val | cc_mask; default: diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 41ef036ebb7b..edbc67ec1f3e 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -29,7 +29,6 @@ #include #include #include -#include int hyperv_init_cpuhp; u64 hv_current_partition_id = ~0ull; @@ -504,16 +503,6 @@ void __init hyperv_init(void) /* Query the VMs extended capability once, so that it can be cached. */ hv_query_ext_cap(0); -#ifdef CONFIG_SWIOTLB - /* - * Swiotlb bounce buffer needs to be mapped in extra address - * space. 
Map function doesn't work in the early place and so - * call swiotlb_update_mem_attributes() here. - */ - if (hv_is_isolation_supported()) - swiotlb_update_mem_attributes(); -#endif - return; clean_guest_os_id: diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 5648efb6c73e..f6a020cb1a24 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include #include @@ -233,7 +235,6 @@ void hv_ghcb_msr_read(u64 msr, u64 *value) local_irq_restore(flags); } EXPORT_SYMBOL_GPL(hv_ghcb_msr_read); -#endif /* * hv_mark_gpa_visibility - Set pages visible to host via hvcall. @@ -286,27 +287,25 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], } /* - * hv_set_mem_host_visibility - Set specified memory visible to host. + * hv_vtom_set_host_visibility - Set specified memory visible to host. * * In Isolation VM, all guest memory is encrypted from host and guest * needs to set memory visible to host via hvcall before sharing memory * with host. This function works as wrap of hv_mark_gpa_visibility() * with memory base and size. */ -int hv_set_mem_host_visibility(unsigned long kbuffer, int pagecount, bool visible) +static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bool enc) { - enum hv_mem_host_visibility visibility = visible ? - VMBUS_PAGE_VISIBLE_READ_WRITE : VMBUS_PAGE_NOT_VISIBLE; + enum hv_mem_host_visibility visibility = enc ? 
+ VMBUS_PAGE_NOT_VISIBLE : VMBUS_PAGE_VISIBLE_READ_WRITE; u64 *pfn_array; int ret = 0; + bool result = true; int i, pfn; - if (!hv_is_isolation_supported() || !hv_hypercall_pg) - return 0; - pfn_array = kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); if (!pfn_array) - return -ENOMEM; + return false; for (i = 0, pfn = 0; i < pagecount; i++) { pfn_array[pfn] = virt_to_hvpfn((void *)kbuffer + i * HV_HYP_PAGE_SIZE); @@ -315,17 +314,68 @@ int hv_set_mem_host_visibility(unsigned long kbuffer, int pagecount, bool visibl if (pfn == HV_MAX_MODIFY_GPA_REP_COUNT || i == pagecount - 1) { ret = hv_mark_gpa_visibility(pfn, pfn_array, visibility); - if (ret) + if (ret) { + result = false; goto err_free_pfn_array; + } pfn = 0; } } err_free_pfn_array: kfree(pfn_array); - return ret; + return result; } +static bool hv_vtom_tlb_flush_required(bool private) +{ + return true; +} + +static bool hv_vtom_cache_flush_required(void) +{ + return false; +} + +static bool hv_is_private_mmio(u64 addr) +{ + /* + * Hyper-V always provides a single IO-APIC in a guest VM. + * When a paravisor is used, it is emulated by the paravisor + * in the guest context and must be mapped private. + */ + if (addr >= HV_IOAPIC_BASE_ADDRESS && + addr < (HV_IOAPIC_BASE_ADDRESS + PAGE_SIZE)) + return true; + + /* Same with a vTPM */ + if (addr >= VTPM_BASE_ADDRESS && + addr < (VTPM_BASE_ADDRESS + PAGE_SIZE)) + return true; + + return false; +} + +void __init hv_vtom_init(void) +{ + /* + * By design, a VM using vTOM doesn't see the SEV setting, + * so SEV initialization is bypassed and sev_status isn't set. + * Set it here to indicate a vTOM VM. 
+ */ + sev_status = MSR_AMD64_SNP_VTOM; + cc_set_vendor(CC_VENDOR_AMD); + cc_set_mask(ms_hyperv.shared_gpa_boundary); + physical_mask &= ms_hyperv.shared_gpa_boundary - 1; + + x86_platform.hyper.is_private_mmio = hv_is_private_mmio; + x86_platform.guest.enc_cache_flush_required = hv_vtom_cache_flush_required; + x86_platform.guest.enc_tlb_flush_required = hv_vtom_tlb_flush_required; + x86_platform.guest.enc_status_change_finish = hv_vtom_set_host_visibility; +} + +#endif /* CONFIG_AMD_MEM_ENCRYPT */ + /* * hv_map_memory - map memory to extra space in the AMD SEV-SNP Isolation VM. */ diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h index 3d98c3a60d34..d2c6a2e8d04d 100644 --- a/arch/x86/include/asm/coco.h +++ b/arch/x86/include/asm/coco.h @@ -7,7 +7,6 @@ enum cc_vendor { CC_VENDOR_NONE, CC_VENDOR_AMD, - CC_VENDOR_HYPERV, CC_VENDOR_INTEL, }; diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index 72ca90552b6a..b7126701574c 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -56,6 +56,7 @@ void __init sev_es_init_vc_handling(void); #else /* !CONFIG_AMD_MEM_ENCRYPT */ #define sme_me_mask 0ULL +#define sev_status 0ULL static inline void __init sme_early_encrypt(resource_size_t paddr, unsigned long size) { } diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 4c4c0ec3b62e..e3cef98a0142 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -11,6 +11,14 @@ #include #include +/* + * Hyper-V always provides a single IO-APIC at this MMIO address. + * Ideally, the value should be looked up in ACPI tables, but it + * is needed for mapping the IO-APIC early in boot on Confidential + * VMs, before ACPI functions can be used. 
+ */ +#define HV_IOAPIC_BASE_ADDRESS 0xfec00000 + union hv_ghcb; DECLARE_STATIC_KEY_FALSE(isolation_type_snp); @@ -206,18 +214,19 @@ struct irq_domain *hv_create_pci_msi_domain(void); int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector, struct hv_interrupt_entry *entry); int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry); -int hv_set_mem_host_visibility(unsigned long addr, int numpages, bool visible); #ifdef CONFIG_AMD_MEM_ENCRYPT void hv_ghcb_msr_write(u64 msr, u64 value); void hv_ghcb_msr_read(u64 msr, u64 *value); bool hv_ghcb_negotiate_protocol(void); void hv_ghcb_terminate(unsigned int set, unsigned int reason); +void hv_vtom_init(void); #else static inline void hv_ghcb_msr_write(u64 msr, u64 value) {} static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {} static inline bool hv_ghcb_negotiate_protocol(void) { return false; } static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {} +static inline void hv_vtom_init(void) {} #endif extern bool hv_isolation_type_snp(void); @@ -259,11 +268,6 @@ static inline void hv_set_register(unsigned int reg, u64 value) { } static inline u64 hv_get_register(unsigned int reg) { return 0; } static inline void hv_set_non_nested_register(unsigned int reg, u64 value) { } static inline u64 hv_get_non_nested_register(unsigned int reg) { return 0; } -static inline int hv_set_mem_host_visibility(unsigned long addr, int numpages, - bool visible) -{ - return -1; -} #endif /* CONFIG_HYPERV */ diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index f36dc2f796c5..ded7506217f2 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -33,7 +33,6 @@ #include #include #include -#include /* Is Linux running as the root partition? 
*/ bool hv_root_partition; @@ -397,8 +396,10 @@ static void __init ms_hyperv_init_platform(void) if (ms_hyperv.priv_high & HV_ISOLATION) { ms_hyperv.isolation_config_a = cpuid_eax(HYPERV_CPUID_ISOLATION_CONFIG); ms_hyperv.isolation_config_b = cpuid_ebx(HYPERV_CPUID_ISOLATION_CONFIG); - ms_hyperv.shared_gpa_boundary = - BIT_ULL(ms_hyperv.shared_gpa_boundary_bits); + + if (ms_hyperv.shared_gpa_boundary_active) + ms_hyperv.shared_gpa_boundary = + BIT_ULL(ms_hyperv.shared_gpa_boundary_bits); pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n", ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b); @@ -409,11 +410,6 @@ static void __init ms_hyperv_init_platform(void) swiotlb_unencrypted_base = ms_hyperv.shared_gpa_boundary; #endif } - /* Isolation VMs are unenlightened SEV-based VMs, thus this check: */ - if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) { - if (hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE) - cc_set_vendor(CC_VENDOR_HYPERV); - } } if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) { @@ -482,6 +478,9 @@ static void __init ms_hyperv_init_platform(void) i8253_clear_counter_on_shutdown = false; #if IS_ENABLED(CONFIG_HYPERV) + if ((hv_get_isolation_type() == HV_ISOLATION_TYPE_VBS) || + (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP)) + hv_vtom_init(); /* * Setup the hook to get control post apic initialization. 
*/ diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 356758b7d4b4..b037954e0f61 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -2175,9 +2175,6 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc) { - if (hv_is_isolation_supported()) - return hv_set_mem_host_visibility(addr, numpages, !enc); - if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) return __set_memory_enc_pgtable(addr, numpages, enc); diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index d24dd65b33d4..e9e1c4139e0d 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2156,7 +2156,6 @@ void vmbus_device_unregister(struct hv_device *device_obj) * VMBUS is an acpi enumerated device. Get the information we * need from DSDT. */ -#define VTPM_BASE_ADDRESS 0xfed40000 static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx) { resource_size_t start = 0; diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index 8845a2eca339..90d7f68ed39d 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -26,6 +26,8 @@ #include #include +#define VTPM_BASE_ADDRESS 0xfed40000 + struct ms_hyperv_info { u32 features; u32 priv_high;