mirror of
https://github.com/torvalds/linux.git
synced 2024-12-31 23:31:29 +00:00
Merge branch 'kvm-5.20-early'
s390: * add an interface to provide a hypervisor dump for secure guests * improve selftests to show tests x86: * Intel IPI virtualization * Allow getting/setting pending triple fault with KVM_GET/SET_VCPU_EVENTS * PEBS virtualization * Simplify PMU emulation by just using PERF_TYPE_RAW events * More accurate event reinjection on SVM (avoid retrying instructions) * Allow getting/setting the state of the speaker port data bit * Rewrite gfn-pfn cache refresh * Refuse starting the module if VM-Entry/VM-Exit controls are inconsistent * "Notify" VM exit
This commit is contained in:
commit
e15f5e6fa6
@ -1150,6 +1150,10 @@ The following bits are defined in the flags field:
|
||||
fields contain a valid state. This bit will be set whenever
|
||||
KVM_CAP_EXCEPTION_PAYLOAD is enabled.
|
||||
|
||||
- KVM_VCPUEVENT_VALID_TRIPLE_FAULT may be set to signal that the
|
||||
triple_fault_pending field contains a valid state. This bit will
|
||||
be set whenever KVM_CAP_TRIPLE_FAULT_EVENT is enabled.
|
||||
|
||||
ARM64:
|
||||
^^^^^^
|
||||
|
||||
@ -1245,6 +1249,10 @@ can be set in the flags field to signal that the
|
||||
exception_has_payload, exception_payload, and exception.pending fields
|
||||
contain a valid state and shall be written into the VCPU.
|
||||
|
||||
If KVM_CAP_TRIPLE_FAULT_EVENT is enabled, KVM_VCPUEVENT_VALID_TRIPLE_FAULT
|
||||
can be set in the flags field to signal that the triple_fault field contains
|
||||
a valid state and shall be written into the VCPU.
|
||||
|
||||
ARM64:
|
||||
^^^^^^
|
||||
|
||||
@ -2998,7 +3006,9 @@ KVM_CREATE_PIT2. The state is returned in the following structure::
|
||||
Valid flags are::
|
||||
|
||||
/* disable PIT in HPET legacy mode */
|
||||
#define KVM_PIT_FLAGS_HPET_LEGACY 0x00000001
|
||||
#define KVM_PIT_FLAGS_HPET_LEGACY 0x00000001
|
||||
/* speaker port data bit enabled */
|
||||
#define KVM_PIT_FLAGS_SPEAKER_DATA_ON 0x00000002
|
||||
|
||||
This IOCTL replaces the obsolete KVM_GET_PIT.
|
||||
|
||||
@ -5127,7 +5137,15 @@ into ESA mode. This reset is a superset of the initial reset.
|
||||
__u32 reserved[3];
|
||||
};
|
||||
|
||||
cmd values:
|
||||
**Ultravisor return codes**
|
||||
The Ultravisor return (reason) codes are provided by the kernel if an
|
||||
Ultravisor call has been executed to achieve the results expected by
|
||||
the command. Therefore they are independent of the IOCTL return
|
||||
code. If KVM changes `rc`, its value will always be greater than 0
|
||||
hence setting it to 0 before issuing a PV command is advised to be
|
||||
able to detect a change of `rc`.
|
||||
|
||||
**cmd values:**
|
||||
|
||||
KVM_PV_ENABLE
|
||||
Allocate memory and register the VM with the Ultravisor, thereby
|
||||
@ -5143,7 +5161,6 @@ KVM_PV_ENABLE
|
||||
===== =============================
|
||||
|
||||
KVM_PV_DISABLE
|
||||
|
||||
Deregister the VM from the Ultravisor and reclaim the memory that
|
||||
had been donated to the Ultravisor, making it usable by the kernel
|
||||
again. All registered VCPUs are converted back to non-protected
|
||||
@ -5160,6 +5177,117 @@ KVM_PV_VM_VERIFY
|
||||
Verify the integrity of the unpacked image. Only if this succeeds,
|
||||
KVM is allowed to start protected VCPUs.
|
||||
|
||||
KVM_PV_INFO
|
||||
:Capability: KVM_CAP_S390_PROTECTED_DUMP
|
||||
|
||||
Presents an API that provides Ultravisor related data to userspace
|
||||
via subcommands. len_max is the size of the user space buffer,
|
||||
len_written is KVM's indication of how many bytes of that buffer
|
||||
were actually written to. len_written can be used to determine the
|
||||
valid fields if more response fields are added in the future.
|
||||
|
||||
::
|
||||
|
||||
enum pv_cmd_info_id {
|
||||
KVM_PV_INFO_VM,
|
||||
KVM_PV_INFO_DUMP,
|
||||
};
|
||||
|
||||
struct kvm_s390_pv_info_header {
|
||||
__u32 id;
|
||||
__u32 len_max;
|
||||
__u32 len_written;
|
||||
__u32 reserved;
|
||||
};
|
||||
|
||||
struct kvm_s390_pv_info {
|
||||
struct kvm_s390_pv_info_header header;
|
||||
struct kvm_s390_pv_info_dump dump;
|
||||
struct kvm_s390_pv_info_vm vm;
|
||||
};
|
||||
|
||||
**subcommands:**
|
||||
|
||||
KVM_PV_INFO_VM
|
||||
This subcommand provides basic Ultravisor information for PV
|
||||
hosts. These values are likely also exported as files in the sysfs
|
||||
firmware UV query interface but they are more easily available to
|
||||
programs in this API.
|
||||
|
||||
The installed calls and feature_indication members provide the
|
||||
installed UV calls and the UV's other feature indications.
|
||||
|
||||
The max_* members provide information about the maximum number of PV
|
||||
vcpus, PV guests and PV guest memory size.
|
||||
|
||||
::
|
||||
|
||||
struct kvm_s390_pv_info_vm {
|
||||
__u64 inst_calls_list[4];
|
||||
__u64 max_cpus;
|
||||
__u64 max_guests;
|
||||
__u64 max_guest_addr;
|
||||
__u64 feature_indication;
|
||||
};
|
||||
|
||||
|
||||
KVM_PV_INFO_DUMP
|
||||
This subcommand provides information related to dumping PV guests.
|
||||
|
||||
::
|
||||
|
||||
struct kvm_s390_pv_info_dump {
|
||||
__u64 dump_cpu_buffer_len;
|
||||
__u64 dump_config_mem_buffer_per_1m;
|
||||
__u64 dump_config_finalize_len;
|
||||
};
|
||||
|
||||
KVM_PV_DUMP
|
||||
:Capability: KVM_CAP_S390_PROTECTED_DUMP
|
||||
|
||||
Presents an API that provides calls which facilitate dumping a
|
||||
protected VM.
|
||||
|
||||
::
|
||||
|
||||
struct kvm_s390_pv_dmp {
|
||||
__u64 subcmd;
|
||||
__u64 buff_addr;
|
||||
__u64 buff_len;
|
||||
__u64 gaddr; /* For dump storage state */
|
||||
};
|
||||
|
||||
**subcommands:**
|
||||
|
||||
KVM_PV_DUMP_INIT
|
||||
Initializes the dump process of a protected VM. If this call does
|
||||
not succeed all other subcommands will fail with -EINVAL. This
|
||||
subcommand will return -EINVAL if a dump process has not yet been
|
||||
completed.
|
||||
|
||||
Not all PV VMs can be dumped; the owner needs to set the `dump
|
||||
allowed` PCF bit 34 in the SE header to allow dumping.
|
||||
|
||||
KVM_PV_DUMP_CONFIG_STOR_STATE
|
||||
Stores `buff_len` bytes of tweak component values starting with
|
||||
the 1MB block specified by the absolute guest address
|
||||
(`gaddr`). `buff_len` needs to be `conf_dump_storage_state_len`
|
||||
aligned and at least as large as the `conf_dump_storage_state_len` value
|
||||
provided by the dump uv_info data. The buffer at `buff_addr` might be written to
|
||||
even if an error rc is returned. For instance if we encounter a
|
||||
fault after writing the first page of data.
|
||||
|
||||
KVM_PV_DUMP_COMPLETE
|
||||
If the subcommand succeeds it completes the dump process and lets
|
||||
KVM_PV_DUMP_INIT be called again.
|
||||
|
||||
On success `conf_dump_finalize_len` bytes of completion data will be
|
||||
stored to the `buff_addr`. The completion data contains a key
|
||||
derivation seed, IV, tweak nonce and encryption keys as well as an
|
||||
authentication tag all of which are needed to decrypt the dump at a
|
||||
later time.
|
||||
|
||||
|
||||
4.126 KVM_X86_SET_MSR_FILTER
|
||||
----------------------------
|
||||
|
||||
@ -5802,6 +5930,32 @@ of CPUID leaf 0xD on the host.
|
||||
|
||||
This ioctl injects an event channel interrupt directly to the guest vCPU.
|
||||
|
||||
4.136 KVM_S390_PV_CPU_COMMAND
|
||||
-----------------------------
|
||||
|
||||
:Capability: KVM_CAP_S390_PROTECTED_DUMP
|
||||
:Architectures: s390
|
||||
:Type: vcpu ioctl
|
||||
:Parameters: struct kvm_pv_cmd
|
||||
:Returns: 0 on success, < 0 on error
|
||||
|
||||
This ioctl closely mirrors `KVM_S390_PV_COMMAND` but handles requests
|
||||
for vcpus. It re-uses the kvm_s390_pv_dmp struct and hence also shares
|
||||
the command ids.
|
||||
|
||||
**command:**
|
||||
|
||||
KVM_PV_DUMP
|
||||
Presents an API that provides calls which facilitate dumping a vcpu
|
||||
of a protected VM.
|
||||
|
||||
**subcommand:**
|
||||
|
||||
KVM_PV_DUMP_CPU
|
||||
Provides encrypted dump data like register values.
|
||||
The length of the returned data is provided by uv_info.guest_cpu_stor_len.
|
||||
|
||||
|
||||
5. The kvm_run structure
|
||||
========================
|
||||
|
||||
@ -6405,6 +6559,26 @@ array field represents return values. The userspace should update the return
|
||||
values of SBI call before resuming the VCPU. For more details on RISC-V SBI
|
||||
spec refer, https://github.com/riscv/riscv-sbi-doc.
|
||||
|
||||
::
|
||||
|
||||
/* KVM_EXIT_NOTIFY */
|
||||
struct {
|
||||
#define KVM_NOTIFY_CONTEXT_INVALID (1 << 0)
|
||||
__u32 flags;
|
||||
} notify;
|
||||
|
||||
Used on x86 systems. When the VM capability KVM_CAP_X86_NOTIFY_VMEXIT is
|
||||
enabled, a VM exit is generated if no event window occurs in VM non-root mode
|
||||
for a specified amount of time. Once KVM_X86_NOTIFY_VMEXIT_USER is set when
|
||||
enabling the cap, it would exit to userspace with the exit reason
|
||||
KVM_EXIT_NOTIFY for further handling. The "flags" field contains more
|
||||
detailed info.
|
||||
|
||||
The valid value for 'flags' is:
|
||||
|
||||
- KVM_NOTIFY_CONTEXT_INVALID -- the VM context is corrupted and not valid
|
||||
in VMCS. Resuming the target VM would lead to an unknown result.
|
||||
|
||||
::
|
||||
|
||||
/* Fix the size of the union. */
|
||||
@ -7350,6 +7524,56 @@ The valid bits in cap.args[0] are:
|
||||
generate a #UD within the guest.
|
||||
=================================== ============================================
|
||||
|
||||
7.32 KVM_CAP_MAX_VCPU_ID
|
||||
------------------------
|
||||
|
||||
:Architectures: x86
|
||||
:Target: VM
|
||||
:Parameters: args[0] - maximum APIC ID value set for current VM
|
||||
:Returns: 0 on success, -EINVAL if args[0] is beyond KVM_MAX_VCPU_IDS
|
||||
supported in KVM or if it has already been set.
|
||||
|
||||
This capability allows userspace to specify maximum possible APIC ID
|
||||
assigned for current VM session prior to the creation of vCPUs, saving
|
||||
memory for data structures indexed by the APIC ID. Userspace is able
|
||||
to calculate the limit to APIC ID values from designated
|
||||
CPU topology.
|
||||
|
||||
The value can be changed only until KVM_ENABLE_CAP is set to a nonzero
|
||||
value or until a vCPU is created. Upon creation of the first vCPU,
|
||||
if the value was set to zero or KVM_ENABLE_CAP was not invoked, KVM
|
||||
uses the return value of KVM_CHECK_EXTENSION(KVM_CAP_MAX_VCPU_ID) as
|
||||
the maximum APIC ID.
|
||||
|
||||
7.33 KVM_CAP_X86_NOTIFY_VMEXIT
|
||||
------------------------------
|
||||
|
||||
:Architectures: x86
|
||||
:Target: VM
|
||||
:Parameters: args[0] is the value of notify window as well as some flags
|
||||
:Returns: 0 on success, -EINVAL if args[0] contains invalid flags or notify
|
||||
VM exit is unsupported.
|
||||
|
||||
Bits 63:32 of args[0] are used for notify window.
|
||||
Bits 31:0 of args[0] are for some flags. Valid bits are::
|
||||
|
||||
#define KVM_X86_NOTIFY_VMEXIT_ENABLED (1 << 0)
|
||||
#define KVM_X86_NOTIFY_VMEXIT_USER (1 << 1)
|
||||
|
||||
This capability allows userspace to configure the notify VM exit on/off
|
||||
in per-VM scope during VM creation. Notify VM exit is disabled by default.
|
||||
When userspace sets KVM_X86_NOTIFY_VMEXIT_ENABLED bit in args[0], VMM will
|
||||
enable this feature with the notify window provided, which will generate
|
||||
a VM exit if no event window occurs in VM non-root mode for a specified amount of
|
||||
time (notify window).
|
||||
|
||||
If KVM_X86_NOTIFY_VMEXIT_USER is set in args[0], then when a notify VM exit happens,
|
||||
KVM would exit to userspace for handling.
|
||||
|
||||
This capability is aimed to mitigate the threat that malicious VMs can
|
||||
cause a CPU to get stuck (because event windows don't open up) and make the CPU
|
||||
unavailable to host or other VMs.
|
||||
|
||||
8. Other capabilities.
|
||||
======================
|
||||
|
||||
@ -7956,6 +8180,20 @@ should adjust CPUID leaf 0xA to reflect that the PMU is disabled.
|
||||
When enabled, KVM will exit to userspace with KVM_EXIT_SYSTEM_EVENT of
|
||||
type KVM_SYSTEM_EVENT_SUSPEND to process the guest suspend request.
|
||||
|
||||
8.37 KVM_CAP_S390_PROTECTED_DUMP
|
||||
--------------------------------
|
||||
|
||||
:Capability: KVM_CAP_S390_PROTECTED_DUMP
|
||||
:Architectures: s390
|
||||
:Type: vm
|
||||
|
||||
This capability indicates that KVM and the Ultravisor support dumping
|
||||
PV guests. The `KVM_PV_DUMP` command is available for the
|
||||
`KVM_S390_PV_COMMAND` ioctl and the `KVM_PV_INFO` command provides
|
||||
dump related UV data. Also the vcpu ioctl `KVM_S390_PV_CPU_COMMAND` is
|
||||
available and supports the `KVM_PV_DUMP_CPU` subcommand.
|
||||
|
||||
|
||||
9. Known KVM API problems
|
||||
=========================
|
||||
|
||||
|
@ -10,3 +10,4 @@ KVM for s390 systems
|
||||
s390-diag
|
||||
s390-pv
|
||||
s390-pv-boot
|
||||
s390-pv-dump
|
||||
|
64
Documentation/virt/kvm/s390/s390-pv-dump.rst
Normal file
64
Documentation/virt/kvm/s390/s390-pv-dump.rst
Normal file
@ -0,0 +1,64 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===========================================
|
||||
s390 (IBM Z) Protected Virtualization dumps
|
||||
===========================================
|
||||
|
||||
Summary
|
||||
-------
|
||||
|
||||
Dumping a VM is an essential tool for debugging problems inside
|
||||
it. This is especially true when a protected VM runs into trouble as
|
||||
there's no way to access its memory and registers from the outside
|
||||
while it's running.
|
||||
|
||||
However when dumping a protected VM we need to maintain its
|
||||
confidentiality until the dump is in the hands of the VM owner who
|
||||
should be the only one capable of analysing it.
|
||||
|
||||
The confidentiality of the VM dump is ensured by the Ultravisor who
|
||||
provides an interface to KVM over which encrypted CPU and memory data
|
||||
can be requested. The encryption is based on the Customer
|
||||
Communication Key which is the key that's used to encrypt VM data in a
|
||||
way that the customer is able to decrypt.
|
||||
|
||||
|
||||
Dump process
|
||||
------------
|
||||
|
||||
A dump is done in 3 steps:
|
||||
|
||||
**Initiation**
|
||||
|
||||
This step initializes the dump process, generates cryptographic seeds
|
||||
and extracts dump keys with which the VM dump data will be encrypted.
|
||||
|
||||
**Data gathering**
|
||||
|
||||
Currently there are two types of data that can be gathered from a VM:
|
||||
the memory and the vcpu state.
|
||||
|
||||
The vcpu state contains all the important registers, general, floating
|
||||
point, vector, control and tod/timers of a vcpu. The vcpu dump can
|
||||
contain incomplete data if a vcpu is dumped while an instruction is
|
||||
emulated with help of the hypervisor. This is indicated by a flag bit
|
||||
in the dump data. For the same reason it is very important to not only
|
||||
write out the encrypted vcpu state, but also the unencrypted state
|
||||
from the hypervisor.
|
||||
|
||||
The memory state is further divided into the encrypted memory and its
|
||||
metadata comprised of the encryption tweaks and status flags. The
|
||||
encrypted memory can simply be read once it has been exported. The
|
||||
time of the export does not matter as no re-encryption is
|
||||
needed. Memory that has been swapped out and hence was exported can be
|
||||
read from the swap and written to the dump target without need for any
|
||||
special actions.
|
||||
|
||||
The tweaks / status flags for the exported pages need to be requested
|
||||
from the Ultravisor.
|
||||
|
||||
**Finalization**
|
||||
|
||||
The finalization step will provide the data needed to be able to
|
||||
decrypt the vcpu and memory data and end the dump process. When this
|
||||
step completes successfully a new dump initiation can be started.
|
@ -41,6 +41,10 @@ void uv_query_info(void)
|
||||
uv_info.max_num_sec_conf = uvcb.max_num_sec_conf;
|
||||
uv_info.max_guest_cpu_id = uvcb.max_guest_cpu_id;
|
||||
uv_info.uv_feature_indications = uvcb.uv_feature_indications;
|
||||
uv_info.supp_se_hdr_ver = uvcb.supp_se_hdr_versions;
|
||||
uv_info.supp_se_hdr_pcf = uvcb.supp_se_hdr_pcf;
|
||||
uv_info.conf_dump_storage_state_len = uvcb.conf_dump_storage_state_len;
|
||||
uv_info.conf_dump_finalize_len = uvcb.conf_dump_finalize_len;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
|
||||
|
@ -923,6 +923,7 @@ struct kvm_s390_pv {
|
||||
u64 guest_len;
|
||||
unsigned long stor_base;
|
||||
void *stor_var;
|
||||
bool dumping;
|
||||
};
|
||||
|
||||
struct kvm_arch{
|
||||
|
@ -50,6 +50,10 @@
|
||||
#define UVC_CMD_SET_UNSHARE_ALL 0x0340
|
||||
#define UVC_CMD_PIN_PAGE_SHARED 0x0341
|
||||
#define UVC_CMD_UNPIN_PAGE_SHARED 0x0342
|
||||
#define UVC_CMD_DUMP_INIT 0x0400
|
||||
#define UVC_CMD_DUMP_CONF_STOR_STATE 0x0401
|
||||
#define UVC_CMD_DUMP_CPU 0x0402
|
||||
#define UVC_CMD_DUMP_COMPLETE 0x0403
|
||||
#define UVC_CMD_SET_SHARED_ACCESS 0x1000
|
||||
#define UVC_CMD_REMOVE_SHARED_ACCESS 0x1001
|
||||
#define UVC_CMD_RETR_ATTEST 0x1020
|
||||
@ -77,6 +81,10 @@ enum uv_cmds_inst {
|
||||
BIT_UVC_CMD_UNSHARE_ALL = 20,
|
||||
BIT_UVC_CMD_PIN_PAGE_SHARED = 21,
|
||||
BIT_UVC_CMD_UNPIN_PAGE_SHARED = 22,
|
||||
BIT_UVC_CMD_DUMP_INIT = 24,
|
||||
BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE = 25,
|
||||
BIT_UVC_CMD_DUMP_CPU = 26,
|
||||
BIT_UVC_CMD_DUMP_COMPLETE = 27,
|
||||
BIT_UVC_CMD_RETR_ATTEST = 28,
|
||||
};
|
||||
|
||||
@ -110,7 +118,13 @@ struct uv_cb_qui {
|
||||
u8 reserved88[158 - 136]; /* 0x0088 */
|
||||
u16 max_guest_cpu_id; /* 0x009e */
|
||||
u64 uv_feature_indications; /* 0x00a0 */
|
||||
u8 reserveda8[200 - 168]; /* 0x00a8 */
|
||||
u64 reserveda8; /* 0x00a8 */
|
||||
u64 supp_se_hdr_versions; /* 0x00b0 */
|
||||
u64 supp_se_hdr_pcf; /* 0x00b8 */
|
||||
u64 reservedc0; /* 0x00c0 */
|
||||
u64 conf_dump_storage_state_len; /* 0x00c8 */
|
||||
u64 conf_dump_finalize_len; /* 0x00d0 */
|
||||
u8 reservedd8[256 - 216]; /* 0x00d8 */
|
||||
} __packed __aligned(8);
|
||||
|
||||
/* Initialize Ultravisor */
|
||||
@ -240,6 +254,31 @@ struct uv_cb_attest {
|
||||
u64 reserved168[4]; /* 0x0168 */
|
||||
} __packed __aligned(8);
|
||||
|
||||
/*
 * Control block for the Ultravisor "dump CPU" call. It is filled in by
 * kvm_s390_pv_dump_cpu() with cmd = UVC_CMD_DUMP_CPU, the vcpu's PV
 * handle and the address of the buffer that receives the dump data.
 *
 * NOTE(review): the reservedNN names look like byte offsets and are
 * consistent with an 8-byte struct uv_cb_header (reserved08 at 0x08,
 * reserved28 at 0x28) -- confirm against the header definition.
 */
struct uv_cb_dump_cpu {
|
||||
struct uv_cb_header header;
|
||||
u64 reserved08[2];
|
||||
u64 cpu_handle; /* handle of the protected vcpu to dump */
|
||||
u64 dump_area_origin; /* address of the buffer receiving the dump */
|
||||
u64 reserved28[5];
|
||||
} __packed __aligned(8);
|
||||
|
||||
/*
 * Control block for dumping the storage state of a protected
 * configuration; presumably used with UVC_CMD_DUMP_CONF_STOR_STATE
 * (its user is not visible from here -- TODO confirm).
 *
 * NOTE(review): assuming an 8-byte struct uv_cb_header and that the
 * reservedNN names encode byte offsets, the trailing reserved array
 * starts at 0x30 (gaddr occupies 0x28), so its name "reserved28" looks
 * off by one slot. Harmless for the layout, but confirm before relying
 * on the name.
 */
struct uv_cb_dump_stor_state {
|
||||
struct uv_cb_header header;
|
||||
u64 reserved08[2];
|
||||
u64 config_handle;
|
||||
u64 dump_area_origin;
|
||||
u64 gaddr;
|
||||
u64 reserved28[4];
|
||||
} __packed __aligned(8);
|
||||
|
||||
/*
 * Control block for completing a protected configuration dump;
 * presumably used with UVC_CMD_DUMP_COMPLETE (its user is not visible
 * from here -- TODO confirm).
 *
 * NOTE(review): assuming an 8-byte struct uv_cb_header and that the
 * reservedNN names encode byte offsets, the trailing reserved array
 * starts at 0x28 (directly after dump_area_origin at 0x20), so its
 * name "reserved30" looks off by one slot. Harmless for the layout,
 * but confirm before relying on the name.
 */
struct uv_cb_dump_complete {
|
||||
struct uv_cb_header header;
|
||||
u64 reserved08[2];
|
||||
u64 config_handle;
|
||||
u64 dump_area_origin;
|
||||
u64 reserved30[5];
|
||||
} __packed __aligned(8);
|
||||
|
||||
static inline int __uv_call(unsigned long r1, unsigned long r2)
|
||||
{
|
||||
int cc;
|
||||
@ -307,6 +346,10 @@ struct uv_info {
|
||||
unsigned int max_num_sec_conf;
|
||||
unsigned short max_guest_cpu_id;
|
||||
unsigned long uv_feature_indications;
|
||||
unsigned long supp_se_hdr_ver;
|
||||
unsigned long supp_se_hdr_pcf;
|
||||
unsigned long conf_dump_storage_state_len;
|
||||
unsigned long conf_dump_finalize_len;
|
||||
};
|
||||
|
||||
extern struct uv_info uv_info;
|
||||
|
@ -392,6 +392,54 @@ static ssize_t uv_query_facilities(struct kobject *kobj,
|
||||
static struct kobj_attribute uv_query_facilities_attr =
|
||||
__ATTR(facilities, 0444, uv_query_facilities, NULL);
|
||||
|
||||
/*
 * sysfs show handler: prints uv_info.supp_se_hdr_ver in hexadecimal,
 * followed by a newline, and returns sysfs_emit()'s result.
 * The value is taken from the Ultravisor query field
 * supp_se_hdr_versions (presumably the supported SE header versions).
 */
static ssize_t uv_query_supp_se_hdr_ver(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
return sysfs_emit(buf, "%lx\n", uv_info.supp_se_hdr_ver);
|
||||
}
|
||||
|
||||
/* Read-only (0444) attribute "supp_se_hdr_ver"; no store handler. */
static struct kobj_attribute uv_query_supp_se_hdr_ver_attr =
|
||||
__ATTR(supp_se_hdr_ver, 0444, uv_query_supp_se_hdr_ver, NULL);
|
||||
|
||||
/*
 * sysfs show handler: prints uv_info.supp_se_hdr_pcf in hexadecimal,
 * followed by a newline, and returns sysfs_emit()'s result.
 * NOTE(review): presumably the SE header plaintext control flags the
 * Ultravisor supports -- confirm against the UV query documentation.
 */
static ssize_t uv_query_supp_se_hdr_pcf(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
return sysfs_emit(buf, "%lx\n", uv_info.supp_se_hdr_pcf);
|
||||
}
|
||||
|
||||
/* Read-only (0444) attribute "supp_se_hdr_pcf"; no store handler. */
static struct kobj_attribute uv_query_supp_se_hdr_pcf_attr =
|
||||
__ATTR(supp_se_hdr_pcf, 0444, uv_query_supp_se_hdr_pcf, NULL);
|
||||
|
||||
static ssize_t uv_query_dump_cpu_len(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *page)
|
||||
{
|
||||
return scnprintf(page, PAGE_SIZE, "%lx\n",
|
||||
uv_info.guest_cpu_stor_len);
|
||||
}
|
||||
|
||||
static struct kobj_attribute uv_query_dump_cpu_len_attr =
|
||||
__ATTR(uv_query_dump_cpu_len, 0444, uv_query_dump_cpu_len, NULL);
|
||||
|
||||
static ssize_t uv_query_dump_storage_state_len(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *page)
|
||||
{
|
||||
return scnprintf(page, PAGE_SIZE, "%lx\n",
|
||||
uv_info.conf_dump_storage_state_len);
|
||||
}
|
||||
|
||||
static struct kobj_attribute uv_query_dump_storage_state_len_attr =
|
||||
__ATTR(dump_storage_state_len, 0444, uv_query_dump_storage_state_len, NULL);
|
||||
|
||||
static ssize_t uv_query_dump_finalize_len(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *page)
|
||||
{
|
||||
return scnprintf(page, PAGE_SIZE, "%lx\n",
|
||||
uv_info.conf_dump_finalize_len);
|
||||
}
|
||||
|
||||
static struct kobj_attribute uv_query_dump_finalize_len_attr =
|
||||
__ATTR(dump_finalize_len, 0444, uv_query_dump_finalize_len, NULL);
|
||||
|
||||
static ssize_t uv_query_feature_indications(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
@ -437,6 +485,11 @@ static struct attribute *uv_query_attrs[] = {
|
||||
&uv_query_max_guest_cpus_attr.attr,
|
||||
&uv_query_max_guest_vms_attr.attr,
|
||||
&uv_query_max_guest_addr_attr.attr,
|
||||
&uv_query_supp_se_hdr_ver_attr.attr,
|
||||
&uv_query_supp_se_hdr_pcf_attr.attr,
|
||||
&uv_query_dump_storage_state_len_attr.attr,
|
||||
&uv_query_dump_finalize_len_attr.attr,
|
||||
&uv_query_dump_cpu_len_attr.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
|
@ -606,6 +606,26 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
case KVM_CAP_S390_PROTECTED:
|
||||
r = is_prot_virt_host();
|
||||
break;
|
||||
case KVM_CAP_S390_PROTECTED_DUMP: {
|
||||
u64 pv_cmds_dump[] = {
|
||||
BIT_UVC_CMD_DUMP_INIT,
|
||||
BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE,
|
||||
BIT_UVC_CMD_DUMP_CPU,
|
||||
BIT_UVC_CMD_DUMP_COMPLETE,
|
||||
};
|
||||
int i;
|
||||
|
||||
r = is_prot_virt_host();
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(pv_cmds_dump); i++) {
|
||||
if (!test_bit_inv(pv_cmds_dump[i],
|
||||
(unsigned long *)&uv_info.inst_calls_list)) {
|
||||
r = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
r = 0;
|
||||
}
|
||||
@ -2220,6 +2240,115 @@ static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
|
||||
* Here we provide user space with a direct interface to query UV
|
||||
* related data like UV maxima and available features as well as
|
||||
* feature specific data.
|
||||
*
|
||||
* To facilitate future extension of the data structures we'll try to
|
||||
* write data up to the maximum requested length.
|
||||
*/
|
||||
static ssize_t kvm_s390_handle_pv_info(struct kvm_s390_pv_info *info)
|
||||
{
|
||||
ssize_t len_min;
|
||||
|
||||
switch (info->header.id) {
|
||||
case KVM_PV_INFO_VM: {
|
||||
len_min = sizeof(info->header) + sizeof(info->vm);
|
||||
|
||||
if (info->header.len_max < len_min)
|
||||
return -EINVAL;
|
||||
|
||||
memcpy(info->vm.inst_calls_list,
|
||||
uv_info.inst_calls_list,
|
||||
sizeof(uv_info.inst_calls_list));
|
||||
|
||||
/* It's max cpuid not max cpus, so it's off by one */
|
||||
info->vm.max_cpus = uv_info.max_guest_cpu_id + 1;
|
||||
info->vm.max_guests = uv_info.max_num_sec_conf;
|
||||
info->vm.max_guest_addr = uv_info.max_sec_stor_addr;
|
||||
info->vm.feature_indication = uv_info.uv_feature_indications;
|
||||
|
||||
return len_min;
|
||||
}
|
||||
case KVM_PV_INFO_DUMP: {
|
||||
len_min = sizeof(info->header) + sizeof(info->dump);
|
||||
|
||||
if (info->header.len_max < len_min)
|
||||
return -EINVAL;
|
||||
|
||||
info->dump.dump_cpu_buffer_len = uv_info.guest_cpu_stor_len;
|
||||
info->dump.dump_config_mem_buffer_per_1m = uv_info.conf_dump_storage_state_len;
|
||||
info->dump.dump_config_finalize_len = uv_info.conf_dump_finalize_len;
|
||||
return len_min;
|
||||
}
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd,
|
||||
struct kvm_s390_pv_dmp dmp)
|
||||
{
|
||||
int r = -EINVAL;
|
||||
void __user *result_buff = (void __user *)dmp.buff_addr;
|
||||
|
||||
switch (dmp.subcmd) {
|
||||
case KVM_PV_DUMP_INIT: {
|
||||
if (kvm->arch.pv.dumping)
|
||||
break;
|
||||
|
||||
/*
|
||||
* Block SIE entry as concurrent dump UVCs could lead
|
||||
* to validities.
|
||||
*/
|
||||
kvm_s390_vcpu_block_all(kvm);
|
||||
|
||||
r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
|
||||
UVC_CMD_DUMP_INIT, &cmd->rc, &cmd->rrc);
|
||||
KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP INIT: rc %x rrc %x",
|
||||
cmd->rc, cmd->rrc);
|
||||
if (!r) {
|
||||
kvm->arch.pv.dumping = true;
|
||||
} else {
|
||||
kvm_s390_vcpu_unblock_all(kvm);
|
||||
r = -EINVAL;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case KVM_PV_DUMP_CONFIG_STOR_STATE: {
|
||||
if (!kvm->arch.pv.dumping)
|
||||
break;
|
||||
|
||||
/*
|
||||
* gaddr is an output parameter since we might stop
|
||||
* early. As dmp will be copied back in our caller, we
|
||||
* don't need to do it ourselves.
|
||||
*/
|
||||
r = kvm_s390_pv_dump_stor_state(kvm, result_buff, &dmp.gaddr, dmp.buff_len,
|
||||
&cmd->rc, &cmd->rrc);
|
||||
break;
|
||||
}
|
||||
case KVM_PV_DUMP_COMPLETE: {
|
||||
if (!kvm->arch.pv.dumping)
|
||||
break;
|
||||
|
||||
r = -EINVAL;
|
||||
if (dmp.buff_len < uv_info.conf_dump_finalize_len)
|
||||
break;
|
||||
|
||||
r = kvm_s390_pv_dump_complete(kvm, result_buff,
|
||||
&cmd->rc, &cmd->rrc);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
r = -ENOTTY;
|
||||
break;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
|
||||
{
|
||||
int r = 0;
|
||||
@ -2356,6 +2485,68 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
|
||||
cmd->rc, cmd->rrc);
|
||||
break;
|
||||
}
|
||||
case KVM_PV_INFO: {
|
||||
struct kvm_s390_pv_info info = {};
|
||||
ssize_t data_len;
|
||||
|
||||
/*
|
||||
* No need to check the VM protection here.
|
||||
*
|
||||
* Maybe user space wants to query some of the data
|
||||
* when the VM is still unprotected. If we see the
|
||||
* need to fence a new data command we can still
|
||||
* return an error in the info handler.
|
||||
*/
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&info, argp, sizeof(info.header)))
|
||||
break;
|
||||
|
||||
r = -EINVAL;
|
||||
if (info.header.len_max < sizeof(info.header))
|
||||
break;
|
||||
|
||||
data_len = kvm_s390_handle_pv_info(&info);
|
||||
if (data_len < 0) {
|
||||
r = data_len;
|
||||
break;
|
||||
}
|
||||
/*
|
||||
* If a data command struct is extended (multiple
|
||||
* times) this can be used to determine how much of it
|
||||
* is valid.
|
||||
*/
|
||||
info.header.len_written = data_len;
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_to_user(argp, &info, data_len))
|
||||
break;
|
||||
|
||||
r = 0;
|
||||
break;
|
||||
}
|
||||
case KVM_PV_DUMP: {
|
||||
struct kvm_s390_pv_dmp dmp;
|
||||
|
||||
r = -EINVAL;
|
||||
if (!kvm_s390_pv_is_protected(kvm))
|
||||
break;
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&dmp, argp, sizeof(dmp)))
|
||||
break;
|
||||
|
||||
r = kvm_s390_pv_dmp(kvm, cmd, dmp);
|
||||
if (r)
|
||||
break;
|
||||
|
||||
if (copy_to_user(argp, &dmp, sizeof(dmp))) {
|
||||
r = -EFAULT;
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
default:
|
||||
r = -ENOTTY;
|
||||
}
|
||||
@ -3047,9 +3238,7 @@ static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
|
||||
if (!sclp.has_esca || !sclp.has_64bscao)
|
||||
return false;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
|
||||
mutex_unlock(&kvm->lock);
|
||||
|
||||
return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
|
||||
}
|
||||
@ -4473,6 +4662,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
|
||||
struct kvm_run *kvm_run = vcpu->run;
|
||||
int rc;
|
||||
|
||||
/*
|
||||
* Running a VM while dumping always has the potential to
|
||||
* produce inconsistent dump data. But for PV vcpus a SIE
|
||||
* entry while dumping could also lead to a fatal validity
|
||||
* intercept which we absolutely want to avoid.
|
||||
*/
|
||||
if (vcpu->kvm->arch.pv.dumping)
|
||||
return -EINVAL;
|
||||
|
||||
if (kvm_run->immediate_exit)
|
||||
return -EINTR;
|
||||
|
||||
@ -4912,6 +5110,48 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
|
||||
return -ENOIOCTLCMD;
|
||||
}
|
||||
|
||||
static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,
|
||||
struct kvm_pv_cmd *cmd)
|
||||
{
|
||||
struct kvm_s390_pv_dmp dmp;
|
||||
void *data;
|
||||
int ret;
|
||||
|
||||
/* Dump initialization is a prerequisite */
|
||||
if (!vcpu->kvm->arch.pv.dumping)
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_from_user(&dmp, (__u8 __user *)cmd->data, sizeof(dmp)))
|
||||
return -EFAULT;
|
||||
|
||||
/* We only handle this subcmd right now */
|
||||
if (dmp.subcmd != KVM_PV_DUMP_CPU)
|
||||
return -EINVAL;
|
||||
|
||||
/* CPU dump length is the same as create cpu storage donation. */
|
||||
if (dmp.buff_len != uv_info.guest_cpu_stor_len)
|
||||
return -EINVAL;
|
||||
|
||||
data = kvzalloc(uv_info.guest_cpu_stor_len, GFP_KERNEL);
|
||||
if (!data)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = kvm_s390_pv_dump_cpu(vcpu, data, &cmd->rc, &cmd->rrc);
|
||||
|
||||
VCPU_EVENT(vcpu, 3, "PROTVIRT DUMP CPU %d rc %x rrc %x",
|
||||
vcpu->vcpu_id, cmd->rc, cmd->rrc);
|
||||
|
||||
if (ret)
|
||||
ret = -EINVAL;
|
||||
|
||||
/* On success copy over the dump data */
|
||||
if (!ret && copy_to_user((__u8 __user *)dmp.buff_addr, data, uv_info.guest_cpu_stor_len))
|
||||
ret = -EFAULT;
|
||||
|
||||
kvfree(data);
|
||||
return ret;
|
||||
}
|
||||
|
||||
long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
unsigned int ioctl, unsigned long arg)
|
||||
{
|
||||
@ -5076,6 +5316,33 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
irq_state.len);
|
||||
break;
|
||||
}
|
||||
case KVM_S390_PV_CPU_COMMAND: {
|
||||
struct kvm_pv_cmd cmd;
|
||||
|
||||
r = -EINVAL;
|
||||
if (!is_prot_virt_host())
|
||||
break;
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&cmd, argp, sizeof(cmd)))
|
||||
break;
|
||||
|
||||
r = -EINVAL;
|
||||
if (cmd.flags)
|
||||
break;
|
||||
|
||||
/* We only handle this cmd right now */
|
||||
if (cmd.cmd != KVM_PV_DUMP)
|
||||
break;
|
||||
|
||||
r = kvm_s390_handle_pv_vcpu_dump(vcpu, &cmd);
|
||||
|
||||
/* Always copy over UV rc / rrc data */
|
||||
if (copy_to_user((__u8 __user *)argp, &cmd.rc,
|
||||
sizeof(cmd.rc) + sizeof(cmd.rrc)))
|
||||
r = -EFAULT;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
r = -ENOTTY;
|
||||
}
|
||||
|
@ -250,6 +250,11 @@ int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
|
||||
int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
|
||||
unsigned long tweak, u16 *rc, u16 *rrc);
|
||||
int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state);
|
||||
int kvm_s390_pv_dump_cpu(struct kvm_vcpu *vcpu, void *buff, u16 *rc, u16 *rrc);
|
||||
int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
|
||||
u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc);
|
||||
int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
|
||||
u16 *rc, u16 *rrc);
|
||||
|
||||
static inline u64 kvm_s390_pv_get_handle(struct kvm *kvm)
|
||||
{
|
||||
|
@ -7,6 +7,7 @@
|
||||
*/
|
||||
#include <linux/kvm.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/minmax.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/sched/signal.h>
|
||||
#include <asm/gmap.h>
|
||||
@ -298,3 +299,200 @@ int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state)
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_s390_pv_dump_cpu(struct kvm_vcpu *vcpu, void *buff, u16 *rc, u16 *rrc)
|
||||
{
|
||||
struct uv_cb_dump_cpu uvcb = {
|
||||
.header.cmd = UVC_CMD_DUMP_CPU,
|
||||
.header.len = sizeof(uvcb),
|
||||
.cpu_handle = vcpu->arch.pv.handle,
|
||||
.dump_area_origin = (u64)buff,
|
||||
};
|
||||
int cc;
|
||||
|
||||
cc = uv_call_sched(0, (u64)&uvcb);
|
||||
*rc = uvcb.header.rc;
|
||||
*rrc = uvcb.header.rrc;
|
||||
return cc;
|
||||
}
|
||||
|
||||
/* Size of the cache for the storage state dump data. 1MB for now */
|
||||
#define DUMP_BUFF_LEN HPAGE_SIZE
|
||||
|
||||
/**
|
||||
* kvm_s390_pv_dump_stor_state
|
||||
*
|
||||
* @kvm: pointer to the guest's KVM struct
|
||||
* @buff_user: Userspace pointer where we will write the results to
|
||||
* @gaddr: Starting absolute guest address for which the storage state
|
||||
* is requested.
|
||||
* @buff_user_len: Length of the buff_user buffer
|
||||
* @rc: Pointer to where the uvcb return code is stored
|
||||
* @rrc: Pointer to where the uvcb return reason code is stored
|
||||
*
|
||||
* Stores buff_user_len bytes of tweak component values to buff_user
|
||||
* starting with the 1MB block specified by the absolute guest address
|
||||
* (gaddr). The gaddr pointer will be updated with the last address
|
||||
* for which data was written when returning to userspace. buff_user
|
||||
* might be written to even if an error rc is returned. For instance
|
||||
* if we encounter a fault after writing the first page of data.
|
||||
*
|
||||
* Context: kvm->lock needs to be held
|
||||
*
|
||||
* Return:
|
||||
* 0 on success
|
||||
* -ENOMEM if allocating the cache fails
|
||||
* -EINVAL if gaddr is not aligned to 1MB
|
||||
* -EINVAL if buff_user_len is not aligned to uv_info.conf_dump_storage_state_len
|
||||
* -EINVAL if the UV call fails, rc and rrc will be set in this case
|
||||
* -EFAULT if copying the result to buff_user failed
|
||||
*/
|
||||
int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
|
||||
u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc)
|
||||
{
|
||||
struct uv_cb_dump_stor_state uvcb = {
|
||||
.header.cmd = UVC_CMD_DUMP_CONF_STOR_STATE,
|
||||
.header.len = sizeof(uvcb),
|
||||
.config_handle = kvm->arch.pv.handle,
|
||||
.gaddr = *gaddr,
|
||||
.dump_area_origin = 0,
|
||||
};
|
||||
const u64 increment_len = uv_info.conf_dump_storage_state_len;
|
||||
size_t buff_kvm_size;
|
||||
size_t size_done = 0;
|
||||
u8 *buff_kvm = NULL;
|
||||
int cc, ret;
|
||||
|
||||
ret = -EINVAL;
|
||||
/* UV call processes 1MB guest storage chunks at a time */
|
||||
if (!IS_ALIGNED(*gaddr, HPAGE_SIZE))
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* We provide the storage state for 1MB chunks of guest
|
||||
* storage. The buffer will need to be aligned to
|
||||
* conf_dump_storage_state_len so we don't end on a partial
|
||||
* chunk.
|
||||
*/
|
||||
if (!buff_user_len ||
|
||||
!IS_ALIGNED(buff_user_len, increment_len))
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Allocate a buffer from which we will later copy to the user
|
||||
* process. We don't want userspace to dictate our buffer size
|
||||
* so we limit it to DUMP_BUFF_LEN.
|
||||
*/
|
||||
ret = -ENOMEM;
|
||||
buff_kvm_size = min_t(u64, buff_user_len, DUMP_BUFF_LEN);
|
||||
buff_kvm = vzalloc(buff_kvm_size);
|
||||
if (!buff_kvm)
|
||||
goto out;
|
||||
|
||||
ret = 0;
|
||||
uvcb.dump_area_origin = (u64)buff_kvm;
|
||||
/* We will loop until the user buffer is filled or an error occurs */
|
||||
do {
|
||||
/* Get 1MB worth of guest storage state data */
|
||||
cc = uv_call_sched(0, (u64)&uvcb);
|
||||
|
||||
/* All or nothing */
|
||||
if (cc) {
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
size_done += increment_len;
|
||||
uvcb.dump_area_origin += increment_len;
|
||||
buff_user_len -= increment_len;
|
||||
uvcb.gaddr += HPAGE_SIZE;
|
||||
|
||||
/* KVM Buffer full, time to copy to the process */
|
||||
if (!buff_user_len || size_done == DUMP_BUFF_LEN) {
|
||||
if (copy_to_user(buff_user, buff_kvm, size_done)) {
|
||||
ret = -EFAULT;
|
||||
break;
|
||||
}
|
||||
|
||||
buff_user += size_done;
|
||||
size_done = 0;
|
||||
uvcb.dump_area_origin = (u64)buff_kvm;
|
||||
}
|
||||
} while (buff_user_len);
|
||||
|
||||
/* Report back where we ended dumping */
|
||||
*gaddr = uvcb.gaddr;
|
||||
|
||||
/* Let's only log errors, we don't want to spam */
|
||||
out:
|
||||
if (ret)
|
||||
KVM_UV_EVENT(kvm, 3,
|
||||
"PROTVIRT DUMP STORAGE STATE: addr %llx ret %d, uvcb rc %x rrc %x",
|
||||
uvcb.gaddr, ret, uvcb.header.rc, uvcb.header.rrc);
|
||||
*rc = uvcb.header.rc;
|
||||
*rrc = uvcb.header.rrc;
|
||||
vfree(buff_kvm);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_s390_pv_dump_complete
|
||||
*
|
||||
* @kvm: pointer to the guest's KVM struct
|
||||
* @buff_user: Userspace pointer where we will write the results to
|
||||
* @rc: Pointer to where the uvcb return code is stored
|
||||
* @rrc: Pointer to where the uvcb return reason code is stored
|
||||
*
|
||||
* Completes the dumping operation and writes the completion data to
|
||||
* user space.
|
||||
*
|
||||
* Context: kvm->lock needs to be held
|
||||
*
|
||||
* Return:
|
||||
* 0 on success
|
||||
* -ENOMEM if allocating the completion buffer fails
|
||||
* -EINVAL if the UV call fails, rc and rrc will be set in this case
|
||||
* -EFAULT if copying the result to buff_user failed
|
||||
*/
|
||||
int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
|
||||
u16 *rc, u16 *rrc)
|
||||
{
|
||||
struct uv_cb_dump_complete complete = {
|
||||
.header.len = sizeof(complete),
|
||||
.header.cmd = UVC_CMD_DUMP_COMPLETE,
|
||||
.config_handle = kvm_s390_pv_get_handle(kvm),
|
||||
};
|
||||
u64 *compl_data;
|
||||
int ret;
|
||||
|
||||
/* Allocate dump area */
|
||||
compl_data = vzalloc(uv_info.conf_dump_finalize_len);
|
||||
if (!compl_data)
|
||||
return -ENOMEM;
|
||||
complete.dump_area_origin = (u64)compl_data;
|
||||
|
||||
ret = uv_call_sched(0, (u64)&complete);
|
||||
*rc = complete.header.rc;
|
||||
*rrc = complete.header.rrc;
|
||||
KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP COMPLETE: rc %x rrc %x",
|
||||
complete.header.rc, complete.header.rrc);
|
||||
|
||||
if (!ret) {
|
||||
/*
|
||||
* kvm_s390_pv_dealloc_vm() will also (mem)set
|
||||
* this to false on a reboot or other destroy
|
||||
* operation for this vm.
|
||||
*/
|
||||
kvm->arch.pv.dumping = false;
|
||||
kvm_s390_vcpu_unblock_all(kvm);
|
||||
ret = copy_to_user(buff_user, compl_data, uv_info.conf_dump_finalize_len);
|
||||
if (ret)
|
||||
ret = -EFAULT;
|
||||
}
|
||||
vfree(compl_data);
|
||||
/* If the UVC returned an error, translate it to -EINVAL */
|
||||
if (ret > 0)
|
||||
ret = -EINVAL;
|
||||
return ret;
|
||||
}
|
||||
|
@ -693,9 +693,9 @@ void x86_pmu_disable_all(void)
|
||||
}
|
||||
}
|
||||
|
||||
struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
|
||||
struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr, void *data)
|
||||
{
|
||||
return static_call(x86_pmu_guest_get_msrs)(nr);
|
||||
return static_call(x86_pmu_guest_get_msrs)(nr, data);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
|
||||
|
||||
@ -2103,14 +2103,15 @@ static int __init init_hw_perf_events(void)
|
||||
}
|
||||
if (err != 0) {
|
||||
pr_cont("no PMU driver, software events only.\n");
|
||||
return 0;
|
||||
err = 0;
|
||||
goto out_bad_pmu;
|
||||
}
|
||||
|
||||
pmu_check_apic();
|
||||
|
||||
/* sanity check that the hardware exists or is emulated */
|
||||
if (!check_hw_exists(&pmu, x86_pmu.num_counters, x86_pmu.num_counters_fixed))
|
||||
return 0;
|
||||
goto out_bad_pmu;
|
||||
|
||||
pr_cont("%s PMU driver.\n", x86_pmu.name);
|
||||
|
||||
@ -2219,6 +2220,8 @@ out1:
|
||||
cpuhp_remove_state(CPUHP_AP_PERF_X86_STARTING);
|
||||
out:
|
||||
cpuhp_remove_state(CPUHP_PERF_X86_PREPARE);
|
||||
out_bad_pmu:
|
||||
memset(&x86_pmu, 0, sizeof(x86_pmu));
|
||||
return err;
|
||||
}
|
||||
early_initcall(init_hw_perf_events);
|
||||
@ -2990,6 +2993,11 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
|
||||
|
||||
void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
|
||||
{
|
||||
if (!x86_pmu_initialized()) {
|
||||
memset(cap, 0, sizeof(*cap));
|
||||
return;
|
||||
}
|
||||
|
||||
cap->version = x86_pmu.version;
|
||||
/*
|
||||
* KVM doesn't support the hybrid PMU yet.
|
||||
@ -3002,5 +3010,17 @@ void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
|
||||
cap->bit_width_fixed = x86_pmu.cntval_bits;
|
||||
cap->events_mask = (unsigned int)x86_pmu.events_maskl;
|
||||
cap->events_mask_len = x86_pmu.events_mask_len;
|
||||
cap->pebs_ept = x86_pmu.pebs_ept;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
|
||||
|
||||
u64 perf_get_hw_event_config(int hw_event)
|
||||
{
|
||||
int max = x86_pmu.max_events;
|
||||
|
||||
if (hw_event < max)
|
||||
return x86_pmu.event_map(array_index_nospec(hw_event, max));
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(perf_get_hw_event_config);
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include <linux/slab.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/nmi.h>
|
||||
#include <linux/kvm_host.h>
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/hardirq.h>
|
||||
@ -2852,6 +2853,47 @@ static void intel_pmu_reset(void)
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* We may be running with guest PEBS events created by KVM, and the
|
||||
* PEBS records are logged into the guest's DS and invisible to host.
|
||||
*
|
||||
* In the case of guest PEBS overflow, we only trigger a fake event
|
||||
* to emulate the PEBS overflow PMI for guest PEBS counters in KVM.
|
||||
* The guest will then vm-entry and check the guest DS area to read
|
||||
* the guest PEBS records.
|
||||
*
|
||||
* The contents and other behavior of the guest event do not matter.
|
||||
*/
|
||||
static void x86_pmu_handle_guest_pebs(struct pt_regs *regs,
|
||||
struct perf_sample_data *data)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
u64 guest_pebs_idxs = cpuc->pebs_enabled & ~cpuc->intel_ctrl_host_mask;
|
||||
struct perf_event *event = NULL;
|
||||
int bit;
|
||||
|
||||
if (!unlikely(perf_guest_state()))
|
||||
return;
|
||||
|
||||
if (!x86_pmu.pebs_ept || !x86_pmu.pebs_active ||
|
||||
!guest_pebs_idxs)
|
||||
return;
|
||||
|
||||
for_each_set_bit(bit, (unsigned long *)&guest_pebs_idxs,
|
||||
INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed) {
|
||||
event = cpuc->events[bit];
|
||||
if (!event->attr.precise_ip)
|
||||
continue;
|
||||
|
||||
perf_sample_data_init(data, 0, event->hw.last_period);
|
||||
if (perf_event_overflow(event, data, regs))
|
||||
x86_pmu_stop(event, 0);
|
||||
|
||||
/* Injecting one fake event is enough. */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static int handle_pmi_common(struct pt_regs *regs, u64 status)
|
||||
{
|
||||
struct perf_sample_data data;
|
||||
@ -2891,10 +2933,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
|
||||
* counters from the GLOBAL_STATUS mask and we always process PEBS
|
||||
* events via drain_pebs().
|
||||
*/
|
||||
if (x86_pmu.flags & PMU_FL_PEBS_ALL)
|
||||
status &= ~cpuc->pebs_enabled;
|
||||
else
|
||||
status &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
|
||||
status &= ~(cpuc->pebs_enabled & x86_pmu.pebs_capable);
|
||||
|
||||
/*
|
||||
* PEBS overflow sets bit 62 in the global status register
|
||||
@ -2903,6 +2942,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
|
||||
u64 pebs_enabled = cpuc->pebs_enabled;
|
||||
|
||||
handled++;
|
||||
x86_pmu_handle_guest_pebs(regs, &data);
|
||||
x86_pmu.drain_pebs(regs, &data);
|
||||
status &= intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
|
||||
|
||||
@ -3930,40 +3970,98 @@ static int intel_pmu_hw_config(struct perf_event *event)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
|
||||
/*
|
||||
* Currently, the only caller of this function is the atomic_switch_perf_msrs().
|
||||
* The host perf context helps to prepare the values of the real hardware for
|
||||
* a set of msrs that need to be switched atomically in a vmx transaction.
|
||||
*
|
||||
* For example, the pseudocode needed to add a new msr should look like:
|
||||
*
|
||||
* arr[(*nr)++] = (struct perf_guest_switch_msr){
|
||||
* .msr = the hardware msr address,
|
||||
* .host = the value the hardware has when it doesn't run a guest,
|
||||
* .guest = the value the hardware has when it runs a guest,
|
||||
* };
|
||||
*
|
||||
* These values have nothing to do with the emulated values the guest sees
|
||||
* when it uses {RD,WR}MSR, which should be handled by the KVM context,
|
||||
* specifically in the intel_pmu_{get,set}_msr().
|
||||
*/
|
||||
static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
|
||||
struct kvm_pmu *kvm_pmu = (struct kvm_pmu *)data;
|
||||
u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl);
|
||||
u64 pebs_mask = cpuc->pebs_enabled & x86_pmu.pebs_capable;
|
||||
int global_ctrl, pebs_enable;
|
||||
|
||||
arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
|
||||
arr[0].host = intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
|
||||
arr[0].guest = intel_ctrl & ~cpuc->intel_ctrl_host_mask;
|
||||
if (x86_pmu.flags & PMU_FL_PEBS_ALL)
|
||||
arr[0].guest &= ~cpuc->pebs_enabled;
|
||||
else
|
||||
arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
|
||||
*nr = 1;
|
||||
*nr = 0;
|
||||
global_ctrl = (*nr)++;
|
||||
arr[global_ctrl] = (struct perf_guest_switch_msr){
|
||||
.msr = MSR_CORE_PERF_GLOBAL_CTRL,
|
||||
.host = intel_ctrl & ~cpuc->intel_ctrl_guest_mask,
|
||||
.guest = intel_ctrl & (~cpuc->intel_ctrl_host_mask | ~pebs_mask),
|
||||
};
|
||||
|
||||
if (x86_pmu.pebs && x86_pmu.pebs_no_isolation) {
|
||||
/*
|
||||
* If PMU counter has PEBS enabled it is not enough to
|
||||
* disable counter on a guest entry since PEBS memory
|
||||
* write can overshoot guest entry and corrupt guest
|
||||
* memory. Disabling PEBS solves the problem.
|
||||
*
|
||||
* Don't do this if the CPU already enforces it.
|
||||
*/
|
||||
arr[1].msr = MSR_IA32_PEBS_ENABLE;
|
||||
arr[1].host = cpuc->pebs_enabled;
|
||||
arr[1].guest = 0;
|
||||
*nr = 2;
|
||||
if (!x86_pmu.pebs)
|
||||
return arr;
|
||||
|
||||
/*
|
||||
* If PMU counter has PEBS enabled it is not enough to
|
||||
* disable counter on a guest entry since PEBS memory
|
||||
* write can overshoot guest entry and corrupt guest
|
||||
* memory. Disabling PEBS solves the problem.
|
||||
*
|
||||
* Don't do this if the CPU already enforces it.
|
||||
*/
|
||||
if (x86_pmu.pebs_no_isolation) {
|
||||
arr[(*nr)++] = (struct perf_guest_switch_msr){
|
||||
.msr = MSR_IA32_PEBS_ENABLE,
|
||||
.host = cpuc->pebs_enabled,
|
||||
.guest = 0,
|
||||
};
|
||||
return arr;
|
||||
}
|
||||
|
||||
if (!kvm_pmu || !x86_pmu.pebs_ept)
|
||||
return arr;
|
||||
|
||||
arr[(*nr)++] = (struct perf_guest_switch_msr){
|
||||
.msr = MSR_IA32_DS_AREA,
|
||||
.host = (unsigned long)cpuc->ds,
|
||||
.guest = kvm_pmu->ds_area,
|
||||
};
|
||||
|
||||
if (x86_pmu.intel_cap.pebs_baseline) {
|
||||
arr[(*nr)++] = (struct perf_guest_switch_msr){
|
||||
.msr = MSR_PEBS_DATA_CFG,
|
||||
.host = cpuc->pebs_data_cfg,
|
||||
.guest = kvm_pmu->pebs_data_cfg,
|
||||
};
|
||||
}
|
||||
|
||||
pebs_enable = (*nr)++;
|
||||
arr[pebs_enable] = (struct perf_guest_switch_msr){
|
||||
.msr = MSR_IA32_PEBS_ENABLE,
|
||||
.host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask,
|
||||
.guest = pebs_mask & ~cpuc->intel_ctrl_host_mask,
|
||||
};
|
||||
|
||||
if (arr[pebs_enable].host) {
|
||||
/* Disable guest PEBS if host PEBS is enabled. */
|
||||
arr[pebs_enable].guest = 0;
|
||||
} else {
|
||||
/* Disable guest PEBS for cross-mapped PEBS counters. */
|
||||
arr[pebs_enable].guest &= ~kvm_pmu->host_cross_mapped_mask;
|
||||
/* Set hw GLOBAL_CTRL bits for PEBS counter when it runs for guest */
|
||||
arr[global_ctrl].guest |= arr[pebs_enable].guest;
|
||||
}
|
||||
|
||||
return arr;
|
||||
}
|
||||
|
||||
static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr)
|
||||
static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr, void *data)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
|
||||
@ -5650,6 +5748,7 @@ __init int intel_pmu_init(void)
|
||||
x86_pmu.events_mask_len = eax.split.mask_length;
|
||||
|
||||
x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
|
||||
x86_pmu.pebs_capable = PEBS_COUNTER_MASK;
|
||||
|
||||
/*
|
||||
* Quirk: v2 perfmon does not report fixed-purpose events, so
|
||||
@ -5834,6 +5933,7 @@ __init int intel_pmu_init(void)
|
||||
x86_pmu.pebs_aliases = NULL;
|
||||
x86_pmu.pebs_prec_dist = true;
|
||||
x86_pmu.lbr_pt_coexist = true;
|
||||
x86_pmu.pebs_capable = ~0ULL;
|
||||
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
|
||||
x86_pmu.flags |= PMU_FL_PEBS_ALL;
|
||||
x86_pmu.get_event_constraints = glp_get_event_constraints;
|
||||
@ -6138,6 +6238,7 @@ __init int intel_pmu_init(void)
|
||||
|
||||
case INTEL_FAM6_ICELAKE_X:
|
||||
case INTEL_FAM6_ICELAKE_D:
|
||||
x86_pmu.pebs_ept = 1;
|
||||
pmem = true;
|
||||
fallthrough;
|
||||
case INTEL_FAM6_ICELAKE_L:
|
||||
@ -6190,6 +6291,7 @@ __init int intel_pmu_init(void)
|
||||
x86_pmu.pebs_aliases = NULL;
|
||||
x86_pmu.pebs_prec_dist = true;
|
||||
x86_pmu.pebs_block = true;
|
||||
x86_pmu.pebs_capable = ~0ULL;
|
||||
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
|
||||
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
|
||||
x86_pmu.flags |= PMU_FL_PEBS_ALL;
|
||||
@ -6235,6 +6337,7 @@ __init int intel_pmu_init(void)
|
||||
x86_pmu.pebs_aliases = NULL;
|
||||
x86_pmu.pebs_prec_dist = true;
|
||||
x86_pmu.pebs_block = true;
|
||||
x86_pmu.pebs_capable = ~0ULL;
|
||||
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
|
||||
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
|
||||
x86_pmu.flags |= PMU_FL_PEBS_ALL;
|
||||
@ -6398,8 +6501,7 @@ __init int intel_pmu_init(void)
|
||||
x86_pmu.intel_ctrl);
|
||||
/*
|
||||
* Access LBR MSR may cause #GP under certain circumstances.
|
||||
* E.g. KVM doesn't support LBR MSR
|
||||
* Check all LBT MSR here.
|
||||
* Check all LBR MSR here.
|
||||
* Disable LBR access if any LBR MSRs can not be accessed.
|
||||
*/
|
||||
if (x86_pmu.lbr_tos && !check_msr(x86_pmu.lbr_tos, 0x3UL))
|
||||
|
@ -818,7 +818,8 @@ struct x86_pmu {
|
||||
pebs_prec_dist :1,
|
||||
pebs_no_tlb :1,
|
||||
pebs_no_isolation :1,
|
||||
pebs_block :1;
|
||||
pebs_block :1,
|
||||
pebs_ept :1;
|
||||
int pebs_record_size;
|
||||
int pebs_buffer_size;
|
||||
int max_pebs_events;
|
||||
@ -827,6 +828,7 @@ struct x86_pmu {
|
||||
void (*pebs_aliases)(struct perf_event *event);
|
||||
unsigned long large_pebs_flags;
|
||||
u64 rtm_abort_event;
|
||||
u64 pebs_capable;
|
||||
|
||||
/*
|
||||
* Intel LBR
|
||||
@ -902,7 +904,7 @@ struct x86_pmu {
|
||||
/*
|
||||
* Intel host/guest support (KVM)
|
||||
*/
|
||||
struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
|
||||
struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr, void *data);
|
||||
|
||||
/*
|
||||
* Check period value for PERF_EVENT_IOC_PERIOD ioctl.
|
||||
|
@ -21,6 +21,7 @@ KVM_X86_OP(has_emulated_msr)
|
||||
KVM_X86_OP(vcpu_after_set_cpuid)
|
||||
KVM_X86_OP(vm_init)
|
||||
KVM_X86_OP_OPTIONAL(vm_destroy)
|
||||
KVM_X86_OP_OPTIONAL_RET0(vcpu_precreate)
|
||||
KVM_X86_OP(vcpu_create)
|
||||
KVM_X86_OP(vcpu_free)
|
||||
KVM_X86_OP(vcpu_reset)
|
||||
|
@ -12,7 +12,7 @@ BUILD_BUG_ON(1)
|
||||
* a NULL definition, for example if "static_call_cond()" will be used
|
||||
* at the call sites.
|
||||
*/
|
||||
KVM_X86_PMU_OP(pmc_perf_hw_id)
|
||||
KVM_X86_PMU_OP(hw_event_available)
|
||||
KVM_X86_PMU_OP(pmc_is_enabled)
|
||||
KVM_X86_PMU_OP(pmc_idx_to_pmc)
|
||||
KVM_X86_PMU_OP(rdpmc_ecx_to_pmc)
|
||||
|
@ -65,6 +65,9 @@
|
||||
#define KVM_BUS_LOCK_DETECTION_VALID_MODE (KVM_BUS_LOCK_DETECTION_OFF | \
|
||||
KVM_BUS_LOCK_DETECTION_EXIT)
|
||||
|
||||
#define KVM_X86_NOTIFY_VMEXIT_VALID_BITS (KVM_X86_NOTIFY_VMEXIT_ENABLED | \
|
||||
KVM_X86_NOTIFY_VMEXIT_USER)
|
||||
|
||||
/* x86-specific vcpu->requests bit members */
|
||||
#define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0)
|
||||
#define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1)
|
||||
@ -505,6 +508,7 @@ struct kvm_pmu {
|
||||
unsigned nr_arch_fixed_counters;
|
||||
unsigned available_event_types;
|
||||
u64 fixed_ctr_ctrl;
|
||||
u64 fixed_ctr_ctrl_mask;
|
||||
u64 global_ctrl;
|
||||
u64 global_status;
|
||||
u64 counter_bitmask[2];
|
||||
@ -520,6 +524,21 @@ struct kvm_pmu {
|
||||
DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
|
||||
DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
|
||||
|
||||
u64 ds_area;
|
||||
u64 pebs_enable;
|
||||
u64 pebs_enable_mask;
|
||||
u64 pebs_data_cfg;
|
||||
u64 pebs_data_cfg_mask;
|
||||
|
||||
/*
|
||||
* If a guest counter is cross-mapped to host counter with different
|
||||
* index, its PEBS capability will be temporarily disabled.
|
||||
*
|
||||
* The user should make sure that this mask is updated
|
||||
* after disabling interrupts and before perf_guest_get_msrs();
|
||||
*/
|
||||
u64 host_cross_mapped_mask;
|
||||
|
||||
/*
|
||||
* The gate to release perf_events not marked in
|
||||
* pmc_in_use only once in a vcpu time slice.
|
||||
@ -1222,8 +1241,13 @@ struct kvm_arch {
|
||||
bool guest_can_read_msr_platform_info;
|
||||
bool exception_payload_enabled;
|
||||
|
||||
bool triple_fault_event;
|
||||
|
||||
bool bus_lock_detection_enabled;
|
||||
bool enable_pmu;
|
||||
|
||||
u32 notify_window;
|
||||
u32 notify_vmexit_flags;
|
||||
/*
|
||||
* If exit_on_emulation_error is set, and the in-kernel instruction
|
||||
* emulator fails to emulate an instruction, allow userspace
|
||||
@ -1307,6 +1331,12 @@ struct kvm_arch {
|
||||
hpa_t hv_root_tdp;
|
||||
spinlock_t hv_root_tdp_lock;
|
||||
#endif
|
||||
/*
|
||||
* VM-scope maximum vCPU ID. Used to determine the size of structures
|
||||
* that increase along with the maximum vCPU ID, in which case, using
|
||||
* the global KVM_MAX_VCPU_IDS may lead to significant memory waste.
|
||||
*/
|
||||
u32 max_vcpu_ids;
|
||||
};
|
||||
|
||||
struct kvm_vm_stat {
|
||||
@ -1367,6 +1397,7 @@ struct kvm_vcpu_stat {
|
||||
u64 preemption_reported;
|
||||
u64 preemption_other;
|
||||
u64 guest_mode;
|
||||
u64 notify_window_exits;
|
||||
};
|
||||
|
||||
struct x86_instruction_info;
|
||||
@ -1407,6 +1438,7 @@ struct kvm_x86_ops {
|
||||
void (*vm_destroy)(struct kvm *kvm);
|
||||
|
||||
/* Create, but do not attach this VCPU */
|
||||
int (*vcpu_precreate)(struct kvm *kvm);
|
||||
int (*vcpu_create)(struct kvm_vcpu *vcpu);
|
||||
void (*vcpu_free)(struct kvm_vcpu *vcpu);
|
||||
void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event);
|
||||
@ -1471,7 +1503,7 @@ struct kvm_x86_ops {
|
||||
u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
|
||||
void (*patch_hypercall)(struct kvm_vcpu *vcpu,
|
||||
unsigned char *hypercall_addr);
|
||||
void (*inject_irq)(struct kvm_vcpu *vcpu);
|
||||
void (*inject_irq)(struct kvm_vcpu *vcpu, bool reinjected);
|
||||
void (*inject_nmi)(struct kvm_vcpu *vcpu);
|
||||
void (*queue_exception)(struct kvm_vcpu *vcpu);
|
||||
void (*cancel_injection)(struct kvm_vcpu *vcpu);
|
||||
@ -1705,21 +1737,6 @@ extern bool tdp_enabled;
|
||||
|
||||
u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
|
||||
|
||||
/* control of guest tsc rate supported? */
|
||||
extern bool kvm_has_tsc_control;
|
||||
/* maximum supported tsc_khz for guests */
|
||||
extern u32 kvm_max_guest_tsc_khz;
|
||||
/* number of bits of the fractional part of the TSC scaling ratio */
|
||||
extern u8 kvm_tsc_scaling_ratio_frac_bits;
|
||||
/* maximum allowed value of TSC scaling ratio */
|
||||
extern u64 kvm_max_tsc_scaling_ratio;
|
||||
/* 1ull << kvm_tsc_scaling_ratio_frac_bits */
|
||||
extern u64 kvm_default_tsc_scaling_ratio;
|
||||
/* bus lock detection supported? */
|
||||
extern bool kvm_has_bus_lock_exit;
|
||||
|
||||
extern u64 kvm_mce_cap_supported;
|
||||
|
||||
/*
|
||||
* EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing
|
||||
* userspace I/O) to indicate that the emulation context
|
||||
|
@ -196,6 +196,12 @@
|
||||
#define PERF_CAP_PT_IDX 16
|
||||
|
||||
#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
|
||||
#define PERF_CAP_PEBS_TRAP BIT_ULL(6)
|
||||
#define PERF_CAP_ARCH_REG BIT_ULL(7)
|
||||
#define PERF_CAP_PEBS_FORMAT 0xf00
|
||||
#define PERF_CAP_PEBS_BASELINE BIT_ULL(14)
|
||||
#define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \
|
||||
PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE)
|
||||
|
||||
#define MSR_IA32_RTIT_CTL 0x00000570
|
||||
#define RTIT_CTL_TRACEEN BIT(0)
|
||||
@ -980,6 +986,7 @@
|
||||
#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f
|
||||
#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490
|
||||
#define MSR_IA32_VMX_VMFUNC 0x00000491
|
||||
#define MSR_IA32_VMX_PROCBASED_CTLS3 0x00000492
|
||||
|
||||
/* VMX_BASIC bits and bitmasks */
|
||||
#define VMX_BASIC_VMCS_SIZE_SHIFT 32
|
||||
|
@ -206,6 +206,7 @@ struct x86_pmu_capability {
|
||||
int bit_width_fixed;
|
||||
unsigned int events_mask;
|
||||
int events_mask_len;
|
||||
unsigned int pebs_ept :1;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -504,6 +505,7 @@ struct x86_pmu_lbr {
|
||||
};
|
||||
|
||||
extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
|
||||
extern u64 perf_get_hw_event_config(int hw_event);
|
||||
extern void perf_check_microcode(void);
|
||||
extern void perf_clear_dirty_counters(void);
|
||||
extern int x86_perf_rdpmc_index(struct perf_event *event);
|
||||
@ -513,15 +515,20 @@ static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
|
||||
memset(cap, 0, sizeof(*cap));
|
||||
}
|
||||
|
||||
static inline u64 perf_get_hw_event_config(int hw_event)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void perf_events_lapic_init(void) { }
|
||||
static inline void perf_check_microcode(void) { }
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
|
||||
extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
|
||||
extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr, void *data);
|
||||
extern int x86_perf_get_lbr(struct x86_pmu_lbr *lbr);
|
||||
#else
|
||||
struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
|
||||
struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr, void *data);
|
||||
static inline int x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
|
||||
{
|
||||
return -1;
|
||||
|
@ -31,6 +31,7 @@
|
||||
#define CPU_BASED_RDTSC_EXITING VMCS_CONTROL_BIT(RDTSC_EXITING)
|
||||
#define CPU_BASED_CR3_LOAD_EXITING VMCS_CONTROL_BIT(CR3_LOAD_EXITING)
|
||||
#define CPU_BASED_CR3_STORE_EXITING VMCS_CONTROL_BIT(CR3_STORE_EXITING)
|
||||
#define CPU_BASED_ACTIVATE_TERTIARY_CONTROLS VMCS_CONTROL_BIT(TERTIARY_CONTROLS)
|
||||
#define CPU_BASED_CR8_LOAD_EXITING VMCS_CONTROL_BIT(CR8_LOAD_EXITING)
|
||||
#define CPU_BASED_CR8_STORE_EXITING VMCS_CONTROL_BIT(CR8_STORE_EXITING)
|
||||
#define CPU_BASED_TPR_SHADOW VMCS_CONTROL_BIT(VIRTUAL_TPR)
|
||||
@ -74,6 +75,12 @@
|
||||
#define SECONDARY_EXEC_TSC_SCALING VMCS_CONTROL_BIT(TSC_SCALING)
|
||||
#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE VMCS_CONTROL_BIT(USR_WAIT_PAUSE)
|
||||
#define SECONDARY_EXEC_BUS_LOCK_DETECTION VMCS_CONTROL_BIT(BUS_LOCK_DETECTION)
|
||||
#define SECONDARY_EXEC_NOTIFY_VM_EXITING VMCS_CONTROL_BIT(NOTIFY_VM_EXITING)
|
||||
|
||||
/*
|
||||
* Definitions of Tertiary Processor-Based VM-Execution Controls.
|
||||
*/
|
||||
#define TERTIARY_EXEC_IPI_VIRT VMCS_CONTROL_BIT(IPI_VIRT)
|
||||
|
||||
#define PIN_BASED_EXT_INTR_MASK VMCS_CONTROL_BIT(INTR_EXITING)
|
||||
#define PIN_BASED_NMI_EXITING VMCS_CONTROL_BIT(NMI_EXITING)
|
||||
@ -158,6 +165,7 @@ static inline int vmx_misc_mseg_revid(u64 vmx_misc)
|
||||
enum vmcs_field {
|
||||
VIRTUAL_PROCESSOR_ID = 0x00000000,
|
||||
POSTED_INTR_NV = 0x00000002,
|
||||
LAST_PID_POINTER_INDEX = 0x00000008,
|
||||
GUEST_ES_SELECTOR = 0x00000800,
|
||||
GUEST_CS_SELECTOR = 0x00000802,
|
||||
GUEST_SS_SELECTOR = 0x00000804,
|
||||
@ -221,6 +229,10 @@ enum vmcs_field {
|
||||
ENCLS_EXITING_BITMAP_HIGH = 0x0000202F,
|
||||
TSC_MULTIPLIER = 0x00002032,
|
||||
TSC_MULTIPLIER_HIGH = 0x00002033,
|
||||
TERTIARY_VM_EXEC_CONTROL = 0x00002034,
|
||||
TERTIARY_VM_EXEC_CONTROL_HIGH = 0x00002035,
|
||||
PID_POINTER_TABLE = 0x00002042,
|
||||
PID_POINTER_TABLE_HIGH = 0x00002043,
|
||||
GUEST_PHYSICAL_ADDRESS = 0x00002400,
|
||||
GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
|
||||
VMCS_LINK_POINTER = 0x00002800,
|
||||
@ -269,6 +281,7 @@ enum vmcs_field {
|
||||
SECONDARY_VM_EXEC_CONTROL = 0x0000401e,
|
||||
PLE_GAP = 0x00004020,
|
||||
PLE_WINDOW = 0x00004022,
|
||||
NOTIFY_WINDOW = 0x00004024,
|
||||
VM_INSTRUCTION_ERROR = 0x00004400,
|
||||
VM_EXIT_REASON = 0x00004402,
|
||||
VM_EXIT_INTR_INFO = 0x00004404,
|
||||
@ -553,6 +566,11 @@ enum vm_entry_failure_code {
|
||||
#define EPT_VIOLATION_GVA_IS_VALID (1 << EPT_VIOLATION_GVA_IS_VALID_BIT)
|
||||
#define EPT_VIOLATION_GVA_TRANSLATED (1 << EPT_VIOLATION_GVA_TRANSLATED_BIT)
|
||||
|
||||
/*
|
||||
* Exit Qualifications for NOTIFY VM EXIT
|
||||
*/
|
||||
#define NOTIFY_VM_CONTEXT_INVALID BIT(0)
|
||||
|
||||
/*
|
||||
* VM-instruction error numbers
|
||||
*/
|
||||
|
@ -5,7 +5,7 @@
|
||||
/*
|
||||
* Defines VMX CPU feature bits
|
||||
*/
|
||||
#define NVMXINTS 3 /* N 32-bit words worth of info */
|
||||
#define NVMXINTS 5 /* N 32-bit words worth of info */
|
||||
|
||||
/*
|
||||
* Note: If the comment begins with a quoted string, that string is used
|
||||
@ -43,6 +43,7 @@
|
||||
#define VMX_FEATURE_RDTSC_EXITING ( 1*32+ 12) /* "" VM-Exit on RDTSC */
|
||||
#define VMX_FEATURE_CR3_LOAD_EXITING ( 1*32+ 15) /* "" VM-Exit on writes to CR3 */
|
||||
#define VMX_FEATURE_CR3_STORE_EXITING ( 1*32+ 16) /* "" VM-Exit on reads from CR3 */
|
||||
#define VMX_FEATURE_TERTIARY_CONTROLS ( 1*32+ 17) /* "" Enable Tertiary VM-Execution Controls */
|
||||
#define VMX_FEATURE_CR8_LOAD_EXITING ( 1*32+ 19) /* "" VM-Exit on writes to CR8 */
|
||||
#define VMX_FEATURE_CR8_STORE_EXITING ( 1*32+ 20) /* "" VM-Exit on reads from CR8 */
|
||||
#define VMX_FEATURE_VIRTUAL_TPR ( 1*32+ 21) /* "vtpr" TPR virtualization, a.k.a. TPR shadow */
|
||||
@ -84,5 +85,8 @@
|
||||
#define VMX_FEATURE_USR_WAIT_PAUSE ( 2*32+ 26) /* Enable TPAUSE, UMONITOR, UMWAIT in guest */
|
||||
#define VMX_FEATURE_ENCLV_EXITING ( 2*32+ 28) /* "" VM-Exit on ENCLV (leaf dependent) */
|
||||
#define VMX_FEATURE_BUS_LOCK_DETECTION ( 2*32+ 30) /* "" VM-Exit when bus lock caused */
|
||||
#define VMX_FEATURE_NOTIFY_VM_EXITING ( 2*32+ 31) /* VM-Exit when no event windows after notify window */
|
||||
|
||||
/* Tertiary Processor-Based VM-Execution Controls, word 3 */
|
||||
#define VMX_FEATURE_IPI_VIRT ( 3*32+ 4) /* Enable IPI virtualization */
|
||||
#endif /* _ASM_X86_VMXFEATURES_H */
|
||||
|
@ -306,7 +306,8 @@ struct kvm_pit_state {
|
||||
struct kvm_pit_channel_state channels[3];
|
||||
};
|
||||
|
||||
#define KVM_PIT_FLAGS_HPET_LEGACY 0x00000001
|
||||
#define KVM_PIT_FLAGS_HPET_LEGACY 0x00000001
|
||||
#define KVM_PIT_FLAGS_SPEAKER_DATA_ON 0x00000002
|
||||
|
||||
struct kvm_pit_state2 {
|
||||
struct kvm_pit_channel_state channels[3];
|
||||
@ -325,6 +326,7 @@ struct kvm_reinject_control {
|
||||
#define KVM_VCPUEVENT_VALID_SHADOW 0x00000004
|
||||
#define KVM_VCPUEVENT_VALID_SMM 0x00000008
|
||||
#define KVM_VCPUEVENT_VALID_PAYLOAD 0x00000010
|
||||
#define KVM_VCPUEVENT_VALID_TRIPLE_FAULT 0x00000020
|
||||
|
||||
/* Interrupt shadow states */
|
||||
#define KVM_X86_SHADOW_INT_MOV_SS 0x01
|
||||
@ -359,7 +361,10 @@ struct kvm_vcpu_events {
|
||||
__u8 smm_inside_nmi;
|
||||
__u8 latched_init;
|
||||
} smi;
|
||||
__u8 reserved[27];
|
||||
struct {
|
||||
__u8 pending;
|
||||
} triple_fault;
|
||||
__u8 reserved[26];
|
||||
__u8 exception_has_payload;
|
||||
__u64 exception_payload;
|
||||
};
|
||||
|
@ -91,6 +91,7 @@
|
||||
#define EXIT_REASON_UMWAIT 67
|
||||
#define EXIT_REASON_TPAUSE 68
|
||||
#define EXIT_REASON_BUS_LOCK 74
|
||||
#define EXIT_REASON_NOTIFY 75
|
||||
|
||||
#define VMX_EXIT_REASONS \
|
||||
{ EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
|
||||
@ -153,7 +154,8 @@
|
||||
{ EXIT_REASON_XRSTORS, "XRSTORS" }, \
|
||||
{ EXIT_REASON_UMWAIT, "UMWAIT" }, \
|
||||
{ EXIT_REASON_TPAUSE, "TPAUSE" }, \
|
||||
{ EXIT_REASON_BUS_LOCK, "BUS_LOCK" }
|
||||
{ EXIT_REASON_BUS_LOCK, "BUS_LOCK" }, \
|
||||
{ EXIT_REASON_NOTIFY, "NOTIFY" }
|
||||
|
||||
#define VMX_EXIT_REASON_FLAGS \
|
||||
{ VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" }
|
||||
|
@ -15,6 +15,8 @@ enum vmx_feature_leafs {
|
||||
MISC_FEATURES = 0,
|
||||
PRIMARY_CTLS,
|
||||
SECONDARY_CTLS,
|
||||
TERTIARY_CTLS_LOW,
|
||||
TERTIARY_CTLS_HIGH,
|
||||
NR_VMX_FEATURE_WORDS,
|
||||
};
|
||||
|
||||
@ -22,7 +24,7 @@ enum vmx_feature_leafs {
|
||||
|
||||
static void init_vmx_capabilities(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 supported, funcs, ept, vpid, ign;
|
||||
u32 supported, funcs, ept, vpid, ign, low, high;
|
||||
|
||||
BUILD_BUG_ON(NVMXINTS != NR_VMX_FEATURE_WORDS);
|
||||
|
||||
@ -42,6 +44,11 @@ static void init_vmx_capabilities(struct cpuinfo_x86 *c)
|
||||
rdmsr_safe(MSR_IA32_VMX_PROCBASED_CTLS2, &ign, &supported);
|
||||
c->vmx_capability[SECONDARY_CTLS] = supported;
|
||||
|
||||
/* All 64 bits of tertiary controls MSR are allowed-1 settings. */
|
||||
rdmsr_safe(MSR_IA32_VMX_PROCBASED_CTLS3, &low, &high);
|
||||
c->vmx_capability[TERTIARY_CTLS_LOW] = low;
|
||||
c->vmx_capability[TERTIARY_CTLS_HIGH] = high;
|
||||
|
||||
rdmsr(MSR_IA32_VMX_PINBASED_CTLS, ign, supported);
|
||||
rdmsr_safe(MSR_IA32_VMX_VMFUNC, &ign, &funcs);
|
||||
|
||||
|
@ -200,7 +200,7 @@ void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
|
||||
|
||||
/*
|
||||
* Calculate guest's supported XCR0 taking into account guest CPUID data and
|
||||
* supported_xcr0 (comprised of host configuration and KVM_SUPPORTED_XCR0).
|
||||
* KVM's supported XCR0 (comprised of host's XCR0 and KVM_SUPPORTED_XCR0).
|
||||
*/
|
||||
static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
|
||||
{
|
||||
@ -210,7 +210,7 @@ static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
|
||||
if (!best)
|
||||
return 0;
|
||||
|
||||
return (best->eax | ((u64)best->edx << 32)) & supported_xcr0;
|
||||
return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
|
||||
}
|
||||
|
||||
static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
|
||||
@ -868,7 +868,6 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
|
||||
case 9:
|
||||
break;
|
||||
case 0xa: { /* Architectural Performance Monitoring */
|
||||
struct x86_pmu_capability cap;
|
||||
union cpuid10_eax eax;
|
||||
union cpuid10_edx edx;
|
||||
|
||||
@ -877,30 +876,20 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
|
||||
break;
|
||||
}
|
||||
|
||||
perf_get_x86_pmu_capability(&cap);
|
||||
eax.split.version_id = kvm_pmu_cap.version;
|
||||
eax.split.num_counters = kvm_pmu_cap.num_counters_gp;
|
||||
eax.split.bit_width = kvm_pmu_cap.bit_width_gp;
|
||||
eax.split.mask_length = kvm_pmu_cap.events_mask_len;
|
||||
edx.split.num_counters_fixed = kvm_pmu_cap.num_counters_fixed;
|
||||
edx.split.bit_width_fixed = kvm_pmu_cap.bit_width_fixed;
|
||||
|
||||
/*
|
||||
* The guest architecture pmu is only supported if the architecture
|
||||
* pmu exists on the host and the module parameters allow it.
|
||||
*/
|
||||
if (!cap.version || !enable_pmu)
|
||||
memset(&cap, 0, sizeof(cap));
|
||||
|
||||
eax.split.version_id = min(cap.version, 2);
|
||||
eax.split.num_counters = cap.num_counters_gp;
|
||||
eax.split.bit_width = cap.bit_width_gp;
|
||||
eax.split.mask_length = cap.events_mask_len;
|
||||
|
||||
edx.split.num_counters_fixed =
|
||||
min(cap.num_counters_fixed, KVM_PMC_MAX_FIXED);
|
||||
edx.split.bit_width_fixed = cap.bit_width_fixed;
|
||||
if (cap.version)
|
||||
if (kvm_pmu_cap.version)
|
||||
edx.split.anythread_deprecated = 1;
|
||||
edx.split.reserved1 = 0;
|
||||
edx.split.reserved2 = 0;
|
||||
|
||||
entry->eax = eax.full;
|
||||
entry->ebx = cap.events_mask;
|
||||
entry->ebx = kvm_pmu_cap.events_mask;
|
||||
entry->ecx = 0;
|
||||
entry->edx = edx.full;
|
||||
break;
|
||||
@ -923,8 +912,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
|
||||
}
|
||||
break;
|
||||
case 0xd: {
|
||||
u64 permitted_xcr0 = supported_xcr0 & xstate_get_guest_group_perm();
|
||||
u64 permitted_xss = supported_xss;
|
||||
u64 permitted_xcr0 = kvm_caps.supported_xcr0 & xstate_get_guest_group_perm();
|
||||
u64 permitted_xss = kvm_caps.supported_xss;
|
||||
|
||||
entry->eax &= permitted_xcr0;
|
||||
entry->ebx = xstate_required_size(permitted_xcr0, false);
|
||||
|
@ -145,6 +145,11 @@ static inline int guest_cpuid_model(struct kvm_vcpu *vcpu)
|
||||
return x86_model(best->eax);
|
||||
}
|
||||
|
||||
static inline bool cpuid_model_is_consistent(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return boot_cpu_data.x86_model == guest_cpuid_model(vcpu);
|
||||
}
|
||||
|
||||
static inline int guest_cpuid_stepping(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
|
@ -48,7 +48,7 @@ DEFINE_SIMPLE_ATTRIBUTE(vcpu_tsc_scaling_fops, vcpu_get_tsc_scaling_ratio, NULL,
|
||||
|
||||
static int vcpu_get_tsc_scaling_frac_bits(void *data, u64 *val)
|
||||
{
|
||||
*val = kvm_tsc_scaling_ratio_frac_bits;
|
||||
*val = kvm_caps.tsc_scaling_ratio_frac_bits;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -66,7 +66,7 @@ void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_
|
||||
debugfs_dentry, vcpu,
|
||||
&vcpu_timer_advance_ns_fops);
|
||||
|
||||
if (kvm_has_tsc_control) {
|
||||
if (kvm_caps.has_tsc_control) {
|
||||
debugfs_create_file("tsc-scaling-ratio", 0444,
|
||||
debugfs_dentry, vcpu,
|
||||
&vcpu_tsc_scaling_fops);
|
||||
|
@ -591,7 +591,10 @@ static int speaker_ioport_write(struct kvm_vcpu *vcpu,
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
mutex_lock(&pit_state->lock);
|
||||
pit_state->speaker_data_on = (val >> 1) & 1;
|
||||
if (val & (1 << 1))
|
||||
pit_state->flags |= KVM_PIT_FLAGS_SPEAKER_DATA_ON;
|
||||
else
|
||||
pit_state->flags &= ~KVM_PIT_FLAGS_SPEAKER_DATA_ON;
|
||||
pit_set_gate(pit, 2, val & 1);
|
||||
mutex_unlock(&pit_state->lock);
|
||||
return 0;
|
||||
@ -612,8 +615,9 @@ static int speaker_ioport_read(struct kvm_vcpu *vcpu,
|
||||
refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1;
|
||||
|
||||
mutex_lock(&pit_state->lock);
|
||||
ret = ((pit_state->speaker_data_on << 1) | pit_get_gate(pit, 2) |
|
||||
(pit_get_out(pit, 2) << 5) | (refresh_clock << 4));
|
||||
ret = (!!(pit_state->flags & KVM_PIT_FLAGS_SPEAKER_DATA_ON) << 1) |
|
||||
pit_get_gate(pit, 2) | (pit_get_out(pit, 2) << 5) |
|
||||
(refresh_clock << 4);
|
||||
if (len > sizeof(ret))
|
||||
len = sizeof(ret);
|
||||
memcpy(data, (char *)&ret, len);
|
||||
|
@ -29,7 +29,6 @@ struct kvm_kpit_state {
|
||||
bool is_periodic;
|
||||
s64 period; /* unit: ns */
|
||||
struct hrtimer timer;
|
||||
u32 speaker_data_on;
|
||||
|
||||
struct mutex lock;
|
||||
atomic_t reinject;
|
||||
|
@ -67,6 +67,7 @@ static bool lapic_timer_advance_dynamic __read_mostly;
|
||||
#define LAPIC_TIMER_ADVANCE_NS_MAX 5000
|
||||
/* step-by-step approximation to mitigate fluctuation */
|
||||
#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
|
||||
static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data);
|
||||
|
||||
static inline void __kvm_lapic_set_reg(char *regs, int reg_off, u32 val)
|
||||
{
|
||||
@ -1602,7 +1603,7 @@ static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
|
||||
* that __delay() uses delay_tsc whenever the hardware has TSC, thus
|
||||
* always for VMX enabled hardware.
|
||||
*/
|
||||
if (vcpu->arch.tsc_scaling_ratio == kvm_default_tsc_scaling_ratio) {
|
||||
if (vcpu->arch.tsc_scaling_ratio == kvm_caps.default_tsc_scaling_ratio) {
|
||||
__delay(min(guest_cycles,
|
||||
nsec_to_cycles(vcpu, timer_advance_ns)));
|
||||
} else {
|
||||
@ -2246,10 +2247,27 @@ EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
|
||||
/* emulate APIC access in a trap manner */
|
||||
void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
|
||||
{
|
||||
u32 val = kvm_lapic_get_reg(vcpu->arch.apic, offset);
|
||||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
u64 val;
|
||||
|
||||
/* TODO: optimize to just emulate side effect w/o one more write */
|
||||
kvm_lapic_reg_write(vcpu->arch.apic, offset, val);
|
||||
if (apic_x2apic_mode(apic)) {
|
||||
/*
|
||||
* When guest APIC is in x2APIC mode and IPI virtualization
|
||||
* is enabled, accessing APIC_ICR may cause trap-like VM-exit
|
||||
* on Intel hardware. Other offsets are not possible.
|
||||
*/
|
||||
if (WARN_ON_ONCE(offset != APIC_ICR))
|
||||
return;
|
||||
|
||||
kvm_lapic_msr_read(apic, offset, &val);
|
||||
kvm_apic_send_ipi(apic, (u32)val, (u32)(val >> 32));
|
||||
trace_kvm_apic_write(APIC_ICR, val);
|
||||
} else {
|
||||
val = kvm_lapic_get_reg(apic, offset);
|
||||
|
||||
/* TODO: optimize to just emulate side effect w/o one more write */
|
||||
kvm_lapic_reg_write(apic, offset, (u32)val);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
|
||||
|
||||
|
@ -1053,7 +1053,14 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
|
||||
if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access))
|
||||
continue;
|
||||
|
||||
if (gfn != sp->gfns[i]) {
|
||||
/*
|
||||
* Drop the SPTE if the new protections would result in a RWX=0
|
||||
* SPTE or if the gfn is changing. The RWX=0 case only affects
|
||||
* EPT with execute-only support, i.e. EPT without an effective
|
||||
* "present" bit, as all other paging modes will create a
|
||||
* read-only SPTE if pte_access is zero.
|
||||
*/
|
||||
if ((!pte_access && !shadow_present_mask) || gfn != sp->gfns[i]) {
|
||||
drop_spte(vcpu->kvm, &sp->spt[i]);
|
||||
flush = true;
|
||||
continue;
|
||||
@ -1070,6 +1077,15 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
|
||||
flush |= mmu_spte_update(sptep, spte);
|
||||
}
|
||||
|
||||
/*
|
||||
* Note, any flush is purely for KVM's correctness, e.g. when dropping
|
||||
* an existing SPTE or clearing W/A/D bits to ensure an mmu_notifier
|
||||
* unmap or dirty logging event doesn't fail to flush. The guest is
|
||||
* responsible for flushing the TLB to ensure any changes in protection
|
||||
* bits are recognized, i.e. until the guest flushes or page faults on
|
||||
* a relevant address, KVM is architecturally allowed to let vCPUs use
|
||||
* cached translations with the old protection bits.
|
||||
*/
|
||||
return flush;
|
||||
}
|
||||
|
||||
|
@ -129,6 +129,8 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
|
||||
u64 spte = SPTE_MMU_PRESENT_MASK;
|
||||
bool wrprot = false;
|
||||
|
||||
WARN_ON_ONCE(!pte_access && !shadow_present_mask);
|
||||
|
||||
if (sp->role.ad_disabled)
|
||||
spte |= SPTE_TDP_AD_DISABLED_MASK;
|
||||
else if (kvm_mmu_page_ad_need_write_protect(sp))
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include <linux/bsearch.h>
|
||||
#include <linux/sort.h>
|
||||
#include <asm/perf_event.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
#include "x86.h"
|
||||
#include "cpuid.h"
|
||||
#include "lapic.h"
|
||||
@ -24,6 +25,15 @@
|
||||
/* This is enough to filter the vast majority of currently defined events. */
|
||||
#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300
|
||||
|
||||
struct x86_pmu_capability __read_mostly kvm_pmu_cap;
|
||||
EXPORT_SYMBOL_GPL(kvm_pmu_cap);
|
||||
|
||||
static const struct x86_cpu_id vmx_icl_pebs_cpu[] = {
|
||||
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, NULL),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, NULL),
|
||||
{}
|
||||
};
|
||||
|
||||
/* NOTE:
|
||||
* - Each perf counter is defined as "struct kvm_pmc";
|
||||
* - There are two types of perf counters: general purpose (gp) and fixed.
|
||||
@ -34,7 +44,9 @@
|
||||
* However AMD doesn't support fixed-counters;
|
||||
* - There are three types of index to access perf counters (PMC):
|
||||
* 1. MSR (named msr): For example Intel has MSR_IA32_PERFCTRn and AMD
|
||||
* has MSR_K7_PERFCTRn.
|
||||
* has MSR_K7_PERFCTRn and, for families 15H and later,
|
||||
* MSR_F15H_PERF_CTRn, where MSR_F15H_PERF_CTR[0-3] are
|
||||
* aliased to MSR_K7_PERFCTRn.
|
||||
* 2. MSR Index (named idx): This normally is used by RDPMC instruction.
|
||||
* For instance AMD RDPMC instruction uses 0000_0003h in ECX to access
|
||||
* C001_0007h (MSR_K7_PERFCTR3). Intel has a similar mechanism, except
|
||||
@ -46,7 +58,8 @@
|
||||
* between pmc and perf counters is as the following:
|
||||
* * Intel: [0 .. INTEL_PMC_MAX_GENERIC-1] <=> gp counters
|
||||
* [INTEL_PMC_IDX_FIXED .. INTEL_PMC_IDX_FIXED + 2] <=> fixed
|
||||
* * AMD: [0 .. AMD64_NUM_COUNTERS-1] <=> gp counters
|
||||
* * AMD: [0 .. AMD64_NUM_COUNTERS-1] and, for families 15H
|
||||
* and later, [0 .. AMD64_NUM_COUNTERS_CORE-1] <=> gp counters
|
||||
*/
|
||||
|
||||
static struct kvm_pmu_ops kvm_pmu_ops __read_mostly;
|
||||
@ -86,15 +99,22 @@ static void kvm_pmi_trigger_fn(struct irq_work *irq_work)
|
||||
static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
|
||||
{
|
||||
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
|
||||
bool skip_pmi = false;
|
||||
|
||||
/* Ignore counters that have been reprogrammed already. */
|
||||
if (test_and_set_bit(pmc->idx, pmu->reprogram_pmi))
|
||||
return;
|
||||
|
||||
__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
|
||||
if (pmc->perf_event && pmc->perf_event->attr.precise_ip) {
|
||||
/* Indicate PEBS overflow PMI to guest. */
|
||||
skip_pmi = __test_and_set_bit(GLOBAL_STATUS_BUFFER_OVF_BIT,
|
||||
(unsigned long *)&pmu->global_status);
|
||||
} else {
|
||||
__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
|
||||
}
|
||||
kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
|
||||
|
||||
if (!pmc->intr)
|
||||
if (!pmc->intr || skip_pmi)
|
||||
return;
|
||||
|
||||
/*
|
||||
@ -124,6 +144,7 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
|
||||
u64 config, bool exclude_user,
|
||||
bool exclude_kernel, bool intr)
|
||||
{
|
||||
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
|
||||
struct perf_event *event;
|
||||
struct perf_event_attr attr = {
|
||||
.type = type,
|
||||
@ -135,9 +156,7 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
|
||||
.exclude_kernel = exclude_kernel,
|
||||
.config = config,
|
||||
};
|
||||
|
||||
if (type == PERF_TYPE_HARDWARE && config >= PERF_COUNT_HW_MAX)
|
||||
return;
|
||||
bool pebs = test_bit(pmc->idx, (unsigned long *)&pmu->pebs_enable);
|
||||
|
||||
attr.sample_period = get_sample_period(pmc, pmc->counter);
|
||||
|
||||
@ -150,6 +169,25 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
|
||||
*/
|
||||
attr.sample_period = 0;
|
||||
}
|
||||
if (pebs) {
|
||||
/*
|
||||
* The non-zero precision level of guest event makes the ordinary
|
||||
* guest event become a guest PEBS event and triggers the host
|
||||
* PEBS PMI handler to determine whether the PEBS overflow PMI
|
||||
* comes from the host counters or the guest.
|
||||
*
|
||||
* For most PEBS hardware events, the difference in the software
|
||||
* precision levels of guest and host PEBS events will not affect
|
||||
* the accuracy of the PEBS profiling result, because the "event IP"
|
||||
* in the PEBS record is calibrated on the guest side.
|
||||
*
|
||||
* On Icelake everything is fine. Other hardware (GLC+, TNT+) that
|
||||
* could possibly care here is unsupported and needs changes.
|
||||
*/
|
||||
attr.precise_ip = 1;
|
||||
if (x86_match_cpu(vmx_icl_pebs_cpu) && pmc->idx == 32)
|
||||
attr.precise_ip = 3;
|
||||
}
|
||||
|
||||
event = perf_event_create_kernel_counter(&attr, -1, current,
|
||||
kvm_perf_overflow, pmc);
|
||||
@ -163,7 +201,7 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
|
||||
pmc_to_pmu(pmc)->event_count++;
|
||||
clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
|
||||
pmc->is_paused = false;
|
||||
pmc->intr = intr;
|
||||
pmc->intr = intr || pebs;
|
||||
}
|
||||
|
||||
static void pmc_pause_counter(struct kvm_pmc *pmc)
|
||||
@ -189,6 +227,10 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
|
||||
get_sample_period(pmc, pmc->counter)))
|
||||
return false;
|
||||
|
||||
if (!test_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->pebs_enable) &&
|
||||
pmc->perf_event->attr.precise_ip)
|
||||
return false;
|
||||
|
||||
/* reuse perf_event to serve as pmc_reprogram_counter() does */
|
||||
perf_event_enable(pmc->perf_event);
|
||||
pmc->is_paused = false;
|
||||
@ -205,115 +247,83 @@ static int cmp_u64(const void *pa, const void *pb)
|
||||
return (a > b) - (a < b);
|
||||
}
|
||||
|
||||
void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
|
||||
static bool check_pmu_event_filter(struct kvm_pmc *pmc)
|
||||
{
|
||||
u64 config;
|
||||
u32 type = PERF_TYPE_RAW;
|
||||
struct kvm *kvm = pmc->vcpu->kvm;
|
||||
struct kvm_pmu_event_filter *filter;
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(pmc->vcpu);
|
||||
struct kvm *kvm = pmc->vcpu->kvm;
|
||||
bool allow_event = true;
|
||||
__u64 key;
|
||||
int idx;
|
||||
|
||||
if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
|
||||
printk_once("kvm pmu: pin control bit is ignored\n");
|
||||
|
||||
pmc->eventsel = eventsel;
|
||||
|
||||
pmc_pause_counter(pmc);
|
||||
|
||||
if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc))
|
||||
return;
|
||||
if (!static_call(kvm_x86_pmu_hw_event_available)(pmc))
|
||||
return false;
|
||||
|
||||
filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
|
||||
if (filter) {
|
||||
__u64 key = eventsel & AMD64_RAW_EVENT_MASK_NB;
|
||||
if (!filter)
|
||||
goto out;
|
||||
|
||||
if (pmc_is_gp(pmc)) {
|
||||
key = pmc->eventsel & AMD64_RAW_EVENT_MASK_NB;
|
||||
if (bsearch(&key, filter->events, filter->nevents,
|
||||
sizeof(__u64), cmp_u64))
|
||||
allow_event = filter->action == KVM_PMU_EVENT_ALLOW;
|
||||
else
|
||||
allow_event = filter->action == KVM_PMU_EVENT_DENY;
|
||||
}
|
||||
if (!allow_event)
|
||||
return;
|
||||
|
||||
if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
|
||||
ARCH_PERFMON_EVENTSEL_INV |
|
||||
ARCH_PERFMON_EVENTSEL_CMASK |
|
||||
HSW_IN_TX |
|
||||
HSW_IN_TX_CHECKPOINTED))) {
|
||||
config = static_call(kvm_x86_pmu_pmc_perf_hw_id)(pmc);
|
||||
if (config != PERF_COUNT_HW_MAX)
|
||||
type = PERF_TYPE_HARDWARE;
|
||||
} else {
|
||||
idx = pmc->idx - INTEL_PMC_IDX_FIXED;
|
||||
if (filter->action == KVM_PMU_EVENT_DENY &&
|
||||
test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
|
||||
allow_event = false;
|
||||
if (filter->action == KVM_PMU_EVENT_ALLOW &&
|
||||
!test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
|
||||
allow_event = false;
|
||||
}
|
||||
|
||||
if (type == PERF_TYPE_RAW)
|
||||
config = eventsel & pmu->raw_event_mask;
|
||||
|
||||
if (pmc->current_config == eventsel && pmc_resume_counter(pmc))
|
||||
return;
|
||||
|
||||
pmc_release_perf_event(pmc);
|
||||
|
||||
pmc->current_config = eventsel;
|
||||
pmc_reprogram_counter(pmc, type, config,
|
||||
!(eventsel & ARCH_PERFMON_EVENTSEL_USR),
|
||||
!(eventsel & ARCH_PERFMON_EVENTSEL_OS),
|
||||
eventsel & ARCH_PERFMON_EVENTSEL_INT);
|
||||
out:
|
||||
return allow_event;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(reprogram_gp_counter);
|
||||
|
||||
void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
|
||||
void reprogram_counter(struct kvm_pmc *pmc)
|
||||
{
|
||||
unsigned en_field = ctrl & 0x3;
|
||||
bool pmi = ctrl & 0x8;
|
||||
struct kvm_pmu_event_filter *filter;
|
||||
struct kvm *kvm = pmc->vcpu->kvm;
|
||||
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
|
||||
u64 eventsel = pmc->eventsel;
|
||||
u64 new_config = eventsel;
|
||||
u8 fixed_ctr_ctrl;
|
||||
|
||||
pmc_pause_counter(pmc);
|
||||
|
||||
if (!en_field || !pmc_is_enabled(pmc))
|
||||
if (!pmc_speculative_in_use(pmc) || !pmc_is_enabled(pmc))
|
||||
return;
|
||||
|
||||
filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
|
||||
if (filter) {
|
||||
if (filter->action == KVM_PMU_EVENT_DENY &&
|
||||
test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
|
||||
return;
|
||||
if (filter->action == KVM_PMU_EVENT_ALLOW &&
|
||||
!test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
|
||||
return;
|
||||
if (!check_pmu_event_filter(pmc))
|
||||
return;
|
||||
|
||||
if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
|
||||
printk_once("kvm pmu: pin control bit is ignored\n");
|
||||
|
||||
if (pmc_is_fixed(pmc)) {
|
||||
fixed_ctr_ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl,
|
||||
pmc->idx - INTEL_PMC_IDX_FIXED);
|
||||
if (fixed_ctr_ctrl & 0x1)
|
||||
eventsel |= ARCH_PERFMON_EVENTSEL_OS;
|
||||
if (fixed_ctr_ctrl & 0x2)
|
||||
eventsel |= ARCH_PERFMON_EVENTSEL_USR;
|
||||
if (fixed_ctr_ctrl & 0x8)
|
||||
eventsel |= ARCH_PERFMON_EVENTSEL_INT;
|
||||
new_config = (u64)fixed_ctr_ctrl;
|
||||
}
|
||||
|
||||
if (pmc->current_config == (u64)ctrl && pmc_resume_counter(pmc))
|
||||
if (pmc->current_config == new_config && pmc_resume_counter(pmc))
|
||||
return;
|
||||
|
||||
pmc_release_perf_event(pmc);
|
||||
|
||||
pmc->current_config = (u64)ctrl;
|
||||
pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
|
||||
static_call(kvm_x86_pmu_pmc_perf_hw_id)(pmc),
|
||||
!(en_field & 0x2), /* exclude user */
|
||||
!(en_field & 0x1), /* exclude kernel */
|
||||
pmi);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(reprogram_fixed_counter);
|
||||
|
||||
void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx)
|
||||
{
|
||||
struct kvm_pmc *pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, pmc_idx);
|
||||
|
||||
if (!pmc)
|
||||
return;
|
||||
|
||||
if (pmc_is_gp(pmc))
|
||||
reprogram_gp_counter(pmc, pmc->eventsel);
|
||||
else {
|
||||
int idx = pmc_idx - INTEL_PMC_IDX_FIXED;
|
||||
u8 ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl, idx);
|
||||
|
||||
reprogram_fixed_counter(pmc, ctrl, idx);
|
||||
}
|
||||
pmc->current_config = new_config;
|
||||
pmc_reprogram_counter(pmc, PERF_TYPE_RAW,
|
||||
(eventsel & pmu->raw_event_mask),
|
||||
!(eventsel & ARCH_PERFMON_EVENTSEL_USR),
|
||||
!(eventsel & ARCH_PERFMON_EVENTSEL_OS),
|
||||
eventsel & ARCH_PERFMON_EVENTSEL_INT);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(reprogram_counter);
|
||||
|
||||
@ -329,8 +339,7 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
|
||||
clear_bit(bit, pmu->reprogram_pmi);
|
||||
continue;
|
||||
}
|
||||
|
||||
reprogram_counter(pmu, bit);
|
||||
reprogram_counter(pmc);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -416,10 +425,10 @@ void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
}
|
||||
|
||||
bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
|
||||
bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr, bool host_initiated)
|
||||
{
|
||||
return static_call(kvm_x86_pmu_msr_idx_to_pmc)(vcpu, msr) ||
|
||||
static_call(kvm_x86_pmu_is_valid_msr)(vcpu, msr);
|
||||
static_call(kvm_x86_pmu_is_valid_msr)(vcpu, msr, host_initiated);
|
||||
}
|
||||
|
||||
static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr)
|
||||
@ -433,11 +442,19 @@ static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr)
|
||||
|
||||
int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
{
|
||||
if (msr_info->host_initiated && !vcpu->kvm->arch.enable_pmu) {
|
||||
msr_info->data = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return static_call(kvm_x86_pmu_get_msr)(vcpu, msr_info);
|
||||
}
|
||||
|
||||
int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
{
|
||||
if (msr_info->host_initiated && !vcpu->kvm->arch.enable_pmu)
|
||||
return !!msr_info->data;
|
||||
|
||||
kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index);
|
||||
return static_call(kvm_x86_pmu_set_msr)(vcpu, msr_info);
|
||||
}
|
||||
@ -471,17 +488,6 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)
|
||||
kvm_pmu_refresh(vcpu);
|
||||
}
|
||||
|
||||
static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
|
||||
{
|
||||
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
|
||||
|
||||
if (pmc_is_fixed(pmc))
|
||||
return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
|
||||
pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
|
||||
|
||||
return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
|
||||
}
|
||||
|
||||
/* Release perf_events for vPMCs that have been unused for a full time slice. */
|
||||
void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
@ -514,13 +520,12 @@ void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
|
||||
|
||||
static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
|
||||
{
|
||||
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
|
||||
u64 prev_count;
|
||||
|
||||
prev_count = pmc->counter;
|
||||
pmc->counter = (pmc->counter + 1) & pmc_bitmask(pmc);
|
||||
|
||||
reprogram_counter(pmu, pmc->idx);
|
||||
reprogram_counter(pmc);
|
||||
if (pmc->counter < prev_count)
|
||||
__kvm_perf_overflow(pmc, false);
|
||||
}
|
||||
@ -528,13 +533,8 @@ static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
|
||||
static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc,
|
||||
unsigned int perf_hw_id)
|
||||
{
|
||||
u64 old_eventsel = pmc->eventsel;
|
||||
unsigned int config;
|
||||
|
||||
pmc->eventsel &= (ARCH_PERFMON_EVENTSEL_EVENT | ARCH_PERFMON_EVENTSEL_UMASK);
|
||||
config = static_call(kvm_x86_pmu_pmc_perf_hw_id)(pmc);
|
||||
pmc->eventsel = old_eventsel;
|
||||
return config == perf_hw_id;
|
||||
return !((pmc->eventsel ^ perf_get_hw_event_config(perf_hw_id)) &
|
||||
AMD64_RAW_EVENT_MASK_NB);
|
||||
}
|
||||
|
||||
static inline bool cpl_is_matched(struct kvm_pmc *pmc)
|
||||
|
@ -8,6 +8,9 @@
|
||||
#define pmu_to_vcpu(pmu) (container_of((pmu), struct kvm_vcpu, arch.pmu))
|
||||
#define pmc_to_pmu(pmc) (&(pmc)->vcpu->arch.pmu)
|
||||
|
||||
#define MSR_IA32_MISC_ENABLE_PMU_RO_MASK (MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL | \
|
||||
MSR_IA32_MISC_ENABLE_BTS_UNAVAIL)
|
||||
|
||||
/* retrieve the 4 bits for EN and PMI out of IA32_FIXED_CTR_CTRL */
|
||||
#define fixed_ctrl_field(ctrl_reg, idx) (((ctrl_reg) >> ((idx)*4)) & 0xf)
|
||||
|
||||
@ -22,14 +25,14 @@ struct kvm_event_hw_type_mapping {
|
||||
};
|
||||
|
||||
struct kvm_pmu_ops {
|
||||
unsigned int (*pmc_perf_hw_id)(struct kvm_pmc *pmc);
|
||||
bool (*hw_event_available)(struct kvm_pmc *pmc);
|
||||
bool (*pmc_is_enabled)(struct kvm_pmc *pmc);
|
||||
struct kvm_pmc *(*pmc_idx_to_pmc)(struct kvm_pmu *pmu, int pmc_idx);
|
||||
struct kvm_pmc *(*rdpmc_ecx_to_pmc)(struct kvm_vcpu *vcpu,
|
||||
unsigned int idx, u64 *mask);
|
||||
struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, u32 msr);
|
||||
bool (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx);
|
||||
bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr);
|
||||
bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr, bool host_initiated);
|
||||
int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
|
||||
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
|
||||
void (*refresh)(struct kvm_vcpu *vcpu);
|
||||
@ -144,15 +147,49 @@ static inline void pmc_update_sample_period(struct kvm_pmc *pmc)
|
||||
get_sample_period(pmc, pmc->counter));
|
||||
}
|
||||
|
||||
void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel);
|
||||
void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx);
|
||||
void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
|
||||
static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
|
||||
{
|
||||
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
|
||||
|
||||
if (pmc_is_fixed(pmc))
|
||||
return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
|
||||
pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
|
||||
|
||||
return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
|
||||
}
|
||||
|
||||
extern struct x86_pmu_capability kvm_pmu_cap;
|
||||
|
||||
static inline void kvm_init_pmu_capability(void)
|
||||
{
|
||||
bool is_intel = boot_cpu_data.x86_vendor == X86_VENDOR_INTEL;
|
||||
|
||||
perf_get_x86_pmu_capability(&kvm_pmu_cap);
|
||||
|
||||
/*
|
||||
* For Intel, only support guest architectural pmu
|
||||
* on a host with architectural pmu.
|
||||
*/
|
||||
if ((is_intel && !kvm_pmu_cap.version) || !kvm_pmu_cap.num_counters_gp)
|
||||
enable_pmu = false;
|
||||
|
||||
if (!enable_pmu) {
|
||||
memset(&kvm_pmu_cap, 0, sizeof(kvm_pmu_cap));
|
||||
return;
|
||||
}
|
||||
|
||||
kvm_pmu_cap.version = min(kvm_pmu_cap.version, 2);
|
||||
kvm_pmu_cap.num_counters_fixed = min(kvm_pmu_cap.num_counters_fixed,
|
||||
KVM_PMC_MAX_FIXED);
|
||||
}
|
||||
|
||||
void reprogram_counter(struct kvm_pmc *pmc);
|
||||
|
||||
void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu);
|
||||
void kvm_pmu_handle_event(struct kvm_vcpu *vcpu);
|
||||
int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
|
||||
bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx);
|
||||
bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr);
|
||||
bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr, bool host_initiated);
|
||||
int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
|
||||
int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
|
||||
void kvm_pmu_refresh(struct kvm_vcpu *vcpu);
|
||||
|
@ -371,6 +371,7 @@ void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu,
|
||||
to->nested_ctl = from->nested_ctl;
|
||||
to->event_inj = from->event_inj;
|
||||
to->event_inj_err = from->event_inj_err;
|
||||
to->next_rip = from->next_rip;
|
||||
to->nested_cr3 = from->nested_cr3;
|
||||
to->virt_ext = from->virt_ext;
|
||||
to->pause_filter_count = from->pause_filter_count;
|
||||
@ -608,7 +609,32 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
|
||||
}
|
||||
}
|
||||
|
||||
static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
|
||||
static inline bool is_evtinj_soft(u32 evtinj)
|
||||
{
|
||||
u32 type = evtinj & SVM_EVTINJ_TYPE_MASK;
|
||||
u8 vector = evtinj & SVM_EVTINJ_VEC_MASK;
|
||||
|
||||
if (!(evtinj & SVM_EVTINJ_VALID))
|
||||
return false;
|
||||
|
||||
if (type == SVM_EVTINJ_TYPE_SOFT)
|
||||
return true;
|
||||
|
||||
return type == SVM_EVTINJ_TYPE_EXEPT && kvm_exception_is_soft(vector);
|
||||
}
|
||||
|
||||
static bool is_evtinj_nmi(u32 evtinj)
|
||||
{
|
||||
u32 type = evtinj & SVM_EVTINJ_TYPE_MASK;
|
||||
|
||||
if (!(evtinj & SVM_EVTINJ_VALID))
|
||||
return false;
|
||||
|
||||
return type == SVM_EVTINJ_TYPE_NMI;
|
||||
}
|
||||
|
||||
static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
|
||||
unsigned long vmcb12_rip)
|
||||
{
|
||||
u32 int_ctl_vmcb01_bits = V_INTR_MASKING_MASK;
|
||||
u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK;
|
||||
@ -650,7 +676,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
|
||||
|
||||
vmcb02->control.tsc_offset = vcpu->arch.tsc_offset;
|
||||
|
||||
if (svm->tsc_ratio_msr != kvm_default_tsc_scaling_ratio) {
|
||||
if (svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio) {
|
||||
WARN_ON(!svm->tsc_scaling_enabled);
|
||||
nested_svm_update_tsc_ratio_msr(vcpu);
|
||||
}
|
||||
@ -664,6 +690,30 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
|
||||
vmcb02->control.event_inj = svm->nested.ctl.event_inj;
|
||||
vmcb02->control.event_inj_err = svm->nested.ctl.event_inj_err;
|
||||
|
||||
/*
|
||||
* next_rip is consumed on VMRUN as the return address pushed on the
|
||||
* stack for injected soft exceptions/interrupts. If nrips is exposed
|
||||
* to L1, take it verbatim from vmcb12. If nrips is supported in
|
||||
* hardware but not exposed to L1, stuff the actual L2 RIP to emulate
|
||||
* what a nrips=0 CPU would do (L1 is responsible for advancing RIP
|
||||
* prior to injecting the event).
|
||||
*/
|
||||
if (svm->nrips_enabled)
|
||||
vmcb02->control.next_rip = svm->nested.ctl.next_rip;
|
||||
else if (boot_cpu_has(X86_FEATURE_NRIPS))
|
||||
vmcb02->control.next_rip = vmcb12_rip;
|
||||
|
||||
svm->nmi_l1_to_l2 = is_evtinj_nmi(vmcb02->control.event_inj);
|
||||
if (is_evtinj_soft(vmcb02->control.event_inj)) {
|
||||
svm->soft_int_injected = true;
|
||||
svm->soft_int_csbase = svm->vmcb->save.cs.base;
|
||||
svm->soft_int_old_rip = vmcb12_rip;
|
||||
if (svm->nrips_enabled)
|
||||
svm->soft_int_next_rip = svm->nested.ctl.next_rip;
|
||||
else
|
||||
svm->soft_int_next_rip = vmcb12_rip;
|
||||
}
|
||||
|
||||
vmcb02->control.virt_ext = vmcb01->control.virt_ext &
|
||||
LBR_CTL_ENABLE_MASK;
|
||||
if (svm->lbrv_enabled)
|
||||
@ -745,7 +795,7 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
|
||||
nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr);
|
||||
|
||||
svm_switch_vmcb(svm, &svm->nested.vmcb02);
|
||||
nested_vmcb02_prepare_control(svm);
|
||||
nested_vmcb02_prepare_control(svm, vmcb12->save.rip);
|
||||
nested_vmcb02_prepare_save(svm, vmcb12);
|
||||
|
||||
ret = nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3,
|
||||
@ -834,6 +884,8 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
|
||||
|
||||
out_exit_err:
|
||||
svm->nested.nested_run_pending = 0;
|
||||
svm->nmi_l1_to_l2 = false;
|
||||
svm->soft_int_injected = false;
|
||||
|
||||
svm->vmcb->control.exit_code = SVM_EXIT_ERR;
|
||||
svm->vmcb->control.exit_code_hi = 0;
|
||||
@ -982,7 +1034,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
|
||||
vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
|
||||
}
|
||||
|
||||
if (svm->tsc_ratio_msr != kvm_default_tsc_scaling_ratio) {
|
||||
if (svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio) {
|
||||
WARN_ON(!svm->tsc_scaling_enabled);
|
||||
vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
|
||||
__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
|
||||
@ -1421,6 +1473,7 @@ static void nested_copy_vmcb_cache_to_control(struct vmcb_control_area *dst,
|
||||
dst->nested_ctl = from->nested_ctl;
|
||||
dst->event_inj = from->event_inj;
|
||||
dst->event_inj_err = from->event_inj_err;
|
||||
dst->next_rip = from->next_rip;
|
||||
dst->nested_cr3 = from->nested_cr3;
|
||||
dst->virt_ext = from->virt_ext;
|
||||
dst->pause_filter_count = from->pause_filter_count;
|
||||
@ -1605,7 +1658,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
|
||||
nested_copy_vmcb_control_to_cache(svm, ctl);
|
||||
|
||||
svm_switch_vmcb(svm, &svm->nested.vmcb02);
|
||||
nested_vmcb02_prepare_control(svm);
|
||||
nested_vmcb02_prepare_control(svm, svm->vmcb->save.rip);
|
||||
|
||||
/*
|
||||
* While the nested guest CR3 is already checked and set by
|
||||
|
@ -33,34 +33,6 @@ enum index {
|
||||
INDEX_ERROR,
|
||||
};
|
||||
|
||||
/* duplicated from amd_perfmon_event_map, K7 and above should work. */
|
||||
static struct kvm_event_hw_type_mapping amd_event_mapping[] = {
|
||||
[0] = { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES },
|
||||
[1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
|
||||
[2] = { 0x7d, 0x07, PERF_COUNT_HW_CACHE_REFERENCES },
|
||||
[3] = { 0x7e, 0x07, PERF_COUNT_HW_CACHE_MISSES },
|
||||
[4] = { 0xc2, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
|
||||
[5] = { 0xc3, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
|
||||
[6] = { 0xd0, 0x00, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
|
||||
[7] = { 0xd1, 0x00, PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
|
||||
};
|
||||
|
||||
/* duplicated from amd_f17h_perfmon_event_map. */
|
||||
static struct kvm_event_hw_type_mapping amd_f17h_event_mapping[] = {
|
||||
[0] = { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES },
|
||||
[1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
|
||||
[2] = { 0x60, 0xff, PERF_COUNT_HW_CACHE_REFERENCES },
|
||||
[3] = { 0x64, 0x09, PERF_COUNT_HW_CACHE_MISSES },
|
||||
[4] = { 0xc2, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
|
||||
[5] = { 0xc3, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
|
||||
[6] = { 0x87, 0x02, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
|
||||
[7] = { 0x87, 0x01, PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
|
||||
};
|
||||
|
||||
/* amd_pmc_perf_hw_id depends on these being the same size */
|
||||
static_assert(ARRAY_SIZE(amd_event_mapping) ==
|
||||
ARRAY_SIZE(amd_f17h_event_mapping));
|
||||
|
||||
static unsigned int get_msr_base(struct kvm_pmu *pmu, enum pmu_type type)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
|
||||
@ -154,31 +126,9 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
|
||||
return &pmu->gp_counters[msr_to_index(msr)];
|
||||
}
|
||||
|
||||
static unsigned int amd_pmc_perf_hw_id(struct kvm_pmc *pmc)
|
||||
static bool amd_hw_event_available(struct kvm_pmc *pmc)
|
||||
{
|
||||
struct kvm_event_hw_type_mapping *event_mapping;
|
||||
u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
|
||||
u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
|
||||
int i;
|
||||
|
||||
/* return PERF_COUNT_HW_MAX as AMD doesn't have fixed events */
|
||||
if (WARN_ON(pmc_is_fixed(pmc)))
|
||||
return PERF_COUNT_HW_MAX;
|
||||
|
||||
if (guest_cpuid_family(pmc->vcpu) >= 0x17)
|
||||
event_mapping = amd_f17h_event_mapping;
|
||||
else
|
||||
event_mapping = amd_event_mapping;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(amd_event_mapping); i++)
|
||||
if (event_mapping[i].eventsel == event_select
|
||||
&& event_mapping[i].unit_mask == unit_mask)
|
||||
break;
|
||||
|
||||
if (i == ARRAY_SIZE(amd_event_mapping))
|
||||
return PERF_COUNT_HW_MAX;
|
||||
|
||||
return event_mapping[i].event_type;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* check if a PMC is enabled by comparing it against global_ctrl bits. Because
|
||||
@ -229,10 +179,19 @@ static struct kvm_pmc *amd_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
|
||||
return &counters[idx];
|
||||
}
|
||||
|
||||
static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
|
||||
static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr, bool host_initiated)
|
||||
{
|
||||
/* All MSRs refer to exactly one PMC, so msr_idx_to_pmc is enough. */
|
||||
return false;
|
||||
if (!host_initiated)
|
||||
return false;
|
||||
|
||||
switch (msr) {
|
||||
case MSR_K7_EVNTSEL0 ... MSR_K7_PERFCTR3:
|
||||
case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
|
||||
@ -286,8 +245,10 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL);
|
||||
if (pmc) {
|
||||
data &= ~pmu->reserved_bits;
|
||||
if (data != pmc->eventsel)
|
||||
reprogram_gp_counter(pmc, data);
|
||||
if (data != pmc->eventsel) {
|
||||
pmc->eventsel = data;
|
||||
reprogram_counter(pmc);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -343,7 +304,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
|
||||
struct kvm_pmu_ops amd_pmu_ops __initdata = {
|
||||
.pmc_perf_hw_id = amd_pmc_perf_hw_id,
|
||||
.hw_event_available = amd_hw_event_available,
|
||||
.pmc_is_enabled = amd_pmc_is_enabled,
|
||||
.pmc_idx_to_pmc = amd_pmc_idx_to_pmc,
|
||||
.rdpmc_ecx_to_pmc = amd_rdpmc_ecx_to_pmc,
|
||||
|
@ -342,9 +342,11 @@ static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
|
||||
|
||||
}
|
||||
|
||||
static int svm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
|
||||
static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu,
|
||||
bool commit_side_effects)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
unsigned long old_rflags;
|
||||
|
||||
/*
|
||||
* SEV-ES does not expose the next RIP. The RIP update is controlled by
|
||||
@ -359,18 +361,75 @@ static int svm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
|
||||
if (!svm->next_rip) {
|
||||
if (unlikely(!commit_side_effects))
|
||||
old_rflags = svm->vmcb->save.rflags;
|
||||
|
||||
if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
|
||||
return 0;
|
||||
|
||||
if (unlikely(!commit_side_effects))
|
||||
svm->vmcb->save.rflags = old_rflags;
|
||||
} else {
|
||||
kvm_rip_write(vcpu, svm->next_rip);
|
||||
}
|
||||
|
||||
done:
|
||||
svm_set_interrupt_shadow(vcpu, 0);
|
||||
if (likely(commit_side_effects))
|
||||
svm_set_interrupt_shadow(vcpu, 0);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * Skip the current guest instruction with side effects committed: forwards
 * to __svm_skip_emulated_instruction() with commit_side_effects == true and
 * returns that helper's result unchanged.
 */
static int svm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return __svm_skip_emulated_instruction(vcpu, true);
|
||||
}
|
||||
|
||||
static int svm_update_soft_interrupt_rip(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long rip, old_rip = kvm_rip_read(vcpu);
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
/*
|
||||
* Due to architectural shortcomings, the CPU doesn't always provide
|
||||
* NextRIP, e.g. if KVM intercepted an exception that occurred while
|
||||
* the CPU was vectoring an INTO/INT3 in the guest. Temporarily skip
|
||||
* the instruction even if NextRIP is supported to acquire the next
|
||||
* RIP so that it can be shoved into the NextRIP field, otherwise
|
||||
* hardware will fail to advance guest RIP during event injection.
|
||||
* Drop the exception/interrupt if emulation fails and effectively
|
||||
* retry the instruction, it's the least awful option. If NRIPS is
|
||||
* in use, the skip must not commit any side effects such as clearing
|
||||
* the interrupt shadow or RFLAGS.RF.
|
||||
*/
|
||||
if (!__svm_skip_emulated_instruction(vcpu, !nrips))
|
||||
return -EIO;
|
||||
|
||||
rip = kvm_rip_read(vcpu);
|
||||
|
||||
/*
|
||||
* Save the injection information, even when using next_rip, as the
|
||||
* VMCB's next_rip will be lost (cleared on VM-Exit) if the injection
|
||||
* doesn't complete due to a VM-Exit occurring while the CPU is
|
||||
* vectoring the event. Decoding the instruction isn't guaranteed to
|
||||
* work as there may be no backing instruction, e.g. if the event is
|
||||
* being injected by L1 for L2, or if the guest is patching INT3 into
|
||||
* a different instruction.
|
||||
*/
|
||||
svm->soft_int_injected = true;
|
||||
svm->soft_int_csbase = svm->vmcb->save.cs.base;
|
||||
svm->soft_int_old_rip = old_rip;
|
||||
svm->soft_int_next_rip = rip;
|
||||
|
||||
if (nrips)
|
||||
kvm_rip_write(vcpu, old_rip);
|
||||
|
||||
if (static_cpu_has(X86_FEATURE_NRIPS))
|
||||
svm->vmcb->control.next_rip = rip;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void svm_queue_exception(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
@ -380,21 +439,9 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
|
||||
|
||||
kvm_deliver_exception_payload(vcpu);
|
||||
|
||||
if (nr == BP_VECTOR && !nrips) {
|
||||
unsigned long rip, old_rip = kvm_rip_read(vcpu);
|
||||
|
||||
/*
|
||||
* For guest debugging where we have to reinject #BP if some
|
||||
* INT3 is guest-owned:
|
||||
* Emulate nRIP by moving RIP forward. Will fail if injection
|
||||
* raises a fault that is not intercepted. Still better than
|
||||
* failing in all cases.
|
||||
*/
|
||||
(void)svm_skip_emulated_instruction(vcpu);
|
||||
rip = kvm_rip_read(vcpu);
|
||||
svm->int3_rip = rip + svm->vmcb->save.cs.base;
|
||||
svm->int3_injected = rip - old_rip;
|
||||
}
|
||||
if (kvm_exception_is_soft(nr) &&
|
||||
svm_update_soft_interrupt_rip(vcpu))
|
||||
return;
|
||||
|
||||
svm->vmcb->control.event_inj = nr
|
||||
| SVM_EVTINJ_VALID
|
||||
@ -1238,7 +1285,7 @@ static void __svm_vcpu_reset(struct kvm_vcpu *vcpu)
|
||||
|
||||
svm_init_osvw(vcpu);
|
||||
vcpu->arch.microcode_version = 0x01000065;
|
||||
svm->tsc_ratio_msr = kvm_default_tsc_scaling_ratio;
|
||||
svm->tsc_ratio_msr = kvm_caps.default_tsc_scaling_ratio;
|
||||
|
||||
if (sev_es_guest(vcpu->kvm))
|
||||
sev_es_vcpu_reset(svm);
|
||||
@ -3382,23 +3429,36 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu)
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
|
||||
|
||||
if (svm->nmi_l1_to_l2)
|
||||
return;
|
||||
|
||||
vcpu->arch.hflags |= HF_NMI_MASK;
|
||||
if (!sev_es_guest(vcpu->kvm))
|
||||
svm_set_intercept(svm, INTERCEPT_IRET);
|
||||
++vcpu->stat.nmi_injections;
|
||||
}
|
||||
|
||||
static void svm_inject_irq(struct kvm_vcpu *vcpu)
|
||||
static void svm_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
u32 type;
|
||||
|
||||
BUG_ON(!(gif_set(svm)));
|
||||
if (vcpu->arch.interrupt.soft) {
|
||||
if (svm_update_soft_interrupt_rip(vcpu))
|
||||
return;
|
||||
|
||||
trace_kvm_inj_virq(vcpu->arch.interrupt.nr);
|
||||
type = SVM_EVTINJ_TYPE_SOFT;
|
||||
} else {
|
||||
type = SVM_EVTINJ_TYPE_INTR;
|
||||
}
|
||||
|
||||
trace_kvm_inj_virq(vcpu->arch.interrupt.nr,
|
||||
vcpu->arch.interrupt.soft, reinjected);
|
||||
++vcpu->stat.irq_injections;
|
||||
|
||||
svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr |
|
||||
SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
|
||||
SVM_EVTINJ_VALID | type;
|
||||
}
|
||||
|
||||
void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode,
|
||||
@ -3675,15 +3735,49 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
|
||||
svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
|
||||
}
|
||||
|
||||
/*
 * Fix up RIP/next_rip state after a VM-Exit that interrupted the vectoring
 * of a previously injected soft exception/interrupt.
 *
 * @vector and @type are the vector and type fields taken from the exit
 * interrupt info of the event that was being vectored.
 */
static void svm_complete_soft_interrupt(struct kvm_vcpu *vcpu, u8 vector,
					int type)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (nrips) {
		/*
		 * With NRIPS enabled, KVM has to restore the pre-VMRUN
		 * next_rip snapshot tied to the original soft
		 * exception/interrupt, because next_rip is cleared on every
		 * exit that can occur while vectoring an event.  This is
		 * needed if and only if the very same event is re-injected,
		 * i.e. the event is a soft exception/interrupt; otherwise
		 * next_rip is unused on VMRUN.
		 */
		bool same_soft_event =
			type == SVM_EXITINTINFO_TYPE_SOFT ||
			(type == SVM_EXITINTINFO_TYPE_EXEPT &&
			 kvm_exception_is_soft(vector));

		if (same_soft_event &&
		    kvm_is_linear_rip(vcpu, svm->soft_int_old_rip +
					    svm->soft_int_csbase))
			svm->vmcb->control.next_rip = svm->soft_int_next_rip;

		return;
	}

	/*
	 * Without NRIPS, KVM advanced RIP by hand before injecting the soft
	 * exception/interrupt, and that advance has to be undone when
	 * vectoring did not complete.  The reported event need not be the
	 * injected one: if KVM injected an INTn, the INTn hit a #NP in the
	 * guest and the #NP then hit a #PF, the #NP is the reported vectored
	 * event, yet RIP must still be unwound.
	 */
	if ((type == SVM_EXITINTINFO_TYPE_SOFT ||
	     type == SVM_EXITINTINFO_TYPE_EXEPT) &&
	    kvm_is_linear_rip(vcpu, svm->soft_int_next_rip +
				    svm->soft_int_csbase))
		kvm_rip_write(vcpu, svm->soft_int_old_rip);
}
|
||||
|
||||
static void svm_complete_interrupts(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
u8 vector;
|
||||
int type;
|
||||
u32 exitintinfo = svm->vmcb->control.exit_int_info;
|
||||
unsigned int3_injected = svm->int3_injected;
|
||||
bool nmi_l1_to_l2 = svm->nmi_l1_to_l2;
|
||||
bool soft_int_injected = svm->soft_int_injected;
|
||||
|
||||
svm->int3_injected = 0;
|
||||
svm->nmi_l1_to_l2 = false;
|
||||
svm->soft_int_injected = false;
|
||||
|
||||
/*
|
||||
* If we've made progress since setting HF_IRET_MASK, we've
|
||||
@ -3708,9 +3802,13 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu)
|
||||
vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
|
||||
type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
|
||||
|
||||
if (soft_int_injected)
|
||||
svm_complete_soft_interrupt(vcpu, vector, type);
|
||||
|
||||
switch (type) {
|
||||
case SVM_EXITINTINFO_TYPE_NMI:
|
||||
vcpu->arch.nmi_injected = true;
|
||||
svm->nmi_l1_to_l2 = nmi_l1_to_l2;
|
||||
break;
|
||||
case SVM_EXITINTINFO_TYPE_EXEPT:
|
||||
/*
|
||||
@ -3719,18 +3817,6 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu)
|
||||
if (vector == X86_TRAP_VC)
|
||||
break;
|
||||
|
||||
/*
|
||||
* In case of software exceptions, do not reinject the vector,
|
||||
* but re-execute the instruction instead. Rewind RIP first
|
||||
* if we emulated INT3 before.
|
||||
*/
|
||||
if (kvm_exception_is_soft(vector)) {
|
||||
if (vector == BP_VECTOR && int3_injected &&
|
||||
kvm_is_linear_rip(vcpu, svm->int3_rip))
|
||||
kvm_rip_write(vcpu,
|
||||
kvm_rip_read(vcpu) - int3_injected);
|
||||
break;
|
||||
}
|
||||
if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
|
||||
u32 err = svm->vmcb->control.exit_int_info_err;
|
||||
kvm_requeue_exception_e(vcpu, vector, err);
|
||||
@ -3741,9 +3827,13 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu)
|
||||
case SVM_EXITINTINFO_TYPE_INTR:
|
||||
kvm_queue_interrupt(vcpu, vector, false);
|
||||
break;
|
||||
case SVM_EXITINTINFO_TYPE_SOFT:
|
||||
kvm_queue_interrupt(vcpu, vector, true);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void svm_cancel_injection(struct kvm_vcpu *vcpu)
|
||||
@ -4780,7 +4870,7 @@ static __init void svm_set_cpu_caps(void)
|
||||
{
|
||||
kvm_set_cpu_caps();
|
||||
|
||||
supported_xss = 0;
|
||||
kvm_caps.supported_xss = 0;
|
||||
|
||||
/* CPUID 0x80000001 and 0x8000000A (SVM features) */
|
||||
if (nested) {
|
||||
@ -4856,7 +4946,8 @@ static __init int svm_hardware_setup(void)
|
||||
|
||||
init_msrpm_offsets();
|
||||
|
||||
supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
|
||||
kvm_caps.supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS |
|
||||
XFEATURE_MASK_BNDCSR);
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
|
||||
kvm_enable_efer_bits(EFER_FFXSR);
|
||||
@ -4866,11 +4957,11 @@ static __init int svm_hardware_setup(void)
|
||||
tsc_scaling = false;
|
||||
} else {
|
||||
pr_info("TSC scaling supported\n");
|
||||
kvm_has_tsc_control = true;
|
||||
kvm_caps.has_tsc_control = true;
|
||||
}
|
||||
}
|
||||
kvm_max_tsc_scaling_ratio = SVM_TSC_RATIO_MAX;
|
||||
kvm_tsc_scaling_ratio_frac_bits = 32;
|
||||
kvm_caps.max_tsc_scaling_ratio = SVM_TSC_RATIO_MAX;
|
||||
kvm_caps.tsc_scaling_ratio_frac_bits = 32;
|
||||
|
||||
tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX);
|
||||
|
||||
|
@ -139,6 +139,7 @@ struct vmcb_ctrl_area_cached {
|
||||
u64 nested_ctl;
|
||||
u32 event_inj;
|
||||
u32 event_inj_err;
|
||||
u64 next_rip;
|
||||
u64 nested_cr3;
|
||||
u64 virt_ext;
|
||||
u32 clean;
|
||||
@ -228,9 +229,12 @@ struct vcpu_svm {
|
||||
|
||||
bool nmi_singlestep;
|
||||
u64 nmi_singlestep_guest_rflags;
|
||||
bool nmi_l1_to_l2;
|
||||
|
||||
unsigned int3_injected;
|
||||
unsigned long int3_rip;
|
||||
unsigned long soft_int_csbase;
|
||||
unsigned long soft_int_old_rip;
|
||||
unsigned long soft_int_next_rip;
|
||||
bool soft_int_injected;
|
||||
|
||||
/* optional nested SVM features that are enabled for this guest */
|
||||
bool nrips_enabled : 1;
|
||||
|
@ -333,18 +333,24 @@ TRACE_EVENT_KVM_EXIT(kvm_exit);
|
||||
* Tracepoint for kvm interrupt injection:
|
||||
*/
|
||||
TRACE_EVENT(kvm_inj_virq,
|
||||
TP_PROTO(unsigned int irq),
|
||||
TP_ARGS(irq),
|
||||
TP_PROTO(unsigned int vector, bool soft, bool reinjected),
|
||||
TP_ARGS(vector, soft, reinjected),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned int, irq )
|
||||
__field( unsigned int, vector )
|
||||
__field( bool, soft )
|
||||
__field( bool, reinjected )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->irq = irq;
|
||||
__entry->vector = vector;
|
||||
__entry->soft = soft;
|
||||
__entry->reinjected = reinjected;
|
||||
),
|
||||
|
||||
TP_printk("irq %u", __entry->irq)
|
||||
TP_printk("%s 0x%x%s",
|
||||
__entry->soft ? "Soft/INTn" : "IRQ", __entry->vector,
|
||||
__entry->reinjected ? " [reinjected]" : "")
|
||||
);
|
||||
|
||||
#define EXS(x) { x##_VECTOR, "#" #x }
|
||||
@ -358,25 +364,30 @@ TRACE_EVENT(kvm_inj_virq,
|
||||
* Tracepoint for kvm exception injection:
|
||||
*/
|
||||
TRACE_EVENT(kvm_inj_exception,
|
||||
TP_PROTO(unsigned exception, bool has_error, unsigned error_code),
|
||||
TP_ARGS(exception, has_error, error_code),
|
||||
TP_PROTO(unsigned exception, bool has_error, unsigned error_code,
|
||||
bool reinjected),
|
||||
TP_ARGS(exception, has_error, error_code, reinjected),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( u8, exception )
|
||||
__field( u8, has_error )
|
||||
__field( u32, error_code )
|
||||
__field( bool, reinjected )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->exception = exception;
|
||||
__entry->has_error = has_error;
|
||||
__entry->error_code = error_code;
|
||||
__entry->reinjected = reinjected;
|
||||
),
|
||||
|
||||
TP_printk("%s (0x%x)",
|
||||
TP_printk("%s%s%s%s%s",
|
||||
__print_symbolic(__entry->exception, kvm_trace_sym_exc),
|
||||
/* FIXME: don't print error_code if not present */
|
||||
__entry->has_error ? __entry->error_code : 0)
|
||||
!__entry->has_error ? "" : " (",
|
||||
!__entry->has_error ? "" : __print_symbolic(__entry->error_code, { }),
|
||||
!__entry->has_error ? "" : ")",
|
||||
__entry->reinjected ? " [reinjected]" : "")
|
||||
);
|
||||
|
||||
/*
|
||||
|
@ -6,6 +6,8 @@
|
||||
|
||||
#include "lapic.h"
|
||||
#include "x86.h"
|
||||
#include "pmu.h"
|
||||
#include "cpuid.h"
|
||||
|
||||
extern bool __read_mostly enable_vpid;
|
||||
extern bool __read_mostly flexpriority_enabled;
|
||||
@ -13,6 +15,7 @@ extern bool __read_mostly enable_ept;
|
||||
extern bool __read_mostly enable_unrestricted_guest;
|
||||
extern bool __read_mostly enable_ept_ad_bits;
|
||||
extern bool __read_mostly enable_pml;
|
||||
extern bool __read_mostly enable_ipiv;
|
||||
extern int __read_mostly pt_mode;
|
||||
|
||||
#define PT_MODE_SYSTEM 0
|
||||
@ -59,6 +62,7 @@ struct vmcs_config {
|
||||
u32 pin_based_exec_ctrl;
|
||||
u32 cpu_based_exec_ctrl;
|
||||
u32 cpu_based_2nd_exec_ctrl;
|
||||
u64 cpu_based_3rd_exec_ctrl;
|
||||
u32 vmexit_ctrl;
|
||||
u32 vmentry_ctrl;
|
||||
struct nested_vmx_msrs nested;
|
||||
@ -94,20 +98,17 @@ static inline bool cpu_has_vmx_posted_intr(void)
|
||||
|
||||
static inline bool cpu_has_load_ia32_efer(void)
|
||||
{
|
||||
return (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_EFER) &&
|
||||
(vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_EFER);
|
||||
return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_EFER;
|
||||
}
|
||||
|
||||
static inline bool cpu_has_load_perf_global_ctrl(void)
|
||||
{
|
||||
return (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
|
||||
(vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
|
||||
return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
|
||||
}
|
||||
|
||||
static inline bool cpu_has_vmx_mpx(void)
|
||||
{
|
||||
return (vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_BNDCFGS) &&
|
||||
(vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS);
|
||||
return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS;
|
||||
}
|
||||
|
||||
static inline bool cpu_has_vmx_tpr_shadow(void)
|
||||
@ -131,6 +132,12 @@ static inline bool cpu_has_secondary_exec_ctrls(void)
|
||||
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
|
||||
}
|
||||
|
||||
/*
 * True when CPU_BASED_ACTIVATE_TERTIARY_CONTROLS is set in
 * vmcs_config.cpu_based_exec_ctrl.
 */
static inline bool cpu_has_tertiary_exec_ctrls(void)
|
||||
{
|
||||
return vmcs_config.cpu_based_exec_ctrl &
|
||||
CPU_BASED_ACTIVATE_TERTIARY_CONTROLS;
|
||||
}
|
||||
|
||||
static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
|
||||
{
|
||||
return vmcs_config.cpu_based_2nd_exec_ctrl &
|
||||
@ -276,6 +283,11 @@ static inline bool cpu_has_vmx_apicv(void)
|
||||
cpu_has_vmx_posted_intr();
|
||||
}
|
||||
|
||||
/*
 * True when TERTIARY_EXEC_IPI_VIRT is set in
 * vmcs_config.cpu_based_3rd_exec_ctrl.
 */
static inline bool cpu_has_vmx_ipiv(void)
|
||||
{
|
||||
return vmcs_config.cpu_based_3rd_exec_ctrl & TERTIARY_EXEC_IPI_VIRT;
|
||||
}
|
||||
|
||||
static inline bool cpu_has_vmx_flexpriority(void)
|
||||
{
|
||||
return cpu_has_vmx_tpr_shadow() &&
|
||||
@ -363,7 +375,6 @@ static inline bool cpu_has_vmx_intel_pt(void)
|
||||
rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
|
||||
return (vmx_msr & MSR_IA32_VMX_MISC_INTEL_PT) &&
|
||||
(vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_PT_USE_GPA) &&
|
||||
(vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_IA32_RTIT_CTL) &&
|
||||
(vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_RTIT_CTL);
|
||||
}
|
||||
|
||||
@ -385,23 +396,31 @@ static inline bool vmx_pt_mode_is_host_guest(void)
|
||||
return pt_mode == PT_MODE_HOST_GUEST;
|
||||
}
|
||||
|
||||
/*
 * True when the boot CPU reports X86_FEATURE_PEBS and
 * kvm_pmu_cap.pebs_ept is non-zero.
 */
static inline bool vmx_pebs_supported(void)
|
||||
{
|
||||
return boot_cpu_has(X86_FEATURE_PEBS) && kvm_pmu_cap.pebs_ept;
|
||||
}
|
||||
|
||||
static inline u64 vmx_get_perf_capabilities(void)
|
||||
{
|
||||
u64 perf_cap = 0;
|
||||
u64 perf_cap = PMU_CAP_FW_WRITES;
|
||||
u64 host_perf_cap = 0;
|
||||
|
||||
if (!enable_pmu)
|
||||
return perf_cap;
|
||||
return 0;
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_PDCM))
|
||||
rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_cap);
|
||||
rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);
|
||||
|
||||
perf_cap &= PMU_CAP_LBR_FMT;
|
||||
perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
|
||||
|
||||
/*
|
||||
* Since counters are virtualized, KVM would support full
|
||||
* width counting unconditionally, even if the host lacks it.
|
||||
*/
|
||||
return PMU_CAP_FW_WRITES | perf_cap;
|
||||
if (vmx_pebs_supported()) {
|
||||
perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK;
|
||||
if ((perf_cap & PERF_CAP_PEBS_FORMAT) < 4)
|
||||
perf_cap &= ~PERF_CAP_PEBS_BASELINE;
|
||||
}
|
||||
|
||||
return perf_cap;
|
||||
}
|
||||
|
||||
static inline u64 vmx_supported_debugctl(void)
|
||||
@ -417,4 +436,10 @@ static inline u64 vmx_supported_debugctl(void)
|
||||
return debugctl;
|
||||
}
|
||||
|
||||
/*
 * True when SECONDARY_EXEC_NOTIFY_VM_EXITING is set in
 * vmcs_config.cpu_based_2nd_exec_ctrl.
 */
static inline bool cpu_has_notify_vmexit(void)
|
||||
{
|
||||
return vmcs_config.cpu_based_2nd_exec_ctrl &
|
||||
SECONDARY_EXEC_NOTIFY_VM_EXITING;
|
||||
}
|
||||
|
||||
#endif /* __KVM_X86_VMX_CAPS_H */
|
||||
|
@ -297,8 +297,10 @@ const unsigned int nr_evmcs_1_fields = ARRAY_SIZE(vmcs_field_to_evmcs_1);
|
||||
#if IS_ENABLED(CONFIG_HYPERV)
|
||||
__init void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)
|
||||
{
|
||||
vmcs_conf->cpu_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_EXEC_CTRL;
|
||||
vmcs_conf->pin_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_PINCTRL;
|
||||
vmcs_conf->cpu_based_2nd_exec_ctrl &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
|
||||
vmcs_conf->cpu_based_3rd_exec_ctrl = 0;
|
||||
|
||||
vmcs_conf->vmexit_ctrl &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
|
||||
vmcs_conf->vmentry_ctrl &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
|
||||
|
@ -50,6 +50,7 @@ DECLARE_STATIC_KEY_FALSE(enable_evmcs);
|
||||
*/
|
||||
#define EVMCS1_UNSUPPORTED_PINCTRL (PIN_BASED_POSTED_INTR | \
|
||||
PIN_BASED_VMX_PREEMPTION_TIMER)
|
||||
#define EVMCS1_UNSUPPORTED_EXEC_CTRL (CPU_BASED_ACTIVATE_TERTIARY_CONTROLS)
|
||||
#define EVMCS1_UNSUPPORTED_2NDEXEC \
|
||||
(SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | \
|
||||
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | \
|
||||
|
@ -2133,6 +2133,8 @@ static u64 nested_vmx_calc_efer(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
|
||||
|
||||
static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
|
||||
{
|
||||
struct kvm *kvm = vmx->vcpu.kvm;
|
||||
|
||||
/*
|
||||
* If vmcs02 hasn't been initialized, set the constant vmcs02 state
|
||||
* according to L0's settings (vmcs12 is irrelevant here). Host
|
||||
@ -2175,6 +2177,9 @@ static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
|
||||
if (cpu_has_vmx_encls_vmexit())
|
||||
vmcs_write64(ENCLS_EXITING_BITMAP, INVALID_GPA);
|
||||
|
||||
if (kvm_notify_vmexit_enabled(kvm))
|
||||
vmcs_write32(NOTIFY_WINDOW, kvm->arch.notify_window);
|
||||
|
||||
/*
|
||||
* Set the MSR load/store lists to match L0's settings. Only the
|
||||
* addresses are constant (for vmcs02), the counts can change based
|
||||
@ -2548,7 +2553,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
|
||||
vmx_get_l2_tsc_multiplier(vcpu));
|
||||
|
||||
vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
|
||||
if (kvm_has_tsc_control)
|
||||
if (kvm_caps.has_tsc_control)
|
||||
vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);
|
||||
|
||||
nested_vmx_transition_tlb_flush(vcpu, vmcs12, true);
|
||||
@ -4610,7 +4615,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
|
||||
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
|
||||
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
|
||||
vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
|
||||
if (kvm_has_tsc_control)
|
||||
if (kvm_caps.has_tsc_control)
|
||||
vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);
|
||||
|
||||
if (vmx->nested.l1_tpr_threshold != -1)
|
||||
@ -6112,6 +6117,9 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu,
|
||||
SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE);
|
||||
case EXIT_REASON_ENCLS:
|
||||
return nested_vmx_exit_handled_encls(vcpu, vmcs12);
|
||||
case EXIT_REASON_NOTIFY:
|
||||
/* Notify VM exit is not exposed to L1 */
|
||||
return false;
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
|
@ -37,23 +37,35 @@ static int fixed_pmc_events[] = {1, 0, 7};
|
||||
|
||||
static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
|
||||
{
|
||||
struct kvm_pmc *pmc;
|
||||
u8 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl;
|
||||
int i;
|
||||
|
||||
pmu->fixed_ctr_ctrl = data;
|
||||
for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
|
||||
u8 new_ctrl = fixed_ctrl_field(data, i);
|
||||
u8 old_ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl, i);
|
||||
struct kvm_pmc *pmc;
|
||||
|
||||
pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + i);
|
||||
u8 old_ctrl = fixed_ctrl_field(old_fixed_ctr_ctrl, i);
|
||||
|
||||
if (old_ctrl == new_ctrl)
|
||||
continue;
|
||||
|
||||
__set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use);
|
||||
reprogram_fixed_counter(pmc, new_ctrl, i);
|
||||
}
|
||||
pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + i);
|
||||
|
||||
pmu->fixed_ctr_ctrl = data;
|
||||
__set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use);
|
||||
reprogram_counter(pmc);
|
||||
}
|
||||
}
|
||||
|
||||
static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
|
||||
{
|
||||
if (pmc_idx < INTEL_PMC_IDX_FIXED) {
|
||||
return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + pmc_idx,
|
||||
MSR_P6_EVNTSEL0);
|
||||
} else {
|
||||
u32 idx = pmc_idx - INTEL_PMC_IDX_FIXED;
|
||||
|
||||
return get_fixed_pmc(pmu, idx + MSR_CORE_PERF_FIXED_CTR0);
|
||||
}
|
||||
}
|
||||
|
||||
/* function is called when global control register has been updated. */
|
||||
@ -61,14 +73,18 @@ static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data)
|
||||
{
|
||||
int bit;
|
||||
u64 diff = pmu->global_ctrl ^ data;
|
||||
struct kvm_pmc *pmc;
|
||||
|
||||
pmu->global_ctrl = data;
|
||||
|
||||
for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX)
|
||||
reprogram_counter(pmu, bit);
|
||||
for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX) {
|
||||
pmc = intel_pmc_idx_to_pmc(pmu, bit);
|
||||
if (pmc)
|
||||
reprogram_counter(pmc);
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned int intel_pmc_perf_hw_id(struct kvm_pmc *pmc)
|
||||
static bool intel_hw_event_available(struct kvm_pmc *pmc)
|
||||
{
|
||||
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
|
||||
u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
|
||||
@ -82,15 +98,12 @@ static unsigned int intel_pmc_perf_hw_id(struct kvm_pmc *pmc)
|
||||
|
||||
/* disable event that reported as not present by cpuid */
|
||||
if ((i < 7) && !(pmu->available_event_types & (1 << i)))
|
||||
return PERF_COUNT_HW_MAX + 1;
|
||||
return false;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
if (i == ARRAY_SIZE(intel_arch_events))
|
||||
return PERF_COUNT_HW_MAX;
|
||||
|
||||
return intel_arch_events[i].event_type;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* check if a PMC is enabled by comparing it with global_ctrl bits. */
|
||||
@ -98,21 +111,12 @@ static bool intel_pmc_is_enabled(struct kvm_pmc *pmc)
|
||||
{
|
||||
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
|
||||
|
||||
if (pmu->version < 2)
|
||||
return true;
|
||||
|
||||
return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
|
||||
}
|
||||
|
||||
static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
|
||||
{
|
||||
if (pmc_idx < INTEL_PMC_IDX_FIXED)
|
||||
return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + pmc_idx,
|
||||
MSR_P6_EVNTSEL0);
|
||||
else {
|
||||
u32 idx = pmc_idx - INTEL_PMC_IDX_FIXED;
|
||||
|
||||
return get_fixed_pmc(pmu, idx + MSR_CORE_PERF_FIXED_CTR0);
|
||||
}
|
||||
}
|
||||
|
||||
static bool intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
|
||||
{
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
|
||||
@ -167,16 +171,6 @@ static inline struct kvm_pmc *get_fw_gp_pmc(struct kvm_pmu *pmu, u32 msr)
|
||||
return get_gp_pmc(pmu, msr, MSR_IA32_PMC0);
|
||||
}
|
||||
|
||||
bool intel_pmu_lbr_is_compatible(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
* As a first step, a guest could only enable LBR feature if its
|
||||
* cpu model is the same as the host because the LBR registers
|
||||
* would be pass-through to the guest and they're model specific.
|
||||
*/
|
||||
return boot_cpu_data.x86_model == guest_cpuid_model(vcpu);
|
||||
}
|
||||
|
||||
bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct x86_pmu_lbr *lbr = vcpu_to_lbr_records(vcpu);
|
||||
@ -202,27 +196,45 @@ static bool intel_pmu_is_valid_lbr_msr(struct kvm_vcpu *vcpu, u32 index)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
|
||||
static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr, bool host_initiated)
|
||||
{
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
|
||||
int ret;
|
||||
u64 perf_capabilities = vcpu->arch.perf_capabilities;
|
||||
|
||||
switch (msr) {
|
||||
case MSR_CORE_PERF_FIXED_CTR_CTRL:
|
||||
case MSR_CORE_PERF_GLOBAL_STATUS:
|
||||
case MSR_CORE_PERF_GLOBAL_CTRL:
|
||||
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
|
||||
ret = pmu->version > 1;
|
||||
if (host_initiated)
|
||||
return true;
|
||||
return pmu->version > 1;
|
||||
break;
|
||||
case MSR_IA32_PEBS_ENABLE:
|
||||
if (host_initiated)
|
||||
return true;
|
||||
return perf_capabilities & PERF_CAP_PEBS_FORMAT;
|
||||
break;
|
||||
case MSR_IA32_DS_AREA:
|
||||
if (host_initiated)
|
||||
return true;
|
||||
return guest_cpuid_has(vcpu, X86_FEATURE_DS);
|
||||
break;
|
||||
case MSR_PEBS_DATA_CFG:
|
||||
if (host_initiated)
|
||||
return true;
|
||||
return (perf_capabilities & PERF_CAP_PEBS_BASELINE) &&
|
||||
((perf_capabilities & PERF_CAP_PEBS_FORMAT) > 3);
|
||||
break;
|
||||
default:
|
||||
ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) ||
|
||||
if (host_initiated)
|
||||
return true;
|
||||
return get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) ||
|
||||
get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) ||
|
||||
get_fixed_pmc(pmu, msr) || get_fw_gp_pmc(pmu, msr) ||
|
||||
intel_pmu_is_valid_lbr_msr(vcpu, msr);
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
|
||||
@ -361,6 +373,15 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
|
||||
msr_info->data = 0;
|
||||
return 0;
|
||||
case MSR_IA32_PEBS_ENABLE:
|
||||
msr_info->data = pmu->pebs_enable;
|
||||
return 0;
|
||||
case MSR_IA32_DS_AREA:
|
||||
msr_info->data = pmu->ds_area;
|
||||
return 0;
|
||||
case MSR_PEBS_DATA_CFG:
|
||||
msr_info->data = pmu->pebs_data_cfg;
|
||||
return 0;
|
||||
default:
|
||||
if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
|
||||
(pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
|
||||
@ -395,7 +416,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
case MSR_CORE_PERF_FIXED_CTR_CTRL:
|
||||
if (pmu->fixed_ctr_ctrl == data)
|
||||
return 0;
|
||||
if (!(data & 0xfffffffffffff444ull)) {
|
||||
if (!(data & pmu->fixed_ctr_ctrl_mask)) {
|
||||
reprogram_fixed_counters(pmu, data);
|
||||
return 0;
|
||||
}
|
||||
@ -421,6 +442,29 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
case MSR_IA32_PEBS_ENABLE:
|
||||
if (pmu->pebs_enable == data)
|
||||
return 0;
|
||||
if (!(data & pmu->pebs_enable_mask)) {
|
||||
pmu->pebs_enable = data;
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
case MSR_IA32_DS_AREA:
|
||||
if (msr_info->host_initiated && data && !guest_cpuid_has(vcpu, X86_FEATURE_DS))
|
||||
return 1;
|
||||
if (is_noncanonical_address(data, vcpu))
|
||||
return 1;
|
||||
pmu->ds_area = data;
|
||||
return 0;
|
||||
case MSR_PEBS_DATA_CFG:
|
||||
if (pmu->pebs_data_cfg == data)
|
||||
return 0;
|
||||
if (!(data & pmu->pebs_data_cfg_mask)) {
|
||||
pmu->pebs_data_cfg = data;
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
|
||||
(pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
|
||||
@ -445,7 +489,8 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
(pmu->raw_event_mask & HSW_IN_TX_CHECKPOINTED))
|
||||
reserved_bits ^= HSW_IN_TX_CHECKPOINTED;
|
||||
if (!(data & reserved_bits)) {
|
||||
reprogram_gp_counter(pmc, data);
|
||||
pmc->eventsel = data;
|
||||
reprogram_counter(pmc);
|
||||
return 0;
|
||||
}
|
||||
} else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false))
|
||||
@ -474,11 +519,11 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
|
||||
struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
|
||||
|
||||
struct x86_pmu_capability x86_pmu;
|
||||
struct kvm_cpuid_entry2 *entry;
|
||||
union cpuid10_eax eax;
|
||||
union cpuid10_edx edx;
|
||||
u64 counter_mask;
|
||||
int i;
|
||||
|
||||
pmu->nr_arch_gp_counters = 0;
|
||||
pmu->nr_arch_fixed_counters = 0;
|
||||
@ -487,6 +532,11 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
|
||||
pmu->version = 0;
|
||||
pmu->reserved_bits = 0xffffffff00200000ull;
|
||||
pmu->raw_event_mask = X86_RAW_EVENT_MASK;
|
||||
pmu->fixed_ctr_ctrl_mask = ~0ull;
|
||||
pmu->pebs_enable_mask = ~0ull;
|
||||
pmu->pebs_data_cfg_mask = ~0ull;
|
||||
|
||||
vcpu->arch.ia32_misc_enable_msr |= MSR_IA32_MISC_ENABLE_PMU_RO_MASK;
|
||||
|
||||
entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
|
||||
if (!entry || !vcpu->kvm->arch.enable_pmu)
|
||||
@ -498,13 +548,15 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
|
||||
if (!pmu->version)
|
||||
return;
|
||||
|
||||
perf_get_x86_pmu_capability(&x86_pmu);
|
||||
vcpu->arch.ia32_misc_enable_msr |= MSR_IA32_MISC_ENABLE_EMON;
|
||||
|
||||
pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters,
|
||||
x86_pmu.num_counters_gp);
|
||||
eax.split.bit_width = min_t(int, eax.split.bit_width, x86_pmu.bit_width_gp);
|
||||
kvm_pmu_cap.num_counters_gp);
|
||||
eax.split.bit_width = min_t(int, eax.split.bit_width,
|
||||
kvm_pmu_cap.bit_width_gp);
|
||||
pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1;
|
||||
eax.split.mask_length = min_t(int, eax.split.mask_length, x86_pmu.events_mask_len);
|
||||
eax.split.mask_length = min_t(int, eax.split.mask_length,
|
||||
kvm_pmu_cap.events_mask_len);
|
||||
pmu->available_event_types = ~entry->ebx &
|
||||
((1ull << eax.split.mask_length) - 1);
|
||||
|
||||
@ -514,17 +566,19 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
|
||||
pmu->nr_arch_fixed_counters =
|
||||
min3(ARRAY_SIZE(fixed_pmc_events),
|
||||
(size_t) edx.split.num_counters_fixed,
|
||||
(size_t) x86_pmu.num_counters_fixed);
|
||||
edx.split.bit_width_fixed = min_t(int,
|
||||
edx.split.bit_width_fixed, x86_pmu.bit_width_fixed);
|
||||
(size_t)kvm_pmu_cap.num_counters_fixed);
|
||||
edx.split.bit_width_fixed = min_t(int, edx.split.bit_width_fixed,
|
||||
kvm_pmu_cap.bit_width_fixed);
|
||||
pmu->counter_bitmask[KVM_PMC_FIXED] =
|
||||
((u64)1 << edx.split.bit_width_fixed) - 1;
|
||||
setup_fixed_pmc_eventsel(pmu);
|
||||
}
|
||||
|
||||
pmu->global_ctrl = ((1ull << pmu->nr_arch_gp_counters) - 1) |
|
||||
(((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED);
|
||||
pmu->global_ctrl_mask = ~pmu->global_ctrl;
|
||||
for (i = 0; i < pmu->nr_arch_fixed_counters; i++)
|
||||
pmu->fixed_ctr_ctrl_mask &= ~(0xbull << (i * 4));
|
||||
counter_mask = ~(((1ull << pmu->nr_arch_gp_counters) - 1) |
|
||||
(((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED));
|
||||
pmu->global_ctrl_mask = counter_mask;
|
||||
pmu->global_ovf_ctrl_mask = pmu->global_ctrl_mask
|
||||
& ~(MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF |
|
||||
MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD);
|
||||
@ -546,15 +600,33 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
|
||||
INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters);
|
||||
|
||||
nested_vmx_pmu_refresh(vcpu,
|
||||
intel_is_valid_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL));
|
||||
intel_is_valid_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, false));
|
||||
|
||||
if (intel_pmu_lbr_is_compatible(vcpu))
|
||||
if (cpuid_model_is_consistent(vcpu))
|
||||
x86_perf_get_lbr(&lbr_desc->records);
|
||||
else
|
||||
lbr_desc->records.nr = 0;
|
||||
|
||||
if (lbr_desc->records.nr)
|
||||
bitmap_set(pmu->all_valid_pmc_idx, INTEL_PMC_IDX_FIXED_VLBR, 1);
|
||||
|
||||
if (vcpu->arch.perf_capabilities & PERF_CAP_PEBS_FORMAT) {
|
||||
vcpu->arch.ia32_misc_enable_msr &= ~MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL;
|
||||
if (vcpu->arch.perf_capabilities & PERF_CAP_PEBS_BASELINE) {
|
||||
pmu->pebs_enable_mask = counter_mask;
|
||||
pmu->reserved_bits &= ~ICL_EVENTSEL_ADAPTIVE;
|
||||
for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
|
||||
pmu->fixed_ctr_ctrl_mask &=
|
||||
~(1ULL << (INTEL_PMC_IDX_FIXED + i * 4));
|
||||
}
|
||||
pmu->pebs_data_cfg_mask = ~0xff00000full;
|
||||
} else {
|
||||
pmu->pebs_enable_mask =
|
||||
~((1ull << pmu->nr_arch_gp_counters) - 1);
|
||||
}
|
||||
} else {
|
||||
vcpu->arch.perf_capabilities &= ~PERF_CAP_PEBS_MASK;
|
||||
}
|
||||
}
|
||||
|
||||
static void intel_pmu_init(struct kvm_vcpu *vcpu)
|
||||
@ -719,8 +791,28 @@ static void intel_pmu_cleanup(struct kvm_vcpu *vcpu)
|
||||
intel_pmu_release_guest_lbr_event(vcpu);
|
||||
}
|
||||
|
||||
void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu)
|
||||
{
|
||||
struct kvm_pmc *pmc = NULL;
|
||||
int bit;
|
||||
|
||||
for_each_set_bit(bit, (unsigned long *)&pmu->global_ctrl,
|
||||
X86_PMC_IDX_MAX) {
|
||||
pmc = intel_pmc_idx_to_pmc(pmu, bit);
|
||||
|
||||
if (!pmc || !pmc_speculative_in_use(pmc) ||
|
||||
!intel_pmc_is_enabled(pmc))
|
||||
continue;
|
||||
|
||||
if (pmc->perf_event && pmc->idx != pmc->perf_event->hw.idx) {
|
||||
pmu->host_cross_mapped_mask |=
|
||||
BIT_ULL(pmc->perf_event->hw.idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct kvm_pmu_ops intel_pmu_ops __initdata = {
|
||||
.pmc_perf_hw_id = intel_pmc_perf_hw_id,
|
||||
.hw_event_available = intel_hw_event_available,
|
||||
.pmc_is_enabled = intel_pmc_is_enabled,
|
||||
.pmc_idx_to_pmc = intel_pmc_idx_to_pmc,
|
||||
.rdpmc_ecx_to_pmc = intel_rdpmc_ecx_to_pmc,
|
||||
|
@ -177,11 +177,24 @@ static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu)
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
static bool vmx_needs_pi_wakeup(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
* The default posted interrupt vector does nothing when
|
||||
* invoked outside guest mode. Return whether a blocked vCPU
|
||||
* can be the target of posted interrupts, as is the case when
|
||||
* using either IPI virtualization or VT-d PI, so that the
|
||||
* notification vector is switched to the one that calls
|
||||
* back to the pi_wakeup_handler() function.
|
||||
*/
|
||||
return vmx_can_use_ipiv(vcpu) || vmx_can_use_vtd_pi(vcpu->kvm);
|
||||
}
|
||||
|
||||
void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
|
||||
|
||||
if (!vmx_can_use_vtd_pi(vcpu->kvm))
|
||||
if (!vmx_needs_pi_wakeup(vcpu))
|
||||
return;
|
||||
|
||||
if (kvm_vcpu_is_blocking(vcpu) && !vmx_interrupt_blocked(vcpu))
|
||||
|
@ -5,6 +5,8 @@
|
||||
#define POSTED_INTR_ON 0
|
||||
#define POSTED_INTR_SN 1
|
||||
|
||||
#define PID_TABLE_ENTRY_VALID 1
|
||||
|
||||
/* Posted-Interrupt Descriptor */
|
||||
struct pi_desc {
|
||||
u32 pir[8]; /* Posted interrupt requested */
|
||||
|
@ -50,6 +50,7 @@ struct vmcs_controls_shadow {
|
||||
u32 pin;
|
||||
u32 exec;
|
||||
u32 secondary_exec;
|
||||
u64 tertiary_exec;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -105,6 +105,9 @@ module_param(fasteoi, bool, S_IRUGO);
|
||||
|
||||
module_param(enable_apicv, bool, S_IRUGO);
|
||||
|
||||
bool __read_mostly enable_ipiv = true;
|
||||
module_param(enable_ipiv, bool, 0444);
|
||||
|
||||
/*
|
||||
* If nested=1, nested virtualization is supported, i.e., guests may use
|
||||
* VMX and be a hypervisor for its own guests. If nested=0, guests may not
|
||||
@ -116,6 +119,9 @@ module_param(nested, bool, S_IRUGO);
|
||||
bool __read_mostly enable_pml = 1;
|
||||
module_param_named(pml, enable_pml, bool, S_IRUGO);
|
||||
|
||||
static bool __read_mostly error_on_inconsistent_vmcs_config = true;
|
||||
module_param(error_on_inconsistent_vmcs_config, bool, 0444);
|
||||
|
||||
static bool __read_mostly dump_invalid_vmcs = 0;
|
||||
module_param(dump_invalid_vmcs, bool, 0644);
|
||||
|
||||
@ -386,18 +392,20 @@ asmlinkage void vmread_error(unsigned long field, bool fault)
|
||||
|
||||
noinline void vmwrite_error(unsigned long field, unsigned long value)
|
||||
{
|
||||
vmx_insn_failed("kvm: vmwrite failed: field=%lx val=%lx err=%d\n",
|
||||
vmx_insn_failed("kvm: vmwrite failed: field=%lx val=%lx err=%u\n",
|
||||
field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
|
||||
}
|
||||
|
||||
noinline void vmclear_error(struct vmcs *vmcs, u64 phys_addr)
|
||||
{
|
||||
vmx_insn_failed("kvm: vmclear failed: %p/%llx\n", vmcs, phys_addr);
|
||||
vmx_insn_failed("kvm: vmclear failed: %p/%llx err=%u\n",
|
||||
vmcs, phys_addr, vmcs_read32(VM_INSTRUCTION_ERROR));
|
||||
}
|
||||
|
||||
noinline void vmptrld_error(struct vmcs *vmcs, u64 phys_addr)
|
||||
{
|
||||
vmx_insn_failed("kvm: vmptrld failed: %p/%llx\n", vmcs, phys_addr);
|
||||
vmx_insn_failed("kvm: vmptrld failed: %p/%llx err=%u\n",
|
||||
vmcs, phys_addr, vmcs_read32(VM_INSTRUCTION_ERROR));
|
||||
}
|
||||
|
||||
noinline void invvpid_error(unsigned long ext, u16 vpid, gva_t gva)
|
||||
@ -1712,7 +1720,7 @@ u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
|
||||
nested_cpu_has2(vmcs12, SECONDARY_EXEC_TSC_SCALING))
|
||||
return vmcs12->tsc_multiplier;
|
||||
|
||||
return kvm_default_tsc_scaling_ratio;
|
||||
return kvm_caps.default_tsc_scaling_ratio;
|
||||
}
|
||||
|
||||
static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
|
||||
@ -2237,7 +2245,18 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
if ((data & PMU_CAP_LBR_FMT) !=
|
||||
(vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT))
|
||||
return 1;
|
||||
if (!intel_pmu_lbr_is_compatible(vcpu))
|
||||
if (!cpuid_model_is_consistent(vcpu))
|
||||
return 1;
|
||||
}
|
||||
if (data & PERF_CAP_PEBS_FORMAT) {
|
||||
if ((data & PERF_CAP_PEBS_MASK) !=
|
||||
(vmx_get_perf_capabilities() & PERF_CAP_PEBS_MASK))
|
||||
return 1;
|
||||
if (!guest_cpuid_has(vcpu, X86_FEATURE_DS))
|
||||
return 1;
|
||||
if (!guest_cpuid_has(vcpu, X86_FEATURE_DTES64))
|
||||
return 1;
|
||||
if (!cpuid_model_is_consistent(vcpu))
|
||||
return 1;
|
||||
}
|
||||
ret = kvm_set_msr_common(vcpu, msr_info);
|
||||
@ -2410,6 +2429,15 @@ static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Filter a set of optional 64-bit VMX controls against a capability MSR.
 *
 * @ctl_opt: control bits the caller would like to enable.
 * @msr:     MSR number handed to rdmsrl() to obtain the allowed bits.
 *
 * Returns @ctl_opt with every bit that is clear in the MSR value removed.
 */
static __init u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr)
{
	u64 supported;

	rdmsrl(msr, supported);
	ctl_opt &= supported;

	return ctl_opt;
}
|
||||
|
||||
static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
|
||||
struct vmx_capability *vmx_cap)
|
||||
{
|
||||
@ -2418,8 +2446,26 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
|
||||
u32 _pin_based_exec_control = 0;
|
||||
u32 _cpu_based_exec_control = 0;
|
||||
u32 _cpu_based_2nd_exec_control = 0;
|
||||
u64 _cpu_based_3rd_exec_control = 0;
|
||||
u32 _vmexit_control = 0;
|
||||
u32 _vmentry_control = 0;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* LOAD/SAVE_DEBUG_CONTROLS are absent because both are mandatory.
|
||||
* SAVE_IA32_PAT and SAVE_IA32_EFER are absent because KVM always
|
||||
* intercepts writes to PAT and EFER, i.e. never enables those controls.
|
||||
*/
|
||||
struct {
|
||||
u32 entry_control;
|
||||
u32 exit_control;
|
||||
} const vmcs_entry_exit_pairs[] = {
|
||||
{ VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL },
|
||||
{ VM_ENTRY_LOAD_IA32_PAT, VM_EXIT_LOAD_IA32_PAT },
|
||||
{ VM_ENTRY_LOAD_IA32_EFER, VM_EXIT_LOAD_IA32_EFER },
|
||||
{ VM_ENTRY_LOAD_BNDCFGS, VM_EXIT_CLEAR_BNDCFGS },
|
||||
{ VM_ENTRY_LOAD_IA32_RTIT_CTL, VM_EXIT_CLEAR_IA32_RTIT_CTL },
|
||||
};
|
||||
|
||||
memset(vmcs_conf, 0, sizeof(*vmcs_conf));
|
||||
min = CPU_BASED_HLT_EXITING |
|
||||
@ -2439,7 +2485,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
|
||||
|
||||
opt = CPU_BASED_TPR_SHADOW |
|
||||
CPU_BASED_USE_MSR_BITMAPS |
|
||||
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
|
||||
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS |
|
||||
CPU_BASED_ACTIVATE_TERTIARY_CONTROLS;
|
||||
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
|
||||
&_cpu_based_exec_control) < 0)
|
||||
return -EIO;
|
||||
@ -2472,7 +2519,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
|
||||
SECONDARY_EXEC_PT_USE_GPA |
|
||||
SECONDARY_EXEC_PT_CONCEAL_VMX |
|
||||
SECONDARY_EXEC_ENABLE_VMFUNC |
|
||||
SECONDARY_EXEC_BUS_LOCK_DETECTION;
|
||||
SECONDARY_EXEC_BUS_LOCK_DETECTION |
|
||||
SECONDARY_EXEC_NOTIFY_VM_EXITING;
|
||||
if (cpu_has_sgx())
|
||||
opt2 |= SECONDARY_EXEC_ENCLS_EXITING;
|
||||
if (adjust_vmx_controls(min2, opt2,
|
||||
@ -2502,15 +2550,30 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
|
||||
CPU_BASED_CR3_STORE_EXITING |
|
||||
CPU_BASED_INVLPG_EXITING);
|
||||
} else if (vmx_cap->ept) {
|
||||
vmx_cap->ept = 0;
|
||||
pr_warn_once("EPT CAP should not exist if not support "
|
||||
"1-setting enable EPT VM-execution control\n");
|
||||
|
||||
if (error_on_inconsistent_vmcs_config)
|
||||
return -EIO;
|
||||
|
||||
vmx_cap->ept = 0;
|
||||
}
|
||||
if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_VPID) &&
|
||||
vmx_cap->vpid) {
|
||||
vmx_cap->vpid = 0;
|
||||
vmx_cap->vpid) {
|
||||
pr_warn_once("VPID CAP should not exist if not support "
|
||||
"1-setting enable VPID VM-execution control\n");
|
||||
|
||||
if (error_on_inconsistent_vmcs_config)
|
||||
return -EIO;
|
||||
|
||||
vmx_cap->vpid = 0;
|
||||
}
|
||||
|
||||
if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_TERTIARY_CONTROLS) {
|
||||
u64 opt3 = TERTIARY_EXEC_IPI_VIRT;
|
||||
|
||||
_cpu_based_3rd_exec_control = adjust_vmx_controls64(opt3,
|
||||
MSR_IA32_VMX_PROCBASED_CTLS3);
|
||||
}
|
||||
|
||||
min = VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT;
|
||||
@ -2551,6 +2614,23 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
|
||||
&_vmentry_control) < 0)
|
||||
return -EIO;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(vmcs_entry_exit_pairs); i++) {
|
||||
u32 n_ctrl = vmcs_entry_exit_pairs[i].entry_control;
|
||||
u32 x_ctrl = vmcs_entry_exit_pairs[i].exit_control;
|
||||
|
||||
if (!(_vmentry_control & n_ctrl) == !(_vmexit_control & x_ctrl))
|
||||
continue;
|
||||
|
||||
pr_warn_once("Inconsistent VM-Entry/VM-Exit pair, entry = %x, exit = %x\n",
|
||||
_vmentry_control & n_ctrl, _vmexit_control & x_ctrl);
|
||||
|
||||
if (error_on_inconsistent_vmcs_config)
|
||||
return -EIO;
|
||||
|
||||
_vmentry_control &= ~n_ctrl;
|
||||
_vmexit_control &= ~x_ctrl;
|
||||
}
|
||||
|
||||
/*
|
||||
* Some cpus support VM_{ENTRY,EXIT}_IA32_PERF_GLOBAL_CTRL but they
|
||||
* can't be used due to an errata where VM Exit may incorrectly clear
|
||||
@ -2599,6 +2679,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
|
||||
vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
|
||||
vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control;
|
||||
vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control;
|
||||
vmcs_conf->cpu_based_3rd_exec_ctrl = _cpu_based_3rd_exec_control;
|
||||
vmcs_conf->vmexit_ctrl = _vmexit_control;
|
||||
vmcs_conf->vmentry_ctrl = _vmentry_control;
|
||||
|
||||
@ -3853,6 +3934,8 @@ static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu)
|
||||
vmx_enable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_RW);
|
||||
vmx_disable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_EOI), MSR_TYPE_W);
|
||||
vmx_disable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W);
|
||||
if (enable_ipiv)
|
||||
vmx_disable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_ICR), MSR_TYPE_RW);
|
||||
}
|
||||
}
|
||||
|
||||
@ -4180,15 +4263,19 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
|
||||
pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
|
||||
if (cpu_has_secondary_exec_ctrls()) {
|
||||
if (kvm_vcpu_apicv_active(vcpu))
|
||||
secondary_exec_controls_setbit(vmx,
|
||||
SECONDARY_EXEC_APIC_REGISTER_VIRT |
|
||||
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
|
||||
else
|
||||
secondary_exec_controls_clearbit(vmx,
|
||||
SECONDARY_EXEC_APIC_REGISTER_VIRT |
|
||||
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
|
||||
|
||||
if (kvm_vcpu_apicv_active(vcpu)) {
|
||||
secondary_exec_controls_setbit(vmx,
|
||||
SECONDARY_EXEC_APIC_REGISTER_VIRT |
|
||||
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
|
||||
if (enable_ipiv)
|
||||
tertiary_exec_controls_setbit(vmx, TERTIARY_EXEC_IPI_VIRT);
|
||||
} else {
|
||||
secondary_exec_controls_clearbit(vmx,
|
||||
SECONDARY_EXEC_APIC_REGISTER_VIRT |
|
||||
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
|
||||
if (enable_ipiv)
|
||||
tertiary_exec_controls_clearbit(vmx, TERTIARY_EXEC_IPI_VIRT);
|
||||
}
|
||||
|
||||
vmx_update_msr_bitmap_x2apic(vcpu);
|
||||
@ -4220,6 +4307,20 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
|
||||
return exec_control;
|
||||
}
|
||||
|
||||
static u64 vmx_tertiary_exec_control(struct vcpu_vmx *vmx)
|
||||
{
|
||||
u64 exec_control = vmcs_config.cpu_based_3rd_exec_ctrl;
|
||||
|
||||
/*
|
||||
* IPI virtualization relies on APICv. Disable IPI virtualization if
|
||||
* APICv is inhibited.
|
||||
*/
|
||||
if (!enable_ipiv || !kvm_vcpu_apicv_active(&vmx->vcpu))
|
||||
exec_control &= ~TERTIARY_EXEC_IPI_VIRT;
|
||||
|
||||
return exec_control;
|
||||
}
|
||||
|
||||
/*
|
||||
* Adjust a single secondary execution control bit to intercept/allow an
|
||||
* instruction in the guest. This is usually done based on whether or not a
|
||||
@ -4362,13 +4463,48 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
|
||||
if (!vcpu->kvm->arch.bus_lock_detection_enabled)
|
||||
exec_control &= ~SECONDARY_EXEC_BUS_LOCK_DETECTION;
|
||||
|
||||
if (!kvm_notify_vmexit_enabled(vcpu->kvm))
|
||||
exec_control &= ~SECONDARY_EXEC_NOTIFY_VM_EXITING;
|
||||
|
||||
return exec_control;
|
||||
}
|
||||
|
||||
static inline int vmx_get_pid_table_order(struct kvm *kvm)
|
||||
{
|
||||
return get_order(kvm->arch.max_vcpu_ids * sizeof(*to_kvm_vmx(kvm)->pid_table));
|
||||
}
|
||||
|
||||
static int vmx_alloc_ipiv_pid_table(struct kvm *kvm)
|
||||
{
|
||||
struct page *pages;
|
||||
struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
|
||||
|
||||
if (!irqchip_in_kernel(kvm) || !enable_ipiv)
|
||||
return 0;
|
||||
|
||||
if (kvm_vmx->pid_table)
|
||||
return 0;
|
||||
|
||||
pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, vmx_get_pid_table_order(kvm));
|
||||
if (!pages)
|
||||
return -ENOMEM;
|
||||
|
||||
kvm_vmx->pid_table = (void *)page_address(pages);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * vCPU pre-creation hook: its only work is making sure the IPI
 * virtualization PID table exists.  Returns whatever
 * vmx_alloc_ipiv_pid_table() returns.
 */
static int vmx_vcpu_precreate(struct kvm *kvm)
{
	int ret = vmx_alloc_ipiv_pid_table(kvm);

	return ret;
}
|
||||
|
||||
#define VMX_XSS_EXIT_BITMAP 0
|
||||
|
||||
static void init_vmcs(struct vcpu_vmx *vmx)
|
||||
{
|
||||
struct kvm *kvm = vmx->vcpu.kvm;
|
||||
struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
|
||||
|
||||
if (nested)
|
||||
nested_vmx_set_vmcs_shadowing_bitmap();
|
||||
|
||||
@ -4385,6 +4521,9 @@ static void init_vmcs(struct vcpu_vmx *vmx)
|
||||
if (cpu_has_secondary_exec_ctrls())
|
||||
secondary_exec_controls_set(vmx, vmx_secondary_exec_control(vmx));
|
||||
|
||||
if (cpu_has_tertiary_exec_ctrls())
|
||||
tertiary_exec_controls_set(vmx, vmx_tertiary_exec_control(vmx));
|
||||
|
||||
if (enable_apicv && lapic_in_kernel(&vmx->vcpu)) {
|
||||
vmcs_write64(EOI_EXIT_BITMAP0, 0);
|
||||
vmcs_write64(EOI_EXIT_BITMAP1, 0);
|
||||
@ -4397,12 +4536,20 @@ static void init_vmcs(struct vcpu_vmx *vmx)
|
||||
vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
|
||||
}
|
||||
|
||||
if (!kvm_pause_in_guest(vmx->vcpu.kvm)) {
|
||||
if (vmx_can_use_ipiv(&vmx->vcpu)) {
|
||||
vmcs_write64(PID_POINTER_TABLE, __pa(kvm_vmx->pid_table));
|
||||
vmcs_write16(LAST_PID_POINTER_INDEX, kvm->arch.max_vcpu_ids - 1);
|
||||
}
|
||||
|
||||
if (!kvm_pause_in_guest(kvm)) {
|
||||
vmcs_write32(PLE_GAP, ple_gap);
|
||||
vmx->ple_window = ple_window;
|
||||
vmx->ple_window_dirty = true;
|
||||
}
|
||||
|
||||
if (kvm_notify_vmexit_enabled(kvm))
|
||||
vmcs_write32(NOTIFY_WINDOW, kvm->arch.notify_window);
|
||||
|
||||
vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
|
||||
vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
|
||||
vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */
|
||||
@ -4571,13 +4718,13 @@ static void vmx_enable_nmi_window(struct kvm_vcpu *vcpu)
|
||||
exec_controls_setbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING);
|
||||
}
|
||||
|
||||
static void vmx_inject_irq(struct kvm_vcpu *vcpu)
|
||||
static void vmx_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
uint32_t intr;
|
||||
int irq = vcpu->arch.interrupt.nr;
|
||||
|
||||
trace_kvm_inj_virq(irq);
|
||||
trace_kvm_inj_virq(irq, vcpu->arch.interrupt.soft, reinjected);
|
||||
|
||||
++vcpu->stat.irq_injections;
|
||||
if (vmx->rmode.vm86_active) {
|
||||
@ -5689,6 +5836,32 @@ static int handle_bus_lock_vmexit(struct kvm_vcpu *vcpu)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int handle_notify(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long exit_qual = vmx_get_exit_qual(vcpu);
|
||||
bool context_invalid = exit_qual & NOTIFY_VM_CONTEXT_INVALID;
|
||||
|
||||
++vcpu->stat.notify_window_exits;
|
||||
|
||||
/*
|
||||
* Notify VM exit happened while executing iret from NMI,
|
||||
* "blocked by NMI" bit has to be set before next VM entry.
|
||||
*/
|
||||
if (enable_vnmi && (exit_qual & INTR_INFO_UNBLOCK_NMI))
|
||||
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
|
||||
GUEST_INTR_STATE_NMI);
|
||||
|
||||
if (vcpu->kvm->arch.notify_vmexit_flags & KVM_X86_NOTIFY_VMEXIT_USER ||
|
||||
context_invalid) {
|
||||
vcpu->run->exit_reason = KVM_EXIT_NOTIFY;
|
||||
vcpu->run->notify.flags = context_invalid ?
|
||||
KVM_NOTIFY_CONTEXT_INVALID : 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* The exit handlers return 1 if the exit was handled fully and guest execution
|
||||
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
|
||||
@ -5746,6 +5919,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
|
||||
[EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer,
|
||||
[EXIT_REASON_ENCLS] = handle_encls,
|
||||
[EXIT_REASON_BUS_LOCK] = handle_bus_lock_vmexit,
|
||||
[EXIT_REASON_NOTIFY] = handle_notify,
|
||||
};
|
||||
|
||||
static const int kvm_vmx_max_exit_handlers =
|
||||
@ -5843,6 +6017,7 @@ void dump_vmcs(struct kvm_vcpu *vcpu)
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
u32 vmentry_ctl, vmexit_ctl;
|
||||
u32 cpu_based_exec_ctrl, pin_based_exec_ctrl, secondary_exec_control;
|
||||
u64 tertiary_exec_control;
|
||||
unsigned long cr4;
|
||||
int efer_slot;
|
||||
|
||||
@ -5856,9 +6031,16 @@ void dump_vmcs(struct kvm_vcpu *vcpu)
|
||||
cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
|
||||
pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
|
||||
cr4 = vmcs_readl(GUEST_CR4);
|
||||
secondary_exec_control = 0;
|
||||
|
||||
if (cpu_has_secondary_exec_ctrls())
|
||||
secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
|
||||
else
|
||||
secondary_exec_control = 0;
|
||||
|
||||
if (cpu_has_tertiary_exec_ctrls())
|
||||
tertiary_exec_control = vmcs_read64(TERTIARY_VM_EXEC_CONTROL);
|
||||
else
|
||||
tertiary_exec_control = 0;
|
||||
|
||||
pr_err("VMCS %p, last attempted VM-entry on CPU %d\n",
|
||||
vmx->loaded_vmcs->vmcs, vcpu->arch.last_vmentry_cpu);
|
||||
@ -5958,9 +6140,10 @@ void dump_vmcs(struct kvm_vcpu *vcpu)
|
||||
vmx_dump_msrs("host autoload", &vmx->msr_autoload.host);
|
||||
|
||||
pr_err("*** Control State ***\n");
|
||||
pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n",
|
||||
pin_based_exec_ctrl, cpu_based_exec_ctrl, secondary_exec_control);
|
||||
pr_err("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl);
|
||||
pr_err("CPUBased=0x%08x SecondaryExec=0x%08x TertiaryExec=0x%016llx\n",
|
||||
cpu_based_exec_ctrl, secondary_exec_control, tertiary_exec_control);
|
||||
pr_err("PinBased=0x%08x EntryControls=%08x ExitControls=%08x\n",
|
||||
pin_based_exec_ctrl, vmentry_ctl, vmexit_ctl);
|
||||
pr_err("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n",
|
||||
vmcs_read32(EXCEPTION_BITMAP),
|
||||
vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK),
|
||||
@ -6110,7 +6293,8 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
|
||||
exit_reason.basic != EXIT_REASON_EPT_VIOLATION &&
|
||||
exit_reason.basic != EXIT_REASON_PML_FULL &&
|
||||
exit_reason.basic != EXIT_REASON_APIC_ACCESS &&
|
||||
exit_reason.basic != EXIT_REASON_TASK_SWITCH)) {
|
||||
exit_reason.basic != EXIT_REASON_TASK_SWITCH &&
|
||||
exit_reason.basic != EXIT_REASON_NOTIFY)) {
|
||||
int ndata = 3;
|
||||
|
||||
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
@ -6702,9 +6886,14 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
|
||||
{
|
||||
int i, nr_msrs;
|
||||
struct perf_guest_switch_msr *msrs;
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(&vmx->vcpu);
|
||||
|
||||
pmu->host_cross_mapped_mask = 0;
|
||||
if (pmu->pebs_enable & pmu->global_ctrl)
|
||||
intel_pmu_cross_mapped_check(pmu);
|
||||
|
||||
/* Note, nr_msrs may be garbage if perf_guest_get_msrs() returns NULL. */
|
||||
msrs = perf_guest_get_msrs(&nr_msrs);
|
||||
msrs = perf_guest_get_msrs(&nr_msrs, (void *)pmu);
|
||||
if (!msrs)
|
||||
return;
|
||||
|
||||
@ -7080,6 +7269,10 @@ static int vmx_vcpu_create(struct kvm_vcpu *vcpu)
|
||||
goto free_vmcs;
|
||||
}
|
||||
|
||||
if (vmx_can_use_ipiv(vcpu))
|
||||
WRITE_ONCE(to_kvm_vmx(vcpu->kvm)->pid_table[vcpu->vcpu_id],
|
||||
__pa(&vmx->pi_desc) | PID_TABLE_ENTRY_VALID);
|
||||
|
||||
return 0;
|
||||
|
||||
free_vmcs:
|
||||
@ -7416,6 +7609,13 @@ static __init void vmx_set_cpu_caps(void)
|
||||
kvm_cpu_cap_clear(X86_FEATURE_INVPCID);
|
||||
if (vmx_pt_mode_is_host_guest())
|
||||
kvm_cpu_cap_check_and_set(X86_FEATURE_INTEL_PT);
|
||||
if (vmx_pebs_supported()) {
|
||||
kvm_cpu_cap_check_and_set(X86_FEATURE_DS);
|
||||
kvm_cpu_cap_check_and_set(X86_FEATURE_DTES64);
|
||||
}
|
||||
|
||||
if (!enable_pmu)
|
||||
kvm_cpu_cap_clear(X86_FEATURE_PDCM);
|
||||
|
||||
if (!enable_sgx) {
|
||||
kvm_cpu_cap_clear(X86_FEATURE_SGX);
|
||||
@ -7428,7 +7628,7 @@ static __init void vmx_set_cpu_caps(void)
|
||||
kvm_cpu_cap_set(X86_FEATURE_UMIP);
|
||||
|
||||
/* CPUID 0xD.1 */
|
||||
supported_xss = 0;
|
||||
kvm_caps.supported_xss = 0;
|
||||
if (!cpu_has_vmx_xsaves())
|
||||
kvm_cpu_cap_clear(X86_FEATURE_XSAVES);
|
||||
|
||||
@ -7569,9 +7769,9 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
|
||||
delta_tsc = 0;
|
||||
|
||||
/* Convert to host delta tsc if tsc scaling is enabled */
|
||||
if (vcpu->arch.l1_tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
|
||||
if (vcpu->arch.l1_tsc_scaling_ratio != kvm_caps.default_tsc_scaling_ratio &&
|
||||
delta_tsc && u64_shl_div_u64(delta_tsc,
|
||||
kvm_tsc_scaling_ratio_frac_bits,
|
||||
kvm_caps.tsc_scaling_ratio_frac_bits,
|
||||
vcpu->arch.l1_tsc_scaling_ratio, &delta_tsc))
|
||||
return -ERANGE;
|
||||
|
||||
@ -7716,6 +7916,13 @@ static bool vmx_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)
|
||||
return supported & BIT(reason);
|
||||
}
|
||||
|
||||
/*
 * VM teardown callback (wired up as .vm_destroy in vmx_x86_ops): hand the
 * pages backing this VM's pid_table back to the page allocator.  The order
 * passed to free_pages() is obtained from vmx_get_pid_table_order(kvm).
 *
 * NOTE(review): pid_table is presumably left NULL when IPI virtualization
 * is not in use for the VM -- confirm that free_pages() tolerates a zero
 * address on that path.
 */
static void vmx_vm_destroy(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
|
||||
|
||||
free_pages((unsigned long)kvm_vmx->pid_table, vmx_get_pid_table_order(kvm));
|
||||
}
|
||||
|
||||
static struct kvm_x86_ops vmx_x86_ops __initdata = {
|
||||
.name = "kvm_intel",
|
||||
|
||||
@ -7727,7 +7934,9 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
|
||||
|
||||
.vm_size = sizeof(struct kvm_vmx),
|
||||
.vm_init = vmx_vm_init,
|
||||
.vm_destroy = vmx_vm_destroy,
|
||||
|
||||
.vcpu_precreate = vmx_vcpu_precreate,
|
||||
.vcpu_create = vmx_vcpu_create,
|
||||
.vcpu_free = vmx_vcpu_free,
|
||||
.vcpu_reset = vmx_vcpu_reset,
|
||||
@ -7941,8 +8150,8 @@ static __init int hardware_setup(void)
|
||||
}
|
||||
|
||||
if (!cpu_has_vmx_mpx())
|
||||
supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS |
|
||||
XFEATURE_MASK_BNDCSR);
|
||||
kvm_caps.supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS |
|
||||
XFEATURE_MASK_BNDCSR);
|
||||
|
||||
if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() ||
|
||||
!(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
|
||||
@ -8005,12 +8214,16 @@ static __init int hardware_setup(void)
|
||||
if (!enable_apicv)
|
||||
vmx_x86_ops.sync_pir_to_irr = NULL;
|
||||
|
||||
if (cpu_has_vmx_tsc_scaling())
|
||||
kvm_has_tsc_control = true;
|
||||
if (!enable_apicv || !cpu_has_vmx_ipiv())
|
||||
enable_ipiv = false;
|
||||
|
||||
kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX;
|
||||
kvm_tsc_scaling_ratio_frac_bits = 48;
|
||||
kvm_has_bus_lock_exit = cpu_has_vmx_bus_lock_detection();
|
||||
if (cpu_has_vmx_tsc_scaling())
|
||||
kvm_caps.has_tsc_control = true;
|
||||
|
||||
kvm_caps.max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX;
|
||||
kvm_caps.tsc_scaling_ratio_frac_bits = 48;
|
||||
kvm_caps.has_bus_lock_exit = cpu_has_vmx_bus_lock_detection();
|
||||
kvm_caps.has_notify_vmexit = cpu_has_notify_vmexit();
|
||||
|
||||
set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
|
||||
|
||||
@ -8067,11 +8280,11 @@ static __init int hardware_setup(void)
|
||||
vmx_x86_ops.request_immediate_exit = __kvm_request_immediate_exit;
|
||||
}
|
||||
|
||||
kvm_mce_cap_supported |= MCG_LMCE_P;
|
||||
kvm_caps.supported_mce_cap |= MCG_LMCE_P;
|
||||
|
||||
if (pt_mode != PT_MODE_SYSTEM && pt_mode != PT_MODE_HOST_GUEST)
|
||||
return -EINVAL;
|
||||
if (!enable_ept || !cpu_has_vmx_intel_pt())
|
||||
if (!enable_ept || !enable_pmu || !cpu_has_vmx_intel_pt())
|
||||
pt_mode = PT_MODE_SYSTEM;
|
||||
if (pt_mode == PT_MODE_HOST_GUEST)
|
||||
vmx_init_ops.handle_intel_pt_intr = vmx_handle_intel_pt_intr;
|
||||
|
@ -94,7 +94,7 @@ union vmx_exit_reason {
|
||||
#define vcpu_to_lbr_desc(vcpu) (&to_vmx(vcpu)->lbr_desc)
|
||||
#define vcpu_to_lbr_records(vcpu) (&to_vmx(vcpu)->lbr_desc.records)
|
||||
|
||||
bool intel_pmu_lbr_is_compatible(struct kvm_vcpu *vcpu);
|
||||
void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu);
|
||||
bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu);
|
||||
|
||||
int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu);
|
||||
@ -366,6 +366,8 @@ struct kvm_vmx {
|
||||
unsigned int tss_addr;
|
||||
bool ept_identity_pagetable_done;
|
||||
gpa_t ept_identity_map_addr;
|
||||
/* Posted Interrupt Descriptor (PID) table for IPI virtualization */
|
||||
u64 *pid_table;
|
||||
};
|
||||
|
||||
bool nested_vmx_allowed(struct kvm_vcpu *vcpu);
|
||||
@ -456,35 +458,36 @@ static inline u8 vmx_get_rvi(void)
|
||||
return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
|
||||
}
|
||||
|
||||
#define BUILD_CONTROLS_SHADOW(lname, uname) \
|
||||
static inline void lname##_controls_set(struct vcpu_vmx *vmx, u32 val) \
|
||||
{ \
|
||||
if (vmx->loaded_vmcs->controls_shadow.lname != val) { \
|
||||
vmcs_write32(uname, val); \
|
||||
vmx->loaded_vmcs->controls_shadow.lname = val; \
|
||||
} \
|
||||
} \
|
||||
static inline u32 __##lname##_controls_get(struct loaded_vmcs *vmcs) \
|
||||
{ \
|
||||
return vmcs->controls_shadow.lname; \
|
||||
} \
|
||||
static inline u32 lname##_controls_get(struct vcpu_vmx *vmx) \
|
||||
{ \
|
||||
return __##lname##_controls_get(vmx->loaded_vmcs); \
|
||||
} \
|
||||
static inline void lname##_controls_setbit(struct vcpu_vmx *vmx, u32 val) \
|
||||
{ \
|
||||
lname##_controls_set(vmx, lname##_controls_get(vmx) | val); \
|
||||
} \
|
||||
static inline void lname##_controls_clearbit(struct vcpu_vmx *vmx, u32 val) \
|
||||
{ \
|
||||
lname##_controls_set(vmx, lname##_controls_get(vmx) & ~val); \
|
||||
/*
 * Generate the accessor family for one VMCS control field whose value is
 * cached in loaded_vmcs->controls_shadow.<lname>:
 *
 *   <lname>_controls_set()      - if the new value differs from the shadow,
 *                                 write it with vmcs_write<bits>(uname, ...)
 *                                 and update the shadow; otherwise do nothing
 *   __<lname>_controls_get()    - return the shadow of the given loaded_vmcs
 *   <lname>_controls_get()      - return the shadow of vmx->loaded_vmcs
 *   <lname>_controls_setbit()   - OR "val" into the current value via _set()
 *   <lname>_controls_clearbit() - clear "val" from the current value via _set()
 *
 * "bits" selects both the value type (u<bits>) and the VMCS write helper
 * (vmcs_write<bits>).
 */
#define BUILD_CONTROLS_SHADOW(lname, uname, bits) \
|
||||
static inline void lname##_controls_set(struct vcpu_vmx *vmx, u##bits val) \
|
||||
{ \
|
||||
if (vmx->loaded_vmcs->controls_shadow.lname != val) { \
|
||||
vmcs_write##bits(uname, val); \
|
||||
vmx->loaded_vmcs->controls_shadow.lname = val; \
|
||||
} \
|
||||
} \
|
||||
static inline u##bits __##lname##_controls_get(struct loaded_vmcs *vmcs) \
|
||||
{ \
|
||||
return vmcs->controls_shadow.lname; \
|
||||
} \
|
||||
static inline u##bits lname##_controls_get(struct vcpu_vmx *vmx) \
|
||||
{ \
|
||||
return __##lname##_controls_get(vmx->loaded_vmcs); \
|
||||
} \
|
||||
static inline void lname##_controls_setbit(struct vcpu_vmx *vmx, u##bits val) \
|
||||
{ \
|
||||
lname##_controls_set(vmx, lname##_controls_get(vmx) | val); \
|
||||
} \
|
||||
static inline void lname##_controls_clearbit(struct vcpu_vmx *vmx, u##bits val) \
|
||||
{ \
|
||||
lname##_controls_set(vmx, lname##_controls_get(vmx) & ~val); \
|
||||
}
|
||||
BUILD_CONTROLS_SHADOW(vm_entry, VM_ENTRY_CONTROLS)
|
||||
BUILD_CONTROLS_SHADOW(vm_exit, VM_EXIT_CONTROLS)
|
||||
BUILD_CONTROLS_SHADOW(pin, PIN_BASED_VM_EXEC_CONTROL)
|
||||
BUILD_CONTROLS_SHADOW(exec, CPU_BASED_VM_EXEC_CONTROL)
|
||||
BUILD_CONTROLS_SHADOW(secondary_exec, SECONDARY_VM_EXEC_CONTROL)
|
||||
BUILD_CONTROLS_SHADOW(vm_entry, VM_ENTRY_CONTROLS, 32)
|
||||
BUILD_CONTROLS_SHADOW(vm_exit, VM_EXIT_CONTROLS, 32)
|
||||
BUILD_CONTROLS_SHADOW(pin, PIN_BASED_VM_EXEC_CONTROL, 32)
|
||||
BUILD_CONTROLS_SHADOW(exec, CPU_BASED_VM_EXEC_CONTROL, 32)
|
||||
BUILD_CONTROLS_SHADOW(secondary_exec, SECONDARY_VM_EXEC_CONTROL, 32)
|
||||
BUILD_CONTROLS_SHADOW(tertiary_exec, TERTIARY_VM_EXEC_CONTROL, 64)
|
||||
|
||||
/*
|
||||
* VMX_REGS_LAZY_LOAD_SET - The set of registers that will be updated in the
|
||||
@ -580,4 +583,9 @@ static inline int vmx_get_instr_info_reg2(u32 vmx_instr_info)
|
||||
return (vmx_instr_info >> 28) & 0xf;
|
||||
}
|
||||
|
||||
/*
 * Report whether IPI virtualization may be used for @vcpu: true only when
 * lapic_in_kernel(vcpu) holds and the enable_ipiv setting is still set.
 */
static inline bool vmx_can_use_ipiv(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return lapic_in_kernel(vcpu) && enable_ipiv;
|
||||
}
|
||||
|
||||
#endif /* __KVM_X86_VMX_H */
|
||||
|
@ -87,8 +87,11 @@
|
||||
|
||||
#define MAX_IO_MSRS 256
|
||||
#define KVM_MAX_MCE_BANKS 32
|
||||
u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P;
|
||||
EXPORT_SYMBOL_GPL(kvm_mce_cap_supported);
|
||||
|
||||
/*
 * Single exported instance of the KVM capability state.  Only
 * supported_mce_cap is given a non-zero static default here; all other
 * members start out zero-initialized and are assigned at setup time.
 */
struct kvm_caps kvm_caps __read_mostly = {
|
||||
.supported_mce_cap = MCG_CTL_P | MCG_SER_P,
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(kvm_caps);
|
||||
|
||||
#define ERR_PTR_USR(e) ((void __user *)ERR_PTR(e))
|
||||
|
||||
@ -151,19 +154,6 @@ module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
|
||||
static bool __read_mostly kvmclock_periodic_sync = true;
|
||||
module_param(kvmclock_periodic_sync, bool, S_IRUGO);
|
||||
|
||||
bool __read_mostly kvm_has_tsc_control;
|
||||
EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
|
||||
u32 __read_mostly kvm_max_guest_tsc_khz;
|
||||
EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
|
||||
u8 __read_mostly kvm_tsc_scaling_ratio_frac_bits;
|
||||
EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_frac_bits);
|
||||
u64 __read_mostly kvm_max_tsc_scaling_ratio;
|
||||
EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio);
|
||||
u64 __read_mostly kvm_default_tsc_scaling_ratio;
|
||||
EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio);
|
||||
bool __read_mostly kvm_has_bus_lock_exit;
|
||||
EXPORT_SYMBOL_GPL(kvm_has_bus_lock_exit);
|
||||
|
||||
/* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */
|
||||
static u32 __read_mostly tsc_tolerance_ppm = 250;
|
||||
module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
|
||||
@ -235,8 +225,6 @@ EXPORT_SYMBOL_GPL(enable_apicv);
|
||||
|
||||
u64 __read_mostly host_xss;
|
||||
EXPORT_SYMBOL_GPL(host_xss);
|
||||
u64 __read_mostly supported_xss;
|
||||
EXPORT_SYMBOL_GPL(supported_xss);
|
||||
|
||||
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
|
||||
KVM_GENERIC_VM_STATS(),
|
||||
@ -298,7 +286,8 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
|
||||
STATS_DESC_COUNTER(VCPU, directed_yield_successful),
|
||||
STATS_DESC_COUNTER(VCPU, preemption_reported),
|
||||
STATS_DESC_COUNTER(VCPU, preemption_other),
|
||||
STATS_DESC_ICOUNTER(VCPU, guest_mode)
|
||||
STATS_DESC_ICOUNTER(VCPU, guest_mode),
|
||||
STATS_DESC_COUNTER(VCPU, notify_window_exits),
|
||||
};
|
||||
|
||||
const struct kvm_stats_header kvm_vcpu_stats_header = {
|
||||
@ -311,8 +300,6 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
|
||||
};
|
||||
|
||||
u64 __read_mostly host_xcr0;
|
||||
u64 __read_mostly supported_xcr0;
|
||||
EXPORT_SYMBOL_GPL(supported_xcr0);
|
||||
|
||||
static struct kmem_cache *x86_emulator_cache;
|
||||
|
||||
@ -1450,6 +1437,7 @@ static const u32 msrs_to_save_all[] = {
|
||||
MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13,
|
||||
MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15,
|
||||
MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
|
||||
MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG,
|
||||
|
||||
MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3,
|
||||
MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3,
|
||||
@ -2346,12 +2334,12 @@ static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
|
||||
|
||||
/* Guest TSC same frequency as host TSC? */
|
||||
if (!scale) {
|
||||
kvm_vcpu_write_tsc_multiplier(vcpu, kvm_default_tsc_scaling_ratio);
|
||||
kvm_vcpu_write_tsc_multiplier(vcpu, kvm_caps.default_tsc_scaling_ratio);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* TSC scaling supported? */
|
||||
if (!kvm_has_tsc_control) {
|
||||
if (!kvm_caps.has_tsc_control) {
|
||||
if (user_tsc_khz > tsc_khz) {
|
||||
vcpu->arch.tsc_catchup = 1;
|
||||
vcpu->arch.tsc_always_catchup = 1;
|
||||
@ -2363,10 +2351,10 @@ static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
|
||||
}
|
||||
|
||||
/* TSC scaling required - calculate ratio */
|
||||
ratio = mul_u64_u32_div(1ULL << kvm_tsc_scaling_ratio_frac_bits,
|
||||
ratio = mul_u64_u32_div(1ULL << kvm_caps.tsc_scaling_ratio_frac_bits,
|
||||
user_tsc_khz, tsc_khz);
|
||||
|
||||
if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) {
|
||||
if (ratio == 0 || ratio >= kvm_caps.max_tsc_scaling_ratio) {
|
||||
pr_warn_ratelimited("Invalid TSC scaling ratio - virtual-tsc-khz=%u\n",
|
||||
user_tsc_khz);
|
||||
return -1;
|
||||
@ -2384,7 +2372,7 @@ static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
|
||||
/* tsc_khz can be zero if TSC calibration fails */
|
||||
if (user_tsc_khz == 0) {
|
||||
/* set tsc_scaling_ratio to a safe value */
|
||||
kvm_vcpu_write_tsc_multiplier(vcpu, kvm_default_tsc_scaling_ratio);
|
||||
kvm_vcpu_write_tsc_multiplier(vcpu, kvm_caps.default_tsc_scaling_ratio);
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -2461,18 +2449,18 @@ static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
|
||||
* (frac) represent the fractional part, ie. ratio represents a fixed
|
||||
* point number (mult + frac * 2^(-N)).
|
||||
*
|
||||
* N equals to kvm_tsc_scaling_ratio_frac_bits.
|
||||
* N equals to kvm_caps.tsc_scaling_ratio_frac_bits.
|
||||
*/
|
||||
static inline u64 __scale_tsc(u64 ratio, u64 tsc)
|
||||
{
|
||||
return mul_u64_u64_shr(tsc, ratio, kvm_tsc_scaling_ratio_frac_bits);
|
||||
return mul_u64_u64_shr(tsc, ratio, kvm_caps.tsc_scaling_ratio_frac_bits);
|
||||
}
|
||||
|
||||
u64 kvm_scale_tsc(u64 tsc, u64 ratio)
|
||||
{
|
||||
u64 _tsc = tsc;
|
||||
|
||||
if (ratio != kvm_default_tsc_scaling_ratio)
|
||||
if (ratio != kvm_caps.default_tsc_scaling_ratio)
|
||||
_tsc = __scale_tsc(ratio, tsc);
|
||||
|
||||
return _tsc;
|
||||
@ -2499,11 +2487,11 @@ u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier)
|
||||
{
|
||||
u64 nested_offset;
|
||||
|
||||
if (l2_multiplier == kvm_default_tsc_scaling_ratio)
|
||||
if (l2_multiplier == kvm_caps.default_tsc_scaling_ratio)
|
||||
nested_offset = l1_offset;
|
||||
else
|
||||
nested_offset = mul_s64_u64_shr((s64) l1_offset, l2_multiplier,
|
||||
kvm_tsc_scaling_ratio_frac_bits);
|
||||
kvm_caps.tsc_scaling_ratio_frac_bits);
|
||||
|
||||
nested_offset += l2_offset;
|
||||
return nested_offset;
|
||||
@ -2512,9 +2500,9 @@ EXPORT_SYMBOL_GPL(kvm_calc_nested_tsc_offset);
|
||||
|
||||
u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier)
|
||||
{
|
||||
if (l2_multiplier != kvm_default_tsc_scaling_ratio)
|
||||
if (l2_multiplier != kvm_caps.default_tsc_scaling_ratio)
|
||||
return mul_u64_u64_shr(l1_multiplier, l2_multiplier,
|
||||
kvm_tsc_scaling_ratio_frac_bits);
|
||||
kvm_caps.tsc_scaling_ratio_frac_bits);
|
||||
|
||||
return l1_multiplier;
|
||||
}
|
||||
@ -2556,7 +2544,7 @@ static void kvm_vcpu_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 l1_multipli
|
||||
else
|
||||
vcpu->arch.tsc_scaling_ratio = l1_multiplier;
|
||||
|
||||
if (kvm_has_tsc_control)
|
||||
if (kvm_caps.has_tsc_control)
|
||||
static_call(kvm_x86_write_tsc_multiplier)(
|
||||
vcpu, vcpu->arch.tsc_scaling_ratio);
|
||||
}
|
||||
@ -2692,7 +2680,7 @@ static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
|
||||
|
||||
static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
|
||||
{
|
||||
if (vcpu->arch.l1_tsc_scaling_ratio != kvm_default_tsc_scaling_ratio)
|
||||
if (vcpu->arch.l1_tsc_scaling_ratio != kvm_caps.default_tsc_scaling_ratio)
|
||||
WARN_ON(adjustment < 0);
|
||||
adjustment = kvm_scale_tsc((u64) adjustment,
|
||||
vcpu->arch.l1_tsc_scaling_ratio);
|
||||
@ -3105,7 +3093,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
|
||||
|
||||
/* With all the info we got, fill in the values */
|
||||
|
||||
if (kvm_has_tsc_control)
|
||||
if (kvm_caps.has_tsc_control)
|
||||
tgt_tsc_khz = kvm_scale_tsc(tgt_tsc_khz,
|
||||
v->arch.l1_tsc_scaling_ratio);
|
||||
|
||||
@ -3236,10 +3224,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
/* only 0 or all 1s can be written to IA32_MCi_CTL
|
||||
* some Linux kernels though clear bit 10 in bank 4 to
|
||||
* workaround a BIOS/GART TBL issue on AMD K8s, ignore
|
||||
* this to avoid an uncatched #GP in the guest
|
||||
* this to avoid an uncatched #GP in the guest.
|
||||
*
|
||||
* UNIXWARE clears bit 0 of MC1_CTL to ignore
|
||||
* correctable, single-bit ECC data errors.
|
||||
*/
|
||||
if ((offset & 0x3) == 0 &&
|
||||
data != 0 && (data | (1 << 10)) != ~(u64)0)
|
||||
data != 0 && (data | (1 << 10) | 1) != ~(u64)0)
|
||||
return -1;
|
||||
|
||||
/* MCi_STATUS */
|
||||
@ -3557,9 +3548,25 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
vcpu->arch.ia32_tsc_adjust_msr = data;
|
||||
}
|
||||
break;
|
||||
case MSR_IA32_MISC_ENABLE:
|
||||
case MSR_IA32_MISC_ENABLE: {
|
||||
u64 old_val = vcpu->arch.ia32_misc_enable_msr;
|
||||
u64 pmu_mask = MSR_IA32_MISC_ENABLE_PMU_RO_MASK |
|
||||
MSR_IA32_MISC_ENABLE_EMON;
|
||||
|
||||
/* RO bits */
|
||||
if (!msr_info->host_initiated &&
|
||||
((old_val ^ data) & MSR_IA32_MISC_ENABLE_PMU_RO_MASK))
|
||||
return 1;
|
||||
|
||||
/*
|
||||
* For a dummy user space, the order of setting vPMU capabilities and
|
||||
* initialising MSR_IA32_MISC_ENABLE is not strictly guaranteed, so to
|
||||
* avoid inconsistent functionality we keep the vPMU bits unchanged here.
|
||||
*/
|
||||
data &= ~pmu_mask;
|
||||
data |= old_val & pmu_mask;
|
||||
if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT) &&
|
||||
((vcpu->arch.ia32_misc_enable_msr ^ data) & MSR_IA32_MISC_ENABLE_MWAIT)) {
|
||||
((old_val ^ data) & MSR_IA32_MISC_ENABLE_MWAIT)) {
|
||||
if (!guest_cpuid_has(vcpu, X86_FEATURE_XMM3))
|
||||
return 1;
|
||||
vcpu->arch.ia32_misc_enable_msr = data;
|
||||
@ -3568,6 +3575,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
vcpu->arch.ia32_misc_enable_msr = data;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case MSR_IA32_SMBASE:
|
||||
if (!msr_info->host_initiated)
|
||||
return 1;
|
||||
@ -3594,7 +3602,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
* IA32_XSS[bit 8]. Guests have to use RDMSR/WRMSR rather than
|
||||
* XSAVES/XRSTORS to save/restore PT MSRs.
|
||||
*/
|
||||
if (data & ~supported_xss)
|
||||
if (data & ~kvm_caps.supported_xss)
|
||||
return 1;
|
||||
vcpu->arch.ia32_xss = data;
|
||||
kvm_update_cpuid_runtime(vcpu);
|
||||
@ -3700,7 +3708,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
fallthrough;
|
||||
case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
|
||||
case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
|
||||
if (kvm_pmu_is_valid_msr(vcpu, msr))
|
||||
if (kvm_pmu_is_valid_msr(vcpu, msr, msr_info->host_initiated))
|
||||
return kvm_pmu_set_msr(vcpu, msr_info);
|
||||
|
||||
if (pr || data != 0)
|
||||
@ -3783,7 +3791,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
if (kvm_pmu_is_valid_msr(vcpu, msr))
|
||||
if (kvm_pmu_is_valid_msr(vcpu, msr, msr_info->host_initiated))
|
||||
return kvm_pmu_set_msr(vcpu, msr_info);
|
||||
return KVM_MSR_RET_INVALID;
|
||||
}
|
||||
@ -3863,7 +3871,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
msr_info->data = 0;
|
||||
break;
|
||||
case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
|
||||
if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
|
||||
if (kvm_pmu_is_valid_msr(vcpu, msr_info->index, msr_info->host_initiated))
|
||||
return kvm_pmu_get_msr(vcpu, msr_info);
|
||||
if (!msr_info->host_initiated)
|
||||
return 1;
|
||||
@ -3873,7 +3881,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
|
||||
case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
|
||||
case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
|
||||
if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
|
||||
if (kvm_pmu_is_valid_msr(vcpu, msr_info->index, msr_info->host_initiated))
|
||||
return kvm_pmu_get_msr(vcpu, msr_info);
|
||||
msr_info->data = 0;
|
||||
break;
|
||||
@ -4119,7 +4127,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
|
||||
if (kvm_pmu_is_valid_msr(vcpu, msr_info->index, msr_info->host_initiated))
|
||||
return kvm_pmu_get_msr(vcpu, msr_info);
|
||||
return KVM_MSR_RET_INVALID;
|
||||
}
|
||||
@ -4277,6 +4285,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
case KVM_CAP_GET_MSR_FEATURES:
|
||||
case KVM_CAP_MSR_PLATFORM_INFO:
|
||||
case KVM_CAP_EXCEPTION_PAYLOAD:
|
||||
case KVM_CAP_X86_TRIPLE_FAULT_EVENT:
|
||||
case KVM_CAP_SET_GUEST_DEBUG:
|
||||
case KVM_CAP_LAST_CPU:
|
||||
case KVM_CAP_X86_USER_SPACE_MSR:
|
||||
@ -4354,7 +4363,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
break;
|
||||
case KVM_CAP_TSC_CONTROL:
|
||||
case KVM_CAP_VM_TSC_CONTROL:
|
||||
r = kvm_has_tsc_control;
|
||||
r = kvm_caps.has_tsc_control;
|
||||
break;
|
||||
case KVM_CAP_X2APIC_API:
|
||||
r = KVM_X2APIC_API_VALID_FLAGS;
|
||||
@ -4376,7 +4385,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
r = sched_info_on();
|
||||
break;
|
||||
case KVM_CAP_X86_BUS_LOCK_EXIT:
|
||||
if (kvm_has_bus_lock_exit)
|
||||
if (kvm_caps.has_bus_lock_exit)
|
||||
r = KVM_BUS_LOCK_DETECTION_OFF |
|
||||
KVM_BUS_LOCK_DETECTION_EXIT;
|
||||
else
|
||||
@ -4385,7 +4394,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
case KVM_CAP_XSAVE2: {
|
||||
u64 guest_perm = xstate_get_guest_group_perm();
|
||||
|
||||
r = xstate_required_size(supported_xcr0 & guest_perm, false);
|
||||
r = xstate_required_size(kvm_caps.supported_xcr0 & guest_perm, false);
|
||||
if (r < sizeof(struct kvm_xsave))
|
||||
r = sizeof(struct kvm_xsave);
|
||||
break;
|
||||
@ -4396,6 +4405,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
case KVM_CAP_DISABLE_QUIRKS2:
|
||||
r = KVM_X86_VALID_QUIRKS;
|
||||
break;
|
||||
case KVM_CAP_X86_NOTIFY_VMEXIT:
|
||||
r = kvm_caps.has_notify_vmexit;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -4423,7 +4435,7 @@ static int kvm_x86_dev_get_attr(struct kvm_device_attr *attr)
|
||||
|
||||
switch (attr->attr) {
|
||||
case KVM_X86_XCOMP_GUEST_SUPP:
|
||||
if (put_user(supported_xcr0, uaddr))
|
||||
if (put_user(kvm_caps.supported_xcr0, uaddr))
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
default:
|
||||
@ -4500,8 +4512,8 @@ long kvm_arch_dev_ioctl(struct file *filp,
|
||||
}
|
||||
case KVM_X86_GET_MCE_CAP_SUPPORTED:
|
||||
r = -EFAULT;
|
||||
if (copy_to_user(argp, &kvm_mce_cap_supported,
|
||||
sizeof(kvm_mce_cap_supported)))
|
||||
if (copy_to_user(argp, &kvm_caps.supported_mce_cap,
|
||||
sizeof(kvm_caps.supported_mce_cap)))
|
||||
goto out;
|
||||
r = 0;
|
||||
break;
|
||||
@ -4800,7 +4812,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
|
||||
r = -EINVAL;
|
||||
if (!bank_num || bank_num > KVM_MAX_MCE_BANKS)
|
||||
goto out;
|
||||
if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000))
|
||||
if (mcg_cap & ~(kvm_caps.supported_mce_cap | 0xff | 0xff0000))
|
||||
goto out;
|
||||
r = 0;
|
||||
vcpu->arch.mcg_cap = mcg_cap;
|
||||
@ -4938,6 +4950,10 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
|
||||
| KVM_VCPUEVENT_VALID_SMM);
|
||||
if (vcpu->kvm->arch.exception_payload_enabled)
|
||||
events->flags |= KVM_VCPUEVENT_VALID_PAYLOAD;
|
||||
if (vcpu->kvm->arch.triple_fault_event) {
|
||||
events->triple_fault.pending = kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu);
|
||||
events->flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT;
|
||||
}
|
||||
|
||||
memset(&events->reserved, 0, sizeof(events->reserved));
|
||||
}
|
||||
@ -4951,7 +4967,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
|
||||
| KVM_VCPUEVENT_VALID_SIPI_VECTOR
|
||||
| KVM_VCPUEVENT_VALID_SHADOW
|
||||
| KVM_VCPUEVENT_VALID_SMM
|
||||
| KVM_VCPUEVENT_VALID_PAYLOAD))
|
||||
| KVM_VCPUEVENT_VALID_PAYLOAD
|
||||
| KVM_VCPUEVENT_VALID_TRIPLE_FAULT))
|
||||
return -EINVAL;
|
||||
|
||||
if (events->flags & KVM_VCPUEVENT_VALID_PAYLOAD) {
|
||||
@ -5024,6 +5041,15 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
|
||||
}
|
||||
}
|
||||
|
||||
if (events->flags & KVM_VCPUEVENT_VALID_TRIPLE_FAULT) {
|
||||
if (!vcpu->kvm->arch.triple_fault_event)
|
||||
return -EINVAL;
|
||||
if (events->triple_fault.pending)
|
||||
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
|
||||
else
|
||||
kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu);
|
||||
}
|
||||
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
|
||||
return 0;
|
||||
@ -5092,7 +5118,8 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
|
||||
|
||||
return fpu_copy_uabi_to_guest_fpstate(&vcpu->arch.guest_fpu,
|
||||
guest_xsave->region,
|
||||
supported_xcr0, &vcpu->arch.pkru);
|
||||
kvm_caps.supported_xcr0,
|
||||
&vcpu->arch.pkru);
|
||||
}
|
||||
|
||||
static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
|
||||
@ -5597,8 +5624,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
r = -EINVAL;
|
||||
user_tsc_khz = (u32)arg;
|
||||
|
||||
if (kvm_has_tsc_control &&
|
||||
user_tsc_khz >= kvm_max_guest_tsc_khz)
|
||||
if (kvm_caps.has_tsc_control &&
|
||||
user_tsc_khz >= kvm_caps.max_guest_tsc_khz)
|
||||
goto out;
|
||||
|
||||
if (user_tsc_khz == 0)
|
||||
@ -6025,6 +6052,10 @@ split_irqchip_unlock:
|
||||
kvm->arch.exception_payload_enabled = cap->args[0];
|
||||
r = 0;
|
||||
break;
|
||||
case KVM_CAP_X86_TRIPLE_FAULT_EVENT:
|
||||
kvm->arch.triple_fault_event = cap->args[0];
|
||||
r = 0;
|
||||
break;
|
||||
case KVM_CAP_X86_USER_SPACE_MSR:
|
||||
kvm->arch.user_space_msr_mask = cap->args[0];
|
||||
r = 0;
|
||||
@ -6038,7 +6069,7 @@ split_irqchip_unlock:
|
||||
(cap->args[0] & KVM_BUS_LOCK_DETECTION_EXIT))
|
||||
break;
|
||||
|
||||
if (kvm_has_bus_lock_exit &&
|
||||
if (kvm_caps.has_bus_lock_exit &&
|
||||
cap->args[0] & KVM_BUS_LOCK_DETECTION_EXIT)
|
||||
kvm->arch.bus_lock_detection_enabled = true;
|
||||
r = 0;
|
||||
@ -6101,6 +6132,36 @@ split_irqchip_unlock:
|
||||
}
|
||||
mutex_unlock(&kvm->lock);
|
||||
break;
|
||||
case KVM_CAP_MAX_VCPU_ID:
|
||||
r = -EINVAL;
|
||||
if (cap->args[0] > KVM_MAX_VCPU_IDS)
|
||||
break;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
if (kvm->arch.max_vcpu_ids == cap->args[0]) {
|
||||
r = 0;
|
||||
} else if (!kvm->arch.max_vcpu_ids) {
|
||||
kvm->arch.max_vcpu_ids = cap->args[0];
|
||||
r = 0;
|
||||
}
|
||||
mutex_unlock(&kvm->lock);
|
||||
break;
|
||||
case KVM_CAP_X86_NOTIFY_VMEXIT:
|
||||
r = -EINVAL;
|
||||
if ((u32)cap->args[0] & ~KVM_X86_NOTIFY_VMEXIT_VALID_BITS)
|
||||
break;
|
||||
if (!kvm_caps.has_notify_vmexit)
|
||||
break;
|
||||
if (!((u32)cap->args[0] & KVM_X86_NOTIFY_VMEXIT_ENABLED))
|
||||
break;
|
||||
mutex_lock(&kvm->lock);
|
||||
if (!kvm->created_vcpus) {
|
||||
kvm->arch.notify_window = cap->args[0] >> 32;
|
||||
kvm->arch.notify_vmexit_flags = (u32)cap->args[0];
|
||||
r = 0;
|
||||
}
|
||||
mutex_unlock(&kvm->lock);
|
||||
break;
|
||||
default:
|
||||
r = -EINVAL;
|
||||
break;
|
||||
@ -6573,8 +6634,8 @@ set_pit2_out:
|
||||
r = -EINVAL;
|
||||
user_tsc_khz = (u32)arg;
|
||||
|
||||
if (kvm_has_tsc_control &&
|
||||
user_tsc_khz >= kvm_max_guest_tsc_khz)
|
||||
if (kvm_caps.has_tsc_control &&
|
||||
user_tsc_khz >= kvm_caps.max_guest_tsc_khz)
|
||||
goto out;
|
||||
|
||||
if (user_tsc_khz == 0)
|
||||
@ -6649,15 +6710,12 @@ out:
|
||||
|
||||
static void kvm_init_msr_list(void)
|
||||
{
|
||||
struct x86_pmu_capability x86_pmu;
|
||||
u32 dummy[2];
|
||||
unsigned i;
|
||||
|
||||
BUILD_BUG_ON_MSG(KVM_PMC_MAX_FIXED != 3,
|
||||
"Please update the fixed PMCs in msrs_to_saved_all[]");
|
||||
|
||||
perf_get_x86_pmu_capability(&x86_pmu);
|
||||
|
||||
num_msrs_to_save = 0;
|
||||
num_emulated_msrs = 0;
|
||||
num_msr_based_features = 0;
|
||||
@ -6709,12 +6767,12 @@ static void kvm_init_msr_list(void)
|
||||
break;
|
||||
case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17:
|
||||
if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
|
||||
min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
|
||||
min(INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp))
|
||||
continue;
|
||||
break;
|
||||
case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17:
|
||||
if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
|
||||
min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
|
||||
min(INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp))
|
||||
continue;
|
||||
break;
|
||||
case MSR_IA32_XFD:
|
||||
@ -8740,7 +8798,7 @@ static void kvm_hyperv_tsc_notifier(void)
|
||||
/* TSC frequency always matches when on Hyper-V */
|
||||
for_each_present_cpu(cpu)
|
||||
per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
|
||||
kvm_max_guest_tsc_khz = tsc_khz;
|
||||
kvm_caps.max_guest_tsc_khz = tsc_khz;
|
||||
|
||||
list_for_each_entry(kvm, &vm_list, vm_list) {
|
||||
__kvm_start_pvclock_update(kvm);
|
||||
@ -9002,7 +9060,7 @@ int kvm_arch_init(void *opaque)
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_XSAVE)) {
|
||||
host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
|
||||
supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0;
|
||||
kvm_caps.supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0;
|
||||
}
|
||||
|
||||
if (pi_inject_timer == -1)
|
||||
@ -9422,6 +9480,11 @@ int kvm_check_nested_events(struct kvm_vcpu *vcpu)
|
||||
|
||||
static void kvm_inject_exception(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
trace_kvm_inj_exception(vcpu->arch.exception.nr,
|
||||
vcpu->arch.exception.has_error_code,
|
||||
vcpu->arch.exception.error_code,
|
||||
vcpu->arch.exception.injected);
|
||||
|
||||
if (vcpu->arch.exception.error_code && !is_protmode(vcpu))
|
||||
vcpu->arch.exception.error_code = false;
|
||||
static_call(kvm_x86_queue_exception)(vcpu);
|
||||
@ -9457,7 +9520,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
|
||||
static_call(kvm_x86_inject_nmi)(vcpu);
|
||||
can_inject = false;
|
||||
} else if (vcpu->arch.interrupt.injected) {
|
||||
static_call(kvm_x86_inject_irq)(vcpu);
|
||||
static_call(kvm_x86_inject_irq)(vcpu, true);
|
||||
can_inject = false;
|
||||
}
|
||||
}
|
||||
@ -9479,13 +9542,6 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
|
||||
|
||||
/* try to inject new event if pending */
|
||||
if (vcpu->arch.exception.pending) {
|
||||
trace_kvm_inj_exception(vcpu->arch.exception.nr,
|
||||
vcpu->arch.exception.has_error_code,
|
||||
vcpu->arch.exception.error_code);
|
||||
|
||||
vcpu->arch.exception.pending = false;
|
||||
vcpu->arch.exception.injected = true;
|
||||
|
||||
if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
|
||||
__kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
|
||||
X86_EFLAGS_RF);
|
||||
@ -9499,6 +9555,10 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
|
||||
}
|
||||
|
||||
kvm_inject_exception(vcpu);
|
||||
|
||||
vcpu->arch.exception.pending = false;
|
||||
vcpu->arch.exception.injected = true;
|
||||
|
||||
can_inject = false;
|
||||
}
|
||||
|
||||
@ -9551,7 +9611,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
|
||||
goto out;
|
||||
if (r) {
|
||||
kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false);
|
||||
static_call(kvm_x86_inject_irq)(vcpu);
|
||||
static_call(kvm_x86_inject_irq)(vcpu, false);
|
||||
WARN_ON(static_call(kvm_x86_interrupt_allowed)(vcpu, true) < 0);
|
||||
}
|
||||
if (kvm_cpu_has_injectable_intr(vcpu))
|
||||
@ -11263,11 +11323,17 @@ static int sync_regs(struct kvm_vcpu *vcpu)
|
||||
|
||||
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
|
||||
{
|
||||
if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
|
||||
if (kvm_check_tsc_unstable() && kvm->created_vcpus)
|
||||
pr_warn_once("kvm: SMP vm created on host with unstable TSC; "
|
||||
"guest TSC will not be reliable\n");
|
||||
|
||||
return 0;
|
||||
if (!kvm->arch.max_vcpu_ids)
|
||||
kvm->arch.max_vcpu_ids = KVM_MAX_VCPU_IDS;
|
||||
|
||||
if (id >= kvm->arch.max_vcpu_ids)
|
||||
return -EINVAL;
|
||||
|
||||
return static_call(kvm_x86_vcpu_precreate)(kvm);
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
|
||||
@ -11704,6 +11770,8 @@ int kvm_arch_hardware_setup(void *opaque)
|
||||
if (boot_cpu_has(X86_FEATURE_XSAVES))
|
||||
rdmsrl(MSR_IA32_XSS, host_xss);
|
||||
|
||||
kvm_init_pmu_capability();
|
||||
|
||||
r = ops->hardware_setup();
|
||||
if (r != 0)
|
||||
return r;
|
||||
@ -11713,13 +11781,13 @@ int kvm_arch_hardware_setup(void *opaque)
|
||||
kvm_register_perf_callbacks(ops->handle_intel_pt_intr);
|
||||
|
||||
if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
|
||||
supported_xss = 0;
|
||||
kvm_caps.supported_xss = 0;
|
||||
|
||||
#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
|
||||
cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
|
||||
#undef __kvm_cpu_cap_has
|
||||
|
||||
if (kvm_has_tsc_control) {
|
||||
if (kvm_caps.has_tsc_control) {
|
||||
/*
|
||||
* Make sure the user can only configure tsc_khz values that
|
||||
* fit into a signed integer.
|
||||
@ -11727,10 +11795,10 @@ int kvm_arch_hardware_setup(void *opaque)
|
||||
* be 1 on all machines.
|
||||
*/
|
||||
u64 max = min(0x7fffffffULL,
|
||||
__scale_tsc(kvm_max_tsc_scaling_ratio, tsc_khz));
|
||||
kvm_max_guest_tsc_khz = max;
|
||||
__scale_tsc(kvm_caps.max_tsc_scaling_ratio, tsc_khz));
|
||||
kvm_caps.max_guest_tsc_khz = max;
|
||||
}
|
||||
kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits;
|
||||
kvm_caps.default_tsc_scaling_ratio = 1ULL << kvm_caps.tsc_scaling_ratio_frac_bits;
|
||||
kvm_init_msr_list();
|
||||
return 0;
|
||||
}
|
||||
|
@ -8,6 +8,27 @@
|
||||
#include "kvm_cache_regs.h"
|
||||
#include "kvm_emulate.h"
|
||||
|
||||
struct kvm_caps {
|
||||
/* control of guest tsc rate supported? */
|
||||
bool has_tsc_control;
|
||||
/* maximum supported tsc_khz for guests */
|
||||
u32 max_guest_tsc_khz;
|
||||
/* number of bits of the fractional part of the TSC scaling ratio */
|
||||
u8 tsc_scaling_ratio_frac_bits;
|
||||
/* maximum allowed value of TSC scaling ratio */
|
||||
u64 max_tsc_scaling_ratio;
|
||||
/* 1ull << kvm_caps.tsc_scaling_ratio_frac_bits */
|
||||
u64 default_tsc_scaling_ratio;
|
||||
/* bus lock detection supported? */
|
||||
bool has_bus_lock_exit;
|
||||
/* notify VM exit supported? */
|
||||
bool has_notify_vmexit;
|
||||
|
||||
u64 supported_mce_cap;
|
||||
u64 supported_xcr0;
|
||||
u64 supported_xss;
|
||||
};
|
||||
|
||||
void kvm_spurious_fault(void);
|
||||
|
||||
#define KVM_NESTED_VMENTER_CONSISTENCY_CHECK(consistency_check) \
|
||||
@ -283,14 +304,15 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
|
||||
fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu);
|
||||
|
||||
extern u64 host_xcr0;
|
||||
extern u64 supported_xcr0;
|
||||
extern u64 host_xss;
|
||||
extern u64 supported_xss;
|
||||
|
||||
extern struct kvm_caps kvm_caps;
|
||||
|
||||
extern bool enable_pmu;
|
||||
|
||||
static inline bool kvm_mpx_supported(void)
|
||||
{
|
||||
return (supported_xcr0 & (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR))
|
||||
return (kvm_caps.supported_xcr0 & (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR))
|
||||
== (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
|
||||
}
|
||||
|
||||
@ -344,6 +366,11 @@ static inline bool kvm_cstate_in_guest(struct kvm *kvm)
|
||||
return kvm->arch.cstate_in_guest;
|
||||
}
|
||||
|
||||
static inline bool kvm_notify_vmexit_enabled(struct kvm *kvm)
|
||||
{
|
||||
return kvm->arch.notify_vmexit_flags & KVM_X86_NOTIFY_VMEXIT_ENABLED;
|
||||
}
|
||||
|
||||
enum kvm_intr_type {
|
||||
/* Values are arbitrary, but must be non-zero. */
|
||||
KVM_HANDLING_IRQ = 1,
|
||||
|
@ -19,6 +19,7 @@ struct kvm_memslots;
|
||||
enum kvm_mr_change;
|
||||
|
||||
#include <linux/bits.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/spinlock_types.h>
|
||||
|
||||
@ -69,6 +70,7 @@ struct gfn_to_pfn_cache {
|
||||
struct kvm_vcpu *vcpu;
|
||||
struct list_head list;
|
||||
rwlock_t lock;
|
||||
struct mutex refresh_lock;
|
||||
void *khva;
|
||||
kvm_pfn_t pfn;
|
||||
enum pfn_cache_usage usage;
|
||||
|
@ -270,6 +270,7 @@ struct kvm_xen_exit {
|
||||
#define KVM_EXIT_X86_BUS_LOCK 33
|
||||
#define KVM_EXIT_XEN 34
|
||||
#define KVM_EXIT_RISCV_SBI 35
|
||||
#define KVM_EXIT_NOTIFY 36
|
||||
|
||||
/* For KVM_EXIT_INTERNAL_ERROR */
|
||||
/* Emulate instruction failed. */
|
||||
@ -496,6 +497,11 @@ struct kvm_run {
|
||||
unsigned long args[6];
|
||||
unsigned long ret[2];
|
||||
} riscv_sbi;
|
||||
/* KVM_EXIT_NOTIFY */
|
||||
struct {
|
||||
#define KVM_NOTIFY_CONTEXT_INVALID (1 << 0)
|
||||
__u32 flags;
|
||||
} notify;
|
||||
/* Fix the size of the union. */
|
||||
char padding[256];
|
||||
};
|
||||
@ -1157,6 +1163,9 @@ struct kvm_ppc_resize_hpt {
|
||||
#define KVM_CAP_VM_TSC_CONTROL 214
|
||||
#define KVM_CAP_SYSTEM_EVENT_DATA 215
|
||||
#define KVM_CAP_ARM_SYSTEM_SUSPEND 216
|
||||
#define KVM_CAP_S390_PROTECTED_DUMP 217
|
||||
#define KVM_CAP_X86_TRIPLE_FAULT_EVENT 218
|
||||
#define KVM_CAP_X86_NOTIFY_VMEXIT 219
|
||||
|
||||
#ifdef KVM_CAP_IRQ_ROUTING
|
||||
|
||||
@ -1660,6 +1669,55 @@ struct kvm_s390_pv_unp {
|
||||
__u64 tweak;
|
||||
};
|
||||
|
||||
enum pv_cmd_dmp_id {
|
||||
KVM_PV_DUMP_INIT,
|
||||
KVM_PV_DUMP_CONFIG_STOR_STATE,
|
||||
KVM_PV_DUMP_COMPLETE,
|
||||
KVM_PV_DUMP_CPU,
|
||||
};
|
||||
|
||||
struct kvm_s390_pv_dmp {
|
||||
__u64 subcmd;
|
||||
__u64 buff_addr;
|
||||
__u64 buff_len;
|
||||
__u64 gaddr; /* For dump storage state */
|
||||
__u64 reserved[4];
|
||||
};
|
||||
|
||||
enum pv_cmd_info_id {
|
||||
KVM_PV_INFO_VM,
|
||||
KVM_PV_INFO_DUMP,
|
||||
};
|
||||
|
||||
struct kvm_s390_pv_info_dump {
|
||||
__u64 dump_cpu_buffer_len;
|
||||
__u64 dump_config_mem_buffer_per_1m;
|
||||
__u64 dump_config_finalize_len;
|
||||
};
|
||||
|
||||
struct kvm_s390_pv_info_vm {
|
||||
__u64 inst_calls_list[4];
|
||||
__u64 max_cpus;
|
||||
__u64 max_guests;
|
||||
__u64 max_guest_addr;
|
||||
__u64 feature_indication;
|
||||
};
|
||||
|
||||
struct kvm_s390_pv_info_header {
|
||||
__u32 id;
|
||||
__u32 len_max;
|
||||
__u32 len_written;
|
||||
__u32 reserved;
|
||||
};
|
||||
|
||||
struct kvm_s390_pv_info {
|
||||
struct kvm_s390_pv_info_header header;
|
||||
union {
|
||||
struct kvm_s390_pv_info_dump dump;
|
||||
struct kvm_s390_pv_info_vm vm;
|
||||
};
|
||||
};
|
||||
|
||||
enum pv_cmd_id {
|
||||
KVM_PV_ENABLE,
|
||||
KVM_PV_DISABLE,
|
||||
@ -1668,6 +1726,8 @@ enum pv_cmd_id {
|
||||
KVM_PV_VERIFY,
|
||||
KVM_PV_PREP_RESET,
|
||||
KVM_PV_UNSHARE_ALL,
|
||||
KVM_PV_INFO,
|
||||
KVM_PV_DUMP,
|
||||
};
|
||||
|
||||
struct kvm_pv_cmd {
|
||||
@ -2118,4 +2178,11 @@ struct kvm_stats_desc {
|
||||
/* Available with KVM_CAP_XSAVE2 */
|
||||
#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave)
|
||||
|
||||
/* Available with KVM_CAP_S390_PROTECTED_DUMP */
|
||||
#define KVM_S390_PV_CPU_COMMAND _IOWR(KVMIO, 0xd0, struct kvm_pv_cmd)
|
||||
|
||||
/* Available with KVM_CAP_X86_NOTIFY_VMEXIT */
|
||||
#define KVM_X86_NOTIFY_VMEXIT_ENABLED (1ULL << 0)
|
||||
#define KVM_X86_NOTIFY_VMEXIT_USER (1ULL << 1)
|
||||
|
||||
#endif /* __LINUX_KVM_H */
|
||||
|
5
tools/testing/selftests/kvm/.gitignore
vendored
5
tools/testing/selftests/kvm/.gitignore
vendored
@ -25,6 +25,7 @@
|
||||
/x86_64/hyperv_cpuid
|
||||
/x86_64/hyperv_features
|
||||
/x86_64/hyperv_svm_test
|
||||
/x86_64/max_vcpuid_cap_test
|
||||
/x86_64/mmio_warning_test
|
||||
/x86_64/mmu_role_test
|
||||
/x86_64/platform_info_test
|
||||
@ -36,9 +37,10 @@
|
||||
/x86_64/state_test
|
||||
/x86_64/svm_vmcall_test
|
||||
/x86_64/svm_int_ctl_test
|
||||
/x86_64/tsc_scaling_sync
|
||||
/x86_64/svm_nested_soft_inject_test
|
||||
/x86_64/sync_regs_test
|
||||
/x86_64/tsc_msrs_test
|
||||
/x86_64/tsc_scaling_sync
|
||||
/x86_64/userspace_io_test
|
||||
/x86_64/userspace_msr_exit_test
|
||||
/x86_64/vmx_apic_access_test
|
||||
@ -56,6 +58,7 @@
|
||||
/x86_64/xen_vmcall_test
|
||||
/x86_64/xss_msr_test
|
||||
/x86_64/vmx_pmu_caps_test
|
||||
/x86_64/triple_fault_event_test
|
||||
/access_tracking_perf_test
|
||||
/demand_paging_test
|
||||
/dirty_log_test
|
||||
|
@ -93,6 +93,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_soft_inject_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/tsc_scaling_sync
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/userspace_io_test
|
||||
@ -115,6 +116,8 @@ TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/amx_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test
|
||||
TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
|
||||
TEST_GEN_PROGS_x86_64 += demand_paging_test
|
||||
TEST_GEN_PROGS_x86_64 += dirty_log_test
|
||||
|
@ -17,6 +17,8 @@
|
||||
|
||||
#include "../kvm_util.h"
|
||||
|
||||
#define NMI_VECTOR 0x02
|
||||
|
||||
#define X86_EFLAGS_FIXED (1u << 1)
|
||||
|
||||
#define X86_CR4_VME (1ul << 0)
|
||||
@ -385,6 +387,21 @@ static inline void cpu_relax(void)
|
||||
asm volatile("rep; nop" ::: "memory");
|
||||
}
|
||||
|
||||
#define vmmcall() \
|
||||
__asm__ __volatile__( \
|
||||
"vmmcall\n" \
|
||||
)
|
||||
|
||||
#define ud2() \
|
||||
__asm__ __volatile__( \
|
||||
"ud2\n" \
|
||||
)
|
||||
|
||||
#define hlt() \
|
||||
__asm__ __volatile__( \
|
||||
"hlt\n" \
|
||||
)
|
||||
|
||||
bool is_intel_cpu(void);
|
||||
bool is_amd_cpu(void);
|
||||
|
||||
|
@ -16,6 +16,8 @@
|
||||
#define CPUID_SVM_BIT 2
|
||||
#define CPUID_SVM BIT_ULL(CPUID_SVM_BIT)
|
||||
|
||||
#define SVM_EXIT_EXCP_BASE 0x040
|
||||
#define SVM_EXIT_HLT 0x078
|
||||
#define SVM_EXIT_MSR 0x07c
|
||||
#define SVM_EXIT_VMMCALL 0x081
|
||||
|
||||
@ -36,6 +38,16 @@ struct svm_test_data {
|
||||
uint64_t msr_gpa;
|
||||
};
|
||||
|
||||
#define stgi() \
|
||||
__asm__ __volatile__( \
|
||||
"stgi\n" \
|
||||
)
|
||||
|
||||
#define clgi() \
|
||||
__asm__ __volatile__( \
|
||||
"clgi\n" \
|
||||
)
|
||||
|
||||
struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva);
|
||||
void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp);
|
||||
void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa);
|
||||
|
@ -52,7 +52,7 @@ void ucall_init(struct kvm_vm *vm, void *arg)
|
||||
* lower and won't match physical addresses.
|
||||
*/
|
||||
bits = vm->va_bits - 1;
|
||||
bits = vm->pa_bits < bits ? vm->pa_bits : bits;
|
||||
bits = min(vm->pa_bits, bits);
|
||||
end = 1ul << bits;
|
||||
start = end * 5 / 8;
|
||||
step = end / 16;
|
||||
@ -79,7 +79,7 @@ void ucall(uint64_t cmd, int nargs, ...)
|
||||
va_list va;
|
||||
int i;
|
||||
|
||||
nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
|
||||
nargs = min(nargs, UCALL_MAX_ARGS);
|
||||
|
||||
va_start(va, nargs);
|
||||
for (i = 0; i < nargs; ++i)
|
||||
|
@ -53,7 +53,7 @@ void ucall(uint64_t cmd, int nargs, ...)
|
||||
va_list va;
|
||||
int i;
|
||||
|
||||
nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
|
||||
nargs = min(nargs, UCALL_MAX_ARGS);
|
||||
|
||||
va_start(va, nargs);
|
||||
for (i = 0; i < nargs; ++i)
|
||||
|
@ -22,7 +22,7 @@ void ucall(uint64_t cmd, int nargs, ...)
|
||||
va_list va;
|
||||
int i;
|
||||
|
||||
nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
|
||||
nargs = min(nargs, UCALL_MAX_ARGS);
|
||||
|
||||
va_start(va, nargs);
|
||||
for (i = 0; i < nargs; ++i)
|
||||
|
@ -24,7 +24,7 @@ void ucall(uint64_t cmd, int nargs, ...)
|
||||
va_list va;
|
||||
int i;
|
||||
|
||||
nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
|
||||
nargs = min(nargs, UCALL_MAX_ARGS);
|
||||
|
||||
va_start(va, nargs);
|
||||
for (i = 0; i < nargs; ++i)
|
||||
|
@ -14,6 +14,7 @@
|
||||
|
||||
#include "test_util.h"
|
||||
#include "kvm_util.h"
|
||||
#include "kselftest.h"
|
||||
|
||||
enum mop_target {
|
||||
LOGICAL,
|
||||
@ -691,34 +692,92 @@ static void test_errors(void)
|
||||
kvm_vm_free(t.kvm_vm);
|
||||
}
|
||||
|
||||
struct testdef {
|
||||
const char *name;
|
||||
void (*test)(void);
|
||||
int extension;
|
||||
} testlist[] = {
|
||||
{
|
||||
.name = "simple copy",
|
||||
.test = test_copy,
|
||||
},
|
||||
{
|
||||
.name = "generic error checks",
|
||||
.test = test_errors,
|
||||
},
|
||||
{
|
||||
.name = "copy with storage keys",
|
||||
.test = test_copy_key,
|
||||
.extension = 1,
|
||||
},
|
||||
{
|
||||
.name = "copy with key storage protection override",
|
||||
.test = test_copy_key_storage_prot_override,
|
||||
.extension = 1,
|
||||
},
|
||||
{
|
||||
.name = "copy with key fetch protection",
|
||||
.test = test_copy_key_fetch_prot,
|
||||
.extension = 1,
|
||||
},
|
||||
{
|
||||
.name = "copy with key fetch protection override",
|
||||
.test = test_copy_key_fetch_prot_override,
|
||||
.extension = 1,
|
||||
},
|
||||
{
|
||||
.name = "error checks with key",
|
||||
.test = test_errors_key,
|
||||
.extension = 1,
|
||||
},
|
||||
{
|
||||
.name = "termination",
|
||||
.test = test_termination,
|
||||
.extension = 1,
|
||||
},
|
||||
{
|
||||
.name = "error checks with key storage protection override",
|
||||
.test = test_errors_key_storage_prot_override,
|
||||
.extension = 1,
|
||||
},
|
||||
{
|
||||
.name = "error checks without key fetch prot override",
|
||||
.test = test_errors_key_fetch_prot_override_not_enabled,
|
||||
.extension = 1,
|
||||
},
|
||||
{
|
||||
.name = "error checks with key fetch prot override",
|
||||
.test = test_errors_key_fetch_prot_override_enabled,
|
||||
.extension = 1,
|
||||
},
|
||||
};
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int memop_cap, extension_cap;
|
||||
int memop_cap, extension_cap, idx;
|
||||
|
||||
setbuf(stdout, NULL); /* Tell stdout not to buffer its content */
|
||||
|
||||
ksft_print_header();
|
||||
|
||||
memop_cap = kvm_check_cap(KVM_CAP_S390_MEM_OP);
|
||||
extension_cap = kvm_check_cap(KVM_CAP_S390_MEM_OP_EXTENSION);
|
||||
if (!memop_cap) {
|
||||
print_skip("CAP_S390_MEM_OP not supported");
|
||||
exit(KSFT_SKIP);
|
||||
ksft_exit_skip("CAP_S390_MEM_OP not supported.\n");
|
||||
}
|
||||
|
||||
test_copy();
|
||||
if (extension_cap > 0) {
|
||||
test_copy_key();
|
||||
test_copy_key_storage_prot_override();
|
||||
test_copy_key_fetch_prot();
|
||||
test_copy_key_fetch_prot_override();
|
||||
test_errors_key();
|
||||
test_termination();
|
||||
test_errors_key_storage_prot_override();
|
||||
test_errors_key_fetch_prot_override_not_enabled();
|
||||
test_errors_key_fetch_prot_override_enabled();
|
||||
} else {
|
||||
print_skip("storage key memop extension not supported");
|
||||
}
|
||||
test_errors();
|
||||
ksft_set_plan(ARRAY_SIZE(testlist));
|
||||
|
||||
return 0;
|
||||
for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
|
||||
/* Run a test only if the host's memop extension level covers the level it requires. */
if (extension_cap >= testlist[idx].extension) {
|
||||
testlist[idx].test();
|
||||
ksft_test_result_pass("%s\n", testlist[idx].name);
|
||||
} else {
|
||||
ksft_test_result_skip("%s - extension level %d not supported\n",
|
||||
testlist[idx].name,
|
||||
testlist[idx].extension);
|
||||
}
|
||||
}
|
||||
|
||||
ksft_finished(); /* Print results and exit() accordingly */
|
||||
}
|
||||
|
@ -12,6 +12,7 @@
|
||||
|
||||
#include "test_util.h"
|
||||
#include "kvm_util.h"
|
||||
#include "kselftest.h"
|
||||
|
||||
#define VCPU_ID 3
|
||||
#define LOCAL_IRQS 32
|
||||
@ -202,7 +203,7 @@ static void inject_irq(int cpu_id)
|
||||
|
||||
static void test_normal(void)
|
||||
{
|
||||
pr_info("Testing normal reset\n");
|
||||
ksft_print_msg("Testing normal reset\n");
|
||||
/* Create VM */
|
||||
vm = vm_create_default(VCPU_ID, 0, guest_code_initial);
|
||||
run = vcpu_state(vm, VCPU_ID);
|
||||
@ -225,7 +226,7 @@ static void test_normal(void)
|
||||
|
||||
static void test_initial(void)
|
||||
{
|
||||
pr_info("Testing initial reset\n");
|
||||
ksft_print_msg("Testing initial reset\n");
|
||||
vm = vm_create_default(VCPU_ID, 0, guest_code_initial);
|
||||
run = vcpu_state(vm, VCPU_ID);
|
||||
sync_regs = &run->s.regs;
|
||||
@ -247,7 +248,7 @@ static void test_initial(void)
|
||||
|
||||
static void test_clear(void)
|
||||
{
|
||||
pr_info("Testing clear reset\n");
|
||||
ksft_print_msg("Testing clear reset\n");
|
||||
vm = vm_create_default(VCPU_ID, 0, guest_code_initial);
|
||||
run = vcpu_state(vm, VCPU_ID);
|
||||
sync_regs = &run->s.regs;
|
||||
@ -266,14 +267,35 @@ static void test_clear(void)
|
||||
kvm_vm_free(vm);
|
||||
}
|
||||
|
||||
struct testdef {
|
||||
const char *name;
|
||||
void (*test)(void);
|
||||
bool needs_cap;
|
||||
} testlist[] = {
|
||||
{ "initial", test_initial, false },
|
||||
{ "normal", test_normal, true },
|
||||
{ "clear", test_clear, true },
|
||||
};
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
bool has_s390_vcpu_resets = kvm_check_cap(KVM_CAP_S390_VCPU_RESETS);
|
||||
int idx;
|
||||
|
||||
setbuf(stdout, NULL); /* Tell stdout not to buffer its content */
|
||||
|
||||
test_initial();
|
||||
if (kvm_check_cap(KVM_CAP_S390_VCPU_RESETS)) {
|
||||
test_normal();
|
||||
test_clear();
|
||||
ksft_print_header();
|
||||
ksft_set_plan(ARRAY_SIZE(testlist));
|
||||
|
||||
for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
|
||||
if (!testlist[idx].needs_cap || has_s390_vcpu_resets) {
|
||||
testlist[idx].test();
|
||||
ksft_test_result_pass("%s\n", testlist[idx].name);
|
||||
} else {
|
||||
ksft_test_result_skip("%s - no VCPU_RESETS capability\n",
|
||||
testlist[idx].name);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
||||
ksft_finished(); /* Print results and exit() accordingly */
|
||||
}
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "test_util.h"
|
||||
#include "kvm_util.h"
|
||||
#include "diag318_test_handler.h"
|
||||
#include "kselftest.h"
|
||||
|
||||
#define VCPU_ID 5
|
||||
|
||||
@ -74,27 +75,9 @@ static void compare_sregs(struct kvm_sregs *left, struct kvm_sync_regs *right)
|
||||
#define TEST_SYNC_FIELDS (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS|KVM_SYNC_DIAG318)
|
||||
#define INVALID_SYNC_FIELD 0x80000000
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
void test_read_invalid(struct kvm_vm *vm, struct kvm_run *run)
|
||||
{
|
||||
struct kvm_vm *vm;
|
||||
struct kvm_run *run;
|
||||
struct kvm_regs regs;
|
||||
struct kvm_sregs sregs;
|
||||
int rv, cap;
|
||||
|
||||
/* Tell stdout not to buffer its content */
|
||||
setbuf(stdout, NULL);
|
||||
|
||||
cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
|
||||
if (!cap) {
|
||||
print_skip("CAP_SYNC_REGS not supported");
|
||||
exit(KSFT_SKIP);
|
||||
}
|
||||
|
||||
/* Create VM */
|
||||
vm = vm_create_default(VCPU_ID, 0, guest_code);
|
||||
|
||||
run = vcpu_state(vm, VCPU_ID);
|
||||
int rv;
|
||||
|
||||
/* Request reading invalid register set from VCPU. */
|
||||
run->kvm_valid_regs = INVALID_SYNC_FIELD;
|
||||
@ -110,6 +93,11 @@ int main(int argc, char *argv[])
|
||||
"Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
|
||||
rv);
|
||||
vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
|
||||
}
|
||||
|
||||
void test_set_invalid(struct kvm_vm *vm, struct kvm_run *run)
|
||||
{
|
||||
int rv;
|
||||
|
||||
/* Request setting invalid register set into VCPU. */
|
||||
run->kvm_dirty_regs = INVALID_SYNC_FIELD;
|
||||
@ -125,6 +113,13 @@ int main(int argc, char *argv[])
|
||||
"Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
|
||||
rv);
|
||||
vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
|
||||
}
|
||||
|
||||
void test_req_and_verify_all_valid_regs(struct kvm_vm *vm, struct kvm_run *run)
|
||||
{
|
||||
struct kvm_sregs sregs;
|
||||
struct kvm_regs regs;
|
||||
int rv;
|
||||
|
||||
/* Request and verify all valid register sets. */
|
||||
run->kvm_valid_regs = TEST_SYNC_FIELDS;
|
||||
@ -146,6 +141,13 @@ int main(int argc, char *argv[])
|
||||
|
||||
vcpu_sregs_get(vm, VCPU_ID, &sregs);
|
||||
compare_sregs(&sregs, &run->s.regs);
|
||||
}
|
||||
|
||||
void test_set_and_verify_various_reg_values(struct kvm_vm *vm, struct kvm_run *run)
|
||||
{
|
||||
struct kvm_sregs sregs;
|
||||
struct kvm_regs regs;
|
||||
int rv;
|
||||
|
||||
/* Set and verify various register values */
|
||||
run->s.regs.gprs[11] = 0xBAD1DEA;
|
||||
@ -180,6 +182,11 @@ int main(int argc, char *argv[])
|
||||
|
||||
vcpu_sregs_get(vm, VCPU_ID, &sregs);
|
||||
compare_sregs(&sregs, &run->s.regs);
|
||||
}
|
||||
|
||||
void test_clear_kvm_dirty_regs_bits(struct kvm_vm *vm, struct kvm_run *run)
|
||||
{
|
||||
int rv;
|
||||
|
||||
/* Clear kvm_dirty_regs bits, verify new s.regs values are
|
||||
* overwritten with existing guest values.
|
||||
@ -200,8 +207,46 @@ int main(int argc, char *argv[])
|
||||
TEST_ASSERT(run->s.regs.diag318 != 0x4B1D,
|
||||
"diag318 sync regs value incorrect 0x%llx.",
|
||||
run->s.regs.diag318);
|
||||
}
|
||||
|
||||
struct testdef {
|
||||
const char *name;
|
||||
void (*test)(struct kvm_vm *vm, struct kvm_run *run);
|
||||
} testlist[] = {
|
||||
{ "read invalid", test_read_invalid },
|
||||
{ "set invalid", test_set_invalid },
|
||||
{ "request+verify all valid regs", test_req_and_verify_all_valid_regs },
|
||||
{ "set+verify various regs", test_set_and_verify_various_reg_values },
|
||||
{ "clear kvm_dirty_regs bits", test_clear_kvm_dirty_regs_bits },
|
||||
};
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
static struct kvm_run *run;
|
||||
static struct kvm_vm *vm;
|
||||
int idx;
|
||||
|
||||
/* Tell stdout not to buffer its content */
|
||||
setbuf(stdout, NULL);
|
||||
|
||||
ksft_print_header();
|
||||
|
||||
if (!kvm_check_cap(KVM_CAP_SYNC_REGS))
|
||||
ksft_exit_skip("CAP_SYNC_REGS not supported\n");
|
||||
|
||||
ksft_set_plan(ARRAY_SIZE(testlist));
|
||||
|
||||
/* Create VM */
|
||||
vm = vm_create_default(VCPU_ID, 0, guest_code);
|
||||
|
||||
run = vcpu_state(vm, VCPU_ID);
|
||||
|
||||
for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
|
||||
testlist[idx].test(vm, run);
|
||||
ksft_test_result_pass("%s\n", testlist[idx].name);
|
||||
}
|
||||
|
||||
kvm_vm_free(vm);
|
||||
|
||||
return 0;
|
||||
ksft_finished(); /* Print results and exit() accordingly */
|
||||
}
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <sys/mman.h>
|
||||
#include "test_util.h"
|
||||
#include "kvm_util.h"
|
||||
#include "kselftest.h"
|
||||
|
||||
#define PAGE_SHIFT 12
|
||||
#define PAGE_SIZE (1 << PAGE_SHIFT)
|
||||
@ -63,12 +64,12 @@ static enum permission test_protection(void *addr, uint8_t key)
|
||||
}
|
||||
|
||||
enum stage {
|
||||
STAGE_END,
|
||||
STAGE_INIT_SIMPLE,
|
||||
TEST_SIMPLE,
|
||||
STAGE_INIT_FETCH_PROT_OVERRIDE,
|
||||
TEST_FETCH_PROT_OVERRIDE,
|
||||
TEST_STORAGE_PROT_OVERRIDE,
|
||||
STAGE_END /* must be the last entry (its value is the number of tests) */
|
||||
};
|
||||
|
||||
struct test {
|
||||
@ -182,7 +183,7 @@ static void guest_code(void)
|
||||
GUEST_SYNC(perform_next_stage(&i, mapped_0));
|
||||
}
|
||||
|
||||
#define HOST_SYNC(vmp, stage) \
|
||||
#define HOST_SYNC_NO_TAP(vmp, stage) \
|
||||
({ \
|
||||
struct kvm_vm *__vm = (vmp); \
|
||||
struct ucall uc; \
|
||||
@ -198,12 +199,21 @@ static void guest_code(void)
|
||||
ASSERT_EQ(uc.args[1], __stage); \
|
||||
})
|
||||
|
||||
#define HOST_SYNC(vmp, stage) \
|
||||
({ \
|
||||
HOST_SYNC_NO_TAP(vmp, stage); \
|
||||
ksft_test_result_pass("" #stage "\n"); \
|
||||
})
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
struct kvm_vm *vm;
|
||||
struct kvm_run *run;
|
||||
vm_vaddr_t guest_0_page;
|
||||
|
||||
ksft_print_header();
|
||||
ksft_set_plan(STAGE_END);
|
||||
|
||||
vm = vm_create_default(VCPU_ID, 0, guest_code);
|
||||
run = vcpu_state(vm, VCPU_ID);
|
||||
|
||||
@ -212,9 +222,14 @@ int main(int argc, char *argv[])
|
||||
HOST_SYNC(vm, TEST_SIMPLE);
|
||||
|
||||
guest_0_page = vm_vaddr_alloc(vm, PAGE_SIZE, 0);
|
||||
if (guest_0_page != 0)
|
||||
print_skip("Did not allocate page at 0 for fetch protection override tests");
|
||||
HOST_SYNC(vm, STAGE_INIT_FETCH_PROT_OVERRIDE);
|
||||
if (guest_0_page != 0) {
|
||||
/* Use NO_TAP so we don't get a PASS print */
|
||||
HOST_SYNC_NO_TAP(vm, STAGE_INIT_FETCH_PROT_OVERRIDE);
|
||||
ksft_test_result_skip("STAGE_INIT_FETCH_PROT_OVERRIDE - "
|
||||
"Did not allocate page at 0\n");
|
||||
} else {
|
||||
HOST_SYNC(vm, STAGE_INIT_FETCH_PROT_OVERRIDE);
|
||||
}
|
||||
if (guest_0_page == 0)
|
||||
mprotect(addr_gva2hva(vm, (vm_vaddr_t)0), PAGE_SIZE, PROT_READ);
|
||||
run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
|
||||
@ -224,4 +239,8 @@ int main(int argc, char *argv[])
|
||||
run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
|
||||
run->kvm_dirty_regs = KVM_SYNC_CRS;
|
||||
HOST_SYNC(vm, TEST_STORAGE_PROT_OVERRIDE);
|
||||
|
||||
kvm_vm_free(vm);
|
||||
|
||||
ksft_finished(); /* Print results and exit() accordingly */
|
||||
}
|
||||
|
@ -19,7 +19,6 @@
|
||||
#include "vmx.h"
|
||||
|
||||
#define VCPU_ID 5
|
||||
#define NMI_VECTOR 2
|
||||
|
||||
static int ud_count;
|
||||
|
||||
|
@ -42,11 +42,6 @@ struct hv_enlightenments {
|
||||
*/
|
||||
#define VMCB_HV_NESTED_ENLIGHTENMENTS (1U << 31)
|
||||
|
||||
static inline void vmmcall(void)
|
||||
{
|
||||
__asm__ __volatile__("vmmcall");
|
||||
}
|
||||
|
||||
void l2_guest_code(void)
|
||||
{
|
||||
GUEST_SYNC(3);
|
||||
|
54
tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c
Normal file
54
tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c
Normal file
@ -0,0 +1,54 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* maximum APIC ID capability tests
|
||||
*
|
||||
* Copyright (C) 2022, Intel, Inc.
|
||||
*
|
||||
* Tests for getting/setting maximum APIC ID capability
|
||||
*/
|
||||
|
||||
#include "kvm_util.h"
|
||||
#include "../lib/kvm_util_internal.h"
|
||||
|
||||
#define MAX_VCPU_ID 2
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
struct kvm_vm *vm;
|
||||
struct kvm_enable_cap cap = { 0 };
|
||||
int ret;
|
||||
|
||||
vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
|
||||
|
||||
/* Get KVM_CAP_MAX_VCPU_ID cap supported in KVM */
|
||||
ret = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID);
|
||||
|
||||
/* Try to set KVM_CAP_MAX_VCPU_ID beyond KVM cap */
|
||||
cap.cap = KVM_CAP_MAX_VCPU_ID;
|
||||
cap.args[0] = ret + 1;
|
||||
ret = ioctl(vm->fd, KVM_ENABLE_CAP, &cap);
|
||||
TEST_ASSERT(ret < 0,
|
||||
"Unexpected success to enable KVM_CAP_MAX_VCPU_ID "
|
||||
"beyond KVM cap!\n");
|
||||
|
||||
/* Set KVM_CAP_MAX_VCPU_ID */
|
||||
cap.cap = KVM_CAP_MAX_VCPU_ID;
|
||||
cap.args[0] = MAX_VCPU_ID;
|
||||
ret = ioctl(vm->fd, KVM_ENABLE_CAP, &cap);
|
||||
TEST_ASSERT(ret == 0,
|
||||
"Unexpected failure to enable KVM_CAP_MAX_VCPU_ID!\n");
|
||||
|
||||
/* Try to set KVM_CAP_MAX_VCPU_ID again */
|
||||
cap.args[0] = MAX_VCPU_ID + 1;
|
||||
ret = ioctl(vm->fd, KVM_ENABLE_CAP, &cap);
|
||||
TEST_ASSERT(ret < 0,
|
||||
"Unexpected success to enable KVM_CAP_MAX_VCPU_ID again\n");
|
||||
|
||||
/* Create vCPU with id beyond KVM_CAP_MAX_VCPU_ID cap */
|
||||
ret = ioctl(vm->fd, KVM_CREATE_VCPU, MAX_VCPU_ID);
|
||||
TEST_ASSERT(ret < 0,
|
||||
"Unexpected success in creating a vCPU with VCPU ID out of range\n");
|
||||
|
||||
kvm_vm_free(vm);
|
||||
return 0;
|
||||
}
|
217
tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c
Normal file
217
tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c
Normal file
@ -0,0 +1,217 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (C) 2022 Oracle and/or its affiliates.
|
||||
*
|
||||
* Based on:
|
||||
* svm_int_ctl_test
|
||||
*
|
||||
* Copyright (C) 2021, Red Hat, Inc.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdatomic.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include "apic.h"
|
||||
#include "kvm_util.h"
|
||||
#include "processor.h"
|
||||
#include "svm_util.h"
|
||||
#include "test_util.h"
|
||||
#include "../lib/kvm_util_internal.h"
|
||||
|
||||
#define VCPU_ID 0
|
||||
#define INT_NR 0x20
|
||||
#define X86_FEATURE_NRIPS BIT(3)
|
||||
|
||||
static_assert(ATOMIC_INT_LOCK_FREE == 2, "atomic int is not lockless");
|
||||
|
||||
static unsigned int bp_fired;
|
||||
static void guest_bp_handler(struct ex_regs *regs)
|
||||
{
|
||||
bp_fired++;
|
||||
}
|
||||
|
||||
static unsigned int int_fired;
|
||||
static void l2_guest_code_int(void);
|
||||
|
||||
static void guest_int_handler(struct ex_regs *regs)
|
||||
{
|
||||
int_fired++;
|
||||
GUEST_ASSERT_2(regs->rip == (unsigned long)l2_guest_code_int,
|
||||
regs->rip, (unsigned long)l2_guest_code_int);
|
||||
}
|
||||
|
||||
static void l2_guest_code_int(void)
|
||||
{
|
||||
GUEST_ASSERT_1(int_fired == 1, int_fired);
|
||||
vmmcall();
|
||||
ud2();
|
||||
|
||||
GUEST_ASSERT_1(bp_fired == 1, bp_fired);
|
||||
hlt();
|
||||
}
|
||||
|
||||
static atomic_int nmi_stage;
|
||||
#define nmi_stage_get() atomic_load_explicit(&nmi_stage, memory_order_acquire)
|
||||
#define nmi_stage_inc() atomic_fetch_add_explicit(&nmi_stage, 1, memory_order_acq_rel)
|
||||
static void guest_nmi_handler(struct ex_regs *regs)
|
||||
{
|
||||
nmi_stage_inc();
|
||||
|
||||
if (nmi_stage_get() == 1) {
|
||||
vmmcall();
|
||||
GUEST_ASSERT(false);
|
||||
} else {
|
||||
GUEST_ASSERT_1(nmi_stage_get() == 3, nmi_stage_get());
|
||||
GUEST_DONE();
|
||||
}
|
||||
}
|
||||
|
||||
static void l2_guest_code_nmi(void)
|
||||
{
|
||||
ud2();
|
||||
}
|
||||
|
||||
static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t idt_alt)
|
||||
{
|
||||
#define L2_GUEST_STACK_SIZE 64
|
||||
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
|
||||
struct vmcb *vmcb = svm->vmcb;
|
||||
|
||||
if (is_nmi)
|
||||
x2apic_enable();
|
||||
|
||||
/* Prepare for L2 execution. */
|
||||
generic_svm_setup(svm,
|
||||
is_nmi ? l2_guest_code_nmi : l2_guest_code_int,
|
||||
&l2_guest_stack[L2_GUEST_STACK_SIZE]);
|
||||
|
||||
vmcb->control.intercept_exceptions |= BIT(PF_VECTOR) | BIT(UD_VECTOR);
|
||||
vmcb->control.intercept |= BIT(INTERCEPT_NMI) | BIT(INTERCEPT_HLT);
|
||||
|
||||
if (is_nmi) {
|
||||
vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
|
||||
} else {
|
||||
vmcb->control.event_inj = INT_NR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_SOFT;
|
||||
/* The return address pushed on stack */
|
||||
vmcb->control.next_rip = vmcb->save.rip;
|
||||
}
|
||||
|
||||
run_guest(vmcb, svm->vmcb_gpa);
|
||||
GUEST_ASSERT_3(vmcb->control.exit_code == SVM_EXIT_VMMCALL,
|
||||
vmcb->control.exit_code,
|
||||
vmcb->control.exit_info_1, vmcb->control.exit_info_2);
|
||||
|
||||
if (is_nmi) {
|
||||
clgi();
|
||||
x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_NMI);
|
||||
|
||||
GUEST_ASSERT_1(nmi_stage_get() == 1, nmi_stage_get());
|
||||
nmi_stage_inc();
|
||||
|
||||
stgi();
|
||||
/* self-NMI happens here */
|
||||
while (true)
|
||||
cpu_relax();
|
||||
}
|
||||
|
||||
/* Skip over VMMCALL */
|
||||
vmcb->save.rip += 3;
|
||||
|
||||
/* Switch to alternate IDT to cause intervening NPF again */
|
||||
vmcb->save.idtr.base = idt_alt;
|
||||
vmcb->control.clean = 0; /* &= ~BIT(VMCB_DT) would be enough */
|
||||
|
||||
vmcb->control.event_inj = BP_VECTOR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT;
|
||||
/* The return address pushed on stack, skip over UD2 */
|
||||
vmcb->control.next_rip = vmcb->save.rip + 2;
|
||||
|
||||
run_guest(vmcb, svm->vmcb_gpa);
|
||||
GUEST_ASSERT_3(vmcb->control.exit_code == SVM_EXIT_HLT,
|
||||
vmcb->control.exit_code,
|
||||
vmcb->control.exit_info_1, vmcb->control.exit_info_2);
|
||||
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
static void run_test(bool is_nmi)
|
||||
{
|
||||
struct kvm_vm *vm;
|
||||
vm_vaddr_t svm_gva;
|
||||
vm_vaddr_t idt_alt_vm;
|
||||
struct kvm_guest_debug debug;
|
||||
|
||||
pr_info("Running %s test\n", is_nmi ? "NMI" : "soft int");
|
||||
|
||||
vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
|
||||
|
||||
vm_init_descriptor_tables(vm);
|
||||
vcpu_init_descriptor_tables(vm, VCPU_ID);
|
||||
|
||||
vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
|
||||
vm_install_exception_handler(vm, BP_VECTOR, guest_bp_handler);
|
||||
vm_install_exception_handler(vm, INT_NR, guest_int_handler);
|
||||
|
||||
vcpu_alloc_svm(vm, &svm_gva);
|
||||
|
||||
if (!is_nmi) {
|
||||
void *idt, *idt_alt;
|
||||
|
||||
idt_alt_vm = vm_vaddr_alloc_page(vm);
|
||||
idt_alt = addr_gva2hva(vm, idt_alt_vm);
|
||||
idt = addr_gva2hva(vm, vm->idt);
|
||||
memcpy(idt_alt, idt, getpagesize());
|
||||
} else {
|
||||
idt_alt_vm = 0;
|
||||
}
|
||||
vcpu_args_set(vm, VCPU_ID, 3, svm_gva, (uint64_t)is_nmi, (uint64_t)idt_alt_vm);
|
||||
|
||||
memset(&debug, 0, sizeof(debug));
|
||||
vcpu_set_guest_debug(vm, VCPU_ID, &debug);
|
||||
|
||||
struct kvm_run *run = vcpu_state(vm, VCPU_ID);
|
||||
struct ucall uc;
|
||||
|
||||
alarm(2);
|
||||
vcpu_run(vm, VCPU_ID);
|
||||
alarm(0);
|
||||
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
|
||||
"Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
|
||||
run->exit_reason,
|
||||
exit_reason_str(run->exit_reason));
|
||||
|
||||
switch (get_ucall(vm, VCPU_ID, &uc)) {
|
||||
case UCALL_ABORT:
|
||||
TEST_FAIL("%s at %s:%ld, vals = 0x%lx 0x%lx 0x%lx", (const char *)uc.args[0],
|
||||
__FILE__, uc.args[1], uc.args[2], uc.args[3], uc.args[4]);
|
||||
break;
|
||||
/* NOT REACHED */
|
||||
case UCALL_DONE:
|
||||
goto done;
|
||||
default:
|
||||
TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
|
||||
}
|
||||
done:
|
||||
kvm_vm_free(vm);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
struct kvm_cpuid_entry2 *cpuid;
|
||||
|
||||
/* Tell stdout not to buffer its content */
|
||||
setbuf(stdout, NULL);
|
||||
|
||||
nested_svm_check_supported();
|
||||
|
||||
cpuid = kvm_get_supported_cpuid_entry(0x8000000a);
|
||||
TEST_ASSERT(cpuid->edx & X86_FEATURE_NRIPS,
|
||||
"KVM with nSVM is supposed to unconditionally advertise nRIP Save\n");
|
||||
|
||||
atomic_init(&nmi_stage, 0);
|
||||
|
||||
run_test(false);
|
||||
run_test(true);
|
||||
|
||||
return 0;
|
||||
}
|
101
tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c
Normal file
101
tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c
Normal file
@ -0,0 +1,101 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
#include "test_util.h"
|
||||
#include "kvm_util.h"
|
||||
#include "processor.h"
|
||||
#include "vmx.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#include "kselftest.h"
|
||||
|
||||
#define VCPU_ID 0
|
||||
#define ARBITRARY_IO_PORT 0x2000
|
||||
|
||||
/* The virtual machine object. */
|
||||
static struct kvm_vm *vm;
|
||||
|
||||
static void l2_guest_code(void)
|
||||
{
|
||||
asm volatile("inb %%dx, %%al"
|
||||
: : [port] "d" (ARBITRARY_IO_PORT) : "rax");
|
||||
}
|
||||
|
||||
void l1_guest_code(struct vmx_pages *vmx)
|
||||
{
|
||||
#define L2_GUEST_STACK_SIZE 64
|
||||
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
|
||||
|
||||
GUEST_ASSERT(vmx->vmcs_gpa);
|
||||
GUEST_ASSERT(prepare_for_vmx_operation(vmx));
|
||||
GUEST_ASSERT(load_vmcs(vmx));
|
||||
|
||||
prepare_vmcs(vmx, l2_guest_code,
|
||||
&l2_guest_stack[L2_GUEST_STACK_SIZE]);
|
||||
|
||||
GUEST_ASSERT(!vmlaunch());
|
||||
/* L2 should triple fault after a triple fault event injected. */
|
||||
GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT);
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
struct kvm_run *run;
|
||||
struct kvm_vcpu_events events;
|
||||
vm_vaddr_t vmx_pages_gva;
|
||||
struct ucall uc;
|
||||
|
||||
struct kvm_enable_cap cap = {
|
||||
.cap = KVM_CAP_TRIPLE_FAULT_EVENT,
|
||||
.args = {1}
|
||||
};
|
||||
|
||||
if (!nested_vmx_supported()) {
|
||||
print_skip("Nested VMX not supported");
|
||||
exit(KSFT_SKIP);
|
||||
}
|
||||
|
||||
if (!kvm_check_cap(KVM_CAP_TRIPLE_FAULT_EVENT)) {
|
||||
print_skip("KVM_CAP_TRIPLE_FAULT_EVENT not supported");
|
||||
exit(KSFT_SKIP);
|
||||
}
|
||||
|
||||
vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
|
||||
vm_enable_cap(vm, &cap);
|
||||
|
||||
run = vcpu_state(vm, VCPU_ID);
|
||||
vcpu_alloc_vmx(vm, &vmx_pages_gva);
|
||||
vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
|
||||
vcpu_run(vm, VCPU_ID);
|
||||
|
||||
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
|
||||
"Expected KVM_EXIT_IO, got: %u (%s)\n",
|
||||
run->exit_reason, exit_reason_str(run->exit_reason));
|
||||
TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT,
|
||||
"Expected IN from port %d from L2, got port %d",
|
||||
ARBITRARY_IO_PORT, run->io.port);
|
||||
vcpu_events_get(vm, VCPU_ID, &events);
|
||||
events.flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT;
|
||||
events.triple_fault.pending = true;
|
||||
vcpu_events_set(vm, VCPU_ID, &events);
|
||||
run->immediate_exit = true;
|
||||
vcpu_run_complete_io(vm, VCPU_ID);
|
||||
|
||||
vcpu_events_get(vm, VCPU_ID, &events);
|
||||
TEST_ASSERT(events.flags & KVM_VCPUEVENT_VALID_TRIPLE_FAULT,
|
||||
"Triple fault event invalid");
|
||||
TEST_ASSERT(events.triple_fault.pending,
|
||||
"No triple fault pending");
|
||||
vcpu_run(vm, VCPU_ID);
|
||||
|
||||
switch (get_ucall(vm, VCPU_ID, &uc)) {
|
||||
case UCALL_DONE:
|
||||
break;
|
||||
case UCALL_ABORT:
|
||||
TEST_FAIL("%s", (const char *)uc.args[0]);
|
||||
default:
|
||||
TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
|
||||
}
|
||||
|
||||
}
|
@ -724,6 +724,15 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
|
||||
kvm->mn_active_invalidate_count++;
|
||||
spin_unlock(&kvm->mn_invalidate_lock);
|
||||
|
||||
/*
|
||||
* Invalidate pfn caches _before_ invalidating the secondary MMUs, i.e.
|
||||
* before acquiring mmu_lock, to avoid holding mmu_lock while acquiring
|
||||
* each cache's lock. There are relatively few caches in existence at
|
||||
* any given time, and the caches themselves can check for hva overlap,
|
||||
* i.e. don't need to rely on memslot overlap checks for performance.
|
||||
* Because this runs without holding mmu_lock, the pfn caches must use
|
||||
* mn_active_invalidate_count (see above) instead of mmu_notifier_count.
|
||||
*/
|
||||
gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end,
|
||||
hva_range.may_block);
|
||||
|
||||
@ -3763,13 +3772,15 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
r = kvm_arch_vcpu_precreate(kvm, id);
|
||||
if (r) {
|
||||
mutex_unlock(&kvm->lock);
|
||||
return r;
|
||||
}
|
||||
|
||||
kvm->created_vcpus++;
|
||||
mutex_unlock(&kvm->lock);
|
||||
|
||||
r = kvm_arch_vcpu_precreate(kvm, id);
|
||||
if (r)
|
||||
goto vcpu_decrement;
|
||||
|
||||
vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT);
|
||||
if (!vcpu) {
|
||||
r = -ENOMEM;
|
||||
|
@ -95,48 +95,143 @@ bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_check);
|
||||
|
||||
static void __release_gpc(struct kvm *kvm, kvm_pfn_t pfn, void *khva, gpa_t gpa)
|
||||
static void gpc_unmap_khva(struct kvm *kvm, kvm_pfn_t pfn, void *khva)
|
||||
{
|
||||
/* Unmap the old page if it was mapped before, and release it */
|
||||
if (!is_error_noslot_pfn(pfn)) {
|
||||
if (khva) {
|
||||
if (pfn_valid(pfn))
|
||||
kunmap(pfn_to_page(pfn));
|
||||
/* Unmap the old pfn/page if it was mapped before. */
|
||||
if (!is_error_noslot_pfn(pfn) && khva) {
|
||||
if (pfn_valid(pfn))
|
||||
kunmap(pfn_to_page(pfn));
|
||||
#ifdef CONFIG_HAS_IOMEM
|
||||
else
|
||||
memunmap(khva);
|
||||
else
|
||||
memunmap(khva);
|
||||
#endif
|
||||
}
|
||||
|
||||
kvm_release_pfn(pfn, false);
|
||||
}
|
||||
}
|
||||
|
||||
static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, unsigned long uhva)
|
||||
static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_seq)
|
||||
{
|
||||
/*
|
||||
* mn_active_invalidate_count acts for all intents and purposes
|
||||
* like mmu_notifier_count here; but the latter cannot be used
|
||||
* here because the invalidation of caches in the mmu_notifier
|
||||
* event occurs _before_ mmu_notifier_count is elevated.
|
||||
*
|
||||
* Note, it does not matter that mn_active_invalidate_count
|
||||
* is not protected by gpc->lock. It is guaranteed to
|
||||
* be elevated before the mmu_notifier acquires gpc->lock, and
|
||||
* isn't dropped until after mmu_notifier_seq is updated.
|
||||
*/
|
||||
if (kvm->mn_active_invalidate_count)
|
||||
return true;
|
||||
|
||||
/*
|
||||
* Ensure mn_active_invalidate_count is read before
|
||||
* mmu_notifier_seq. This pairs with the smp_wmb() in
|
||||
* mmu_notifier_invalidate_range_end() to guarantee either the
|
||||
* old (non-zero) value of mn_active_invalidate_count or the
|
||||
* new (incremented) value of mmu_notifier_seq is observed.
|
||||
*/
|
||||
smp_rmb();
|
||||
return kvm->mmu_notifier_seq != mmu_seq;
|
||||
}
|
||||
|
||||
static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
|
||||
{
|
||||
/* Note, the new page offset may be different than the old! */
|
||||
void *old_khva = gpc->khva - offset_in_page(gpc->khva);
|
||||
kvm_pfn_t new_pfn = KVM_PFN_ERR_FAULT;
|
||||
void *new_khva = NULL;
|
||||
unsigned long mmu_seq;
|
||||
kvm_pfn_t new_pfn;
|
||||
int retry;
|
||||
|
||||
lockdep_assert_held(&gpc->refresh_lock);
|
||||
|
||||
lockdep_assert_held_write(&gpc->lock);
|
||||
|
||||
/*
|
||||
* Invalidate the cache prior to dropping gpc->lock, the gpa=>uhva
|
||||
* assets have already been updated and so a concurrent check() from a
|
||||
* different task may not fail the gpa/uhva/generation checks.
|
||||
*/
|
||||
gpc->valid = false;
|
||||
|
||||
do {
|
||||
mmu_seq = kvm->mmu_notifier_seq;
|
||||
smp_rmb();
|
||||
|
||||
write_unlock_irq(&gpc->lock);
|
||||
|
||||
/*
|
||||
* If the previous iteration "failed" due to an mmu_notifier
|
||||
* event, release the pfn and unmap the kernel virtual address
|
||||
* from the previous attempt. Unmapping might sleep, so this
|
||||
* needs to be done after dropping the lock. Opportunistically
|
||||
* check for resched while the lock isn't held.
|
||||
*/
|
||||
if (new_pfn != KVM_PFN_ERR_FAULT) {
|
||||
/*
|
||||
* Keep the mapping if the previous iteration reused
|
||||
* the existing mapping and didn't create a new one.
|
||||
*/
|
||||
if (new_khva != old_khva)
|
||||
gpc_unmap_khva(kvm, new_pfn, new_khva);
|
||||
|
||||
kvm_release_pfn_clean(new_pfn);
|
||||
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
/* We always request a writeable mapping */
|
||||
new_pfn = hva_to_pfn(uhva, false, NULL, true, NULL);
|
||||
new_pfn = hva_to_pfn(gpc->uhva, false, NULL, true, NULL);
|
||||
if (is_error_noslot_pfn(new_pfn))
|
||||
break;
|
||||
goto out_error;
|
||||
|
||||
KVM_MMU_READ_LOCK(kvm);
|
||||
retry = mmu_notifier_retry_hva(kvm, mmu_seq, uhva);
|
||||
KVM_MMU_READ_UNLOCK(kvm);
|
||||
if (!retry)
|
||||
break;
|
||||
/*
|
||||
* Obtain a new kernel mapping if KVM itself will access the
|
||||
* pfn. Note, kmap() and memremap() can both sleep, so this
|
||||
* too must be done outside of gpc->lock!
|
||||
*/
|
||||
if (gpc->usage & KVM_HOST_USES_PFN) {
|
||||
if (new_pfn == gpc->pfn) {
|
||||
new_khva = old_khva;
|
||||
} else if (pfn_valid(new_pfn)) {
|
||||
new_khva = kmap(pfn_to_page(new_pfn));
|
||||
#ifdef CONFIG_HAS_IOMEM
|
||||
} else {
|
||||
new_khva = memremap(pfn_to_hpa(new_pfn), PAGE_SIZE, MEMREMAP_WB);
|
||||
#endif
|
||||
}
|
||||
if (!new_khva) {
|
||||
kvm_release_pfn_clean(new_pfn);
|
||||
goto out_error;
|
||||
}
|
||||
}
|
||||
|
||||
cond_resched();
|
||||
} while (1);
|
||||
write_lock_irq(&gpc->lock);
|
||||
|
||||
return new_pfn;
|
||||
/*
|
||||
* Other tasks must wait for _this_ refresh to complete before
|
||||
* attempting to refresh.
|
||||
*/
|
||||
WARN_ON_ONCE(gpc->valid);
|
||||
} while (mmu_notifier_retry_cache(kvm, mmu_seq));
|
||||
|
||||
gpc->valid = true;
|
||||
gpc->pfn = new_pfn;
|
||||
gpc->khva = new_khva + (gpc->gpa & ~PAGE_MASK);
|
||||
|
||||
/*
|
||||
* Put the reference to the _new_ pfn. The pfn is now tracked by the
|
||||
* cache and can be safely migrated, swapped, etc... as the cache will
|
||||
* invalidate any mappings in response to relevant mmu_notifier events.
|
||||
*/
|
||||
kvm_release_pfn_clean(new_pfn);
|
||||
|
||||
return 0;
|
||||
|
||||
out_error:
|
||||
write_lock_irq(&gpc->lock);
|
||||
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
|
||||
@ -146,9 +241,7 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
|
||||
unsigned long page_offset = gpa & ~PAGE_MASK;
|
||||
kvm_pfn_t old_pfn, new_pfn;
|
||||
unsigned long old_uhva;
|
||||
gpa_t old_gpa;
|
||||
void *old_khva;
|
||||
bool old_valid;
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
@ -158,13 +251,18 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
|
||||
if (page_offset + len > PAGE_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* If another task is refreshing the cache, wait for it to complete.
|
||||
* There is no guarantee that concurrent refreshes will see the same
|
||||
* gpa, memslots generation, etc..., so they must be fully serialized.
|
||||
*/
|
||||
mutex_lock(&gpc->refresh_lock);
|
||||
|
||||
write_lock_irq(&gpc->lock);
|
||||
|
||||
old_gpa = gpc->gpa;
|
||||
old_pfn = gpc->pfn;
|
||||
old_khva = gpc->khva - offset_in_page(gpc->khva);
|
||||
old_uhva = gpc->uhva;
|
||||
old_valid = gpc->valid;
|
||||
|
||||
/* If the userspace HVA is invalid, refresh that first */
|
||||
if (gpc->gpa != gpa || gpc->generation != slots->generation ||
|
||||
@ -177,64 +275,17 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
|
||||
gpc->uhva = gfn_to_hva_memslot(gpc->memslot, gfn);
|
||||
|
||||
if (kvm_is_error_hva(gpc->uhva)) {
|
||||
gpc->pfn = KVM_PFN_ERR_FAULT;
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
gpc->uhva += page_offset;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the userspace HVA changed or the PFN was already invalid,
|
||||
* drop the lock and do the HVA to PFN lookup again.
|
||||
*/
|
||||
if (!old_valid || old_uhva != gpc->uhva) {
|
||||
unsigned long uhva = gpc->uhva;
|
||||
void *new_khva = NULL;
|
||||
|
||||
/* Placeholders for "hva is valid but not yet mapped" */
|
||||
gpc->pfn = KVM_PFN_ERR_FAULT;
|
||||
gpc->khva = NULL;
|
||||
gpc->valid = true;
|
||||
|
||||
write_unlock_irq(&gpc->lock);
|
||||
|
||||
new_pfn = hva_to_pfn_retry(kvm, uhva);
|
||||
if (is_error_noslot_pfn(new_pfn)) {
|
||||
ret = -EFAULT;
|
||||
goto map_done;
|
||||
}
|
||||
|
||||
if (gpc->usage & KVM_HOST_USES_PFN) {
|
||||
if (new_pfn == old_pfn) {
|
||||
new_khva = old_khva;
|
||||
old_pfn = KVM_PFN_ERR_FAULT;
|
||||
old_khva = NULL;
|
||||
} else if (pfn_valid(new_pfn)) {
|
||||
new_khva = kmap(pfn_to_page(new_pfn));
|
||||
#ifdef CONFIG_HAS_IOMEM
|
||||
} else {
|
||||
new_khva = memremap(pfn_to_hpa(new_pfn), PAGE_SIZE, MEMREMAP_WB);
|
||||
#endif
|
||||
}
|
||||
if (new_khva)
|
||||
new_khva += page_offset;
|
||||
else
|
||||
ret = -EFAULT;
|
||||
}
|
||||
|
||||
map_done:
|
||||
write_lock_irq(&gpc->lock);
|
||||
if (ret) {
|
||||
gpc->valid = false;
|
||||
gpc->pfn = KVM_PFN_ERR_FAULT;
|
||||
gpc->khva = NULL;
|
||||
} else {
|
||||
/* At this point, gpc->valid may already have been cleared */
|
||||
gpc->pfn = new_pfn;
|
||||
gpc->khva = new_khva;
|
||||
}
|
||||
if (!gpc->valid || old_uhva != gpc->uhva) {
|
||||
ret = hva_to_pfn_retry(kvm, gpc);
|
||||
} else {
|
||||
/* If the HVA→PFN mapping was already valid, don't unmap it. */
|
||||
old_pfn = KVM_PFN_ERR_FAULT;
|
||||
@ -242,9 +293,26 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
|
||||
}
|
||||
|
||||
out:
|
||||
/*
|
||||
* Invalidate the cache and purge the pfn/khva if the refresh failed.
|
||||
* Some/all of the uhva, gpa, and memslot generation info may still be
|
||||
* valid, leave it as is.
|
||||
*/
|
||||
if (ret) {
|
||||
gpc->valid = false;
|
||||
gpc->pfn = KVM_PFN_ERR_FAULT;
|
||||
gpc->khva = NULL;
|
||||
}
|
||||
|
||||
/* Snapshot the new pfn before dropping the lock! */
|
||||
new_pfn = gpc->pfn;
|
||||
|
||||
write_unlock_irq(&gpc->lock);
|
||||
|
||||
__release_gpc(kvm, old_pfn, old_khva, old_gpa);
|
||||
mutex_unlock(&gpc->refresh_lock);
|
||||
|
||||
if (old_pfn != new_pfn)
|
||||
gpc_unmap_khva(kvm, old_pfn, old_khva);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -254,14 +322,13 @@ void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
|
||||
{
|
||||
void *old_khva;
|
||||
kvm_pfn_t old_pfn;
|
||||
gpa_t old_gpa;
|
||||
|
||||
mutex_lock(&gpc->refresh_lock);
|
||||
write_lock_irq(&gpc->lock);
|
||||
|
||||
gpc->valid = false;
|
||||
|
||||
old_khva = gpc->khva - offset_in_page(gpc->khva);
|
||||
old_gpa = gpc->gpa;
|
||||
old_pfn = gpc->pfn;
|
||||
|
||||
/*
|
||||
@ -272,8 +339,9 @@ void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
|
||||
gpc->pfn = KVM_PFN_ERR_FAULT;
|
||||
|
||||
write_unlock_irq(&gpc->lock);
|
||||
mutex_unlock(&gpc->refresh_lock);
|
||||
|
||||
__release_gpc(kvm, old_pfn, old_khva, old_gpa);
|
||||
gpc_unmap_khva(kvm, old_pfn, old_khva);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_unmap);
|
||||
|
||||
@ -286,6 +354,7 @@ int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
|
||||
|
||||
if (!gpc->active) {
|
||||
rwlock_init(&gpc->lock);
|
||||
mutex_init(&gpc->refresh_lock);
|
||||
|
||||
gpc->khva = NULL;
|
||||
gpc->pfn = KVM_PFN_ERR_FAULT;
|
||||
|
Loading…
Reference in New Issue
Block a user