2017-11-24 14:00:33 +00:00
|
|
|
// SPDX-License-Identifier: GPL-2.0
|
2008-03-25 17:47:20 +00:00
|
|
|
/*
|
2017-11-21 15:02:52 +00:00
|
|
|
* hosting IBM Z kernel virtual machines (s390x)
|
2008-03-25 17:47:20 +00:00
|
|
|
*
|
2019-10-02 08:46:58 +00:00
|
|
|
* Copyright IBM Corp. 2008, 2020
|
2008-03-25 17:47:20 +00:00
|
|
|
*
|
|
|
|
* Author(s): Carsten Otte <cotte@de.ibm.com>
|
|
|
|
* Christian Borntraeger <borntraeger@de.ibm.com>
|
2009-05-25 11:40:51 +00:00
|
|
|
* Christian Ehrhardt <ehrhardt@de.ibm.com>
|
2012-08-02 14:10:17 +00:00
|
|
|
* Jason J. Herne <jjherne@us.ibm.com>
|
2008-03-25 17:47:20 +00:00
|
|
|
*/
|
|
|
|
|
2018-12-03 09:20:22 +00:00
|
|
|
#define KMSG_COMPONENT "kvm-s390"
|
|
|
|
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
|
|
|
|
|
2008-03-25 17:47:20 +00:00
|
|
|
#include <linux/compiler.h>
|
|
|
|
#include <linux/err.h>
|
|
|
|
#include <linux/fs.h>
|
2009-05-12 15:21:49 +00:00
|
|
|
#include <linux/hrtimer.h>
|
2008-03-25 17:47:20 +00:00
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/kvm.h>
|
|
|
|
#include <linux/kvm_host.h>
|
2016-03-08 10:54:42 +00:00
|
|
|
#include <linux/mman.h>
|
2008-03-25 17:47:20 +00:00
|
|
|
#include <linux/module.h>
|
2017-02-09 20:20:25 +00:00
|
|
|
#include <linux/moduleparam.h>
|
2014-09-03 08:13:53 +00:00
|
|
|
#include <linux/random.h>
|
2008-03-25 17:47:20 +00:00
|
|
|
#include <linux/slab.h>
|
KVM: s390: interrupt subsystem, cpu timer, waitpsw
This patch contains the s390 interrupt subsystem (similar to in kernel apic)
including timer interrupts (similar to in-kernel-pit) and enabled wait
(similar to in kernel hlt).
In order to achieve that, this patch also introduces intercept handling
for instruction intercepts, and it implements load control instructions.
This patch introduces an ioctl KVM_S390_INTERRUPT which is valid for both
the vm file descriptors and the vcpu file descriptors. In case this ioctl is
issued against a vm file descriptor, the interrupt is considered floating.
Floating interrupts may be delivered to any virtual cpu in the configuration.
The following interrupts are supported:
SIGP STOP - interprocessor signal that stops a remote cpu
SIGP SET PREFIX - interprocessor signal that sets the prefix register of a
(stopped) remote cpu
INT EMERGENCY - interprocessor interrupt, usually used to signal need_resched
and for smp_call_function() in the guest.
PROGRAM INT - exception during program execution such as page fault, illegal
instruction and friends
RESTART - interprocessor signal that starts a stopped cpu
INT VIRTIO - floating interrupt for virtio signalisation
INT SERVICE - floating interrupt for signalisations from the system
service processor
struct kvm_s390_interrupt, which is submitted as ioctl parameter when injecting
an interrupt, also carries parameter data for interrupts along with the interrupt
type. Interrupts on s390 usually have a state that represents the current
operation, or identifies which device has caused the interruption on s390.
kvm_s390_handle_wait() does handle waitpsw in two flavors: in case of a
disabled wait (that is, disabled for interrupts), we exit to userspace. In case
of an enabled wait we set up a timer that equals the cpu clock comparator value
and sleep on a wait queue.
[christian: change virtio interrupt to 0x2603]
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
2008-03-25 17:47:26 +00:00
|
|
|
#include <linux/timer.h>
|
2015-02-06 14:01:21 +00:00
|
|
|
#include <linux/vmalloc.h>
|
2015-03-19 16:36:43 +00:00
|
|
|
#include <linux/bitmap.h>
|
2017-02-02 18:15:33 +00:00
|
|
|
#include <linux/sched/signal.h>
|
2016-08-04 15:54:42 +00:00
|
|
|
#include <linux/string.h>
|
2020-06-09 04:32:42 +00:00
|
|
|
#include <linux/pgtable.h>
|
2022-06-28 13:56:10 +00:00
|
|
|
#include <linux/mmu_notifier.h>
|
2017-02-02 18:15:33 +00:00
|
|
|
|
2010-02-26 21:37:43 +00:00
|
|
|
#include <asm/asm-offsets.h>
|
2008-03-25 17:47:20 +00:00
|
|
|
#include <asm/lowcore.h>
|
2016-05-31 13:06:51 +00:00
|
|
|
#include <asm/stp.h>
|
2016-03-08 10:49:57 +00:00
|
|
|
#include <asm/gmap.h>
|
2009-03-26 14:24:01 +00:00
|
|
|
#include <asm/nmi.h>
|
2012-03-28 17:30:02 +00:00
|
|
|
#include <asm/switch_to.h>
|
2013-07-03 13:18:35 +00:00
|
|
|
#include <asm/isc.h>
|
2012-05-15 12:15:25 +00:00
|
|
|
#include <asm/sclp.h>
|
2016-05-18 14:03:47 +00:00
|
|
|
#include <asm/cpacf.h>
|
- ARM: GICv3 ITS emulation and various fixes. Removal of the old
VGIC implementation.
- s390: support for trapping software breakpoints, nested virtualization
(vSIE), the STHYI opcode, initial extensions for CPU model support.
- MIPS: support for MIPS64 hosts (32-bit guests only) and lots of cleanups,
preliminary to this and the upcoming support for hardware virtualization
extensions.
- x86: support for execute-only mappings in nested EPT; reduced vmexit
latency for TSC deadline timer (by about 30%) on Intel hosts; support for
more than 255 vCPUs.
- PPC: bugfixes.
The ugly bit is the conflicts. A couple of them are simple conflicts due
to 4.7 fixes, but most of them are with other trees. There was definitely
too much reliance on Acked-by here. Some conflicts are for KVM patches
where _I_ gave my Acked-by, but the worst are for this pull request's
patches that touch files outside arch/*/kvm. KVM submaintainers should
probably learn to synchronize better with arch maintainers, with the
latter providing topic branches whenever possible instead of Acked-by.
This is what we do with arch/x86. And I should learn to refuse pull
requests when linux-next sends scary signals, even if that means that
submaintainers have to rebase their branches.
Anyhow, here's the list:
- arch/x86/kvm/vmx.c: handle_pcommit and EXIT_REASON_PCOMMIT were removed
by the nvdimm tree. This tree adds handle_preemption_timer and
EXIT_REASON_PREEMPTION_TIMER at the same place. In general all mentions
of pcommit have to go.
There is also a conflict between a stable fix and this patch, where the
stable fix removed the vmx_create_pml_buffer function and its call.
- virt/kvm/kvm_main.c: kvm_cpu_notifier was removed by the hotplug tree.
This tree adds kvm_io_bus_get_dev at the same place.
- virt/kvm/arm/vgic.c: a few final bugfixes went into 4.7 before the
file was completely removed for 4.8.
- include/linux/irqchip/arm-gic-v3.h: this one is entirely our fault;
this is a change that should have gone in through the irqchip tree and
pulled by kvm-arm. I think I would have rejected this kvm-arm pull
request. The KVM version is the right one, except that it lacks
GITS_BASER_PAGES_SHIFT.
- arch/powerpc: what a mess. For the idle_book3s.S conflict, the KVM
tree is the right one; everything else is trivial. In this case I am
not quite sure what went wrong. The commit that is causing the mess
(fd7bacbca47a, "KVM: PPC: Book3S HV: Fix TB corruption in guest exit
path on HMI interrupt", 2016-05-15) touches both arch/powerpc/kernel/
and arch/powerpc/kvm/. It's large, but at 396 insertions/5 deletions
I guessed that it wasn't really possible to split it and that the 5
deletions wouldn't conflict. That wasn't the case.
- arch/s390: also messy. First is hypfs_diag.c where the KVM tree
moved some code and the s390 tree patched it. You have to reapply the
relevant part of commits 6c22c9863760, plus all of e030c1125eab, to
arch/s390/kernel/diag.c. Or pick the linux-next conflict
resolution from http://marc.info/?l=kvm&m=146717549531603&w=2.
Second, there is a conflict in gmap.c between a stable fix and 4.8.
The KVM version here is the correct one.
I have pushed my resolution at refs/heads/merge-20160802 (commit
3d1f53419842) at git://git.kernel.org/pub/scm/virt/kvm/kvm.git.
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.22 (GNU/Linux)
iQEcBAABAgAGBQJXoGm7AAoJEL/70l94x66DugQIAIj703ePAFepB/fCrKHkZZia
SGrsBdvAtNsOhr7FQ5qvvjLxiv/cv7CymeuJivX8H+4kuUHUllDzey+RPHYHD9X7
U6n1PdCH9F15a3IXc8tDjlDdOMNIKJixYuq1UyNZMU6NFwl00+TZf9JF8A2US65b
x/41W98ilL6nNBAsoDVmCLtPNWAqQ3lajaZELGfcqRQ9ZGKcAYOaLFXHv2YHf2XC
qIDMf+slBGSQ66UoATnYV2gAopNlWbZ7n0vO6tE2KyvhHZ1m399aBX1+k8la/0JI
69r+Tz7ZHUSFtmlmyByi5IAB87myy2WQHyAPwj+4vwJkDGPcl0TrupzbG7+T05Y=
=42ti
-----END PGP SIGNATURE-----
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
- ARM: GICv3 ITS emulation and various fixes. Removal of the
old VGIC implementation.
- s390: support for trapping software breakpoints, nested
virtualization (vSIE), the STHYI opcode, initial extensions
for CPU model support.
- MIPS: support for MIPS64 hosts (32-bit guests only) and lots
of cleanups, preliminary to this and the upcoming support for
hardware virtualization extensions.
- x86: support for execute-only mappings in nested EPT; reduced
vmexit latency for TSC deadline timer (by about 30%) on Intel
hosts; support for more than 255 vCPUs.
- PPC: bugfixes.
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (302 commits)
KVM: PPC: Introduce KVM_CAP_PPC_HTM
MIPS: Select HAVE_KVM for MIPS64_R{2,6}
MIPS: KVM: Reset CP0_PageMask during host TLB flush
MIPS: KVM: Fix ptr->int cast via KVM_GUEST_KSEGX()
MIPS: KVM: Sign extend MFC0/RDHWR results
MIPS: KVM: Fix 64-bit big endian dynamic translation
MIPS: KVM: Fail if ebase doesn't fit in CP0_EBase
MIPS: KVM: Use 64-bit CP0_EBase when appropriate
MIPS: KVM: Set CP0_Status.KX on MIPS64
MIPS: KVM: Make entry code MIPS64 friendly
MIPS: KVM: Use kmap instead of CKSEG0ADDR()
MIPS: KVM: Use virt_to_phys() to get commpage PFN
MIPS: Fix definition of KSEGX() for 64-bit
KVM: VMX: Add VMCS to CPU's loaded VMCSs before VMPTRLD
kvm: x86: nVMX: maintain internal copy of current VMCS
KVM: PPC: Book3S HV: Save/restore TM state in H_CEDE
KVM: PPC: Book3S HV: Pull out TM state save/restore into separate procedures
KVM: arm64: vgic-its: Simplify MAPI error handling
KVM: arm64: vgic-its: Make vgic_its_cmd_handle_mapi similar to other handlers
KVM: arm64: vgic-its: Turn device_id validation into generic ID validation
...
2016-08-02 20:11:27 +00:00
|
|
|
#include <asm/timex.h>
|
2018-09-25 23:16:18 +00:00
|
|
|
#include <asm/ap.h>
|
2019-09-30 08:19:18 +00:00
|
|
|
#include <asm/uv.h>
|
2020-11-21 10:14:56 +00:00
|
|
|
#include <asm/fpu/api.h>
|
2008-03-25 17:47:23 +00:00
|
|
|
#include "kvm-s390.h"
|
2008-03-25 17:47:20 +00:00
|
|
|
#include "gaccess.h"
|
2022-06-06 20:33:16 +00:00
|
|
|
#include "pci.h"
|
2008-03-25 17:47:20 +00:00
|
|
|
|
2012-07-23 15:20:29 +00:00
|
|
|
#define CREATE_TRACE_POINTS
|
|
|
|
#include "trace.h"
|
2012-07-23 15:20:30 +00:00
|
|
|
#include "trace-s390.h"
|
2012-07-23 15:20:29 +00:00
|
|
|
|
2015-02-06 14:01:21 +00:00
|
|
|
/* Maximum transfer size for KVM_S390_MEM_OP */
#define MEM_OP_MAX_SIZE 65536
#define LOCAL_IRQS 32
/* Buffer size: one struct kvm_s390_irq per (KVM_MAX_VCPUS + LOCAL_IRQS) */
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))
|
2015-02-06 14:01:21 +00:00
|
|
|
|
2021-06-18 22:27:05 +00:00
|
|
|
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
|
|
|
|
KVM_GENERIC_VM_STATS(),
|
|
|
|
STATS_DESC_COUNTER(VM, inject_io),
|
|
|
|
STATS_DESC_COUNTER(VM, inject_float_mchk),
|
|
|
|
STATS_DESC_COUNTER(VM, inject_pfault_done),
|
|
|
|
STATS_DESC_COUNTER(VM, inject_service_signal),
|
2022-06-06 20:33:17 +00:00
|
|
|
STATS_DESC_COUNTER(VM, inject_virtio),
|
|
|
|
STATS_DESC_COUNTER(VM, aen_forward)
|
2021-06-18 22:27:05 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
const struct kvm_stats_header kvm_vm_stats_header = {
|
|
|
|
.name_size = KVM_STATS_NAME_SIZE,
|
|
|
|
.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
|
|
|
|
.id_offset = sizeof(struct kvm_stats_header),
|
|
|
|
.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
|
|
|
|
.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
|
|
|
|
sizeof(kvm_vm_stats_desc),
|
|
|
|
};
|
|
|
|
|
2021-06-18 22:27:06 +00:00
|
|
|
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
|
|
|
|
KVM_GENERIC_VCPU_STATS(),
|
|
|
|
STATS_DESC_COUNTER(VCPU, exit_userspace),
|
|
|
|
STATS_DESC_COUNTER(VCPU, exit_null),
|
|
|
|
STATS_DESC_COUNTER(VCPU, exit_external_request),
|
|
|
|
STATS_DESC_COUNTER(VCPU, exit_io_request),
|
|
|
|
STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
|
|
|
|
STATS_DESC_COUNTER(VCPU, exit_stop_request),
|
|
|
|
STATS_DESC_COUNTER(VCPU, exit_validity),
|
|
|
|
STATS_DESC_COUNTER(VCPU, exit_instruction),
|
|
|
|
STATS_DESC_COUNTER(VCPU, exit_pei),
|
|
|
|
STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_lctl),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_lctlg),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_stctl),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_stctg),
|
|
|
|
STATS_DESC_COUNTER(VCPU, exit_program_interruption),
|
|
|
|
STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
|
|
|
|
STATS_DESC_COUNTER(VCPU, exit_operation_exception),
|
|
|
|
STATS_DESC_COUNTER(VCPU, deliver_ckc),
|
|
|
|
STATS_DESC_COUNTER(VCPU, deliver_cputm),
|
|
|
|
STATS_DESC_COUNTER(VCPU, deliver_external_call),
|
|
|
|
STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
|
|
|
|
STATS_DESC_COUNTER(VCPU, deliver_service_signal),
|
|
|
|
STATS_DESC_COUNTER(VCPU, deliver_virtio),
|
|
|
|
STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
|
|
|
|
STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
|
|
|
|
STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
|
|
|
|
STATS_DESC_COUNTER(VCPU, deliver_program),
|
|
|
|
STATS_DESC_COUNTER(VCPU, deliver_io),
|
|
|
|
STATS_DESC_COUNTER(VCPU, deliver_machine_check),
|
|
|
|
STATS_DESC_COUNTER(VCPU, exit_wait_state),
|
|
|
|
STATS_DESC_COUNTER(VCPU, inject_ckc),
|
|
|
|
STATS_DESC_COUNTER(VCPU, inject_cputm),
|
|
|
|
STATS_DESC_COUNTER(VCPU, inject_external_call),
|
|
|
|
STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
|
|
|
|
STATS_DESC_COUNTER(VCPU, inject_mchk),
|
|
|
|
STATS_DESC_COUNTER(VCPU, inject_pfault_init),
|
|
|
|
STATS_DESC_COUNTER(VCPU, inject_program),
|
|
|
|
STATS_DESC_COUNTER(VCPU, inject_restart),
|
|
|
|
STATS_DESC_COUNTER(VCPU, inject_set_prefix),
|
|
|
|
STATS_DESC_COUNTER(VCPU, inject_stop_signal),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_epsw),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_gs),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_io_other),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_lpsw),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_lpswe),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_pfmf),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_ptff),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_sck),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_sckpf),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_stidp),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_spx),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_stpx),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_stap),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_iske),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_ri),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_rrbe),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_sske),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_stsi),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_stfl),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_tb),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_tpi),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_tprot),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_tsch),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_sie),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_essa),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_sthyi),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
|
2021-07-26 15:01:08 +00:00
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
|
|
|
|
STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
|
|
|
|
STATS_DESC_COUNTER(VCPU, diag_9c_forward),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
|
|
|
|
STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
|
2021-06-18 22:27:06 +00:00
|
|
|
STATS_DESC_COUNTER(VCPU, pfault_sync)
|
|
|
|
};
|
|
|
|
|
|
|
|
const struct kvm_stats_header kvm_vcpu_stats_header = {
|
|
|
|
.name_size = KVM_STATS_NAME_SIZE,
|
|
|
|
.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
|
|
|
|
.id_offset = sizeof(struct kvm_stats_header),
|
|
|
|
.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
|
|
|
|
.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
|
|
|
|
sizeof(kvm_vcpu_stats_desc),
|
|
|
|
};
|
|
|
|
|
2016-02-02 14:41:22 +00:00
|
|
|
/* allow nested virtualization in KVM (if enabled by user space) */
|
|
|
|
static int nested;
|
|
|
|
module_param(nested, int, S_IRUGO);
|
|
|
|
MODULE_PARM_DESC(nested, "Nested virtualization support");
|
|
|
|
|
2018-07-13 10:28:31 +00:00
|
|
|
/* allow 1m huge page guest backing, if !nested */
|
|
|
|
static int hpage;
|
|
|
|
module_param(hpage, int, 0444);
|
|
|
|
MODULE_PARM_DESC(hpage, "1m huge page backing support");
|
2008-03-25 17:47:20 +00:00
|
|
|
|
2019-03-05 10:30:02 +00:00
|
|
|
/* maximum percentage of steal time for polling. >100 is treated like 100 */
|
|
|
|
static u8 halt_poll_max_steal = 10;
|
|
|
|
module_param(halt_poll_max_steal, byte, 0644);
|
2019-05-04 06:51:45 +00:00
|
|
|
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
|
2019-03-05 10:30:02 +00:00
|
|
|
|
2020-02-27 09:10:31 +00:00
|
|
|
/* if set to true, the GISA will be initialized and used if available */
|
|
|
|
static bool use_gisa = true;
|
|
|
|
module_param(use_gisa, bool, 0644);
|
|
|
|
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
|
|
|
|
|
2020-09-07 13:26:07 +00:00
|
|
|
/* maximum diag9c forwarding per second */
|
|
|
|
unsigned int diag9c_forwarding_hz;
|
|
|
|
module_param(diag9c_forwarding_hz, uint, 0644);
|
|
|
|
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
|
|
|
|
|
2022-11-11 17:06:32 +00:00
|
|
|
/*
|
|
|
|
* allow asynchronous deinit for protected guests; enable by default since
|
|
|
|
* the feature is opt-in anyway
|
|
|
|
*/
|
|
|
|
static int async_destroy = 1;
|
|
|
|
module_param(async_destroy, int, 0444);
|
|
|
|
MODULE_PARM_DESC(async_destroy, "Asynchronous destroy for protected guests");
|
KVM: s390: pv: asynchronous destroy for reboot
Until now, destroying a protected guest was an entirely synchronous
operation that could potentially take a very long time, depending on
the size of the guest, due to the time needed to clean up the address
space from protected pages.
This patch implements an asynchronous destroy mechanism, that allows a
protected guest to reboot significantly faster than previously.
This is achieved by clearing the pages of the old guest in background.
In case of reboot, the new guest will be able to run in the same
address space almost immediately.
The old protected guest is then only destroyed when all of its memory
has been destroyed or otherwise made non protected.
Two new PV commands are added for the KVM_S390_PV_COMMAND ioctl:
KVM_PV_ASYNC_CLEANUP_PREPARE: set aside the current protected VM for
later asynchronous teardown. The current KVM VM will then continue
immediately as non-protected. If a protected VM had already been
set aside for asynchronous teardown, but without starting the teardown
process, this call will fail. There can be at most one VM set aside at
any time. Once it is set aside, the protected VM only exists in the
context of the Ultravisor, it is not associated with the KVM VM
anymore. Its protected CPUs have already been destroyed, but not its
memory. This command can be issued again immediately after starting
KVM_PV_ASYNC_CLEANUP_PERFORM, without having to wait for completion.
KVM_PV_ASYNC_CLEANUP_PERFORM: tears down the protected VM previously
set aside using KVM_PV_ASYNC_CLEANUP_PREPARE. Ideally the
KVM_PV_ASYNC_CLEANUP_PERFORM PV command should be issued by userspace
from a separate thread. If a fatal signal is received (or if the
process terminates naturally), the command will terminate immediately
without completing. All protected VMs whose teardown was interrupted
will be put in the need_cleanup list. The rest of the normal KVM
teardown process will take care of properly cleaning up all remaining
protected VMs, including the ones on the need_cleanup list.
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Nico Boehr <nrb@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
Link: https://lore.kernel.org/r/20221111170632.77622-2-imbrenda@linux.ibm.com
Message-Id: <20221111170632.77622-2-imbrenda@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
2022-11-11 17:06:27 +00:00
|
|
|
|
2018-02-09 16:26:29 +00:00
|
|
|
/*
|
|
|
|
* For now we handle at most 16 double words as this is what the s390 base
|
|
|
|
* kernel handles and stores in the prefix page. If we ever need to go beyond
|
|
|
|
* this, this requires changes to code, but the external uapi can stay.
|
|
|
|
*/
|
|
|
|
#define SIZE_INTERNAL 16
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Base feature mask that defines default mask for facilities. Consists of the
|
|
|
|
* defines in FACILITIES_KVM and the non-hypervisor managed bits.
|
|
|
|
*/
|
|
|
|
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
|
|
|
|
/*
|
|
|
|
* Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
|
|
|
|
* and defines the facilities that can be enabled via a cpu model.
|
|
|
|
*/
|
|
|
|
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
|
|
|
|
|
|
|
|
static unsigned long kvm_s390_fac_size(void)
|
2013-07-26 13:04:04 +00:00
|
|
|
{
|
2018-02-09 16:26:29 +00:00
|
|
|
BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
|
|
|
|
BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
|
|
|
|
BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
|
2021-05-05 20:01:10 +00:00
|
|
|
sizeof(stfle_fac_list));
|
2018-02-09 16:26:29 +00:00
|
|
|
|
|
|
|
return SIZE_INTERNAL;
|
2013-07-26 13:04:04 +00:00
|
|
|
}
|
|
|
|
|
2015-03-19 16:36:43 +00:00
|
|
|
/* available cpu features supported by kvm */
|
|
|
|
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
|
2016-05-18 14:03:47 +00:00
|
|
|
/* available subfunctions indicated via query / "test bit" */
|
|
|
|
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
|
2015-03-19 16:36:43 +00:00
|
|
|
|
2015-02-02 14:42:51 +00:00
|
|
|
static struct gmap_notifier gmap_notifier;
|
2015-07-08 11:19:48 +00:00
|
|
|
static struct gmap_notifier vsie_gmap_notifier;
|
2015-07-22 13:50:58 +00:00
|
|
|
debug_info_t *kvm_s390_dbf;
|
2019-10-02 08:46:58 +00:00
|
|
|
debug_info_t *kvm_s390_dbf_uv;
|
2015-02-02 14:42:51 +00:00
|
|
|
|
2008-03-25 17:47:20 +00:00
|
|
|
/* Section: not file related */
/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);
|
2013-05-17 12:41:36 +00:00
|
|
|
|
2018-02-07 11:46:45 +00:00
|
|
|
/*
 * Compensate a TOD jump of @delta in the SIE control block @scb by adding
 * -delta to its epoch. If ECD_MEF is set in scb->ecd, the epoch index
 * (epdx) is adjusted as well, including the carry out of the epoch.
 */
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	const u64 adjust = -delta;
	/* sign-extension - we're adding to signed values below */
	const u8 sign_ext = ((s64)adjust < 0) ? (u8)-1 : 0;
	u8 carry;

	scb->epoch += adjust;
	if (!(scb->ecd & ECD_MEF))
		return;

	/* the sum being smaller than the addend means the epoch wrapped */
	carry = scb->epoch < adjust;
	scb->epdx += sign_ext;
	scb->epdx += carry;
}
|
|
|
|
|
2015-05-13 08:58:41 +00:00
|
|
|
/*
|
|
|
|
* This callback is executed during stop_machine(). All CPUs are therefore
|
|
|
|
* temporarily stopped. In order not to change guest behavior, we have to
|
|
|
|
* disable preemption whenever we touch the epoch of kvm and the VCPUs,
|
|
|
|
* so a CPU won't be stopped while calculating with the epoch.
|
|
|
|
*/
|
|
|
|
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
|
|
|
|
void *v)
|
|
|
|
{
|
|
|
|
struct kvm *kvm;
|
|
|
|
struct kvm_vcpu *vcpu;
|
2021-11-16 16:04:02 +00:00
|
|
|
unsigned long i;
|
2015-05-13 08:58:41 +00:00
|
|
|
unsigned long long *delta = v;
|
|
|
|
|
|
|
|
list_for_each_entry(kvm, &vm_list, vm_list) {
|
|
|
|
kvm_for_each_vcpu(i, vcpu, kvm) {
|
2018-02-07 11:46:45 +00:00
|
|
|
kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
|
|
|
|
if (i == 0) {
|
|
|
|
kvm->arch.epoch = vcpu->arch.sie_block->epoch;
|
|
|
|
kvm->arch.epdx = vcpu->arch.sie_block->epdx;
|
|
|
|
}
|
2016-02-15 08:42:25 +00:00
|
|
|
if (vcpu->arch.cputm_enabled)
|
|
|
|
vcpu->arch.cputm_start += *delta;
|
2015-10-29 09:30:36 +00:00
|
|
|
if (vcpu->arch.vsie_block)
|
2018-02-07 11:46:45 +00:00
|
|
|
kvm_clock_sync_scb(vcpu->arch.vsie_block,
|
|
|
|
*delta);
|
2015-05-13 08:58:41 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return NOTIFY_OK;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct notifier_block kvm_clock_notifier = {
|
|
|
|
.notifier_call = kvm_clock_sync,
|
|
|
|
};
|
|
|
|
|
2016-01-21 12:22:54 +00:00
|
|
|
static void allow_cpu_feat(unsigned long nr)
|
|
|
|
{
|
|
|
|
set_bit_inv(nr, kvm_s390_available_cpu_feat);
|
|
|
|
}
|
|
|
|
|
2016-05-18 14:03:47 +00:00
|
|
|
static inline int plo_test_bit(unsigned char nr)
|
|
|
|
{
|
2021-06-21 14:03:56 +00:00
|
|
|
unsigned long function = (unsigned long)nr | 0x100;
|
2016-12-13 13:25:32 +00:00
|
|
|
int cc;
|
2016-05-18 14:03:47 +00:00
|
|
|
|
|
|
|
asm volatile(
|
2021-06-21 14:03:56 +00:00
|
|
|
" lgr 0,%[function]\n"
|
2016-05-18 14:03:47 +00:00
|
|
|
/* Parameter registers are ignored for "test bit" */
|
|
|
|
" plo 0,0,0,0(0)\n"
|
|
|
|
" ipm %0\n"
|
|
|
|
" srl %0,28\n"
|
|
|
|
: "=d" (cc)
|
2021-06-21 14:03:56 +00:00
|
|
|
: [function] "d" (function)
|
|
|
|
: "cc", "0");
|
2016-05-18 14:03:47 +00:00
|
|
|
return cc == 0;
|
|
|
|
}
|
|
|
|
|
2019-10-02 12:34:37 +00:00
|
|
|
/*
 * Execute the instruction with the 16 bit @opcode (emitted via .insn rrf)
 * with register 0 set to 0 and register 1 holding the address of @query.
 * Registers 0 and 1, the condition code and memory are declared clobbered.
 *
 * @opcode is used as an immediate operand, so it has to be a compile time
 * constant.
 */
static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
	unsigned long query_addr = (unsigned long)query;

	asm volatile(
		"	lghi	0,0\n"
		"	lgr	1,%[query]\n"
		/* Parameter registers are ignored */
		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
		:
		: [query] "d" (query_addr), [opc] "i" (opcode)
		: "cc", "memory", "0", "1");
}
|
|
|
|
|
2018-12-28 09:59:06 +00:00
|
|
|
/* opcodes of the SORTL and DFLTCC instructions */
#define INSN_SORTL	0xb938
#define INSN_DFLTCC	0xb939
|
2018-12-28 09:59:06 +00:00
|
|
|
|
2022-11-30 23:09:12 +00:00
|
|
|
/*
 * Probe the host once at init time and record what can be offered to guests:
 *  - the subfunction masks in kvm_s390_available_subfunc (PLO, PTFF, the
 *    CPACF instructions, SORTL and DFLTCC), each queried only when the
 *    corresponding facility bit is installed;
 *  - the CPU features in kvm_s390_available_cpu_feat (via allow_cpu_feat()).
 */
static void __init kvm_s390_cpu_feat_init(void)
{
	int bit;

	/* PLO: one mask bit per function code, most significant bit first. */
	for (bit = 0; bit < 256; bit++) {
		if (!plo_test_bit(bit))
			continue;
		kvm_s390_available_subfunc.plo[bit / 8] |= 0x80 >> (bit % 8);
	}

	/* TOD-clock steering */
	if (test_facility(28))
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff), PTFF_QAF);

	/* MSA */
	if (test_facility(17)) {
		__cpacf_query(CPACF_KMAC,
			      (cpacf_mask_t *)kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC,
			      (cpacf_mask_t *)kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM,
			      (cpacf_mask_t *)kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD,
			      (cpacf_mask_t *)kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD,
			      (cpacf_mask_t *)kvm_s390_available_subfunc.klmd);
	}

	/* MSA3 */
	if (test_facility(76))
		__cpacf_query(CPACF_PCKMO,
			      (cpacf_mask_t *)kvm_s390_available_subfunc.pckmo);

	/* MSA4 */
	if (test_facility(77)) {
		__cpacf_query(CPACF_KMCTR,
			      (cpacf_mask_t *)kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF,
			      (cpacf_mask_t *)kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO,
			      (cpacf_mask_t *)kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC,
			      (cpacf_mask_t *)kvm_s390_available_subfunc.pcc);
	}

	/* MSA5 */
	if (test_facility(57))
		__cpacf_query(CPACF_PRNO,
			      (cpacf_mask_t *)kvm_s390_available_subfunc.ppno);

	/* MSA8 */
	if (test_facility(146))
		__cpacf_query(CPACF_KMA,
			      (cpacf_mask_t *)kvm_s390_available_subfunc.kma);

	/* MSA9 */
	if (test_facility(155))
		__cpacf_query(CPACF_KDSA,
			      (cpacf_mask_t *)kvm_s390_available_subfunc.kdsa);

	/* SORTL */
	if (test_facility(150))
		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);

	/* DFLTCC */
	if (test_facility(151))
		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);

	/*
	 * Everything below is only offered when nested virtualization is
	 * possible. That needs SIE support, ESOP (PROT_READ protection for
	 * gmap_shadow), 64bit SCAO (SCA passthrough) and IDTE (for
	 * gmap_shadow unshadowing), and the "nested" switch must be on.
	 */
	if (!(sclp.has_sief2 && MACHINE_HAS_ESOP && sclp.has_64bscao &&
	      test_facility(3) && nested))
		return;

	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);

	/*
	 * Features that are deliberately NOT allowed here, and why:
	 *
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}
|
|
|
|
|
2022-11-30 23:09:12 +00:00
|
|
|
/*
 * One-time setup of the s390 KVM infrastructure: the two debug feature
 * areas and their sprintf views, the CPU feature/subfunction probing, the
 * FLIC device ops, optional zPCI support, the GIB, and finally the gmap PTE
 * and epoch-delta notifiers.
 *
 * Returns 0 on success. On failure, everything set up so far that has an
 * unwind step here is torn down in reverse order and a negative errno is
 * returned (-ENOMEM for the debug feature steps, otherwise the error code of
 * the failing call).
 */
static int __init __kvm_s390_init(void)
{
	int ret = -ENOMEM;

	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return ret;

	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf_uv)
		goto unregister_dbf;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view))
		goto unregister_dbf_uv;
	if (debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
		goto unregister_dbf_uv;

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	ret = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
	if (ret) {
		pr_err("A FLIC registration call failed with rc=%d\n", ret);
		goto unregister_dbf_uv;
	}

	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
		ret = kvm_s390_pci_init();
		if (ret) {
			pr_err("Unable to allocate AIFT for PCI\n");
			goto unregister_dbf_uv;
		}
	}

	ret = kvm_s390_gib_init(GAL_ISC);
	if (ret)
		goto pci_exit;

	/* Nothing below can fail; hook up the notifiers last. */
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);

	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);

	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);

	return 0;

pci_exit:
	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
		kvm_s390_pci_exit();
unregister_dbf_uv:
	debug_unregister(kvm_s390_dbf_uv);
unregister_dbf:
	debug_unregister(kvm_s390_dbf);
	return ret;
}
|
|
|
|
|
2022-11-30 23:09:11 +00:00
|
|
|
static void __kvm_s390_exit(void)
|
2015-07-22 13:50:58 +00:00
|
|
|
{
|
2022-11-30 23:08:50 +00:00
|
|
|
gmap_unregister_pte_notifier(&gmap_notifier);
|
|
|
|
gmap_unregister_pte_notifier(&vsie_gmap_notifier);
|
|
|
|
atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
|
|
|
|
&kvm_clock_notifier);
|
|
|
|
|
2019-01-31 08:52:40 +00:00
|
|
|
kvm_s390_gib_destroy();
|
2022-09-20 19:30:25 +00:00
|
|
|
if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
|
2022-06-06 20:33:16 +00:00
|
|
|
kvm_s390_pci_exit();
|
2015-07-22 13:50:58 +00:00
|
|
|
debug_unregister(kvm_s390_dbf);
|
2019-10-02 08:46:58 +00:00
|
|
|
debug_unregister(kvm_s390_dbf_uv);
|
2015-07-22 13:50:58 +00:00
|
|
|
}
|
|
|
|
|
2008-03-25 17:47:20 +00:00
|
|
|
/* Section: device related */
|
|
|
|
long kvm_arch_dev_ioctl(struct file *filp,
|
|
|
|
unsigned int ioctl, unsigned long arg)
|
|
|
|
{
|
|
|
|
if (ioctl == KVM_S390_ENABLE_SIE)
|
|
|
|
return s390_enable_sie();
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2014-07-14 16:27:35 +00:00
|
|
|
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
2008-03-25 17:47:20 +00:00
|
|
|
{
|
2009-11-19 13:21:16 +00:00
|
|
|
int r;
|
|
|
|
|
2008-07-25 13:49:13 +00:00
|
|
|
switch (ext) {
|
2009-11-19 13:21:16 +00:00
|
|
|
case KVM_CAP_S390_PSW:
|
2011-09-20 15:07:29 +00:00
|
|
|
case KVM_CAP_S390_GMAP:
|
2011-11-17 10:00:44 +00:00
|
|
|
case KVM_CAP_SYNC_MMU:
|
2012-01-04 09:25:29 +00:00
|
|
|
#ifdef CONFIG_KVM_S390_UCONTROL
|
|
|
|
case KVM_CAP_S390_UCONTROL:
|
|
|
|
#endif
|
2013-10-07 15:11:48 +00:00
|
|
|
case KVM_CAP_ASYNC_PF:
|
2012-01-11 10:20:31 +00:00
|
|
|
case KVM_CAP_SYNC_REGS:
|
2012-05-15 12:15:26 +00:00
|
|
|
case KVM_CAP_ONE_REG:
|
2012-12-20 14:32:11 +00:00
|
|
|
case KVM_CAP_ENABLE_CAP:
|
2012-12-20 14:32:12 +00:00
|
|
|
case KVM_CAP_S390_CSS_SUPPORT:
|
2013-02-28 11:33:21 +00:00
|
|
|
case KVM_CAP_IOEVENTFD:
|
2013-10-07 14:13:45 +00:00
|
|
|
case KVM_CAP_DEVICE_CTRL:
|
2014-07-15 07:54:39 +00:00
|
|
|
case KVM_CAP_S390_IRQCHIP:
|
2014-04-09 11:13:00 +00:00
|
|
|
case KVM_CAP_VM_ATTRIBUTES:
|
2014-04-10 15:35:00 +00:00
|
|
|
case KVM_CAP_MP_STATE:
|
2017-02-08 10:50:15 +00:00
|
|
|
case KVM_CAP_IMMEDIATE_EXIT:
|
2014-11-11 19:57:06 +00:00
|
|
|
case KVM_CAP_S390_INJECT_IRQ:
|
2014-10-09 12:10:13 +00:00
|
|
|
case KVM_CAP_S390_USER_SIGP:
|
2015-01-30 15:55:56 +00:00
|
|
|
case KVM_CAP_S390_USER_STSI:
|
2014-09-23 13:23:01 +00:00
|
|
|
case KVM_CAP_S390_SKEYS:
|
2014-11-24 16:13:46 +00:00
|
|
|
case KVM_CAP_S390_IRQ_STATE:
|
2016-06-21 12:19:51 +00:00
|
|
|
case KVM_CAP_S390_USER_INSTR0:
|
2016-08-04 15:58:47 +00:00
|
|
|
case KVM_CAP_S390_CMMA_MIGRATION:
|
2017-03-10 08:29:38 +00:00
|
|
|
case KVM_CAP_S390_AIS:
|
2017-11-09 09:00:45 +00:00
|
|
|
case KVM_CAP_S390_AIS_MIGRATION:
|
2020-01-31 10:02:02 +00:00
|
|
|
case KVM_CAP_S390_VCPU_RESETS:
|
2020-05-05 15:47:50 +00:00
|
|
|
case KVM_CAP_SET_GUEST_DEBUG:
|
2020-06-22 15:46:36 +00:00
|
|
|
case KVM_CAP_S390_DIAG318:
|
2009-11-19 13:21:16 +00:00
|
|
|
r = 1;
|
|
|
|
break;
|
2021-04-01 13:54:47 +00:00
|
|
|
case KVM_CAP_SET_GUEST_DEBUG2:
|
|
|
|
r = KVM_GUESTDBG_VALID_MASK;
|
|
|
|
break;
|
2018-07-13 10:28:31 +00:00
|
|
|
case KVM_CAP_S390_HPAGE_1M:
|
|
|
|
r = 0;
|
2018-08-01 10:48:28 +00:00
|
|
|
if (hpage && !kvm_is_ucontrol(kvm))
|
2018-07-13 10:28:31 +00:00
|
|
|
r = 1;
|
|
|
|
break;
|
2015-02-06 14:01:21 +00:00
|
|
|
case KVM_CAP_S390_MEM_OP:
|
|
|
|
r = MEM_OP_MAX_SIZE;
|
|
|
|
break;
|
2023-02-06 16:46:00 +00:00
|
|
|
case KVM_CAP_S390_MEM_OP_EXTENSION:
|
|
|
|
/*
|
|
|
|
* Flag bits indicating which extensions are supported.
|
|
|
|
* If r > 0, the base extension must also be supported/indicated,
|
|
|
|
* in order to maintain backwards compatibility.
|
|
|
|
*/
|
|
|
|
r = KVM_S390_MEMOP_EXTENSION_CAP_BASE |
|
|
|
|
KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG;
|
|
|
|
break;
|
2012-05-02 08:50:38 +00:00
|
|
|
case KVM_CAP_NR_VCPUS:
|
|
|
|
case KVM_CAP_MAX_VCPUS:
|
2019-05-23 16:43:08 +00:00
|
|
|
case KVM_CAP_MAX_VCPU_ID:
|
2015-11-24 12:33:49 +00:00
|
|
|
r = KVM_S390_BSCA_CPU_SLOTS;
|
2016-08-08 20:39:32 +00:00
|
|
|
if (!kvm_s390_use_sca_entries())
|
|
|
|
r = KVM_MAX_VCPUS;
|
|
|
|
else if (sclp.has_esca && sclp.has_64bscao)
|
2015-11-24 12:33:49 +00:00
|
|
|
r = KVM_S390_ESCA_CPU_SLOTS;
|
2021-11-16 16:34:42 +00:00
|
|
|
if (ext == KVM_CAP_NR_VCPUS)
|
|
|
|
r = min_t(unsigned int, num_online_cpus(), r);
|
2012-05-02 08:50:38 +00:00
|
|
|
break;
|
2012-05-15 12:15:25 +00:00
|
|
|
case KVM_CAP_S390_COW:
|
2012-11-07 12:17:37 +00:00
|
|
|
r = MACHINE_HAS_ESOP;
|
2012-05-15 12:15:25 +00:00
|
|
|
break;
|
2014-06-09 14:57:26 +00:00
|
|
|
case KVM_CAP_S390_VECTOR_REGISTERS:
|
|
|
|
r = MACHINE_HAS_VX;
|
|
|
|
break;
|
2016-01-07 10:24:29 +00:00
|
|
|
case KVM_CAP_S390_RI:
|
|
|
|
r = test_facility(64);
|
|
|
|
break;
|
2016-11-29 06:17:55 +00:00
|
|
|
case KVM_CAP_S390_GS:
|
|
|
|
r = test_facility(133);
|
|
|
|
break;
|
2018-01-17 13:44:34 +00:00
|
|
|
case KVM_CAP_S390_BPB:
|
|
|
|
r = test_facility(82);
|
|
|
|
break;
|
2022-11-11 17:06:29 +00:00
|
|
|
case KVM_CAP_S390_PROTECTED_ASYNC_DISABLE:
|
|
|
|
r = async_destroy && is_prot_virt_host();
|
|
|
|
break;
|
2020-02-18 20:08:07 +00:00
|
|
|
case KVM_CAP_S390_PROTECTED:
|
|
|
|
r = is_prot_virt_host();
|
|
|
|
break;
|
2022-05-17 16:36:26 +00:00
|
|
|
case KVM_CAP_S390_PROTECTED_DUMP: {
|
|
|
|
u64 pv_cmds_dump[] = {
|
|
|
|
BIT_UVC_CMD_DUMP_INIT,
|
|
|
|
BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE,
|
|
|
|
BIT_UVC_CMD_DUMP_CPU,
|
|
|
|
BIT_UVC_CMD_DUMP_COMPLETE,
|
|
|
|
};
|
|
|
|
int i;
|
|
|
|
|
|
|
|
r = is_prot_virt_host();
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(pv_cmds_dump); i++) {
|
|
|
|
if (!test_bit_inv(pv_cmds_dump[i],
|
|
|
|
(unsigned long *)&uv_info.inst_calls_list)) {
|
|
|
|
r = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2022-06-06 20:33:24 +00:00
|
|
|
case KVM_CAP_S390_ZPCI_OP:
|
|
|
|
r = kvm_s390_pci_interp_allowed();
|
|
|
|
break;
|
2022-07-14 19:43:34 +00:00
|
|
|
case KVM_CAP_S390_CPU_TOPOLOGY:
|
|
|
|
r = test_facility(11);
|
|
|
|
break;
|
2008-07-25 13:49:13 +00:00
|
|
|
default:
|
2009-11-19 13:21:16 +00:00
|
|
|
r = 0;
|
2008-07-25 13:49:13 +00:00
|
|
|
}
|
2009-11-19 13:21:16 +00:00
|
|
|
return r;
|
2008-03-25 17:47:20 +00:00
|
|
|
}
|
|
|
|
|
2020-02-18 21:07:29 +00:00
|
|
|
void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
|
2012-08-02 14:10:17 +00:00
|
|
|
{
|
2018-07-17 12:21:22 +00:00
|
|
|
int i;
|
2012-08-02 14:10:17 +00:00
|
|
|
gfn_t cur_gfn, last_gfn;
|
2018-07-17 12:21:22 +00:00
|
|
|
unsigned long gaddr, vmaddr;
|
2012-08-02 14:10:17 +00:00
|
|
|
struct gmap *gmap = kvm->arch.gmap;
|
2018-07-17 12:21:22 +00:00
|
|
|
DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
|
2012-08-02 14:10:17 +00:00
|
|
|
|
2018-07-17 12:21:22 +00:00
|
|
|
/* Loop over all guest segments */
|
|
|
|
cur_gfn = memslot->base_gfn;
|
2012-08-02 14:10:17 +00:00
|
|
|
last_gfn = memslot->base_gfn + memslot->npages;
|
2018-07-17 12:21:22 +00:00
|
|
|
for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
|
|
|
|
gaddr = gfn_to_gpa(cur_gfn);
|
|
|
|
vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
|
|
|
|
if (kvm_is_error_hva(vmaddr))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
bitmap_zero(bitmap, _PAGE_ENTRIES);
|
|
|
|
gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
|
|
|
|
for (i = 0; i < _PAGE_ENTRIES; i++) {
|
|
|
|
if (test_bit(i, bitmap))
|
|
|
|
mark_page_dirty(kvm, cur_gfn + i);
|
|
|
|
}
|
2012-08-02 14:10:17 +00:00
|
|
|
|
2016-02-03 10:12:34 +00:00
|
|
|
if (fatal_signal_pending(current))
|
|
|
|
return;
|
2016-02-02 14:15:56 +00:00
|
|
|
cond_resched();
|
2012-08-02 14:10:17 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-03-25 17:47:20 +00:00
|
|
|
/* Section: vm related */
|
2015-04-21 13:31:59 +00:00
|
|
|
/* Forward declaration of a file-local helper so it can be used before its definition. */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);
|
|
|
|
|
2008-03-25 17:47:20 +00:00
|
|
|
/*
|
|
|
|
* Get (and clear) the dirty memory log for a memory slot.
|
|
|
|
*/
|
|
|
|
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
|
|
|
|
struct kvm_dirty_log *log)
|
|
|
|
{
|
2012-08-02 14:10:17 +00:00
|
|
|
int r;
|
|
|
|
unsigned long n;
|
|
|
|
struct kvm_memory_slot *memslot;
|
KVM: Ensure validity of memslot with respect to kvm_get_dirty_log()
Rework kvm_get_dirty_log() so that it "returns" the associated memslot
on success. A future patch will rework memslot handling such that
id_to_memslot() can return NULL, returning the memslot makes it more
obvious that the validity of the memslot has been verified, i.e.
precludes the need to add validity checks in the arch code that are
technically unnecessary.
To maintain ordering in s390, move the call to kvm_arch_sync_dirty_log()
from s390's kvm_vm_ioctl_get_dirty_log() to the new kvm_get_dirty_log().
This is a nop for PPC, the only other arch that doesn't select
KVM_GENERIC_DIRTYLOG_READ_PROTECT, as its sync_dirty_log() is empty.
Ideally, moving the sync_dirty_log() call would be done in a separate
patch, but it can't be done in a follow-on patch because that would
temporarily break s390's ordering. Making the move in a preparatory
patch would be functionally correct, but would create an odd scenario
where the moved sync_dirty_log() would operate on a "different" memslot
due to consuming the result of a different id_to_memslot(). The
memslot couldn't actually be different as slots_lock is held, but the
code is confusing enough as it is, i.e. moving sync_dirty_log() in this
patch is the lesser of all evils.
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2020-02-18 21:07:30 +00:00
|
|
|
int is_dirty;
|
2012-08-02 14:10:17 +00:00
|
|
|
|
2017-02-02 15:39:31 +00:00
|
|
|
if (kvm_is_ucontrol(kvm))
|
|
|
|
return -EINVAL;
|
|
|
|
|
2012-08-02 14:10:17 +00:00
|
|
|
mutex_lock(&kvm->slots_lock);
|
|
|
|
|
|
|
|
r = -EINVAL;
|
|
|
|
if (log->slot >= KVM_USER_MEM_SLOTS)
|
|
|
|
goto out;
|
|
|
|
|
KVM: Ensure validity of memslot with respect to kvm_get_dirty_log()
Rework kvm_get_dirty_log() so that it "returns" the associated memslot
on success. A future patch will rework memslot handling such that
id_to_memslot() can return NULL, returning the memslot makes it more
obvious that the validity of the memslot has been verified, i.e.
precludes the need to add validity checks in the arch code that are
technically unnecessary.
To maintain ordering in s390, move the call to kvm_arch_sync_dirty_log()
from s390's kvm_vm_ioctl_get_dirty_log() to the new kvm_get_dirty_log().
This is a nop for PPC, the only other arch that doesn't select
KVM_GENERIC_DIRTYLOG_READ_PROTECT, as its sync_dirty_log() is empty.
Ideally, moving the sync_dirty_log() call would be done in a separate
patch, but it can't be done in a follow-on patch because that would
temporarily break s390's ordering. Making the move in a preparatory
patch would be functionally correct, but would create an odd scenario
where the moved sync_dirty_log() would operate on a "different" memslot
due to consuming the result of a different id_to_memslot(). The
memslot couldn't actually be different as slots_lock is held, but the
code is confusing enough as it is, i.e. moving sync_dirty_log() in this
patch is the lesser of all evils.
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2020-02-18 21:07:30 +00:00
|
|
|
r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
|
2012-08-02 14:10:17 +00:00
|
|
|
if (r)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
/* Clear the dirty log */
|
|
|
|
if (is_dirty) {
|
|
|
|
n = kvm_dirty_bitmap_bytes(memslot);
|
|
|
|
memset(memslot->dirty_bitmap, 0, n);
|
|
|
|
}
|
|
|
|
r = 0;
|
|
|
|
out:
|
|
|
|
mutex_unlock(&kvm->slots_lock);
|
|
|
|
return r;
|
2008-03-25 17:47:20 +00:00
|
|
|
}
|
|
|
|
|
2016-06-21 12:19:51 +00:00
|
|
|
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
|
|
|
|
{
|
2021-11-16 16:04:02 +00:00
|
|
|
unsigned long i;
|
2016-06-21 12:19:51 +00:00
|
|
|
struct kvm_vcpu *vcpu;
|
|
|
|
|
|
|
|
kvm_for_each_vcpu(i, vcpu, kvm) {
|
|
|
|
kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-02-16 09:40:56 +00:00
|
|
|
int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
|
2013-10-23 16:26:34 +00:00
|
|
|
{
|
|
|
|
int r;
|
|
|
|
|
|
|
|
if (cap->flags)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
switch (cap->cap) {
|
2013-07-15 11:36:01 +00:00
|
|
|
case KVM_CAP_S390_IRQCHIP:
|
2015-07-22 13:52:10 +00:00
|
|
|
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
|
2013-07-15 11:36:01 +00:00
|
|
|
kvm->arch.use_irqchip = 1;
|
|
|
|
r = 0;
|
|
|
|
break;
|
2014-10-09 12:10:13 +00:00
|
|
|
case KVM_CAP_S390_USER_SIGP:
|
2015-07-22 13:52:10 +00:00
|
|
|
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
|
2014-10-09 12:10:13 +00:00
|
|
|
kvm->arch.user_sigp = 1;
|
|
|
|
r = 0;
|
|
|
|
break;
|
2014-06-09 14:57:26 +00:00
|
|
|
case KVM_CAP_S390_VECTOR_REGISTERS:
|
2015-11-06 11:08:48 +00:00
|
|
|
mutex_lock(&kvm->lock);
|
2016-06-13 12:50:04 +00:00
|
|
|
if (kvm->created_vcpus) {
|
2015-11-06 11:08:48 +00:00
|
|
|
r = -EBUSY;
|
|
|
|
} else if (MACHINE_HAS_VX) {
|
2015-12-02 07:53:52 +00:00
|
|
|
set_kvm_facility(kvm->arch.model.fac_mask, 129);
|
|
|
|
set_kvm_facility(kvm->arch.model.fac_list, 129);
|
2016-06-03 12:37:17 +00:00
|
|
|
if (test_facility(134)) {
|
|
|
|
set_kvm_facility(kvm->arch.model.fac_mask, 134);
|
|
|
|
set_kvm_facility(kvm->arch.model.fac_list, 134);
|
|
|
|
}
|
2016-02-10 09:31:23 +00:00
|
|
|
if (test_facility(135)) {
|
|
|
|
set_kvm_facility(kvm->arch.model.fac_mask, 135);
|
|
|
|
set_kvm_facility(kvm->arch.model.fac_list, 135);
|
|
|
|
}
|
2018-12-28 08:43:37 +00:00
|
|
|
if (test_facility(148)) {
|
|
|
|
set_kvm_facility(kvm->arch.model.fac_mask, 148);
|
|
|
|
set_kvm_facility(kvm->arch.model.fac_list, 148);
|
|
|
|
}
|
2018-12-28 08:45:58 +00:00
|
|
|
if (test_facility(152)) {
|
|
|
|
set_kvm_facility(kvm->arch.model.fac_mask, 152);
|
|
|
|
set_kvm_facility(kvm->arch.model.fac_list, 152);
|
|
|
|
}
|
2021-01-25 12:39:45 +00:00
|
|
|
if (test_facility(192)) {
|
|
|
|
set_kvm_facility(kvm->arch.model.fac_mask, 192);
|
|
|
|
set_kvm_facility(kvm->arch.model.fac_list, 192);
|
|
|
|
}
|
2015-03-16 15:05:41 +00:00
|
|
|
r = 0;
|
|
|
|
} else
|
|
|
|
r = -EINVAL;
|
2015-11-06 11:08:48 +00:00
|
|
|
mutex_unlock(&kvm->lock);
|
2015-07-22 13:52:10 +00:00
|
|
|
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
|
|
|
|
r ? "(not available)" : "(success)");
|
2014-06-09 14:57:26 +00:00
|
|
|
break;
|
2016-01-07 10:24:29 +00:00
|
|
|
case KVM_CAP_S390_RI:
|
|
|
|
r = -EINVAL;
|
|
|
|
mutex_lock(&kvm->lock);
|
2016-06-13 12:50:04 +00:00
|
|
|
if (kvm->created_vcpus) {
|
2016-01-07 10:24:29 +00:00
|
|
|
r = -EBUSY;
|
|
|
|
} else if (test_facility(64)) {
|
2015-12-02 07:53:52 +00:00
|
|
|
set_kvm_facility(kvm->arch.model.fac_mask, 64);
|
|
|
|
set_kvm_facility(kvm->arch.model.fac_list, 64);
|
2016-01-07 10:24:29 +00:00
|
|
|
r = 0;
|
|
|
|
}
|
|
|
|
mutex_unlock(&kvm->lock);
|
|
|
|
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
|
|
|
|
r ? "(not available)" : "(success)");
|
|
|
|
break;
|
2017-03-10 08:29:38 +00:00
|
|
|
case KVM_CAP_S390_AIS:
|
|
|
|
mutex_lock(&kvm->lock);
|
|
|
|
if (kvm->created_vcpus) {
|
|
|
|
r = -EBUSY;
|
|
|
|
} else {
|
|
|
|
set_kvm_facility(kvm->arch.model.fac_mask, 72);
|
|
|
|
set_kvm_facility(kvm->arch.model.fac_list, 72);
|
|
|
|
r = 0;
|
|
|
|
}
|
|
|
|
mutex_unlock(&kvm->lock);
|
|
|
|
VM_EVENT(kvm, 3, "ENABLE: AIS %s",
|
|
|
|
r ? "(not available)" : "(success)");
|
|
|
|
break;
|
2016-11-29 06:17:55 +00:00
|
|
|
case KVM_CAP_S390_GS:
|
|
|
|
r = -EINVAL;
|
|
|
|
mutex_lock(&kvm->lock);
|
2017-11-16 14:12:52 +00:00
|
|
|
if (kvm->created_vcpus) {
|
2016-11-29 06:17:55 +00:00
|
|
|
r = -EBUSY;
|
|
|
|
} else if (test_facility(133)) {
|
|
|
|
set_kvm_facility(kvm->arch.model.fac_mask, 133);
|
|
|
|
set_kvm_facility(kvm->arch.model.fac_list, 133);
|
|
|
|
r = 0;
|
|
|
|
}
|
|
|
|
mutex_unlock(&kvm->lock);
|
|
|
|
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
|
|
|
|
r ? "(not available)" : "(success)");
|
|
|
|
break;
|
2018-07-13 10:28:31 +00:00
|
|
|
case KVM_CAP_S390_HPAGE_1M:
|
|
|
|
mutex_lock(&kvm->lock);
|
|
|
|
if (kvm->created_vcpus)
|
|
|
|
r = -EBUSY;
|
2018-08-01 10:48:28 +00:00
|
|
|
else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
|
2018-07-13 10:28:31 +00:00
|
|
|
r = -EINVAL;
|
|
|
|
else {
|
|
|
|
r = 0;
|
2020-06-09 04:33:25 +00:00
|
|
|
mmap_write_lock(kvm->mm);
|
2018-07-13 10:28:31 +00:00
|
|
|
kvm->mm->context.allow_gmap_hpage_1m = 1;
|
2020-06-09 04:33:25 +00:00
|
|
|
mmap_write_unlock(kvm->mm);
|
2018-07-13 10:28:31 +00:00
|
|
|
/*
|
|
|
|
* We might have to create fake 4k page
|
|
|
|
* tables. To avoid that the hardware works on
|
|
|
|
* stale PGSTEs, we emulate these instructions.
|
|
|
|
*/
|
|
|
|
kvm->arch.use_skf = 0;
|
|
|
|
kvm->arch.use_pfmfi = 0;
|
|
|
|
}
|
|
|
|
mutex_unlock(&kvm->lock);
|
|
|
|
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
|
|
|
|
r ? "(not available)" : "(success)");
|
|
|
|
break;
|
2015-01-30 15:55:56 +00:00
|
|
|
case KVM_CAP_S390_USER_STSI:
|
2015-07-22 13:52:10 +00:00
|
|
|
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
|
2015-01-30 15:55:56 +00:00
|
|
|
kvm->arch.user_stsi = 1;
|
|
|
|
r = 0;
|
|
|
|
break;
|
2016-06-21 12:19:51 +00:00
|
|
|
case KVM_CAP_S390_USER_INSTR0:
|
|
|
|
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
|
|
|
|
kvm->arch.user_instr0 = 1;
|
|
|
|
icpt_operexc_on_all_vcpus(kvm);
|
|
|
|
r = 0;
|
|
|
|
break;
|
2022-07-14 19:43:34 +00:00
|
|
|
case KVM_CAP_S390_CPU_TOPOLOGY:
|
|
|
|
r = -EINVAL;
|
|
|
|
mutex_lock(&kvm->lock);
|
|
|
|
if (kvm->created_vcpus) {
|
|
|
|
r = -EBUSY;
|
|
|
|
} else if (test_facility(11)) {
|
|
|
|
set_kvm_facility(kvm->arch.model.fac_mask, 11);
|
|
|
|
set_kvm_facility(kvm->arch.model.fac_list, 11);
|
|
|
|
r = 0;
|
|
|
|
}
|
|
|
|
mutex_unlock(&kvm->lock);
|
|
|
|
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
|
|
|
|
r ? "(not available)" : "(success)");
|
|
|
|
break;
|
2013-10-23 16:26:34 +00:00
|
|
|
default:
|
|
|
|
r = -EINVAL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2014-10-31 13:10:41 +00:00
|
|
|
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
switch (attr->attr) {
|
|
|
|
case KVM_S390_VM_MEM_LIMIT_SIZE:
|
|
|
|
ret = 0;
|
2015-07-22 13:52:10 +00:00
|
|
|
VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
|
2014-12-01 16:24:42 +00:00
|
|
|
kvm->arch.mem_limit);
|
|
|
|
if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
|
2014-10-31 13:10:41 +00:00
|
|
|
ret = -EFAULT;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
ret = -ENXIO;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
|
2014-04-09 11:13:00 +00:00
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
unsigned int idx;
|
|
|
|
switch (attr->attr) {
|
|
|
|
case KVM_S390_VM_MEM_ENABLE_CMMA:
|
2016-03-03 08:48:47 +00:00
|
|
|
ret = -ENXIO;
|
2015-11-24 12:53:04 +00:00
|
|
|
if (!sclp.has_cmma)
|
2015-05-07 13:41:57 +00:00
|
|
|
break;
|
|
|
|
|
2015-07-22 13:52:10 +00:00
|
|
|
VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
|
2014-04-09 11:13:00 +00:00
|
|
|
mutex_lock(&kvm->lock);
|
2018-07-13 10:28:31 +00:00
|
|
|
if (kvm->created_vcpus)
|
|
|
|
ret = -EBUSY;
|
|
|
|
else if (kvm->mm->context.allow_gmap_hpage_1m)
|
|
|
|
ret = -EINVAL;
|
|
|
|
else {
|
2014-04-09 11:13:00 +00:00
|
|
|
kvm->arch.use_cmma = 1;
|
2018-02-16 11:16:14 +00:00
|
|
|
/* Not compatible with cmma. */
|
|
|
|
kvm->arch.use_pfmfi = 0;
|
2014-04-09 11:13:00 +00:00
|
|
|
ret = 0;
|
|
|
|
}
|
|
|
|
mutex_unlock(&kvm->lock);
|
|
|
|
break;
|
|
|
|
case KVM_S390_VM_MEM_CLR_CMMA:
|
2016-03-03 08:48:47 +00:00
|
|
|
ret = -ENXIO;
|
|
|
|
if (!sclp.has_cmma)
|
|
|
|
break;
|
2015-06-18 11:17:11 +00:00
|
|
|
ret = -EINVAL;
|
|
|
|
if (!kvm->arch.use_cmma)
|
|
|
|
break;
|
|
|
|
|
2015-07-22 13:52:10 +00:00
|
|
|
VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
|
2014-04-09 11:13:00 +00:00
|
|
|
mutex_lock(&kvm->lock);
|
|
|
|
idx = srcu_read_lock(&kvm->srcu);
|
2014-10-23 10:07:14 +00:00
|
|
|
s390_reset_cmma(kvm->arch.gmap->mm);
|
2014-04-09 11:13:00 +00:00
|
|
|
srcu_read_unlock(&kvm->srcu, idx);
|
|
|
|
mutex_unlock(&kvm->lock);
|
|
|
|
ret = 0;
|
|
|
|
break;
|
2014-10-31 13:10:41 +00:00
|
|
|
case KVM_S390_VM_MEM_LIMIT_SIZE: {
|
|
|
|
unsigned long new_limit;
|
|
|
|
|
|
|
|
if (kvm_is_ucontrol(kvm))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (get_user(new_limit, (u64 __user *)attr->addr))
|
|
|
|
return -EFAULT;
|
|
|
|
|
2014-12-01 16:24:42 +00:00
|
|
|
if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
|
|
|
|
new_limit > kvm->arch.mem_limit)
|
2014-10-31 13:10:41 +00:00
|
|
|
return -E2BIG;
|
|
|
|
|
2014-12-01 16:24:42 +00:00
|
|
|
if (!new_limit)
|
|
|
|
return -EINVAL;
|
|
|
|
|
2016-03-08 10:55:04 +00:00
|
|
|
/* gmap_create takes last usable address */
|
2014-12-01 16:24:42 +00:00
|
|
|
if (new_limit != KVM_S390_NO_MEM_LIMIT)
|
|
|
|
new_limit -= 1;
|
|
|
|
|
2014-10-31 13:10:41 +00:00
|
|
|
ret = -EBUSY;
|
|
|
|
mutex_lock(&kvm->lock);
|
2016-06-13 12:50:04 +00:00
|
|
|
if (!kvm->created_vcpus) {
|
2016-03-08 10:55:04 +00:00
|
|
|
/* gmap_create will round the limit up */
|
|
|
|
struct gmap *new = gmap_create(current->mm, new_limit);
|
2014-10-31 13:10:41 +00:00
|
|
|
|
|
|
|
if (!new) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
} else {
|
2016-03-08 10:55:04 +00:00
|
|
|
gmap_remove(kvm->arch.gmap);
|
2014-10-31 13:10:41 +00:00
|
|
|
new->private = kvm;
|
|
|
|
kvm->arch.gmap = new;
|
|
|
|
ret = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
mutex_unlock(&kvm->lock);
|
2014-12-01 16:24:42 +00:00
|
|
|
VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
|
|
|
|
VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
|
|
|
|
(void *) kvm->arch.gmap->asce);
|
2014-10-31 13:10:41 +00:00
|
|
|
break;
|
|
|
|
}
|
2014-04-09 11:13:00 +00:00
|
|
|
default:
|
|
|
|
ret = -ENXIO;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2014-09-03 08:13:53 +00:00
|
|
|
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
|
|
|
|
|
2018-04-22 15:37:03 +00:00
|
|
|
void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
|
2014-09-03 08:13:53 +00:00
|
|
|
{
|
|
|
|
struct kvm_vcpu *vcpu;
|
2021-11-16 16:04:02 +00:00
|
|
|
unsigned long i;
|
2014-09-03 08:13:53 +00:00
|
|
|
|
2018-04-22 15:37:03 +00:00
|
|
|
kvm_s390_vcpu_block_all(kvm);
|
|
|
|
|
2018-09-25 23:16:17 +00:00
|
|
|
kvm_for_each_vcpu(i, vcpu, kvm) {
|
2018-04-22 15:37:03 +00:00
|
|
|
kvm_s390_vcpu_crypto_setup(vcpu);
|
2018-09-25 23:16:17 +00:00
|
|
|
/* recreate the shadow crycb by leaving the VSIE handler */
|
|
|
|
kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
|
|
|
|
}
|
2018-04-22 15:37:03 +00:00
|
|
|
|
|
|
|
kvm_s390_vcpu_unblock_all(kvm);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
|
|
|
|
{
|
2014-09-03 08:13:53 +00:00
|
|
|
mutex_lock(&kvm->lock);
|
|
|
|
switch (attr->attr) {
|
|
|
|
case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
|
2018-10-04 12:42:43 +00:00
|
|
|
if (!test_kvm_facility(kvm, 76)) {
|
|
|
|
mutex_unlock(&kvm->lock);
|
2018-09-25 23:16:39 +00:00
|
|
|
return -EINVAL;
|
2018-10-04 12:42:43 +00:00
|
|
|
}
|
2014-09-03 08:13:53 +00:00
|
|
|
get_random_bytes(
|
|
|
|
kvm->arch.crypto.crycb->aes_wrapping_key_mask,
|
|
|
|
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
|
|
|
|
kvm->arch.crypto.aes_kw = 1;
|
2015-07-22 13:52:10 +00:00
|
|
|
VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
|
2014-09-03 08:13:53 +00:00
|
|
|
break;
|
|
|
|
case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
|
2018-10-04 12:42:43 +00:00
|
|
|
if (!test_kvm_facility(kvm, 76)) {
|
|
|
|
mutex_unlock(&kvm->lock);
|
2018-09-25 23:16:39 +00:00
|
|
|
return -EINVAL;
|
2018-10-04 12:42:43 +00:00
|
|
|
}
|
2014-09-03 08:13:53 +00:00
|
|
|
get_random_bytes(
|
|
|
|
kvm->arch.crypto.crycb->dea_wrapping_key_mask,
|
|
|
|
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
|
|
|
|
kvm->arch.crypto.dea_kw = 1;
|
2015-07-22 13:52:10 +00:00
|
|
|
VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
|
2014-09-03 08:13:53 +00:00
|
|
|
break;
|
|
|
|
case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
|
2018-10-04 12:42:43 +00:00
|
|
|
if (!test_kvm_facility(kvm, 76)) {
|
|
|
|
mutex_unlock(&kvm->lock);
|
2018-09-25 23:16:39 +00:00
|
|
|
return -EINVAL;
|
2018-10-04 12:42:43 +00:00
|
|
|
}
|
2014-09-03 08:13:53 +00:00
|
|
|
kvm->arch.crypto.aes_kw = 0;
|
|
|
|
memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
|
|
|
|
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
|
2015-07-22 13:52:10 +00:00
|
|
|
VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
|
2014-09-03 08:13:53 +00:00
|
|
|
break;
|
|
|
|
case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
|
2018-10-04 12:42:43 +00:00
|
|
|
if (!test_kvm_facility(kvm, 76)) {
|
|
|
|
mutex_unlock(&kvm->lock);
|
2018-09-25 23:16:39 +00:00
|
|
|
return -EINVAL;
|
2018-10-04 12:42:43 +00:00
|
|
|
}
|
2014-09-03 08:13:53 +00:00
|
|
|
kvm->arch.crypto.dea_kw = 0;
|
|
|
|
memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
|
|
|
|
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
|
2015-07-22 13:52:10 +00:00
|
|
|
VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
|
2014-09-03 08:13:53 +00:00
|
|
|
break;
|
2018-09-25 23:16:39 +00:00
|
|
|
case KVM_S390_VM_CRYPTO_ENABLE_APIE:
|
|
|
|
if (!ap_instructions_available()) {
|
|
|
|
mutex_unlock(&kvm->lock);
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
}
|
|
|
|
kvm->arch.crypto.apie = 1;
|
|
|
|
break;
|
|
|
|
case KVM_S390_VM_CRYPTO_DISABLE_APIE:
|
|
|
|
if (!ap_instructions_available()) {
|
|
|
|
mutex_unlock(&kvm->lock);
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
}
|
|
|
|
kvm->arch.crypto.apie = 0;
|
|
|
|
break;
|
2014-09-03 08:13:53 +00:00
|
|
|
default:
|
|
|
|
mutex_unlock(&kvm->lock);
|
|
|
|
return -ENXIO;
|
|
|
|
}
|
|
|
|
|
2018-04-22 15:37:03 +00:00
|
|
|
kvm_s390_vcpu_crypto_reset_all(kvm);
|
2014-09-03 08:13:53 +00:00
|
|
|
mutex_unlock(&kvm->lock);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2022-06-06 20:33:18 +00:00
|
|
|
static void kvm_s390_vcpu_pci_setup(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
/* Only set the ECB bits after guest requests zPCI interpretation */
|
|
|
|
if (!vcpu->kvm->arch.use_zpci_interp)
|
|
|
|
return;
|
|
|
|
|
|
|
|
vcpu->arch.sie_block->ecb2 |= ECB2_ZPCI_LSI;
|
|
|
|
vcpu->arch.sie_block->ecb3 |= ECB3_AISII + ECB3_AISI;
|
|
|
|
}
|
|
|
|
|
|
|
|
void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm)
|
|
|
|
{
|
|
|
|
struct kvm_vcpu *vcpu;
|
|
|
|
unsigned long i;
|
|
|
|
|
|
|
|
lockdep_assert_held(&kvm->lock);
|
|
|
|
|
|
|
|
if (!kvm_s390_pci_interp_allowed())
|
|
|
|
return;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If host is configured for PCI and the necessary facilities are
|
|
|
|
* available, turn on interpretation for the life of this guest
|
|
|
|
*/
|
|
|
|
kvm->arch.use_zpci_interp = 1;
|
|
|
|
|
|
|
|
kvm_s390_vcpu_block_all(kvm);
|
|
|
|
|
|
|
|
kvm_for_each_vcpu(i, vcpu, kvm) {
|
|
|
|
kvm_s390_vcpu_pci_setup(vcpu);
|
|
|
|
kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
|
|
|
|
}
|
|
|
|
|
|
|
|
kvm_s390_vcpu_unblock_all(kvm);
|
|
|
|
}
|
|
|
|
|
2016-08-04 15:54:42 +00:00
|
|
|
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
|
|
|
|
{
|
2021-11-16 16:04:02 +00:00
|
|
|
unsigned long cx;
|
2016-08-04 15:54:42 +00:00
|
|
|
struct kvm_vcpu *vcpu;
|
|
|
|
|
|
|
|
kvm_for_each_vcpu(cx, vcpu, kvm)
|
|
|
|
kvm_s390_sync_request(req, vcpu);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Must be called with kvm->srcu held to avoid races on memslots, and with
|
2017-12-22 09:54:20 +00:00
|
|
|
* kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
|
2016-08-04 15:54:42 +00:00
|
|
|
*/
|
|
|
|
static int kvm_s390_vm_start_migration(struct kvm *kvm)
|
|
|
|
{
|
|
|
|
struct kvm_memory_slot *ms;
|
|
|
|
struct kvm_memslots *slots;
|
2018-04-30 16:33:25 +00:00
|
|
|
unsigned long ram_pages = 0;
|
KVM: Keep memslots in tree-based structures instead of array-based ones
The current memslot code uses a (reverse gfn-ordered) memslot array for
keeping track of them.
Because the memslot array that is currently in use cannot be modified
every memslot management operation (create, delete, move, change flags)
has to make a copy of the whole array so it has a scratch copy to work on.
Strictly speaking, however, it is only necessary to make copy of the
memslot that is being modified, copying all the memslots currently present
is just a limitation of the array-based memslot implementation.
Two memslot sets, however, are still needed so the VM continues to run
on the currently active set while the requested operation is being
performed on the second, currently inactive one.
In order to have two memslot sets, but only one copy of actual memslots
it is necessary to split out the memslot data from the memslot sets.
The memslots themselves should be also kept independent of each other
so they can be individually added or deleted.
These two memslot sets should normally point to the same set of
memslots. They can, however, be desynchronized when performing a
memslot management operation by replacing the memslot to be modified
by its copy. After the operation is complete, both memslot sets once
again point to the same, common set of memslot data.
This commit implements the aforementioned idea.
For tracking of gfns an ordinary rbtree is used since memslots cannot
overlap in the guest address space and so this data structure is
sufficient for ensuring that lookups are done quickly.
The "last used slot" mini-caches (both per-slot set one and per-vCPU one),
that keep track of the last found-by-gfn memslot, are still present in the
new code.
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Message-Id: <17c0cf3663b760a0d3753d4ac08c0753e941b811.1638817641.git.maciej.szmigiero@oracle.com>
2021-12-06 19:54:30 +00:00
|
|
|
int bkt;
|
2016-08-04 15:54:42 +00:00
|
|
|
|
|
|
|
/* migration mode already enabled */
|
2018-04-30 16:33:25 +00:00
|
|
|
if (kvm->arch.migration_mode)
|
2016-08-04 15:54:42 +00:00
|
|
|
return 0;
|
|
|
|
slots = kvm_memslots(kvm);
|
KVM: Keep memslots in tree-based structures instead of array-based ones
The current memslot code uses a (reverse gfn-ordered) memslot array for
keeping track of them.
Because the memslot array that is currently in use cannot be modified
every memslot management operation (create, delete, move, change flags)
has to make a copy of the whole array so it has a scratch copy to work on.
Strictly speaking, however, it is only necessary to make copy of the
memslot that is being modified, copying all the memslots currently present
is just a limitation of the array-based memslot implementation.
Two memslot sets, however, are still needed so the VM continues to run
on the currently active set while the requested operation is being
performed on the second, currently inactive one.
In order to have two memslot sets, but only one copy of actual memslots
it is necessary to split out the memslot data from the memslot sets.
The memslots themselves should be also kept independent of each other
so they can be individually added or deleted.
These two memslot sets should normally point to the same set of
memslots. They can, however, be desynchronized when performing a
memslot management operation by replacing the memslot to be modified
by its copy. After the operation is complete, both memslot sets once
again point to the same, common set of memslot data.
This commit implements the aforementioned idea.
For tracking of gfns an ordinary rbtree is used since memslots cannot
overlap in the guest address space and so this data structure is
sufficient for ensuring that lookups are done quickly.
The "last used slot" mini-caches (both per-slot set one and per-vCPU one),
that keep track of the last found-by-gfn memslot, are still present in the
new code.
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Message-Id: <17c0cf3663b760a0d3753d4ac08c0753e941b811.1638817641.git.maciej.szmigiero@oracle.com>
2021-12-06 19:54:30 +00:00
|
|
|
if (!slots || kvm_memslots_empty(slots))
|
2016-08-04 15:54:42 +00:00
|
|
|
return -EINVAL;
|
|
|
|
|
2018-04-30 16:33:25 +00:00
|
|
|
if (!kvm->arch.use_cmma) {
|
|
|
|
kvm->arch.migration_mode = 1;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
/* mark all the pages in active slots as dirty */
|
KVM: Keep memslots in tree-based structures instead of array-based ones
The current memslot code uses a (reverse gfn-ordered) memslot array for
keeping track of them.
Because the memslot array that is currently in use cannot be modified
every memslot management operation (create, delete, move, change flags)
has to make a copy of the whole array so it has a scratch copy to work on.
Strictly speaking, however, it is only necessary to make copy of the
memslot that is being modified, copying all the memslots currently present
is just a limitation of the array-based memslot implementation.
Two memslot sets, however, are still needed so the VM continues to run
on the currently active set while the requested operation is being
performed on the second, currently inactive one.
In order to have two memslot sets, but only one copy of actual memslots
it is necessary to split out the memslot data from the memslot sets.
The memslots themselves should be also kept independent of each other
so they can be individually added or deleted.
These two memslot sets should normally point to the same set of
memslots. They can, however, be desynchronized when performing a
memslot management operation by replacing the memslot to be modified
by its copy. After the operation is complete, both memslot sets once
again point to the same, common set of memslot data.
This commit implements the aforementioned idea.
For tracking of gfns an ordinary rbtree is used since memslots cannot
overlap in the guest address space and so this data structure is
sufficient for ensuring that lookups are done quickly.
The "last used slot" mini-caches (both per-slot set one and per-vCPU one),
that keep track of the last found-by-gfn memslot, are still present in the
new code.
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Message-Id: <17c0cf3663b760a0d3753d4ac08c0753e941b811.1638817641.git.maciej.szmigiero@oracle.com>
2021-12-06 19:54:30 +00:00
|
|
|
kvm_for_each_memslot(ms, bkt, slots) {
|
2019-09-11 07:52:18 +00:00
|
|
|
if (!ms->dirty_bitmap)
|
|
|
|
return -EINVAL;
|
2016-08-04 15:54:42 +00:00
|
|
|
/*
|
2018-04-30 16:33:25 +00:00
|
|
|
* The second half of the bitmap is only used on x86,
|
|
|
|
* and would be wasted otherwise, so we put it to good
|
|
|
|
* use here to keep track of the state of the storage
|
|
|
|
* attributes.
|
2016-08-04 15:54:42 +00:00
|
|
|
*/
|
2018-04-30 16:33:25 +00:00
|
|
|
memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
|
|
|
|
ram_pages += ms->npages;
|
2016-08-04 15:54:42 +00:00
|
|
|
}
|
2018-04-30 16:33:25 +00:00
|
|
|
atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
|
|
|
|
kvm->arch.migration_mode = 1;
|
|
|
|
kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
|
2016-08-04 15:54:42 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2017-12-22 09:54:20 +00:00
|
|
|
* Must be called with kvm->slots_lock to avoid races with ourselves and
|
2016-08-04 15:54:42 +00:00
|
|
|
* kvm_s390_vm_start_migration.
|
|
|
|
*/
|
|
|
|
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
|
|
|
|
{
|
|
|
|
/* migration mode already disabled */
|
2018-04-30 16:33:25 +00:00
|
|
|
if (!kvm->arch.migration_mode)
|
2016-08-04 15:54:42 +00:00
|
|
|
return 0;
|
2018-04-30 16:33:25 +00:00
|
|
|
kvm->arch.migration_mode = 0;
|
|
|
|
if (kvm->arch.use_cmma)
|
2016-08-04 15:54:42 +00:00
|
|
|
kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int kvm_s390_vm_set_migration(struct kvm *kvm,
|
|
|
|
struct kvm_device_attr *attr)
|
|
|
|
{
|
2017-12-22 09:54:20 +00:00
|
|
|
int res = -ENXIO;
|
2016-08-04 15:54:42 +00:00
|
|
|
|
2017-12-22 09:54:20 +00:00
|
|
|
mutex_lock(&kvm->slots_lock);
|
2016-08-04 15:54:42 +00:00
|
|
|
switch (attr->attr) {
|
|
|
|
case KVM_S390_VM_MIGRATION_START:
|
|
|
|
res = kvm_s390_vm_start_migration(kvm);
|
|
|
|
break;
|
|
|
|
case KVM_S390_VM_MIGRATION_STOP:
|
|
|
|
res = kvm_s390_vm_stop_migration(kvm);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
2017-12-22 09:54:20 +00:00
|
|
|
mutex_unlock(&kvm->slots_lock);
|
2016-08-04 15:54:42 +00:00
|
|
|
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int kvm_s390_vm_get_migration(struct kvm *kvm,
|
|
|
|
struct kvm_device_attr *attr)
|
|
|
|
{
|
2018-04-30 16:33:25 +00:00
|
|
|
u64 mig = kvm->arch.migration_mode;
|
2016-08-04 15:54:42 +00:00
|
|
|
|
|
|
|
if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
|
|
|
|
return -ENXIO;
|
|
|
|
|
|
|
|
if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
|
|
|
|
return -EFAULT;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
KVM: s390: pv: don't allow userspace to set the clock under PV
When running under PV, the guest's TOD clock is under control of the
ultravisor and the hypervisor isn't allowed to change it. Hence, don't
allow userspace to change the guest's TOD clock by returning
-EOPNOTSUPP.
When userspace changes the guest's TOD clock, KVM updates its
kvm.arch.epoch field and, in addition, the epoch field in all state
descriptions of all VCPUs.
But, under PV, the ultravisor will ignore the epoch field in the state
description and simply overwrite it on next SIE exit with the actual
guest epoch. This leads to KVM having an incorrect view of the guest's
TOD clock: it has updated its internal kvm.arch.epoch field, but the
ultravisor ignores the field in the state description.
Whenever a guest is now waiting for a clock comparator, KVM will
incorrectly calculate the time when the guest should wake up, possibly
causing the guest to sleep for much longer than expected.
With this change, kvm_s390_set_tod() will now take the kvm->lock to be
able to call kvm_s390_pv_is_protected(). Since kvm_s390_set_tod_clock()
also takes kvm->lock, use __kvm_s390_set_tod_clock() instead.
The function kvm_s390_set_tod_clock is now unused, hence remove it.
Update the documentation to indicate the TOD clock attr calls can now
return -EOPNOTSUPP.
Fixes: 0f3035047140 ("KVM: s390: protvirt: Do only reset registers that are accessible")
Reported-by: Marc Hartmayer <mhartmay@linux.ibm.com>
Signed-off-by: Nico Boehr <nrb@linux.ibm.com>
Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Link: https://lore.kernel.org/r/20221011160712.928239-2-nrb@linux.ibm.com
Message-Id: <20221011160712.928239-2-nrb@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
2022-10-11 16:07:12 +00:00
|
|
|
static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
|
|
|
|
|
2016-07-26 19:29:44 +00:00
|
|
|
static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
|
|
|
|
{
|
|
|
|
struct kvm_s390_vm_tod_clock gtod;
|
|
|
|
|
|
|
|
if (copy_from_user(>od, (void __user *)attr->addr, sizeof(gtod)))
|
|
|
|
return -EFAULT;
|
|
|
|
|
2018-02-07 11:46:43 +00:00
|
|
|
if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
|
2016-07-26 19:29:44 +00:00
|
|
|
return -EINVAL;
|
KVM: s390: pv: don't allow userspace to set the clock under PV
When running under PV, the guest's TOD clock is under control of the
ultravisor and the hypervisor isn't allowed to change it. Hence, don't
allow userspace to change the guest's TOD clock by returning
-EOPNOTSUPP.
When userspace changes the guest's TOD clock, KVM updates its
kvm.arch.epoch field and, in addition, the epoch field in all state
descriptions of all VCPUs.
But, under PV, the ultravisor will ignore the epoch field in the state
description and simply overwrite it on next SIE exit with the actual
guest epoch. This leads to KVM having an incorrect view of the guest's
TOD clock: it has updated its internal kvm.arch.epoch field, but the
ultravisor ignores the field in the state description.
Whenever a guest is now waiting for a clock comparator, KVM will
incorrectly calculate the time when the guest should wake up, possibly
causing the guest to sleep for much longer than expected.
With this change, kvm_s390_set_tod() will now take the kvm->lock to be
able to call kvm_s390_pv_is_protected(). Since kvm_s390_set_tod_clock()
also takes kvm->lock, use __kvm_s390_set_tod_clock() instead.
The function kvm_s390_set_tod_clock is now unused, hence remove it.
Update the documentation to indicate the TOD clock attr calls can now
return -EOPNOTSUPP.
Fixes: 0f3035047140 ("KVM: s390: protvirt: Do only reset registers that are accessible")
Reported-by: Marc Hartmayer <mhartmay@linux.ibm.com>
Signed-off-by: Nico Boehr <nrb@linux.ibm.com>
Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Link: https://lore.kernel.org/r/20221011160712.928239-2-nrb@linux.ibm.com
Message-Id: <20221011160712.928239-2-nrb@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
2022-10-11 16:07:12 +00:00
|
|
|
__kvm_s390_set_tod_clock(kvm, >od);
|
2016-07-26 19:29:44 +00:00
|
|
|
|
|
|
|
VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
|
|
|
|
gtod.epoch_idx, gtod.tod);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-11-25 14:46:02 +00:00
|
|
|
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
|
|
|
|
{
|
|
|
|
u8 gtod_high;
|
|
|
|
|
|
|
|
if (copy_from_user(>od_high, (void __user *)attr->addr,
|
|
|
|
sizeof(gtod_high)))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
if (gtod_high != 0)
|
|
|
|
return -EINVAL;
|
2015-10-12 11:27:29 +00:00
|
|
|
VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
|
2014-11-25 14:46:02 +00:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
|
|
|
|
{
|
2018-02-07 11:46:43 +00:00
|
|
|
struct kvm_s390_vm_tod_clock gtod = { 0 };
|
2014-11-25 14:46:02 +00:00
|
|
|
|
2018-02-07 11:46:43 +00:00
|
|
|
if (copy_from_user(>od.tod, (void __user *)attr->addr,
|
|
|
|
sizeof(gtod.tod)))
|
2014-11-25 14:46:02 +00:00
|
|
|
return -EFAULT;
|
|
|
|
|
KVM: s390: pv: don't allow userspace to set the clock under PV
When running under PV, the guest's TOD clock is under control of the
ultravisor and the hypervisor isn't allowed to change it. Hence, don't
allow userspace to change the guest's TOD clock by returning
-EOPNOTSUPP.
When userspace changes the guest's TOD clock, KVM updates its
kvm.arch.epoch field and, in addition, the epoch field in all state
descriptions of all VCPUs.
But, under PV, the ultravisor will ignore the epoch field in the state
description and simply overwrite it on next SIE exit with the actual
guest epoch. This leads to KVM having an incorrect view of the guest's
TOD clock: it has updated its internal kvm.arch.epoch field, but the
ultravisor ignores the field in the state description.
Whenever a guest is now waiting for a clock comparator, KVM will
incorrectly calculate the time when the guest should wake up, possibly
causing the guest to sleep for much longer than expected.
With this change, kvm_s390_set_tod() will now take the kvm->lock to be
able to call kvm_s390_pv_is_protected(). Since kvm_s390_set_tod_clock()
also takes kvm->lock, use __kvm_s390_set_tod_clock() instead.
The function kvm_s390_set_tod_clock is now unused, hence remove it.
Update the documentation to indicate the TOD clock attr calls can now
return -EOPNOTSUPP.
Fixes: 0f3035047140 ("KVM: s390: protvirt: Do only reset registers that are accessible")
Reported-by: Marc Hartmayer <mhartmay@linux.ibm.com>
Signed-off-by: Nico Boehr <nrb@linux.ibm.com>
Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Link: https://lore.kernel.org/r/20221011160712.928239-2-nrb@linux.ibm.com
Message-Id: <20221011160712.928239-2-nrb@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
2022-10-11 16:07:12 +00:00
|
|
|
__kvm_s390_set_tod_clock(kvm, >od);
|
2018-02-07 11:46:43 +00:00
|
|
|
VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
|
2014-11-25 14:46:02 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (attr->flags)
|
|
|
|
return -EINVAL;
|
|
|
|
|
KVM: s390: pv: don't allow userspace to set the clock under PV
When running under PV, the guest's TOD clock is under control of the
ultravisor and the hypervisor isn't allowed to change it. Hence, don't
allow userspace to change the guest's TOD clock by returning
-EOPNOTSUPP.
When userspace changes the guest's TOD clock, KVM updates its
kvm.arch.epoch field and, in addition, the epoch field in all state
descriptions of all VCPUs.
But, under PV, the ultravisor will ignore the epoch field in the state
description and simply overwrite it on next SIE exit with the actual
guest epoch. This leads to KVM having an incorrect view of the guest's
TOD clock: it has updated its internal kvm.arch.epoch field, but the
ultravisor ignores the field in the state description.
Whenever a guest is now waiting for a clock comparator, KVM will
incorrectly calculate the time when the guest should wake up, possibly
causing the guest to sleep for much longer than expected.
With this change, kvm_s390_set_tod() will now take the kvm->lock to be
able to call kvm_s390_pv_is_protected(). Since kvm_s390_set_tod_clock()
also takes kvm->lock, use __kvm_s390_set_tod_clock() instead.
The function kvm_s390_set_tod_clock is now unused, hence remove it.
Update the documentation to indicate the TOD clock attr calls can now
return -EOPNOTSUPP.
Fixes: 0f3035047140 ("KVM: s390: protvirt: Do only reset registers that are accessible")
Reported-by: Marc Hartmayer <mhartmay@linux.ibm.com>
Signed-off-by: Nico Boehr <nrb@linux.ibm.com>
Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Link: https://lore.kernel.org/r/20221011160712.928239-2-nrb@linux.ibm.com
Message-Id: <20221011160712.928239-2-nrb@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
2022-10-11 16:07:12 +00:00
|
|
|
mutex_lock(&kvm->lock);
|
|
|
|
/*
|
|
|
|
* For protected guests, the TOD is managed by the ultravisor, so trying
|
|
|
|
* to change it will never bring the expected results.
|
|
|
|
*/
|
|
|
|
if (kvm_s390_pv_is_protected(kvm)) {
|
|
|
|
ret = -EOPNOTSUPP;
|
|
|
|
goto out_unlock;
|
|
|
|
}
|
|
|
|
|
2014-11-25 14:46:02 +00:00
|
|
|
switch (attr->attr) {
|
2016-07-26 19:29:44 +00:00
|
|
|
case KVM_S390_VM_TOD_EXT:
|
|
|
|
ret = kvm_s390_set_tod_ext(kvm, attr);
|
|
|
|
break;
|
2014-11-25 14:46:02 +00:00
|
|
|
case KVM_S390_VM_TOD_HIGH:
|
|
|
|
ret = kvm_s390_set_tod_high(kvm, attr);
|
|
|
|
break;
|
|
|
|
case KVM_S390_VM_TOD_LOW:
|
|
|
|
ret = kvm_s390_set_tod_low(kvm, attr);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
ret = -ENXIO;
|
|
|
|
break;
|
|
|
|
}
|
KVM: s390: pv: don't allow userspace to set the clock under PV
When running under PV, the guest's TOD clock is under control of the
ultravisor and the hypervisor isn't allowed to change it. Hence, don't
allow userspace to change the guest's TOD clock by returning
-EOPNOTSUPP.
When userspace changes the guest's TOD clock, KVM updates its
kvm.arch.epoch field and, in addition, the epoch field in all state
descriptions of all VCPUs.
But, under PV, the ultravisor will ignore the epoch field in the state
description and simply overwrite it on next SIE exit with the actual
guest epoch. This leads to KVM having an incorrect view of the guest's
TOD clock: it has updated its internal kvm.arch.epoch field, but the
ultravisor ignores the field in the state description.
Whenever a guest is now waiting for a clock comparator, KVM will
incorrectly calculate the time when the guest should wake up, possibly
causing the guest to sleep for much longer than expected.
With this change, kvm_s390_set_tod() will now take the kvm->lock to be
able to call kvm_s390_pv_is_protected(). Since kvm_s390_set_tod_clock()
also takes kvm->lock, use __kvm_s390_set_tod_clock() instead.
The function kvm_s390_set_tod_clock is now unused, hence remove it.
Update the documentation to indicate the TOD clock attr calls can now
return -EOPNOTSUPP.
Fixes: 0f3035047140 ("KVM: s390: protvirt: Do only reset registers that are accessible")
Reported-by: Marc Hartmayer <mhartmay@linux.ibm.com>
Signed-off-by: Nico Boehr <nrb@linux.ibm.com>
Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Link: https://lore.kernel.org/r/20221011160712.928239-2-nrb@linux.ibm.com
Message-Id: <20221011160712.928239-2-nrb@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
2022-10-11 16:07:12 +00:00
|
|
|
|
|
|
|
out_unlock:
|
|
|
|
mutex_unlock(&kvm->lock);
|
2014-11-25 14:46:02 +00:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2018-04-27 12:36:13 +00:00
|
|
|
static void kvm_s390_get_tod_clock(struct kvm *kvm,
|
|
|
|
struct kvm_s390_vm_tod_clock *gtod)
|
2016-07-26 19:29:44 +00:00
|
|
|
{
|
2021-02-08 15:27:33 +00:00
|
|
|
union tod_clock clk;
|
2016-07-26 19:29:44 +00:00
|
|
|
|
|
|
|
preempt_disable();
|
|
|
|
|
2021-02-08 15:27:33 +00:00
|
|
|
store_tod_clock_ext(&clk);
|
2016-07-26 19:29:44 +00:00
|
|
|
|
2021-02-08 15:27:33 +00:00
|
|
|
gtod->tod = clk.tod + kvm->arch.epoch;
|
2018-04-27 12:36:13 +00:00
|
|
|
gtod->epoch_idx = 0;
|
|
|
|
if (test_kvm_facility(kvm, 139)) {
|
2021-02-08 15:27:33 +00:00
|
|
|
gtod->epoch_idx = clk.ei + kvm->arch.epdx;
|
|
|
|
if (gtod->tod < clk.tod)
|
2018-04-27 12:36:13 +00:00
|
|
|
gtod->epoch_idx += 1;
|
|
|
|
}
|
2016-07-26 19:29:44 +00:00
|
|
|
|
|
|
|
preempt_enable();
|
|
|
|
}
|
|
|
|
|
|
|
|
static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
|
|
|
|
{
|
|
|
|
struct kvm_s390_vm_tod_clock gtod;
|
|
|
|
|
|
|
|
memset(>od, 0, sizeof(gtod));
|
2018-04-27 12:36:13 +00:00
|
|
|
kvm_s390_get_tod_clock(kvm, >od);
|
2016-07-26 19:29:44 +00:00
|
|
|
if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod)))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
|
|
|
|
gtod.epoch_idx, gtod.tod);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-11-25 14:46:02 +00:00
|
|
|
static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
|
|
|
|
{
|
|
|
|
u8 gtod_high = 0;
|
|
|
|
|
|
|
|
if (copy_to_user((void __user *)attr->addr, >od_high,
|
|
|
|
sizeof(gtod_high)))
|
|
|
|
return -EFAULT;
|
2015-10-12 11:27:29 +00:00
|
|
|
VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
|
2014-11-25 14:46:02 +00:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
|
|
|
|
{
|
2015-09-29 14:27:24 +00:00
|
|
|
u64 gtod;
|
2014-11-25 14:46:02 +00:00
|
|
|
|
2015-09-29 14:20:36 +00:00
|
|
|
gtod = kvm_s390_get_tod_clock_fast(kvm);
|
2014-11-25 14:46:02 +00:00
|
|
|
if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod)))
|
|
|
|
return -EFAULT;
|
2015-10-12 11:27:29 +00:00
|
|
|
VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
|
2014-11-25 14:46:02 +00:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (attr->flags)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
switch (attr->attr) {
|
2016-07-26 19:29:44 +00:00
|
|
|
case KVM_S390_VM_TOD_EXT:
|
|
|
|
ret = kvm_s390_get_tod_ext(kvm, attr);
|
|
|
|
break;
|
2014-11-25 14:46:02 +00:00
|
|
|
case KVM_S390_VM_TOD_HIGH:
|
|
|
|
ret = kvm_s390_get_tod_high(kvm, attr);
|
|
|
|
break;
|
|
|
|
case KVM_S390_VM_TOD_LOW:
|
|
|
|
ret = kvm_s390_get_tod_low(kvm, attr);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
ret = -ENXIO;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2015-02-02 14:49:35 +00:00
|
|
|
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
|
|
|
|
{
|
|
|
|
struct kvm_s390_vm_cpu_processor *proc;
|
2016-04-04 11:59:42 +00:00
|
|
|
u16 lowest_ibc, unblocked_ibc;
|
2015-02-02 14:49:35 +00:00
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
mutex_lock(&kvm->lock);
|
2016-06-13 12:50:04 +00:00
|
|
|
if (kvm->created_vcpus) {
|
2015-02-02 14:49:35 +00:00
|
|
|
ret = -EBUSY;
|
|
|
|
goto out;
|
|
|
|
}
|
2020-11-06 07:34:23 +00:00
|
|
|
proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
|
2015-02-02 14:49:35 +00:00
|
|
|
if (!proc) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
if (!copy_from_user(proc, (void __user *)attr->addr,
|
|
|
|
sizeof(*proc))) {
|
2016-04-04 12:27:51 +00:00
|
|
|
kvm->arch.model.cpuid = proc->cpuid;
|
2016-04-04 11:59:42 +00:00
|
|
|
lowest_ibc = sclp.ibc >> 16 & 0xfff;
|
|
|
|
unblocked_ibc = sclp.ibc & 0xfff;
|
2016-06-10 07:22:31 +00:00
|
|
|
if (lowest_ibc && proc->ibc) {
|
2016-04-04 11:59:42 +00:00
|
|
|
if (proc->ibc > unblocked_ibc)
|
|
|
|
kvm->arch.model.ibc = unblocked_ibc;
|
|
|
|
else if (proc->ibc < lowest_ibc)
|
|
|
|
kvm->arch.model.ibc = lowest_ibc;
|
|
|
|
else
|
|
|
|
kvm->arch.model.ibc = proc->ibc;
|
|
|
|
}
|
2015-12-02 07:53:52 +00:00
|
|
|
memcpy(kvm->arch.model.fac_list, proc->fac_list,
|
2015-02-02 14:49:35 +00:00
|
|
|
S390_ARCH_FAC_LIST_SIZE_BYTE);
|
2017-01-18 15:01:02 +00:00
|
|
|
VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
|
|
|
|
kvm->arch.model.ibc,
|
|
|
|
kvm->arch.model.cpuid);
|
|
|
|
VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
|
|
|
|
kvm->arch.model.fac_list[0],
|
|
|
|
kvm->arch.model.fac_list[1],
|
|
|
|
kvm->arch.model.fac_list[2]);
|
2015-02-02 14:49:35 +00:00
|
|
|
} else
|
|
|
|
ret = -EFAULT;
|
|
|
|
kfree(proc);
|
|
|
|
out:
|
|
|
|
mutex_unlock(&kvm->lock);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2015-03-19 16:36:43 +00:00
|
|
|
static int kvm_s390_set_processor_feat(struct kvm *kvm,
|
|
|
|
struct kvm_device_attr *attr)
|
|
|
|
{
|
|
|
|
struct kvm_s390_vm_cpu_feat data;
|
|
|
|
|
|
|
|
if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
|
|
|
|
return -EFAULT;
|
|
|
|
if (!bitmap_subset((unsigned long *) data.feat,
|
|
|
|
kvm_s390_available_cpu_feat,
|
|
|
|
KVM_S390_VM_CPU_FEAT_NR_BITS))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
mutex_lock(&kvm->lock);
|
2017-11-16 11:30:15 +00:00
|
|
|
if (kvm->created_vcpus) {
|
|
|
|
mutex_unlock(&kvm->lock);
|
|
|
|
return -EBUSY;
|
2015-03-19 16:36:43 +00:00
|
|
|
}
|
2022-04-28 20:51:15 +00:00
|
|
|
bitmap_from_arr64(kvm->arch.cpu_feat, data.feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
|
2015-03-19 16:36:43 +00:00
|
|
|
mutex_unlock(&kvm->lock);
|
2017-11-16 11:30:15 +00:00
|
|
|
VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
|
|
|
|
data.feat[0],
|
|
|
|
data.feat[1],
|
|
|
|
data.feat[2]);
|
|
|
|
return 0;
|
2015-03-19 16:36:43 +00:00
|
|
|
}
|
|
|
|
|
2016-05-18 14:03:47 +00:00
|
|
|
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
|
|
|
|
struct kvm_device_attr *attr)
|
|
|
|
{
|
2019-02-18 12:48:25 +00:00
|
|
|
mutex_lock(&kvm->lock);
|
|
|
|
if (kvm->created_vcpus) {
|
|
|
|
mutex_unlock(&kvm->lock);
|
|
|
|
return -EBUSY;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
|
|
|
|
sizeof(struct kvm_s390_vm_cpu_subfunc))) {
|
|
|
|
mutex_unlock(&kvm->lock);
|
|
|
|
return -EFAULT;
|
|
|
|
}
|
|
|
|
mutex_unlock(&kvm->lock);
|
|
|
|
|
2019-02-20 16:38:42 +00:00
|
|
|
VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
|
|
|
|
VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
|
2018-12-28 08:33:35 +00:00
|
|
|
VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
|
2018-12-28 09:59:06 +00:00
|
|
|
VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
|
2018-12-28 09:46:04 +00:00
|
|
|
VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
|
2019-02-20 16:38:42 +00:00
|
|
|
|
2019-02-18 12:48:25 +00:00
|
|
|
return 0;
|
2016-05-18 14:03:47 +00:00
|
|
|
}
|
|
|
|
|
2015-02-02 14:49:35 +00:00
|
|
|
static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
|
|
|
|
{
|
|
|
|
int ret = -ENXIO;
|
|
|
|
|
|
|
|
switch (attr->attr) {
|
|
|
|
case KVM_S390_VM_CPU_PROCESSOR:
|
|
|
|
ret = kvm_s390_set_processor(kvm, attr);
|
|
|
|
break;
|
2015-03-19 16:36:43 +00:00
|
|
|
case KVM_S390_VM_CPU_PROCESSOR_FEAT:
|
|
|
|
ret = kvm_s390_set_processor_feat(kvm, attr);
|
|
|
|
break;
|
2016-05-18 14:03:47 +00:00
|
|
|
case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
|
|
|
|
ret = kvm_s390_set_processor_subfunc(kvm, attr);
|
|
|
|
break;
|
2015-02-02 14:49:35 +00:00
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
|
|
|
|
{
|
|
|
|
struct kvm_s390_vm_cpu_processor *proc;
|
|
|
|
int ret = 0;
|
|
|
|
|
2020-11-06 07:34:23 +00:00
|
|
|
proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
|
2015-02-02 14:49:35 +00:00
|
|
|
if (!proc) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto out;
|
|
|
|
}
|
2016-04-04 12:27:51 +00:00
|
|
|
proc->cpuid = kvm->arch.model.cpuid;
|
2015-02-02 14:49:35 +00:00
|
|
|
proc->ibc = kvm->arch.model.ibc;
|
2015-12-02 07:53:52 +00:00
|
|
|
memcpy(&proc->fac_list, kvm->arch.model.fac_list,
|
|
|
|
S390_ARCH_FAC_LIST_SIZE_BYTE);
|
2017-01-18 15:01:02 +00:00
|
|
|
VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
|
|
|
|
kvm->arch.model.ibc,
|
|
|
|
kvm->arch.model.cpuid);
|
|
|
|
VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
|
|
|
|
kvm->arch.model.fac_list[0],
|
|
|
|
kvm->arch.model.fac_list[1],
|
|
|
|
kvm->arch.model.fac_list[2]);
|
2015-02-02 14:49:35 +00:00
|
|
|
if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
|
|
|
|
ret = -EFAULT;
|
|
|
|
kfree(proc);
|
|
|
|
out:
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
|
|
|
|
{
|
|
|
|
struct kvm_s390_vm_cpu_machine *mach;
|
|
|
|
int ret = 0;
|
|
|
|
|
2020-11-06 07:34:23 +00:00
|
|
|
mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
|
2015-02-02 14:49:35 +00:00
|
|
|
if (!mach) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
get_cpu_id((struct cpuid *) &mach->cpuid);
|
2015-05-06 11:18:59 +00:00
|
|
|
mach->ibc = sclp.ibc;
|
2015-12-02 07:53:52 +00:00
|
|
|
memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
|
2015-02-24 12:51:04 +00:00
|
|
|
S390_ARCH_FAC_LIST_SIZE_BYTE);
|
2021-05-05 20:01:10 +00:00
|
|
|
memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
|
|
|
|
sizeof(stfle_fac_list));
|
2017-01-18 15:01:02 +00:00
|
|
|
VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
|
|
|
|
kvm->arch.model.ibc,
|
|
|
|
kvm->arch.model.cpuid);
|
|
|
|
VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
|
|
|
|
mach->fac_mask[0],
|
|
|
|
mach->fac_mask[1],
|
|
|
|
mach->fac_mask[2]);
|
|
|
|
VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
|
|
|
|
mach->fac_list[0],
|
|
|
|
mach->fac_list[1],
|
|
|
|
mach->fac_list[2]);
|
2015-02-02 14:49:35 +00:00
|
|
|
if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
|
|
|
|
ret = -EFAULT;
|
|
|
|
kfree(mach);
|
|
|
|
out:
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2015-03-19 16:36:43 +00:00
|
|
|
static int kvm_s390_get_processor_feat(struct kvm *kvm,
|
|
|
|
struct kvm_device_attr *attr)
|
|
|
|
{
|
|
|
|
struct kvm_s390_vm_cpu_feat data;
|
|
|
|
|
2022-04-28 20:51:15 +00:00
|
|
|
bitmap_to_arr64(data.feat, kvm->arch.cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
|
2015-03-19 16:36:43 +00:00
|
|
|
if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
|
|
|
|
return -EFAULT;
|
2017-11-16 11:30:15 +00:00
|
|
|
VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
|
|
|
|
data.feat[0],
|
|
|
|
data.feat[1],
|
|
|
|
data.feat[2]);
|
2015-03-19 16:36:43 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int kvm_s390_get_machine_feat(struct kvm *kvm,
|
|
|
|
struct kvm_device_attr *attr)
|
|
|
|
{
|
|
|
|
struct kvm_s390_vm_cpu_feat data;
|
|
|
|
|
2022-04-28 20:51:15 +00:00
|
|
|
bitmap_to_arr64(data.feat, kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
|
2015-03-19 16:36:43 +00:00
|
|
|
if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
|
|
|
|
return -EFAULT;
|
2017-11-16 11:30:15 +00:00
|
|
|
VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
|
|
|
|
data.feat[0],
|
|
|
|
data.feat[1],
|
|
|
|
data.feat[2]);
|
2015-03-19 16:36:43 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-05-18 14:03:47 +00:00
|
|
|
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
|
|
|
|
struct kvm_device_attr *attr)
|
|
|
|
{
|
2019-02-18 12:48:25 +00:00
|
|
|
if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
|
|
|
|
sizeof(struct kvm_s390_vm_cpu_subfunc)))
|
|
|
|
return -EFAULT;
|
|
|
|
|
2019-02-20 16:38:42 +00:00
|
|
|
VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
|
|
|
|
VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
|
2018-12-28 08:33:35 +00:00
|
|
|
VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
|
2018-12-28 09:59:06 +00:00
|
|
|
VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
|
2018-12-28 09:46:04 +00:00
|
|
|
VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
|
|
|
|
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
|
2019-02-20 16:38:42 +00:00
|
|
|
|
2019-02-18 12:48:25 +00:00
|
|
|
return 0;
|
2016-05-18 14:03:47 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
|
|
|
|
struct kvm_device_attr *attr)
|
|
|
|
{
|
|
|
|
if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
|
|
|
|
sizeof(struct kvm_s390_vm_cpu_subfunc)))
|
|
|
|
return -EFAULT;
|
2019-02-20 16:38:42 +00:00
|
|
|
|
|
|
|
VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
|
|
|
|
VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.km)[0],
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
|
|
|
|
VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
|
2018-12-28 08:33:35 +00:00
|
|
|
VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
|
2018-12-28 09:59:06 +00:00
|
|
|
VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
|
2018-12-28 09:46:04 +00:00
|
|
|
VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
|
|
|
|
((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
|
2019-02-20 16:38:42 +00:00
|
|
|
|
2016-05-18 14:03:47 +00:00
|
|
|
return 0;
|
|
|
|
}
|
2019-02-18 12:48:25 +00:00
|
|
|
|
2015-02-02 14:49:35 +00:00
|
|
|
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
|
|
|
|
{
|
|
|
|
int ret = -ENXIO;
|
|
|
|
|
|
|
|
switch (attr->attr) {
|
|
|
|
case KVM_S390_VM_CPU_PROCESSOR:
|
|
|
|
ret = kvm_s390_get_processor(kvm, attr);
|
|
|
|
break;
|
|
|
|
case KVM_S390_VM_CPU_MACHINE:
|
|
|
|
ret = kvm_s390_get_machine(kvm, attr);
|
|
|
|
break;
|
2015-03-19 16:36:43 +00:00
|
|
|
case KVM_S390_VM_CPU_PROCESSOR_FEAT:
|
|
|
|
ret = kvm_s390_get_processor_feat(kvm, attr);
|
|
|
|
break;
|
|
|
|
case KVM_S390_VM_CPU_MACHINE_FEAT:
|
|
|
|
ret = kvm_s390_get_machine_feat(kvm, attr);
|
|
|
|
break;
|
2016-05-18 14:03:47 +00:00
|
|
|
case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
|
|
|
|
ret = kvm_s390_get_processor_subfunc(kvm, attr);
|
|
|
|
break;
|
|
|
|
case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
|
|
|
|
ret = kvm_s390_get_machine_subfunc(kvm, attr);
|
|
|
|
break;
|
2015-02-02 14:49:35 +00:00
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2022-07-14 10:18:23 +00:00
|
|
|
/**
|
|
|
|
* kvm_s390_update_topology_change_report - update CPU topology change report
|
|
|
|
* @kvm: guest KVM description
|
|
|
|
* @val: set or clear the MTCR bit
|
|
|
|
*
|
|
|
|
* Updates the Multiprocessor Topology-Change-Report bit to signal
|
|
|
|
* the guest with a topology change.
|
|
|
|
* This is only relevant if the topology facility is present.
|
|
|
|
*
|
|
|
|
* The SCA version, bsca or esca, doesn't matter as offset is the same.
|
|
|
|
*/
|
|
|
|
static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
|
|
|
|
{
|
|
|
|
union sca_utility new, old;
|
|
|
|
struct bsca_block *sca;
|
|
|
|
|
|
|
|
read_lock(&kvm->arch.sca_lock);
|
|
|
|
sca = kvm->arch.sca;
|
|
|
|
do {
|
|
|
|
old = READ_ONCE(sca->utility);
|
|
|
|
new = old;
|
|
|
|
new.mtcr = val;
|
|
|
|
} while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val);
|
|
|
|
read_unlock(&kvm->arch.sca_lock);
|
|
|
|
}
|
|
|
|
|
2022-07-14 19:43:34 +00:00
|
|
|
static int kvm_s390_set_topo_change_indication(struct kvm *kvm,
|
|
|
|
struct kvm_device_attr *attr)
|
|
|
|
{
|
|
|
|
if (!test_kvm_facility(kvm, 11))
|
|
|
|
return -ENXIO;
|
|
|
|
|
|
|
|
kvm_s390_update_topology_change_report(kvm, !!attr->attr);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int kvm_s390_get_topo_change_indication(struct kvm *kvm,
|
|
|
|
struct kvm_device_attr *attr)
|
|
|
|
{
|
|
|
|
u8 topo;
|
|
|
|
|
|
|
|
if (!test_kvm_facility(kvm, 11))
|
|
|
|
return -ENXIO;
|
|
|
|
|
|
|
|
read_lock(&kvm->arch.sca_lock);
|
|
|
|
topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr;
|
|
|
|
read_unlock(&kvm->arch.sca_lock);
|
|
|
|
|
|
|
|
return put_user(topo, (u8 __user *)attr->addr);
|
|
|
|
}
|
|
|
|
|
2014-04-09 11:13:00 +00:00
|
|
|
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
switch (attr->group) {
|
2014-04-09 11:13:00 +00:00
|
|
|
case KVM_S390_VM_MEM_CTRL:
|
2014-10-31 13:10:41 +00:00
|
|
|
ret = kvm_s390_set_mem_control(kvm, attr);
|
2014-04-09 11:13:00 +00:00
|
|
|
break;
|
2014-11-25 14:46:02 +00:00
|
|
|
case KVM_S390_VM_TOD:
|
|
|
|
ret = kvm_s390_set_tod(kvm, attr);
|
|
|
|
break;
|
2015-02-02 14:49:35 +00:00
|
|
|
case KVM_S390_VM_CPU_MODEL:
|
|
|
|
ret = kvm_s390_set_cpu_model(kvm, attr);
|
|
|
|
break;
|
2014-09-03 08:13:53 +00:00
|
|
|
case KVM_S390_VM_CRYPTO:
|
|
|
|
ret = kvm_s390_vm_set_crypto(kvm, attr);
|
|
|
|
break;
|
2016-08-04 15:54:42 +00:00
|
|
|
case KVM_S390_VM_MIGRATION:
|
|
|
|
ret = kvm_s390_vm_set_migration(kvm, attr);
|
|
|
|
break;
|
2022-07-14 19:43:34 +00:00
|
|
|
case KVM_S390_VM_CPU_TOPOLOGY:
|
|
|
|
ret = kvm_s390_set_topo_change_indication(kvm, attr);
|
|
|
|
break;
|
2014-04-09 11:13:00 +00:00
|
|
|
default:
|
|
|
|
ret = -ENXIO;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
|
|
|
|
{
|
2014-10-31 13:10:41 +00:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
switch (attr->group) {
|
|
|
|
case KVM_S390_VM_MEM_CTRL:
|
|
|
|
ret = kvm_s390_get_mem_control(kvm, attr);
|
|
|
|
break;
|
2014-11-25 14:46:02 +00:00
|
|
|
case KVM_S390_VM_TOD:
|
|
|
|
ret = kvm_s390_get_tod(kvm, attr);
|
|
|
|
break;
|
2015-02-02 14:49:35 +00:00
|
|
|
case KVM_S390_VM_CPU_MODEL:
|
|
|
|
ret = kvm_s390_get_cpu_model(kvm, attr);
|
|
|
|
break;
|
2016-08-04 15:54:42 +00:00
|
|
|
case KVM_S390_VM_MIGRATION:
|
|
|
|
ret = kvm_s390_vm_get_migration(kvm, attr);
|
|
|
|
break;
|
2022-07-14 19:43:34 +00:00
|
|
|
case KVM_S390_VM_CPU_TOPOLOGY:
|
|
|
|
ret = kvm_s390_get_topo_change_indication(kvm, attr);
|
|
|
|
break;
|
2014-10-31 13:10:41 +00:00
|
|
|
default:
|
|
|
|
ret = -ENXIO;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
2014-04-09 11:13:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
switch (attr->group) {
|
2014-04-09 11:13:00 +00:00
|
|
|
case KVM_S390_VM_MEM_CTRL:
|
|
|
|
switch (attr->attr) {
|
|
|
|
case KVM_S390_VM_MEM_ENABLE_CMMA:
|
|
|
|
case KVM_S390_VM_MEM_CLR_CMMA:
|
2016-03-03 08:48:47 +00:00
|
|
|
ret = sclp.has_cmma ? 0 : -ENXIO;
|
|
|
|
break;
|
2014-10-31 13:10:41 +00:00
|
|
|
case KVM_S390_VM_MEM_LIMIT_SIZE:
|
2014-04-09 11:13:00 +00:00
|
|
|
ret = 0;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
ret = -ENXIO;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
2014-11-25 14:46:02 +00:00
|
|
|
case KVM_S390_VM_TOD:
|
|
|
|
switch (attr->attr) {
|
|
|
|
case KVM_S390_VM_TOD_LOW:
|
|
|
|
case KVM_S390_VM_TOD_HIGH:
|
|
|
|
ret = 0;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
ret = -ENXIO;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
2015-02-02 14:49:35 +00:00
|
|
|
case KVM_S390_VM_CPU_MODEL:
|
|
|
|
switch (attr->attr) {
|
|
|
|
case KVM_S390_VM_CPU_PROCESSOR:
|
|
|
|
case KVM_S390_VM_CPU_MACHINE:
|
2015-03-19 16:36:43 +00:00
|
|
|
case KVM_S390_VM_CPU_PROCESSOR_FEAT:
|
|
|
|
case KVM_S390_VM_CPU_MACHINE_FEAT:
|
2016-05-18 14:03:47 +00:00
|
|
|
case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
|
2019-02-18 12:48:25 +00:00
|
|
|
case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
|
2015-02-02 14:49:35 +00:00
|
|
|
ret = 0;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
ret = -ENXIO;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
2014-09-03 08:13:53 +00:00
|
|
|
case KVM_S390_VM_CRYPTO:
|
|
|
|
switch (attr->attr) {
|
|
|
|
case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
|
|
|
|
case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
|
|
|
|
case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
|
|
|
|
case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
|
|
|
|
ret = 0;
|
|
|
|
break;
|
2018-09-25 23:16:39 +00:00
|
|
|
case KVM_S390_VM_CRYPTO_ENABLE_APIE:
|
|
|
|
case KVM_S390_VM_CRYPTO_DISABLE_APIE:
|
|
|
|
ret = ap_instructions_available() ? 0 : -ENXIO;
|
|
|
|
break;
|
2014-09-03 08:13:53 +00:00
|
|
|
default:
|
|
|
|
ret = -ENXIO;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
2016-08-04 15:54:42 +00:00
|
|
|
case KVM_S390_VM_MIGRATION:
|
|
|
|
ret = 0;
|
|
|
|
break;
|
2022-07-14 19:43:34 +00:00
|
|
|
case KVM_S390_VM_CPU_TOPOLOGY:
|
|
|
|
ret = test_kvm_facility(kvm, 11) ? 0 : -ENXIO;
|
|
|
|
break;
|
2014-04-09 11:13:00 +00:00
|
|
|
default:
|
|
|
|
ret = -ENXIO;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2014-09-23 13:23:01 +00:00
|
|
|
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
|
|
|
|
{
|
|
|
|
uint8_t *keys;
|
|
|
|
uint64_t hva;
|
2017-07-10 11:35:48 +00:00
|
|
|
int srcu_idx, i, r = 0;
|
2014-09-23 13:23:01 +00:00
|
|
|
|
|
|
|
if (args->flags != 0)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
/* Is this guest using storage keys? */
|
2018-02-15 15:33:47 +00:00
|
|
|
if (!mm_uses_skeys(current->mm))
|
2014-09-23 13:23:01 +00:00
|
|
|
return KVM_S390_GET_SKEYS_NONE;
|
|
|
|
|
|
|
|
/* Enforce sane limit on memory allocation */
|
|
|
|
if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
|
|
|
|
return -EINVAL;
|
|
|
|
|
2020-11-06 07:34:23 +00:00
|
|
|
keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
|
2014-09-23 13:23:01 +00:00
|
|
|
if (!keys)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2020-06-09 04:33:25 +00:00
|
|
|
mmap_read_lock(current->mm);
|
2017-07-10 11:35:48 +00:00
|
|
|
srcu_idx = srcu_read_lock(&kvm->srcu);
|
2014-09-23 13:23:01 +00:00
|
|
|
for (i = 0; i < args->count; i++) {
|
|
|
|
hva = gfn_to_hva(kvm, args->start_gfn + i);
|
|
|
|
if (kvm_is_error_hva(hva)) {
|
|
|
|
r = -EFAULT;
|
2016-03-08 10:53:35 +00:00
|
|
|
break;
|
2014-09-23 13:23:01 +00:00
|
|
|
}
|
|
|
|
|
2016-05-09 09:22:34 +00:00
|
|
|
r = get_guest_storage_key(current->mm, hva, &keys[i]);
|
|
|
|
if (r)
|
2016-03-08 10:53:35 +00:00
|
|
|
break;
|
2014-09-23 13:23:01 +00:00
|
|
|
}
|
2017-07-10 11:35:48 +00:00
|
|
|
srcu_read_unlock(&kvm->srcu, srcu_idx);
|
2020-06-09 04:33:25 +00:00
|
|
|
mmap_read_unlock(current->mm);
|
2016-03-08 10:53:35 +00:00
|
|
|
|
|
|
|
if (!r) {
|
|
|
|
r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
|
|
|
|
sizeof(uint8_t) * args->count);
|
|
|
|
if (r)
|
|
|
|
r = -EFAULT;
|
2014-09-23 13:23:01 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
kvfree(keys);
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
|
|
|
|
{
|
|
|
|
uint8_t *keys;
|
|
|
|
uint64_t hva;
|
2017-07-10 11:35:48 +00:00
|
|
|
int srcu_idx, i, r = 0;
|
2018-07-18 12:40:22 +00:00
|
|
|
bool unlocked;
|
2014-09-23 13:23:01 +00:00
|
|
|
|
|
|
|
if (args->flags != 0)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
/* Enforce sane limit on memory allocation */
|
|
|
|
if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
|
|
|
|
return -EINVAL;
|
|
|
|
|
2020-11-06 07:34:23 +00:00
|
|
|
keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
|
2014-09-23 13:23:01 +00:00
|
|
|
if (!keys)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
|
|
|
|
sizeof(uint8_t) * args->count);
|
|
|
|
if (r) {
|
|
|
|
r = -EFAULT;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Enable storage key handling for the guest */
|
2015-05-07 13:16:13 +00:00
|
|
|
r = s390_enable_skey();
|
|
|
|
if (r)
|
|
|
|
goto out;
|
2014-09-23 13:23:01 +00:00
|
|
|
|
2018-07-18 12:40:22 +00:00
|
|
|
i = 0;
|
2020-06-09 04:33:25 +00:00
|
|
|
mmap_read_lock(current->mm);
|
2017-07-10 11:35:48 +00:00
|
|
|
srcu_idx = srcu_read_lock(&kvm->srcu);
|
2018-07-18 12:40:22 +00:00
|
|
|
while (i < args->count) {
|
|
|
|
unlocked = false;
|
2014-09-23 13:23:01 +00:00
|
|
|
hva = gfn_to_hva(kvm, args->start_gfn + i);
|
|
|
|
if (kvm_is_error_hva(hva)) {
|
|
|
|
r = -EFAULT;
|
2016-03-08 10:53:35 +00:00
|
|
|
break;
|
2014-09-23 13:23:01 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Lowest order bit is reserved */
|
|
|
|
if (keys[i] & 0x01) {
|
|
|
|
r = -EINVAL;
|
2016-03-08 10:53:35 +00:00
|
|
|
break;
|
2014-09-23 13:23:01 +00:00
|
|
|
}
|
|
|
|
|
2016-05-09 11:08:07 +00:00
|
|
|
r = set_guest_storage_key(current->mm, hva, keys[i], 0);
|
2018-07-18 12:40:22 +00:00
|
|
|
if (r) {
|
2020-08-12 01:39:01 +00:00
|
|
|
r = fixup_user_fault(current->mm, hva,
|
2018-07-18 12:40:22 +00:00
|
|
|
FAULT_FLAG_WRITE, &unlocked);
|
|
|
|
if (r)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (!r)
|
|
|
|
i++;
|
2014-09-23 13:23:01 +00:00
|
|
|
}
|
2017-07-10 11:35:48 +00:00
|
|
|
srcu_read_unlock(&kvm->srcu, srcu_idx);
|
2020-06-09 04:33:25 +00:00
|
|
|
mmap_read_unlock(current->mm);
|
2014-09-23 13:23:01 +00:00
|
|
|
out:
|
|
|
|
kvfree(keys);
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2016-08-04 15:58:47 +00:00
|
|
|
/*
 * Each transmitted block has to start with its base address and length, so
 * including a short run of clean values is cheaper than opening a new block,
 * provided that run is shorter than the size of two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (sizeof(void *) * 2)
/* Same upper bound as the storage key interface, for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
|
|
|
|
|
2018-04-30 16:33:25 +00:00
|
|
|
static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
|
|
|
|
u8 *res, unsigned long bufsize)
|
|
|
|
{
|
|
|
|
unsigned long pgstev, hva, cur_gfn = args->start_gfn;
|
|
|
|
|
|
|
|
args->count = 0;
|
|
|
|
while (args->count < bufsize) {
|
|
|
|
hva = gfn_to_hva(kvm, cur_gfn);
|
|
|
|
/*
|
|
|
|
* We return an error if the first value was invalid, but we
|
|
|
|
* return successfully if at least one value was copied.
|
|
|
|
*/
|
|
|
|
if (kvm_is_error_hva(hva))
|
|
|
|
return args->count ? 0 : -EFAULT;
|
|
|
|
if (get_pgste(kvm->mm, hva, &pgstev) < 0)
|
|
|
|
pgstev = 0;
|
|
|
|
res[args->count++] = (pgstev >> 24) & 0x43;
|
|
|
|
cur_gfn++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-12-06 19:54:25 +00:00
|
|
|
static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
|
|
|
|
gfn_t gfn)
|
|
|
|
{
|
|
|
|
return ____gfn_to_memslot(slots, gfn, true);
|
|
|
|
}
|
|
|
|
|
2018-04-30 16:33:25 +00:00
|
|
|
static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
|
|
|
|
unsigned long cur_gfn)
|
|
|
|
{
|
2021-12-06 19:54:25 +00:00
|
|
|
struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
|
2018-04-30 16:33:25 +00:00
|
|
|
unsigned long ofs = cur_gfn - ms->base_gfn;
|
KVM: Keep memslots in tree-based structures instead of array-based ones
The current memslot code uses a (reverse gfn-ordered) memslot array for
keeping track of them.
Because the memslot array that is currently in use cannot be modified
every memslot management operation (create, delete, move, change flags)
has to make a copy of the whole array so it has a scratch copy to work on.
Strictly speaking, however, it is only necessary to make copy of the
memslot that is being modified, copying all the memslots currently present
is just a limitation of the array-based memslot implementation.
Two memslot sets, however, are still needed so the VM continues to run
on the currently active set while the requested operation is being
performed on the second, currently inactive one.
In order to have two memslot sets, but only one copy of actual memslots
it is necessary to split out the memslot data from the memslot sets.
The memslots themselves should be also kept independent of each other
so they can be individually added or deleted.
These two memslot sets should normally point to the same set of
memslots. They can, however, be desynchronized when performing a
memslot management operation by replacing the memslot to be modified
by its copy. After the operation is complete, both memslot sets once
again point to the same, common set of memslot data.
This commit implements the aforementioned idea.
For tracking of gfns an ordinary rbtree is used since memslots cannot
overlap in the guest address space and so this data structure is
sufficient for ensuring that lookups are done quickly.
The "last used slot" mini-caches (both per-slot set one and per-vCPU one),
that keep track of the last found-by-gfn memslot, are still present in the
new code.
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Message-Id: <17c0cf3663b760a0d3753d4ac08c0753e941b811.1638817641.git.maciej.szmigiero@oracle.com>
2021-12-06 19:54:30 +00:00
|
|
|
struct rb_node *mnode = &ms->gfn_node[slots->node_idx];
|
2018-04-30 16:33:25 +00:00
|
|
|
|
|
|
|
if (ms->base_gfn + ms->npages <= cur_gfn) {
|
KVM: Keep memslots in tree-based structures instead of array-based ones
The current memslot code uses a (reverse gfn-ordered) memslot array for
keeping track of them.
Because the memslot array that is currently in use cannot be modified
every memslot management operation (create, delete, move, change flags)
has to make a copy of the whole array so it has a scratch copy to work on.
Strictly speaking, however, it is only necessary to make copy of the
memslot that is being modified, copying all the memslots currently present
is just a limitation of the array-based memslot implementation.
Two memslot sets, however, are still needed so the VM continues to run
on the currently active set while the requested operation is being
performed on the second, currently inactive one.
In order to have two memslot sets, but only one copy of actual memslots
it is necessary to split out the memslot data from the memslot sets.
The memslots themselves should be also kept independent of each other
so they can be individually added or deleted.
These two memslot sets should normally point to the same set of
memslots. They can, however, be desynchronized when performing a
memslot management operation by replacing the memslot to be modified
by its copy. After the operation is complete, both memslot sets once
again point to the same, common set of memslot data.
This commit implements the aforementioned idea.
For tracking of gfns an ordinary rbtree is used since memslots cannot
overlap in the guest address space and so this data structure is
sufficient for ensuring that lookups are done quickly.
The "last used slot" mini-caches (both per-slot set one and per-vCPU one),
that keep track of the last found-by-gfn memslot, are still present in the
new code.
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Message-Id: <17c0cf3663b760a0d3753d4ac08c0753e941b811.1638817641.git.maciej.szmigiero@oracle.com>
2021-12-06 19:54:30 +00:00
|
|
|
mnode = rb_next(mnode);
|
2018-04-30 16:33:25 +00:00
|
|
|
/* If we are above the highest slot, wrap around */
|
KVM: Keep memslots in tree-based structures instead of array-based ones
The current memslot code uses a (reverse gfn-ordered) memslot array for
keeping track of them.
Because the memslot array that is currently in use cannot be modified
every memslot management operation (create, delete, move, change flags)
has to make a copy of the whole array so it has a scratch copy to work on.
Strictly speaking, however, it is only necessary to make copy of the
memslot that is being modified, copying all the memslots currently present
is just a limitation of the array-based memslot implementation.
Two memslot sets, however, are still needed so the VM continues to run
on the currently active set while the requested operation is being
performed on the second, currently inactive one.
In order to have two memslot sets, but only one copy of actual memslots
it is necessary to split out the memslot data from the memslot sets.
The memslots themselves should be also kept independent of each other
so they can be individually added or deleted.
These two memslot sets should normally point to the same set of
memslots. They can, however, be desynchronized when performing a
memslot management operation by replacing the memslot to be modified
by its copy. After the operation is complete, both memslot sets once
again point to the same, common set of memslot data.
This commit implements the aforementioned idea.
For tracking of gfns an ordinary rbtree is used since memslots cannot
overlap in the guest address space and so this data structure is
sufficient for ensuring that lookups are done quickly.
The "last used slot" mini-caches (both per-slot set one and per-vCPU one),
that keep track of the last found-by-gfn memslot, are still present in the
new code.
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Message-Id: <17c0cf3663b760a0d3753d4ac08c0753e941b811.1638817641.git.maciej.szmigiero@oracle.com>
2021-12-06 19:54:30 +00:00
|
|
|
if (!mnode)
|
|
|
|
mnode = rb_first(&slots->gfn_tree);
|
2018-04-30 16:33:25 +00:00
|
|
|
|
KVM: Keep memslots in tree-based structures instead of array-based ones
The current memslot code uses a (reverse gfn-ordered) memslot array for
keeping track of them.
Because the memslot array that is currently in use cannot be modified
every memslot management operation (create, delete, move, change flags)
has to make a copy of the whole array so it has a scratch copy to work on.
Strictly speaking, however, it is only necessary to make copy of the
memslot that is being modified, copying all the memslots currently present
is just a limitation of the array-based memslot implementation.
Two memslot sets, however, are still needed so the VM continues to run
on the currently active set while the requested operation is being
performed on the second, currently inactive one.
In order to have two memslot sets, but only one copy of actual memslots
it is necessary to split out the memslot data from the memslot sets.
The memslots themselves should be also kept independent of each other
so they can be individually added or deleted.
These two memslot sets should normally point to the same set of
memslots. They can, however, be desynchronized when performing a
memslot management operation by replacing the memslot to be modified
by its copy. After the operation is complete, both memslot sets once
again point to the same, common set of memslot data.
This commit implements the aforementioned idea.
For tracking of gfns an ordinary rbtree is used since memslots cannot
overlap in the guest address space and so this data structure is
sufficient for ensuring that lookups are done quickly.
The "last used slot" mini-caches (both per-slot set one and per-vCPU one),
that keep track of the last found-by-gfn memslot, are still present in the
new code.
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Message-Id: <17c0cf3663b760a0d3753d4ac08c0753e941b811.1638817641.git.maciej.szmigiero@oracle.com>
2021-12-06 19:54:30 +00:00
|
|
|
ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
|
2018-04-30 16:33:25 +00:00
|
|
|
ofs = 0;
|
|
|
|
}
|
|
|
|
ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
|
KVM: Keep memslots in tree-based structures instead of array-based ones
The current memslot code uses a (reverse gfn-ordered) memslot array for
keeping track of them.
Because the memslot array that is currently in use cannot be modified
every memslot management operation (create, delete, move, change flags)
has to make a copy of the whole array so it has a scratch copy to work on.
Strictly speaking, however, it is only necessary to make copy of the
memslot that is being modified, copying all the memslots currently present
is just a limitation of the array-based memslot implementation.
Two memslot sets, however, are still needed so the VM continues to run
on the currently active set while the requested operation is being
performed on the second, currently inactive one.
In order to have two memslot sets, but only one copy of actual memslots
it is necessary to split out the memslot data from the memslot sets.
The memslots themselves should be also kept independent of each other
so they can be individually added or deleted.
These two memslot sets should normally point to the same set of
memslots. They can, however, be desynchronized when performing a
memslot management operation by replacing the memslot to be modified
by its copy. After the operation is complete, both memslot sets once
again point to the same, common set of memslot data.
This commit implements the aforementioned idea.
For tracking of gfns an ordinary rbtree is used since memslots cannot
overlap in the guest address space and so this data structure is
sufficient for ensuring that lookups are done quickly.
The "last used slot" mini-caches (both per-slot set one and per-vCPU one),
that keep track of the last found-by-gfn memslot, are still present in the
new code.
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Message-Id: <17c0cf3663b760a0d3753d4ac08c0753e941b811.1638817641.git.maciej.szmigiero@oracle.com>
2021-12-06 19:54:30 +00:00
|
|
|
while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
|
|
|
|
ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
|
2021-08-14 21:17:03 +00:00
|
|
|
ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
|
2018-04-30 16:33:25 +00:00
|
|
|
}
|
|
|
|
return ms->base_gfn + ofs;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
|
|
|
|
u8 *res, unsigned long bufsize)
|
|
|
|
{
|
|
|
|
unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
|
|
|
|
struct kvm_memslots *slots = kvm_memslots(kvm);
|
|
|
|
struct kvm_memory_slot *ms;
|
|
|
|
|
KVM: Keep memslots in tree-based structures instead of array-based ones
The current memslot code uses a (reverse gfn-ordered) memslot array for
keeping track of them.
Because the memslot array that is currently in use cannot be modified
every memslot management operation (create, delete, move, change flags)
has to make a copy of the whole array so it has a scratch copy to work on.
Strictly speaking, however, it is only necessary to make copy of the
memslot that is being modified, copying all the memslots currently present
is just a limitation of the array-based memslot implementation.
Two memslot sets, however, are still needed so the VM continues to run
on the currently active set while the requested operation is being
performed on the second, currently inactive one.
In order to have two memslot sets, but only one copy of actual memslots
it is necessary to split out the memslot data from the memslot sets.
The memslots themselves should be also kept independent of each other
so they can be individually added or deleted.
These two memslot sets should normally point to the same set of
memslots. They can, however, be desynchronized when performing a
memslot management operation by replacing the memslot to be modified
by its copy. After the operation is complete, both memslot sets once
again point to the same, common set of memslot data.
This commit implements the aforementioned idea.
For tracking of gfns an ordinary rbtree is used since memslots cannot
overlap in the guest address space and so this data structure is
sufficient for ensuring that lookups are done quickly.
The "last used slot" mini-caches (both per-slot set one and per-vCPU one),
that keep track of the last found-by-gfn memslot, are still present in the
new code.
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Message-Id: <17c0cf3663b760a0d3753d4ac08c0753e941b811.1638817641.git.maciej.szmigiero@oracle.com>
2021-12-06 19:54:30 +00:00
|
|
|
if (unlikely(kvm_memslots_empty(slots)))
|
2020-03-20 20:55:40 +00:00
|
|
|
return 0;
|
|
|
|
|
2018-04-30 16:33:25 +00:00
|
|
|
cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
|
|
|
|
ms = gfn_to_memslot(kvm, cur_gfn);
|
|
|
|
args->count = 0;
|
|
|
|
args->start_gfn = cur_gfn;
|
|
|
|
if (!ms)
|
|
|
|
return 0;
|
|
|
|
next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
|
2021-12-06 19:54:29 +00:00
|
|
|
mem_end = kvm_s390_get_gfn_end(slots);
|
2018-04-30 16:33:25 +00:00
|
|
|
|
|
|
|
while (args->count < bufsize) {
|
|
|
|
hva = gfn_to_hva(kvm, cur_gfn);
|
|
|
|
if (kvm_is_error_hva(hva))
|
|
|
|
return 0;
|
|
|
|
/* Decrement only if we actually flipped the bit to 0 */
|
|
|
|
if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
|
|
|
|
atomic64_dec(&kvm->arch.cmma_dirty_pages);
|
|
|
|
if (get_pgste(kvm->mm, hva, &pgstev) < 0)
|
|
|
|
pgstev = 0;
|
|
|
|
/* Save the value */
|
|
|
|
res[args->count++] = (pgstev >> 24) & 0x43;
|
|
|
|
/* If the next bit is too far away, stop. */
|
|
|
|
if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
|
|
|
|
return 0;
|
|
|
|
/* If we reached the previous "next", find the next one */
|
|
|
|
if (cur_gfn == next_gfn)
|
|
|
|
next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
|
|
|
|
/* Reached the end of memory or of the buffer, stop */
|
|
|
|
if ((next_gfn >= mem_end) ||
|
|
|
|
(next_gfn - args->start_gfn >= bufsize))
|
|
|
|
return 0;
|
|
|
|
cur_gfn++;
|
|
|
|
/* Reached the end of the current memslot, take the next one. */
|
|
|
|
if (cur_gfn - ms->base_gfn >= ms->npages) {
|
|
|
|
ms = gfn_to_memslot(kvm, cur_gfn);
|
|
|
|
if (!ms)
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-08-04 15:58:47 +00:00
|
|
|
/*
|
|
|
|
* This function searches for the next page with dirty CMMA attributes, and
|
|
|
|
* saves the attributes in the buffer up to either the end of the buffer or
|
|
|
|
* until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
|
|
|
|
* no trailing clean bytes are saved.
|
|
|
|
* In case no dirty bits were found, or if CMMA was not enabled or used, the
|
|
|
|
* output buffer will indicate 0 as length.
|
|
|
|
*/
|
|
|
|
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
|
|
|
|
struct kvm_s390_cmma_log *args)
|
|
|
|
{
|
2018-04-30 16:33:25 +00:00
|
|
|
unsigned long bufsize;
|
|
|
|
int srcu_idx, peek, ret;
|
|
|
|
u8 *values;
|
2016-08-04 15:58:47 +00:00
|
|
|
|
2018-04-30 16:33:25 +00:00
|
|
|
if (!kvm->arch.use_cmma)
|
2016-08-04 15:58:47 +00:00
|
|
|
return -ENXIO;
|
|
|
|
/* Invalid/unsupported flags were specified */
|
|
|
|
if (args->flags & ~KVM_S390_CMMA_PEEK)
|
|
|
|
return -EINVAL;
|
|
|
|
/* Migration mode query, and we are not doing a migration */
|
|
|
|
peek = !!(args->flags & KVM_S390_CMMA_PEEK);
|
2018-04-30 16:33:25 +00:00
|
|
|
if (!peek && !kvm->arch.migration_mode)
|
2016-08-04 15:58:47 +00:00
|
|
|
return -EINVAL;
|
|
|
|
/* CMMA is disabled or was not used, or the buffer has length zero */
|
|
|
|
bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
|
2018-02-16 11:16:14 +00:00
|
|
|
if (!bufsize || !kvm->mm->context.uses_cmm) {
|
2016-08-04 15:58:47 +00:00
|
|
|
memset(args, 0, sizeof(*args));
|
|
|
|
return 0;
|
|
|
|
}
|
2018-04-30 16:33:25 +00:00
|
|
|
/* We are not peeking, and there are no dirty pages */
|
|
|
|
if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
|
|
|
|
memset(args, 0, sizeof(*args));
|
|
|
|
return 0;
|
2016-08-04 15:58:47 +00:00
|
|
|
}
|
|
|
|
|
2018-04-30 16:33:25 +00:00
|
|
|
values = vmalloc(bufsize);
|
|
|
|
if (!values)
|
2016-08-04 15:58:47 +00:00
|
|
|
return -ENOMEM;
|
|
|
|
|
2020-06-09 04:33:25 +00:00
|
|
|
mmap_read_lock(kvm->mm);
|
2016-08-04 15:58:47 +00:00
|
|
|
srcu_idx = srcu_read_lock(&kvm->srcu);
|
2018-04-30 16:33:25 +00:00
|
|
|
if (peek)
|
|
|
|
ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
|
|
|
|
else
|
|
|
|
ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
|
2016-08-04 15:58:47 +00:00
|
|
|
srcu_read_unlock(&kvm->srcu, srcu_idx);
|
2020-06-09 04:33:25 +00:00
|
|
|
mmap_read_unlock(kvm->mm);
|
2016-08-04 15:58:47 +00:00
|
|
|
|
2018-04-30 16:33:25 +00:00
|
|
|
if (kvm->arch.migration_mode)
|
|
|
|
args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
|
|
|
|
else
|
|
|
|
args->remaining = 0;
|
2016-08-04 15:58:47 +00:00
|
|
|
|
2018-04-30 16:33:25 +00:00
|
|
|
if (copy_to_user((void __user *)args->values, values, args->count))
|
|
|
|
ret = -EFAULT;
|
|
|
|
|
|
|
|
vfree(values);
|
|
|
|
return ret;
|
2016-08-04 15:58:47 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This function sets the CMMA attributes for the given pages. If the input
|
|
|
|
* buffer has zero length, no action is taken, otherwise the attributes are
|
2018-02-16 11:16:14 +00:00
|
|
|
* set and the mm->context.uses_cmm flag is set.
|
2016-08-04 15:58:47 +00:00
|
|
|
*/
|
|
|
|
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
|
|
|
|
const struct kvm_s390_cmma_log *args)
|
|
|
|
{
|
|
|
|
unsigned long hva, mask, pgstev, i;
|
|
|
|
uint8_t *bits;
|
|
|
|
int srcu_idx, r = 0;
|
|
|
|
|
|
|
|
mask = args->mask;
|
|
|
|
|
|
|
|
if (!kvm->arch.use_cmma)
|
|
|
|
return -ENXIO;
|
|
|
|
/* invalid/unsupported flags */
|
|
|
|
if (args->flags != 0)
|
|
|
|
return -EINVAL;
|
|
|
|
/* Enforce sane limit on memory allocation */
|
|
|
|
if (args->count > KVM_S390_CMMA_SIZE_MAX)
|
|
|
|
return -EINVAL;
|
|
|
|
/* Nothing to do */
|
|
|
|
if (args->count == 0)
|
|
|
|
return 0;
|
|
|
|
|
treewide: Use array_size() in vmalloc()
The vmalloc() function has no 2-factor argument form, so multiplication
factors need to be wrapped in array_size(). This patch replaces cases of:
vmalloc(a * b)
with:
vmalloc(array_size(a, b))
as well as handling cases of:
vmalloc(a * b * c)
with:
vmalloc(array3_size(a, b, c))
This does, however, attempt to ignore constant size factors like:
vmalloc(4 * 1024)
though any constants defined via macros get caught up in the conversion.
Any factors with a sizeof() of "unsigned char", "char", and "u8" were
dropped, since they're redundant.
The Coccinelle script used for this was:
// Fix redundant parens around sizeof().
@@
type TYPE;
expression THING, E;
@@
(
vmalloc(
- (sizeof(TYPE)) * E
+ sizeof(TYPE) * E
, ...)
|
vmalloc(
- (sizeof(THING)) * E
+ sizeof(THING) * E
, ...)
)
// Drop single-byte sizes and redundant parens.
@@
expression COUNT;
typedef u8;
typedef __u8;
@@
(
vmalloc(
- sizeof(u8) * (COUNT)
+ COUNT
, ...)
|
vmalloc(
- sizeof(__u8) * (COUNT)
+ COUNT
, ...)
|
vmalloc(
- sizeof(char) * (COUNT)
+ COUNT
, ...)
|
vmalloc(
- sizeof(unsigned char) * (COUNT)
+ COUNT
, ...)
|
vmalloc(
- sizeof(u8) * COUNT
+ COUNT
, ...)
|
vmalloc(
- sizeof(__u8) * COUNT
+ COUNT
, ...)
|
vmalloc(
- sizeof(char) * COUNT
+ COUNT
, ...)
|
vmalloc(
- sizeof(unsigned char) * COUNT
+ COUNT
, ...)
)
// 2-factor product with sizeof(type/expression) and identifier or constant.
@@
type TYPE;
expression THING;
identifier COUNT_ID;
constant COUNT_CONST;
@@
(
vmalloc(
- sizeof(TYPE) * (COUNT_ID)
+ array_size(COUNT_ID, sizeof(TYPE))
, ...)
|
vmalloc(
- sizeof(TYPE) * COUNT_ID
+ array_size(COUNT_ID, sizeof(TYPE))
, ...)
|
vmalloc(
- sizeof(TYPE) * (COUNT_CONST)
+ array_size(COUNT_CONST, sizeof(TYPE))
, ...)
|
vmalloc(
- sizeof(TYPE) * COUNT_CONST
+ array_size(COUNT_CONST, sizeof(TYPE))
, ...)
|
vmalloc(
- sizeof(THING) * (COUNT_ID)
+ array_size(COUNT_ID, sizeof(THING))
, ...)
|
vmalloc(
- sizeof(THING) * COUNT_ID
+ array_size(COUNT_ID, sizeof(THING))
, ...)
|
vmalloc(
- sizeof(THING) * (COUNT_CONST)
+ array_size(COUNT_CONST, sizeof(THING))
, ...)
|
vmalloc(
- sizeof(THING) * COUNT_CONST
+ array_size(COUNT_CONST, sizeof(THING))
, ...)
)
// 2-factor product, only identifiers.
@@
identifier SIZE, COUNT;
@@
vmalloc(
- SIZE * COUNT
+ array_size(COUNT, SIZE)
, ...)
// 3-factor product with 1 sizeof(type) or sizeof(expression), with
// redundant parens removed.
@@
expression THING;
identifier STRIDE, COUNT;
type TYPE;
@@
(
vmalloc(
- sizeof(TYPE) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
vmalloc(
- sizeof(TYPE) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
vmalloc(
- sizeof(TYPE) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
vmalloc(
- sizeof(TYPE) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
vmalloc(
- sizeof(THING) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
vmalloc(
- sizeof(THING) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
vmalloc(
- sizeof(THING) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
vmalloc(
- sizeof(THING) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
)
// 3-factor product with 2 sizeof(variable), with redundant parens removed.
@@
expression THING1, THING2;
identifier COUNT;
type TYPE1, TYPE2;
@@
(
vmalloc(
- sizeof(TYPE1) * sizeof(TYPE2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
vmalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
vmalloc(
- sizeof(THING1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
vmalloc(
- sizeof(THING1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
vmalloc(
- sizeof(TYPE1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
|
vmalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
)
// 3-factor product, only identifiers, with redundant parens removed.
@@
identifier STRIDE, SIZE, COUNT;
@@
(
vmalloc(
- (COUNT) * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vmalloc(
- COUNT * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vmalloc(
- COUNT * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vmalloc(
- (COUNT) * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vmalloc(
- COUNT * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vmalloc(
- (COUNT) * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vmalloc(
- (COUNT) * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vmalloc(
- COUNT * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
)
// Any remaining multi-factor products, first at least 3-factor products
// when they're not all constants...
@@
expression E1, E2, E3;
constant C1, C2, C3;
@@
(
vmalloc(C1 * C2 * C3, ...)
|
vmalloc(
- E1 * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
)
// And then all remaining 2 factors products when they're not all constants.
@@
expression E1, E2;
constant C1, C2;
@@
(
vmalloc(C1 * C2, ...)
|
vmalloc(
- E1 * E2
+ array_size(E1, E2)
, ...)
)
Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-12 21:27:11 +00:00
|
|
|
bits = vmalloc(array_size(sizeof(*bits), args->count));
|
2016-08-04 15:58:47 +00:00
|
|
|
if (!bits)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
r = copy_from_user(bits, (void __user *)args->values, args->count);
|
|
|
|
if (r) {
|
|
|
|
r = -EFAULT;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2020-06-09 04:33:25 +00:00
|
|
|
mmap_read_lock(kvm->mm);
|
2016-08-04 15:58:47 +00:00
|
|
|
srcu_idx = srcu_read_lock(&kvm->srcu);
|
|
|
|
for (i = 0; i < args->count; i++) {
|
|
|
|
hva = gfn_to_hva(kvm, args->start_gfn + i);
|
|
|
|
if (kvm_is_error_hva(hva)) {
|
|
|
|
r = -EFAULT;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
pgstev = bits[i];
|
|
|
|
pgstev = pgstev << 24;
|
2016-08-29 13:56:55 +00:00
|
|
|
mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
|
2016-08-04 15:58:47 +00:00
|
|
|
set_pgste_bits(kvm->mm, hva, mask, pgstev);
|
|
|
|
}
|
|
|
|
srcu_read_unlock(&kvm->srcu, srcu_idx);
|
2020-06-09 04:33:25 +00:00
|
|
|
mmap_read_unlock(kvm->mm);
|
2016-08-04 15:58:47 +00:00
|
|
|
|
2018-02-16 11:16:14 +00:00
|
|
|
if (!kvm->mm->context.uses_cmm) {
|
2020-06-09 04:33:25 +00:00
|
|
|
mmap_write_lock(kvm->mm);
|
2018-02-16 11:16:14 +00:00
|
|
|
kvm->mm->context.uses_cmm = 1;
|
2020-06-09 04:33:25 +00:00
|
|
|
mmap_write_unlock(kvm->mm);
|
2016-08-04 15:58:47 +00:00
|
|
|
}
|
|
|
|
out:
|
|
|
|
vfree(bits);
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2022-06-28 13:56:09 +00:00
|
|
|
/**
|
|
|
|
* kvm_s390_cpus_from_pv - Convert all protected vCPUs in a protected VM to
|
|
|
|
* non protected.
|
|
|
|
* @kvm: the VM whose protected vCPUs are to be converted
|
|
|
|
* @rc: return value for the RC field of the UVC (in case of error)
|
|
|
|
* @rrc: return value for the RRC field of the UVC (in case of error)
|
|
|
|
*
|
|
|
|
* Does not stop in case of error, tries to convert as many
|
|
|
|
* CPUs as possible. In case of error, the RC and RRC of the last error are
|
|
|
|
* returned.
|
|
|
|
*
|
|
|
|
* Return: 0 in case of success, otherwise -EIO
|
|
|
|
*/
|
|
|
|
int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
|
2019-09-30 08:19:18 +00:00
|
|
|
{
|
|
|
|
struct kvm_vcpu *vcpu;
|
2021-11-16 16:04:02 +00:00
|
|
|
unsigned long i;
|
2022-06-28 13:56:09 +00:00
|
|
|
u16 _rc, _rrc;
|
|
|
|
int ret = 0;
|
2019-09-30 08:19:18 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* We ignore failures and try to destroy as many CPUs as possible.
|
|
|
|
* At the same time we must not free the assigned resources when
|
|
|
|
* this fails, as the ultravisor has still access to that memory.
|
|
|
|
* So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
|
|
|
|
* behind.
|
|
|
|
* We want to return the first failure rc and rrc, though.
|
|
|
|
*/
|
|
|
|
kvm_for_each_vcpu(i, vcpu, kvm) {
|
|
|
|
mutex_lock(&vcpu->mutex);
|
2022-06-28 13:56:09 +00:00
|
|
|
if (kvm_s390_pv_destroy_cpu(vcpu, &_rc, &_rrc) && !ret) {
|
|
|
|
*rc = _rc;
|
|
|
|
*rrc = _rrc;
|
2019-09-30 08:19:18 +00:00
|
|
|
ret = -EIO;
|
|
|
|
}
|
|
|
|
mutex_unlock(&vcpu->mutex);
|
|
|
|
}
|
2022-02-09 15:22:17 +00:00
|
|
|
/* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
|
|
|
|
if (use_gisa)
|
|
|
|
kvm_s390_gisa_enable(kvm);
|
2019-09-30 08:19:18 +00:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2022-06-28 13:56:09 +00:00
|
|
|
/**
|
|
|
|
* kvm_s390_cpus_to_pv - Convert all non-protected vCPUs in a protected VM
|
|
|
|
* to protected.
|
|
|
|
* @kvm: the VM whose protected vCPUs are to be converted
|
|
|
|
* @rc: return value for the RC field of the UVC (in case of error)
|
|
|
|
* @rrc: return value for the RRC field of the UVC (in case of error)
|
|
|
|
*
|
|
|
|
* Tries to undo the conversion in case of error.
|
|
|
|
*
|
|
|
|
* Return: 0 in case of success, otherwise -EIO
|
|
|
|
*/
|
2019-09-30 08:19:18 +00:00
|
|
|
static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
|
|
|
|
{
|
2021-11-16 16:04:02 +00:00
|
|
|
unsigned long i;
|
|
|
|
int r = 0;
|
2019-09-30 08:19:18 +00:00
|
|
|
u16 dummy;
|
|
|
|
|
|
|
|
struct kvm_vcpu *vcpu;
|
|
|
|
|
2022-02-09 15:22:17 +00:00
|
|
|
/* Disable the GISA if the ultravisor does not support AIV. */
|
|
|
|
if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
|
|
|
|
kvm_s390_gisa_disable(kvm);
|
|
|
|
|
2019-09-30 08:19:18 +00:00
|
|
|
kvm_for_each_vcpu(i, vcpu, kvm) {
|
|
|
|
mutex_lock(&vcpu->mutex);
|
|
|
|
r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
|
|
|
|
mutex_unlock(&vcpu->mutex);
|
|
|
|
if (r)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (r)
|
|
|
|
kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2022-05-17 16:36:21 +00:00
|
|
|
/*
|
|
|
|
* Here we provide user space with a direct interface to query UV
|
|
|
|
* related data like UV maxima and available features as well as
|
|
|
|
* feature specific data.
|
|
|
|
*
|
|
|
|
* To facilitate future extension of the data structures we'll try to
|
|
|
|
* write data up to the maximum requested length.
|
|
|
|
*/
|
|
|
|
static ssize_t kvm_s390_handle_pv_info(struct kvm_s390_pv_info *info)
|
|
|
|
{
|
|
|
|
ssize_t len_min;
|
|
|
|
|
|
|
|
switch (info->header.id) {
|
|
|
|
case KVM_PV_INFO_VM: {
|
|
|
|
len_min = sizeof(info->header) + sizeof(info->vm);
|
|
|
|
|
|
|
|
if (info->header.len_max < len_min)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
memcpy(info->vm.inst_calls_list,
|
|
|
|
uv_info.inst_calls_list,
|
|
|
|
sizeof(uv_info.inst_calls_list));
|
|
|
|
|
|
|
|
/* It's max cpuid not max cpus, so it's off by one */
|
|
|
|
info->vm.max_cpus = uv_info.max_guest_cpu_id + 1;
|
|
|
|
info->vm.max_guests = uv_info.max_num_sec_conf;
|
|
|
|
info->vm.max_guest_addr = uv_info.max_sec_stor_addr;
|
|
|
|
info->vm.feature_indication = uv_info.uv_feature_indications;
|
|
|
|
|
|
|
|
return len_min;
|
|
|
|
}
|
2022-05-17 16:36:23 +00:00
|
|
|
case KVM_PV_INFO_DUMP: {
|
|
|
|
len_min = sizeof(info->header) + sizeof(info->dump);
|
|
|
|
|
|
|
|
if (info->header.len_max < len_min)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
info->dump.dump_cpu_buffer_len = uv_info.guest_cpu_stor_len;
|
|
|
|
info->dump.dump_config_mem_buffer_per_1m = uv_info.conf_dump_storage_state_len;
|
|
|
|
info->dump.dump_config_finalize_len = uv_info.conf_dump_finalize_len;
|
|
|
|
return len_min;
|
|
|
|
}
|
2022-05-17 16:36:21 +00:00
|
|
|
default:
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-05-17 16:36:24 +00:00
|
|
|
static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd,
|
|
|
|
struct kvm_s390_pv_dmp dmp)
|
|
|
|
{
|
|
|
|
int r = -EINVAL;
|
|
|
|
void __user *result_buff = (void __user *)dmp.buff_addr;
|
|
|
|
|
|
|
|
switch (dmp.subcmd) {
|
|
|
|
case KVM_PV_DUMP_INIT: {
|
|
|
|
if (kvm->arch.pv.dumping)
|
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Block SIE entry as concurrent dump UVCs could lead
|
|
|
|
* to validities.
|
|
|
|
*/
|
|
|
|
kvm_s390_vcpu_block_all(kvm);
|
|
|
|
|
|
|
|
r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
|
|
|
|
UVC_CMD_DUMP_INIT, &cmd->rc, &cmd->rrc);
|
|
|
|
KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP INIT: rc %x rrc %x",
|
|
|
|
cmd->rc, cmd->rrc);
|
|
|
|
if (!r) {
|
|
|
|
kvm->arch.pv.dumping = true;
|
|
|
|
} else {
|
|
|
|
kvm_s390_vcpu_unblock_all(kvm);
|
|
|
|
r = -EINVAL;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case KVM_PV_DUMP_CONFIG_STOR_STATE: {
|
|
|
|
if (!kvm->arch.pv.dumping)
|
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* gaddr is an output parameter since we might stop
|
|
|
|
* early. As dmp will be copied back in our caller, we
|
|
|
|
* don't need to do it ourselves.
|
|
|
|
*/
|
|
|
|
r = kvm_s390_pv_dump_stor_state(kvm, result_buff, &dmp.gaddr, dmp.buff_len,
|
|
|
|
&cmd->rc, &cmd->rrc);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case KVM_PV_DUMP_COMPLETE: {
|
|
|
|
if (!kvm->arch.pv.dumping)
|
|
|
|
break;
|
|
|
|
|
|
|
|
r = -EINVAL;
|
|
|
|
if (dmp.buff_len < uv_info.conf_dump_finalize_len)
|
|
|
|
break;
|
|
|
|
|
|
|
|
r = kvm_s390_pv_dump_complete(kvm, result_buff,
|
|
|
|
&cmd->rc, &cmd->rrc);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
r = -ENOTTY;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2019-09-30 08:19:18 +00:00
|
|
|
static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
|
|
|
|
{
|
KVM: s390: pv: asynchronous destroy for reboot
Until now, destroying a protected guest was an entirely synchronous
operation that could potentially take a very long time, depending on
the size of the guest, due to the time needed to clean up the address
space from protected pages.
This patch implements an asynchronous destroy mechanism, that allows a
protected guest to reboot significantly faster than previously.
This is achieved by clearing the pages of the old guest in background.
In case of reboot, the new guest will be able to run in the same
address space almost immediately.
The old protected guest is then only destroyed when all of its memory
has been destroyed or otherwise made non protected.
Two new PV commands are added for the KVM_S390_PV_COMMAND ioctl:
KVM_PV_ASYNC_CLEANUP_PREPARE: set aside the current protected VM for
later asynchronous teardown. The current KVM VM will then continue
immediately as non-protected. If a protected VM had already been
set aside for asynchronous teardown, but without starting the teardown
process, this call will fail. There can be at most one VM set aside at
any time. Once it is set aside, the protected VM only exists in the
context of the Ultravisor, it is not associated with the KVM VM
anymore. Its protected CPUs have already been destroyed, but not its
memory. This command can be issued again immediately after starting
KVM_PV_ASYNC_CLEANUP_PERFORM, without having to wait for completion.
KVM_PV_ASYNC_CLEANUP_PERFORM: tears down the protected VM previously
set aside using KVM_PV_ASYNC_CLEANUP_PREPARE. Ideally the
KVM_PV_ASYNC_CLEANUP_PERFORM PV command should be issued by userspace
from a separate thread. If a fatal signal is received (or if the
process terminates naturally), the command will terminate immediately
without completing. All protected VMs whose teardown was interrupted
will be put in the need_cleanup list. The rest of the normal KVM
teardown process will take care of properly cleaning up all remaining
protected VMs, including the ones on the need_cleanup list.
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Nico Boehr <nrb@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
Link: https://lore.kernel.org/r/20221111170632.77622-2-imbrenda@linux.ibm.com
Message-Id: <20221111170632.77622-2-imbrenda@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
2022-11-11 17:06:27 +00:00
|
|
|
const bool need_lock = (cmd->cmd != KVM_PV_ASYNC_CLEANUP_PERFORM);
|
|
|
|
void __user *argp = (void __user *)cmd->data;
|
2019-09-30 08:19:18 +00:00
|
|
|
int r = 0;
|
|
|
|
u16 dummy;
|
KVM: s390: pv: asynchronous destroy for reboot
Until now, destroying a protected guest was an entirely synchronous
operation that could potentially take a very long time, depending on
the size of the guest, due to the time needed to clean up the address
space from protected pages.
This patch implements an asynchronous destroy mechanism, that allows a
protected guest to reboot significantly faster than previously.
This is achieved by clearing the pages of the old guest in background.
In case of reboot, the new guest will be able to run in the same
address space almost immediately.
The old protected guest is then only destroyed when all of its memory
has been destroyed or otherwise made non protected.
Two new PV commands are added for the KVM_S390_PV_COMMAND ioctl:
KVM_PV_ASYNC_CLEANUP_PREPARE: set aside the current protected VM for
later asynchronous teardown. The current KVM VM will then continue
immediately as non-protected. If a protected VM had already been
set aside for asynchronous teardown, but without starting the teardown
process, this call will fail. There can be at most one VM set aside at
any time. Once it is set aside, the protected VM only exists in the
context of the Ultravisor, it is not associated with the KVM VM
anymore. Its protected CPUs have already been destroyed, but not its
memory. This command can be issued again immediately after starting
KVM_PV_ASYNC_CLEANUP_PERFORM, without having to wait for completion.
KVM_PV_ASYNC_CLEANUP_PERFORM: tears down the protected VM previously
set aside using KVM_PV_ASYNC_CLEANUP_PREPARE. Ideally the
KVM_PV_ASYNC_CLEANUP_PERFORM PV command should be issued by userspace
from a separate thread. If a fatal signal is received (or if the
process terminates naturally), the command will terminate immediately
without completing. All protected VMs whose teardown was interrupted
will be put in the need_cleanup list. The rest of the normal KVM
teardown process will take care of properly cleaning up all remaining
protected VMs, including the ones on the need_cleanup list.
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Nico Boehr <nrb@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
Link: https://lore.kernel.org/r/20221111170632.77622-2-imbrenda@linux.ibm.com
Message-Id: <20221111170632.77622-2-imbrenda@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
2022-11-11 17:06:27 +00:00
|
|
|
|
|
|
|
if (need_lock)
|
|
|
|
mutex_lock(&kvm->lock);
|
2019-09-30 08:19:18 +00:00
|
|
|
|
|
|
|
switch (cmd->cmd) {
|
|
|
|
case KVM_PV_ENABLE: {
|
|
|
|
r = -EINVAL;
|
|
|
|
if (kvm_s390_pv_is_protected(kvm))
|
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* FMT 4 SIE needs esca. As we never switch back to bsca from
|
|
|
|
* esca, we need no cleanup in the error cases below
|
|
|
|
*/
|
|
|
|
r = sca_switch_to_extended(kvm);
|
|
|
|
if (r)
|
|
|
|
break;
|
|
|
|
|
2020-06-09 04:33:25 +00:00
|
|
|
mmap_write_lock(current->mm);
|
2019-07-16 11:08:37 +00:00
|
|
|
r = gmap_mark_unmergeable();
|
2020-06-09 04:33:25 +00:00
|
|
|
mmap_write_unlock(current->mm);
|
2019-07-16 11:08:37 +00:00
|
|
|
if (r)
|
|
|
|
break;
|
|
|
|
|
2019-09-30 08:19:18 +00:00
|
|
|
r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
|
|
|
|
if (r)
|
|
|
|
break;
|
|
|
|
|
|
|
|
r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
|
|
|
|
if (r)
|
|
|
|
kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
|
2020-02-03 08:13:37 +00:00
|
|
|
|
|
|
|
/* we need to block service interrupts from now on */
|
|
|
|
set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
|
2019-09-30 08:19:18 +00:00
|
|
|
break;
|
|
|
|
}
|
KVM: s390: pv: asynchronous destroy for reboot
Until now, destroying a protected guest was an entirely synchronous
operation that could potentially take a very long time, depending on
the size of the guest, due to the time needed to clean up the address
space from protected pages.
This patch implements an asynchronous destroy mechanism, that allows a
protected guest to reboot significantly faster than previously.
This is achieved by clearing the pages of the old guest in background.
In case of reboot, the new guest will be able to run in the same
address space almost immediately.
The old protected guest is then only destroyed when all of its memory
has been destroyed or otherwise made non protected.
Two new PV commands are added for the KVM_S390_PV_COMMAND ioctl:
KVM_PV_ASYNC_CLEANUP_PREPARE: set aside the current protected VM for
later asynchronous teardown. The current KVM VM will then continue
immediately as non-protected. If a protected VM had already been
set aside for asynchronous teardown, but without starting the teardown
process, this call will fail. There can be at most one VM set aside at
any time. Once it is set aside, the protected VM only exists in the
context of the Ultravisor, it is not associated with the KVM VM
anymore. Its protected CPUs have already been destroyed, but not its
memory. This command can be issued again immediately after starting
KVM_PV_ASYNC_CLEANUP_PERFORM, without having to wait for completion.
KVM_PV_ASYNC_CLEANUP_PERFORM: tears down the protected VM previously
set aside using KVM_PV_ASYNC_CLEANUP_PREPARE. Ideally the
KVM_PV_ASYNC_CLEANUP_PERFORM PV command should be issued by userspace
from a separate thread. If a fatal signal is received (or if the
process terminates naturally), the command will terminate immediately
without completing. All protected VMs whose teardown was interrupted
will be put in the need_cleanup list. The rest of the normal KVM
teardown process will take care of properly cleaning up all remaining
protected VMs, including the ones on the need_cleanup list.
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Nico Boehr <nrb@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
Link: https://lore.kernel.org/r/20221111170632.77622-2-imbrenda@linux.ibm.com
Message-Id: <20221111170632.77622-2-imbrenda@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
2022-11-11 17:06:27 +00:00
|
|
|
case KVM_PV_ASYNC_CLEANUP_PREPARE:
|
|
|
|
r = -EINVAL;
|
|
|
|
if (!kvm_s390_pv_is_protected(kvm) || !async_destroy)
|
|
|
|
break;
|
|
|
|
|
|
|
|
r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
|
|
|
|
/*
|
|
|
|
* If a CPU could not be destroyed, destroy VM will also fail.
|
|
|
|
* There is no point in trying to destroy it. Instead return
|
|
|
|
* the rc and rrc from the first CPU that failed destroying.
|
|
|
|
*/
|
|
|
|
if (r)
|
|
|
|
break;
|
|
|
|
r = kvm_s390_pv_set_aside(kvm, &cmd->rc, &cmd->rrc);
|
|
|
|
|
|
|
|
/* no need to block service interrupts any more */
|
|
|
|
clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
|
|
|
|
break;
|
|
|
|
case KVM_PV_ASYNC_CLEANUP_PERFORM:
|
|
|
|
r = -EINVAL;
|
|
|
|
if (!async_destroy)
|
|
|
|
break;
|
|
|
|
/* kvm->lock must not be held; this is asserted inside the function. */
|
|
|
|
r = kvm_s390_pv_deinit_aside_vm(kvm, &cmd->rc, &cmd->rrc);
|
|
|
|
break;
|
2019-09-30 08:19:18 +00:00
|
|
|
case KVM_PV_DISABLE: {
|
|
|
|
r = -EINVAL;
|
|
|
|
if (!kvm_s390_pv_is_protected(kvm))
|
|
|
|
break;
|
|
|
|
|
|
|
|
r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
|
|
|
|
/*
|
|
|
|
* If a CPU could not be destroyed, destroy VM will also fail.
|
|
|
|
* There is no point in trying to destroy it. Instead return
|
|
|
|
* the rc and rrc from the first CPU that failed destroying.
|
|
|
|
*/
|
|
|
|
if (r)
|
|
|
|
break;
|
KVM: s390: pv: asynchronous destroy for reboot
Until now, destroying a protected guest was an entirely synchronous
operation that could potentially take a very long time, depending on
the size of the guest, due to the time needed to clean up the address
space from protected pages.
This patch implements an asynchronous destroy mechanism, that allows a
protected guest to reboot significantly faster than previously.
This is achieved by clearing the pages of the old guest in background.
In case of reboot, the new guest will be able to run in the same
address space almost immediately.
The old protected guest is then only destroyed when all of its memory
has been destroyed or otherwise made non protected.
Two new PV commands are added for the KVM_S390_PV_COMMAND ioctl:
KVM_PV_ASYNC_CLEANUP_PREPARE: set aside the current protected VM for
later asynchronous teardown. The current KVM VM will then continue
immediately as non-protected. If a protected VM had already been
set aside for asynchronous teardown, but without starting the teardown
process, this call will fail. There can be at most one VM set aside at
any time. Once it is set aside, the protected VM only exists in the
context of the Ultravisor, it is not associated with the KVM VM
anymore. Its protected CPUs have already been destroyed, but not its
memory. This command can be issued again immediately after starting
KVM_PV_ASYNC_CLEANUP_PERFORM, without having to wait for completion.
KVM_PV_ASYNC_CLEANUP_PERFORM: tears down the protected VM previously
set aside using KVM_PV_ASYNC_CLEANUP_PREPARE. Ideally the
KVM_PV_ASYNC_CLEANUP_PERFORM PV command should be issued by userspace
from a separate thread. If a fatal signal is received (or if the
process terminates naturally), the command will terminate immediately
without completing. All protected VMs whose teardown was interrupted
will be put in the need_cleanup list. The rest of the normal KVM
teardown process will take care of properly cleaning up all remaining
protected VMs, including the ones on the need_cleanup list.
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Nico Boehr <nrb@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
Link: https://lore.kernel.org/r/20221111170632.77622-2-imbrenda@linux.ibm.com
Message-Id: <20221111170632.77622-2-imbrenda@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
2022-11-11 17:06:27 +00:00
|
|
|
r = kvm_s390_pv_deinit_cleanup_all(kvm, &cmd->rc, &cmd->rrc);
|
2020-02-03 08:13:37 +00:00
|
|
|
|
|
|
|
/* no need to block service interrupts any more */
|
|
|
|
clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
|
2019-09-30 08:19:18 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case KVM_PV_SET_SEC_PARMS: {
|
|
|
|
struct kvm_s390_pv_sec_parm parms = {};
|
|
|
|
void *hdr;
|
|
|
|
|
|
|
|
r = -EINVAL;
|
|
|
|
if (!kvm_s390_pv_is_protected(kvm))
|
|
|
|
break;
|
|
|
|
|
|
|
|
r = -EFAULT;
|
|
|
|
if (copy_from_user(&parms, argp, sizeof(parms)))
|
|
|
|
break;
|
|
|
|
|
|
|
|
/* Currently restricted to 8KB */
|
|
|
|
r = -EINVAL;
|
|
|
|
if (parms.length > PAGE_SIZE * 2)
|
|
|
|
break;
|
|
|
|
|
|
|
|
r = -ENOMEM;
|
|
|
|
hdr = vmalloc(parms.length);
|
|
|
|
if (!hdr)
|
|
|
|
break;
|
|
|
|
|
|
|
|
r = -EFAULT;
|
|
|
|
if (!copy_from_user(hdr, (void __user *)parms.origin,
|
|
|
|
parms.length))
|
|
|
|
r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
|
|
|
|
&cmd->rc, &cmd->rrc);
|
|
|
|
|
|
|
|
vfree(hdr);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case KVM_PV_UNPACK: {
|
|
|
|
struct kvm_s390_pv_unp unp = {};
|
|
|
|
|
|
|
|
r = -EINVAL;
|
2020-10-20 10:12:07 +00:00
|
|
|
if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
|
2019-09-30 08:19:18 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
r = -EFAULT;
|
|
|
|
if (copy_from_user(&unp, argp, sizeof(unp)))
|
|
|
|
break;
|
|
|
|
|
|
|
|
r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
|
|
|
|
&cmd->rc, &cmd->rrc);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case KVM_PV_VERIFY: {
|
|
|
|
r = -EINVAL;
|
|
|
|
if (!kvm_s390_pv_is_protected(kvm))
|
|
|
|
break;
|
|
|
|
|
|
|
|
r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
|
|
|
|
UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
|
|
|
|
KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
|
|
|
|
cmd->rrc);
|
|
|
|
break;
|
|
|
|
}
|
2019-05-09 11:07:21 +00:00
|
|
|
case KVM_PV_PREP_RESET: {
|
|
|
|
r = -EINVAL;
|
|
|
|
if (!kvm_s390_pv_is_protected(kvm))
|
|
|
|
break;
|
|
|
|
|
|
|
|
r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
|
|
|
|
UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
|
|
|
|
KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
|
|
|
|
cmd->rc, cmd->rrc);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case KVM_PV_UNSHARE_ALL: {
|
|
|
|
r = -EINVAL;
|
|
|
|
if (!kvm_s390_pv_is_protected(kvm))
|
|
|
|
break;
|
|
|
|
|
|
|
|
r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
|
|
|
|
UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
|
|
|
|
KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
|
|
|
|
cmd->rc, cmd->rrc);
|
|
|
|
break;
|
|
|
|
}
|
2022-05-17 16:36:21 +00:00
|
|
|
case KVM_PV_INFO: {
|
|
|
|
struct kvm_s390_pv_info info = {};
|
|
|
|
ssize_t data_len;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* No need to check the VM protection here.
|
|
|
|
*
|
|
|
|
* Maybe user space wants to query some of the data
|
|
|
|
* when the VM is still unprotected. If we see the
|
|
|
|
* need to fence a new data command we can still
|
|
|
|
* return an error in the info handler.
|
|
|
|
*/
|
|
|
|
|
|
|
|
r = -EFAULT;
|
|
|
|
if (copy_from_user(&info, argp, sizeof(info.header)))
|
|
|
|
break;
|
|
|
|
|
|
|
|
r = -EINVAL;
|
|
|
|
if (info.header.len_max < sizeof(info.header))
|
|
|
|
break;
|
|
|
|
|
|
|
|
data_len = kvm_s390_handle_pv_info(&info);
|
|
|
|
if (data_len < 0) {
|
|
|
|
r = data_len;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* If a data command struct is extended (multiple
|
|
|
|
* times) this can be used to determine how much of it
|
|
|
|
* is valid.
|
|
|
|
*/
|
|
|
|
info.header.len_written = data_len;
|
|
|
|
|
|
|
|
r = -EFAULT;
|
|
|
|
if (copy_to_user(argp, &info, data_len))
|
|
|
|
break;
|
|
|
|
|
|
|
|
r = 0;
|
|
|
|
break;
|
|
|
|
}
|
2022-05-17 16:36:24 +00:00
|
|
|
case KVM_PV_DUMP: {
|
|
|
|
struct kvm_s390_pv_dmp dmp;
|
|
|
|
|
|
|
|
r = -EINVAL;
|
|
|
|
if (!kvm_s390_pv_is_protected(kvm))
|
|
|
|
break;
|
|
|
|
|
|
|
|
r = -EFAULT;
|
|
|
|
if (copy_from_user(&dmp, argp, sizeof(dmp)))
|
|
|
|
break;
|
|
|
|
|
|
|
|
r = kvm_s390_pv_dmp(kvm, cmd, dmp);
|
|
|
|
if (r)
|
|
|
|
break;
|
|
|
|
|
|
|
|
if (copy_to_user(argp, &dmp, sizeof(dmp))) {
|
|
|
|
r = -EFAULT;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
break;
|
|
|
|
}
|
2019-09-30 08:19:18 +00:00
|
|
|
default:
|
|
|
|
r = -ENOTTY;
|
|
|
|
}
|
KVM: s390: pv: asynchronous destroy for reboot
Until now, destroying a protected guest was an entirely synchronous
operation that could potentially take a very long time, depending on
the size of the guest, due to the time needed to clean up the address
space from protected pages.
This patch implements an asynchronous destroy mechanism, that allows a
protected guest to reboot significantly faster than previously.
This is achieved by clearing the pages of the old guest in background.
In case of reboot, the new guest will be able to run in the same
address space almost immediately.
The old protected guest is then only destroyed when all of its memory
has been destroyed or otherwise made non protected.
Two new PV commands are added for the KVM_S390_PV_COMMAND ioctl:
KVM_PV_ASYNC_CLEANUP_PREPARE: set aside the current protected VM for
later asynchronous teardown. The current KVM VM will then continue
immediately as non-protected. If a protected VM had already been
set aside for asynchronous teardown, but without starting the teardown
process, this call will fail. There can be at most one VM set aside at
any time. Once it is set aside, the protected VM only exists in the
context of the Ultravisor, it is not associated with the KVM VM
anymore. Its protected CPUs have already been destroyed, but not its
memory. This command can be issued again immediately after starting
KVM_PV_ASYNC_CLEANUP_PERFORM, without having to wait for completion.
KVM_PV_ASYNC_CLEANUP_PERFORM: tears down the protected VM previously
set aside using KVM_PV_ASYNC_CLEANUP_PREPARE. Ideally the
KVM_PV_ASYNC_CLEANUP_PERFORM PV command should be issued by userspace
from a separate thread. If a fatal signal is received (or if the
process terminates naturally), the command will terminate immediately
without completing. All protected VMs whose teardown was interrupted
will be put in the need_cleanup list. The rest of the normal KVM
teardown process will take care of properly cleaning up all remaining
protected VMs, including the ones on the need_cleanup list.
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Nico Boehr <nrb@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
Link: https://lore.kernel.org/r/20221111170632.77622-2-imbrenda@linux.ibm.com
Message-Id: <20221111170632.77622-2-imbrenda@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
2022-11-11 17:06:27 +00:00
|
|
|
if (need_lock)
|
|
|
|
mutex_unlock(&kvm->lock);
|
|
|
|
|
2019-09-30 08:19:18 +00:00
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2023-02-06 16:45:56 +00:00
|
|
|
/*
 * Checks shared by all memory operations.
 *
 * @mop: the memory operation requested by user space
 * @supported_flags: the flags the calling operation accepts
 *
 * Without KVM_S390_MEMOP_F_SKEY_PROTECTION the key in @mop is reset to 0.
 *
 * Returns 0 if @mop is acceptable, -EINVAL for unsupported flags, a size of
 * zero or a key above 0xf, -E2BIG for a size above MEM_OP_MAX_SIZE.
 */
static int mem_op_validate_common(struct kvm_s390_mem_op *mop, u64 supported_flags)
{
	if (mop->flags & ~supported_flags)
		return -EINVAL;
	if (mop->size == 0)
		return -EINVAL;
	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION)) {
		/* No key checking requested, ignore whatever was passed in. */
		mop->key = 0;
		return 0;
	}

	return mop->key > 0xf ? -EINVAL : 0;
}
|
|
|
|
|
2023-02-06 16:45:57 +00:00
|
|
|
static int kvm_s390_vm_mem_op_abs(struct kvm *kvm, struct kvm_s390_mem_op *mop)
|
2022-02-11 18:22:11 +00:00
|
|
|
{
|
|
|
|
void __user *uaddr = (void __user *)mop->buf;
|
2023-02-06 16:45:58 +00:00
|
|
|
enum gacc_mode acc_mode;
|
2022-02-11 18:22:11 +00:00
|
|
|
void *tmpbuf = NULL;
|
|
|
|
int r, srcu_idx;
|
|
|
|
|
2023-02-06 16:45:56 +00:00
|
|
|
r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION |
|
|
|
|
KVM_S390_MEMOP_F_CHECK_ONLY);
|
|
|
|
if (r)
|
|
|
|
return r;
|
|
|
|
|
2022-02-11 18:22:11 +00:00
|
|
|
if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
|
|
|
|
tmpbuf = vmalloc(mop->size);
|
|
|
|
if (!tmpbuf)
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
srcu_idx = srcu_read_lock(&kvm->srcu);
|
|
|
|
|
|
|
|
if (kvm_is_error_gpa(kvm, mop->gaddr)) {
|
|
|
|
r = PGM_ADDRESSING;
|
|
|
|
goto out_unlock;
|
|
|
|
}
|
|
|
|
|
2023-02-06 16:45:58 +00:00
|
|
|
acc_mode = mop->op == KVM_S390_MEMOP_ABSOLUTE_READ ? GACC_FETCH : GACC_STORE;
|
|
|
|
if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
|
|
|
|
r = check_gpa_range(kvm, mop->gaddr, mop->size, acc_mode, mop->key);
|
|
|
|
goto out_unlock;
|
2022-02-11 18:22:11 +00:00
|
|
|
}
|
2023-02-06 16:45:58 +00:00
|
|
|
if (acc_mode == GACC_FETCH) {
|
|
|
|
r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
|
|
|
|
mop->size, GACC_FETCH, mop->key);
|
|
|
|
if (r)
|
|
|
|
goto out_unlock;
|
|
|
|
if (copy_to_user(uaddr, tmpbuf, mop->size))
|
|
|
|
r = -EFAULT;
|
|
|
|
} else {
|
|
|
|
if (copy_from_user(tmpbuf, uaddr, mop->size)) {
|
|
|
|
r = -EFAULT;
|
|
|
|
goto out_unlock;
|
2022-02-11 18:22:11 +00:00
|
|
|
}
|
2023-02-06 16:45:58 +00:00
|
|
|
r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
|
|
|
|
mop->size, GACC_STORE, mop->key);
|
2022-02-11 18:22:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
out_unlock:
|
|
|
|
srcu_read_unlock(&kvm->srcu, srcu_idx);
|
|
|
|
|
|
|
|
vfree(tmpbuf);
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2023-02-06 16:46:00 +00:00
|
|
|
static int kvm_s390_vm_mem_op_cmpxchg(struct kvm *kvm, struct kvm_s390_mem_op *mop)
|
|
|
|
{
|
|
|
|
void __user *uaddr = (void __user *)mop->buf;
|
|
|
|
void __user *old_addr = (void __user *)mop->old_addr;
|
|
|
|
union {
|
|
|
|
__uint128_t quad;
|
|
|
|
char raw[sizeof(__uint128_t)];
|
|
|
|
} old = { .quad = 0}, new = { .quad = 0 };
|
|
|
|
unsigned int off_in_quad = sizeof(new) - mop->size;
|
|
|
|
int r, srcu_idx;
|
|
|
|
bool success;
|
|
|
|
|
|
|
|
r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION);
|
|
|
|
if (r)
|
|
|
|
return r;
|
|
|
|
/*
|
|
|
|
* This validates off_in_quad. Checking that size is a power
|
|
|
|
* of two is not necessary, as cmpxchg_guest_abs_with_key
|
|
|
|
* takes care of that
|
|
|
|
*/
|
|
|
|
if (mop->size > sizeof(new))
|
|
|
|
return -EINVAL;
|
|
|
|
if (copy_from_user(&new.raw[off_in_quad], uaddr, mop->size))
|
|
|
|
return -EFAULT;
|
|
|
|
if (copy_from_user(&old.raw[off_in_quad], old_addr, mop->size))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
srcu_idx = srcu_read_lock(&kvm->srcu);
|
|
|
|
|
|
|
|
if (kvm_is_error_gpa(kvm, mop->gaddr)) {
|
|
|
|
r = PGM_ADDRESSING;
|
|
|
|
goto out_unlock;
|
|
|
|
}
|
|
|
|
|
|
|
|
r = cmpxchg_guest_abs_with_key(kvm, mop->gaddr, mop->size, &old.quad,
|
|
|
|
new.quad, mop->key, &success);
|
|
|
|
if (!success && copy_to_user(old_addr, &old.raw[off_in_quad], mop->size))
|
|
|
|
r = -EFAULT;
|
|
|
|
|
|
|
|
out_unlock:
|
|
|
|
srcu_read_unlock(&kvm->srcu, srcu_idx);
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2023-02-06 16:45:57 +00:00
|
|
|
static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* This is technically a heuristic only, if the kvm->lock is not
|
|
|
|
* taken, it is not guaranteed that the vm is/remains non-protected.
|
|
|
|
* This is ok from a kernel perspective, wrongdoing is detected
|
|
|
|
* on the access, -EFAULT is returned and the vm may crash the
|
|
|
|
* next time it accesses the memory in question.
|
|
|
|
* There is no sane usecase to do switching and a memop on two
|
|
|
|
* different CPUs at the same time.
|
|
|
|
*/
|
|
|
|
if (kvm_s390_pv_get_handle(kvm))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
switch (mop->op) {
|
|
|
|
case KVM_S390_MEMOP_ABSOLUTE_READ:
|
|
|
|
case KVM_S390_MEMOP_ABSOLUTE_WRITE:
|
|
|
|
return kvm_s390_vm_mem_op_abs(kvm, mop);
|
2023-02-06 16:46:00 +00:00
|
|
|
case KVM_S390_MEMOP_ABSOLUTE_CMPXCHG:
|
|
|
|
return kvm_s390_vm_mem_op_cmpxchg(kvm, mop);
|
2023-02-06 16:45:57 +00:00
|
|
|
default:
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-03-25 17:47:20 +00:00
|
|
|
long kvm_arch_vm_ioctl(struct file *filp,
|
|
|
|
unsigned int ioctl, unsigned long arg)
|
|
|
|
{
|
|
|
|
struct kvm *kvm = filp->private_data;
|
|
|
|
void __user *argp = (void __user *)arg;
|
2014-04-09 11:13:00 +00:00
|
|
|
struct kvm_device_attr attr;
|
2008-03-25 17:47:20 +00:00
|
|
|
int r;
|
|
|
|
|
|
|
|
switch (ioctl) {
|
KVM: s390: interrupt subsystem, cpu timer, waitpsw
This patch contains the s390 interrupt subsystem (similar to in kernel apic)
including timer interrupts (similar to in-kernel-pit) and enabled wait
(similar to in kernel hlt).
In order to achieve that, this patch also introduces intercept handling
for instruction intercepts, and it implements load control instructions.
This patch introduces an ioctl KVM_S390_INTERRUPT which is valid for both
the vm file descriptors and the vcpu file descriptors. In case this ioctl is
issued against a vm file descriptor, the interrupt is considered floating.
Floating interrupts may be delivered to any virtual cpu in the configuration.
The following interrupts are supported:
SIGP STOP - interprocessor signal that stops a remote cpu
SIGP SET PREFIX - interprocessor signal that sets the prefix register of a
(stopped) remote cpu
INT EMERGENCY - interprocessor interrupt, usually used to signal need_reshed
and for smp_call_function() in the guest.
PROGRAM INT - exception during program execution such as page fault, illegal
instruction and friends
RESTART - interprocessor signal that starts a stopped cpu
INT VIRTIO - floating interrupt for virtio signalisation
INT SERVICE - floating interrupt for signalisations from the system
service processor
struct kvm_s390_interrupt, which is submitted as ioctl parameter when injecting
an interrupt, also carrys parameter data for interrupts along with the interrupt
type. Interrupts on s390 usually have a state that represents the current
operation, or identifies which device has caused the interruption on s390.
kvm_s390_handle_wait() does handle waitpsw in two flavors: in case of a
disabled wait (that is, disabled for interrupts), we exit to userspace. In case
of an enabled wait we set up a timer that equals the cpu clock comparator value
and sleep on a wait queue.
[christian: change virtio interrupt to 0x2603]
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
2008-03-25 17:47:26 +00:00
|
|
|
case KVM_S390_INTERRUPT: {
|
|
|
|
struct kvm_s390_interrupt s390int;
|
|
|
|
|
|
|
|
r = -EFAULT;
|
|
|
|
if (copy_from_user(&s390int, argp, sizeof(s390int)))
|
|
|
|
break;
|
|
|
|
r = kvm_s390_inject_vm(kvm, &s390int);
|
|
|
|
break;
|
|
|
|
}
|
2013-07-15 11:36:01 +00:00
|
|
|
case KVM_CREATE_IRQCHIP: {
|
|
|
|
struct kvm_irq_routing_entry routing;
|
|
|
|
|
|
|
|
r = -EINVAL;
|
|
|
|
if (kvm->arch.use_irqchip) {
|
|
|
|
/* Set up dummy routing. */
|
|
|
|
memset(&routing, 0, sizeof(routing));
|
2015-08-06 17:05:54 +00:00
|
|
|
r = kvm_set_irq_routing(kvm, &routing, 0, 0);
|
2013-07-15 11:36:01 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2014-04-09 11:13:00 +00:00
|
|
|
case KVM_SET_DEVICE_ATTR: {
|
|
|
|
r = -EFAULT;
|
|
|
|
if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
|
|
|
|
break;
|
|
|
|
r = kvm_s390_vm_set_attr(kvm, &attr);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case KVM_GET_DEVICE_ATTR: {
|
|
|
|
r = -EFAULT;
|
|
|
|
if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
|
|
|
|
break;
|
|
|
|
r = kvm_s390_vm_get_attr(kvm, &attr);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case KVM_HAS_DEVICE_ATTR: {
|
|
|
|
r = -EFAULT;
|
|
|
|
if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
|
|
|
|
break;
|
|
|
|
r = kvm_s390_vm_has_attr(kvm, &attr);
|
|
|
|
break;
|
|
|
|
}
|
2014-09-23 13:23:01 +00:00
|
|
|
case KVM_S390_GET_SKEYS: {
|
|
|
|
struct kvm_s390_skeys args;
|
|
|
|
|
|
|
|
r = -EFAULT;
|
|
|
|
if (copy_from_user(&args, argp,
|
|
|
|
sizeof(struct kvm_s390_skeys)))
|
|
|
|
break;
|
|
|
|
r = kvm_s390_get_skeys(kvm, &args);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case KVM_S390_SET_SKEYS: {
|
|
|
|
struct kvm_s390_skeys args;
|
|
|
|
|
|
|
|
r = -EFAULT;
|
|
|
|
if (copy_from_user(&args, argp,
|
|
|
|
sizeof(struct kvm_s390_skeys)))
|
|
|
|
break;
|
|
|
|
r = kvm_s390_set_skeys(kvm, &args);
|
|
|
|
break;
|
|
|
|
}
|
2016-08-04 15:58:47 +00:00
|
|
|
case KVM_S390_GET_CMMA_BITS: {
|
|
|
|
struct kvm_s390_cmma_log args;
|
|
|
|
|
|
|
|
r = -EFAULT;
|
|
|
|
if (copy_from_user(&args, argp, sizeof(args)))
|
|
|
|
break;
|
2017-12-22 09:54:20 +00:00
|
|
|
mutex_lock(&kvm->slots_lock);
|
2016-08-04 15:58:47 +00:00
|
|
|
r = kvm_s390_get_cmma_bits(kvm, &args);
|
2017-12-22 09:54:20 +00:00
|
|
|
mutex_unlock(&kvm->slots_lock);
|
2016-08-04 15:58:47 +00:00
|
|
|
if (!r) {
|
|
|
|
r = copy_to_user(argp, &args, sizeof(args));
|
|
|
|
if (r)
|
|
|
|
r = -EFAULT;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case KVM_S390_SET_CMMA_BITS: {
|
|
|
|
struct kvm_s390_cmma_log args;
|
|
|
|
|
|
|
|
r = -EFAULT;
|
|
|
|
if (copy_from_user(&args, argp, sizeof(args)))
|
|
|
|
break;
|
2017-12-22 09:54:20 +00:00
|
|
|
mutex_lock(&kvm->slots_lock);
|
2016-08-04 15:58:47 +00:00
|
|
|
r = kvm_s390_set_cmma_bits(kvm, &args);
|
2017-12-22 09:54:20 +00:00
|
|
|
mutex_unlock(&kvm->slots_lock);
|
2016-08-04 15:58:47 +00:00
|
|
|
break;
|
|
|
|
}
|
2019-09-30 08:19:18 +00:00
|
|
|
case KVM_S390_PV_COMMAND: {
|
|
|
|
struct kvm_pv_cmd args;
|
|
|
|
|
2021-10-08 20:31:12 +00:00
|
|
|
/* protvirt means user cpu state */
|
|
|
|
kvm_s390_set_user_cpu_state_ctrl(kvm);
|
2019-09-30 08:19:18 +00:00
|
|
|
r = 0;
|
|
|
|
if (!is_prot_virt_host()) {
|
|
|
|
r = -EINVAL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (copy_from_user(&args, argp, sizeof(args))) {
|
|
|
|
r = -EFAULT;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (args.flags) {
|
|
|
|
r = -EINVAL;
|
|
|
|
break;
|
|
|
|
}
|
KVM: s390: pv: asynchronous destroy for reboot
Until now, destroying a protected guest was an entirely synchronous
operation that could potentially take a very long time, depending on
the size of the guest, due to the time needed to clean up the address
space from protected pages.
This patch implements an asynchronous destroy mechanism, that allows a
protected guest to reboot significantly faster than previously.
This is achieved by clearing the pages of the old guest in background.
In case of reboot, the new guest will be able to run in the same
address space almost immediately.
The old protected guest is then only destroyed when all of its memory
has been destroyed or otherwise made non protected.
Two new PV commands are added for the KVM_S390_PV_COMMAND ioctl:
KVM_PV_ASYNC_CLEANUP_PREPARE: set aside the current protected VM for
later asynchronous teardown. The current KVM VM will then continue
immediately as non-protected. If a protected VM had already been
set aside for asynchronous teardown, but without starting the teardown
process, this call will fail. There can be at most one VM set aside at
any time. Once it is set aside, the protected VM only exists in the
context of the Ultravisor, it is not associated with the KVM VM
anymore. Its protected CPUs have already been destroyed, but not its
memory. This command can be issued again immediately after starting
KVM_PV_ASYNC_CLEANUP_PERFORM, without having to wait for completion.
KVM_PV_ASYNC_CLEANUP_PERFORM: tears down the protected VM previously
set aside using KVM_PV_ASYNC_CLEANUP_PREPARE. Ideally the
KVM_PV_ASYNC_CLEANUP_PERFORM PV command should be issued by userspace
from a separate thread. If a fatal signal is received (or if the
process terminates naturally), the command will terminate immediately
without completing. All protected VMs whose teardown was interrupted
will be put in the need_cleanup list. The rest of the normal KVM
teardown process will take care of properly cleaning up all remaining
protected VMs, including the ones on the need_cleanup list.
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Nico Boehr <nrb@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
Link: https://lore.kernel.org/r/20221111170632.77622-2-imbrenda@linux.ibm.com
Message-Id: <20221111170632.77622-2-imbrenda@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
2022-11-11 17:06:27 +00:00
|
|
|
/* must be called without kvm->lock */
|
2019-09-30 08:19:18 +00:00
|
|
|
r = kvm_s390_handle_pv(kvm, &args);
|
|
|
|
if (copy_to_user(argp, &args, sizeof(args))) {
|
|
|
|
r = -EFAULT;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2022-02-11 18:22:11 +00:00
|
|
|
case KVM_S390_MEM_OP: {
|
|
|
|
struct kvm_s390_mem_op mem_op;
|
|
|
|
|
|
|
|
if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
|
|
|
|
r = kvm_s390_vm_mem_op(kvm, &mem_op);
|
|
|
|
else
|
|
|
|
r = -EFAULT;
|
|
|
|
break;
|
|
|
|
}
|
2022-06-06 20:33:24 +00:00
|
|
|
case KVM_S390_ZPCI_OP: {
|
|
|
|
struct kvm_s390_zpci_op args;
|
|
|
|
|
|
|
|
r = -EINVAL;
|
|
|
|
if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
|
|
|
|
break;
|
|
|
|
if (copy_from_user(&args, argp, sizeof(args))) {
|
|
|
|
r = -EFAULT;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
r = kvm_s390_pci_zpci_op(kvm, &args);
|
|
|
|
break;
|
|
|
|
}
|
2008-03-25 17:47:20 +00:00
|
|
|
default:
|
2009-08-26 11:57:07 +00:00
|
|
|
r = -ENOTTY;
|
2008-03-25 17:47:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2015-01-13 16:33:26 +00:00
|
|
|
static int kvm_s390_apxa_installed(void)
|
|
|
|
{
|
2018-09-25 23:16:18 +00:00
|
|
|
struct ap_config_info info;
|
2015-01-13 16:33:26 +00:00
|
|
|
|
2018-09-25 23:16:18 +00:00
|
|
|
if (ap_instructions_available()) {
|
|
|
|
if (ap_qci(&info) == 0)
|
|
|
|
return info.apxa;
|
2015-01-13 16:33:26 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-09-25 23:16:18 +00:00
|
|
|
/*
|
|
|
|
* The format of the crypto control block (CRYCB) is specified in the 3 low
|
|
|
|
* order bits of the CRYCB designation (CRYCBD) field as follows:
|
|
|
|
* Format 0: Neither the message security assist extension 3 (MSAX3) nor the
|
|
|
|
* AP extended addressing (APXA) facility are installed.
|
|
|
|
* Format 1: The APXA facility is not installed but the MSAX3 facility is.
|
|
|
|
* Format 2: Both the APXA and MSAX3 facilities are installed
|
|
|
|
*/
|
2015-01-13 16:33:26 +00:00
|
|
|
static void kvm_s390_set_crycb_format(struct kvm *kvm)
|
|
|
|
{
|
|
|
|
kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
|
|
|
|
|
2018-09-25 23:16:18 +00:00
|
|
|
/* Clear the CRYCB format bits - i.e., set format 0 by default */
|
|
|
|
kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
|
|
|
|
|
|
|
|
/* Check whether MSAX3 is installed */
|
|
|
|
if (!test_kvm_facility(kvm, 76))
|
|
|
|
return;
|
|
|
|
|
2015-01-13 16:33:26 +00:00
|
|
|
if (kvm_s390_apxa_installed())
|
|
|
|
kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
|
|
|
|
else
|
|
|
|
kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
|
|
|
|
}
|
|
|
|
|
2021-08-23 21:20:47 +00:00
|
|
|
/*
|
|
|
|
* kvm_arch_crypto_set_masks
|
|
|
|
*
|
|
|
|
* @kvm: pointer to the target guest's KVM struct containing the crypto masks
|
|
|
|
* to be set.
|
|
|
|
* @apm: the mask identifying the accessible AP adapters
|
|
|
|
* @aqm: the mask identifying the accessible AP domains
|
|
|
|
* @adm: the mask identifying the accessible AP control domains
|
|
|
|
*
|
|
|
|
* Set the masks that identify the adapters, domains and control domains to
|
|
|
|
* which the KVM guest is granted access.
|
|
|
|
*
|
|
|
|
* Note: The kvm->lock mutex must be locked by the caller before invoking this
|
|
|
|
* function.
|
|
|
|
*/
|
2018-10-05 08:31:09 +00:00
|
|
|
void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
|
|
|
|
unsigned long *aqm, unsigned long *adm)
|
|
|
|
{
|
|
|
|
struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
|
|
|
|
|
|
|
|
kvm_s390_vcpu_block_all(kvm);
|
|
|
|
|
|
|
|
switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
|
|
|
|
case CRYCB_FORMAT2: /* APCB1 use 256 bits */
|
|
|
|
memcpy(crycb->apcb1.apm, apm, 32);
|
|
|
|
VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
|
|
|
|
apm[0], apm[1], apm[2], apm[3]);
|
|
|
|
memcpy(crycb->apcb1.aqm, aqm, 32);
|
|
|
|
VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
|
|
|
|
aqm[0], aqm[1], aqm[2], aqm[3]);
|
|
|
|
memcpy(crycb->apcb1.adm, adm, 32);
|
|
|
|
VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
|
|
|
|
adm[0], adm[1], adm[2], adm[3]);
|
|
|
|
break;
|
|
|
|
case CRYCB_FORMAT1:
|
|
|
|
case CRYCB_FORMAT0: /* Fall through both use APCB0 */
|
|
|
|
memcpy(crycb->apcb0.apm, apm, 8);
|
|
|
|
memcpy(crycb->apcb0.aqm, aqm, 2);
|
|
|
|
memcpy(crycb->apcb0.adm, adm, 2);
|
|
|
|
VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
|
|
|
|
apm[0], *((unsigned short *)aqm),
|
|
|
|
*((unsigned short *)adm));
|
|
|
|
break;
|
|
|
|
default: /* Can not happen */
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* recreate the shadow crycb for each vcpu */
|
|
|
|
kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
|
|
|
|
kvm_s390_vcpu_unblock_all(kvm);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
|
|
|
|
|
2021-08-23 21:20:47 +00:00
|
|
|
/*
|
|
|
|
* kvm_arch_crypto_clear_masks
|
|
|
|
*
|
|
|
|
* @kvm: pointer to the target guest's KVM struct containing the crypto masks
|
|
|
|
* to be cleared.
|
|
|
|
*
|
|
|
|
* Clear the masks that identify the adapters, domains and control domains to
|
|
|
|
* which the KVM guest is granted access.
|
|
|
|
*
|
|
|
|
* Note: The kvm->lock mutex must be locked by the caller before invoking this
|
|
|
|
* function.
|
|
|
|
*/
|
2018-09-25 23:16:25 +00:00
|
|
|
void kvm_arch_crypto_clear_masks(struct kvm *kvm)
|
|
|
|
{
|
|
|
|
kvm_s390_vcpu_block_all(kvm);
|
|
|
|
|
|
|
|
memset(&kvm->arch.crypto.crycb->apcb0, 0,
|
|
|
|
sizeof(kvm->arch.crypto.crycb->apcb0));
|
|
|
|
memset(&kvm->arch.crypto.crycb->apcb1, 0,
|
|
|
|
sizeof(kvm->arch.crypto.crycb->apcb1));
|
|
|
|
|
2018-10-05 08:31:09 +00:00
|
|
|
VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
|
2018-09-25 23:16:30 +00:00
|
|
|
/* recreate the shadow crycb for each vcpu */
|
|
|
|
kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
|
2018-09-25 23:16:25 +00:00
|
|
|
kvm_s390_vcpu_unblock_all(kvm);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
|
|
|
|
|
2016-04-04 12:27:51 +00:00
|
|
|
static u64 kvm_s390_get_initial_cpuid(void)
|
2015-02-02 14:42:51 +00:00
|
|
|
{
|
2016-04-04 12:27:51 +00:00
|
|
|
struct cpuid cpuid;
|
|
|
|
|
|
|
|
get_cpu_id(&cpuid);
|
|
|
|
cpuid.version = 0xff;
|
|
|
|
return *((u64 *) &cpuid);
|
2015-02-02 14:42:51 +00:00
|
|
|
}
|
|
|
|
|
2015-12-02 07:53:52 +00:00
|
|
|
static void kvm_s390_crypto_init(struct kvm *kvm)
|
2014-06-27 18:46:01 +00:00
|
|
|
{
|
2015-12-02 07:53:52 +00:00
|
|
|
kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
|
2015-01-13 16:33:26 +00:00
|
|
|
kvm_s390_set_crycb_format(kvm);
|
2021-08-23 21:20:46 +00:00
|
|
|
init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
|
2014-06-27 18:46:01 +00:00
|
|
|
|
2018-09-25 23:16:18 +00:00
|
|
|
if (!test_kvm_facility(kvm, 76))
|
|
|
|
return;
|
|
|
|
|
2015-02-24 19:06:57 +00:00
|
|
|
/* Enable AES/DEA protected key functions by default */
|
|
|
|
kvm->arch.crypto.aes_kw = 1;
|
|
|
|
kvm->arch.crypto.dea_kw = 1;
|
|
|
|
get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
|
|
|
|
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
|
|
|
|
get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
|
|
|
|
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
|
2014-06-27 18:46:01 +00:00
|
|
|
}
|
|
|
|
|
2015-04-22 15:09:44 +00:00
|
|
|
static void sca_dispose(struct kvm *kvm)
|
|
|
|
{
|
|
|
|
if (kvm->arch.use_esca)
|
2015-04-22 16:08:39 +00:00
|
|
|
free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
|
2015-04-22 15:09:44 +00:00
|
|
|
else
|
|
|
|
free_page((unsigned long)(kvm->arch.sca));
|
|
|
|
kvm->arch.sca = NULL;
|
|
|
|
}
|
|
|
|
|
2022-06-06 20:33:20 +00:00
|
|
|
void kvm_arch_free_vm(struct kvm *kvm)
|
|
|
|
{
|
|
|
|
if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
|
|
|
|
kvm_s390_pci_clear_list(kvm);
|
|
|
|
|
|
|
|
__kvm_arch_free_vm(kvm);
|
|
|
|
}
|
|
|
|
|
2012-01-04 09:25:20 +00:00
|
|
|
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
2008-03-25 17:47:20 +00:00
|
|
|
{
|
2020-11-06 07:34:23 +00:00
|
|
|
gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
|
2015-02-02 14:42:51 +00:00
|
|
|
int i, rc;
|
2008-03-25 17:47:20 +00:00
|
|
|
char debug_name[16];
|
2014-03-19 10:18:29 +00:00
|
|
|
static unsigned long sca_offset;
|
2008-03-25 17:47:20 +00:00
|
|
|
|
2012-01-04 09:25:20 +00:00
|
|
|
rc = -EINVAL;
|
|
|
|
#ifdef CONFIG_KVM_S390_UCONTROL
|
|
|
|
if (type & ~KVM_VM_S390_UCONTROL)
|
|
|
|
goto out_err;
|
|
|
|
if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
|
|
|
|
goto out_err;
|
|
|
|
#else
|
|
|
|
if (type)
|
|
|
|
goto out_err;
|
|
|
|
#endif
|
|
|
|
|
2008-03-25 17:47:20 +00:00
|
|
|
rc = s390_enable_sie();
|
|
|
|
if (rc)
|
2010-11-09 16:02:49 +00:00
|
|
|
goto out_err;
|
2008-03-25 17:47:20 +00:00
|
|
|
|
2011-10-18 10:27:13 +00:00
|
|
|
rc = -ENOMEM;
|
|
|
|
|
2015-11-24 12:33:49 +00:00
|
|
|
if (!sclp.has_64bscao)
|
|
|
|
alloc_flags |= GFP_DMA;
|
2015-04-22 16:08:39 +00:00
|
|
|
rwlock_init(&kvm->arch.sca_lock);
|
2018-04-27 12:36:12 +00:00
|
|
|
/* start with basic SCA */
|
2015-11-24 12:33:49 +00:00
|
|
|
kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
|
2008-03-25 17:47:20 +00:00
|
|
|
if (!kvm->arch.sca)
|
2010-11-09 16:02:49 +00:00
|
|
|
goto out_err;
|
2019-01-04 01:14:28 +00:00
|
|
|
mutex_lock(&kvm_lock);
|
2015-10-26 07:41:29 +00:00
|
|
|
sca_offset += 16;
|
2015-04-23 14:09:06 +00:00
|
|
|
if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
|
2015-10-26 07:41:29 +00:00
|
|
|
sca_offset = 0;
|
2015-04-23 14:09:06 +00:00
|
|
|
kvm->arch.sca = (struct bsca_block *)
|
|
|
|
((char *) kvm->arch.sca + sca_offset);
|
2019-01-04 01:14:28 +00:00
|
|
|
mutex_unlock(&kvm_lock);
|
2008-03-25 17:47:20 +00:00
|
|
|
|
|
|
|
sprintf(debug_name, "kvm-%u", current->pid);
|
|
|
|
|
2015-07-20 13:04:48 +00:00
|
|
|
kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
|
2008-03-25 17:47:20 +00:00
|
|
|
if (!kvm->arch.dbf)
|
2015-03-12 12:55:53 +00:00
|
|
|
goto out_err;
|
2008-03-25 17:47:20 +00:00
|
|
|
|
2017-05-30 12:26:02 +00:00
|
|
|
BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
|
2015-12-02 07:53:52 +00:00
|
|
|
kvm->arch.sie_page2 =
|
2020-11-06 07:34:23 +00:00
|
|
|
(struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
|
2015-12-02 07:53:52 +00:00
|
|
|
if (!kvm->arch.sie_page2)
|
2015-03-12 12:55:53 +00:00
|
|
|
goto out_err;
|
2015-02-02 14:42:51 +00:00
|
|
|
|
2019-01-31 08:52:41 +00:00
|
|
|
kvm->arch.sie_page2->kvm = kvm;
|
2015-12-02 07:53:52 +00:00
|
|
|
kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
|
2018-02-09 16:26:29 +00:00
|
|
|
|
|
|
|
for (i = 0; i < kvm_s390_fac_size(); i++) {
|
2021-05-05 20:01:10 +00:00
|
|
|
kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
|
2018-02-09 16:26:29 +00:00
|
|
|
(kvm_s390_fac_base[i] |
|
|
|
|
kvm_s390_fac_ext[i]);
|
2021-05-05 20:01:10 +00:00
|
|
|
kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
|
2018-02-09 16:26:29 +00:00
|
|
|
kvm_s390_fac_base[i];
|
|
|
|
}
|
2019-02-18 12:48:25 +00:00
|
|
|
kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
|
2015-02-24 12:51:04 +00:00
|
|
|
|
2017-08-29 14:31:08 +00:00
|
|
|
/* we are always in czam mode - even on pre z14 machines */
|
|
|
|
set_kvm_facility(kvm->arch.model.fac_mask, 138);
|
|
|
|
set_kvm_facility(kvm->arch.model.fac_list, 138);
|
|
|
|
/* we emulate STHYI in kvm */
|
2016-05-23 13:11:58 +00:00
|
|
|
set_kvm_facility(kvm->arch.model.fac_mask, 74);
|
|
|
|
set_kvm_facility(kvm->arch.model.fac_list, 74);
|
2016-08-29 13:56:55 +00:00
|
|
|
if (MACHINE_HAS_TLB_GUEST) {
|
|
|
|
set_kvm_facility(kvm->arch.model.fac_mask, 147);
|
|
|
|
set_kvm_facility(kvm->arch.model.fac_list, 147);
|
|
|
|
}
|
2016-05-23 13:11:58 +00:00
|
|
|
|
2019-05-21 15:34:37 +00:00
|
|
|
if (css_general_characteristics.aiv && test_facility(65))
|
|
|
|
set_kvm_facility(kvm->arch.model.fac_mask, 65);
|
|
|
|
|
2016-04-04 12:27:51 +00:00
|
|
|
kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
|
2015-05-06 11:18:59 +00:00
|
|
|
kvm->arch.model.ibc = sclp.ibc & 0x0fff;
|
2015-02-02 14:42:51 +00:00
|
|
|
|
2015-12-02 07:53:52 +00:00
|
|
|
kvm_s390_crypto_init(kvm);
|
2014-06-27 18:46:01 +00:00
|
|
|
|
2022-06-06 20:33:20 +00:00
|
|
|
if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
|
|
|
|
mutex_lock(&kvm->lock);
|
|
|
|
kvm_s390_pci_init_list(kvm);
|
|
|
|
kvm_s390_vcpu_pci_enable_interp(kvm);
|
|
|
|
mutex_unlock(&kvm->lock);
|
|
|
|
}
|
|
|
|
|
2017-02-17 09:06:26 +00:00
|
|
|
mutex_init(&kvm->arch.float_int.ais_lock);
|
KVM: s390: interrupt subsystem, cpu timer, waitpsw
This patch contains the s390 interrupt subsystem (similar to in kernel apic)
including timer interrupts (similar to in-kernel-pit) and enabled wait
(similar to in kernel hlt).
In order to achieve that, this patch also introduces intercept handling
for instruction intercepts, and it implements load control instructions.
This patch introduces an ioctl KVM_S390_INTERRUPT which is valid for both
the vm file descriptors and the vcpu file descriptors. In case this ioctl is
issued against a vm file descriptor, the interrupt is considered floating.
Floating interrupts may be delivered to any virtual cpu in the configuration.
The following interrupts are supported:
SIGP STOP - interprocessor signal that stops a remote cpu
SIGP SET PREFIX - interprocessor signal that sets the prefix register of a
(stopped) remote cpu
INT EMERGENCY - interprocessor interrupt, usually used to signal need_reshed
and for smp_call_function() in the guest.
PROGRAM INT - exception during program execution such as page fault, illegal
instruction and friends
RESTART - interprocessor signal that starts a stopped cpu
INT VIRTIO - floating interrupt for virtio signalisation
INT SERVICE - floating interrupt for signalisations from the system
service processor
struct kvm_s390_interrupt, which is submitted as ioctl parameter when injecting
an interrupt, also carrys parameter data for interrupts along with the interrupt
type. Interrupts on s390 usually have a state that represents the current
operation, or identifies which device has caused the interruption on s390.
kvm_s390_handle_wait() does handle waitpsw in two flavors: in case of a
disabled wait (that is, disabled for interrupts), we exit to userspace. In case
of an enabled wait we set up a timer that equals the cpu clock comparator value
and sleep on a wait queue.
[christian: change virtio interrupt to 0x2603]
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
2008-03-25 17:47:26 +00:00
|
|
|
spin_lock_init(&kvm->arch.float_int.lock);
|
2013-07-03 13:18:35 +00:00
|
|
|
for (i = 0; i < FIRQ_LIST_COUNT; i++)
|
|
|
|
INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
|
2014-01-10 13:33:28 +00:00
|
|
|
init_waitqueue_head(&kvm->arch.ipte_wq);
|
2014-10-01 12:48:42 +00:00
|
|
|
mutex_init(&kvm->arch.ipte_mutex);
|
KVM: s390: interrupt subsystem, cpu timer, waitpsw
This patch contains the s390 interrupt subsystem (similar to in kernel apic)
including timer interrupts (similar to in-kernel-pit) and enabled wait
(similar to in kernel hlt).
In order to achieve that, this patch also introduces intercept handling
for instruction intercepts, and it implements load control instructions.
This patch introduces an ioctl KVM_S390_INTERRUPT which is valid for both
the vm file descriptors and the vcpu file descriptors. In case this ioctl is
issued against a vm file descriptor, the interrupt is considered floating.
Floating interrupts may be delivered to any virtual cpu in the configuration.
The following interrupts are supported:
SIGP STOP - interprocessor signal that stops a remote cpu
SIGP SET PREFIX - interprocessor signal that sets the prefix register of a
(stopped) remote cpu
INT EMERGENCY - interprocessor interrupt, usually used to signal need_reshed
and for smp_call_function() in the guest.
PROGRAM INT - exception during program execution such as page fault, illegal
instruction and friends
RESTART - interprocessor signal that starts a stopped cpu
INT VIRTIO - floating interrupt for virtio signalisation
INT SERVICE - floating interrupt for signalisations from the system
service processor
struct kvm_s390_interrupt, which is submitted as ioctl parameter when injecting
an interrupt, also carrys parameter data for interrupts along with the interrupt
type. Interrupts on s390 usually have a state that represents the current
operation, or identifies which device has caused the interruption on s390.
kvm_s390_handle_wait() does handle waitpsw in two flavors: in case of a
disabled wait (that is, disabled for interrupts), we exit to userspace. In case
of an enabled wait we set up a timer that equals the cpu clock comparator value
and sleep on a wait queue.
[christian: change virtio interrupt to 0x2603]
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
2008-03-25 17:47:26 +00:00
|
|
|
|
2008-03-25 17:47:20 +00:00
|
|
|
debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
|
2015-07-22 13:50:58 +00:00
|
|
|
VM_EVENT(kvm, 3, "vm created with type %lu", type);
|
2008-03-25 17:47:20 +00:00
|
|
|
|
2012-01-04 09:25:20 +00:00
|
|
|
if (type & KVM_VM_S390_UCONTROL) {
|
|
|
|
kvm->arch.gmap = NULL;
|
2014-12-01 16:24:42 +00:00
|
|
|
kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
|
2012-01-04 09:25:20 +00:00
|
|
|
} else {
|
2014-12-01 16:24:42 +00:00
|
|
|
if (sclp.hamax == U64_MAX)
|
2017-04-20 12:43:51 +00:00
|
|
|
kvm->arch.mem_limit = TASK_SIZE_MAX;
|
2014-12-01 16:24:42 +00:00
|
|
|
else
|
2017-04-20 12:43:51 +00:00
|
|
|
kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
|
2014-12-01 16:24:42 +00:00
|
|
|
sclp.hamax + 1);
|
2016-03-08 10:55:04 +00:00
|
|
|
kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
|
2012-01-04 09:25:20 +00:00
|
|
|
if (!kvm->arch.gmap)
|
2015-03-12 12:55:53 +00:00
|
|
|
goto out_err;
|
2013-05-17 12:41:36 +00:00
|
|
|
kvm->arch.gmap->private = kvm;
|
2013-06-17 14:25:18 +00:00
|
|
|
kvm->arch.gmap->pfault_enabled = 0;
|
2012-01-04 09:25:20 +00:00
|
|
|
}
|
2012-12-20 14:32:12 +00:00
|
|
|
|
2018-02-16 11:16:14 +00:00
|
|
|
kvm->arch.use_pfmfi = sclp.has_pfmfi;
|
2018-02-15 15:33:47 +00:00
|
|
|
kvm->arch.use_skf = sclp.has_skey;
|
2014-03-14 10:00:21 +00:00
|
|
|
spin_lock_init(&kvm->arch.start_stop_lock);
|
2015-07-08 11:19:48 +00:00
|
|
|
kvm_s390_vsie_init(kvm);
|
2020-02-27 09:10:31 +00:00
|
|
|
if (use_gisa)
|
|
|
|
kvm_s390_gisa_init(kvm);
|
KVM: s390: pv: asynchronous destroy for reboot
Until now, destroying a protected guest was an entirely synchronous
operation that could potentially take a very long time, depending on
the size of the guest, due to the time needed to clean up the address
space from protected pages.
This patch implements an asynchronous destroy mechanism, that allows a
protected guest to reboot significantly faster than previously.
This is achieved by clearing the pages of the old guest in background.
In case of reboot, the new guest will be able to run in the same
address space almost immediately.
The old protected guest is then only destroyed when all of its memory
has been destroyed or otherwise made non protected.
Two new PV commands are added for the KVM_S390_PV_COMMAND ioctl:
KVM_PV_ASYNC_CLEANUP_PREPARE: set aside the current protected VM for
later asynchronous teardown. The current KVM VM will then continue
immediately as non-protected. If a protected VM had already been
set aside for asynchronous teardown, but without starting the teardown
process, this call will fail. There can be at most one VM set aside at
any time. Once it is set aside, the protected VM only exists in the
context of the Ultravisor, it is not associated with the KVM VM
anymore. Its protected CPUs have already been destroyed, but not its
memory. This command can be issued again immediately after starting
KVM_PV_ASYNC_CLEANUP_PERFORM, without having to wait for completion.
KVM_PV_ASYNC_CLEANUP_PERFORM: tears down the protected VM previously
set aside using KVM_PV_ASYNC_CLEANUP_PREPARE. Ideally the
KVM_PV_ASYNC_CLEANUP_PERFORM PV command should be issued by userspace
from a separate thread. If a fatal signal is received (or if the
process terminates naturally), the command will terminate immediately
without completing. All protected VMs whose teardown was interrupted
will be put in the need_cleanup list. The rest of the normal KVM
teardown process will take care of properly cleaning up all remaining
protected VMs, including the ones on the need_cleanup list.
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Nico Boehr <nrb@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
Link: https://lore.kernel.org/r/20221111170632.77622-2-imbrenda@linux.ibm.com
Message-Id: <20221111170632.77622-2-imbrenda@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
2022-11-11 17:06:27 +00:00
|
|
|
INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup);
|
|
|
|
kvm->arch.pv.set_aside = NULL;
|
2015-12-08 15:55:27 +00:00
|
|
|
KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
|
2014-03-14 10:00:21 +00:00
|
|
|
|
2010-11-09 16:02:49 +00:00
|
|
|
return 0;
|
2015-03-12 12:55:53 +00:00
|
|
|
out_err:
|
2015-12-02 07:53:52 +00:00
|
|
|
free_page((unsigned long)kvm->arch.sie_page2);
|
2011-07-24 08:48:21 +00:00
|
|
|
debug_unregister(kvm->arch.dbf);
|
2015-04-22 15:09:44 +00:00
|
|
|
sca_dispose(kvm);
|
2015-07-22 13:50:58 +00:00
|
|
|
KVM_EVENT(3, "creation of vm failed: %d", rc);
|
2010-11-09 16:02:49 +00:00
|
|
|
return rc;
|
2008-03-25 17:47:20 +00:00
|
|
|
}
|
|
|
|
|
2008-11-26 13:50:27 +00:00
|
|
|
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
2019-09-30 08:19:18 +00:00
|
|
|
u16 rc, rrc;
|
|
|
|
|
2008-11-26 13:50:27 +00:00
|
|
|
VCPU_EVENT(vcpu, 3, "%s", "free cpu");
|
2012-07-23 15:20:30 +00:00
|
|
|
trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
|
2014-03-25 16:09:08 +00:00
|
|
|
kvm_s390_clear_local_irqs(vcpu);
|
2013-10-07 15:11:48 +00:00
|
|
|
kvm_clear_async_pf_completion_queue(vcpu);
|
2015-04-23 14:09:06 +00:00
|
|
|
if (!kvm_is_ucontrol(vcpu->kvm))
|
2015-04-21 13:31:59 +00:00
|
|
|
sca_del_vcpu(vcpu);
|
2022-07-14 10:18:23 +00:00
|
|
|
kvm_s390_update_topology_change_report(vcpu->kvm, 1);
|
2012-01-04 09:25:21 +00:00
|
|
|
|
|
|
|
if (kvm_is_ucontrol(vcpu->kvm))
|
2016-03-08 10:55:04 +00:00
|
|
|
gmap_remove(vcpu->arch.gmap);
|
2012-01-04 09:25:21 +00:00
|
|
|
|
2015-05-07 13:41:57 +00:00
|
|
|
if (vcpu->kvm->arch.use_cmma)
|
2014-03-25 12:47:11 +00:00
|
|
|
kvm_s390_vcpu_unsetup_cmma(vcpu);
|
2019-09-30 08:19:18 +00:00
|
|
|
/* We can not hold the vcpu mutex here, we are already dying */
|
|
|
|
if (kvm_s390_pv_cpu_get_handle(vcpu))
|
|
|
|
kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
|
2008-11-26 13:50:27 +00:00
|
|
|
free_page((unsigned long)(vcpu->arch.sie_block));
|
|
|
|
}
|
|
|
|
|
2008-03-25 17:47:20 +00:00
|
|
|
void kvm_arch_destroy_vm(struct kvm *kvm)
|
|
|
|
{
|
2019-09-30 08:19:18 +00:00
|
|
|
u16 rc, rrc;
|
|
|
|
|
2021-11-16 16:03:57 +00:00
|
|
|
kvm_destroy_vcpus(kvm);
|
2015-04-22 15:09:44 +00:00
|
|
|
sca_dispose(kvm);
|
2017-06-12 12:15:19 +00:00
|
|
|
kvm_s390_gisa_destroy(kvm);
|
2019-09-30 08:19:18 +00:00
|
|
|
/*
|
|
|
|
* We are already at the end of life and kvm->lock is not taken.
|
|
|
|
* This is ok as the file descriptor is closed by now and nobody
|
KVM: s390: pv: asynchronous destroy for reboot
Until now, destroying a protected guest was an entirely synchronous
operation that could potentially take a very long time, depending on
the size of the guest, due to the time needed to clean up the address
space from protected pages.
This patch implements an asynchronous destroy mechanism, that allows a
protected guest to reboot significantly faster than previously.
This is achieved by clearing the pages of the old guest in background.
In case of reboot, the new guest will be able to run in the same
address space almost immediately.
The old protected guest is then only destroyed when all of its memory
has been destroyed or otherwise made non protected.
Two new PV commands are added for the KVM_S390_PV_COMMAND ioctl:
KVM_PV_ASYNC_CLEANUP_PREPARE: set aside the current protected VM for
later asynchronous teardown. The current KVM VM will then continue
immediately as non-protected. If a protected VM had already been
set aside for asynchronous teardown, but without starting the teardown
process, this call will fail. There can be at most one VM set aside at
any time. Once it is set aside, the protected VM only exists in the
context of the Ultravisor, it is not associated with the KVM VM
anymore. Its protected CPUs have already been destroyed, but not its
memory. This command can be issued again immediately after starting
KVM_PV_ASYNC_CLEANUP_PERFORM, without having to wait for completion.
KVM_PV_ASYNC_CLEANUP_PERFORM: tears down the protected VM previously
set aside using KVM_PV_ASYNC_CLEANUP_PREPARE. Ideally the
KVM_PV_ASYNC_CLEANUP_PERFORM PV command should be issued by userspace
from a separate thread. If a fatal signal is received (or if the
process terminates naturally), the command will terminate immediately
without completing. All protected VMs whose teardown was interrupted
will be put in the need_cleanup list. The rest of the normal KVM
teardown process will take care of properly cleaning up all remaining
protected VMs, including the ones on the need_cleanup list.
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Nico Boehr <nrb@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
Link: https://lore.kernel.org/r/20221111170632.77622-2-imbrenda@linux.ibm.com
Message-Id: <20221111170632.77622-2-imbrenda@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
2022-11-11 17:06:27 +00:00
|
|
|
* can mess with the pv state.
|
2019-09-30 08:19:18 +00:00
|
|
|
*/
|
KVM: s390: pv: asynchronous destroy for reboot
Until now, destroying a protected guest was an entirely synchronous
operation that could potentially take a very long time, depending on
the size of the guest, due to the time needed to clean up the address
space from protected pages.
This patch implements an asynchronous destroy mechanism, that allows a
protected guest to reboot significantly faster than previously.
This is achieved by clearing the pages of the old guest in background.
In case of reboot, the new guest will be able to run in the same
address space almost immediately.
The old protected guest is then only destroyed when all of its memory
has been destroyed or otherwise made non protected.
Two new PV commands are added for the KVM_S390_PV_COMMAND ioctl:
KVM_PV_ASYNC_CLEANUP_PREPARE: set aside the current protected VM for
later asynchronous teardown. The current KVM VM will then continue
immediately as non-protected. If a protected VM had already been
set aside for asynchronous teardown, but without starting the teardown
process, this call will fail. There can be at most one VM set aside at
any time. Once it is set aside, the protected VM only exists in the
context of the Ultravisor, it is not associated with the KVM VM
anymore. Its protected CPUs have already been destroyed, but not its
memory. This command can be issued again immediately after starting
KVM_PV_ASYNC_CLEANUP_PERFORM, without having to wait for completion.
KVM_PV_ASYNC_CLEANUP_PERFORM: tears down the protected VM previously
set aside using KVM_PV_ASYNC_CLEANUP_PREPARE. Ideally the
KVM_PV_ASYNC_CLEANUP_PERFORM PV command should be issued by userspace
from a separate thread. If a fatal signal is received (or if the
process terminates naturally), the command will terminate immediately
without completing. All protected VMs whose teardown was interrupted
will be put in the need_cleanup list. The rest of the normal KVM
teardown process will take care of properly cleaning up all remaining
protected VMs, including the ones on the need_cleanup list.
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Nico Boehr <nrb@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
Link: https://lore.kernel.org/r/20221111170632.77622-2-imbrenda@linux.ibm.com
Message-Id: <20221111170632.77622-2-imbrenda@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
2022-11-11 17:06:27 +00:00
|
|
|
kvm_s390_pv_deinit_cleanup_all(kvm, &rc, &rrc);
|
2022-06-28 13:56:10 +00:00
|
|
|
/*
|
|
|
|
* Remove the mmu notifier only when the whole KVM VM is torn down,
|
|
|
|
* and only if one was registered to begin with. If the VM is
|
|
|
|
* currently not protected, but has previously been protected,
|
|
|
|
* then it's possible that the notifier is still registered.
|
|
|
|
*/
|
|
|
|
if (kvm->arch.pv.mmu_notifier.ops)
|
|
|
|
mmu_notifier_unregister(&kvm->arch.pv.mmu_notifier, kvm->mm);
|
|
|
|
|
2019-09-30 08:19:18 +00:00
|
|
|
debug_unregister(kvm->arch.dbf);
|
2015-12-02 07:53:52 +00:00
|
|
|
free_page((unsigned long)kvm->arch.sie_page2);
|
2012-01-04 09:25:21 +00:00
|
|
|
if (!kvm_is_ucontrol(kvm))
|
2016-03-08 10:55:04 +00:00
|
|
|
gmap_remove(kvm->arch.gmap);
|
2013-07-15 11:36:01 +00:00
|
|
|
kvm_s390_destroy_adapters(kvm);
|
2014-03-25 16:09:08 +00:00
|
|
|
kvm_s390_clear_float_irqs(kvm);
|
2015-07-08 11:19:48 +00:00
|
|
|
kvm_s390_vsie_destroy(kvm);
|
2015-12-08 15:55:27 +00:00
|
|
|
KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
|
2008-03-25 17:47:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Section: vcpu related */
|
2014-12-02 15:53:21 +00:00
|
|
|
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
2016-03-08 10:55:04 +00:00
|
|
|
vcpu->arch.gmap = gmap_create(current->mm, -1UL);
|
2014-12-02 15:53:21 +00:00
|
|
|
if (!vcpu->arch.gmap)
|
|
|
|
return -ENOMEM;
|
|
|
|
vcpu->arch.gmap->private = vcpu->kvm;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-04-21 13:31:59 +00:00
|
|
|
static void sca_del_vcpu(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
2016-08-08 20:39:32 +00:00
|
|
|
if (!kvm_s390_use_sca_entries())
|
|
|
|
return;
|
2015-04-22 16:08:39 +00:00
|
|
|
read_lock(&vcpu->kvm->arch.sca_lock);
|
2015-04-22 15:09:44 +00:00
|
|
|
if (vcpu->kvm->arch.use_esca) {
|
|
|
|
struct esca_block *sca = vcpu->kvm->arch.sca;
|
2015-04-21 13:31:59 +00:00
|
|
|
|
2015-04-22 15:09:44 +00:00
|
|
|
clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
|
2015-10-12 10:41:41 +00:00
|
|
|
sca->cpu[vcpu->vcpu_id].sda = 0;
|
2015-04-22 15:09:44 +00:00
|
|
|
} else {
|
|
|
|
struct bsca_block *sca = vcpu->kvm->arch.sca;
|
|
|
|
|
|
|
|
clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
|
2015-10-12 10:41:41 +00:00
|
|
|
sca->cpu[vcpu->vcpu_id].sda = 0;
|
2015-04-22 15:09:44 +00:00
|
|
|
}
|
2015-04-22 16:08:39 +00:00
|
|
|
read_unlock(&vcpu->kvm->arch.sca_lock);
|
2015-04-21 13:31:59 +00:00
|
|
|
}
|
|
|
|
|
2015-10-12 14:29:01 +00:00
|
|
|
static void sca_add_vcpu(struct kvm_vcpu *vcpu)
|
2015-04-21 13:31:59 +00:00
|
|
|
{
|
2016-08-08 20:39:32 +00:00
|
|
|
if (!kvm_s390_use_sca_entries()) {
|
2022-10-20 14:31:57 +00:00
|
|
|
phys_addr_t sca_phys = virt_to_phys(vcpu->kvm->arch.sca);
|
2016-08-08 20:39:32 +00:00
|
|
|
|
|
|
|
/* we still need the basic sca for the ipte control */
|
2022-10-20 14:31:57 +00:00
|
|
|
vcpu->arch.sie_block->scaoh = sca_phys >> 32;
|
|
|
|
vcpu->arch.sie_block->scaol = sca_phys;
|
2018-03-06 13:27:58 +00:00
|
|
|
return;
|
2016-08-08 20:39:32 +00:00
|
|
|
}
|
2015-10-12 14:29:01 +00:00
|
|
|
read_lock(&vcpu->kvm->arch.sca_lock);
|
|
|
|
if (vcpu->kvm->arch.use_esca) {
|
|
|
|
struct esca_block *sca = vcpu->kvm->arch.sca;
|
2022-10-20 14:31:57 +00:00
|
|
|
phys_addr_t sca_phys = virt_to_phys(sca);
|
2015-04-22 15:09:44 +00:00
|
|
|
|
2022-10-20 14:31:57 +00:00
|
|
|
sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
|
|
|
|
vcpu->arch.sie_block->scaoh = sca_phys >> 32;
|
|
|
|
vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK;
|
2017-03-13 10:48:28 +00:00
|
|
|
vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
|
2015-10-12 14:29:01 +00:00
|
|
|
set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
|
2015-04-22 15:09:44 +00:00
|
|
|
} else {
|
2015-10-12 14:29:01 +00:00
|
|
|
struct bsca_block *sca = vcpu->kvm->arch.sca;
|
2022-10-20 14:31:57 +00:00
|
|
|
phys_addr_t sca_phys = virt_to_phys(sca);
|
2015-04-21 13:31:59 +00:00
|
|
|
|
2022-10-20 14:31:57 +00:00
|
|
|
sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
|
|
|
|
vcpu->arch.sie_block->scaoh = sca_phys >> 32;
|
|
|
|
vcpu->arch.sie_block->scaol = sca_phys;
|
2015-10-12 14:29:01 +00:00
|
|
|
set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
|
2015-04-22 15:09:44 +00:00
|
|
|
}
|
2015-10-12 14:29:01 +00:00
|
|
|
read_unlock(&vcpu->kvm->arch.sca_lock);
|
2015-04-22 16:08:39 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Basic SCA to Extended SCA data copy routines */
|
|
|
|
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
|
|
|
|
{
|
|
|
|
d->sda = s->sda;
|
|
|
|
d->sigp_ctrl.c = s->sigp_ctrl.c;
|
|
|
|
d->sigp_ctrl.scn = s->sigp_ctrl.scn;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
d->ipte_control = s->ipte_control;
|
|
|
|
d->mcn[0] = s->mcn;
|
|
|
|
for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
|
|
|
|
sca_copy_entry(&d->cpu[i], &s->cpu[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sca_switch_to_extended(struct kvm *kvm)
|
|
|
|
{
|
|
|
|
struct bsca_block *old_sca = kvm->arch.sca;
|
|
|
|
struct esca_block *new_sca;
|
|
|
|
struct kvm_vcpu *vcpu;
|
2021-11-16 16:04:02 +00:00
|
|
|
unsigned long vcpu_idx;
|
2015-04-22 16:08:39 +00:00
|
|
|
u32 scaol, scaoh;
|
2022-10-20 14:31:57 +00:00
|
|
|
phys_addr_t new_sca_phys;
|
2015-04-22 16:08:39 +00:00
|
|
|
|
2019-09-30 08:19:18 +00:00
|
|
|
if (kvm->arch.use_esca)
|
|
|
|
return 0;
|
|
|
|
|
2020-11-06 07:34:23 +00:00
|
|
|
new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
|
2015-04-22 16:08:39 +00:00
|
|
|
if (!new_sca)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2022-10-20 14:31:57 +00:00
|
|
|
new_sca_phys = virt_to_phys(new_sca);
|
|
|
|
scaoh = new_sca_phys >> 32;
|
|
|
|
scaol = new_sca_phys & ESCA_SCAOL_MASK;
|
2015-04-22 16:08:39 +00:00
|
|
|
|
|
|
|
kvm_s390_vcpu_block_all(kvm);
|
|
|
|
write_lock(&kvm->arch.sca_lock);
|
|
|
|
|
|
|
|
sca_copy_b_to_e(new_sca, old_sca);
|
|
|
|
|
|
|
|
kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
|
|
|
|
vcpu->arch.sie_block->scaoh = scaoh;
|
|
|
|
vcpu->arch.sie_block->scaol = scaol;
|
2017-03-13 10:48:28 +00:00
|
|
|
vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
|
2015-04-22 16:08:39 +00:00
|
|
|
}
|
|
|
|
kvm->arch.sca = new_sca;
|
|
|
|
kvm->arch.use_esca = 1;
|
|
|
|
|
|
|
|
write_unlock(&kvm->arch.sca_lock);
|
|
|
|
kvm_s390_vcpu_unblock_all(kvm);
|
|
|
|
|
|
|
|
free_page((unsigned long)old_sca);
|
|
|
|
|
2015-12-08 15:55:27 +00:00
|
|
|
VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
|
|
|
|
old_sca, kvm->arch.sca);
|
2015-04-22 16:08:39 +00:00
|
|
|
return 0;
|
2015-04-21 13:31:59 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
|
|
|
|
{
|
2015-04-22 16:08:39 +00:00
|
|
|
int rc;
|
|
|
|
|
2016-08-08 20:39:32 +00:00
|
|
|
if (!kvm_s390_use_sca_entries()) {
|
|
|
|
if (id < KVM_MAX_VCPUS)
|
|
|
|
return true;
|
|
|
|
return false;
|
|
|
|
}
|
2015-04-22 16:08:39 +00:00
|
|
|
if (id < KVM_S390_BSCA_CPU_SLOTS)
|
|
|
|
return true;
|
2015-11-24 12:33:49 +00:00
|
|
|
if (!sclp.has_esca || !sclp.has_64bscao)
|
2015-04-22 16:08:39 +00:00
|
|
|
return false;
|
|
|
|
|
|
|
|
rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
|
|
|
|
|
|
|
|
return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
|
2015-04-21 13:31:59 +00:00
|
|
|
}
|
|
|
|
|
2016-02-15 08:42:25 +00:00
|
|
|
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
|
|
|
|
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
|
2016-02-17 20:53:33 +00:00
|
|
|
raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
|
2016-02-15 08:42:25 +00:00
|
|
|
vcpu->arch.cputm_start = get_tod_clock_fast();
|
2016-02-17 20:53:33 +00:00
|
|
|
raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
|
2016-02-15 08:42:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
|
|
|
|
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
|
2016-02-17 20:53:33 +00:00
|
|
|
raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
|
2016-02-15 08:42:25 +00:00
|
|
|
vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
|
|
|
|
vcpu->arch.cputm_start = 0;
|
2016-02-17 20:53:33 +00:00
|
|
|
raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
|
2016-02-15 08:42:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
|
|
|
|
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
WARN_ON_ONCE(vcpu->arch.cputm_enabled);
|
|
|
|
vcpu->arch.cputm_enabled = true;
|
|
|
|
__start_cpu_timer_accounting(vcpu);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
|
|
|
|
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
|
|
|
|
__stop_cpu_timer_accounting(vcpu);
|
|
|
|
vcpu->arch.cputm_enabled = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Preemption-safe wrapper around __enable_cpu_timer_accounting(). */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
|
|
|
|
|
|
|
|
/* Preemption-safe wrapper around __disable_cpu_timer_accounting(). */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
|
|
|
|
|
2016-02-15 08:40:12 +00:00
|
|
|
/* set the cpu timer - may only be called from the VCPU thread itself */
|
|
|
|
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
|
|
|
|
{
|
2016-02-15 08:42:25 +00:00
|
|
|
preempt_disable(); /* protect from TOD sync and vcpu_load/put */
|
2016-02-17 20:53:33 +00:00
|
|
|
raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
|
2016-02-15 08:42:25 +00:00
|
|
|
if (vcpu->arch.cputm_enabled)
|
|
|
|
vcpu->arch.cputm_start = get_tod_clock_fast();
|
2016-02-15 08:40:12 +00:00
|
|
|
vcpu->arch.sie_block->cputm = cputm;
|
2016-02-17 20:53:33 +00:00
|
|
|
raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
|
2016-02-15 08:42:25 +00:00
|
|
|
preempt_enable();
|
2016-02-15 08:40:12 +00:00
|
|
|
}
|
|
|
|
|
2016-02-15 08:42:25 +00:00
|
|
|
/* update and get the cpu timer - can also be called from other VCPU threads */
|
2016-02-15 08:40:12 +00:00
|
|
|
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
2016-02-17 20:53:33 +00:00
|
|
|
unsigned int seq;
|
2016-02-15 08:42:25 +00:00
|
|
|
__u64 value;
|
|
|
|
|
|
|
|
if (unlikely(!vcpu->arch.cputm_enabled))
|
|
|
|
return vcpu->arch.sie_block->cputm;
|
|
|
|
|
2016-02-17 20:53:33 +00:00
|
|
|
preempt_disable(); /* protect from TOD sync and vcpu_load/put */
|
|
|
|
do {
|
|
|
|
seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
|
|
|
|
/*
|
|
|
|
* If the writer would ever execute a read in the critical
|
|
|
|
* section, e.g. in irq context, we have a deadlock.
|
|
|
|
*/
|
|
|
|
WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
|
|
|
|
value = vcpu->arch.sie_block->cputm;
|
|
|
|
/* if cputm_start is 0, accounting is being started/stopped */
|
|
|
|
if (likely(vcpu->arch.cputm_start))
|
|
|
|
value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
|
|
|
|
} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
|
|
|
|
preempt_enable();
|
2016-02-15 08:42:25 +00:00
|
|
|
return value;
|
2016-02-15 08:40:12 +00:00
|
|
|
}
|
|
|
|
|
2008-03-25 17:47:20 +00:00
|
|
|
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
|
|
|
{
|
s390/kernel: lazy restore fpu registers
Improve the save and restore behavior of FPU register contents to use the
vector extension within the kernel.
The kernel does not use floating-point or vector registers and, therefore,
saving and restoring the FPU register contents are performed for handling
signals or switching processes only. To prepare for using vector
instructions and vector registers within the kernel, enhance the save
behavior and implement a lazy restore at return to user space from a
system call or interrupt.
To implement the lazy restore, the save_fpu_regs() sets a CPU information
flag, CIF_FPU, to indicate that the FPU registers must be restored.
Saving and setting CIF_FPU is performed in an atomic fashion to be
interrupt-safe. When the kernel wants to use the vector extension or
wants to change the FPU register state for a task during signal handling,
the save_fpu_regs() must be called first. The CIF_FPU flag is also set at
process switch. At return to user space, the FPU state is restored. In
particular, the FPU state includes the floating-point or vector register
contents, as well as, vector-enablement and floating-point control. The
FPU state restore and clearing CIF_FPU is also performed in an atomic
fashion.
For KVM, the restore of the FPU register state is performed when restoring
the general-purpose guest registers before the SIE instructions is started.
Because the path towards the SIE instruction is interruptible, the CIF_FPU
flag must be checked again right before going into SIE. If set, the guest
registers must be reloaded again by re-entering the outer SIE loop. This
is the same behavior as if the SIE critical section is interrupted.
Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2015-06-10 10:53:42 +00:00
|
|
|
|
2015-03-11 15:47:33 +00:00
|
|
|
gmap_enable(vcpu->arch.enabled_gmap);
|
2018-01-23 17:05:29 +00:00
|
|
|
kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
|
2016-02-22 12:52:27 +00:00
|
|
|
if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
|
2016-02-15 08:42:25 +00:00
|
|
|
__start_cpu_timer_accounting(vcpu);
|
2016-02-12 19:41:56 +00:00
|
|
|
vcpu->cpu = cpu;
|
2008-03-25 17:47:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
2016-02-12 19:41:56 +00:00
|
|
|
vcpu->cpu = -1;
|
2016-02-22 12:52:27 +00:00
|
|
|
if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
|
2016-02-15 08:42:25 +00:00
|
|
|
__stop_cpu_timer_accounting(vcpu);
|
2018-01-23 17:05:30 +00:00
|
|
|
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
|
2015-03-11 15:47:33 +00:00
|
|
|
vcpu->arch.enabled_gmap = gmap_get_enabled();
|
|
|
|
gmap_disable(vcpu->arch.enabled_gmap);
|
s390/kernel: lazy restore fpu registers
Improve the save and restore behavior of FPU register contents to use the
vector extension within the kernel.
The kernel does not use floating-point or vector registers and, therefore,
saving and restoring the FPU register contents are performed for handling
signals or switching processes only. To prepare for using vector
instructions and vector registers within the kernel, enhance the save
behavior and implement a lazy restore at return to user space from a
system call or interrupt.
To implement the lazy restore, the save_fpu_regs() sets a CPU information
flag, CIF_FPU, to indicate that the FPU registers must be restored.
Saving and setting CIF_FPU is performed in an atomic fashion to be
interrupt-safe. When the kernel wants to use the vector extension or
wants to change the FPU register state for a task during signal handling,
the save_fpu_regs() must be called first. The CIF_FPU flag is also set at
process switch. At return to user space, the FPU state is restored. In
particular, the FPU state includes the floating-point or vector register
contents, as well as, vector-enablement and floating-point control. The
FPU state restore and clearing CIF_FPU is also performed in an atomic
fashion.
For KVM, the restore of the FPU register state is performed when restoring
the general-purpose guest registers before the SIE instructions is started.
Because the path towards the SIE instruction is interruptible, the CIF_FPU
flag must be checked again right before going into SIE. If set, the guest
registers must be reloaded again by re-entering the outer SIE loop. This
is the same behavior as if the SIE critical section is interrupted.
Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2015-06-10 10:53:42 +00:00
|
|
|
|
2008-03-25 17:47:20 +00:00
|
|
|
}
|
|
|
|
|
2014-12-04 14:47:07 +00:00
|
|
|
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
|
2012-11-28 01:29:02 +00:00
|
|
|
{
|
2014-11-25 14:46:02 +00:00
|
|
|
mutex_lock(&vcpu->kvm->lock);
|
2015-05-13 08:58:41 +00:00
|
|
|
preempt_disable();
|
2014-11-25 14:46:02 +00:00
|
|
|
vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
|
2018-02-07 11:46:44 +00:00
|
|
|
vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
|
2015-05-13 08:58:41 +00:00
|
|
|
preempt_enable();
|
2014-11-25 14:46:02 +00:00
|
|
|
mutex_unlock(&vcpu->kvm->lock);
|
2015-10-12 14:27:23 +00:00
|
|
|
if (!kvm_is_ucontrol(vcpu->kvm)) {
|
2014-12-02 15:53:21 +00:00
|
|
|
vcpu->arch.gmap = vcpu->kvm->arch.gmap;
|
2015-10-12 14:29:01 +00:00
|
|
|
sca_add_vcpu(vcpu);
|
2015-10-12 14:27:23 +00:00
|
|
|
}
|
2016-06-21 12:19:51 +00:00
|
|
|
if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
|
|
|
|
vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
|
2015-03-11 15:47:33 +00:00
|
|
|
/* make vcpu_load load the right gmap on the first trigger */
|
|
|
|
vcpu->arch.enabled_gmap = vcpu->arch.gmap;
|
2012-11-28 01:29:02 +00:00
|
|
|
}
|
|
|
|
|
2019-04-03 07:00:35 +00:00
|
|
|
static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
|
|
|
|
{
|
|
|
|
if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
|
|
|
|
test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
|
|
|
|
return true;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool kvm_has_pckmo_ecc(struct kvm *kvm)
|
|
|
|
{
|
|
|
|
/* At least one ECC subfunction must be present */
|
|
|
|
return kvm_has_pckmo_subfunc(kvm, 32) ||
|
|
|
|
kvm_has_pckmo_subfunc(kvm, 33) ||
|
|
|
|
kvm_has_pckmo_subfunc(kvm, 34) ||
|
|
|
|
kvm_has_pckmo_subfunc(kvm, 40) ||
|
|
|
|
kvm_has_pckmo_subfunc(kvm, 41);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2014-06-27 18:46:01 +00:00
|
|
|
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
2018-09-25 23:16:18 +00:00
|
|
|
/*
|
|
|
|
* If the AP instructions are not being interpreted and the MSAX3
|
|
|
|
* facility is not configured for the guest, there is nothing to set up.
|
|
|
|
*/
|
|
|
|
if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
|
2014-06-27 18:46:01 +00:00
|
|
|
return;
|
|
|
|
|
2018-09-25 23:16:18 +00:00
|
|
|
vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
|
2014-09-03 08:13:53 +00:00
|
|
|
vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
|
2018-09-25 23:16:39 +00:00
|
|
|
vcpu->arch.sie_block->eca &= ~ECA_APIE;
|
2019-04-03 07:00:35 +00:00
|
|
|
vcpu->arch.sie_block->ecd &= ~ECD_ECC;
|
2014-09-03 08:13:53 +00:00
|
|
|
|
2018-09-25 23:16:18 +00:00
|
|
|
if (vcpu->kvm->arch.crypto.apie)
|
|
|
|
vcpu->arch.sie_block->eca |= ECA_APIE;
|
2014-09-03 08:13:53 +00:00
|
|
|
|
2018-09-25 23:16:18 +00:00
|
|
|
/* Set up protected key support */
|
2019-04-03 07:00:35 +00:00
|
|
|
if (vcpu->kvm->arch.crypto.aes_kw) {
|
2014-09-03 08:13:53 +00:00
|
|
|
vcpu->arch.sie_block->ecb3 |= ECB3_AES;
|
2019-04-03 07:00:35 +00:00
|
|
|
/* ecc is also wrapped with AES key */
|
|
|
|
if (kvm_has_pckmo_ecc(vcpu->kvm))
|
|
|
|
vcpu->arch.sie_block->ecd |= ECD_ECC;
|
|
|
|
}
|
|
|
|
|
2014-09-03 08:13:53 +00:00
|
|
|
if (vcpu->kvm->arch.crypto.dea_kw)
|
|
|
|
vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
|
2014-06-27 18:46:01 +00:00
|
|
|
}
|
|
|
|
|
2014-03-25 12:47:11 +00:00
|
|
|
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
2022-10-20 14:31:57 +00:00
|
|
|
free_page((unsigned long)phys_to_virt(vcpu->arch.sie_block->cbrlo));
|
2014-03-25 12:47:11 +00:00
|
|
|
vcpu->arch.sie_block->cbrlo = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
2022-10-20 14:31:57 +00:00
|
|
|
void *cbrlo_page = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
|
|
|
|
|
|
|
|
if (!cbrlo_page)
|
2014-03-25 12:47:11 +00:00
|
|
|
return -ENOMEM;
|
2022-10-20 14:31:57 +00:00
|
|
|
|
|
|
|
vcpu->arch.sie_block->cbrlo = virt_to_phys(cbrlo_page);
|
2014-03-25 12:47:11 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-02-27 13:32:11 +00:00
|
|
|
static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
|
|
|
|
|
|
|
|
vcpu->arch.sie_block->ibc = model->ibc;
|
2015-12-02 08:43:29 +00:00
|
|
|
if (test_kvm_facility(vcpu->kvm, 7))
|
2022-10-20 14:31:57 +00:00
|
|
|
vcpu->arch.sie_block->fac = virt_to_phys(model->fac_list);
|
2015-02-27 13:32:11 +00:00
|
|
|
}
|
|
|
|
|
2019-12-18 21:55:20 +00:00
|
|
|
static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
|
2008-03-25 17:47:20 +00:00
|
|
|
{
|
2014-03-25 12:47:11 +00:00
|
|
|
int rc = 0;
|
2019-09-30 08:19:18 +00:00
|
|
|
u16 uvrc, uvrrc;
|
2013-04-17 15:36:29 +00:00
|
|
|
|
2011-11-17 10:00:41 +00:00
|
|
|
atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
|
|
|
|
CPUSTAT_SM |
|
2015-03-31 12:39:49 +00:00
|
|
|
CPUSTAT_STOPPED);
|
|
|
|
|
2015-02-18 10:13:03 +00:00
|
|
|
if (test_kvm_facility(vcpu->kvm, 78))
|
2018-01-23 17:05:29 +00:00
|
|
|
kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
|
2015-02-18 10:13:03 +00:00
|
|
|
else if (test_kvm_facility(vcpu->kvm, 8))
|
2018-01-23 17:05:29 +00:00
|
|
|
kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
|
2015-03-31 12:39:49 +00:00
|
|
|
|
2015-02-27 13:32:11 +00:00
|
|
|
kvm_s390_vcpu_setup_model(vcpu);
|
|
|
|
|
2016-04-12 09:07:49 +00:00
|
|
|
/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
|
|
|
|
if (MACHINE_HAS_ESOP)
|
2017-03-13 10:48:28 +00:00
|
|
|
vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
|
2016-03-04 11:23:55 +00:00
|
|
|
if (test_kvm_facility(vcpu->kvm, 9))
|
2017-03-13 10:48:28 +00:00
|
|
|
vcpu->arch.sie_block->ecb |= ECB_SRSI;
|
2022-07-14 10:18:23 +00:00
|
|
|
if (test_kvm_facility(vcpu->kvm, 11))
|
|
|
|
vcpu->arch.sie_block->ecb |= ECB_PTF;
|
2016-04-22 14:26:49 +00:00
|
|
|
if (test_kvm_facility(vcpu->kvm, 73))
|
2017-03-13 10:48:28 +00:00
|
|
|
vcpu->arch.sie_block->ecb |= ECB_TE;
|
2021-06-29 08:55:30 +00:00
|
|
|
if (!kvm_is_ucontrol(vcpu->kvm))
|
|
|
|
vcpu->arch.sie_block->ecb |= ECB_SPECI;
|
2013-06-28 11:30:24 +00:00
|
|
|
|
2018-02-16 11:16:14 +00:00
|
|
|
if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
|
2017-03-13 10:48:28 +00:00
|
|
|
vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
|
2016-08-04 07:57:36 +00:00
|
|
|
if (test_kvm_facility(vcpu->kvm, 130))
|
2017-03-13 10:48:28 +00:00
|
|
|
vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
|
|
|
|
vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
|
2016-04-04 13:49:34 +00:00
|
|
|
if (sclp.has_cei)
|
2017-03-13 10:48:28 +00:00
|
|
|
vcpu->arch.sie_block->eca |= ECA_CEI;
|
2016-04-04 13:46:26 +00:00
|
|
|
if (sclp.has_ib)
|
2017-03-13 10:48:28 +00:00
|
|
|
vcpu->arch.sie_block->eca |= ECA_IB;
|
2015-05-06 11:18:59 +00:00
|
|
|
if (sclp.has_siif)
|
2017-03-13 10:48:28 +00:00
|
|
|
vcpu->arch.sie_block->eca |= ECA_SII;
|
2015-05-06 11:18:59 +00:00
|
|
|
if (sclp.has_sigpif)
|
2017-03-13 10:48:28 +00:00
|
|
|
vcpu->arch.sie_block->eca |= ECA_SIGPI;
|
2015-03-16 15:05:41 +00:00
|
|
|
if (test_kvm_facility(vcpu->kvm, 129)) {
|
2017-03-13 10:48:28 +00:00
|
|
|
vcpu->arch.sie_block->eca |= ECA_VX;
|
|
|
|
vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
|
2014-04-30 17:39:46 +00:00
|
|
|
}
|
2016-07-26 19:29:44 +00:00
|
|
|
if (test_kvm_facility(vcpu->kvm, 139))
|
|
|
|
vcpu->arch.sie_block->ecd |= ECD_MEF;
|
2018-03-08 16:08:49 +00:00
|
|
|
if (test_kvm_facility(vcpu->kvm, 156))
|
|
|
|
vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
|
2017-06-12 12:15:19 +00:00
|
|
|
if (vcpu->arch.sie_block->gd) {
|
|
|
|
vcpu->arch.sie_block->eca |= ECA_AIV;
|
|
|
|
VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
|
|
|
|
vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
|
|
|
|
}
|
2022-10-20 14:31:57 +00:00
|
|
|
vcpu->arch.sie_block->sdnxo = virt_to_phys(&vcpu->run->s.regs.sdnx) | SDNXC;
|
|
|
|
vcpu->arch.sie_block->riccbd = virt_to_phys(&vcpu->run->s.regs.riccb);
|
2017-02-24 21:12:56 +00:00
|
|
|
|
|
|
|
if (sclp.has_kss)
|
2018-01-23 17:05:29 +00:00
|
|
|
kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
|
2017-02-24 21:12:56 +00:00
|
|
|
else
|
|
|
|
vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
|
2013-01-29 16:48:20 +00:00
|
|
|
|
2015-05-07 13:41:57 +00:00
|
|
|
if (vcpu->kvm->arch.use_cmma) {
|
2014-03-25 12:47:11 +00:00
|
|
|
rc = kvm_s390_vcpu_setup_cmma(vcpu);
|
|
|
|
if (rc)
|
|
|
|
return rc;
|
2013-04-17 15:36:29 +00:00
|
|
|
}
|
2014-12-12 14:17:31 +00:00
|
|
|
hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
|
2009-05-12 15:21:49 +00:00
|
|
|
vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
|
2015-02-02 14:42:51 +00:00
|
|
|
|
2018-08-31 16:51:19 +00:00
|
|
|
vcpu->arch.sie_block->hpid = HPID_KVM;
|
|
|
|
|
2014-06-27 18:46:01 +00:00
|
|
|
kvm_s390_vcpu_crypto_setup(vcpu);
|
|
|
|
|
2022-06-06 20:33:18 +00:00
|
|
|
kvm_s390_vcpu_pci_setup(vcpu);
|
|
|
|
|
2019-09-30 08:19:18 +00:00
|
|
|
mutex_lock(&vcpu->kvm->lock);
|
|
|
|
if (kvm_s390_pv_is_protected(vcpu->kvm)) {
|
|
|
|
rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
|
|
|
|
if (rc)
|
|
|
|
kvm_s390_vcpu_unsetup_cmma(vcpu);
|
|
|
|
}
|
|
|
|
mutex_unlock(&vcpu->kvm->lock);
|
|
|
|
|
2014-03-25 12:47:11 +00:00
|
|
|
return rc;
|
2008-03-25 17:47:20 +00:00
|
|
|
}
|
|
|
|
|
2019-12-18 21:55:09 +00:00
|
|
|
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
|
|
|
|
{
|
|
|
|
if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
|
|
|
|
return -EINVAL;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-12-18 21:55:15 +00:00
|
|
|
int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
|
2008-03-25 17:47:20 +00:00
|
|
|
{
|
2013-06-28 11:30:24 +00:00
|
|
|
struct sie_page *sie_page;
|
2019-12-18 21:55:09 +00:00
|
|
|
int rc;
|
2011-10-18 10:27:12 +00:00
|
|
|
|
KVM: s390: Backup the guest's machine check info
When a machine check happens in the guest, related mcck info (mcic,
external damage code, ...) is stored in the vcpu's lowcore on the host.
Then the machine check handler's low-level part is executed, followed
by the high-level part.
If the high-level part's execution is interrupted by a new machine check
happening on the same vcpu on the host, the mcck info in the lowcore is
overwritten with the new machine check's data.
If the high-level part's execution is scheduled to a different cpu,
the mcck info in the lowcore is uncertain.
Therefore, for both cases, the further reinjection to the guest will use
the wrong data.
Let's backup the mcck info in the lowcore to the sie page
for further reinjection, so that the right data will be used.
Add new member into struct sie_page to store related machine check's
info of mcic, failing storage address and external damage code.
Signed-off-by: QingFeng Hao <haoqf@linux.vnet.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-06-07 09:41:19 +00:00
|
|
|
BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
|
2020-11-06 07:34:23 +00:00
|
|
|
sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
|
2013-06-28 11:30:24 +00:00
|
|
|
if (!sie_page)
|
2019-12-18 21:55:15 +00:00
|
|
|
return -ENOMEM;
|
2008-03-25 17:47:20 +00:00
|
|
|
|
2013-06-28 11:30:24 +00:00
|
|
|
vcpu->arch.sie_block = &sie_page->sie_block;
|
2022-10-20 14:31:57 +00:00
|
|
|
vcpu->arch.sie_block->itdba = virt_to_phys(&sie_page->itdb);
|
2013-06-28 11:30:24 +00:00
|
|
|
|
2015-04-16 10:32:41 +00:00
|
|
|
/* the real guest size will always be smaller than msl */
|
|
|
|
vcpu->arch.sie_block->mso = 0;
|
|
|
|
vcpu->arch.sie_block->msl = sclp.hamax;
|
|
|
|
|
2019-12-18 21:55:15 +00:00
|
|
|
vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
|
KVM: s390: interrupt subsystem, cpu timer, waitpsw
This patch contains the s390 interrupt subsystem (similar to in kernel apic)
including timer interrupts (similar to in-kernel-pit) and enabled wait
(similar to in kernel hlt).
In order to achieve that, this patch also introduces intercept handling
for instruction intercepts, and it implements load control instructions.
This patch introduces an ioctl KVM_S390_INTERRUPT which is valid for both
the vm file descriptors and the vcpu file descriptors. In case this ioctl is
issued against a vm file descriptor, the interrupt is considered floating.
Floating interrupts may be delivered to any virtual cpu in the configuration.
The following interrupts are supported:
SIGP STOP - interprocessor signal that stops a remote cpu
SIGP SET PREFIX - interprocessor signal that sets the prefix register of a
(stopped) remote cpu
INT EMERGENCY - interprocessor interrupt, usually used to signal need_reshed
and for smp_call_function() in the guest.
PROGRAM INT - exception during program execution such as page fault, illegal
instruction and friends
RESTART - interprocessor signal that starts a stopped cpu
INT VIRTIO - floating interrupt for virtio signalisation
INT SERVICE - floating interrupt for signalisations from the system
service processor
struct kvm_s390_interrupt, which is submitted as ioctl parameter when injecting
an interrupt, also carries parameter data for interrupts along with the interrupt
type. Interrupts on s390 usually have a state that represents the current
operation, or identifies which device has caused the interruption on s390.
kvm_s390_handle_wait() does handle waitpsw in two flavors: in case of a
disabled wait (that is, disabled for interrupts), we exit to userspace. In case
of an enabled wait we set up a timer that equals the cpu clock comparator value
and sleep on a wait queue.
[christian: change virtio interrupt to 0x2603]
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
2008-03-25 17:47:26 +00:00
|
|
|
spin_lock_init(&vcpu->arch.local_int.lock);
|
2022-02-09 15:22:17 +00:00
|
|
|
vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm);
|
2016-02-17 20:53:33 +00:00
|
|
|
seqcount_init(&vcpu->arch.cputm_seqcount);
|
KVM: s390: interrupt subsystem, cpu timer, waitpsw
This patch contains the s390 interrupt subsystem (similar to in kernel apic)
including timer interrupts (similar to in-kernel-pit) and enabled wait
(similar to in kernel hlt).
In order to achieve that, this patch also introduces intercept handling
for instruction intercepts, and it implements load control instructions.
This patch introduces an ioctl KVM_S390_INTERRUPT which is valid for both
the vm file descriptors and the vcpu file descriptors. In case this ioctl is
issued against a vm file descriptor, the interrupt is considered floating.
Floating interrupts may be delivered to any virtual cpu in the configuration.
The following interrupts are supported:
SIGP STOP - interprocessor signal that stops a remote cpu
SIGP SET PREFIX - interprocessor signal that sets the prefix register of a
(stopped) remote cpu
INT EMERGENCY - interprocessor interrupt, usually used to signal need_resched
and for smp_call_function() in the guest.
PROGRAM INT - exception during program execution such as page fault, illegal
instruction and friends
RESTART - interprocessor signal that starts a stopped cpu
INT VIRTIO - floating interrupt for virtio signalisation
INT SERVICE - floating interrupt for signalisations from the system
service processor
struct kvm_s390_interrupt, which is submitted as ioctl parameter when injecting
an interrupt, also carries parameter data for interrupts along with the interrupt
type. Interrupts on s390 usually have a state that represents the current
operation, or identifies which device has caused the interruption on s390.
kvm_s390_handle_wait() does handle waitpsw in two flavors: in case of a
disabled wait (that is, disabled for interrupts), we exit to userspace. In case
of an enabled wait we set up a timer that equals the cpu clock comparator value
and sleep on a wait queue.
[christian: change virtio interrupt to 0x2603]
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
2008-03-25 17:47:26 +00:00
|
|
|
|
2019-12-18 21:55:10 +00:00
|
|
|
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
|
|
|
|
kvm_clear_async_pf_completion_queue(vcpu);
|
|
|
|
vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
|
|
|
|
KVM_SYNC_GPRS |
|
|
|
|
KVM_SYNC_ACRS |
|
|
|
|
KVM_SYNC_CRS |
|
|
|
|
KVM_SYNC_ARCH0 |
|
2020-06-22 15:46:36 +00:00
|
|
|
KVM_SYNC_PFAULT |
|
|
|
|
KVM_SYNC_DIAG318;
|
2019-12-18 21:55:10 +00:00
|
|
|
kvm_s390_set_prefix(vcpu, 0);
|
|
|
|
if (test_kvm_facility(vcpu->kvm, 64))
|
|
|
|
vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
|
|
|
|
if (test_kvm_facility(vcpu->kvm, 82))
|
|
|
|
vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
|
|
|
|
if (test_kvm_facility(vcpu->kvm, 133))
|
|
|
|
vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
|
|
|
|
if (test_kvm_facility(vcpu->kvm, 156))
|
|
|
|
vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
|
|
|
|
/* fprs can be synchronized via vrs, even if the guest has no vx. With
|
|
|
|
* MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
|
|
|
|
*/
|
|
|
|
if (MACHINE_HAS_VX)
|
|
|
|
vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
|
|
|
|
else
|
|
|
|
vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
|
|
|
|
|
|
|
|
if (kvm_is_ucontrol(vcpu->kvm)) {
|
|
|
|
rc = __kvm_ucontrol_vcpu_init(vcpu);
|
|
|
|
if (rc)
|
2019-12-18 21:55:11 +00:00
|
|
|
goto out_free_sie_block;
|
2019-12-18 21:55:10 +00:00
|
|
|
}
|
|
|
|
|
2019-12-18 21:55:15 +00:00
|
|
|
VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
|
|
|
|
vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
|
|
|
|
trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
|
|
|
|
|
2019-12-18 21:55:20 +00:00
|
|
|
rc = kvm_s390_vcpu_setup(vcpu);
|
|
|
|
if (rc)
|
|
|
|
goto out_ucontrol_uninit;
|
2022-07-14 10:18:23 +00:00
|
|
|
|
|
|
|
kvm_s390_update_topology_change_report(vcpu->kvm, 1);
|
2019-12-18 21:55:15 +00:00
|
|
|
return 0;
|
2008-03-25 17:47:20 +00:00
|
|
|
|
2019-12-18 21:55:20 +00:00
|
|
|
out_ucontrol_uninit:
|
|
|
|
if (kvm_is_ucontrol(vcpu->kvm))
|
|
|
|
gmap_remove(vcpu->arch.gmap);
|
2010-03-09 06:37:53 +00:00
|
|
|
out_free_sie_block:
|
|
|
|
free_page((unsigned long)(vcpu->arch.sie_block));
|
2019-12-18 21:55:15 +00:00
|
|
|
return rc;
|
2008-03-25 17:47:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
2021-10-19 17:53:59 +00:00
|
|
|
clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
|
2014-08-05 15:40:47 +00:00
|
|
|
return kvm_s390_vcpu_has_irq(vcpu, 0);
|
2008-03-25 17:47:20 +00:00
|
|
|
}
|
|
|
|
|
2017-08-08 04:05:32 +00:00
|
|
|
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
2017-08-08 04:05:34 +00:00
|
|
|
return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
|
2017-08-08 04:05:32 +00:00
|
|
|
}
|
|
|
|
|
2015-04-14 10:17:34 +00:00
|
|
|
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
|
2013-05-17 12:41:35 +00:00
|
|
|
{
|
2015-04-23 23:12:32 +00:00
|
|
|
atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
|
2015-05-12 06:41:40 +00:00
|
|
|
exit_sie(vcpu);
|
2013-05-17 12:41:35 +00:00
|
|
|
}
|
|
|
|
|
2015-04-14 10:17:34 +00:00
|
|
|
void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
|
2013-05-17 12:41:35 +00:00
|
|
|
{
|
2015-04-23 23:12:32 +00:00
|
|
|
atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
|
2013-05-17 12:41:35 +00:00
|
|
|
}
|
|
|
|
|
2015-04-09 11:49:04 +00:00
|
|
|
static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
2015-04-23 23:12:32 +00:00
|
|
|
atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
|
2015-05-12 06:41:40 +00:00
|
|
|
exit_sie(vcpu);
|
2015-04-09 11:49:04 +00:00
|
|
|
}
|
|
|
|
|
2018-09-25 23:16:16 +00:00
|
|
|
bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
return atomic_read(&vcpu->arch.sie_block->prog20) &
|
|
|
|
(PROG_BLOCK_SIE | PROG_REQUEST);
|
|
|
|
}
|
|
|
|
|
2015-04-09 11:49:04 +00:00
|
|
|
static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
2015-09-16 13:13:50 +00:00
|
|
|
atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
|
2015-04-09 11:49:04 +00:00
|
|
|
}
|
|
|
|
|
2013-05-17 12:41:35 +00:00
|
|
|
/*
|
2018-09-25 23:16:16 +00:00
|
|
|
* Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
|
2013-05-17 12:41:35 +00:00
|
|
|
* If the CPU is not running (e.g. waiting as idle) the function will
|
|
|
|
* return immediately. */
|
|
|
|
void exit_sie(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
2018-01-23 17:05:29 +00:00
|
|
|
kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
|
2018-09-25 23:16:16 +00:00
|
|
|
kvm_s390_vsie_kick(vcpu);
|
2013-05-17 12:41:35 +00:00
|
|
|
while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
|
|
|
|
cpu_relax();
|
|
|
|
}
|
|
|
|
|
2015-04-09 11:49:04 +00:00
|
|
|
/*
 * Kick a guest cpu out of SIE to process a request synchronously.
 *
 * The request @req is recorded on @vcpu first; kvm_s390_vcpu_request() then
 * flags the request in the SIE block and forces the SIE exit.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	__kvm_make_request(req, vcpu);

	kvm_s390_vcpu_request(vcpu);
}
|
|
|
|
|
2016-03-08 10:52:54 +00:00
|
|
|
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
|
|
|
|
unsigned long end)
|
2013-05-17 12:41:36 +00:00
|
|
|
{
|
|
|
|
struct kvm *kvm = gmap->private;
|
|
|
|
struct kvm_vcpu *vcpu;
|
2016-03-08 10:52:54 +00:00
|
|
|
unsigned long prefix;
|
2021-11-16 16:04:02 +00:00
|
|
|
unsigned long i;
|
2013-05-17 12:41:36 +00:00
|
|
|
|
2015-04-27 14:29:34 +00:00
|
|
|
if (gmap_is_shadow(gmap))
|
|
|
|
return;
|
2016-03-08 10:52:54 +00:00
|
|
|
if (start >= 1UL << 31)
|
|
|
|
/* We are only interested in prefix pages */
|
|
|
|
return;
|
2013-05-17 12:41:36 +00:00
|
|
|
kvm_for_each_vcpu(i, vcpu, kvm) {
|
|
|
|
/* match against both prefix pages */
|
2016-03-08 10:52:54 +00:00
|
|
|
prefix = kvm_s390_get_prefix(vcpu);
|
|
|
|
if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
|
|
|
|
VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
|
|
|
|
start, end);
|
2022-02-25 18:22:46 +00:00
|
|
|
kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
|
2013-05-17 12:41:36 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-03-05 10:30:02 +00:00
|
|
|
bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
/* do not poll with more than halt_poll_max_steal percent of steal time */
|
|
|
|
if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
|
2021-10-09 02:11:56 +00:00
|
|
|
READ_ONCE(halt_poll_max_steal)) {
|
2019-03-05 10:30:02 +00:00
|
|
|
vcpu->stat.halt_no_poll_steal++;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2012-03-08 21:44:24 +00:00
|
|
|
/*
 * Stub: kvm common code refers to this function but never calls it, so
 * reaching it is treated as a bug.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	BUG();

	return 0;
}
|
|
|
|
|
2012-05-15 12:15:26 +00:00
|
|
|
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
|
|
|
|
struct kvm_one_reg *reg)
|
|
|
|
{
|
|
|
|
int r = -EINVAL;
|
|
|
|
|
|
|
|
switch (reg->id) {
|
2012-05-15 12:15:27 +00:00
|
|
|
case KVM_REG_S390_TODPR:
|
|
|
|
r = put_user(vcpu->arch.sie_block->todpr,
|
|
|
|
(u32 __user *)reg->addr);
|
|
|
|
break;
|
|
|
|
case KVM_REG_S390_EPOCHDIFF:
|
|
|
|
r = put_user(vcpu->arch.sie_block->epoch,
|
|
|
|
(u64 __user *)reg->addr);
|
|
|
|
break;
|
2012-05-15 12:15:28 +00:00
|
|
|
case KVM_REG_S390_CPU_TIMER:
|
2016-02-15 08:40:12 +00:00
|
|
|
r = put_user(kvm_s390_get_cpu_timer(vcpu),
|
2012-05-15 12:15:28 +00:00
|
|
|
(u64 __user *)reg->addr);
|
|
|
|
break;
|
|
|
|
case KVM_REG_S390_CLOCK_COMP:
|
|
|
|
r = put_user(vcpu->arch.sie_block->ckc,
|
|
|
|
(u64 __user *)reg->addr);
|
|
|
|
break;
|
2013-09-30 08:55:33 +00:00
|
|
|
case KVM_REG_S390_PFTOKEN:
|
|
|
|
r = put_user(vcpu->arch.pfault_token,
|
|
|
|
(u64 __user *)reg->addr);
|
|
|
|
break;
|
|
|
|
case KVM_REG_S390_PFCOMPARE:
|
|
|
|
r = put_user(vcpu->arch.pfault_compare,
|
|
|
|
(u64 __user *)reg->addr);
|
|
|
|
break;
|
|
|
|
case KVM_REG_S390_PFSELECT:
|
|
|
|
r = put_user(vcpu->arch.pfault_select,
|
|
|
|
(u64 __user *)reg->addr);
|
|
|
|
break;
|
2014-02-10 14:32:19 +00:00
|
|
|
case KVM_REG_S390_PP:
|
|
|
|
r = put_user(vcpu->arch.sie_block->pp,
|
|
|
|
(u64 __user *)reg->addr);
|
|
|
|
break;
|
2014-02-10 14:39:23 +00:00
|
|
|
case KVM_REG_S390_GBEA:
|
|
|
|
r = put_user(vcpu->arch.sie_block->gbea,
|
|
|
|
(u64 __user *)reg->addr);
|
|
|
|
break;
|
2012-05-15 12:15:26 +00:00
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
|
|
|
|
struct kvm_one_reg *reg)
|
|
|
|
{
|
|
|
|
int r = -EINVAL;
|
2016-02-15 08:40:12 +00:00
|
|
|
__u64 val;
|
2012-05-15 12:15:26 +00:00
|
|
|
|
|
|
|
switch (reg->id) {
|
2012-05-15 12:15:27 +00:00
|
|
|
case KVM_REG_S390_TODPR:
|
|
|
|
r = get_user(vcpu->arch.sie_block->todpr,
|
|
|
|
(u32 __user *)reg->addr);
|
|
|
|
break;
|
|
|
|
case KVM_REG_S390_EPOCHDIFF:
|
|
|
|
r = get_user(vcpu->arch.sie_block->epoch,
|
|
|
|
(u64 __user *)reg->addr);
|
|
|
|
break;
|
2012-05-15 12:15:28 +00:00
|
|
|
case KVM_REG_S390_CPU_TIMER:
|
2016-02-15 08:40:12 +00:00
|
|
|
r = get_user(val, (u64 __user *)reg->addr);
|
|
|
|
if (!r)
|
|
|
|
kvm_s390_set_cpu_timer(vcpu, val);
|
2012-05-15 12:15:28 +00:00
|
|
|
break;
|
|
|
|
case KVM_REG_S390_CLOCK_COMP:
|
|
|
|
r = get_user(vcpu->arch.sie_block->ckc,
|
|
|
|
(u64 __user *)reg->addr);
|
|
|
|
break;
|
2013-09-30 08:55:33 +00:00
|
|
|
case KVM_REG_S390_PFTOKEN:
|
|
|
|
r = get_user(vcpu->arch.pfault_token,
|
|
|
|
(u64 __user *)reg->addr);
|
2014-10-09 13:01:38 +00:00
|
|
|
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
|
|
|
|
kvm_clear_async_pf_completion_queue(vcpu);
|
2013-09-30 08:55:33 +00:00
|
|
|
break;
|
|
|
|
case KVM_REG_S390_PFCOMPARE:
|
|
|
|
r = get_user(vcpu->arch.pfault_compare,
|
|
|
|
(u64 __user *)reg->addr);
|
|
|
|
break;
|
|
|
|
case KVM_REG_S390_PFSELECT:
|
|
|
|
r = get_user(vcpu->arch.pfault_select,
|
|
|
|
(u64 __user *)reg->addr);
|
|
|
|
break;
|
2014-02-10 14:32:19 +00:00
|
|
|
case KVM_REG_S390_PP:
|
|
|
|
r = get_user(vcpu->arch.sie_block->pp,
|
|
|
|
(u64 __user *)reg->addr);
|
|
|
|
break;
|
2014-02-10 14:39:23 +00:00
|
|
|
case KVM_REG_S390_GBEA:
|
|
|
|
r = get_user(vcpu->arch.sie_block->gbea,
|
|
|
|
(u64 __user *)reg->addr);
|
|
|
|
break;
|
2012-05-15 12:15:26 +00:00
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return r;
|
|
|
|
}
|
2012-03-08 21:44:24 +00:00
|
|
|
|
2020-01-31 10:02:02 +00:00
|
|
|
static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
|
2008-03-25 17:47:20 +00:00
|
|
|
{
|
2020-01-31 10:02:02 +00:00
|
|
|
vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
|
|
|
|
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
|
|
|
|
memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
|
|
|
|
|
|
|
|
kvm_clear_async_pf_completion_queue(vcpu);
|
|
|
|
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
|
|
|
|
kvm_s390_vcpu_stop(vcpu);
|
|
|
|
kvm_s390_clear_local_irqs(vcpu);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
/* Initial reset is a superset of the normal reset */
|
|
|
|
kvm_arch_vcpu_ioctl_normal_reset(vcpu);
|
|
|
|
|
2020-03-03 08:10:57 +00:00
|
|
|
/*
|
|
|
|
* This equals initial cpu reset in pop, but we don't switch to ESA.
|
|
|
|
* We do not only reset the internal data, but also ...
|
|
|
|
*/
|
2020-01-31 10:02:02 +00:00
|
|
|
vcpu->arch.sie_block->gpsw.mask = 0;
|
|
|
|
vcpu->arch.sie_block->gpsw.addr = 0;
|
|
|
|
kvm_s390_set_prefix(vcpu, 0);
|
|
|
|
kvm_s390_set_cpu_timer(vcpu, 0);
|
|
|
|
vcpu->arch.sie_block->ckc = 0;
|
|
|
|
memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
|
|
|
|
vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
|
|
|
|
vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
|
2020-03-03 08:10:57 +00:00
|
|
|
|
|
|
|
/* ... the data in sync regs */
|
|
|
|
memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
|
|
|
|
vcpu->run->s.regs.ckc = 0;
|
|
|
|
vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
|
|
|
|
vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
|
|
|
|
vcpu->run->psw_addr = 0;
|
|
|
|
vcpu->run->psw_mask = 0;
|
|
|
|
vcpu->run->s.regs.todpr = 0;
|
|
|
|
vcpu->run->s.regs.cputm = 0;
|
|
|
|
vcpu->run->s.regs.ckc = 0;
|
|
|
|
vcpu->run->s.regs.pp = 0;
|
|
|
|
vcpu->run->s.regs.gbea = 1;
|
2020-01-31 10:02:02 +00:00
|
|
|
vcpu->run->s.regs.fpc = 0;
|
2020-02-10 09:27:47 +00:00
|
|
|
/*
|
|
|
|
* Do not reset these registers in the protected case, as some of
|
|
|
|
* them are overlayed and they are not accessible in this case
|
|
|
|
* anyway.
|
|
|
|
*/
|
|
|
|
if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
|
|
|
|
vcpu->arch.sie_block->gbea = 1;
|
|
|
|
vcpu->arch.sie_block->pp = 0;
|
|
|
|
vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
|
|
|
|
vcpu->arch.sie_block->todpr = 0;
|
|
|
|
}
|
2020-01-31 10:02:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
struct kvm_sync_regs *regs = &vcpu->run->s.regs;
|
|
|
|
|
|
|
|
/* Clear reset is a superset of the initial reset */
|
|
|
|
kvm_arch_vcpu_ioctl_initial_reset(vcpu);
|
|
|
|
|
|
|
|
memset(®s->gprs, 0, sizeof(regs->gprs));
|
|
|
|
memset(®s->vrs, 0, sizeof(regs->vrs));
|
|
|
|
memset(®s->acrs, 0, sizeof(regs->acrs));
|
|
|
|
memset(®s->gscb, 0, sizeof(regs->gscb));
|
|
|
|
|
|
|
|
regs->etoken = 0;
|
|
|
|
regs->etoken_extension = 0;
|
2008-03-25 17:47:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
|
|
|
|
{
|
2017-12-04 20:35:27 +00:00
|
|
|
vcpu_load(vcpu);
|
2012-01-11 10:20:32 +00:00
|
|
|
memcpy(&vcpu->run->s.regs.gprs, ®s->gprs, sizeof(regs->gprs));
|
2017-12-04 20:35:27 +00:00
|
|
|
vcpu_put(vcpu);
|
2008-03-25 17:47:20 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
|
|
|
|
{
|
2017-12-04 20:35:26 +00:00
|
|
|
vcpu_load(vcpu);
|
2012-01-11 10:20:32 +00:00
|
|
|
memcpy(®s->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
|
2017-12-04 20:35:26 +00:00
|
|
|
vcpu_put(vcpu);
|
2008-03-25 17:47:20 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
|
|
|
|
struct kvm_sregs *sregs)
|
|
|
|
{
|
2017-12-04 20:35:29 +00:00
|
|
|
vcpu_load(vcpu);
|
|
|
|
|
2012-01-11 10:20:33 +00:00
|
|
|
memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
|
2008-03-25 17:47:20 +00:00
|
|
|
memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
|
2017-12-04 20:35:29 +00:00
|
|
|
|
|
|
|
vcpu_put(vcpu);
|
2008-03-25 17:47:20 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
|
|
|
|
struct kvm_sregs *sregs)
|
|
|
|
{
|
2017-12-04 20:35:28 +00:00
|
|
|
vcpu_load(vcpu);
|
|
|
|
|
2012-01-11 10:20:33 +00:00
|
|
|
memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
|
2008-03-25 17:47:20 +00:00
|
|
|
memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
|
2017-12-04 20:35:28 +00:00
|
|
|
|
|
|
|
vcpu_put(vcpu);
|
2008-03-25 17:47:20 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
|
|
|
|
{
|
2017-12-04 20:35:35 +00:00
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
vcpu_load(vcpu);
|
|
|
|
|
|
|
|
if (test_fp_ctl(fpu->fpc)) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
2016-11-22 08:29:38 +00:00
|
|
|
vcpu->run->s.regs.fpc = fpu->fpc;
|
2016-01-14 21:12:47 +00:00
|
|
|
if (MACHINE_HAS_VX)
|
2016-08-16 12:38:24 +00:00
|
|
|
convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
|
|
|
|
(freg_t *) fpu->fprs);
|
2016-01-14 21:12:47 +00:00
|
|
|
else
|
2016-08-16 12:38:24 +00:00
|
|
|
memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
|
2017-12-04 20:35:35 +00:00
|
|
|
|
|
|
|
out:
|
|
|
|
vcpu_put(vcpu);
|
|
|
|
return ret;
|
2008-03-25 17:47:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
|
|
|
|
{
|
2017-12-04 20:35:34 +00:00
|
|
|
vcpu_load(vcpu);
|
|
|
|
|
2016-01-14 21:12:47 +00:00
|
|
|
/* make sure we have the latest values */
|
|
|
|
save_fpu_regs();
|
|
|
|
if (MACHINE_HAS_VX)
|
2016-08-16 12:38:24 +00:00
|
|
|
convert_vx_to_fp((freg_t *) fpu->fprs,
|
|
|
|
(__vector128 *) vcpu->run->s.regs.vrs);
|
2016-01-14 21:12:47 +00:00
|
|
|
else
|
2016-08-16 12:38:24 +00:00
|
|
|
memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
|
2016-11-22 08:29:38 +00:00
|
|
|
fpu->fpc = vcpu->run->s.regs.fpc;
|
2017-12-04 20:35:34 +00:00
|
|
|
|
|
|
|
vcpu_put(vcpu);
|
2008-03-25 17:47:20 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Store @psw as the PSW in the vcpu's run structure.
 *
 * Only allowed while the vcpu is stopped; returns -EBUSY otherwise and 0
 * on success.
 */
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	if (!is_vcpu_stopped(vcpu))
		return -EBUSY;

	vcpu->run->psw_mask = psw.mask;
	vcpu->run->psw_addr = psw.addr;

	return 0;
}
|
|
|
|
|
|
|
|
/* Address translation ioctl: not implemented yet, always returns -EINVAL. */
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL;
}
|
|
|
|
|
2014-01-23 11:26:52 +00:00
|
|
|
/* control bits accepted by kvm_arch_vcpu_ioctl_set_guest_debug() */
#define VALID_GUESTDBG_FLAGS \
	(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP | KVM_GUESTDBG_ENABLE)
|
|
|
|
|
2008-12-15 12:52:10 +00:00
|
|
|
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
|
|
|
|
struct kvm_guest_debug *dbg)
|
2008-03-25 17:47:20 +00:00
|
|
|
{
|
2014-01-23 11:26:52 +00:00
|
|
|
int rc = 0;
|
|
|
|
|
2017-12-04 20:35:33 +00:00
|
|
|
vcpu_load(vcpu);
|
|
|
|
|
2014-01-23 11:26:52 +00:00
|
|
|
vcpu->guest_debug = 0;
|
|
|
|
kvm_s390_clear_bp_data(vcpu);
|
|
|
|
|
2017-12-04 20:35:33 +00:00
|
|
|
if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
|
|
|
|
rc = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
if (!sclp.has_gpere) {
|
|
|
|
rc = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
2014-01-23 11:26:52 +00:00
|
|
|
|
|
|
|
if (dbg->control & KVM_GUESTDBG_ENABLE) {
|
|
|
|
vcpu->guest_debug = dbg->control;
|
|
|
|
/* enforce guest PER */
|
2018-01-23 17:05:29 +00:00
|
|
|
kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
|
2014-01-23 11:26:52 +00:00
|
|
|
|
|
|
|
if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
|
|
|
|
rc = kvm_s390_import_bp_data(vcpu, dbg);
|
|
|
|
} else {
|
2018-01-23 17:05:30 +00:00
|
|
|
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
|
2014-01-23 11:26:52 +00:00
|
|
|
vcpu->arch.guestdbg.last_bp = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (rc) {
|
|
|
|
vcpu->guest_debug = 0;
|
|
|
|
kvm_s390_clear_bp_data(vcpu);
|
2018-01-23 17:05:30 +00:00
|
|
|
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
|
2014-01-23 11:26:52 +00:00
|
|
|
}
|
|
|
|
|
2017-12-04 20:35:33 +00:00
|
|
|
out:
|
|
|
|
vcpu_put(vcpu);
|
2014-01-23 11:26:52 +00:00
|
|
|
return rc;
|
2008-03-25 17:47:20 +00:00
|
|
|
}
|
|
|
|
|
2008-04-11 16:24:45 +00:00
|
|
|
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
|
|
|
|
struct kvm_mp_state *mp_state)
|
|
|
|
{
|
2017-12-04 20:35:30 +00:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
vcpu_load(vcpu);
|
|
|
|
|
2014-04-10 15:35:00 +00:00
|
|
|
/* CHECK_STOP and LOAD are not supported yet */
|
2017-12-04 20:35:30 +00:00
|
|
|
ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
|
|
|
|
KVM_MP_STATE_OPERATING;
|
|
|
|
|
|
|
|
vcpu_put(vcpu);
|
|
|
|
return ret;
|
2008-04-11 16:24:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
|
|
|
|
struct kvm_mp_state *mp_state)
|
|
|
|
{
|
2014-04-10 15:35:00 +00:00
|
|
|
int rc = 0;
|
|
|
|
|
2017-12-04 20:35:31 +00:00
|
|
|
vcpu_load(vcpu);
|
|
|
|
|
2014-04-10 15:35:00 +00:00
|
|
|
/* user space knows about this interface - let it control the state */
|
2021-10-08 20:31:12 +00:00
|
|
|
kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
|
2014-04-10 15:35:00 +00:00
|
|
|
|
|
|
|
switch (mp_state->mp_state) {
|
|
|
|
case KVM_MP_STATE_STOPPED:
|
2019-05-15 11:24:30 +00:00
|
|
|
rc = kvm_s390_vcpu_stop(vcpu);
|
2014-04-10 15:35:00 +00:00
|
|
|
break;
|
|
|
|
case KVM_MP_STATE_OPERATING:
|
2019-05-15 11:24:30 +00:00
|
|
|
rc = kvm_s390_vcpu_start(vcpu);
|
2014-04-10 15:35:00 +00:00
|
|
|
break;
|
|
|
|
case KVM_MP_STATE_LOAD:
|
2019-09-02 06:34:44 +00:00
|
|
|
if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
|
|
|
|
rc = -ENXIO;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
|
|
|
|
break;
|
2014-04-10 15:35:00 +00:00
|
|
|
case KVM_MP_STATE_CHECK_STOP:
|
2020-03-11 04:51:32 +00:00
|
|
|
fallthrough; /* CHECK_STOP and LOAD are not supported yet */
|
2014-04-10 15:35:00 +00:00
|
|
|
default:
|
|
|
|
rc = -ENXIO;
|
|
|
|
}
|
|
|
|
|
2017-12-04 20:35:31 +00:00
|
|
|
vcpu_put(vcpu);
|
2014-04-10 15:35:00 +00:00
|
|
|
return rc;
|
2008-04-11 16:24:45 +00:00
|
|
|
}
|
|
|
|
|
2014-03-14 10:00:21 +00:00
|
|
|
static bool ibs_enabled(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
2018-01-23 17:05:31 +00:00
|
|
|
return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
|
2014-03-14 10:00:21 +00:00
|
|
|
}
|
|
|
|
|
2013-05-17 12:41:36 +00:00
|
|
|
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
2014-03-14 10:00:21 +00:00
|
|
|
retry:
|
2015-04-09 11:49:04 +00:00
|
|
|
kvm_s390_vcpu_request_handled(vcpu);
|
2017-06-04 12:43:52 +00:00
|
|
|
if (!kvm_request_pending(vcpu))
|
2015-07-28 13:03:05 +00:00
|
|
|
return 0;
|
2013-05-17 12:41:36 +00:00
|
|
|
/*
|
2022-02-25 18:22:46 +00:00
|
|
|
* If the guest prefix changed, re-arm the ipte notifier for the
|
2016-03-08 10:54:42 +00:00
|
|
|
* guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
|
2013-05-17 12:41:36 +00:00
|
|
|
* This ensures that the ipte instruction for this request has
|
|
|
|
* already finished. We might race against a second unmapper that
|
|
|
|
* wants to set the blocking bit. Lets just retry the request loop.
|
|
|
|
*/
|
2022-02-25 18:22:46 +00:00
|
|
|
if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
|
2013-05-17 12:41:36 +00:00
|
|
|
int rc;
|
2016-03-08 10:54:42 +00:00
|
|
|
rc = gmap_mprotect_notify(vcpu->arch.gmap,
|
|
|
|
kvm_s390_get_prefix(vcpu),
|
|
|
|
PAGE_SIZE * 2, PROT_WRITE);
|
2016-08-03 14:39:55 +00:00
|
|
|
if (rc) {
|
2022-02-25 18:22:46 +00:00
|
|
|
kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
|
2013-05-17 12:41:36 +00:00
|
|
|
return rc;
|
2016-08-03 14:39:55 +00:00
|
|
|
}
|
2014-03-14 10:00:21 +00:00
|
|
|
goto retry;
|
2013-05-17 12:41:36 +00:00
|
|
|
}
|
2014-03-14 10:00:21 +00:00
|
|
|
|
2014-07-29 06:53:36 +00:00
|
|
|
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
|
|
|
|
vcpu->arch.sie_block->ihcpu = 0xffff;
|
|
|
|
goto retry;
|
|
|
|
}
|
|
|
|
|
2014-03-14 10:00:21 +00:00
|
|
|
if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
|
|
|
|
if (!ibs_enabled(vcpu)) {
|
|
|
|
trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
|
2018-01-23 17:05:29 +00:00
|
|
|
kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
|
2014-03-14 10:00:21 +00:00
|
|
|
}
|
|
|
|
goto retry;
|
2013-05-17 12:41:36 +00:00
|
|
|
}
|
2014-03-14 10:00:21 +00:00
|
|
|
|
|
|
|
if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
|
|
|
|
if (ibs_enabled(vcpu)) {
|
|
|
|
trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
|
2018-01-23 17:05:30 +00:00
|
|
|
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
|
2014-03-14 10:00:21 +00:00
|
|
|
}
|
|
|
|
goto retry;
|
|
|
|
}
|
|
|
|
|
2016-06-21 12:19:51 +00:00
|
|
|
if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
|
|
|
|
vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
|
|
|
|
goto retry;
|
|
|
|
}
|
|
|
|
|
2016-08-04 15:54:42 +00:00
|
|
|
if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
|
|
|
|
/*
|
2018-02-16 11:16:14 +00:00
|
|
|
* Disable CMM virtualization; we will emulate the ESSA
|
2016-08-04 15:54:42 +00:00
|
|
|
* instruction manually, in order to provide additional
|
|
|
|
* functionalities needed for live migration.
|
|
|
|
*/
|
|
|
|
vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
|
|
|
|
goto retry;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
|
|
|
|
/*
|
2018-02-16 11:16:14 +00:00
|
|
|
* Re-enable CMM virtualization if CMMA is available and
|
|
|
|
* CMM has been used.
|
2016-08-04 15:54:42 +00:00
|
|
|
*/
|
|
|
|
if ((vcpu->kvm->arch.use_cmma) &&
|
2018-02-16 11:16:14 +00:00
|
|
|
(vcpu->kvm->mm->context.uses_cmm))
|
2016-08-04 15:54:42 +00:00
|
|
|
vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
|
|
|
|
goto retry;
|
|
|
|
}
|
|
|
|
|
2018-09-25 23:16:17 +00:00
|
|
|
/* we left the vsie handler, nothing to do, just clear the request */
|
|
|
|
kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
|
2014-05-13 14:54:32 +00:00
|
|
|
|
2013-05-17 12:41:36 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
KVM: s390x: fix SCK locking
When handling the SCK instruction, the kvm lock is taken, even though
the vcpu lock is already being held. The normal locking order is kvm
lock first and then vcpu lock. This can (and in some circumstances
does) lead to deadlocks.
The function kvm_s390_set_tod_clock is called both by the SCK handler
and by some IOCTLs to set the clock. The IOCTLs will not hold the vcpu
lock, so they can safely take the kvm lock. The SCK handler holds the
vcpu lock, but will also somehow need to acquire the kvm lock without
relinquishing the vcpu lock.
The solution is to factor out the code to set the clock, and provide
two wrappers. One is called like the original function and does the
locking, the other is called kvm_s390_try_set_tod_clock and uses
trylock to try to acquire the kvm lock. This new wrapper is then used
in the SCK handler. If locking fails, -EAGAIN is returned, which is
eventually propagated to userspace, thus also freeing the vcpu lock and
allowing for forward progress.
This is not the most efficient or elegant way to solve this issue, but
the SCK instruction is deprecated and its performance is not critical.
The goal of this patch is just to provide a simple but correct way to
fix the bug.
Fixes: 6a3f95a6b04c ("KVM: s390: Intercept SCK instruction")
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@linux.ibm.com>
Reviewed-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Link: https://lore.kernel.org/r/20220301143340.111129-1-imbrenda@linux.ibm.com
Cc: stable@vger.kernel.org
Signed-off-by: Christian Borntraeger <borntraeger@linux.ibm.com>
2022-03-01 14:33:40 +00:00
|
|
|
static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
|
2016-07-26 19:29:44 +00:00
|
|
|
{
|
|
|
|
struct kvm_vcpu *vcpu;
|
2021-02-08 15:27:33 +00:00
|
|
|
union tod_clock clk;
|
2021-11-16 16:04:02 +00:00
|
|
|
unsigned long i;
|
2016-07-26 19:29:44 +00:00
|
|
|
|
|
|
|
preempt_disable();
|
|
|
|
|
2021-02-08 15:27:33 +00:00
|
|
|
store_tod_clock_ext(&clk);
|
2016-07-26 19:29:44 +00:00
|
|
|
|
2021-02-08 15:27:33 +00:00
|
|
|
kvm->arch.epoch = gtod->tod - clk.tod;
|
2018-02-07 11:46:43 +00:00
|
|
|
kvm->arch.epdx = 0;
|
|
|
|
if (test_kvm_facility(kvm, 139)) {
|
2021-02-08 15:27:33 +00:00
|
|
|
kvm->arch.epdx = gtod->epoch_idx - clk.ei;
|
2018-02-07 11:46:43 +00:00
|
|
|
if (kvm->arch.epoch > gtod->tod)
|
|
|
|
kvm->arch.epdx -= 1;
|
|
|
|
}
|
2016-07-26 19:29:44 +00:00
|
|
|
|
|
|
|
kvm_s390_vcpu_block_all(kvm);
|
|
|
|
kvm_for_each_vcpu(i, vcpu, kvm) {
|
|
|
|
vcpu->arch.sie_block->epoch = kvm->arch.epoch;
|
|
|
|
vcpu->arch.sie_block->epdx = kvm->arch.epdx;
|
|
|
|
}
|
|
|
|
|
|
|
|
kvm_s390_vcpu_unblock_all(kvm);
|
|
|
|
preempt_enable();
|
KVM: s390x: fix SCK locking
When handling the SCK instruction, the kvm lock is taken, even though
the vcpu lock is already being held. The normal locking order is kvm
lock first and then vcpu lock. This is can (and in some circumstances
does) lead to deadlocks.
The function kvm_s390_set_tod_clock is called both by the SCK handler
and by some IOCTLs to set the clock. The IOCTLs will not hold the vcpu
lock, so they can safely take the kvm lock. The SCK handler holds the
vcpu lock, but will also somehow need to acquire the kvm lock without
relinquishing the vcpu lock.
The solution is to factor out the code to set the clock, and provide
two wrappers. One is called like the original function and does the
locking, the other is called kvm_s390_try_set_tod_clock and uses
trylock to try to acquire the kvm lock. This new wrapper is then used
in the SCK handler. If locking fails, -EAGAIN is returned, which is
eventually propagated to userspace, thus also freeing the vcpu lock and
allowing for forward progress.
This is not the most efficient or elegant way to solve this issue, but
the SCK instruction is deprecated and its performance is not critical.
The goal of this patch is just to provide a simple but correct way to
fix the bug.
Fixes: 6a3f95a6b04c ("KVM: s390: Intercept SCK instruction")
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@linux.ibm.com>
Reviewed-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Link: https://lore.kernel.org/r/20220301143340.111129-1-imbrenda@linux.ibm.com
Cc: stable@vger.kernel.org
Signed-off-by: Christian Borntraeger <borntraeger@linux.ibm.com>
2022-03-01 14:33:40 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
|
|
|
|
{
|
|
|
|
if (!mutex_trylock(&kvm->lock))
|
|
|
|
return 0;
|
|
|
|
__kvm_s390_set_tod_clock(kvm, gtod);
|
2016-07-26 19:29:44 +00:00
|
|
|
mutex_unlock(&kvm->lock);
|
KVM: s390x: fix SCK locking
When handling the SCK instruction, the kvm lock is taken, even though
the vcpu lock is already being held. The normal locking order is kvm
lock first and then vcpu lock. This is can (and in some circumstances
does) lead to deadlocks.
The function kvm_s390_set_tod_clock is called both by the SCK handler
and by some IOCTLs to set the clock. The IOCTLs will not hold the vcpu
lock, so they can safely take the kvm lock. The SCK handler holds the
vcpu lock, but will also somehow need to acquire the kvm lock without
relinquishing the vcpu lock.
The solution is to factor out the code to set the clock, and provide
two wrappers. One is called like the original function and does the
locking, the other is called kvm_s390_try_set_tod_clock and uses
trylock to try to acquire the kvm lock. This new wrapper is then used
in the SCK handler. If locking fails, -EAGAIN is returned, which is
eventually propagated to userspace, thus also freeing the vcpu lock and
allowing for forward progress.
This is not the most efficient or elegant way to solve this issue, but
the SCK instruction is deprecated and its performance is not critical.
The goal of this patch is just to provide a simple but correct way to
fix the bug.
Fixes: 6a3f95a6b04c ("KVM: s390: Intercept SCK instruction")
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@linux.ibm.com>
Reviewed-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Link: https://lore.kernel.org/r/20220301143340.111129-1-imbrenda@linux.ibm.com
Cc: stable@vger.kernel.org
Signed-off-by: Christian Borntraeger <borntraeger@linux.ibm.com>
2022-03-01 14:33:40 +00:00
|
|
|
return 1;
|
2016-07-26 19:29:44 +00:00
|
|
|
}
|
|
|
|
|
2014-05-06 15:20:16 +00:00
|
|
|
/**
|
|
|
|
* kvm_arch_fault_in_page - fault-in guest page if necessary
|
|
|
|
* @vcpu: The corresponding virtual cpu
|
|
|
|
* @gpa: Guest physical address
|
|
|
|
* @writable: Whether the page should be writable or not
|
|
|
|
*
|
|
|
|
* Make sure that a guest page has been faulted-in on the host.
|
|
|
|
*
|
|
|
|
* Return: Zero on success, negative error code otherwise.
|
|
|
|
*/
|
|
|
|
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
|
2013-06-17 14:25:18 +00:00
|
|
|
{
|
2014-04-30 14:04:25 +00:00
|
|
|
return gmap_fault(vcpu->arch.gmap, gpa,
|
|
|
|
writable ? FAULT_FLAG_WRITE : 0);
|
2013-06-17 14:25:18 +00:00
|
|
|
}
|
|
|
|
|
2013-10-07 15:11:48 +00:00
|
|
|
/*
 * Inject a pfault notification carrying @token.
 *
 * @start_token == true:  KVM_S390_INT_PFAULT_INIT is injected into @vcpu
 *                        through kvm_s390_inject_vcpu().
 * @start_token == false: KVM_S390_INT_PFAULT_DONE is injected into the VM
 *                        through kvm_s390_inject_vm().
 *
 * A failing injection is not propagated; it only triggers WARN_ON_ONCE().
 *
 * The interrupt descriptors are built with designated initializers so that
 * the members this function does not set explicitly are not handed to the
 * injection helpers as indeterminate stack contents.
 */
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	if (start_token) {
		struct kvm_s390_irq irq = {
			.type = KVM_S390_INT_PFAULT_INIT,
			.u.ext.ext_params2 = token,
		};

		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		struct kvm_s390_interrupt inti = {
			.type = KVM_S390_INT_PFAULT_DONE,
			.parm64 = token,
		};

		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}
|
|
|
|
|
2020-06-10 17:55:32 +00:00
|
|
|
bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
|
2013-10-07 15:11:48 +00:00
|
|
|
struct kvm_async_pf *work)
|
|
|
|
{
|
|
|
|
trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
|
|
|
|
__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
|
2020-06-10 17:55:32 +00:00
|
|
|
|
|
|
|
return true;
|
2013-10-07 15:11:48 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
|
|
|
|
struct kvm_async_pf *work)
|
|
|
|
{
|
|
|
|
trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
|
|
|
|
__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Async-pf "page ready" hook. Intentionally a no-op on s390.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* Nothing to do: s390 will always inject the page directly. */
}
|
|
|
|
|
2020-05-25 14:41:18 +00:00
|
|
|
bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
|
2013-10-07 15:11:48 +00:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* s390 will always inject the page directly,
|
|
|
|
* but we still want check_async_completion to cleanup
|
|
|
|
*/
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-06-15 12:13:34 +00:00
|
|
|
static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
|
2013-10-07 15:11:48 +00:00
|
|
|
{
|
|
|
|
hva_t hva;
|
|
|
|
struct kvm_arch_async_pf arch;
|
|
|
|
|
|
|
|
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
|
2020-06-15 12:13:34 +00:00
|
|
|
return false;
|
2013-10-07 15:11:48 +00:00
|
|
|
if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
|
|
|
|
vcpu->arch.pfault_compare)
|
2020-06-15 12:13:34 +00:00
|
|
|
return false;
|
2013-10-07 15:11:48 +00:00
|
|
|
if (psw_extint_disabled(vcpu))
|
2020-06-15 12:13:34 +00:00
|
|
|
return false;
|
2014-08-05 15:40:47 +00:00
|
|
|
if (kvm_s390_vcpu_has_irq(vcpu, 0))
|
2020-06-15 12:13:34 +00:00
|
|
|
return false;
|
2018-04-30 15:55:24 +00:00
|
|
|
if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
|
2020-06-15 12:13:34 +00:00
|
|
|
return false;
|
2013-10-07 15:11:48 +00:00
|
|
|
if (!vcpu->arch.gmap->pfault_enabled)
|
2020-06-15 12:13:34 +00:00
|
|
|
return false;
|
2013-10-07 15:11:48 +00:00
|
|
|
|
2014-01-01 15:36:07 +00:00
|
|
|
hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
|
|
|
|
hva += current->thread.gmap_addr & ~PAGE_MASK;
|
|
|
|
if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
|
2020-06-15 12:13:34 +00:00
|
|
|
return false;
|
2013-10-07 15:11:48 +00:00
|
|
|
|
2020-06-15 12:13:34 +00:00
|
|
|
return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
|
2013-10-07 15:11:48 +00:00
|
|
|
}
|
|
|
|
|
2013-09-12 08:33:43 +00:00
|
|
|
/*
 * Prepare @vcpu for (re-)entering SIE.
 *
 * Returns 0 when the vcpu is ready to enter SIE, otherwise the non-zero
 * result of kvm_s390_deliver_pending_interrupts() or
 * kvm_s390_handle_requests().
 */
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the house keeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	/* Guest gprs 14 and 15 are passed to SIE through the sie block. */
	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	/* Pending interrupts are only delivered here for non-ucontrol VMs. */
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);

	/* Start from a clean intercept code; vcpu_post_run() inspects it. */
	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}
|
|
|
|
|
2015-02-10 15:11:01 +00:00
|
|
|
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
2016-01-12 16:37:58 +00:00
|
|
|
struct kvm_s390_pgm_info pgm_info = {
|
|
|
|
.code = PGM_ADDRESSING,
|
|
|
|
};
|
|
|
|
u8 opcode, ilen;
|
2015-02-10 15:11:01 +00:00
|
|
|
int rc;
|
|
|
|
|
|
|
|
VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
|
|
|
|
trace_kvm_s390_sie_fault(vcpu);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We want to inject an addressing exception, which is defined as a
|
|
|
|
* suppressing or terminating exception. However, since we came here
|
|
|
|
* by a DAT access exception, the PSW still points to the faulting
|
|
|
|
* instruction since DAT exceptions are nullifying. So we've got
|
|
|
|
* to look up the current opcode to get the length of the instruction
|
|
|
|
* to be able to forward the PSW.
|
|
|
|
*/
|
2016-05-24 10:00:49 +00:00
|
|
|
rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
|
2016-01-12 16:37:58 +00:00
|
|
|
ilen = insn_length(opcode);
|
2016-01-12 16:40:54 +00:00
|
|
|
if (rc < 0) {
|
|
|
|
return rc;
|
|
|
|
} else if (rc) {
|
|
|
|
/* Instruction-Fetching Exceptions - we can't detect the ilen.
|
|
|
|
* Forward by arbitrary ilc, injection will take care of
|
|
|
|
* nullification if necessary.
|
|
|
|
*/
|
|
|
|
pgm_info = vcpu->arch.pgm;
|
|
|
|
ilen = 4;
|
|
|
|
}
|
2016-01-12 16:37:58 +00:00
|
|
|
pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
|
|
|
|
kvm_s390_forward_psw(vcpu, ilen);
|
|
|
|
return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
|
2015-02-10 15:11:01 +00:00
|
|
|
}
|
|
|
|
|
2013-09-12 08:33:43 +00:00
|
|
|
/*
 * Process the outcome of one SIE run.
 *
 * @exit_reason is the value returned by sie64a(): -EINTR is treated as a
 * machine check and -EFAULT as a fault; any other value without an
 * intercept code is counted as a "null" exit.
 *
 * Returns 0 when the run loop in __vcpu_run() may continue, -EREMOTE when
 * kvm_run has been filled in for userspace, or another non-zero value from
 * the handler that was invoked.
 */
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	struct mcck_volatile_info *mcck_info;
	struct sie_page *sie_page;

	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	/* Counterpart of the gg14/gg15 setup done in vcpu_pre_run(). */
	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (exit_reason == -EINTR) {
		VCPU_EVENT(vcpu, 3, "%s", "machine check");
		/* The machine check info lives in the sie_page around the sie block. */
		sie_page = container_of(vcpu->arch.sie_block,
					struct sie_page, sie_block);
		mcck_info = &sie_page->mcck_info;
		kvm_s390_reinject_machine_check(vcpu, mcck_info);
		return 0;
	}

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		/* Not handled in the kernel: hand the intercept to userspace. */
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		/* ucontrol VMs get the fault reported to userspace. */
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		/* Prefer an async pfault; otherwise fault the page in synchronously. */
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		vcpu->stat.pfault_sync++;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}
|
|
|
|
|
2019-12-13 13:26:06 +00:00
|
|
|
/* PSW mask bits for external, I/O and machine check interrupts. */
#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)

/*
 * Main run loop of a vcpu: alternate between vcpu_pre_run(), the SIE
 * instruction (sie64a()) and vcpu_post_run() until one of them returns
 * non-zero, a signal is pending for the current task, or a guest debug
 * exit is pending.
 *
 * Returns the last rc produced by vcpu_pre_run() or vcpu_post_run().
 */
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;
	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	kvm_vcpu_srcu_read_lock(vcpu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		kvm_vcpu_srcu_read_unlock(vcpu);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		/* Protected vcpus exchange their gprs through sie_page->pv_grregs. */
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			memcpy(sie_page->pv_grregs,
			       vcpu->run->s.regs.gprs,
			       sizeof(sie_page->pv_grregs));
		}
		if (test_cpu_flag(CIF_FPU))
			load_fpu_regs();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			memcpy(vcpu->run->s.regs.gprs,
			       sie_page->pv_grregs,
			       sizeof(sie_page->pv_grregs));
			/*
			 * We're not allowed to inject interrupts on intercepts
			 * that leave the guest state in an "in-between" state
			 * where the next SIE entry will do a continuation.
			 * Fence interrupts in our "internal" PSW.
			 */
			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
			}
		}
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		kvm_vcpu_srcu_read_lock(vcpu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	/* The srcu read lock is held on every path that leaves the loop. */
	kvm_vcpu_srcu_read_unlock(vcpu);
	return rc;
}
|
|
|
|
|
2020-06-23 13:14:14 +00:00
|
|
|
/*
 * Copy register state from kvm_run into the vcpu for the non-protected
 * case (called from sync_regs() when the vcpu is not protected).
 *
 * The guest PSW is always taken from kvm_run; the remaining groups are only
 * copied when the matching KVM_SYNC_* bit is set in kvm_run->kvm_dirty_regs.
 */
static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;
	struct runtime_instr_cb *riccb;
	struct gs_cb *gscb;

	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		/* An invalid token also clears the async pf completion queue. */
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
		VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64) &&
	    riccb->v &&
	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
	}
	/*
	 * If userspace sets the gscb (e.g. after migration) to non-zero,
	 * we should enable GS here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
	    test_kvm_facility(vcpu->kvm, 133) &&
	    gscb->gssm &&
	    !vcpu->arch.gs_enabled) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
		vcpu->arch.sie_block->ecb |= ECB_GS;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
		vcpu->arch.gs_enabled = 1;
	}
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
	    test_kvm_facility(vcpu->kvm, 82)) {
		/* Replace the FPF_BPBC bit with the value requested by userspace. */
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
	}
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		/* Remember and save the host gs_cb, if the task has one. */
		if (current->thread.gs_cb) {
			vcpu->arch.host_gscb = current->thread.gs_cb;
			save_gs_cb(vcpu->arch.host_gscb);
		}
		/* Switch the task over to the guest gscb kept in kvm_run. */
		if (vcpu->arch.gs_enabled) {
			current->thread.gs_cb = (struct gs_cb *)
						&vcpu->run->s.regs.gscb;
			restore_gs_cb(current->thread.gs_cb);
		}
		preempt_enable();
	}
	/* SIE will load etoken directly from SDNX and therefore kvm_run */
}
|
|
|
|
|
2020-06-23 13:14:14 +00:00
|
|
|
/*
 * Transfer register state from kvm_run into the vcpu before running it and
 * switch the current task's access and floating point register state from
 * host to guest values. The host values are stashed in vcpu->arch so that
 * store_regs() can put them back.
 *
 * kvm_run->kvm_dirty_regs is cleared at the end.
 */
static void sync_regs(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
	}
	/* Swap host access registers for the guest ones. */
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	/* Point the task's fpu state at the guest registers in kvm_run. */
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	/* Sync fmt2 only data */
	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
		sync_regs_fmt2(vcpu);
	} else {
		/*
		 * In several places we have to modify our internal view to
		 * not do things that are disallowed by the ultravisor. For
		 * example we must not inject interrupts after specific exits
		 * (e.g. 112 prefix page not secure). We do this by turning
		 * off the machine check, external and I/O interrupt bits
		 * of our PSW copy. To avoid getting validity intercepts, we
		 * do only accept the condition code from userspace.
		 */
		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
						   PSW_MASK_CC;
	}

	kvm_run->kvm_dirty_regs = 0;
}
|
|
|
|
|
2020-06-23 13:14:14 +00:00
|
|
|
/*
 * Copy register state of a non-protected vcpu back into kvm_run and, on
 * machines with guarded storage, switch the task's gs_cb back to the host
 * value remembered by sync_regs_fmt2().
 */
static void store_regs_fmt2(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	/* Report bpbc as a boolean derived from the FPF_BPBC bit. */
	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		/* Save the guest gs_cb before switching back to the host one. */
		if (vcpu->arch.gs_enabled)
			save_gs_cb(current->thread.gs_cb);
		current->thread.gs_cb = vcpu->arch.host_gscb;
		restore_gs_cb(vcpu->arch.host_gscb);
		/* No host gs_cb: clear the control bit that was set above. */
		if (!vcpu->arch.host_gscb)
			__ctl_clear_bit(2, 4);
		vcpu->arch.host_gscb = NULL;
		preempt_enable();
	}
	/* SIE will save etoken directly into SDNX and therefore kvm_run */
}
|
|
|
|
|
2020-06-23 13:14:14 +00:00
|
|
|
/*
 * Counterpart of sync_regs(): copy the vcpu's register state back into
 * kvm_run after running it and restore the host access and floating point
 * register state that sync_regs() stashed in vcpu->arch.
 */
static void store_regs(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
	/* Swap guest access registers back for the host ones. */
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	/* Restore will be done lazily at return */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
	/* The fmt2-only state is not stored for protected vcpus. */
	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
		store_regs_fmt2(vcpu);
}
|
|
|
|
|
2020-04-16 05:10:57 +00:00
|
|
|
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
|
2008-03-25 17:47:20 +00:00
|
|
|
{
|
2020-04-16 05:10:57 +00:00
|
|
|
struct kvm_run *kvm_run = vcpu->run;
|
2008-03-25 17:47:23 +00:00
|
|
|
int rc;
|
2008-03-25 17:47:20 +00:00
|
|
|
|
2022-05-17 16:36:24 +00:00
|
|
|
/*
|
|
|
|
* Running a VM while dumping always has the potential to
|
|
|
|
* produce inconsistent dump data. But for PV vcpus a SIE
|
|
|
|
* entry while dumping could also lead to a fatal validity
|
|
|
|
* intercept which we absolutely want to avoid.
|
|
|
|
*/
|
|
|
|
if (vcpu->kvm->arch.pv.dumping)
|
|
|
|
return -EINVAL;
|
|
|
|
|
2017-02-08 10:50:15 +00:00
|
|
|
if (kvm_run->immediate_exit)
|
|
|
|
return -EINTR;
|
|
|
|
|
2019-09-04 08:51:59 +00:00
|
|
|
if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
|
|
|
|
kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
|
|
|
|
return -EINVAL;
|
|
|
|
|
2017-12-04 20:35:25 +00:00
|
|
|
vcpu_load(vcpu);
|
|
|
|
|
2014-01-23 11:26:52 +00:00
|
|
|
if (guestdbg_exit_pending(vcpu)) {
|
|
|
|
kvm_s390_prepare_debug_exit(vcpu);
|
2017-12-04 20:35:25 +00:00
|
|
|
rc = 0;
|
|
|
|
goto out;
|
2014-01-23 11:26:52 +00:00
|
|
|
}
|
|
|
|
|
2017-11-24 21:39:01 +00:00
|
|
|
kvm_sigset_activate(vcpu);
|
2008-03-25 17:47:20 +00:00
|
|
|
|
2019-05-15 11:24:30 +00:00
|
|
|
/*
|
|
|
|
* no need to check the return value of vcpu_start as it can only have
|
|
|
|
* an error for protvirt, but protvirt means user cpu state
|
|
|
|
*/
|
2014-04-10 15:35:00 +00:00
|
|
|
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
|
|
|
|
kvm_s390_vcpu_start(vcpu);
|
|
|
|
} else if (is_vcpu_stopped(vcpu)) {
|
2015-05-20 11:24:02 +00:00
|
|
|
pr_err_ratelimited("can't run stopped vcpu %d\n",
|
2014-04-10 15:35:00 +00:00
|
|
|
vcpu->vcpu_id);
|
2017-12-04 20:35:25 +00:00
|
|
|
rc = -EINVAL;
|
|
|
|
goto out;
|
2014-04-10 15:35:00 +00:00
|
|
|
}
|
2008-03-25 17:47:20 +00:00
|
|
|
|
2020-06-23 13:14:14 +00:00
|
|
|
sync_regs(vcpu);
|
2016-02-15 08:42:25 +00:00
|
|
|
enable_cpu_timer_accounting(vcpu);
|
2009-11-19 13:21:16 +00:00
|
|
|
|
2009-06-12 08:26:32 +00:00
|
|
|
might_fault();
|
2013-09-12 08:33:44 +00:00
|
|
|
rc = __vcpu_run(vcpu);
|
2009-05-20 13:34:55 +00:00
|
|
|
|
2009-05-20 13:34:56 +00:00
|
|
|
if (signal_pending(current) && !rc) {
|
|
|
|
kvm_run->exit_reason = KVM_EXIT_INTR;
|
2008-03-25 17:47:23 +00:00
|
|
|
rc = -EINTR;
|
2009-05-20 13:34:56 +00:00
|
|
|
}
|
2008-03-25 17:47:23 +00:00
|
|
|
|
2014-01-23 11:26:52 +00:00
|
|
|
if (guestdbg_exit_pending(vcpu) && !rc) {
|
|
|
|
kvm_s390_prepare_debug_exit(vcpu);
|
|
|
|
rc = 0;
|
|
|
|
}
|
|
|
|
|
2008-03-25 17:47:23 +00:00
|
|
|
if (rc == -EREMOTE) {
|
2015-10-19 14:24:28 +00:00
|
|
|
/* userspace support is needed, kvm_run has been prepared */
|
2008-03-25 17:47:23 +00:00
|
|
|
rc = 0;
|
|
|
|
}
|
2008-03-25 17:47:20 +00:00
|
|
|
|
2016-02-15 08:42:25 +00:00
|
|
|
disable_cpu_timer_accounting(vcpu);
|
2020-06-23 13:14:14 +00:00
|
|
|
store_regs(vcpu);
|
2009-11-19 13:21:16 +00:00
|
|
|
|
2017-11-24 21:39:01 +00:00
|
|
|
kvm_sigset_deactivate(vcpu);
|
2008-03-25 17:47:20 +00:00
|
|
|
|
|
|
|
vcpu->stat.exit_userspace++;
|
2017-12-04 20:35:25 +00:00
|
|
|
out:
|
|
|
|
vcpu_put(vcpu);
|
2008-04-04 13:12:35 +00:00
|
|
|
return rc;
|
2008-03-25 17:47:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 *
 * The register values are taken from vcpu->run and the sie block, i.e. the
 * copies must be up to date when this is called.
 *
 * Returns 0 on success, -EFAULT if any of the guest writes failed.
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		/*
		 * All stores below add a __LC_*_SAVE_AREA offset to gpa, so
		 * rebase gpa to make the fpregs area land at the given address.
		 */
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	/* Results are OR-ed together; only "any failure" is reported. */
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	/* The clock comparator is stored shifted right by 8 bits. */
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}
|
|
|
|
|
2013-11-06 14:46:33 +00:00
|
|
|
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
|
2016-11-10 13:22:02 +00:00
|
|
|
* switch in the run ioctl. Let's update our copies before we save
|
2013-11-06 14:46:33 +00:00
|
|
|
* it into the save area
|
|
|
|
*/
|
2015-06-29 14:43:06 +00:00
|
|
|
save_fpu_regs();
|
2016-01-14 21:12:47 +00:00
|
|
|
vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
|
2013-11-06 14:46:33 +00:00
|
|
|
save_access_regs(vcpu->run->s.regs.acrs);
|
|
|
|
|
|
|
|
return kvm_s390_store_status_unloaded(vcpu, addr);
|
|
|
|
}
|
|
|
|
|
2014-03-14 10:00:21 +00:00
|
|
|
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
|
2015-04-09 11:49:04 +00:00
|
|
|
kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
|
2014-03-14 10:00:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
|
|
|
|
{
|
2021-11-16 16:04:02 +00:00
|
|
|
unsigned long i;
|
2014-03-14 10:00:21 +00:00
|
|
|
struct kvm_vcpu *vcpu;
|
|
|
|
|
|
|
|
kvm_for_each_vcpu(i, vcpu, kvm) {
|
|
|
|
__disable_ibs_on_vcpu(vcpu);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
2016-04-04 13:57:08 +00:00
|
|
|
if (!sclp.has_ibs)
|
|
|
|
return;
|
2014-03-14 10:00:21 +00:00
|
|
|
kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
|
2015-04-09 11:49:04 +00:00
|
|
|
kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
|
2014-03-14 10:00:21 +00:00
|
|
|
}
|
|
|
|
|
2019-05-15 11:24:30 +00:00
|
|
|
int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
|
2014-03-14 09:59:29 +00:00
|
|
|
{
|
2019-05-15 11:24:30 +00:00
|
|
|
int i, online_vcpus, r = 0, started_vcpus = 0;
|
2014-03-14 10:00:21 +00:00
|
|
|
|
|
|
|
if (!is_vcpu_stopped(vcpu))
|
2019-05-15 11:24:30 +00:00
|
|
|
return 0;
|
2014-03-14 10:00:21 +00:00
|
|
|
|
2014-03-14 09:59:29 +00:00
|
|
|
trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
|
2014-03-14 10:00:21 +00:00
|
|
|
/* Only one cpu at a time may enter/leave the STOPPED state. */
|
2014-05-06 14:11:14 +00:00
|
|
|
spin_lock(&vcpu->kvm->arch.start_stop_lock);
|
2014-03-14 10:00:21 +00:00
|
|
|
online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
|
|
|
|
|
2019-05-15 11:24:30 +00:00
|
|
|
/* Let's tell the UV that we want to change into the operating state */
|
|
|
|
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
|
|
|
|
r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
|
|
|
|
if (r) {
|
|
|
|
spin_unlock(&vcpu->kvm->arch.start_stop_lock);
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-03-14 10:00:21 +00:00
|
|
|
for (i = 0; i < online_vcpus; i++) {
|
2021-11-16 16:03:59 +00:00
|
|
|
if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
|
2014-03-14 10:00:21 +00:00
|
|
|
started_vcpus++;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (started_vcpus == 0) {
|
|
|
|
/* we're the only active VCPU -> speed it up */
|
|
|
|
__enable_ibs_on_vcpu(vcpu);
|
|
|
|
} else if (started_vcpus == 1) {
|
|
|
|
/*
|
|
|
|
* As we are starting a second VCPU, we have to disable
|
|
|
|
* the IBS facility on all VCPUs to remove potentially
|
2021-02-13 15:32:27 +00:00
|
|
|
* outstanding ENABLE requests.
|
2014-03-14 10:00:21 +00:00
|
|
|
*/
|
|
|
|
__disable_ibs_on_all_vcpus(vcpu->kvm);
|
|
|
|
}
|
|
|
|
|
2018-01-23 17:05:30 +00:00
|
|
|
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
|
2020-01-30 16:18:28 +00:00
|
|
|
/*
|
|
|
|
* The real PSW might have changed due to a RESTART interpreted by the
|
|
|
|
* ultravisor. We block all interrupts and let the next sie exit
|
|
|
|
* refresh our view.
|
|
|
|
*/
|
|
|
|
if (kvm_s390_pv_cpu_is_protected(vcpu))
|
|
|
|
vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
|
2014-03-14 10:00:21 +00:00
|
|
|
/*
|
|
|
|
* Another VCPU might have used IBS while we were offline.
|
|
|
|
* Let's play safe and flush the VCPU at startup.
|
|
|
|
*/
|
2014-07-29 06:53:36 +00:00
|
|
|
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
|
2014-05-06 14:11:14 +00:00
|
|
|
spin_unlock(&vcpu->kvm->arch.start_stop_lock);
|
2019-05-15 11:24:30 +00:00
|
|
|
return 0;
|
2014-03-14 09:59:29 +00:00
|
|
|
}
|
|
|
|
|
2019-05-15 11:24:30 +00:00
|
|
|
int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
|
2014-03-14 09:59:29 +00:00
|
|
|
{
|
2019-05-15 11:24:30 +00:00
|
|
|
int i, online_vcpus, r = 0, started_vcpus = 0;
|
2014-03-14 10:00:21 +00:00
|
|
|
struct kvm_vcpu *started_vcpu = NULL;
|
|
|
|
|
|
|
|
if (is_vcpu_stopped(vcpu))
|
2019-05-15 11:24:30 +00:00
|
|
|
return 0;
|
2014-03-14 10:00:21 +00:00
|
|
|
|
2014-03-14 09:59:29 +00:00
|
|
|
trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
|
2014-03-14 10:00:21 +00:00
|
|
|
/* Only one cpu at a time may enter/leave the STOPPED state. */
|
2014-05-06 14:11:14 +00:00
|
|
|
spin_lock(&vcpu->kvm->arch.start_stop_lock);
|
2014-03-14 10:00:21 +00:00
|
|
|
online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
|
|
|
|
|
2019-05-15 11:24:30 +00:00
|
|
|
/* Let's tell the UV that we want to change into the stopped state */
|
|
|
|
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
|
|
|
|
r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
|
|
|
|
if (r) {
|
|
|
|
spin_unlock(&vcpu->kvm->arch.start_stop_lock);
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
KVM: s390: Clarify SIGP orders versus STOP/RESTART
With KVM_CAP_S390_USER_SIGP, there are only five Signal Processor
orders (CONDITIONAL EMERGENCY SIGNAL, EMERGENCY SIGNAL, EXTERNAL CALL,
SENSE, and SENSE RUNNING STATUS) which are intended for frequent use
and thus are processed in-kernel. The remainder are sent to userspace
with the KVM_CAP_S390_USER_SIGP capability. Of those, three orders
(RESTART, STOP, and STOP AND STORE STATUS) have the potential to
inject work back into the kernel, and thus are asynchronous.
Let's look for those pending IRQs when processing one of the in-kernel
SIGP orders, and return BUSY (CC2) if one is in process. This is in
agreement with the Principles of Operation, which states that only one
order can be "active" on a CPU at a time.
Cc: stable@vger.kernel.org
Suggested-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Eric Farman <farman@linux.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@linux.ibm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Link: https://lore.kernel.org/r/20211213210550.856213-2-farman@linux.ibm.com
[borntraeger@linux.ibm.com: add stable tag]
Signed-off-by: Christian Borntraeger <borntraeger@linux.ibm.com>
2021-12-13 21:05:50 +00:00
|
|
|
/*
|
|
|
|
* Set the VCPU to STOPPED and THEN clear the interrupt flag,
|
|
|
|
* now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
|
|
|
|
* have been fully processed. This will ensure that the VCPU
|
|
|
|
* is kept BUSY if another VCPU is inquiring with SIGP SENSE.
|
|
|
|
*/
|
|
|
|
kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
|
2014-10-15 14:48:53 +00:00
|
|
|
kvm_s390_clear_stop_irq(vcpu);
|
2014-04-14 10:40:03 +00:00
|
|
|
|
2014-03-14 10:00:21 +00:00
|
|
|
__disable_ibs_on_vcpu(vcpu);
|
|
|
|
|
|
|
|
for (i = 0; i < online_vcpus; i++) {
|
2021-11-16 16:03:59 +00:00
|
|
|
struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);
|
|
|
|
|
|
|
|
if (!is_vcpu_stopped(tmp)) {
|
2014-03-14 10:00:21 +00:00
|
|
|
started_vcpus++;
|
2021-11-16 16:03:59 +00:00
|
|
|
started_vcpu = tmp;
|
2014-03-14 10:00:21 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (started_vcpus == 1) {
|
|
|
|
/*
|
|
|
|
* As we only have one VCPU left, we want to enable the
|
|
|
|
* IBS facility for that VCPU to speed it up.
|
|
|
|
*/
|
|
|
|
__enable_ibs_on_vcpu(started_vcpu);
|
|
|
|
}
|
|
|
|
|
2014-05-06 14:11:14 +00:00
|
|
|
spin_unlock(&vcpu->kvm->arch.start_stop_lock);
|
2019-05-15 11:24:30 +00:00
|
|
|
return 0;
|
2014-03-14 09:59:29 +00:00
|
|
|
}
|
|
|
|
|
2012-12-20 14:32:11 +00:00
|
|
|
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
|
|
|
|
struct kvm_enable_cap *cap)
|
|
|
|
{
|
|
|
|
int r;
|
|
|
|
|
|
|
|
if (cap->flags)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
switch (cap->cap) {
|
2012-12-20 14:32:12 +00:00
|
|
|
case KVM_CAP_S390_CSS_SUPPORT:
|
|
|
|
if (!vcpu->kvm->arch.css_support) {
|
|
|
|
vcpu->kvm->arch.css_support = 1;
|
2015-07-22 13:52:10 +00:00
|
|
|
VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
|
2012-12-20 14:32:12 +00:00
|
|
|
trace_kvm_s390_enable_css(vcpu->kvm);
|
|
|
|
}
|
|
|
|
r = 0;
|
|
|
|
break;
|
2012-12-20 14:32:11 +00:00
|
|
|
default:
|
|
|
|
r = -EINVAL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2022-02-11 18:22:12 +00:00
|
|
|
static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
|
|
|
|
struct kvm_s390_mem_op *mop)
|
2019-04-02 07:21:06 +00:00
|
|
|
{
|
|
|
|
void __user *uaddr = (void __user *)mop->buf;
|
2022-10-20 14:31:58 +00:00
|
|
|
void *sida_addr;
|
2019-04-02 07:21:06 +00:00
|
|
|
int r = 0;
|
|
|
|
|
|
|
|
if (mop->flags || !mop->size)
|
|
|
|
return -EINVAL;
|
|
|
|
if (mop->size + mop->sida_offset < mop->size)
|
|
|
|
return -EINVAL;
|
|
|
|
if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
|
|
|
|
return -E2BIG;
|
2022-01-28 14:06:43 +00:00
|
|
|
if (!kvm_s390_pv_cpu_is_protected(vcpu))
|
|
|
|
return -EINVAL;
|
2019-04-02 07:21:06 +00:00
|
|
|
|
2022-10-20 14:31:58 +00:00
|
|
|
sida_addr = (char *)sida_addr(vcpu->arch.sie_block) + mop->sida_offset;
|
|
|
|
|
2019-04-02 07:21:06 +00:00
|
|
|
switch (mop->op) {
|
|
|
|
case KVM_S390_MEMOP_SIDA_READ:
|
2022-10-20 14:31:58 +00:00
|
|
|
if (copy_to_user(uaddr, sida_addr, mop->size))
|
2019-04-02 07:21:06 +00:00
|
|
|
r = -EFAULT;
|
|
|
|
|
|
|
|
break;
|
|
|
|
case KVM_S390_MEMOP_SIDA_WRITE:
|
2022-10-20 14:31:58 +00:00
|
|
|
if (copy_from_user(sida_addr, uaddr, mop->size))
|
2019-04-02 07:21:06 +00:00
|
|
|
r = -EFAULT;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return r;
|
|
|
|
}
|
2022-02-11 18:22:12 +00:00
|
|
|
|
|
|
|
static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
|
|
|
|
struct kvm_s390_mem_op *mop)
|
2015-02-06 14:01:21 +00:00
|
|
|
{
|
|
|
|
void __user *uaddr = (void __user *)mop->buf;
|
2023-02-06 16:45:59 +00:00
|
|
|
enum gacc_mode acc_mode;
|
2015-02-06 14:01:21 +00:00
|
|
|
void *tmpbuf = NULL;
|
2023-02-06 16:45:56 +00:00
|
|
|
int r;
|
2015-02-06 14:01:21 +00:00
|
|
|
|
2023-02-06 16:45:56 +00:00
|
|
|
r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_INJECT_EXCEPTION |
|
|
|
|
KVM_S390_MEMOP_F_CHECK_ONLY |
|
|
|
|
KVM_S390_MEMOP_F_SKEY_PROTECTION);
|
|
|
|
if (r)
|
|
|
|
return r;
|
|
|
|
if (mop->ar >= NUM_ACRS)
|
2015-02-06 14:01:21 +00:00
|
|
|
return -EINVAL;
|
2019-04-02 07:21:06 +00:00
|
|
|
if (kvm_s390_pv_cpu_is_protected(vcpu))
|
|
|
|
return -EINVAL;
|
2015-02-06 14:01:21 +00:00
|
|
|
if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
|
|
|
|
tmpbuf = vmalloc(mop->size);
|
|
|
|
if (!tmpbuf)
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
2023-02-06 16:45:59 +00:00
|
|
|
acc_mode = mop->op == KVM_S390_MEMOP_LOGICAL_READ ? GACC_FETCH : GACC_STORE;
|
|
|
|
if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
|
|
|
|
r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
|
|
|
|
acc_mode, mop->key);
|
|
|
|
goto out_inject;
|
|
|
|
}
|
|
|
|
if (acc_mode == GACC_FETCH) {
|
2022-02-11 18:22:10 +00:00
|
|
|
r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
|
|
|
|
mop->size, mop->key);
|
2023-02-06 16:45:59 +00:00
|
|
|
if (r)
|
|
|
|
goto out_inject;
|
|
|
|
if (copy_to_user(uaddr, tmpbuf, mop->size)) {
|
|
|
|
r = -EFAULT;
|
|
|
|
goto out_free;
|
2015-02-06 14:01:21 +00:00
|
|
|
}
|
2023-02-06 16:45:59 +00:00
|
|
|
} else {
|
2015-02-06 14:01:21 +00:00
|
|
|
if (copy_from_user(tmpbuf, uaddr, mop->size)) {
|
|
|
|
r = -EFAULT;
|
2023-02-06 16:45:59 +00:00
|
|
|
goto out_free;
|
2015-02-06 14:01:21 +00:00
|
|
|
}
|
2022-02-11 18:22:10 +00:00
|
|
|
r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
|
|
|
|
mop->size, mop->key);
|
2015-02-06 14:01:21 +00:00
|
|
|
}
|
|
|
|
|
2023-02-06 16:45:59 +00:00
|
|
|
out_inject:
|
2015-02-06 14:01:21 +00:00
|
|
|
if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
|
|
|
|
kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
|
|
|
|
|
2023-02-06 16:45:59 +00:00
|
|
|
out_free:
|
2015-02-06 14:01:21 +00:00
|
|
|
vfree(tmpbuf);
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2022-02-11 18:22:12 +00:00
|
|
|
static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
|
|
|
|
struct kvm_s390_mem_op *mop)
|
2019-04-02 07:21:06 +00:00
|
|
|
{
|
|
|
|
int r, srcu_idx;
|
|
|
|
|
|
|
|
srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
|
|
|
|
|
|
|
switch (mop->op) {
|
|
|
|
case KVM_S390_MEMOP_LOGICAL_READ:
|
|
|
|
case KVM_S390_MEMOP_LOGICAL_WRITE:
|
2022-02-11 18:22:12 +00:00
|
|
|
r = kvm_s390_vcpu_mem_op(vcpu, mop);
|
2019-04-02 07:21:06 +00:00
|
|
|
break;
|
|
|
|
case KVM_S390_MEMOP_SIDA_READ:
|
|
|
|
case KVM_S390_MEMOP_SIDA_WRITE:
|
|
|
|
/* we are locked against sida going away by the vcpu->mutex */
|
2022-02-11 18:22:12 +00:00
|
|
|
r = kvm_s390_vcpu_sida_op(vcpu, mop);
|
2019-04-02 07:21:06 +00:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
r = -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2017-12-12 16:41:34 +00:00
|
|
|
long kvm_arch_vcpu_async_ioctl(struct file *filp,
|
|
|
|
unsigned int ioctl, unsigned long arg)
|
2008-03-25 17:47:20 +00:00
|
|
|
{
|
|
|
|
struct kvm_vcpu *vcpu = filp->private_data;
|
|
|
|
void __user *argp = (void __user *)arg;
|
|
|
|
|
2010-05-13 09:35:17 +00:00
|
|
|
switch (ioctl) {
|
2014-11-11 19:57:06 +00:00
|
|
|
case KVM_S390_IRQ: {
|
|
|
|
struct kvm_s390_irq s390irq;
|
|
|
|
|
|
|
|
if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
|
2017-12-04 20:35:36 +00:00
|
|
|
return -EFAULT;
|
|
|
|
return kvm_s390_inject_vcpu(vcpu, &s390irq);
|
2014-11-11 19:57:06 +00:00
|
|
|
}
|
2010-05-13 09:35:17 +00:00
|
|
|
case KVM_S390_INTERRUPT: {
|
KVM: s390: interrupt subsystem, cpu timer, waitpsw
This patch contains the s390 interrupt subsystem (similar to in kernel apic)
including timer interrupts (similar to in-kernel-pit) and enabled wait
(similar to in kernel hlt).
In order to achieve that, this patch also introduces intercept handling
for instruction intercepts, and it implements load control instructions.
This patch introduces an ioctl KVM_S390_INTERRUPT which is valid for both
the vm file descriptors and the vcpu file descriptors. In case this ioctl is
issued against a vm file descriptor, the interrupt is considered floating.
Floating interrupts may be delivered to any virtual cpu in the configuration.
The following interrupts are supported:
SIGP STOP - interprocessor signal that stops a remote cpu
SIGP SET PREFIX - interprocessor signal that sets the prefix register of a
(stopped) remote cpu
INT EMERGENCY - interprocessor interrupt, usually used to signal need_reshed
and for smp_call_function() in the guest.
PROGRAM INT - exception during program execution such as page fault, illegal
instruction and friends
RESTART - interprocessor signal that starts a stopped cpu
INT VIRTIO - floating interrupt for virtio signalisation
INT SERVICE - floating interrupt for signalisations from the system
service processor
struct kvm_s390_interrupt, which is submitted as ioctl parameter when injecting
an interrupt, also carrys parameter data for interrupts along with the interrupt
type. Interrupts on s390 usually have a state that represents the current
operation, or identifies which device has caused the interruption on s390.
kvm_s390_handle_wait() does handle waitpsw in two flavors: in case of a
disabled wait (that is, disabled for interrupts), we exit to userspace. In case
of an enabled wait we set up a timer that equals the cpu clock comparator value
and sleep on a wait queue.
[christian: change virtio interrupt to 0x2603]
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
2008-03-25 17:47:26 +00:00
|
|
|
struct kvm_s390_interrupt s390int;
|
KVM: s390: Do not leak kernel stack data in the KVM_S390_INTERRUPT ioctl
When the userspace program runs the KVM_S390_INTERRUPT ioctl to inject
an interrupt, we convert them from the legacy struct kvm_s390_interrupt
to the new struct kvm_s390_irq via the s390int_to_s390irq() function.
However, this function does not take care of all types of interrupts
that we can inject into the guest later (see do_inject_vcpu()). Since we
do not clear out the s390irq values before calling s390int_to_s390irq(),
there is a chance that we copy random data from the kernel stack which
could be leaked to the userspace later.
Specifically, the problem exists with the KVM_S390_INT_PFAULT_INIT
interrupt: s390int_to_s390irq() does not handle it, and the function
__inject_pfault_init() later copies irq->u.ext which contains the
random kernel stack data. This data can then be leaked either to
the guest memory in __deliver_pfault_init(), or the userspace might
retrieve it directly with the KVM_S390_GET_IRQ_STATE ioctl.
Fix it by handling that interrupt type in s390int_to_s390irq(), too,
and by making sure that the s390irq struct is properly pre-initialized.
And while we're at it, make sure that s390int_to_s390irq() now
directly returns -EINVAL for unknown interrupt types, so that we
immediately get a proper error code in case we add more interrupt
types to do_inject_vcpu() without updating s390int_to_s390irq()
sometime in the future.
Cc: stable@vger.kernel.org
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Link: https://lore.kernel.org/kvm/20190912115438.25761-1-thuth@redhat.com
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2019-09-12 11:54:38 +00:00
|
|
|
struct kvm_s390_irq s390irq = {};
|
KVM: s390: interrupt subsystem, cpu timer, waitpsw
This patch contains the s390 interrupt subsystem (similar to in kernel apic)
including timer interrupts (similar to in-kernel-pit) and enabled wait
(similar to in kernel hlt).
In order to achieve that, this patch also introduces intercept handling
for instruction intercepts, and it implements load control instructions.
This patch introduces an ioctl KVM_S390_INTERRUPT which is valid for both
the vm file descriptors and the vcpu file descriptors. In case this ioctl is
issued against a vm file descriptor, the interrupt is considered floating.
Floating interrupts may be delivered to any virtual cpu in the configuration.
The following interrupts are supported:
SIGP STOP - interprocessor signal that stops a remote cpu
SIGP SET PREFIX - interprocessor signal that sets the prefix register of a
(stopped) remote cpu
INT EMERGENCY - interprocessor interrupt, usually used to signal need_reshed
and for smp_call_function() in the guest.
PROGRAM INT - exception during program execution such as page fault, illegal
instruction and friends
RESTART - interprocessor signal that starts a stopped cpu
INT VIRTIO - floating interrupt for virtio signalisation
INT SERVICE - floating interrupt for signalisations from the system
service processor
struct kvm_s390_interrupt, which is submitted as ioctl parameter when injecting
an interrupt, also carrys parameter data for interrupts along with the interrupt
type. Interrupts on s390 usually have a state that represents the current
operation, or identifies which device has caused the interruption on s390.
kvm_s390_handle_wait() does handle waitpsw in two flavors: in case of a
disabled wait (that is, disabled for interrupts), we exit to userspace. In case
of an enabled wait we set up a timer that equals the cpu clock comparator value
and sleep on a wait queue.
[christian: change virtio interrupt to 0x2603]
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
2008-03-25 17:47:26 +00:00
|
|
|
|
|
|
|
if (copy_from_user(&s390int, argp, sizeof(s390int)))
|
2017-12-04 20:35:36 +00:00
|
|
|
return -EFAULT;
|
2014-07-29 13:11:49 +00:00
|
|
|
if (s390int_to_s390irq(&s390int, &s390irq))
|
|
|
|
return -EINVAL;
|
2017-12-04 20:35:36 +00:00
|
|
|
return kvm_s390_inject_vcpu(vcpu, &s390irq);
|
KVM: s390: interrupt subsystem, cpu timer, waitpsw
This patch contains the s390 interrupt subsystem (similar to in kernel apic)
including timer interrupts (similar to in-kernel-pit) and enabled wait
(similar to in kernel hlt).
In order to achieve that, this patch also introduces intercept handling
for instruction intercepts, and it implements load control instructions.
This patch introduces an ioctl KVM_S390_INTERRUPT which is valid for both
the vm file descriptors and the vcpu file descriptors. In case this ioctl is
issued against a vm file descriptor, the interrupt is considered floating.
Floating interrupts may be delivered to any virtual cpu in the configuration.
The following interrupts are supported:
SIGP STOP - interprocessor signal that stops a remote cpu
SIGP SET PREFIX - interprocessor signal that sets the prefix register of a
(stopped) remote cpu
INT EMERGENCY - interprocessor interrupt, usually used to signal need_reshed
and for smp_call_function() in the guest.
PROGRAM INT - exception during program execution such as page fault, illegal
instruction and friends
RESTART - interprocessor signal that starts a stopped cpu
INT VIRTIO - floating interrupt for virtio signalisation
INT SERVICE - floating interrupt for signalisations from the system
service processor
struct kvm_s390_interrupt, which is submitted as ioctl parameter when injecting
an interrupt, also carrys parameter data for interrupts along with the interrupt
type. Interrupts on s390 usually have a state that represents the current
operation, or identifies which device has caused the interruption on s390.
kvm_s390_handle_wait() does handle waitpsw in two flavors: in case of a
disabled wait (that is, disabled for interrupts), we exit to userspace. In case
of an enabled wait we set up a timer that equals the cpu clock comparator value
and sleep on a wait queue.
[christian: change virtio interrupt to 0x2603]
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
2008-03-25 17:47:26 +00:00
|
|
|
}
|
2017-12-04 20:35:36 +00:00
|
|
|
}
|
2017-12-12 16:41:34 +00:00
|
|
|
return -ENOIOCTLCMD;
|
|
|
|
}
|
|
|
|
|
2022-05-17 16:36:25 +00:00
|
|
|
static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,
|
|
|
|
struct kvm_pv_cmd *cmd)
|
|
|
|
{
|
|
|
|
struct kvm_s390_pv_dmp dmp;
|
|
|
|
void *data;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
/* Dump initialization is a prerequisite */
|
|
|
|
if (!vcpu->kvm->arch.pv.dumping)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (copy_from_user(&dmp, (__u8 __user *)cmd->data, sizeof(dmp)))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
/* We only handle this subcmd right now */
|
|
|
|
if (dmp.subcmd != KVM_PV_DUMP_CPU)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
/* CPU dump length is the same as create cpu storage donation. */
|
|
|
|
if (dmp.buff_len != uv_info.guest_cpu_stor_len)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
data = kvzalloc(uv_info.guest_cpu_stor_len, GFP_KERNEL);
|
|
|
|
if (!data)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
ret = kvm_s390_pv_dump_cpu(vcpu, data, &cmd->rc, &cmd->rrc);
|
|
|
|
|
|
|
|
VCPU_EVENT(vcpu, 3, "PROTVIRT DUMP CPU %d rc %x rrc %x",
|
|
|
|
vcpu->vcpu_id, cmd->rc, cmd->rrc);
|
|
|
|
|
|
|
|
if (ret)
|
|
|
|
ret = -EINVAL;
|
|
|
|
|
|
|
|
/* On success copy over the dump data */
|
|
|
|
if (!ret && copy_to_user((__u8 __user *)dmp.buff_addr, data, uv_info.guest_cpu_stor_len))
|
|
|
|
ret = -EFAULT;
|
|
|
|
|
|
|
|
kvfree(data);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2017-12-12 16:41:34 +00:00
|
|
|
long kvm_arch_vcpu_ioctl(struct file *filp,
|
|
|
|
unsigned int ioctl, unsigned long arg)
|
|
|
|
{
|
|
|
|
struct kvm_vcpu *vcpu = filp->private_data;
|
|
|
|
void __user *argp = (void __user *)arg;
|
|
|
|
int idx;
|
|
|
|
long r;
|
2020-01-09 09:37:50 +00:00
|
|
|
u16 rc, rrc;
|
2017-12-04 20:35:36 +00:00
|
|
|
|
|
|
|
vcpu_load(vcpu);
|
|
|
|
|
|
|
|
switch (ioctl) {
|
2008-03-25 17:47:20 +00:00
|
|
|
case KVM_S390_STORE_STATUS:
|
2013-09-12 08:33:45 +00:00
|
|
|
idx = srcu_read_lock(&vcpu->kvm->srcu);
|
2020-01-31 10:02:00 +00:00
|
|
|
r = kvm_s390_store_status_unloaded(vcpu, arg);
|
2013-09-12 08:33:45 +00:00
|
|
|
srcu_read_unlock(&vcpu->kvm->srcu, idx);
|
2010-05-13 09:21:46 +00:00
|
|
|
break;
|
2008-03-25 17:47:20 +00:00
|
|
|
case KVM_S390_SET_INITIAL_PSW: {
|
|
|
|
psw_t psw;
|
|
|
|
|
2010-05-13 09:21:46 +00:00
|
|
|
r = -EFAULT;
|
2008-03-25 17:47:20 +00:00
|
|
|
if (copy_from_user(&psw, argp, sizeof(psw)))
|
2010-05-13 09:21:46 +00:00
|
|
|
break;
|
|
|
|
r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
|
|
|
|
break;
|
2008-03-25 17:47:20 +00:00
|
|
|
}
|
2020-01-31 10:02:02 +00:00
|
|
|
case KVM_S390_CLEAR_RESET:
|
|
|
|
r = 0;
|
|
|
|
kvm_arch_vcpu_ioctl_clear_reset(vcpu);
|
2020-01-09 09:37:50 +00:00
|
|
|
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
|
|
|
|
r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
|
|
|
|
UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
|
|
|
|
VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
|
|
|
|
rc, rrc);
|
|
|
|
}
|
2020-01-31 10:02:02 +00:00
|
|
|
break;
|
2008-03-25 17:47:20 +00:00
|
|
|
case KVM_S390_INITIAL_RESET:
|
2020-01-31 10:02:02 +00:00
|
|
|
r = 0;
|
|
|
|
kvm_arch_vcpu_ioctl_initial_reset(vcpu);
|
2020-01-09 09:37:50 +00:00
|
|
|
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
|
|
|
|
r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
|
|
|
|
UVC_CMD_CPU_RESET_INITIAL,
|
|
|
|
&rc, &rrc);
|
|
|
|
VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
|
|
|
|
rc, rrc);
|
|
|
|
}
|
2020-01-31 10:02:02 +00:00
|
|
|
break;
|
|
|
|
case KVM_S390_NORMAL_RESET:
|
|
|
|
r = 0;
|
|
|
|
kvm_arch_vcpu_ioctl_normal_reset(vcpu);
|
2020-01-09 09:37:50 +00:00
|
|
|
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
|
|
|
|
r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
|
|
|
|
UVC_CMD_CPU_RESET, &rc, &rrc);
|
|
|
|
VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
|
|
|
|
rc, rrc);
|
|
|
|
}
|
2010-05-13 09:21:46 +00:00
|
|
|
break;
|
2012-05-15 12:15:26 +00:00
|
|
|
case KVM_SET_ONE_REG:
|
|
|
|
case KVM_GET_ONE_REG: {
|
|
|
|
struct kvm_one_reg reg;
|
2019-06-14 11:11:21 +00:00
|
|
|
r = -EINVAL;
|
|
|
|
if (kvm_s390_pv_cpu_is_protected(vcpu))
|
|
|
|
break;
|
2012-05-15 12:15:26 +00:00
|
|
|
r = -EFAULT;
|
|
|
|
if (copy_from_user(®, argp, sizeof(reg)))
|
|
|
|
break;
|
|
|
|
if (ioctl == KVM_SET_ONE_REG)
|
|
|
|
r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, ®);
|
|
|
|
else
|
|
|
|
r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, ®);
|
|
|
|
break;
|
|
|
|
}
|
2012-01-04 09:25:21 +00:00
|
|
|
#ifdef CONFIG_KVM_S390_UCONTROL
|
|
|
|
case KVM_S390_UCAS_MAP: {
|
|
|
|
struct kvm_s390_ucas_mapping ucasmap;
|
|
|
|
|
|
|
|
if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
|
|
|
|
r = -EFAULT;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!kvm_is_ucontrol(vcpu->kvm)) {
|
|
|
|
r = -EINVAL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
|
|
|
|
ucasmap.vcpu_addr, ucasmap.length);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case KVM_S390_UCAS_UNMAP: {
|
|
|
|
struct kvm_s390_ucas_mapping ucasmap;
|
|
|
|
|
|
|
|
if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
|
|
|
|
r = -EFAULT;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!kvm_is_ucontrol(vcpu->kvm)) {
|
|
|
|
r = -EINVAL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
|
|
|
|
ucasmap.length);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
#endif
|
2012-01-04 09:25:26 +00:00
|
|
|
case KVM_S390_VCPU_FAULT: {
|
2014-04-30 14:04:25 +00:00
|
|
|
r = gmap_fault(vcpu->arch.gmap, arg, 0);
|
2012-01-04 09:25:26 +00:00
|
|
|
break;
|
|
|
|
}
|
2012-12-20 14:32:11 +00:00
|
|
|
case KVM_ENABLE_CAP:
|
|
|
|
{
|
|
|
|
struct kvm_enable_cap cap;
|
|
|
|
r = -EFAULT;
|
|
|
|
if (copy_from_user(&cap, argp, sizeof(cap)))
|
|
|
|
break;
|
|
|
|
r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
|
|
|
|
break;
|
|
|
|
}
|
2015-02-06 14:01:21 +00:00
|
|
|
case KVM_S390_MEM_OP: {
|
|
|
|
struct kvm_s390_mem_op mem_op;
|
|
|
|
|
|
|
|
if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
|
2022-02-11 18:22:12 +00:00
|
|
|
r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
|
2015-02-06 14:01:21 +00:00
|
|
|
else
|
|
|
|
r = -EFAULT;
|
|
|
|
break;
|
|
|
|
}
|
2014-11-24 16:13:46 +00:00
|
|
|
case KVM_S390_SET_IRQ_STATE: {
|
|
|
|
struct kvm_s390_irq_state irq_state;
|
|
|
|
|
|
|
|
r = -EFAULT;
|
|
|
|
if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
|
|
|
|
break;
|
|
|
|
if (irq_state.len > VCPU_IRQS_MAX_BUF ||
|
|
|
|
irq_state.len == 0 ||
|
|
|
|
irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
|
|
|
|
r = -EINVAL;
|
|
|
|
break;
|
|
|
|
}
|
2017-11-21 15:02:52 +00:00
|
|
|
/* do not use irq_state.flags, it will break old QEMUs */
|
2014-11-24 16:13:46 +00:00
|
|
|
r = kvm_s390_set_irq_state(vcpu,
|
|
|
|
(void __user *) irq_state.buf,
|
|
|
|
irq_state.len);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case KVM_S390_GET_IRQ_STATE: {
|
|
|
|
struct kvm_s390_irq_state irq_state;
|
|
|
|
|
|
|
|
r = -EFAULT;
|
|
|
|
if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
|
|
|
|
break;
|
|
|
|
if (irq_state.len == 0) {
|
|
|
|
r = -EINVAL;
|
|
|
|
break;
|
|
|
|
}
|
2017-11-21 15:02:52 +00:00
|
|
|
/* do not use irq_state.flags, it will break old QEMUs */
|
2014-11-24 16:13:46 +00:00
|
|
|
r = kvm_s390_get_irq_state(vcpu,
|
|
|
|
(__u8 __user *) irq_state.buf,
|
|
|
|
irq_state.len);
|
2022-05-17 16:36:25 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case KVM_S390_PV_CPU_COMMAND: {
|
|
|
|
struct kvm_pv_cmd cmd;
|
|
|
|
|
|
|
|
r = -EINVAL;
|
|
|
|
if (!is_prot_virt_host())
|
|
|
|
break;
|
|
|
|
|
|
|
|
r = -EFAULT;
|
|
|
|
if (copy_from_user(&cmd, argp, sizeof(cmd)))
|
|
|
|
break;
|
|
|
|
|
|
|
|
r = -EINVAL;
|
|
|
|
if (cmd.flags)
|
|
|
|
break;
|
|
|
|
|
|
|
|
/* We only handle this cmd right now */
|
|
|
|
if (cmd.cmd != KVM_PV_DUMP)
|
|
|
|
break;
|
|
|
|
|
|
|
|
r = kvm_s390_handle_pv_vcpu_dump(vcpu, &cmd);
|
|
|
|
|
|
|
|
/* Always copy over UV rc / rrc data */
|
|
|
|
if (copy_to_user((__u8 __user *)argp, &cmd.rc,
|
|
|
|
sizeof(cmd.rc) + sizeof(cmd.rrc)))
|
|
|
|
r = -EFAULT;
|
2014-11-24 16:13:46 +00:00
|
|
|
break;
|
|
|
|
}
|
2008-03-25 17:47:20 +00:00
|
|
|
default:
|
2012-01-04 09:25:30 +00:00
|
|
|
r = -ENOTTY;
|
2008-03-25 17:47:20 +00:00
|
|
|
}
|
2017-12-04 20:35:36 +00:00
|
|
|
|
|
|
|
vcpu_put(vcpu);
|
2010-05-13 09:21:46 +00:00
|
|
|
return r;
|
2008-03-25 17:47:20 +00:00
|
|
|
}
|
|
|
|
|
2018-04-18 19:19:58 +00:00
|
|
|
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
|
2012-01-04 09:25:23 +00:00
|
|
|
{
|
|
|
|
#ifdef CONFIG_KVM_S390_UCONTROL
|
|
|
|
if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
|
|
|
|
&& (kvm_is_ucontrol(vcpu->kvm))) {
|
|
|
|
vmf->page = virt_to_page(vcpu->arch.sie_block);
|
|
|
|
get_page(vmf->page);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
return VM_FAULT_SIGBUS;
|
|
|
|
}
|
|
|
|
|
2022-11-03 14:44:10 +00:00
|
|
|
bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
|
|
|
|
{
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2008-03-25 17:47:20 +00:00
|
|
|
/* Section: memory related */
|
2009-12-23 16:35:18 +00:00
|
|
|
int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
2021-12-06 19:54:11 +00:00
|
|
|
const struct kvm_memory_slot *old,
|
|
|
|
struct kvm_memory_slot *new,
|
2013-02-27 10:44:34 +00:00
|
|
|
enum kvm_mr_change change)
|
2008-03-25 17:47:20 +00:00
|
|
|
{
|
2021-12-06 19:54:21 +00:00
|
|
|
gpa_t size;
|
|
|
|
|
|
|
|
/* When we are protected, we should not change the memory slots */
|
|
|
|
if (kvm_s390_pv_get_handle(kvm))
|
|
|
|
return -EINVAL;
|
|
|
|
|
2023-01-27 14:05:32 +00:00
|
|
|
if (change != KVM_MR_DELETE && change != KVM_MR_FLAGS_ONLY) {
|
|
|
|
/*
|
|
|
|
* A few sanity checks. We can have memory slots which have to be
|
|
|
|
* located/ended at a segment boundary (1MB). The memory in userland is
|
|
|
|
* ok to be fragmented into various different vmas. It is okay to mmap()
|
|
|
|
* and munmap() stuff in this slot after doing this call at any time
|
|
|
|
*/
|
2021-12-06 19:54:15 +00:00
|
|
|
|
2023-01-27 14:05:32 +00:00
|
|
|
if (new->userspace_addr & 0xffffful)
|
|
|
|
return -EINVAL;
|
2008-03-25 17:47:20 +00:00
|
|
|
|
2023-01-27 14:05:32 +00:00
|
|
|
size = new->npages * PAGE_SIZE;
|
|
|
|
if (size & 0xffffful)
|
|
|
|
return -EINVAL;
|
2008-03-25 17:47:20 +00:00
|
|
|
|
2023-01-27 14:05:32 +00:00
|
|
|
if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2008-03-25 17:47:20 +00:00
|
|
|
|
2023-01-27 14:05:32 +00:00
|
|
|
if (!kvm->arch.migration_mode)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Turn off migration mode when:
|
|
|
|
* - userspace creates a new memslot with dirty logging off,
|
|
|
|
* - userspace modifies an existing memslot (MOVE or FLAGS_ONLY) and
|
|
|
|
* dirty logging is turned off.
|
|
|
|
* Migration mode expects dirty page logging being enabled to store
|
|
|
|
* its dirty bitmap.
|
|
|
|
*/
|
|
|
|
if (change != KVM_MR_DELETE &&
|
|
|
|
!(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
|
|
|
|
WARN(kvm_s390_vm_stop_migration(kvm),
|
|
|
|
"Failed to stop migration mode");
|
2014-12-01 16:24:42 +00:00
|
|
|
|
2009-12-23 16:35:18 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void kvm_arch_commit_memory_region(struct kvm *kvm,
|
2020-02-18 21:07:24 +00:00
|
|
|
struct kvm_memory_slot *old,
|
2015-05-18 11:20:23 +00:00
|
|
|
const struct kvm_memory_slot *new,
|
2013-02-27 10:45:25 +00:00
|
|
|
enum kvm_mr_change change)
|
2009-12-23 16:35:18 +00:00
|
|
|
{
|
2019-05-24 14:06:23 +00:00
|
|
|
int rc = 0;
|
2011-07-24 08:48:21 +00:00
|
|
|
|
2019-05-24 14:06:23 +00:00
|
|
|
switch (change) {
|
|
|
|
case KVM_MR_DELETE:
|
|
|
|
rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
|
|
|
|
old->npages * PAGE_SIZE);
|
|
|
|
break;
|
|
|
|
case KVM_MR_MOVE:
|
|
|
|
rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
|
|
|
|
old->npages * PAGE_SIZE);
|
|
|
|
if (rc)
|
|
|
|
break;
|
2020-03-11 04:51:32 +00:00
|
|
|
fallthrough;
|
2019-05-24 14:06:23 +00:00
|
|
|
case KVM_MR_CREATE:
|
2021-12-06 19:54:15 +00:00
|
|
|
rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
|
|
|
|
new->base_gfn * PAGE_SIZE,
|
|
|
|
new->npages * PAGE_SIZE);
|
2019-05-24 14:06:23 +00:00
|
|
|
break;
|
|
|
|
case KVM_MR_FLAGS_ONLY:
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
|
|
|
|
}
|
2011-07-24 08:48:21 +00:00
|
|
|
if (rc)
|
2015-05-20 11:24:02 +00:00
|
|
|
pr_warn("failed to commit memory region\n");
|
2011-07-24 08:48:21 +00:00
|
|
|
return;
|
2008-03-25 17:47:20 +00:00
|
|
|
}
|
|
|
|
|
2016-04-01 12:38:57 +00:00
|
|
|
static inline unsigned long nonhyp_mask(int i)
|
|
|
|
{
|
|
|
|
unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
|
|
|
|
|
|
|
|
return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
|
|
|
|
}
|
|
|
|
|
2008-03-25 17:47:20 +00:00
|
|
|
/*
 * Module entry point.
 *
 * Refuses to load when SIE is unavailable (-ENODEV) or when both the
 * "nested" and "hpage" options are set (-EINVAL). Otherwise it merges the
 * host facility list, masked per word by nonhyp_mask(), into
 * kvm_s390_fac_base, runs the s390 specific setup and finally registers
 * with the generic KVM code. If that registration fails, the s390 setup
 * is undone again before the error is returned.
 */
static int __init kvm_s390_init(void)
{
	int word, rc;

	if (!sclp.has_sief2) {
		pr_info("SIE is not available\n");
		return -ENODEV;
	}

	if (nested && hpage) {
		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
		return -EINVAL;
	}

	for (word = 0; word < 16; word++)
		kvm_s390_fac_base[word] |= stfle_fac_list[word] &
					   nonhyp_mask(word);

	rc = __kvm_s390_init();
	if (rc)
		return rc;

	rc = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE);
	if (rc)
		__kvm_s390_exit();	/* roll back the arch setup */

	return rc;
}
|
|
|
|
|
|
|
|
/*
 * Module exit point: tear down in the reverse order of kvm_s390_init(),
 * i.e. leave the generic KVM code first, then undo the s390 setup.
 */
static void __exit kvm_s390_exit(void)
{
	kvm_exit();
	__kvm_s390_exit();
}
|
|
|
|
|
|
|
|
/* Register the module's load and unload handlers. */
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Allow the kvm module to be loaded automatically.
 * The module aliases are declared in this file rather than in
 * virt/kvm/kvm_main.c because x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");
|