Merge v5.16-rc5 into char-misc-next

We need the fixes in here as well, and also resolve some merge conflicts
in:
	drivers/misc/eeprom/at25.c

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Greg Kroah-Hartman 2021-12-13 10:17:10 +01:00
commit af40d16042
635 changed files with 7198 additions and 3205 deletions

View File

@ -126,6 +126,8 @@ Greg Kroah-Hartman <gregkh@suse.de>
Greg Kroah-Hartman <greg@kroah.com>
Greg Kurz <groug@kaod.org> <gkurz@linux.vnet.ibm.com>
Gregory CLEMENT <gregory.clement@bootlin.com> <gregory.clement@free-electrons.com>
Guo Ren <guoren@kernel.org> <guoren@linux.alibaba.com>
Guo Ren <guoren@kernel.org> <ren_guo@c-sky.com>
Gustavo Padovan <gustavo@las.ic.unicamp.br>
Gustavo Padovan <padovan@profusion.mobi>
Hanjun Guo <guohanjun@huawei.com> <hanjun.guo@linaro.org>

View File

@ -25,6 +25,6 @@ Sub graphs of DRBD's state transitions
:alt: disk-states-8.dot
:align: center
.. kernel-figure:: node-states-8.dot
:alt: node-states-8.dot
.. kernel-figure:: peer-states-8.dot
:alt: peer-states-8.dot
:align: center

View File

@ -1,8 +1,3 @@
digraph node_states {
Secondary -> Primary [ label = "ioctl_set_state()" ]
Primary -> Secondary [ label = "ioctl_set_state()" ]
}
digraph peer_states {
Secondary -> Primary [ label = "recv state packet" ]
Primary -> Secondary [ label = "recv state packet" ]

View File

@ -53,11 +53,10 @@ The number of bits that the PAC occupies in a pointer is 55 minus the
virtual address size configured by the kernel. For example, with a
virtual address size of 48, the PAC is 7 bits wide.
Recent versions of GCC can compile code with APIAKey-based return
address protection when passed the -msign-return-address option. This
uses instructions in the HINT space (unless -march=armv8.3-a or higher
is also passed), and such code can run on systems without the pointer
authentication extension.
When ARM64_PTR_AUTH_KERNEL is selected, the kernel will be compiled
with HINT space pointer authentication instructions protecting
function returns. Kernels built with this option will work on hardware
with or without pointer authentication support.
In addition to exec(), keys can also be reinitialized to random values
using the PR_PAC_RESET_KEYS prctl. A bitmask of PR_PAC_APIAKEY,

View File

@ -249,11 +249,16 @@ except ImportError:
html_static_path = ['sphinx-static']
html_context = {
'css_files': [
'_static/theme_overrides.css',
],
}
html_css_files = [
'theme_overrides.css',
]
if major <= 1 and minor < 8:
html_context = {
'css_files': [
'_static/theme_overrides.css',
],
}
# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied

View File

@ -73,12 +73,12 @@ CPUFREQ_POSTCHANGE.
The third argument is a struct cpufreq_freqs with the following
values:
===== ===========================
cpu number of the affected CPU
====== ======================================
policy a pointer to the struct cpufreq_policy
old old frequency
new new frequency
flags flags of the cpufreq driver
===== ===========================
====== ======================================
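For illustration, a minimal sketch (not taken from this commit) of a transition notifier consuming the fields listed above; the my_* names are hypothetical::

	#include <linux/cpufreq.h>
	#include <linux/notifier.h>
	#include <linux/printk.h>

	/* Hypothetical transition notifier: log each completed frequency change. */
	static int my_cpufreq_notifier(struct notifier_block *nb,
				       unsigned long event, void *data)
	{
		struct cpufreq_freqs *freqs = data;

		if (event == CPUFREQ_POSTCHANGE)
			pr_info("cpu%u: %u kHz -> %u kHz\n",
				freqs->policy->cpu, freqs->old, freqs->new);

		return NOTIFY_OK;
	}

	static struct notifier_block my_cpufreq_nb = {
		.notifier_call = my_cpufreq_notifier,
	};

	/* Registered from module init with
	 * cpufreq_register_notifier(&my_cpufreq_nb, CPUFREQ_TRANSITION_NOTIFIER). */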
3. CPUFreq Table Generation with Operating Performance Point (OPP)
==================================================================

View File

@ -136,7 +136,7 @@ examples:
samsung,syscon-phandle = <&pmu_system_controller>;
/* NTC thermistor is a hwmon device */
ncp15wb473 {
thermistor {
compatible = "murata,ncp15wb473";
pullup-uv = <1800000>;
pullup-ohm = <47000>;

View File

@ -142,7 +142,7 @@ examples:
down {
label = "GPIO Key DOWN";
linux,code = <108>;
interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>;
interrupts = <1 IRQ_TYPE_EDGE_FALLING>;
};
};

View File

@ -79,6 +79,8 @@ properties:
properties:
data-lanes:
description:
Note that 'fsl,imx7-mipi-csi2' only supports up to 2 data lines.
items:
minItems: 1
maxItems: 4
@ -91,18 +93,6 @@ properties:
required:
- data-lanes
allOf:
- if:
properties:
compatible:
contains:
const: fsl,imx7-mipi-csi2
then:
properties:
data-lanes:
items:
maxItems: 2
port@1:
$ref: /schemas/graph.yaml#/properties/port
description:

View File

@ -91,6 +91,14 @@ properties:
compensate for the board being designed with the lanes
swapped.
enet-phy-lane-no-swap:
$ref: /schemas/types.yaml#/definitions/flag
description:
If set, indicates that PHY will disable swap of the
TX/RX lanes. This property allows the PHY to work correctly after
e.g. wrong bootstrap configuration caused by issues in PCB
layout design.
eee-broken-100tx:
$ref: /schemas/types.yaml#/definitions/flag
description:

View File

@ -29,7 +29,7 @@ properties:
- PHY_TYPE_PCIE
- PHY_TYPE_SATA
- PHY_TYPE_SGMII
- PHY_TYPE_USB
- PHY_TYPE_USB3
- description: The PHY instance
minimum: 0
maximum: 1 # for DP, SATA or USB

View File

@ -105,7 +105,7 @@ examples:
reg = <0x65>;
interrupt-parent = <&gpio1>;
interrupts = <16 IRQ_TYPE_EDGE_FALLING>;
ti,watchdog-timer = <0>;
ti,watchdog-timeout-ms = <0>;
ti,sc-ocp-limit-microamp = <2000000>;
ti,sc-ovp-limit-microvolt = <17800000>;
monitored-battery = <&bat>;

View File

@ -19,6 +19,9 @@ properties:
clocks:
maxItems: 1
interrupts:
maxItems: 1
"#sound-dai-cells":
const: 0

View File

@ -33,6 +33,7 @@ properties:
- rockchip,rk3328-spi
- rockchip,rk3368-spi
- rockchip,rk3399-spi
- rockchip,rk3568-spi
- rockchip,rv1126-spi
- const: rockchip,rk3066-spi

View File

@ -1,7 +1,7 @@
.. SPDX-License-Identifier: GPL-2.0
=================================
NETWORK FILESYSTEM HELPER LIBRARY
Network Filesystem Helper Library
=================================
.. Contents:
@ -37,22 +37,22 @@ into a common call framework.
The following services are provided:
* Handles transparent huge pages (THPs).
* Handle folios that span multiple pages.
* Insulates the netfs from VM interface changes.
* Insulate the netfs from VM interface changes.
* Allows the netfs to arbitrarily split reads up into pieces, even ones that
don't match page sizes or page alignments and that may cross pages.
* Allow the netfs to arbitrarily split reads up into pieces, even ones that
don't match folio sizes or folio alignments and that may cross folios.
* Allows the netfs to expand a readahead request in both directions to meet
its needs.
* Allow the netfs to expand a readahead request in both directions to meet its
needs.
* Allows the netfs to partially fulfil a read, which will then be resubmitted.
* Allow the netfs to partially fulfil a read, which will then be resubmitted.
* Handles local caching, allowing cached data and server-read data to be
* Handle local caching, allowing cached data and server-read data to be
interleaved for a single request.
* Handles clearing of bufferage that aren't on the server.
* Handle clearing of bufferage that aren't on the server.
* Handle retrying of reads that failed, switching reads from the cache to the
server as necessary.
@ -70,22 +70,22 @@ Read Helper Functions
Three read helpers are provided::
* void netfs_readahead(struct readahead_control *ractl,
const struct netfs_read_request_ops *ops,
void *netfs_priv);``
* int netfs_readpage(struct file *file,
struct page *page,
const struct netfs_read_request_ops *ops,
void *netfs_priv);
* int netfs_write_begin(struct file *file,
struct address_space *mapping,
loff_t pos,
unsigned int len,
unsigned int flags,
struct page **_page,
void **_fsdata,
const struct netfs_read_request_ops *ops,
void *netfs_priv);
void netfs_readahead(struct readahead_control *ractl,
const struct netfs_read_request_ops *ops,
void *netfs_priv);
int netfs_readpage(struct file *file,
struct folio *folio,
const struct netfs_read_request_ops *ops,
void *netfs_priv);
int netfs_write_begin(struct file *file,
struct address_space *mapping,
loff_t pos,
unsigned int len,
unsigned int flags,
struct folio **_folio,
void **_fsdata,
const struct netfs_read_request_ops *ops,
void *netfs_priv);
Each corresponds to a VM operation, with the addition of a couple of parameters
for the use of the read helpers:
@ -103,8 +103,8 @@ Both of these values will be stored into the read request structure.
For ->readahead() and ->readpage(), the network filesystem should just jump
into the corresponding read helper; whereas for ->write_begin(), it may be a
little more complicated as the network filesystem might want to flush
conflicting writes or track dirty data and needs to put the acquired page if an
error occurs after calling the helper.
conflicting writes or track dirty data and needs to put the acquired folio if
an error occurs after calling the helper.
The helpers manage the read request, calling back into the network filesystem
through the supplied table of operations. Waits will be performed as
@ -253,7 +253,7 @@ through which it can issue requests and negotiate::
void (*issue_op)(struct netfs_read_subrequest *subreq);
bool (*is_still_valid)(struct netfs_read_request *rreq);
int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
struct page *page, void **_fsdata);
struct folio *folio, void **_fsdata);
void (*done)(struct netfs_read_request *rreq);
void (*cleanup)(struct address_space *mapping, void *netfs_priv);
};
@ -313,13 +313,14 @@ The operations are as follows:
There is no return value; the netfs_subreq_terminated() function should be
called to indicate whether or not the operation succeeded and how much data
it transferred. The filesystem also should not deal with setting pages
it transferred. The filesystem also should not deal with setting folios
uptodate, unlocking them or dropping their refs - the helpers need to deal
with this as they have to coordinate with copying to the local cache.
Note that the helpers have the pages locked, but not pinned. It is possible
to use the ITER_XARRAY iov iterator to refer to the range of the inode that
is being operated upon without the need to allocate large bvec tables.
Note that the helpers have the folios locked, but not pinned. It is
possible to use the ITER_XARRAY iov iterator to refer to the range of the
inode that is being operated upon without the need to allocate large bvec
tables.
* ``is_still_valid()``
@ -330,15 +331,15 @@ The operations are as follows:
* ``check_write_begin()``
[Optional] This is called from the netfs_write_begin() helper once it has
allocated/grabbed the page to be modified to allow the filesystem to flush
allocated/grabbed the folio to be modified to allow the filesystem to flush
conflicting state before allowing it to be modified.
It should return 0 if everything is now fine, -EAGAIN if the page should be
It should return 0 if everything is now fine, -EAGAIN if the folio should be
regrabbed and any other error code to abort the operation.
* ``done``
[Optional] This is called after the pages in the request have all been
[Optional] This is called after the folios in the request have all been
unlocked (and marked uptodate if applicable).
* ``cleanup``
@ -390,7 +391,7 @@ The read helpers work by the following general procedure:
* If NETFS_SREQ_CLEAR_TAIL was set, a short read will be cleared to the
end of the slice instead of reissuing.
* Once the data is read, the pages that have been fully read/cleared:
* Once the data is read, the folios that have been fully read/cleared:
* Will be marked uptodate.
@ -398,11 +399,11 @@ The read helpers work by the following general procedure:
* Unlocked
* Any pages that need writing to the cache will then have DIO writes issued.
* Any folios that need writing to the cache will then have DIO writes issued.
* Synchronous operations will wait for reading to be complete.
* Writes to the cache will proceed asynchronously and the pages will have the
* Writes to the cache will proceed asynchronously and the folios will have the
PG_fscache mark removed when that completes.
* The request structures will be cleaned up when everything has completed.
@ -452,6 +453,9 @@ operation table looks like the following::
netfs_io_terminated_t term_func,
void *term_func_priv);
int (*prepare_write)(struct netfs_cache_resources *cres,
loff_t *_start, size_t *_len, loff_t i_size);
int (*write)(struct netfs_cache_resources *cres,
loff_t start_pos,
struct iov_iter *iter,
@ -509,6 +513,14 @@ The methods defined in the table are:
indicating whether the termination is definitely happening in the caller's
context.
* ``prepare_write()``
[Required] Called to adjust a write to the cache and check that there is
sufficient space in the cache. The start and length values indicate the
size of the write that netfslib is proposing, and this can be adjusted by
the cache to respect DIO boundaries. The file size is passed for
information.
* ``write()``
[Required] Called to write to the cache. The start file offset is given
@ -525,4 +537,9 @@ not the read request structure as they could be used in other situations where
there isn't a read request structure as well, such as writing dirty data to the
cache.
API Function Reference
======================
.. kernel-doc:: include/linux/netfs.h
.. kernel-doc:: fs/netfs/read_helper.c
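For illustration, a minimal sketch (not taken from this commit) of how a network filesystem might route its ->readpage through netfs_readpage() using the operations table documented above; the myfs_* names are hypothetical and only the required issue_op hook is filled in::

	#include <linux/fs.h>
	#include <linux/netfs.h>
	#include <linux/pagemap.h>

	/* Hypothetical: issue one subrequest's worth of I/O to the server. */
	static void myfs_issue_op(struct netfs_read_subrequest *subreq)
	{
		/*
		 * Start a read covering subreq->start for subreq->len bytes,
		 * then report the outcome with netfs_subreq_terminated() as
		 * described in the text above.
		 */
	}

	static const struct netfs_read_request_ops myfs_req_ops = {
		.issue_op	= myfs_issue_op,
	};

	/* Hypothetical ->readpage that defers to the read helper. */
	static int myfs_readpage(struct file *file, struct page *page)
	{
		struct folio *folio = page_folio(page);

		/* The helper unlocks the folio and marks it uptodate. */
		return netfs_readpage(file, folio, &myfs_req_ops, NULL);
	}

A real implementation would typically also provide the is_still_valid(), check_write_begin() and cleanup() hooks described earlier.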

View File

@ -439,11 +439,9 @@ preemption. The following substitution works on both kernels::
spin_lock(&p->lock);
p->count += this_cpu_read(var2);
On a non-PREEMPT_RT kernel migrate_disable() maps to preempt_disable()
which makes the above code fully equivalent. On a PREEMPT_RT kernel
migrate_disable() ensures that the task is pinned on the current CPU which
in turn guarantees that the per-CPU access to var1 and var2 are staying on
the same CPU.
the same CPU while the task remains preemptible.
The migrate_disable() substitution is not valid for the following
scenario::
@ -456,9 +454,8 @@ scenario::
p = this_cpu_ptr(&var1);
p->val = func2();
While correct on a non-PREEMPT_RT kernel, this breaks on PREEMPT_RT because
here migrate_disable() does not protect against reentrancy from a
preempting task. A correct substitution for this case is::
This breaks because migrate_disable() does not protect against reentrancy from
a preempting task. A correct substitution for this case is::
func()
{

View File

@ -35,6 +35,7 @@ GNU make 3.81 make --version
binutils 2.23 ld -v
flex 2.5.35 flex --version
bison 2.0 bison --version
pahole 1.16 pahole --version
util-linux 2.10o fdformat --version
kmod 13 depmod -V
e2fsprogs 1.41.4 e2fsck -V
@ -108,6 +109,16 @@ Bison
Since Linux 4.16, the build system generates parsers
during build. This requires bison 2.0 or later.
pahole:
-------
Since Linux 5.2, if CONFIG_DEBUG_INFO_BTF is selected, the build system
generates BTF (BPF Type Format) from DWARF in vmlinux, a bit later from kernel
modules as well. This requires pahole v1.16 or later.
It is found in the 'dwarves' or 'pahole' distro packages or from
https://fedorapeople.org/~acme/dwarves/.
Perl
----

View File

@ -14,7 +14,8 @@ works, see Documentation/process/development-process.rst. Also, read
Documentation/process/submit-checklist.rst
for a list of items to check before submitting code. If you are submitting
a driver, also read Documentation/process/submitting-drivers.rst; for device
tree binding patches, read Documentation/process/submitting-patches.rst.
tree binding patches, read
Documentation/devicetree/bindings/submitting-patches.rst.
This documentation assumes that you're using ``git`` to prepare your patches.
If you're unfamiliar with ``git``, you would be well-advised to learn how to

View File

@ -9331,7 +9331,6 @@ S: Maintained
F: drivers/iio/pressure/dps310.c
INFINIBAND SUBSYSTEM
M: Doug Ledford <dledford@redhat.com>
M: Jason Gunthorpe <jgg@nvidia.com>
L: linux-rdma@vger.kernel.org
S: Supported
@ -10282,9 +10281,9 @@ F: lib/Kconfig.kcsan
F: scripts/Makefile.kcsan
KDUMP
M: Dave Young <dyoung@redhat.com>
M: Baoquan He <bhe@redhat.com>
R: Vivek Goyal <vgoyal@redhat.com>
R: Dave Young <dyoung@redhat.com>
L: kexec@lists.infradead.org
S: Maintained
W: http://lse.sourceforge.net/kdump/
@ -12182,8 +12181,8 @@ F: drivers/net/ethernet/mellanox/mlx5/core/fpga/*
F: include/linux/mlx5/mlx5_ifc_fpga.h
MELLANOX ETHERNET SWITCH DRIVERS
M: Jiri Pirko <jiri@nvidia.com>
M: Ido Schimmel <idosch@nvidia.com>
M: Petr Machata <petrm@nvidia.com>
L: netdev@vger.kernel.org
S: Supported
W: http://www.mellanox.com
@ -15773,6 +15772,15 @@ S: Maintained
F: Documentation/devicetree/bindings/net/qcom,ethqos.txt
F: drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
QUALCOMM FASTRPC DRIVER
M: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
M: Amol Maheshwari <amahesh@qti.qualcomm.com>
L: linux-arm-msm@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/misc/qcom,fastrpc.txt
F: drivers/misc/fastrpc.c
F: include/uapi/misc/fastrpc.h
QUALCOMM GENERIC INTERFACE I2C DRIVER
M: Akash Asthana <akashast@codeaurora.org>
M: Mukesh Savaliya <msavaliy@codeaurora.org>
@ -15981,6 +15989,7 @@ F: arch/mips/generic/board-ranchu.c
RANDOM NUMBER DRIVER
M: "Theodore Ts'o" <tytso@mit.edu>
M: Jason A. Donenfeld <Jason@zx2c4.com>
S: Maintained
F: drivers/char/random.c
@ -16503,6 +16512,12 @@ T: git git://linuxtv.org/media_tree.git
F: Documentation/devicetree/bindings/media/allwinner,sun8i-a83t-de2-rotate.yaml
F: drivers/media/platform/sunxi/sun8i-rotate/
RPMSG TTY DRIVER
M: Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
L: linux-remoteproc@vger.kernel.org
S: Maintained
F: drivers/tty/rpmsg_tty.c
RTL2830 MEDIA DRIVER
M: Antti Palosaari <crope@iki.fi>
L: linux-media@vger.kernel.org
@ -16624,8 +16639,8 @@ W: http://www.ibm.com/developerworks/linux/linux390/
F: drivers/iommu/s390-iommu.c
S390 IUCV NETWORK LAYER
M: Julian Wiedmann <jwi@linux.ibm.com>
M: Karsten Graul <kgraul@linux.ibm.com>
M: Alexandra Winter <wintera@linux.ibm.com>
M: Wenjia Zhang <wenjia@linux.ibm.com>
L: linux-s390@vger.kernel.org
L: netdev@vger.kernel.org
S: Supported
@ -16635,8 +16650,8 @@ F: include/net/iucv/
F: net/iucv/
S390 NETWORK DRIVERS
M: Julian Wiedmann <jwi@linux.ibm.com>
M: Karsten Graul <kgraul@linux.ibm.com>
M: Alexandra Winter <wintera@linux.ibm.com>
M: Wenjia Zhang <wenjia@linux.ibm.com>
L: linux-s390@vger.kernel.org
L: netdev@vger.kernel.org
S: Supported

View File

@ -2,7 +2,7 @@
VERSION = 5
PATCHLEVEL = 16
SUBLEVEL = 0
EXTRAVERSION = -rc3
EXTRAVERSION = -rc5
NAME = Gobble Gobble
# *DOCUMENTATION*
@ -789,7 +789,7 @@ stackp-flags-$(CONFIG_STACKPROTECTOR_STRONG) := -fstack-protector-strong
KBUILD_CFLAGS += $(stackp-flags-y)
KBUILD_CFLAGS-$(CONFIG_WERROR) += -Werror
KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH)
KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH:"%"=%)
ifdef CONFIG_CC_IS_CLANG
KBUILD_CPPFLAGS += -Qunused-arguments
@ -1374,17 +1374,17 @@ endif
ifneq ($(dtstree),)
%.dtb: dt_binding_check include/config/kernel.release scripts_dtc
$(Q)$(MAKE) $(build)=$(dtstree) $(dtstree)/$@ $(dtstree)/$*.dt.yaml
%.dtb: include/config/kernel.release scripts_dtc
$(Q)$(MAKE) $(build)=$(dtstree) $(dtstree)/$@
%.dtbo: dt_binding_check include/config/kernel.release scripts_dtc
$(Q)$(MAKE) $(build)=$(dtstree) $(dtstree)/$@ $(dtstree)/$*.dt.yaml
%.dtbo: include/config/kernel.release scripts_dtc
$(Q)$(MAKE) $(build)=$(dtstree) $(dtstree)/$@
PHONY += dtbs dtbs_install dtbs_check
dtbs: include/config/kernel.release scripts_dtc
$(Q)$(MAKE) $(build)=$(dtstree)
ifneq ($(filter dtbs_check %.dtb %.dtbo, $(MAKECMDGOALS)),)
ifneq ($(filter dtbs_check, $(MAKECMDGOALS)),)
export CHECK_DTBS=y
dtbs: dt_binding_check
endif

View File

@ -7,6 +7,7 @@
* Copyright The Asahi Linux Contributors
*/
#include <dt-bindings/gpio/gpio.h>
#include <dt-bindings/interrupt-controller/apple-aic.h>
#include <dt-bindings/interrupt-controller/irq.h>
#include <dt-bindings/pinctrl/apple.h>
@ -281,7 +282,7 @@
port00: pci@0,0 {
device_type = "pci";
reg = <0x0 0x0 0x0 0x0 0x0>;
reset-gpios = <&pinctrl_ap 152 0>;
reset-gpios = <&pinctrl_ap 152 GPIO_ACTIVE_LOW>;
max-link-speed = <2>;
#address-cells = <3>;
@ -301,7 +302,7 @@
port01: pci@1,0 {
device_type = "pci";
reg = <0x800 0x0 0x0 0x0 0x0>;
reset-gpios = <&pinctrl_ap 153 0>;
reset-gpios = <&pinctrl_ap 153 GPIO_ACTIVE_LOW>;
max-link-speed = <2>;
#address-cells = <3>;
@ -321,7 +322,7 @@
port02: pci@2,0 {
device_type = "pci";
reg = <0x1000 0x0 0x0 0x0 0x0>;
reset-gpios = <&pinctrl_ap 33 0>;
reset-gpios = <&pinctrl_ap 33 GPIO_ACTIVE_LOW>;
max-link-speed = <1>;
#address-cells = <3>;

View File

@ -91,7 +91,7 @@
#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
/* TCR_EL2 Registers bits */
#define TCR_EL2_RES1 ((1 << 31) | (1 << 23))
#define TCR_EL2_RES1 ((1U << 31) | (1 << 23))
#define TCR_EL2_TBI (1 << 20)
#define TCR_EL2_PS_SHIFT 16
#define TCR_EL2_PS_MASK (7 << TCR_EL2_PS_SHIFT)
@ -276,7 +276,7 @@
#define CPTR_EL2_TFP_SHIFT 10
/* Hyp Coprocessor Trap Register */
#define CPTR_EL2_TCPAC (1 << 31)
#define CPTR_EL2_TCPAC (1U << 31)
#define CPTR_EL2_TAM (1 << 30)
#define CPTR_EL2_TTA (1 << 20)
#define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT)

View File

@ -77,11 +77,17 @@
.endm
SYM_CODE_START(ftrace_regs_caller)
#ifdef BTI_C
BTI_C
#endif
ftrace_regs_entry 1
b ftrace_common
SYM_CODE_END(ftrace_regs_caller)
SYM_CODE_START(ftrace_caller)
#ifdef BTI_C
BTI_C
#endif
ftrace_regs_entry 0
b ftrace_common
SYM_CODE_END(ftrace_caller)

View File

@ -147,7 +147,7 @@ int machine_kexec_post_load(struct kimage *kimage)
if (rc)
return rc;
kimage->arch.ttbr1 = __pa(trans_pgd);
kimage->arch.zero_page = __pa(empty_zero_page);
kimage->arch.zero_page = __pa_symbol(empty_zero_page);
reloc_size = __relocate_new_kernel_end - __relocate_new_kernel_start;
memcpy(reloc_code, __relocate_new_kernel_start, reloc_size);

View File

@ -403,6 +403,8 @@ typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *);
static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu);
static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code);
/*
* Allow the hypervisor to handle the exit with an exit handler if it has one.
*
@ -429,6 +431,18 @@ static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
*/
static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
{
/*
* Save PSTATE early so that we can evaluate the vcpu mode
* early on.
*/
vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR);
/*
* Check whether we want to repaint the state one way or
* another.
*/
early_exit_filter(vcpu, exit_code);
if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);

View File

@ -70,7 +70,12 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
{
ctxt->regs.pc = read_sysreg_el2(SYS_ELR);
ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR);
/*
* Guest PSTATE gets saved at guest fixup time in all
* cases. We still need to handle the nVHE host side here.
*/
if (!has_vhe() && ctxt->__hyp_running_vcpu)
ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR);
if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
ctxt_sys_reg(ctxt, DISR_EL1) = read_sysreg_s(SYS_VDISR_EL2);

View File

@ -233,7 +233,7 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
* Returns false if the guest ran in AArch32 when it shouldn't have, and
* thus should exit to the host, or true if the guest run loop can continue.
*/
static bool handle_aarch32_guest(struct kvm_vcpu *vcpu, u64 *exit_code)
static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
{
struct kvm *kvm = kern_hyp_va(vcpu->kvm);
@ -248,10 +248,7 @@ static bool handle_aarch32_guest(struct kvm_vcpu *vcpu, u64 *exit_code)
vcpu->arch.target = -1;
*exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT);
*exit_code |= ARM_EXCEPTION_IL;
return false;
}
return true;
}
/* Switch to the guest for legacy non-VHE systems */
@ -316,9 +313,6 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
/* Jump in the fire! */
exit_code = __guest_enter(vcpu);
if (unlikely(!handle_aarch32_guest(vcpu, &exit_code)))
break;
/* And we're baaack! */
} while (fixup_guest_exit(vcpu, &exit_code));

View File

@ -112,6 +112,10 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
return hyp_exit_handlers;
}
static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
{
}
/* Switch to the guest for VHE systems running in EL2 */
static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
{

View File

@ -209,7 +209,7 @@ asmlinkage void do_trap_illinsn(struct pt_regs *regs)
asmlinkage void do_trap_fpe(struct pt_regs *regs)
{
#ifdef CONFIG_CPU_HAS_FP
#ifdef CONFIG_CPU_HAS_FPU
return fpu_fpe(regs);
#else
do_trap_error(regs, SIGILL, ILL_ILLOPC, regs->pc,
@ -219,7 +219,7 @@ asmlinkage void do_trap_fpe(struct pt_regs *regs)
asmlinkage void do_trap_priv(struct pt_regs *regs)
{
#ifdef CONFIG_CPU_HAS_FP
#ifdef CONFIG_CPU_HAS_FPU
if (user_mode(regs) && fpu_libc_helper(regs))
return;
#endif

View File

@ -98,7 +98,7 @@ do { \
#define emit(...) __emit(__VA_ARGS__)
/* Workaround for R10000 ll/sc errata */
#ifdef CONFIG_WAR_R10000
#ifdef CONFIG_WAR_R10000_LLSC
#define LLSC_beqz beqzl
#else
#define LLSC_beqz beqz

View File

@ -15,7 +15,12 @@
# Mike Shaver, Helge Deller and Martin K. Petersen
#
ifdef CONFIG_PARISC_SELF_EXTRACT
boot := arch/parisc/boot
KBUILD_IMAGE := $(boot)/bzImage
else
KBUILD_IMAGE := vmlinuz
endif
NM = sh $(srctree)/arch/parisc/nm
CHECKFLAGS += -D__hppa__=1

View File

@ -1,7 +1,9 @@
CONFIG_LOCALVERSION="-64bit"
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_KERNEL_LZ4=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_AUDIT=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_BSD_PROCESS_ACCT_V3=y
CONFIG_TASKSTATS=y
@ -35,6 +37,7 @@ CONFIG_MODVERSIONS=y
CONFIG_BLK_DEV_INTEGRITY=y
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
CONFIG_MEMORY_FAILURE=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@ -65,12 +68,15 @@ CONFIG_SCSI_ISCSI_ATTRS=y
CONFIG_SCSI_SRP_ATTRS=y
CONFIG_ISCSI_BOOT_SYSFS=y
CONFIG_SCSI_MPT2SAS=y
CONFIG_SCSI_LASI700=m
CONFIG_SCSI_LASI700=y
CONFIG_SCSI_SYM53C8XX_2=y
CONFIG_SCSI_ZALON=y
CONFIG_SCSI_QLA_ISCSI=m
CONFIG_SCSI_DH=y
CONFIG_ATA=y
CONFIG_SATA_SIL=y
CONFIG_SATA_SIS=y
CONFIG_SATA_VIA=y
CONFIG_PATA_NS87415=y
CONFIG_PATA_SIL680=y
CONFIG_ATA_GENERIC=y
@ -79,6 +85,7 @@ CONFIG_MD_LINEAR=m
CONFIG_BLK_DEV_DM=m
CONFIG_DM_RAID=m
CONFIG_DM_UEVENT=y
CONFIG_DM_AUDIT=y
CONFIG_FUSION=y
CONFIG_FUSION_SPI=y
CONFIG_FUSION_SAS=y
@ -196,10 +203,15 @@ CONFIG_FB_MATROX_G=y
CONFIG_FB_MATROX_I2C=y
CONFIG_FB_MATROX_MAVEN=y
CONFIG_FB_RADEON=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_CLUT224 is not set
CONFIG_HIDRAW=y
CONFIG_HID_PID=y
CONFIG_USB_HIDDEV=y
CONFIG_USB=y
CONFIG_USB_EHCI_HCD=y
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_OHCI_HCD_PLATFORM=y
CONFIG_UIO=y
CONFIG_UIO_PDRV_GENIRQ=m
CONFIG_UIO_AEC=m

View File

@ -39,6 +39,7 @@ verify "$3"
if [ -n "${INSTALLKERNEL}" ]; then
if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
if [ -x /usr/sbin/${INSTALLKERNEL} ]; then exec /usr/sbin/${INSTALLKERNEL} "$@"; fi
fi
# Default install

View File

@ -249,30 +249,16 @@ void __init time_init(void)
static int __init init_cr16_clocksource(void)
{
/*
* The cr16 interval timers are not synchronized across CPUs on
* different sockets, so mark them unstable and lower rating on
* multi-socket SMP systems.
* The cr16 interval timers are not synchronized across CPUs, even if
* they share the same socket.
*/
if (num_online_cpus() > 1 && !running_on_qemu) {
int cpu;
unsigned long cpu0_loc;
cpu0_loc = per_cpu(cpu_data, 0).cpu_loc;
/* mark sched_clock unstable */
clear_sched_clock_stable();
for_each_online_cpu(cpu) {
if (cpu == 0)
continue;
if ((cpu0_loc != 0) &&
(cpu0_loc == per_cpu(cpu_data, cpu).cpu_loc))
continue;
/* mark sched_clock unstable */
clear_sched_clock_stable();
clocksource_cr16.name = "cr16_unstable";
clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE;
clocksource_cr16.rating = 0;
break;
}
clocksource_cr16.name = "cr16_unstable";
clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE;
clocksource_cr16.rating = 0;
}
/* register at clocksource framework */

View File

@ -12,14 +12,12 @@
#include <linux/types.h>
#include <linux/kvm.h>
#include <linux/kvm_types.h>
#include <asm/csr.h>
#include <asm/kvm_vcpu_fp.h>
#include <asm/kvm_vcpu_timer.h>
#ifdef CONFIG_64BIT
#define KVM_MAX_VCPUS (1U << 16)
#else
#define KVM_MAX_VCPUS (1U << 9)
#endif
#define KVM_MAX_VCPUS \
((HGATP_VMID_MASK >> HGATP_VMID_SHIFT) + 1)
#define KVM_HALT_POLL_NS_DEFAULT 500000

View File

@ -453,6 +453,12 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot)
{
gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
phys_addr_t size = slot->npages << PAGE_SHIFT;
spin_lock(&kvm->mmu_lock);
stage2_unmap_range(kvm, gpa, size, false);
spin_unlock(&kvm->mmu_lock);
}
void kvm_arch_commit_memory_region(struct kvm *kvm,

View File

@ -403,7 +403,6 @@ CONFIG_DEVTMPFS=y
CONFIG_CONNECTOR=y
CONFIG_ZRAM=y
CONFIG_BLK_DEV_LOOP=m
CONFIG_BLK_DEV_CRYPTOLOOP=m
CONFIG_BLK_DEV_DRBD=m
CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_RAM=y
@ -476,6 +475,7 @@ CONFIG_MACVLAN=m
CONFIG_MACVTAP=m
CONFIG_VXLAN=m
CONFIG_BAREUDP=m
CONFIG_AMT=m
CONFIG_TUN=m
CONFIG_VETH=m
CONFIG_VIRTIO_NET=m
@ -489,6 +489,7 @@ CONFIG_NLMON=m
# CONFIG_NET_VENDOR_AMD is not set
# CONFIG_NET_VENDOR_AQUANTIA is not set
# CONFIG_NET_VENDOR_ARC is not set
# CONFIG_NET_VENDOR_ASIX is not set
# CONFIG_NET_VENDOR_ATHEROS is not set
# CONFIG_NET_VENDOR_BROADCOM is not set
# CONFIG_NET_VENDOR_BROCADE is not set
@ -571,6 +572,7 @@ CONFIG_WATCHDOG=y
CONFIG_WATCHDOG_NOWAYOUT=y
CONFIG_SOFT_WATCHDOG=m
CONFIG_DIAG288_WATCHDOG=m
# CONFIG_DRM_DEBUG_MODESET_LOCK is not set
CONFIG_FB=y
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
@ -775,12 +777,14 @@ CONFIG_CRC4=m
CONFIG_CRC7=m
CONFIG_CRC8=m
CONFIG_RANDOM32_SELFTEST=y
CONFIG_XZ_DEC_MICROLZMA=y
CONFIG_DMA_CMA=y
CONFIG_CMA_SIZE_MBYTES=0
CONFIG_PRINTK_TIME=y
CONFIG_DYNAMIC_DEBUG=y
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_INFO_DWARF4=y
CONFIG_DEBUG_INFO_BTF=y
CONFIG_GDB_SCRIPTS=y
CONFIG_HEADERS_INSTALL=y
CONFIG_DEBUG_SECTION_MISMATCH=y
@ -807,6 +811,7 @@ CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
CONFIG_DEBUG_PER_CPU_MAPS=y
CONFIG_KFENCE=y
CONFIG_KFENCE_STATIC_KEYS=y
CONFIG_DEBUG_SHIRQ=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_DETECT_HUNG_TASK=y
@ -842,6 +847,7 @@ CONFIG_FTRACE_STARTUP_TEST=y
CONFIG_SAMPLES=y
CONFIG_SAMPLE_TRACE_PRINTK=m
CONFIG_SAMPLE_FTRACE_DIRECT=m
CONFIG_SAMPLE_FTRACE_DIRECT_MULTI=m
CONFIG_DEBUG_ENTRY=y
CONFIG_CIO_INJECT=y
CONFIG_KUNIT=m
@ -860,7 +866,7 @@ CONFIG_FAIL_FUNCTION=y
CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y
CONFIG_LKDTM=m
CONFIG_TEST_MIN_HEAP=y
CONFIG_KPROBES_SANITY_TEST=y
CONFIG_KPROBES_SANITY_TEST=m
CONFIG_RBTREE_TEST=y
CONFIG_INTERVAL_TREE_TEST=m
CONFIG_PERCPU_TEST=m

View File

@ -394,7 +394,6 @@ CONFIG_DEVTMPFS=y
CONFIG_CONNECTOR=y
CONFIG_ZRAM=y
CONFIG_BLK_DEV_LOOP=m
CONFIG_BLK_DEV_CRYPTOLOOP=m
CONFIG_BLK_DEV_DRBD=m
CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_RAM=y
@ -467,6 +466,7 @@ CONFIG_MACVLAN=m
CONFIG_MACVTAP=m
CONFIG_VXLAN=m
CONFIG_BAREUDP=m
CONFIG_AMT=m
CONFIG_TUN=m
CONFIG_VETH=m
CONFIG_VIRTIO_NET=m
@ -480,6 +480,7 @@ CONFIG_NLMON=m
# CONFIG_NET_VENDOR_AMD is not set
# CONFIG_NET_VENDOR_AQUANTIA is not set
# CONFIG_NET_VENDOR_ARC is not set
# CONFIG_NET_VENDOR_ASIX is not set
# CONFIG_NET_VENDOR_ATHEROS is not set
# CONFIG_NET_VENDOR_BROADCOM is not set
# CONFIG_NET_VENDOR_BROCADE is not set
@ -762,12 +763,14 @@ CONFIG_PRIME_NUMBERS=m
CONFIG_CRC4=m
CONFIG_CRC7=m
CONFIG_CRC8=m
CONFIG_XZ_DEC_MICROLZMA=y
CONFIG_DMA_CMA=y
CONFIG_CMA_SIZE_MBYTES=0
CONFIG_PRINTK_TIME=y
CONFIG_DYNAMIC_DEBUG=y
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_INFO_DWARF4=y
CONFIG_DEBUG_INFO_BTF=y
CONFIG_GDB_SCRIPTS=y
CONFIG_DEBUG_SECTION_MISMATCH=y
CONFIG_MAGIC_SYSRQ=y
@ -792,9 +795,11 @@ CONFIG_HIST_TRIGGERS=y
CONFIG_SAMPLES=y
CONFIG_SAMPLE_TRACE_PRINTK=m
CONFIG_SAMPLE_FTRACE_DIRECT=m
CONFIG_SAMPLE_FTRACE_DIRECT_MULTI=m
CONFIG_KUNIT=m
CONFIG_KUNIT_DEBUGFS=y
CONFIG_LKDTM=m
CONFIG_KPROBES_SANITY_TEST=m
CONFIG_PERCPU_TEST=m
CONFIG_ATOMIC64_SELFTEST=y
CONFIG_TEST_BPF=m

View File

@ -65,9 +65,11 @@ CONFIG_ZFCP=y
# CONFIG_NETWORK_FILESYSTEMS is not set
CONFIG_LSM="yama,loadpin,safesetid,integrity"
# CONFIG_ZLIB_DFLTCC is not set
CONFIG_XZ_DEC_MICROLZMA=y
CONFIG_PRINTK_TIME=y
# CONFIG_SYMBOLIC_ERRNAME is not set
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_INFO_BTF=y
CONFIG_DEBUG_FS=y
CONFIG_DEBUG_KERNEL=y
CONFIG_PANIC_ON_OOPS=y

View File

@ -14,12 +14,13 @@
/* I/O Map */
#define ZPCI_IOMAP_SHIFT 48
#define ZPCI_IOMAP_ADDR_BASE 0x8000000000000000UL
#define ZPCI_IOMAP_ADDR_SHIFT 62
#define ZPCI_IOMAP_ADDR_BASE (1UL << ZPCI_IOMAP_ADDR_SHIFT)
#define ZPCI_IOMAP_ADDR_OFF_MASK ((1UL << ZPCI_IOMAP_SHIFT) - 1)
#define ZPCI_IOMAP_MAX_ENTRIES \
((ULONG_MAX - ZPCI_IOMAP_ADDR_BASE + 1) / (1UL << ZPCI_IOMAP_SHIFT))
(1UL << (ZPCI_IOMAP_ADDR_SHIFT - ZPCI_IOMAP_SHIFT))
#define ZPCI_IOMAP_ADDR_IDX_MASK \
(~ZPCI_IOMAP_ADDR_OFF_MASK - ZPCI_IOMAP_ADDR_BASE)
((ZPCI_IOMAP_ADDR_BASE - 1) & ~ZPCI_IOMAP_ADDR_OFF_MASK)
struct zpci_iomap_entry {
u32 fh;

View File

@ -173,10 +173,11 @@ static noinline int unwindme_func4(struct unwindme *u)
}
/*
* trigger specification exception
* Trigger operation exception; use insn notation to bypass
* llvm's integrated assembler sanity checks.
*/
asm volatile(
" mvcl %%r1,%%r1\n"
" .insn e,0x0000\n" /* illegal opcode */
"0: nopr %%r7\n"
EX_TABLE(0b, 0b)
:);

View File

@ -1932,6 +1932,7 @@ config EFI
depends on ACPI
select UCS2_STRING
select EFI_RUNTIME_WRAPPERS
select ARCH_USE_MEMREMAP_PROT
help
This enables the kernel to use EFI runtime services that are
available (such as the EFI variable services).

View File

@ -574,6 +574,10 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
ud2
1:
#endif
#ifdef CONFIG_XEN_PV
ALTERNATIVE "", "jmp xenpv_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
#endif
POP_REGS pop_rdi=0
/*
@ -890,6 +894,7 @@ SYM_CODE_START_LOCAL(paranoid_entry)
.Lparanoid_entry_checkgs:
/* EBX = 1 -> kernel GSBASE active, no restore required */
movl $1, %ebx
/*
* The kernel-enforced convention is a negative GSBASE indicates
* a kernel value. No SWAPGS needed on entry and exit.
@ -897,21 +902,14 @@ SYM_CODE_START_LOCAL(paranoid_entry)
movl $MSR_GS_BASE, %ecx
rdmsr
testl %edx, %edx
jns .Lparanoid_entry_swapgs
ret
.Lparanoid_entry_swapgs:
swapgs
/*
* The above SAVE_AND_SWITCH_TO_KERNEL_CR3 macro doesn't do an
* unconditional CR3 write, even in the PTI case. So do an lfence
* to prevent GS speculation, regardless of whether PTI is enabled.
*/
FENCE_SWAPGS_KERNEL_ENTRY
js .Lparanoid_kernel_gsbase
/* EBX = 0 -> SWAPGS required on exit */
xorl %ebx, %ebx
swapgs
.Lparanoid_kernel_gsbase:
FENCE_SWAPGS_KERNEL_ENTRY
ret
SYM_CODE_END(paranoid_entry)
@ -993,11 +991,6 @@ SYM_CODE_START_LOCAL(error_entry)
pushq %r12
ret
.Lerror_entry_done_lfence:
FENCE_SWAPGS_KERNEL_ENTRY
.Lerror_entry_done:
ret
/*
* There are two places in the kernel that can potentially fault with
* usergs. Handle them here. B stepping K8s sometimes report a
@ -1020,8 +1013,14 @@ SYM_CODE_START_LOCAL(error_entry)
* .Lgs_change's error handler with kernel gsbase.
*/
SWAPGS
FENCE_SWAPGS_USER_ENTRY
jmp .Lerror_entry_done
/*
* Issue an LFENCE to prevent GS speculation, regardless of whether it is a
* kernel or user gsbase.
*/
.Lerror_entry_done_lfence:
FENCE_SWAPGS_KERNEL_ENTRY
ret
.Lbstep_iret:
/* Fix truncated RIP */

View File

@ -108,7 +108,7 @@
#define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */
#define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */
#define INTEL_FAM6_RAPTOR_LAKE 0xB7
#define INTEL_FAM6_RAPTORLAKE 0xB7
/* "Small Core" Processors (Atom) */

View File

@ -97,7 +97,7 @@
KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_TLB_FLUSH_CURRENT KVM_ARCH_REQ(26)
#define KVM_REQ_TLB_FLUSH_GUEST \
KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP)
KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_APF_READY KVM_ARCH_REQ(28)
#define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29)
#define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \
@ -1036,6 +1036,7 @@ struct kvm_x86_msr_filter {
#define APICV_INHIBIT_REASON_PIT_REINJ 4
#define APICV_INHIBIT_REASON_X2APIC 5
#define APICV_INHIBIT_REASON_BLOCKIRQ 6
#define APICV_INHIBIT_REASON_ABSENT 7
struct kvm_arch {
unsigned long n_used_mmu_pages;

View File

@ -73,4 +73,15 @@
#define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK)
/*
* Error codes related to GHCB input that can be communicated back to the guest
* by setting the lower 32-bits of the GHCB SW_EXITINFO1 field to 2.
*/
#define GHCB_ERR_NOT_REGISTERED 1
#define GHCB_ERR_INVALID_USAGE 2
#define GHCB_ERR_INVALID_SCRATCH_AREA 3
#define GHCB_ERR_MISSING_INPUT 4
#define GHCB_ERR_INVALID_INPUT 5
#define GHCB_ERR_INVALID_EVENT 6
#endif

View File

@ -118,7 +118,7 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame,
struct fpstate *fpstate)
{
struct xregs_state __user *x = buf;
struct _fpx_sw_bytes sw_bytes;
struct _fpx_sw_bytes sw_bytes = {};
u32 xfeatures;
int err;

View File

@ -294,11 +294,6 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
char *dst, char *buf, size_t size)
{
unsigned long error_code = X86_PF_PROT | X86_PF_WRITE;
char __user *target = (char __user *)dst;
u64 d8;
u32 d4;
u16 d2;
u8 d1;
/*
* This function uses __put_user() independent of whether kernel or user
@ -320,26 +315,42 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
* instructions here would cause infinite nesting.
*/
switch (size) {
case 1:
case 1: {
u8 d1;
u8 __user *target = (u8 __user *)dst;
memcpy(&d1, buf, 1);
if (__put_user(d1, target))
goto fault;
break;
case 2:
}
case 2: {
u16 d2;
u16 __user *target = (u16 __user *)dst;
memcpy(&d2, buf, 2);
if (__put_user(d2, target))
goto fault;
break;
case 4:
}
case 4: {
u32 d4;
u32 __user *target = (u32 __user *)dst;
memcpy(&d4, buf, 4);
if (__put_user(d4, target))
goto fault;
break;
case 8:
}
case 8: {
u64 d8;
u64 __user *target = (u64 __user *)dst;
memcpy(&d8, buf, 8);
if (__put_user(d8, target))
goto fault;
break;
}
default:
WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
return ES_UNSUPPORTED;
@ -362,11 +373,6 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
char *src, char *buf, size_t size)
{
unsigned long error_code = X86_PF_PROT;
char __user *s = (char __user *)src;
u64 d8;
u32 d4;
u16 d2;
u8 d1;
/*
* This function uses __get_user() independent of whether kernel or user
@ -388,26 +394,41 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
* instructions here would cause infinite nesting.
*/
switch (size) {
case 1:
case 1: {
u8 d1;
u8 __user *s = (u8 __user *)src;
if (__get_user(d1, s))
goto fault;
memcpy(buf, &d1, 1);
break;
case 2:
}
case 2: {
u16 d2;
u16 __user *s = (u16 __user *)src;
if (__get_user(d2, s))
goto fault;
memcpy(buf, &d2, 2);
break;
case 4:
}
case 4: {
u32 d4;
u32 __user *s = (u32 __user *)src;
if (__get_user(d4, s))
goto fault;
memcpy(buf, &d4, 4);
break;
case 8:
}
case 8: {
u64 d8;
u64 __user *s = (u64 __user *)src;
if (__get_user(d8, s))
goto fault;
memcpy(buf, &d8, 8);
break;
}
default:
WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
return ES_UNSUPPORTED;

View File

@ -579,6 +579,17 @@ static struct sched_domain_topology_level x86_numa_in_package_topology[] = {
{ NULL, },
};
static struct sched_domain_topology_level x86_hybrid_topology[] = {
#ifdef CONFIG_SCHED_SMT
{ cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
#endif
#ifdef CONFIG_SCHED_MC
{ cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
#endif
{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
{ NULL, },
};
static struct sched_domain_topology_level x86_topology[] = {
#ifdef CONFIG_SCHED_SMT
{ cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
@ -1469,8 +1480,11 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
calculate_max_logical_packages();
/* XXX for now assume numa-in-package and hybrid don't overlap */
if (x86_has_numa_in_package)
set_sched_topology(x86_numa_in_package_topology);
if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
set_sched_topology(x86_hybrid_topology);
nmi_selftest();
impress_friends();

View File

@ -1180,6 +1180,12 @@ void mark_tsc_unstable(char *reason)
EXPORT_SYMBOL_GPL(mark_tsc_unstable);
static void __init tsc_disable_clocksource_watchdog(void)
{
clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
}
static void __init check_system_tsc_reliable(void)
{
#if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC)
@ -1196,6 +1202,23 @@ static void __init check_system_tsc_reliable(void)
#endif
if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
tsc_clocksource_reliable = 1;
/*
* Disable the clocksource watchdog when the system has:
* - TSC running at constant frequency
* - TSC which does not stop in C-States
* - the TSC_ADJUST register which allows to detect even minimal
* modifications
* - not more than two sockets. As the number of sockets cannot be
* evaluated at the early boot stage where this has to be
* invoked, check the number of online memory nodes as a
* fallback solution which is a reasonable estimate.
*/
if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
boot_cpu_has(X86_FEATURE_NONSTOP_TSC) &&
boot_cpu_has(X86_FEATURE_TSC_ADJUST) &&
nr_online_nodes <= 2)
tsc_disable_clocksource_watchdog();
}
/*
@ -1387,9 +1410,6 @@ static int __init init_tsc_clocksource(void)
if (tsc_unstable)
goto unreg;
if (tsc_clocksource_reliable || no_tsc_watchdog)
clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;
@ -1527,7 +1547,7 @@ void __init tsc_init(void)
}
if (tsc_clocksource_reliable || no_tsc_watchdog)
clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
tsc_disable_clocksource_watchdog();
clocksource_register_khz(&clocksource_tsc_early, tsc_khz);
detect_art();

View File

@ -30,6 +30,7 @@ struct tsc_adjust {
};
static DEFINE_PER_CPU(struct tsc_adjust, tsc_adjust);
static struct timer_list tsc_sync_check_timer;
/*
* TSC's on different sockets may be reset asynchronously.
@ -77,6 +78,46 @@ void tsc_verify_tsc_adjust(bool resume)
}
}
/*
* Normally the tsc_sync will be checked every time the system enters idle
* state, but there is still a caveat that a system won't enter idle,
* either because it's too busy or configured purposely to not enter
* idle.
*
* So setup a periodic timer (every 10 minutes) to make sure the check
* is always on.
*/
#define SYNC_CHECK_INTERVAL (HZ * 600)
static void tsc_sync_check_timer_fn(struct timer_list *unused)
{
int next_cpu;
tsc_verify_tsc_adjust(false);
/* Run the check for all onlined CPUs in turn */
next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
if (next_cpu >= nr_cpu_ids)
next_cpu = cpumask_first(cpu_online_mask);
tsc_sync_check_timer.expires += SYNC_CHECK_INTERVAL;
add_timer_on(&tsc_sync_check_timer, next_cpu);
}
static int __init start_sync_check_timer(void)
{
if (!cpu_feature_enabled(X86_FEATURE_TSC_ADJUST) || tsc_clocksource_reliable)
return 0;
timer_setup(&tsc_sync_check_timer, tsc_sync_check_timer_fn, 0);
tsc_sync_check_timer.expires = jiffies + SYNC_CHECK_INTERVAL;
add_timer(&tsc_sync_check_timer);
return 0;
}
late_initcall(start_sync_check_timer);
static void tsc_sanitize_first_cpu(struct tsc_adjust *cur, s64 bootval,
unsigned int cpu, bool bootcpu)
{

View File

@ -1922,11 +1922,13 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL;
if (all_cpus)
goto check_and_send_ipi;
if (!sparse_banks_len)
goto ret_success;
if (!all_cpus &&
kvm_read_guest(kvm,
if (kvm_read_guest(kvm,
hc->ingpa + offsetof(struct hv_send_ipi_ex,
vp_set.bank_contents),
sparse_banks,
@ -1934,6 +1936,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
return HV_STATUS_INVALID_HYPERCALL_INPUT;
}
check_and_send_ipi:
if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
return HV_STATUS_INVALID_HYPERCALL_INPUT;

View File

@ -81,7 +81,6 @@ struct kvm_ioapic {
unsigned long irq_states[IOAPIC_NUM_PINS];
struct kvm_io_device dev;
struct kvm *kvm;
void (*ack_notifier)(void *opaque, int irq);
spinlock_t lock;
struct rtc_status rtc_status;
struct delayed_work eoi_inject;

View File

@ -56,7 +56,6 @@ struct kvm_pic {
struct kvm_io_device dev_master;
struct kvm_io_device dev_slave;
struct kvm_io_device dev_elcr;
void (*ack_notifier)(void *opaque, int irq);
unsigned long irq_states[PIC_NUM_PINS];
};

View File

@ -707,7 +707,7 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
{
int highest_irr;
if (apic->vcpu->arch.apicv_active)
if (kvm_x86_ops.sync_pir_to_irr)
highest_irr = static_call(kvm_x86_sync_pir_to_irr)(apic->vcpu);
else
highest_irr = apic_find_highest_irr(apic);

View File

@ -1582,7 +1582,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
flush = kvm_handle_gfn_range(kvm, range, kvm_unmap_rmapp);
if (is_tdp_mmu_enabled(kvm))
flush |= kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush);
flush = kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush);
return flush;
}
@ -1936,7 +1936,11 @@ static void mmu_audit_disable(void) { }
static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
{
return sp->role.invalid ||
if (sp->role.invalid)
return true;
/* TDP MMU pages do not use the MMU generation. */
return !sp->tdp_mmu_page &&
unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
}
@ -2173,10 +2177,10 @@ static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterato
iterator->shadow_addr = root;
iterator->level = vcpu->arch.mmu->shadow_root_level;
if (iterator->level == PT64_ROOT_4LEVEL &&
if (iterator->level >= PT64_ROOT_4LEVEL &&
vcpu->arch.mmu->root_level < PT64_ROOT_4LEVEL &&
!vcpu->arch.mmu->direct_map)
--iterator->level;
iterator->level = PT32E_ROOT_LEVEL;
if (iterator->level == PT32E_ROOT_LEVEL) {
/*
@ -3976,6 +3980,20 @@ out_retry:
return true;
}
/*
* Returns true if the page fault is stale and needs to be retried, i.e. if the
* root was invalidated by a memslot update or a relevant mmu_notifier fired.
*/
static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
struct kvm_page_fault *fault, int mmu_seq)
{
if (is_obsolete_sp(vcpu->kvm, to_shadow_page(vcpu->arch.mmu->root_hpa)))
return true;
return fault->slot &&
mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva);
}
static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
{
bool is_tdp_mmu_fault = is_tdp_mmu(vcpu->arch.mmu);
@ -4013,8 +4031,9 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
else
write_lock(&vcpu->kvm->mmu_lock);
if (fault->slot && mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva))
if (is_page_fault_stale(vcpu, fault, mmu_seq))
goto out_unlock;
r = make_mmu_pages_available(vcpu);
if (r)
goto out_unlock;
@ -4855,7 +4874,7 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
struct kvm_mmu *context = &vcpu->arch.guest_mmu;
struct kvm_mmu_role_regs regs = {
.cr0 = cr0,
.cr4 = cr4,
.cr4 = cr4 & ~X86_CR4_PKE,
.efer = efer,
};
union kvm_mmu_role new_role;
@ -4919,7 +4938,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
context->direct_map = false;
update_permission_bitmask(context, true);
update_pkru_bitmask(context);
context->pkru_mask = 0;
reset_rsvds_bits_mask_ept(vcpu, context, execonly);
reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
}
@ -5025,6 +5044,14 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
/*
* Invalidate all MMU roles to force them to reinitialize as CPUID
* information is factored into reserved bit calculations.
*
* Correctly handling multiple vCPU models (with respect to paging and
* physical address properties) in a single VM would require tracking
* all relevant CPUID information in kvm_mmu_page_role. That is very
* undesirable as it would increase the memory requirements for
* gfn_track (see struct kvm_mmu_page_role comments). For now that
* problem is swept under the rug; KVM's CPUID API is horrific and
* it's all but impossible to solve it without introducing a new API.
*/
vcpu->arch.root_mmu.mmu_role.ext.valid = 0;
vcpu->arch.guest_mmu.mmu_role.ext.valid = 0;
@ -5032,24 +5059,10 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
kvm_mmu_reset_context(vcpu);
/*
* KVM does not correctly handle changing guest CPUID after KVM_RUN, as
* MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc.. aren't
* tracked in kvm_mmu_page_role. As a result, KVM may miss guest page
* faults due to reusing SPs/SPTEs. Alert userspace, but otherwise
* sweep the problem under the rug.
*
* KVM's horrific CPUID ABI makes the problem all but impossible to
* solve, as correctly handling multiple vCPU models (with respect to
* paging and physical address properties) in a single VM would require
* tracking all relevant CPUID information in kvm_mmu_page_role. That
* is very undesirable as it would double the memory requirements for
* gfn_track (see struct kvm_mmu_page_role comments), and in practice
* no sane VMM mucks with the core vCPU model on the fly.
* Changing guest CPUID after KVM_RUN is forbidden, see the comment in
* kvm_arch_vcpu_ioctl().
*/
if (vcpu->arch.last_vmentry_cpu != -1) {
pr_warn_ratelimited("KVM: KVM_SET_CPUID{,2} after KVM_RUN may cause guest instability\n");
pr_warn_ratelimited("KVM: KVM_SET_CPUID{,2} will fail after KVM_RUN starting with Linux 5.16\n");
}
KVM_BUG_ON(vcpu->arch.last_vmentry_cpu != -1, vcpu->kvm);
}
void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
@ -5369,7 +5382,7 @@ void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
{
kvm_mmu_invalidate_gva(vcpu, vcpu->arch.mmu, gva, INVALID_PAGE);
kvm_mmu_invalidate_gva(vcpu, vcpu->arch.walk_mmu, gva, INVALID_PAGE);
++vcpu->stat.invlpg;
}
EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);
@ -5854,8 +5867,6 @@ restart:
void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
const struct kvm_memory_slot *slot)
{
bool flush = false;
if (kvm_memslots_have_rmaps(kvm)) {
write_lock(&kvm->mmu_lock);
/*
@ -5863,17 +5874,14 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
* logging at a 4k granularity and never creates collapsible
* 2m SPTEs during dirty logging.
*/
flush = slot_handle_level_4k(kvm, slot, kvm_mmu_zap_collapsible_spte, true);
if (flush)
if (slot_handle_level_4k(kvm, slot, kvm_mmu_zap_collapsible_spte, true))
kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
write_unlock(&kvm->mmu_lock);
}
if (is_tdp_mmu_enabled(kvm)) {
read_lock(&kvm->mmu_lock);
flush = kvm_tdp_mmu_zap_collapsible_sptes(kvm, slot, flush);
if (flush)
kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
kvm_tdp_mmu_zap_collapsible_sptes(kvm, slot);
read_unlock(&kvm->mmu_lock);
}
}
@ -6182,23 +6190,46 @@ void kvm_mmu_module_exit(void)
mmu_audit_disable();
}
/*
* Calculate the effective recovery period, accounting for '0' meaning "let KVM
* select a halving time of 1 hour". Returns true if recovery is enabled.
*/
static bool calc_nx_huge_pages_recovery_period(uint *period)
{
/*
* Use READ_ONCE to get the params, this may be called outside of the
* param setters, e.g. by the kthread to compute its next timeout.
*/
bool enabled = READ_ONCE(nx_huge_pages);
uint ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
if (!enabled || !ratio)
return false;
*period = READ_ONCE(nx_huge_pages_recovery_period_ms);
if (!*period) {
/* Make sure the period is not less than one second. */
ratio = min(ratio, 3600u);
*period = 60 * 60 * 1000 / ratio;
}
return true;
}
static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel_param *kp)
{
bool was_recovery_enabled, is_recovery_enabled;
uint old_period, new_period;
int err;
was_recovery_enabled = nx_huge_pages_recovery_ratio;
old_period = nx_huge_pages_recovery_period_ms;
was_recovery_enabled = calc_nx_huge_pages_recovery_period(&old_period);
err = param_set_uint(val, kp);
if (err)
return err;
is_recovery_enabled = nx_huge_pages_recovery_ratio;
new_period = nx_huge_pages_recovery_period_ms;
is_recovery_enabled = calc_nx_huge_pages_recovery_period(&new_period);
if (READ_ONCE(nx_huge_pages) && is_recovery_enabled &&
if (is_recovery_enabled &&
(!was_recovery_enabled || old_period > new_period)) {
struct kvm *kvm;
@ -6262,18 +6293,13 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
static long get_nx_lpage_recovery_timeout(u64 start_time)
{
uint ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
uint period = READ_ONCE(nx_huge_pages_recovery_period_ms);
bool enabled;
uint period;
if (!period && ratio) {
/* Make sure the period is not less than one second. */
ratio = min(ratio, 3600u);
period = 60 * 60 * 1000 / ratio;
}
enabled = calc_nx_huge_pages_recovery_period(&period);
return READ_ONCE(nx_huge_pages) && ratio
? start_time + msecs_to_jiffies(period) - get_jiffies_64()
: MAX_SCHEDULE_TIMEOUT;
return enabled ? start_time + msecs_to_jiffies(period) - get_jiffies_64()
: MAX_SCHEDULE_TIMEOUT;
}
static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data)

View File

@ -911,7 +911,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
r = RET_PF_RETRY;
write_lock(&vcpu->kvm->mmu_lock);
if (fault->slot && mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva))
if (is_page_fault_stale(vcpu, fault, mmu_seq))
goto out_unlock;
kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);

View File

@ -317,9 +317,6 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(pt));
int level = sp->role.level;
gfn_t base_gfn = sp->gfn;
u64 old_child_spte;
u64 *sptep;
gfn_t gfn;
int i;
trace_kvm_mmu_prepare_zap_page(sp);
@ -327,8 +324,9 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
tdp_mmu_unlink_page(kvm, sp, shared);
for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
sptep = rcu_dereference(pt) + i;
gfn = base_gfn + i * KVM_PAGES_PER_HPAGE(level);
u64 *sptep = rcu_dereference(pt) + i;
gfn_t gfn = base_gfn + i * KVM_PAGES_PER_HPAGE(level);
u64 old_child_spte;
if (shared) {
/*
@ -374,7 +372,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
shared);
}
kvm_flush_remote_tlbs_with_address(kvm, gfn,
kvm_flush_remote_tlbs_with_address(kvm, base_gfn,
KVM_PAGES_PER_HPAGE(level + 1));
call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback);
@ -1033,9 +1031,9 @@ bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range,
{
struct kvm_mmu_page *root;
for_each_tdp_mmu_root(kvm, root, range->slot->as_id)
flush |= zap_gfn_range(kvm, root, range->start, range->end,
range->may_block, flush, false);
for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, false)
flush = zap_gfn_range(kvm, root, range->start, range->end,
range->may_block, flush, false);
return flush;
}
@ -1364,10 +1362,9 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
* Clear leaf entries which could be replaced by large mappings, for
* GFNs within the slot.
*/
static bool zap_collapsible_spte_range(struct kvm *kvm,
static void zap_collapsible_spte_range(struct kvm *kvm,
struct kvm_mmu_page *root,
const struct kvm_memory_slot *slot,
bool flush)
const struct kvm_memory_slot *slot)
{
gfn_t start = slot->base_gfn;
gfn_t end = start + slot->npages;
@ -1378,10 +1375,8 @@ static bool zap_collapsible_spte_range(struct kvm *kvm,
tdp_root_for_each_pte(iter, root, start, end) {
retry:
if (tdp_mmu_iter_cond_resched(kvm, &iter, flush, true)) {
flush = false;
if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
continue;
}
if (!is_shadow_present_pte(iter.old_spte) ||
!is_last_spte(iter.old_spte, iter.level))
@ -1393,6 +1388,7 @@ retry:
pfn, PG_LEVEL_NUM))
continue;
/* Note, a successful atomic zap also does a remote TLB flush. */
if (!tdp_mmu_zap_spte_atomic(kvm, &iter)) {
/*
* The iter must explicitly re-read the SPTE because
@ -1401,30 +1397,24 @@ retry:
iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
goto retry;
}
flush = true;
}
rcu_read_unlock();
return flush;
}
/*
* Clear non-leaf entries (and free associated page tables) which could
* be replaced by large mappings, for GFNs within the slot.
*/
bool kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
const struct kvm_memory_slot *slot,
bool flush)
void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
const struct kvm_memory_slot *slot)
{
struct kvm_mmu_page *root;
lockdep_assert_held_read(&kvm->mmu_lock);
for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
flush = zap_collapsible_spte_range(kvm, root, slot, flush);
return flush;
zap_collapsible_spte_range(kvm, root, slot);
}
/*

View File

@ -64,9 +64,8 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *slot,
gfn_t gfn, unsigned long mask,
bool wrprot);
bool kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
const struct kvm_memory_slot *slot,
bool flush);
void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
const struct kvm_memory_slot *slot);
bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn,

View File

@ -900,6 +900,7 @@ out:
bool svm_check_apicv_inhibit_reasons(ulong bit)
{
ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
BIT(APICV_INHIBIT_REASON_ABSENT) |
BIT(APICV_INHIBIT_REASON_HYPERV) |
BIT(APICV_INHIBIT_REASON_NESTED) |
BIT(APICV_INHIBIT_REASON_IRQWIN) |
@ -989,16 +990,18 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
{
struct vcpu_svm *svm = to_svm(vcpu);
int cpu = get_cpu();
WARN_ON(cpu != vcpu->cpu);
svm->avic_is_running = is_run;
if (!kvm_vcpu_apicv_active(vcpu))
return;
if (is_run)
avic_vcpu_load(vcpu, vcpu->cpu);
else
avic_vcpu_put(vcpu);
if (kvm_vcpu_apicv_active(vcpu)) {
if (is_run)
avic_vcpu_load(vcpu, cpu);
else
avic_vcpu_put(vcpu);
}
put_cpu();
}
void svm_vcpu_blocking(struct kvm_vcpu *vcpu)


@ -281,7 +281,7 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS;
pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1;
pmu->reserved_bits = 0xffffffff00200000ull;
pmu->reserved_bits = 0xfffffff000280000ull;
pmu->version = 1;
/* not applicable to AMD; but clean them to prevent any fall out */
pmu->counter_bitmask[KVM_PMC_FIXED] = 0;


@ -1543,28 +1543,50 @@ static bool is_cmd_allowed_from_mirror(u32 cmd_id)
return false;
}
static int sev_lock_for_migration(struct kvm *kvm)
static int sev_lock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info;
struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info;
int r = -EBUSY;
if (dst_kvm == src_kvm)
return -EINVAL;
/*
* Bail if this VM is already involved in a migration to avoid deadlock
* between two VMs trying to migrate to/from each other.
* Bail if these VMs are already involved in a migration to avoid
* deadlock between two VMs trying to migrate to/from each other.
*/
if (atomic_cmpxchg_acquire(&sev->migration_in_progress, 0, 1))
if (atomic_cmpxchg_acquire(&dst_sev->migration_in_progress, 0, 1))
return -EBUSY;
mutex_lock(&kvm->lock);
if (atomic_cmpxchg_acquire(&src_sev->migration_in_progress, 0, 1))
goto release_dst;
r = -EINTR;
if (mutex_lock_killable(&dst_kvm->lock))
goto release_src;
if (mutex_lock_killable(&src_kvm->lock))
goto unlock_dst;
return 0;
unlock_dst:
mutex_unlock(&dst_kvm->lock);
release_src:
atomic_set_release(&src_sev->migration_in_progress, 0);
release_dst:
atomic_set_release(&dst_sev->migration_in_progress, 0);
return r;
}
static void sev_unlock_after_migration(struct kvm *kvm)
static void sev_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info;
struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info;
mutex_unlock(&kvm->lock);
atomic_set_release(&sev->migration_in_progress, 0);
mutex_unlock(&dst_kvm->lock);
mutex_unlock(&src_kvm->lock);
atomic_set_release(&dst_sev->migration_in_progress, 0);
atomic_set_release(&src_sev->migration_in_progress, 0);
}
@ -1607,14 +1629,15 @@ static void sev_migrate_from(struct kvm_sev_info *dst,
dst->asid = src->asid;
dst->handle = src->handle;
dst->pages_locked = src->pages_locked;
dst->enc_context_owner = src->enc_context_owner;
src->asid = 0;
src->active = false;
src->handle = 0;
src->pages_locked = 0;
src->enc_context_owner = NULL;
INIT_LIST_HEAD(&dst->regions_list);
list_replace_init(&src->regions_list, &dst->regions_list);
list_cut_before(&dst->regions_list, &src->regions_list, &src->regions_list);
}
static int sev_es_migrate_from(struct kvm *dst, struct kvm *src)
@ -1666,15 +1689,6 @@ int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd)
bool charged = false;
int ret;
ret = sev_lock_for_migration(kvm);
if (ret)
return ret;
if (sev_guest(kvm)) {
ret = -EINVAL;
goto out_unlock;
}
source_kvm_file = fget(source_fd);
if (!file_is_kvm(source_kvm_file)) {
ret = -EBADF;
@ -1682,16 +1696,26 @@ int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd)
}
source_kvm = source_kvm_file->private_data;
ret = sev_lock_for_migration(source_kvm);
ret = sev_lock_two_vms(kvm, source_kvm);
if (ret)
goto out_fput;
if (!sev_guest(source_kvm)) {
if (sev_guest(kvm) || !sev_guest(source_kvm)) {
ret = -EINVAL;
goto out_source;
goto out_unlock;
}
src_sev = &to_kvm_svm(source_kvm)->sev_info;
/*
* VMs mirroring src's encryption context rely on it to keep the
* ASID allocated, but below we are clearing src_sev->asid.
*/
if (src_sev->num_mirrored_vms) {
ret = -EBUSY;
goto out_unlock;
}
dst_sev->misc_cg = get_current_misc_cg();
cg_cleanup_sev = dst_sev;
if (dst_sev->misc_cg != src_sev->misc_cg) {
@ -1728,13 +1752,11 @@ out_dst_cgroup:
sev_misc_cg_uncharge(cg_cleanup_sev);
put_misc_cg(cg_cleanup_sev->misc_cg);
cg_cleanup_sev->misc_cg = NULL;
out_source:
sev_unlock_after_migration(source_kvm);
out_unlock:
sev_unlock_two_vms(kvm, source_kvm);
out_fput:
if (source_kvm_file)
fput(source_kvm_file);
out_unlock:
sev_unlock_after_migration(kvm);
return ret;
}
@ -1953,76 +1975,60 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
{
struct file *source_kvm_file;
struct kvm *source_kvm;
struct kvm_sev_info source_sev, *mirror_sev;
struct kvm_sev_info *source_sev, *mirror_sev;
int ret;
source_kvm_file = fget(source_fd);
if (!file_is_kvm(source_kvm_file)) {
ret = -EBADF;
goto e_source_put;
goto e_source_fput;
}
source_kvm = source_kvm_file->private_data;
mutex_lock(&source_kvm->lock);
ret = sev_lock_two_vms(kvm, source_kvm);
if (ret)
goto e_source_fput;
if (!sev_guest(source_kvm)) {
/*
* Mirrors of mirrors should work, but let's not get silly. Also
* disallow out-of-band SEV/SEV-ES init if the target is already an
* SEV guest, or if vCPUs have been created. KVM relies on vCPUs being
* created after SEV/SEV-ES initialization, e.g. to init intercepts.
*/
if (sev_guest(kvm) || !sev_guest(source_kvm) ||
is_mirroring_enc_context(source_kvm) || kvm->created_vcpus) {
ret = -EINVAL;
goto e_source_unlock;
goto e_unlock;
}
/* Mirrors of mirrors should work, but let's not get silly */
if (is_mirroring_enc_context(source_kvm) || source_kvm == kvm) {
ret = -EINVAL;
goto e_source_unlock;
}
memcpy(&source_sev, &to_kvm_svm(source_kvm)->sev_info,
sizeof(source_sev));
/*
* The mirror kvm holds an enc_context_owner ref so its asid can't
* disappear until we're done with it
*/
source_sev = &to_kvm_svm(source_kvm)->sev_info;
kvm_get_kvm(source_kvm);
fput(source_kvm_file);
mutex_unlock(&source_kvm->lock);
mutex_lock(&kvm->lock);
/*
* Disallow out-of-band SEV/SEV-ES init if the target is already an
* SEV guest, or if vCPUs have been created. KVM relies on vCPUs being
* created after SEV/SEV-ES initialization, e.g. to init intercepts.
*/
if (sev_guest(kvm) || kvm->created_vcpus) {
ret = -EINVAL;
goto e_mirror_unlock;
}
source_sev->num_mirrored_vms++;
/* Set enc_context_owner and copy its encryption context over */
mirror_sev = &to_kvm_svm(kvm)->sev_info;
mirror_sev->enc_context_owner = source_kvm;
mirror_sev->active = true;
mirror_sev->asid = source_sev.asid;
mirror_sev->fd = source_sev.fd;
mirror_sev->es_active = source_sev.es_active;
mirror_sev->handle = source_sev.handle;
mirror_sev->asid = source_sev->asid;
mirror_sev->fd = source_sev->fd;
mirror_sev->es_active = source_sev->es_active;
mirror_sev->handle = source_sev->handle;
INIT_LIST_HEAD(&mirror_sev->regions_list);
ret = 0;
/*
* Do not copy ap_jump_table. Since the mirror does not share the same
* KVM contexts as the original, and they may have different
* memory-views.
*/
mutex_unlock(&kvm->lock);
return 0;
e_mirror_unlock:
mutex_unlock(&kvm->lock);
kvm_put_kvm(source_kvm);
return ret;
e_source_unlock:
mutex_unlock(&source_kvm->lock);
e_source_put:
e_unlock:
sev_unlock_two_vms(kvm, source_kvm);
e_source_fput:
if (source_kvm_file)
fput(source_kvm_file);
return ret;
@ -2034,17 +2040,24 @@ void sev_vm_destroy(struct kvm *kvm)
struct list_head *head = &sev->regions_list;
struct list_head *pos, *q;
WARN_ON(sev->num_mirrored_vms);
if (!sev_guest(kvm))
return;
/* If this is a mirror_kvm release the enc_context_owner and skip sev cleanup */
if (is_mirroring_enc_context(kvm)) {
kvm_put_kvm(sev->enc_context_owner);
struct kvm *owner_kvm = sev->enc_context_owner;
struct kvm_sev_info *owner_sev = &to_kvm_svm(owner_kvm)->sev_info;
mutex_lock(&owner_kvm->lock);
if (!WARN_ON(!owner_sev->num_mirrored_vms))
owner_sev->num_mirrored_vms--;
mutex_unlock(&owner_kvm->lock);
kvm_put_kvm(owner_kvm);
return;
}
mutex_lock(&kvm->lock);
/*
* Ensure that all guest tagged cache entries are flushed before
* releasing the pages back to the system for use. CLFLUSH will
@ -2064,8 +2077,6 @@ void sev_vm_destroy(struct kvm *kvm)
}
}
mutex_unlock(&kvm->lock);
sev_unbind_asid(kvm, sev->handle);
sev_asid_free(sev);
}
@ -2249,7 +2260,7 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
__free_page(virt_to_page(svm->sev_es.vmsa));
if (svm->sev_es.ghcb_sa_free)
kfree(svm->sev_es.ghcb_sa);
kvfree(svm->sev_es.ghcb_sa);
}
static void dump_ghcb(struct vcpu_svm *svm)
@ -2341,24 +2352,29 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
}
static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
static bool sev_es_validate_vmgexit(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu;
struct ghcb *ghcb;
u64 exit_code = 0;
u64 exit_code;
u64 reason;
ghcb = svm->sev_es.ghcb;
/* Only GHCB Usage code 0 is supported */
if (ghcb->ghcb_usage)
goto vmgexit_err;
/*
* Retrieve the exit code now even though is may not be marked valid
* Retrieve the exit code now even though it may not be marked valid
* as it could help with debugging.
*/
exit_code = ghcb_get_sw_exit_code(ghcb);
/* Only GHCB Usage code 0 is supported */
if (ghcb->ghcb_usage) {
reason = GHCB_ERR_INVALID_USAGE;
goto vmgexit_err;
}
reason = GHCB_ERR_MISSING_INPUT;
if (!ghcb_sw_exit_code_is_valid(ghcb) ||
!ghcb_sw_exit_info_1_is_valid(ghcb) ||
!ghcb_sw_exit_info_2_is_valid(ghcb))
@ -2437,30 +2453,34 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
case SVM_VMGEXIT_UNSUPPORTED_EVENT:
break;
default:
reason = GHCB_ERR_INVALID_EVENT;
goto vmgexit_err;
}
return 0;
return true;
vmgexit_err:
vcpu = &svm->vcpu;
if (ghcb->ghcb_usage) {
if (reason == GHCB_ERR_INVALID_USAGE) {
vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n",
ghcb->ghcb_usage);
} else if (reason == GHCB_ERR_INVALID_EVENT) {
vcpu_unimpl(vcpu, "vmgexit: exit code %#llx is not valid\n",
exit_code);
} else {
vcpu_unimpl(vcpu, "vmgexit: exit reason %#llx is not valid\n",
vcpu_unimpl(vcpu, "vmgexit: exit code %#llx input is not valid\n",
exit_code);
dump_ghcb(svm);
}
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
vcpu->run->internal.ndata = 2;
vcpu->run->internal.data[0] = exit_code;
vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
/* Clear the valid entries fields */
memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
return -EINVAL;
ghcb_set_sw_exit_info_1(ghcb, 2);
ghcb_set_sw_exit_info_2(ghcb, reason);
return false;
}
void sev_es_unmap_ghcb(struct vcpu_svm *svm)
@ -2482,7 +2502,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm)
svm->sev_es.ghcb_sa_sync = false;
}
kfree(svm->sev_es.ghcb_sa);
kvfree(svm->sev_es.ghcb_sa);
svm->sev_es.ghcb_sa = NULL;
svm->sev_es.ghcb_sa_free = false;
}
@ -2530,14 +2550,14 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
scratch_gpa_beg = ghcb_get_sw_scratch(ghcb);
if (!scratch_gpa_beg) {
pr_err("vmgexit: scratch gpa not provided\n");
return false;
goto e_scratch;
}
scratch_gpa_end = scratch_gpa_beg + len;
if (scratch_gpa_end < scratch_gpa_beg) {
pr_err("vmgexit: scratch length (%#llx) not valid for scratch address (%#llx)\n",
len, scratch_gpa_beg);
return false;
goto e_scratch;
}
if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) {
@ -2555,7 +2575,7 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
scratch_gpa_end > ghcb_scratch_end) {
pr_err("vmgexit: scratch area is outside of GHCB shared buffer area (%#llx - %#llx)\n",
scratch_gpa_beg, scratch_gpa_end);
return false;
goto e_scratch;
}
scratch_va = (void *)svm->sev_es.ghcb;
@ -2568,18 +2588,18 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
if (len > GHCB_SCRATCH_AREA_LIMIT) {
pr_err("vmgexit: scratch area exceeds KVM limits (%#llx requested, %#llx limit)\n",
len, GHCB_SCRATCH_AREA_LIMIT);
return false;
goto e_scratch;
}
scratch_va = kzalloc(len, GFP_KERNEL_ACCOUNT);
scratch_va = kvzalloc(len, GFP_KERNEL_ACCOUNT);
if (!scratch_va)
return false;
goto e_scratch;
if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, scratch_va, len)) {
/* Unable to copy scratch area from guest */
pr_err("vmgexit: kvm_read_guest for scratch area failed\n");
kfree(scratch_va);
return false;
kvfree(scratch_va);
goto e_scratch;
}
/*
@ -2596,6 +2616,12 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
svm->sev_es.ghcb_sa_len = len;
return true;
e_scratch:
ghcb_set_sw_exit_info_1(ghcb, 2);
ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_SCRATCH_AREA);
return false;
}
static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask,
@ -2646,7 +2672,7 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_CPUID);
if (!ret) {
ret = -EINVAL;
/* Error, keep GHCB MSR value as-is */
break;
}
@ -2682,10 +2708,13 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
GHCB_MSR_TERM_REASON_POS);
pr_info("SEV-ES guest requested termination: %#llx:%#llx\n",
reason_set, reason_code);
fallthrough;
ret = -EINVAL;
break;
}
default:
ret = -EINVAL;
/* Error, keep GHCB MSR value as-is */
break;
}
trace_kvm_vmgexit_msr_protocol_exit(svm->vcpu.vcpu_id,
@ -2709,14 +2738,18 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
if (!ghcb_gpa) {
vcpu_unimpl(vcpu, "vmgexit: GHCB gpa is not set\n");
return -EINVAL;
/* Without a GHCB, just return right back to the guest */
return 1;
}
if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->sev_es.ghcb_map)) {
/* Unable to map GHCB from guest */
vcpu_unimpl(vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n",
ghcb_gpa);
return -EINVAL;
/* Without a GHCB, just return right back to the guest */
return 1;
}
svm->sev_es.ghcb = svm->sev_es.ghcb_map.hva;
@ -2726,15 +2759,14 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
exit_code = ghcb_get_sw_exit_code(ghcb);
ret = sev_es_validate_vmgexit(svm);
if (ret)
return ret;
if (!sev_es_validate_vmgexit(svm))
return 1;
sev_es_sync_from_ghcb(svm);
ghcb_set_sw_exit_info_1(ghcb, 0);
ghcb_set_sw_exit_info_2(ghcb, 0);
ret = -EINVAL;
ret = 1;
switch (exit_code) {
case SVM_VMGEXIT_MMIO_READ:
if (!setup_vmgexit_scratch(svm, true, control->exit_info_2))
@ -2775,20 +2807,17 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
default:
pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n",
control->exit_info_1);
ghcb_set_sw_exit_info_1(ghcb, 1);
ghcb_set_sw_exit_info_2(ghcb,
X86_TRAP_UD |
SVM_EVTINJ_TYPE_EXEPT |
SVM_EVTINJ_VALID);
ghcb_set_sw_exit_info_1(ghcb, 2);
ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_INPUT);
}
ret = 1;
break;
}
case SVM_VMGEXIT_UNSUPPORTED_EVENT:
vcpu_unimpl(vcpu,
"vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
control->exit_info_1, control->exit_info_2);
ret = -EINVAL;
break;
default:
ret = svm_invoke_exit_handler(vcpu, exit_code);
@ -2810,7 +2839,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
return -EINVAL;
if (!setup_vmgexit_scratch(svm, in, bytes))
return -EINVAL;
return 1;
return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->sev_es.ghcb_sa,
count, in);


@ -4651,7 +4651,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.load_eoi_exitmap = svm_load_eoi_exitmap,
.hwapic_irr_update = svm_hwapic_irr_update,
.hwapic_isr_update = svm_hwapic_isr_update,
.sync_pir_to_irr = kvm_lapic_find_highest_irr,
.apicv_post_state_restore = avic_post_state_restore,
.set_tss_addr = svm_set_tss_addr,


@ -79,6 +79,7 @@ struct kvm_sev_info {
struct list_head regions_list; /* List of registered regions */
u64 ap_jump_table; /* SEV-ES AP Jump Table address */
struct kvm *enc_context_owner; /* Owner of copied encryption context */
unsigned long num_mirrored_vms; /* Number of VMs sharing this ASID */
struct misc_cg *misc_cg; /* For misc cgroup accounting */
atomic_t migration_in_progress;
};


@ -1162,29 +1162,26 @@ static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu,
WARN_ON(!enable_vpid);
/*
* If VPID is enabled and used by vmc12, but L2 does not have a unique
* TLB tag (ASID), i.e. EPT is disabled and KVM was unable to allocate
* a VPID for L2, flush the current context as the effective ASID is
* common to both L1 and L2.
*
* Defer the flush so that it runs after vmcs02.EPTP has been set by
* KVM_REQ_LOAD_MMU_PGD (if nested EPT is enabled) and to avoid
* redundant flushes further down the nested pipeline.
*
* If a TLB flush isn't required due to any of the above, and vpid12 is
* changing then the new "virtual" VPID (vpid12) will reuse the same
* "real" VPID (vpid02), and so needs to be flushed. There's no direct
* mapping between vpid02 and vpid12, vpid02 is per-vCPU and reused for
* all nested vCPUs. Remember, a flush on VM-Enter does not invalidate
* guest-physical mappings, so there is no need to sync the nEPT MMU.
* VPID is enabled and in use by vmcs12. If vpid12 is changing, then
* emulate a guest TLB flush as KVM does not track vpid12 history nor
* is the VPID incorporated into the MMU context. I.e. KVM must assume
* that the new vpid12 has never been used and thus represents a new
* guest ASID that cannot have entries in the TLB.
*/
if (!nested_has_guest_tlb_tag(vcpu)) {
kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
} else if (is_vmenter &&
vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
if (is_vmenter && vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
vmx->nested.last_vpid = vmcs12->virtual_processor_id;
vpid_sync_context(nested_get_vpid02(vcpu));
kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
return;
}
/*
* If VPID is enabled, used by vmc12, and vpid12 is not changing but
* does not have a unique TLB tag (ASID), i.e. EPT is disabled and
* KVM was unable to allocate a VPID for L2, flush the current context
* as the effective ASID is common to both L1 and L2.
*/
if (!nested_has_guest_tlb_tag(vcpu))
kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
}
static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
@ -2594,8 +2591,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
vmcs12->guest_ia32_perf_global_ctrl)))
vmcs12->guest_ia32_perf_global_ctrl))) {
*entry_failure_code = ENTRY_FAIL_DEFAULT;
return -EINVAL;
}
kvm_rsp_write(vcpu, vmcs12->guest_rsp);
kvm_rip_write(vcpu, vmcs12->guest_rip);
@ -3344,8 +3343,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
};
u32 failed_index;
if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
kvm_vcpu_flush_tlb_current(vcpu);
kvm_service_local_tlb_flush_requests(vcpu);
evaluate_pending_interrupts = exec_controls_get(vmx) &
(CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING);
@ -4502,9 +4500,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
(void)nested_get_evmcs_page(vcpu);
}
/* Service the TLB flush request for L2 before switching to L1. */
if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
kvm_vcpu_flush_tlb_current(vcpu);
/* Service pending TLB flush requests for L2 before switching to L1. */
kvm_service_local_tlb_flush_requests(vcpu);
/*
* VCPU_EXREG_PDPTR will be clobbered in arch/x86/kvm/vmx/vmx.h between
@ -4857,6 +4854,7 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
if (!vmx->nested.cached_vmcs12)
goto out_cached_vmcs12;
vmx->nested.shadow_vmcs12_cache.gpa = INVALID_GPA;
vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT);
if (!vmx->nested.cached_shadow_vmcs12)
goto out_cached_shadow_vmcs12;
@ -5289,8 +5287,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
struct gfn_to_hva_cache *ghc = &vmx->nested.vmcs12_cache;
struct vmcs_hdr hdr;
if (ghc->gpa != vmptr &&
kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, vmptr, VMCS12_SIZE)) {
if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, vmptr, VMCS12_SIZE)) {
/*
* Reads from an unbacked page return all 1s,
* which means that the 32 bits located at the


@ -5,6 +5,7 @@
#include <asm/cpu.h>
#include "lapic.h"
#include "irq.h"
#include "posted_intr.h"
#include "trace.h"
#include "vmx.h"
@ -77,13 +78,18 @@ after_clear_sn:
pi_set_on(pi_desc);
}
static bool vmx_can_use_vtd_pi(struct kvm *kvm)
{
return irqchip_in_kernel(kvm) && enable_apicv &&
kvm_arch_has_assigned_device(kvm) &&
irq_remapping_cap(IRQ_POSTING_CAP);
}
void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
{
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
!irq_remapping_cap(IRQ_POSTING_CAP) ||
!kvm_vcpu_apicv_active(vcpu))
if (!vmx_can_use_vtd_pi(vcpu->kvm))
return;
/* Set SN when the vCPU is preempted */
@ -141,9 +147,7 @@ int pi_pre_block(struct kvm_vcpu *vcpu)
struct pi_desc old, new;
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
!irq_remapping_cap(IRQ_POSTING_CAP) ||
!kvm_vcpu_apicv_active(vcpu))
if (!vmx_can_use_vtd_pi(vcpu->kvm))
return 0;
WARN_ON(irqs_disabled());
@ -270,9 +274,7 @@ int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq,
struct vcpu_data vcpu_info;
int idx, ret = 0;
if (!kvm_arch_has_assigned_device(kvm) ||
!irq_remapping_cap(IRQ_POSTING_CAP) ||
!kvm_vcpu_apicv_active(kvm->vcpus[0]))
if (!vmx_can_use_vtd_pi(kvm))
return 0;
idx = srcu_read_lock(&kvm->irq_srcu);


@ -2646,15 +2646,6 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
if (!loaded_vmcs->msr_bitmap)
goto out_vmcs;
memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
if (IS_ENABLED(CONFIG_HYPERV) &&
static_branch_unlikely(&enable_evmcs) &&
(ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
struct hv_enlightened_vmcs *evmcs =
(struct hv_enlightened_vmcs *)loaded_vmcs->vmcs;
evmcs->hv_enlightenments_control.msr_bitmap = 1;
}
}
memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state));
@ -2918,6 +2909,13 @@ static void vmx_flush_tlb_all(struct kvm_vcpu *vcpu)
}
}
static inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu)
{
if (is_guest_mode(vcpu))
return nested_get_vpid02(vcpu);
return to_vmx(vcpu)->vpid;
}
static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu)
{
struct kvm_mmu *mmu = vcpu->arch.mmu;
@ -2930,31 +2928,29 @@ static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu)
if (enable_ept)
ept_sync_context(construct_eptp(vcpu, root_hpa,
mmu->shadow_root_level));
else if (!is_guest_mode(vcpu))
vpid_sync_context(to_vmx(vcpu)->vpid);
else
vpid_sync_context(nested_get_vpid02(vcpu));
vpid_sync_context(vmx_get_current_vpid(vcpu));
}
static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
{
/*
* vpid_sync_vcpu_addr() is a nop if vmx->vpid==0, see the comment in
* vpid_sync_vcpu_addr() is a nop if vpid==0, see the comment in
* vmx_flush_tlb_guest() for an explanation of why this is ok.
*/
vpid_sync_vcpu_addr(to_vmx(vcpu)->vpid, addr);
vpid_sync_vcpu_addr(vmx_get_current_vpid(vcpu), addr);
}
static void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu)
{
/*
* vpid_sync_context() is a nop if vmx->vpid==0, e.g. if enable_vpid==0
* or a vpid couldn't be allocated for this vCPU. VM-Enter and VM-Exit
* are required to flush GVA->{G,H}PA mappings from the TLB if vpid is
* vpid_sync_context() is a nop if vpid==0, e.g. if enable_vpid==0 or a
* vpid couldn't be allocated for this vCPU. VM-Enter and VM-Exit are
* required to flush GVA->{G,H}PA mappings from the TLB if vpid is
* disabled (VM-Enter with vpid enabled and vpid==0 is disallowed),
* i.e. no explicit INVVPID is necessary.
*/
vpid_sync_context(to_vmx(vcpu)->vpid);
vpid_sync_context(vmx_get_current_vpid(vcpu));
}
void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu)
@ -6262,9 +6258,9 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int max_irr;
bool max_irr_updated;
bool got_posted_interrupt;
if (KVM_BUG_ON(!vcpu->arch.apicv_active, vcpu->kvm))
if (KVM_BUG_ON(!enable_apicv, vcpu->kvm))
return -EIO;
if (pi_test_on(&vmx->pi_desc)) {
@ -6274,22 +6270,33 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
* But on x86 this is just a compiler barrier anyway.
*/
smp_mb__after_atomic();
max_irr_updated =
got_posted_interrupt =
kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr);
/*
* If we are running L2 and L1 has a new pending interrupt
* which can be injected, this may cause a vmexit or it may
* be injected into L2. Either way, this interrupt will be
* processed via KVM_REQ_EVENT, not RVI, because we do not use
* virtual interrupt delivery to inject L1 interrupts into L2.
*/
if (is_guest_mode(vcpu) && max_irr_updated)
kvm_make_request(KVM_REQ_EVENT, vcpu);
} else {
max_irr = kvm_lapic_find_highest_irr(vcpu);
got_posted_interrupt = false;
}
vmx_hwapic_irr_update(vcpu, max_irr);
/*
* Newly recognized interrupts are injected via either virtual interrupt
* delivery (RVI) or KVM_REQ_EVENT. Virtual interrupt delivery is
* disabled in two cases:
*
* 1) If L2 is running and the vCPU has a new pending interrupt. If L1
* wants to exit on interrupts, KVM_REQ_EVENT is needed to synthesize a
* VM-Exit to L1. If L1 doesn't want to exit, the interrupt is injected
* into L2, but KVM doesn't use virtual interrupt delivery to inject
* interrupts into L2, and so KVM_REQ_EVENT is again needed.
*
* 2) If APICv is disabled for this vCPU, assigned devices may still
* attempt to post interrupts. The posted interrupt vector will cause
* a VM-Exit and the subsequent entry will call sync_pir_to_irr.
*/
if (!is_guest_mode(vcpu) && kvm_vcpu_apicv_active(vcpu))
vmx_set_rvi(max_irr);
else if (got_posted_interrupt)
kvm_make_request(KVM_REQ_EVENT, vcpu);
return max_irr;
}
@ -6826,6 +6833,19 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
if (err < 0)
goto free_pml;
/*
* Use Hyper-V 'Enlightened MSR Bitmap' feature when KVM runs as a
* nested (L1) hypervisor and Hyper-V in L0 supports it. Enable the
* feature only for vmcs01, KVM currently isn't equipped to realize any
* performance benefits from enabling it for vmcs02.
*/
if (IS_ENABLED(CONFIG_HYPERV) && static_branch_unlikely(&enable_evmcs) &&
(ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
struct hv_enlightened_vmcs *evmcs = (void *)vmx->vmcs01.vmcs;
evmcs->hv_enlightenments_control.msr_bitmap = 1;
}
/* The MSR bitmap starts with all ones */
bitmap_fill(vmx->shadow_msr_intercept.read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
bitmap_fill(vmx->shadow_msr_intercept.write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
@ -7509,6 +7529,7 @@ static void hardware_unsetup(void)
static bool vmx_check_apicv_inhibit_reasons(ulong bit)
{
ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
BIT(APICV_INHIBIT_REASON_ABSENT) |
BIT(APICV_INHIBIT_REASON_HYPERV) |
BIT(APICV_INHIBIT_REASON_BLOCKIRQ);
@ -7761,10 +7782,10 @@ static __init int hardware_setup(void)
ple_window_shrink = 0;
}
if (!cpu_has_vmx_apicv()) {
if (!cpu_has_vmx_apicv())
enable_apicv = 0;
if (!enable_apicv)
vmx_x86_ops.sync_pir_to_irr = NULL;
}
if (cpu_has_vmx_tsc_scaling()) {
kvm_has_tsc_control = true;


@ -890,7 +890,8 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
!load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)))
return 1;
if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
if (!(cr0 & X86_CR0_PG) &&
(is_64_bit_mode(vcpu) || kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)))
return 1;
static_call(kvm_x86_set_cr0)(vcpu, cr0);
@ -3258,6 +3259,29 @@ static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
static_call(kvm_x86_tlb_flush_guest)(vcpu);
}
static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu)
{
++vcpu->stat.tlb_flush;
static_call(kvm_x86_tlb_flush_current)(vcpu);
}
/*
* Service "local" TLB flush requests, which are specific to the current MMU
* context. In addition to the generic event handling in vcpu_enter_guest(),
* TLB flushes that are targeted at an MMU context also need to be serviced
* prior before nested VM-Enter/VM-Exit.
*/
void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu)
{
if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
kvm_vcpu_flush_tlb_current(vcpu);
if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
kvm_vcpu_flush_tlb_guest(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_service_local_tlb_flush_requests);
static void record_steal_time(struct kvm_vcpu *vcpu)
{
struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
@ -4133,6 +4157,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SGX_ATTRIBUTE:
#endif
case KVM_CAP_VM_COPY_ENC_CONTEXT_FROM:
case KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM:
case KVM_CAP_SREGS2:
case KVM_CAP_EXIT_ON_EMULATION_FAILURE:
case KVM_CAP_VCPU_ATTRIBUTES:
@ -4448,8 +4473,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
struct kvm_lapic_state *s)
{
if (vcpu->arch.apicv_active)
static_call(kvm_x86_sync_pir_to_irr)(vcpu);
static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
return kvm_apic_get_state(vcpu, s);
}
@ -5124,6 +5148,17 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
struct kvm_cpuid __user *cpuid_arg = argp;
struct kvm_cpuid cpuid;
/*
* KVM does not correctly handle changing guest CPUID after KVM_RUN, as
* MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc.. aren't
* tracked in kvm_mmu_page_role. As a result, KVM may miss guest page
* faults due to reusing SPs/SPTEs. In practice no sane VMM mucks with
* the core vCPU model on the fly, so fail.
*/
r = -EINVAL;
if (vcpu->arch.last_vmentry_cpu != -1)
goto out;
r = -EFAULT;
if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
goto out;
@ -5134,6 +5169,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
struct kvm_cpuid2 __user *cpuid_arg = argp;
struct kvm_cpuid2 cpuid;
/*
* KVM_SET_CPUID{,2} after KVM_RUN is forbidded, see the comment in
* KVM_SET_CPUID case above.
*/
r = -EINVAL;
if (vcpu->arch.last_vmentry_cpu != -1)
goto out;
r = -EFAULT;
if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
goto out;
@ -5698,6 +5741,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
smp_wmb();
kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT;
kvm->arch.nr_reserved_ioapic_pins = cap->args[0];
kvm_request_apicv_update(kvm, true, APICV_INHIBIT_REASON_ABSENT);
r = 0;
split_irqchip_unlock:
mutex_unlock(&kvm->lock);
@ -6078,6 +6122,7 @@ set_identity_unlock:
/* Write kvm->irq_routing before enabling irqchip_in_kernel. */
smp_wmb();
kvm->arch.irqchip_mode = KVM_IRQCHIP_KERNEL;
kvm_request_apicv_update(kvm, true, APICV_INHIBIT_REASON_ABSENT);
create_irqchip_unlock:
mutex_unlock(&kvm->lock);
break;
@ -7077,7 +7122,13 @@ static int emulator_pio_in(struct kvm_vcpu *vcpu, int size,
unsigned short port, void *val, unsigned int count)
{
if (vcpu->arch.pio.count) {
/* Complete previous iteration. */
/*
* Complete a previous iteration that required userspace I/O.
* Note, @count isn't guaranteed to match pio.count as userspace
* can modify ECX before rerunning the vCPU. Ignore any such
* shenanigans as KVM doesn't support modifying the rep count,
* and the emulator ensures @count doesn't overflow the buffer.
*/
} else {
int r = __emulator_pio_in(vcpu, size, port, count);
if (!r)
@ -7086,7 +7137,6 @@ static int emulator_pio_in(struct kvm_vcpu *vcpu, int size,
/* Results already available, fall through. */
}
WARN_ON(count != vcpu->arch.pio.count);
complete_emulator_pio_in(vcpu, val);
return 1;
}
@ -8776,10 +8826,9 @@ static void kvm_apicv_init(struct kvm *kvm)
{
init_rwsem(&kvm->arch.apicv_update_lock);
if (enable_apicv)
clear_bit(APICV_INHIBIT_REASON_DISABLE,
&kvm->arch.apicv_inhibit_reasons);
else
set_bit(APICV_INHIBIT_REASON_ABSENT,
&kvm->arch.apicv_inhibit_reasons);
if (!enable_apicv)
set_bit(APICV_INHIBIT_REASON_DISABLE,
&kvm->arch.apicv_inhibit_reasons);
}
@ -9528,8 +9577,7 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
if (irqchip_split(vcpu->kvm))
kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
else {
if (vcpu->arch.apicv_active)
static_call(kvm_x86_sync_pir_to_irr)(vcpu);
static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
if (ioapic_in_kernel(vcpu->kvm))
kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
}
@ -9648,10 +9696,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
/* Flushing all ASIDs flushes the current ASID... */
kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
}
if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
kvm_vcpu_flush_tlb_current(vcpu);
if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
kvm_vcpu_flush_tlb_guest(vcpu);
kvm_service_local_tlb_flush_requests(vcpu);
if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
@ -9802,10 +9847,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
/*
* This handles the case where a posted interrupt was
* notified with kvm_vcpu_kick.
* notified with kvm_vcpu_kick. Assigned devices can
* use the POSTED_INTR_VECTOR even if APICv is disabled,
* so do it even if APICv is disabled on this vCPU.
*/
if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active)
static_call(kvm_x86_sync_pir_to_irr)(vcpu);
if (kvm_lapic_enabled(vcpu))
static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
if (kvm_vcpu_exit_request(vcpu)) {
vcpu->mode = OUTSIDE_GUEST_MODE;
@ -9849,8 +9896,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
break;
if (vcpu->arch.apicv_active)
static_call(kvm_x86_sync_pir_to_irr)(vcpu);
if (kvm_lapic_enabled(vcpu))
static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
if (unlikely(kvm_vcpu_exit_request(vcpu))) {
exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED;


@ -103,6 +103,7 @@ static inline unsigned int __shrink_ple_window(unsigned int val,
#define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL
void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu);
int kvm_check_nested_events(struct kvm_vcpu *vcpu);
static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
@ -185,12 +186,6 @@ static inline bool mmu_is_nested(struct kvm_vcpu *vcpu)
return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu;
}
static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu)
{
++vcpu->stat.tlb_flush;
static_call(kvm_x86_tlb_flush_current)(vcpu);
}
static inline int is_pae(struct kvm_vcpu *vcpu)
{
return kvm_read_cr4_bits(vcpu, X86_CR4_PAE);


@ -277,7 +277,8 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size)
return;
}
new = early_memremap(data.phys_map, data.size);
new = early_memremap_prot(data.phys_map, data.size,
pgprot_val(pgprot_encrypted(FIXMAP_PAGE_NORMAL)));
if (!new) {
pr_err("Failed to map new boot services memmap\n");
return;


@ -72,6 +72,7 @@ static void __init setup_real_mode(void)
#ifdef CONFIG_X86_64
u64 *trampoline_pgd;
u64 efer;
int i;
#endif
base = (unsigned char *)real_mode_header;
@ -128,8 +129,17 @@ static void __init setup_real_mode(void)
trampoline_header->flags = 0;
trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
/* Map the real mode stub as virtual == physical */
trampoline_pgd[0] = trampoline_pgd_entry.pgd;
trampoline_pgd[511] = init_top_pgt[511].pgd;
/*
* Include the entirety of the kernel mapping into the trampoline
* PGD. This way, all mappings present in the normal kernel page
* tables are usable while running on trampoline_pgd.
*/
for (i = pgd_index(__PAGE_OFFSET); i < PTRS_PER_PGD; i++)
trampoline_pgd[i] = init_top_pgt[i].pgd;
#endif
sme_sev_setup_real_mode(trampoline_header);


@ -20,6 +20,7 @@
#include <linux/init.h>
#include <linux/linkage.h>
#include <../entry/calling.h>
.pushsection .noinstr.text, "ax"
/*
@ -192,6 +193,25 @@ SYM_CODE_START(xen_iret)
jmp hypercall_iret
SYM_CODE_END(xen_iret)
/*
* XEN pv doesn't use trampoline stack, PER_CPU_VAR(cpu_tss_rw + TSS_sp0) is
* also the kernel stack. Reusing swapgs_restore_regs_and_return_to_usermode()
* in XEN pv would cause %rsp to move up to the top of the kernel stack and
* leave the IRET frame below %rsp, which is dangerous to be corrupted if #NMI
* interrupts. And swapgs_restore_regs_and_return_to_usermode() pushing the IRET
* frame at the same address is useless.
*/
SYM_CODE_START(xenpv_restore_regs_and_return_to_usermode)
UNWIND_HINT_REGS
POP_REGS
/* stackleak_erase() can work safely on the kernel stack. */
STACKLEAK_ERASE_NOCLOBBER
addq $8, %rsp /* skip regs->orig_ax */
jmp xen_iret
SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode)
/*
* Xen handles syscall callbacks much like ordinary exceptions, which
* means we have:


@ -15,6 +15,7 @@
#include <linux/falloc.h>
#include <linux/suspend.h>
#include <linux/fs.h>
#include <linux/module.h>
#include "blk.h"
static inline struct inode *bdev_file_inode(struct file *file)
@ -340,8 +341,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
} else {
ret = bio_iov_iter_get_pages(bio, iter);
if (unlikely(ret)) {
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
bio_put(bio);
return ret;
}
}


@ -220,6 +220,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
pgrp = task_pgrp(current);
else
pgrp = find_vpid(who);
read_lock(&tasklist_lock);
do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
tmpio = get_task_ioprio(p);
if (tmpio < 0)
@ -229,6 +230,8 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
else
ret = ioprio_best(ret, tmpio);
} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
read_unlock(&tasklist_lock);
break;
case IOPRIO_WHO_USER:
uid = make_kuid(current_user_ns(), who);


@ -4743,23 +4743,20 @@ static int binder_thread_release(struct binder_proc *proc,
__release(&t->lock);
/*
* If this thread used poll, make sure we remove the waitqueue
* from any epoll data structures holding it with POLLFREE.
* waitqueue_active() is safe to use here because we're holding
* the inner lock.
* If this thread used poll, make sure we remove the waitqueue from any
* poll data structures holding it.
*/
if ((thread->looper & BINDER_LOOPER_STATE_POLL) &&
waitqueue_active(&thread->wait)) {
wake_up_poll(&thread->wait, EPOLLHUP | POLLFREE);
}
if (thread->looper & BINDER_LOOPER_STATE_POLL)
wake_up_pollfree(&thread->wait);
binder_inner_proc_unlock(thread->proc);
/*
* This is needed to avoid races between wake_up_poll() above and
* and ep_remove_waitqueue() called for other reasons (eg the epoll file
* descriptor being closed); ep_remove_waitqueue() holds an RCU read
* lock, so we can be sure it's done after calling synchronize_rcu().
* This is needed to avoid races between wake_up_pollfree() above and
* someone else removing the last entry from the queue for other reasons
* (e.g. ep_remove_wait_queue() being called due to an epoll file
* descriptor being closed). Such other users hold an RCU read lock, so
* we can be sure they're done after we call synchronize_rcu().
*/
if (thread->looper & BINDER_LOOPER_STATE_POLL)
synchronize_rcu();


@ -94,6 +94,7 @@ struct ceva_ahci_priv {
static unsigned int ceva_ahci_read_id(struct ata_device *dev,
struct ata_taskfile *tf, u16 *id)
{
__le16 *__id = (__le16 *)id;
u32 err_mask;
err_mask = ata_do_dev_read_id(dev, tf, id);
@ -103,7 +104,7 @@ static unsigned int ceva_ahci_read_id(struct ata_device *dev,
* Since CEVA controller does not support device sleep feature, we
* need to clear DEVSLP (bit 8) in word78 of the IDENTIFY DEVICE data.
*/
id[ATA_ID_FEATURE_SUPP] &= cpu_to_le16(~(1 << 8));
__id[ATA_ID_FEATURE_SUPP] &= cpu_to_le16(~(1 << 8));
return 0;
}


@ -3920,6 +3920,8 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
{ "VRFDFC22048UCHC-TE*", NULL, ATA_HORKAGE_NODMA },
/* Odd clown on sil3726/4726 PMPs */
{ "Config Disk", NULL, ATA_HORKAGE_DISABLE },
/* Similar story with ASMedia 1092 */
{ "ASMT109x- Config", NULL, ATA_HORKAGE_DISABLE },
/* Weird ATAPI devices */
{ "TORiSAN DVD-ROM DRD-N216", NULL, ATA_HORKAGE_MAX_SEC_128 },


@ -827,7 +827,7 @@ static ssize_t ata_scsi_lpm_show(struct device *dev,
if (ap->target_lpm_policy >= ARRAY_SIZE(ata_lpm_policy_names))
return -EINVAL;
return snprintf(buf, PAGE_SIZE, "%s\n",
return sysfs_emit(buf, "%s\n",
ata_lpm_policy_names[ap->target_lpm_policy]);
}
DEVICE_ATTR(link_power_management_policy, S_IRUGO | S_IWUSR,


@ -55,14 +55,14 @@ static unsigned int pata_falcon_data_xfer(struct ata_queued_cmd *qc,
/* Transfer multiple of 2 bytes */
if (rw == READ) {
if (swap)
raw_insw_swapw((u16 *)data_addr, (u16 *)buf, words);
raw_insw_swapw(data_addr, (u16 *)buf, words);
else
raw_insw((u16 *)data_addr, (u16 *)buf, words);
raw_insw(data_addr, (u16 *)buf, words);
} else {
if (swap)
raw_outsw_swapw((u16 *)data_addr, (u16 *)buf, words);
raw_outsw_swapw(data_addr, (u16 *)buf, words);
else
raw_outsw((u16 *)data_addr, (u16 *)buf, words);
raw_outsw(data_addr, (u16 *)buf, words);
}
/* Transfer trailing byte, if any. */
@ -74,16 +74,16 @@ static unsigned int pata_falcon_data_xfer(struct ata_queued_cmd *qc,
if (rw == READ) {
if (swap)
raw_insw_swapw((u16 *)data_addr, (u16 *)pad, 1);
raw_insw_swapw(data_addr, (u16 *)pad, 1);
else
raw_insw((u16 *)data_addr, (u16 *)pad, 1);
raw_insw(data_addr, (u16 *)pad, 1);
*buf = pad[0];
} else {
pad[0] = *buf;
if (swap)
raw_outsw_swapw((u16 *)data_addr, (u16 *)pad, 1);
raw_outsw_swapw(data_addr, (u16 *)pad, 1);
else
raw_outsw((u16 *)data_addr, (u16 *)pad, 1);
raw_outsw(data_addr, (u16 *)pad, 1);
}
words++;
}


@ -1394,6 +1394,14 @@ static int sata_fsl_init_controller(struct ata_host *host)
return 0;
}
static void sata_fsl_host_stop(struct ata_host *host)
{
struct sata_fsl_host_priv *host_priv = host->private_data;
iounmap(host_priv->hcr_base);
kfree(host_priv);
}
/*
* scsi mid-layer and libata interface structures
*/
@ -1426,6 +1434,8 @@ static struct ata_port_operations sata_fsl_ops = {
.port_start = sata_fsl_port_start,
.port_stop = sata_fsl_port_stop,
.host_stop = sata_fsl_host_stop,
.pmp_attach = sata_fsl_pmp_attach,
.pmp_detach = sata_fsl_pmp_detach,
};
@ -1480,9 +1490,9 @@ static int sata_fsl_probe(struct platform_device *ofdev)
host_priv->ssr_base = ssr_base;
host_priv->csr_base = csr_base;
irq = irq_of_parse_and_map(ofdev->dev.of_node, 0);
if (!irq) {
dev_err(&ofdev->dev, "invalid irq from platform\n");
irq = platform_get_irq(ofdev, 0);
if (irq < 0) {
retval = irq;
goto error_exit_with_cleanup;
}
host_priv->irq = irq;
@ -1557,10 +1567,6 @@ static int sata_fsl_remove(struct platform_device *ofdev)
ata_host_detach(host);
irq_dispose_mapping(host_priv->irq);
iounmap(host_priv->hcr_base);
kfree(host_priv);
return 0;
}


@ -2103,7 +2103,7 @@ static int loop_control_remove(int idx)
int ret;
if (idx < 0) {
pr_warn("deleting an unspecified loop device is not supported.\n");
pr_warn_once("deleting an unspecified loop device is not supported.\n");
return -EINVAL;
}


@ -881,7 +881,7 @@ int mhi_pm_suspend(struct mhi_controller *mhi_cntrl)
}
EXPORT_SYMBOL_GPL(mhi_pm_suspend);
int mhi_pm_resume(struct mhi_controller *mhi_cntrl)
static int __mhi_pm_resume(struct mhi_controller *mhi_cntrl, bool force)
{
struct mhi_chan *itr, *tmp;
struct device *dev = &mhi_cntrl->mhi_dev->dev;
@ -898,8 +898,12 @@ int mhi_pm_resume(struct mhi_controller *mhi_cntrl)
if (MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state))
return -EIO;
if (mhi_get_mhi_state(mhi_cntrl) != MHI_STATE_M3)
return -EINVAL;
if (mhi_get_mhi_state(mhi_cntrl) != MHI_STATE_M3) {
dev_warn(dev, "Resuming from non M3 state (%s)\n",
TO_MHI_STATE_STR(mhi_get_mhi_state(mhi_cntrl)));
if (!force)
return -EINVAL;
}
/* Notify clients about exiting LPM */
list_for_each_entry_safe(itr, tmp, &mhi_cntrl->lpm_chans, node) {
@ -940,8 +944,19 @@ int mhi_pm_resume(struct mhi_controller *mhi_cntrl)
return 0;
}
int mhi_pm_resume(struct mhi_controller *mhi_cntrl)
{
return __mhi_pm_resume(mhi_cntrl, false);
}
EXPORT_SYMBOL_GPL(mhi_pm_resume);
int mhi_pm_resume_force(struct mhi_controller *mhi_cntrl)
{
return __mhi_pm_resume(mhi_cntrl, true);
}
EXPORT_SYMBOL_GPL(mhi_pm_resume_force);
int __mhi_device_get_sync(struct mhi_controller *mhi_cntrl)
{
int ret;


@ -20,7 +20,7 @@
#define MHI_PCI_DEFAULT_BAR_NUM 0
#define MHI_POST_RESET_DELAY_MS 500
#define MHI_POST_RESET_DELAY_MS 2000
#define HEALTH_CHECK_PERIOD (HZ * 2)


@ -281,7 +281,7 @@ agp_ioc_init(void __iomem *ioc_regs)
return 0;
}
static int
static int __init
lba_find_capability(int cap)
{
struct _parisc_agp_info *info = &parisc_agp_info;
@ -366,7 +366,7 @@ fail:
return error;
}
static int
static int __init
find_quicksilver(struct device *dev, void *data)
{
struct parisc_device **lba = data;
@ -378,7 +378,7 @@ find_quicksilver(struct device *dev, void *data)
return 0;
}
static int
static int __init
parisc_agp_init(void)
{
extern struct sba_device *sba_list;


@ -191,6 +191,8 @@ struct ipmi_user {
struct work_struct remove_work;
};
static struct workqueue_struct *remove_work_wq;
static struct ipmi_user *acquire_ipmi_user(struct ipmi_user *user, int *index)
__acquires(user->release_barrier)
{
@ -1297,7 +1299,7 @@ static void free_user(struct kref *ref)
struct ipmi_user *user = container_of(ref, struct ipmi_user, refcount);
/* SRCU cleanup must happen in task context. */
schedule_work(&user->remove_work);
queue_work(remove_work_wq, &user->remove_work);
}
static void _ipmi_destroy_user(struct ipmi_user *user)
@ -3918,9 +3920,11 @@ static int handle_ipmb_direct_rcv_cmd(struct ipmi_smi *intf,
/* We didn't find a user, deliver an error response. */
ipmi_inc_stat(intf, unhandled_commands);
msg->data[0] = ((netfn + 1) << 2) | (msg->rsp[4] & 0x3);
msg->data[1] = msg->rsp[2];
msg->data[2] = msg->rsp[4] & ~0x3;
msg->data[0] = (netfn + 1) << 2;
msg->data[0] |= msg->rsp[2] & 0x3; /* rqLUN */
msg->data[1] = msg->rsp[1]; /* Addr */
msg->data[2] = msg->rsp[2] & ~0x3; /* rqSeq */
msg->data[2] |= msg->rsp[0] & 0x3; /* rsLUN */
msg->data[3] = cmd;
msg->data[4] = IPMI_INVALID_CMD_COMPLETION_CODE;
msg->data_size = 5;
@ -4455,13 +4459,24 @@ return_unspecified:
msg->rsp[2] = IPMI_ERR_UNSPECIFIED;
msg->rsp_size = 3;
} else if (msg->type == IPMI_SMI_MSG_TYPE_IPMB_DIRECT) {
/* commands must have at least 3 bytes, responses 4. */
if (is_cmd && (msg->rsp_size < 3)) {
/* commands must have at least 4 bytes, responses 5. */
if (is_cmd && (msg->rsp_size < 4)) {
ipmi_inc_stat(intf, invalid_commands);
goto out;
}
if (!is_cmd && (msg->rsp_size < 4))
goto return_unspecified;
if (!is_cmd && (msg->rsp_size < 5)) {
ipmi_inc_stat(intf, invalid_ipmb_responses);
/* Construct a valid error response. */
msg->rsp[0] = msg->data[0] & 0xfc; /* NetFN */
msg->rsp[0] |= (1 << 2); /* Make it a response */
msg->rsp[0] |= msg->data[2] & 3; /* rqLUN */
msg->rsp[1] = msg->data[1]; /* Addr */
msg->rsp[2] = msg->data[2] & 0xfc; /* rqSeq */
msg->rsp[2] |= msg->data[0] & 0x3; /* rsLUN */
msg->rsp[3] = msg->data[3]; /* Cmd */
msg->rsp[4] = IPMI_ERR_UNSPECIFIED;
msg->rsp_size = 5;
}
} else if ((msg->data_size >= 2)
&& (msg->data[0] == (IPMI_NETFN_APP_REQUEST << 2))
&& (msg->data[1] == IPMI_SEND_MSG_CMD)
@ -5031,6 +5046,7 @@ struct ipmi_smi_msg *ipmi_alloc_smi_msg(void)
if (rv) {
rv->done = free_smi_msg;
rv->user_data = NULL;
rv->type = IPMI_SMI_MSG_TYPE_NORMAL;
atomic_inc(&smi_msg_inuse_count);
}
return rv;
@ -5383,6 +5399,13 @@ static int ipmi_init_msghandler(void)
atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
remove_work_wq = create_singlethread_workqueue("ipmi-msghandler-remove-wq");
if (!remove_work_wq) {
pr_err("unable to create ipmi-msghandler-remove-wq workqueue");
rv = -ENOMEM;
goto out;
}
initialized = true;
out:
@ -5408,6 +5431,8 @@ static void __exit cleanup_ipmi(void)
int count;
if (initialized) {
destroy_workqueue(remove_work_wq);
atomic_notifier_chain_unregister(&panic_notifier_list,
&panic_block);


@ -370,7 +370,7 @@ static struct platform_driver imx8qxp_lpcg_clk_driver = {
.probe = imx8qxp_lpcg_clk_probe,
};
builtin_platform_driver(imx8qxp_lpcg_clk_driver);
module_platform_driver(imx8qxp_lpcg_clk_driver);
MODULE_AUTHOR("Aisheng Dong <aisheng.dong@nxp.com>");
MODULE_DESCRIPTION("NXP i.MX8QXP LPCG clock driver");


@ -308,7 +308,7 @@ static struct platform_driver imx8qxp_clk_driver = {
},
.probe = imx8qxp_clk_probe,
};
builtin_platform_driver(imx8qxp_clk_driver);
module_platform_driver(imx8qxp_clk_driver);
MODULE_AUTHOR("Aisheng Dong <aisheng.dong@nxp.com>");
MODULE_DESCRIPTION("NXP i.MX8QXP clock driver");


@ -1429,6 +1429,15 @@ EXPORT_SYMBOL_GPL(clk_alpha_pll_postdiv_fabia_ops);
void clk_trion_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap,
const struct alpha_pll_config *config)
{
/*
* If the bootloader left the PLL enabled it's likely that there are
* RCGs that will lock up if we disable the PLL below.
*/
if (trion_pll_is_enabled(pll, regmap)) {
pr_debug("Trion PLL is already enabled, skipping configuration\n");
return;
}
clk_alpha_pll_write_config(regmap, PLL_L_VAL(pll), config->l);
regmap_write(regmap, PLL_CAL_L_VAL(pll), TRION_PLL_CAL_VAL);
clk_alpha_pll_write_config(regmap, PLL_ALPHA_VAL(pll), config->alpha);


@ -28,7 +28,7 @@ static u8 mux_get_parent(struct clk_hw *hw)
val &= mask;
if (mux->parent_map)
return qcom_find_src_index(hw, mux->parent_map, val);
return qcom_find_cfg_index(hw, mux->parent_map, val);
return val;
}


@ -69,6 +69,18 @@ int qcom_find_src_index(struct clk_hw *hw, const struct parent_map *map, u8 src)
}
EXPORT_SYMBOL_GPL(qcom_find_src_index);
int qcom_find_cfg_index(struct clk_hw *hw, const struct parent_map *map, u8 cfg)
{
int i, num_parents = clk_hw_get_num_parents(hw);
for (i = 0; i < num_parents; i++)
if (cfg == map[i].cfg)
return i;
return -ENOENT;
}
EXPORT_SYMBOL_GPL(qcom_find_cfg_index);
struct regmap *
qcom_cc_map(struct platform_device *pdev, const struct qcom_cc_desc *desc)
{


@ -49,6 +49,8 @@ extern void
qcom_pll_set_fsm_mode(struct regmap *m, u32 reg, u8 bias_count, u8 lock_count);
extern int qcom_find_src_index(struct clk_hw *hw, const struct parent_map *map,
u8 src);
extern int qcom_find_cfg_index(struct clk_hw *hw, const struct parent_map *map,
u8 cfg);
extern int qcom_cc_register_board_clk(struct device *dev, const char *path,
const char *name, unsigned long rate);


@ -1121,7 +1121,7 @@ static struct clk_rcg2 gcc_sdcc1_apps_clk_src = {
.name = "gcc_sdcc1_apps_clk_src",
.parent_data = gcc_parent_data_1,
.num_parents = ARRAY_SIZE(gcc_parent_data_1),
.ops = &clk_rcg2_ops,
.ops = &clk_rcg2_floor_ops,
},
};
@ -1143,7 +1143,7 @@ static struct clk_rcg2 gcc_sdcc1_ice_core_clk_src = {
.name = "gcc_sdcc1_ice_core_clk_src",
.parent_data = gcc_parent_data_0,
.num_parents = ARRAY_SIZE(gcc_parent_data_0),
.ops = &clk_rcg2_floor_ops,
.ops = &clk_rcg2_ops,
},
};


@ -543,8 +543,8 @@ static void __init of_syscon_icst_setup(struct device_node *np)
regclk = icst_clk_setup(NULL, &icst_desc, name, parent_name, map, ctype);
if (IS_ERR(regclk)) {
kfree(name);
pr_err("error setting up syscon ICST clock %s\n", name);
kfree(name);
return;
}
of_clk_add_provider(np, of_clk_src_simple_get, regclk);


@ -394,8 +394,13 @@ EXPORT_SYMBOL_GPL(timer_unstable_counter_workaround);
static atomic_t timer_unstable_counter_workaround_in_use = ATOMIC_INIT(0);
static void erratum_set_next_event_generic(const int access, unsigned long evt,
struct clock_event_device *clk)
/*
* Force the inlining of this function so that the register accesses
* can be themselves correctly inlined.
*/
static __always_inline
void erratum_set_next_event_generic(const int access, unsigned long evt,
struct clock_event_device *clk)
{
unsigned long ctrl;
u64 cval;


@ -47,7 +47,7 @@ static int __init timer_get_base_and_rate(struct device_node *np,
pr_warn("pclk for %pOFn is present, but could not be activated\n",
np);
if (!of_property_read_u32(np, "clock-freq", rate) &&
if (!of_property_read_u32(np, "clock-freq", rate) ||
!of_property_read_u32(np, "clock-frequency", rate))
return 0;


@ -1004,10 +1004,9 @@ static struct kobj_type ktype_cpufreq = {
.release = cpufreq_sysfs_release,
};
static void add_cpu_dev_symlink(struct cpufreq_policy *policy, unsigned int cpu)
static void add_cpu_dev_symlink(struct cpufreq_policy *policy, unsigned int cpu,
struct device *dev)
{
struct device *dev = get_cpu_device(cpu);
if (unlikely(!dev))
return;
@ -1296,8 +1295,9 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy)
if (policy->max_freq_req) {
/*
* CPUFREQ_CREATE_POLICY notification is sent only after
* successfully adding max_freq_req request.
* Remove max_freq_req after sending CPUFREQ_REMOVE_POLICY
* notification, since CPUFREQ_CREATE_POLICY notification was
* sent after adding max_freq_req earlier.
*/
blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
CPUFREQ_REMOVE_POLICY, policy);
@ -1391,7 +1391,7 @@ static int cpufreq_online(unsigned int cpu)
if (new_policy) {
for_each_cpu(j, policy->related_cpus) {
per_cpu(cpufreq_cpu_data, j) = policy;
add_cpu_dev_symlink(policy, j);
add_cpu_dev_symlink(policy, j, get_cpu_device(j));
}
policy->min_freq_req = kzalloc(2 * sizeof(*policy->min_freq_req),
@ -1565,7 +1565,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
/* Create sysfs link on CPU registration */
policy = per_cpu(cpufreq_cpu_data, cpu);
if (policy)
add_cpu_dev_symlink(policy, cpu);
add_cpu_dev_symlink(policy, cpu, dev);
return 0;
}


@ -290,7 +290,7 @@ static void system_heap_dma_buf_release(struct dma_buf *dmabuf)
int i;
table = &buffer->sg_table;
for_each_sg(table->sgl, sg, table->nents, i) {
for_each_sgtable_sg(table, sg, i) {
struct page *page = sg_page(sg);
__free_pages(page, compound_order(page));


@ -1396,7 +1396,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct sg_table *sg = NULL;
uint64_t user_addr = 0;
struct amdgpu_bo *bo;
struct drm_gem_object *gobj;
struct drm_gem_object *gobj = NULL;
u32 domain, alloc_domain;
u64 alloc_flags;
int ret;
@ -1506,14 +1506,16 @@ allocate_init_user_pages_failed:
remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
drm_vma_node_revoke(&gobj->vma_node, drm_priv);
err_node_allow:
drm_gem_object_put(gobj);
/* Don't unreserve system mem limit twice */
goto err_reserve_limit;
err_bo_create:
unreserve_mem_limit(adev, size, alloc_domain, !!sg);
err_reserve_limit:
mutex_destroy(&(*mem)->lock);
kfree(*mem);
if (gobj)
drm_gem_object_put(gobj);
else
kfree(*mem);
err:
if (sg) {
sg_free_table(sg);


@ -3833,7 +3833,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
/* disable all interrupts */
amdgpu_irq_disable_all(adev);
if (adev->mode_info.mode_config_initialized){
if (!amdgpu_device_has_dc_support(adev))
if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
drm_helper_force_disable_all(adev_to_drm(adev));
else
drm_atomic_helper_shutdown(adev_to_drm(adev));
@ -4289,6 +4289,8 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
{
int r;
amdgpu_amdkfd_pre_reset(adev);
if (from_hypervisor)
r = amdgpu_virt_request_full_gpu(adev, true);
else
@ -4316,6 +4318,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
amdgpu_irq_gpu_reset_resume_helper(adev);
r = amdgpu_ib_ring_tests(adev);
amdgpu_amdkfd_post_reset(adev);
error:
if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
@ -5030,7 +5033,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
amdgpu_amdkfd_pre_reset(tmp_adev);
if (!amdgpu_sriov_vf(tmp_adev))
amdgpu_amdkfd_pre_reset(tmp_adev);
/*
* Mark these ASICs to be reseted as untracked first
@ -5129,7 +5133,7 @@ skip_hw_reset:
drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
}
if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) {
drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
}
@ -5148,9 +5152,9 @@ skip_hw_reset:
skip_sched_resume:
list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
/* unlock kfd */
if (!need_emergency_restart)
amdgpu_amdkfd_post_reset(tmp_adev);
/* unlock kfd: SRIOV would do it separately */
if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
amdgpu_amdkfd_post_reset(tmp_adev);
/* kfd_post_reset will do nothing if kfd device is not initialized,
* need to bring up kfd here if it's not be initialized before


@ -157,6 +157,8 @@ static int hw_id_map[MAX_HWIP] = {
[HDP_HWIP] = HDP_HWID,
[SDMA0_HWIP] = SDMA0_HWID,
[SDMA1_HWIP] = SDMA1_HWID,
[SDMA2_HWIP] = SDMA2_HWID,
[SDMA3_HWIP] = SDMA3_HWID,
[MMHUB_HWIP] = MMHUB_HWID,
[ATHUB_HWIP] = ATHUB_HWID,
[NBIO_HWIP] = NBIF_HWID,
@ -918,6 +920,7 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(3, 0, 64):
case IP_VERSION(3, 1, 1):
case IP_VERSION(3, 0, 2):
case IP_VERSION(3, 0, 192):
amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block);
if (!amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block);

Some files were not shown because too many files have changed in this diff.