x86/fpu: Remove use_eager_fpu()
This removes all the obvious code paths that depend on lazy FPU mode.
It shouldn't change the generated code at all.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Rik van Riel <riel@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: pbonzini@redhat.com
Link: http://lkml.kernel.org/r/1475627678-20788-5-git-send-email-riel@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent 2f7fada235
commit c592b57347
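Why the generated code should not change: as the arch/x86/include/asm/fpu/internal.h hunk below shows, use_eager_fpu() had already been hard-coded to return true, so every if (use_eager_fpu()) / if (!use_eager_fpu()) test was resolved at compile time and the lazy-mode branches were already dead code. The following is a minimal user-space sketch of that reasoning; the stubs are illustrative stand-ins, not the kernel's implementations.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the kernel helpers touched by this patch. */
static inline bool use_eager_fpu(void) { return true; }	/* already hard-coded to true */
static inline void stts(void) { puts("set CR0.TS"); }		/* lazy-mode work, never reached */

/* Before the patch: the lazy branch exists in the source but is statically dead. */
static void fpregs_deactivate_hw_before(void)
{
	if (!use_eager_fpu())
		stts();
}

/* After the patch: the dead branch is removed; behaviour is unchanged. */
static void fpregs_deactivate_hw_after(void)
{
}

int main(void)
{
	fpregs_deactivate_hw_before();	/* prints nothing: the stts() arm is unreachable */
	fpregs_deactivate_hw_after();
	return 0;
}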
arch/x86/crypto/crc32c-intel_glue.c
@@ -48,21 +48,13 @@
 #ifdef CONFIG_X86_64
 /*
  * use carryless multiply version of crc32c when buffer
- * size is >= 512 (when eager fpu is enabled) or
- * >= 1024 (when eager fpu is disabled) to account
+ * size is >= 512 to account
  * for fpu state save/restore overhead.
  */
-#define CRC32C_PCL_BREAKEVEN_EAGERFPU	512
-#define CRC32C_PCL_BREAKEVEN_NOEAGERFPU	1024
+#define CRC32C_PCL_BREAKEVEN	512
 
 asmlinkage unsigned int crc_pcl(const u8 *buffer, int len,
 				unsigned int crc_init);
-static int crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_EAGERFPU;
-#define set_pcl_breakeven_point()					\
-do {									\
-	if (!use_eager_fpu())						\
-		crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU; \
-} while (0)
 #endif /* CONFIG_X86_64 */
 
 static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
@@ -185,7 +177,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
 	 * use faster PCL version if datasize is large enough to
 	 * overcome kernel fpu state save/restore overhead
 	 */
-	if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) {
+	if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) {
 		kernel_fpu_begin();
 		*crcp = crc_pcl(data, len, *crcp);
 		kernel_fpu_end();
@@ -197,7 +189,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
 static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
 				u8 *out)
 {
-	if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) {
+	if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) {
 		kernel_fpu_begin();
 		*(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp));
 		kernel_fpu_end();
@@ -256,7 +248,6 @@ static int __init crc32c_intel_mod_init(void)
 		alg.update = crc32c_pcl_intel_update;
 		alg.finup = crc32c_pcl_intel_finup;
 		alg.digest = crc32c_pcl_intel_digest;
-		set_pcl_breakeven_point();
 	}
 #endif
 	return crypto_register_shash(&alg);
arch/x86/include/asm/fpu/internal.h
@@ -60,11 +60,6 @@ extern u64 fpu__get_supported_xfeatures_mask(void);
 /*
  * FPU related CPU feature flag helper routines:
  */
-static __always_inline __pure bool use_eager_fpu(void)
-{
-	return true;
-}
-
 static __always_inline __pure bool use_xsaveopt(void)
 {
 	return static_cpu_has(X86_FEATURE_XSAVEOPT);
@@ -501,24 +496,6 @@ static inline int fpu_want_lazy_restore(struct fpu *fpu, unsigned int cpu)
 }
 
 
-/*
- * Wrap lazy FPU TS handling in a 'hw fpregs activation/deactivation'
- * idiom, which is then paired with the sw-flag (fpregs_active) later on:
- */
-
-static inline void __fpregs_activate_hw(void)
-{
-	if (!use_eager_fpu())
-		clts();
-}
-
-static inline void __fpregs_deactivate_hw(void)
-{
-	if (!use_eager_fpu())
-		stts();
-}
-
-/* Must be paired with an 'stts' (fpregs_deactivate_hw()) after! */
 static inline void __fpregs_deactivate(struct fpu *fpu)
 {
 	WARN_ON_FPU(!fpu->fpregs_active);
@@ -528,7 +505,6 @@ static inline void __fpregs_deactivate(struct fpu *fpu)
 	trace_x86_fpu_regs_deactivated(fpu);
 }
 
-/* Must be paired with a 'clts' (fpregs_activate_hw()) before! */
 static inline void __fpregs_activate(struct fpu *fpu)
 {
 	WARN_ON_FPU(fpu->fpregs_active);
@@ -554,22 +530,17 @@ static inline int fpregs_active(void)
 }
 
 /*
- * Encapsulate the CR0.TS handling together with the
- * software flag.
- *
  * These generally need preemption protection to work,
  * do try to avoid using these on their own.
  */
 static inline void fpregs_activate(struct fpu *fpu)
 {
-	__fpregs_activate_hw();
 	__fpregs_activate(fpu);
 }
 
 static inline void fpregs_deactivate(struct fpu *fpu)
 {
 	__fpregs_deactivate(fpu);
-	__fpregs_deactivate_hw();
 }
 
 /*
@@ -596,8 +567,7 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
 	 * or if the past 5 consecutive context-switches used math.
 	 */
 	fpu.preload = static_cpu_has(X86_FEATURE_FPU) &&
-		      new_fpu->fpstate_active &&
-		      (use_eager_fpu() || new_fpu->counter > 5);
+		      new_fpu->fpstate_active;
 
 	if (old_fpu->fpregs_active) {
 		if (!copy_fpregs_to_fpstate(old_fpu))
@@ -615,8 +585,6 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
 			__fpregs_activate(new_fpu);
 			trace_x86_fpu_regs_activated(new_fpu);
 			prefetch(&new_fpu->state);
-		} else {
-			__fpregs_deactivate_hw();
 		}
 	} else {
 		old_fpu->counter = 0;
arch/x86/kernel/fpu/core.c
@@ -57,27 +57,9 @@ static bool kernel_fpu_disabled(void)
 	return this_cpu_read(in_kernel_fpu);
 }
 
-/*
- * Were we in an interrupt that interrupted kernel mode?
- *
- * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that
- * pair does nothing at all: the thread must not have fpu (so
- * that we don't try to save the FPU state), and TS must
- * be set (so that the clts/stts pair does nothing that is
- * visible in the interrupted kernel thread).
- *
- * Except for the eagerfpu case when we return true; in the likely case
- * the thread has FPU but we are not going to set/clear TS.
- */
 static bool interrupted_kernel_fpu_idle(void)
 {
-	if (kernel_fpu_disabled())
-		return false;
-
-	if (use_eager_fpu())
-		return true;
-
-	return !current->thread.fpu.fpregs_active && (read_cr0() & X86_CR0_TS);
+	return !kernel_fpu_disabled();
 }
 
 /*
@@ -125,7 +107,6 @@ void __kernel_fpu_begin(void)
 		copy_fpregs_to_fpstate(fpu);
 	} else {
 		this_cpu_write(fpu_fpregs_owner_ctx, NULL);
-		__fpregs_activate_hw();
 	}
 }
 EXPORT_SYMBOL(__kernel_fpu_begin);
@@ -136,8 +117,6 @@ void __kernel_fpu_end(void)
 
 	if (fpu->fpregs_active)
 		copy_kernel_to_fpregs(&fpu->state);
-	else
-		__fpregs_deactivate_hw();
 
 	kernel_fpu_enable();
 }
@@ -199,10 +178,7 @@ void fpu__save(struct fpu *fpu)
 	trace_x86_fpu_before_save(fpu);
 	if (fpu->fpregs_active) {
 		if (!copy_fpregs_to_fpstate(fpu)) {
-			if (use_eager_fpu())
-				copy_kernel_to_fpregs(&fpu->state);
-			else
-				fpregs_deactivate(fpu);
+			copy_kernel_to_fpregs(&fpu->state);
 		}
 	}
 	trace_x86_fpu_after_save(fpu);
@@ -259,8 +235,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 	 * Don't let 'init optimized' areas of the XSAVE area
 	 * leak into the child task:
 	 */
-	if (use_eager_fpu())
-		memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
+	memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
 
 	/*
 	 * Save current FPU registers directly into the child
@@ -282,10 +257,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 		memcpy(&src_fpu->state, &dst_fpu->state,
 		       fpu_kernel_xstate_size);
 
-		if (use_eager_fpu())
-			copy_kernel_to_fpregs(&src_fpu->state);
-		else
-			fpregs_deactivate(src_fpu);
+		copy_kernel_to_fpregs(&src_fpu->state);
 	}
 	preempt_enable();
 
@@ -517,7 +489,7 @@ void fpu__clear(struct fpu *fpu)
 {
 	WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */
 
-	if (!use_eager_fpu() || !static_cpu_has(X86_FEATURE_FPU)) {
+	if (!static_cpu_has(X86_FEATURE_FPU)) {
 		/* FPU state will be reallocated lazily at the first use. */
 		fpu__drop(fpu);
 	} else {
arch/x86/kernel/fpu/signal.c
@@ -340,11 +340,9 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		}
 
 		fpu->fpstate_active = 1;
-		if (use_eager_fpu()) {
-			preempt_disable();
-			fpu__restore(fpu);
-			preempt_enable();
-		}
+		preempt_disable();
+		fpu__restore(fpu);
+		preempt_enable();
 
 		return err;
 	} else {
arch/x86/kernel/fpu/xstate.c
@@ -886,15 +886,6 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 	 */
 	if (!boot_cpu_has(X86_FEATURE_OSPKE))
 		return -EINVAL;
-	/*
-	 * For most XSAVE components, this would be an arduous task:
-	 * brining fpstate up to date with fpregs, updating fpstate,
-	 * then re-populating fpregs.  But, for components that are
-	 * never lazily managed, we can just access the fpregs
-	 * directly.  PKRU is never managed lazily, so we can just
-	 * manipulate it directly.  Make sure it stays that way.
-	 */
-	WARN_ON_ONCE(!use_eager_fpu());
 
 	/* Set the bits we need in PKRU: */
 	if (init_val & PKEY_DISABLE_ACCESS)
arch/x86/kvm/cpuid.c
@@ -16,7 +16,6 @@
 #include <linux/export.h>
 #include <linux/vmalloc.h>
 #include <linux/uaccess.h>
-#include <asm/fpu/internal.h> /* For use_eager_fpu.  Ugh! */
 #include <asm/user.h>
 #include <asm/fpu/xstate.h>
 #include "cpuid.h"
@@ -114,8 +113,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
 	if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
 		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
 
-	if (use_eager_fpu())
-		kvm_x86_ops->fpu_activate(vcpu);
+	kvm_x86_ops->fpu_activate(vcpu);
 
 	/*
 	 * The existing code assumes virtual address is 48-bit in the canonical
arch/x86/kvm/x86.c
@@ -7357,16 +7357,6 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 	copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
 	__kernel_fpu_end();
 	++vcpu->stat.fpu_reload;
-	/*
-	 * If using eager FPU mode, or if the guest is a frequent user
-	 * of the FPU, just leave the FPU active for next time.
-	 * Every 255 times fpu_counter rolls over to 0; a guest that uses
-	 * the FPU in bursts will revert to loading it on demand.
-	 */
-	if (!use_eager_fpu()) {
-		if (++vcpu->fpu_counter < 5)
-			kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
-	}
 	trace_kvm_fpu(0);
 }
 