mirror of https://github.com/torvalds/linux.git
synced 2024-11-10 22:21:40 +00:00
Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Ingo Molnar:
 "Misc changes:

   - fix lguest bug
   - fix /proc/meminfo output on certain configs
   - fix pvclock bug
   - fix reboot on certain iMacs by adding new reboot quirk
   - fix bootup crash
   - fix FPU boot line option parsing
   - add more x86 self-tests
   - small cleanups, documentation improvements, etc"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/cpu/amd: Remove an unneeded condition in srat_detect_node()
  x86/vdso/pvclock: Protect STABLE check with the seqcount
  x86/mm: Improve switch_mm() barrier comments
  selftests/x86: Test __kernel_sigreturn and __kernel_rt_sigreturn
  x86/reboot/quirks: Add iMac10,1 to pci_reboot_dmi_table[]
  lguest: Map switcher text R/O
  x86/boot: Hide local labels in verify_cpu()
  x86/fpu: Disable AVX when eagerfpu is off
  x86/fpu: Disable MPX when eagerfpu is off
  x86/fpu: Disable XGETBV1 when no XSAVE
  x86/fpu: Fix early FPU command-line parsing
  x86/mm: Use PAGE_ALIGNED instead of IS_ALIGNED
  selftests/x86: Disable the ldt_gdt_64 test for now
  x86/mm/pat: Make split_page_count() check for empty levels to fix /proc/meminfo output
  x86/boot: Double BOOT_HEAP_SIZE to 64KB
  x86/mm: Add barriers and document switch_mm()-vs-flush synchronization
This commit is contained in:
commit 10a0c0f059
arch/x86/entry/vdso/vclock_gettime.c
@@ -126,23 +126,23 @@ static notrace cycle_t vread_pvclock(int *mode)
	 *
	 * On Xen, we don't appear to have that guarantee, but Xen still
	 * supplies a valid seqlock using the version field.
	 *
	 * We only do pvclock vdso timing at all if
	 * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
	 * mean that all vCPUs have matching pvti and that the TSC is
	 * synced, so we can just look at vCPU 0's pvti.
	 */

-	if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {
-		*mode = VCLOCK_NONE;
-		return 0;
-	}
-
	do {
		version = pvti->version;

		smp_rmb();

+		if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {
+			*mode = VCLOCK_NONE;
+			return 0;
+		}
+
		tsc = rdtsc_ordered();
		pvti_tsc_to_system_mul = pvti->tsc_to_system_mul;
		pvti_tsc_shift = pvti->tsc_shift;
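The change above moves the PVCLOCK_TSC_STABLE_BIT test inside the version loop, so the flag is sampled under the same seqcount as the timing fields it guards. A minimal userspace sketch of that read pattern, with hypothetical stand-in names for the pvti fields (not the kernel's actual structures):

#include <stdatomic.h>
#include <stdint.h>

/* Hypothetical stand-in for pvclock_vcpu_time_info. */
struct pvti_sketch {
	_Atomic uint32_t version;	/* odd while the writer is mid-update */
	uint32_t flags;
	uint32_t tsc_to_system_mul;
};

/*
 * Retry until the version is even and unchanged across all payload
 * reads, so flags and mul are guaranteed to come from one consistent
 * update.  Checking flags outside this loop could pair a stale flag
 * with fresh timing data, which is the race the patch closes.
 */
static void read_consistent(struct pvti_sketch *p, uint32_t *flags,
			    uint32_t *mul)
{
	uint32_t v1, v2;

	do {
		v1 = atomic_load_explicit(&p->version, memory_order_acquire);
		*flags = p->flags;
		*mul = p->tsc_to_system_mul;
		atomic_thread_fence(memory_order_acquire);
		v2 = atomic_load_explicit(&p->version, memory_order_relaxed);
	} while ((v1 & 1) || v1 != v2);	/* writer active or raced: retry */
}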
arch/x86/include/asm/boot.h
@@ -27,7 +27,7 @@
 #define BOOT_HEAP_SIZE	0x400000
 #else /* !CONFIG_KERNEL_BZIP2 */

-#define BOOT_HEAP_SIZE	0x8000
+#define BOOT_HEAP_SIZE	0x10000

 #endif /* !CONFIG_KERNEL_BZIP2 */

arch/x86/include/asm/fpu/internal.h
@@ -42,6 +42,7 @@ extern void fpu__init_cpu_xstate(void);
 extern void fpu__init_system(struct cpuinfo_x86 *c);
 extern void fpu__init_check_bugs(void);
 extern void fpu__resume_cpu(void);
+extern u64 fpu__get_supported_xfeatures_mask(void);

 /*
  * Debugging facility:
arch/x86/include/asm/fpu/xstate.h
@@ -20,14 +20,15 @@

 /* Supported features which support lazy state saving */
 #define XFEATURE_MASK_LAZY	(XFEATURE_MASK_FP | \
-				 XFEATURE_MASK_SSE | \
-				 XFEATURE_MASK_YMM | \
-				 XFEATURE_MASK_OPMASK | \
-				 XFEATURE_MASK_ZMM_Hi256 | \
-				 XFEATURE_MASK_Hi16_ZMM)
+				 XFEATURE_MASK_SSE)

 /* Supported features which require eager state saving */
-#define XFEATURE_MASK_EAGER	(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR)
+#define XFEATURE_MASK_EAGER	(XFEATURE_MASK_BNDREGS | \
+				 XFEATURE_MASK_BNDCSR | \
+				 XFEATURE_MASK_YMM | \
+				 XFEATURE_MASK_OPMASK | \
+				 XFEATURE_MASK_ZMM_Hi256 | \
+				 XFEATURE_MASK_Hi16_ZMM)

 /* All currently supported features */
 #define XCNTXT_MASK	(XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER)
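This split matters because fpu__get_supported_xfeatures_mask() (in the fpu/init.c hunk further down) simply strips XFEATURE_MASK_EAGER when eagerfpu is disabled, so everything that moved into the eager set -- AVX and AVX-512 state included -- is dropped along with MPX. A toy illustration of that mask algebra, using made-up single-bit values rather than the real XFEATURE_MASK_* constants:

#include <stdint.h>
#include <stdio.h>

/* Made-up bits standing in for the real XFEATURE_MASK_* values. */
#define FP       (1u << 0)
#define SSE      (1u << 1)
#define YMM      (1u << 2)
#define BNDREGS  (1u << 3)
#define BNDCSR   (1u << 4)

#define LAZY_MASK   (FP | SSE)
#define EAGER_MASK  (BNDREGS | BNDCSR | YMM)
#define ALL_MASK    (LAZY_MASK | EAGER_MASK)

int main(void)
{
	uint32_t xfeatures = ALL_MASK;	/* what the CPU enumerated */

	/* eagerfpu=off: mask out every feature that needs eager switching */
	xfeatures &= ~EAGER_MASK;

	printf("remaining: %#x (FP|SSE only)\n", xfeatures);
	return 0;
}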
arch/x86/include/asm/lguest.h
@@ -12,7 +12,9 @@
 #define GUEST_PL 1

 /* Page for Switcher text itself, then two pages per cpu */
-#define TOTAL_SWITCHER_PAGES (1 + 2 * nr_cpu_ids)
+#define SWITCHER_TEXT_PAGES	(1)
+#define SWITCHER_STACK_PAGES	(2 * nr_cpu_ids)
+#define TOTAL_SWITCHER_PAGES	(SWITCHER_TEXT_PAGES + SWITCHER_STACK_PAGES)

 /* Where we map the Switcher, in both Host and Guest. */
 extern unsigned long switcher_addr;
arch/x86/include/asm/mmu_context.h
@@ -116,8 +116,36 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 #endif
 		cpumask_set_cpu(cpu, mm_cpumask(next));

-		/* Re-load page tables */
+		/*
+		 * Re-load page tables.
+		 *
+		 * This logic has an ordering constraint:
+		 *
+		 *  CPU 0: Write to a PTE for 'next'
+		 *  CPU 0: load bit 1 in mm_cpumask.  if nonzero, send IPI.
+		 *  CPU 1: set bit 1 in next's mm_cpumask
+		 *  CPU 1: load from the PTE that CPU 0 writes (implicit)
+		 *
+		 * We need to prevent an outcome in which CPU 1 observes
+		 * the new PTE value and CPU 0 observes bit 1 clear in
+		 * mm_cpumask.  (If that occurs, then the IPI will never
+		 * be sent, and CPU 0's TLB will contain a stale entry.)
+		 *
+		 * The bad outcome can occur if either CPU's load is
+		 * reordered before that CPU's store, so both CPUs must
+		 * execute full barriers to prevent this from happening.
+		 *
+		 * Thus, switch_mm needs a full barrier between the
+		 * store to mm_cpumask and any operation that could load
+		 * from next->pgd.  TLB fills are special and can happen
+		 * due to instruction fetches or for no reason at all,
+		 * and neither LOCK nor MFENCE orders them.
+		 * Fortunately, load_cr3() is serializing and gives the
+		 * ordering guarantee we need.
+		 *
+		 */
 		load_cr3(next->pgd);
+
 		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);

 		/* Stop flush ipis for the previous mm */
@@ -156,10 +184,14 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
			 * schedule, protecting us from simultaneous changes.
			 */
			cpumask_set_cpu(cpu, mm_cpumask(next));
+
			/*
			 * We were in lazy tlb mode and leave_mm disabled
			 * tlb flush IPI delivery. We must reload CR3
			 * to make sure to use no freed page tables.
+			 *
+			 * As above, load_cr3() is serializing and orders TLB
+			 * fills with respect to the mm_cpumask write.
			 */
			load_cr3(next->pgd);
			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
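The constraint documented above is the classic store-buffering pattern: each CPU must not let its load pass its own store. A self-contained C11 sketch of the same two-sided protocol (hypothetical names; seq_cst fences stand in for the implicit full barriers that load_cr3() and INVLPG provide on real hardware):

#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool cpu1_bit;	/* one bit of mm_cpumask: "CPU 1 uses this mm" */
static atomic_long pte;		/* the page-table entry CPU 0 modifies */

/* CPU 0: write the PTE, then look at the mask to decide about the IPI. */
static bool cpu0_needs_flush_ipi(void)
{
	atomic_store_explicit(&pte, 42, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* full barrier */
	return atomic_load_explicit(&cpu1_bit, memory_order_relaxed);
}

/* CPU 1: publish itself in the mask, then (implicitly) load the PTE. */
static long cpu1_switch_mm(void)
{
	atomic_store_explicit(&cpu1_bit, true, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* load_cr3()'s role */
	return atomic_load_explicit(&pte, memory_order_relaxed);
}

/*
 * Without both fences, hardware may order each load before the other
 * CPU's store becomes visible: cpu0_needs_flush_ipi() returns false
 * while cpu1_switch_mm() still reads the stale PTE -- exactly the
 * "no IPI, stale TLB" outcome the kernel comment rules out.
 */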
arch/x86/kernel/cpu/amd.c
@@ -434,8 +434,7 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
		 */
		int ht_nodeid = c->initial_apicid;

-		if (ht_nodeid >= 0 &&
-		    __apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
+		if (__apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
			node = __apicid_to_node[ht_nodeid];
		/* Pick a nearby node */
		if (!node_online(node))
arch/x86/kernel/fpu/init.c
@@ -3,8 +3,11 @@
  */
 #include <asm/fpu/internal.h>
 #include <asm/tlbflush.h>
+#include <asm/setup.h>
+#include <asm/cmdline.h>

 #include <linux/sched.h>
+#include <linux/init.h>

 /*
  * Initialize the TS bit in CR0 according to the style of context-switches
@@ -270,20 +273,52 @@ static void __init fpu__init_system_xstate_size_legacy(void)
  */
 static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO;

-static int __init eager_fpu_setup(char *s)
+/*
+ * Find supported xfeatures based on cpu features and command-line input.
+ * This must be called after fpu__init_parse_early_param() is called and
+ * xfeatures_mask is enumerated.
+ */
+u64 __init fpu__get_supported_xfeatures_mask(void)
 {
-	if (!strcmp(s, "on"))
-		eagerfpu = ENABLE;
-	else if (!strcmp(s, "off"))
-		eagerfpu = DISABLE;
-	else if (!strcmp(s, "auto"))
-		eagerfpu = AUTO;
-	return 1;
+	/* Support all xfeatures known to us */
+	if (eagerfpu != DISABLE)
+		return XCNTXT_MASK;
+
+	/* Warning of xfeatures being disabled for no eagerfpu mode */
+	if (xfeatures_mask & XFEATURE_MASK_EAGER) {
+		pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n",
+			xfeatures_mask & XFEATURE_MASK_EAGER);
+	}
+
+	/* Return a mask that masks out all features requiring eagerfpu mode */
+	return ~XFEATURE_MASK_EAGER;
+}
+
+/*
+ * Disable features dependent on eagerfpu.
+ */
+static void __init fpu__clear_eager_fpu_features(void)
+{
+	setup_clear_cpu_cap(X86_FEATURE_MPX);
+	setup_clear_cpu_cap(X86_FEATURE_AVX);
+	setup_clear_cpu_cap(X86_FEATURE_AVX2);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512F);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
 }
-__setup("eagerfpu=", eager_fpu_setup);

 /*
  * Pick the FPU context switching strategy:
+ *
+ * When eagerfpu is AUTO or ENABLE, we ensure it is ENABLE if either of
+ * the following is true:
+ *
+ * (1) the cpu has xsaveopt, as it has the optimization and doing eager
+ *     FPU switching has a relatively low cost compared to a plain xsave;
+ * (2) the cpu has xsave features (e.g. MPX) that depend on eager FPU
+ *     switching. Should the kernel boot with noxsaveopt, we support MPX
+ *     with eager FPU switching at a higher cost.
  */
 static void __init fpu__init_system_ctx_switch(void)
 {
@@ -295,19 +330,11 @@ static void __init fpu__init_system_ctx_switch(void)
	WARN_ON_FPU(current->thread.fpu.fpstate_active);
	current_thread_info()->status = 0;

	/* Auto enable eagerfpu for xsaveopt */
	if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE)
		eagerfpu = ENABLE;

-	if (xfeatures_mask & XFEATURE_MASK_EAGER) {
-		if (eagerfpu == DISABLE) {
-			pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n",
-			       xfeatures_mask & XFEATURE_MASK_EAGER);
-			xfeatures_mask &= ~XFEATURE_MASK_EAGER;
-		} else {
-			eagerfpu = ENABLE;
-		}
-	}
+	if (xfeatures_mask & XFEATURE_MASK_EAGER)
+		eagerfpu = ENABLE;

	if (eagerfpu == ENABLE)
		setup_force_cpu_cap(X86_FEATURE_EAGER_FPU);
@@ -315,12 +342,49 @@ static void __init fpu__init_system_ctx_switch(void)
	printk(KERN_INFO "x86/fpu: Using '%s' FPU context switches.\n", eagerfpu == ENABLE ? "eager" : "lazy");
 }

+/*
+ * We parse fpu parameters early because fpu__init_system() is executed
+ * before parse_early_param().
+ */
+static void __init fpu__init_parse_early_param(void)
+{
+	/*
+	 * No need to check "eagerfpu=auto" again, since it is the
+	 * initial default.
+	 */
+	if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) {
+		eagerfpu = DISABLE;
+		fpu__clear_eager_fpu_features();
+	} else if (cmdline_find_option_bool(boot_command_line, "eagerfpu=on")) {
+		eagerfpu = ENABLE;
+	}
+
+	if (cmdline_find_option_bool(boot_command_line, "no387"))
+		setup_clear_cpu_cap(X86_FEATURE_FPU);
+
+	if (cmdline_find_option_bool(boot_command_line, "nofxsr")) {
+		setup_clear_cpu_cap(X86_FEATURE_FXSR);
+		setup_clear_cpu_cap(X86_FEATURE_FXSR_OPT);
+		setup_clear_cpu_cap(X86_FEATURE_XMM);
+	}
+
+	if (cmdline_find_option_bool(boot_command_line, "noxsave"))
+		fpu__xstate_clear_all_cpu_caps();
+
+	if (cmdline_find_option_bool(boot_command_line, "noxsaveopt"))
+		setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
+
+	if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
+		setup_clear_cpu_cap(X86_FEATURE_XSAVES);
+}
+
 /*
  * Called on the boot CPU once per system bootup, to set up the initial
  * FPU state that is later cloned into all processes:
  */
 void __init fpu__init_system(struct cpuinfo_x86 *c)
 {
+	fpu__init_parse_early_param();
	fpu__init_system_early_generic(c);

	/*
@@ -344,62 +408,3 @@ void __init fpu__init_system(struct cpuinfo_x86 *c)

	fpu__init_system_ctx_switch();
 }
-
-/*
- * Boot parameter to turn off FPU support and fall back to math-emu:
- */
-static int __init no_387(char *s)
-{
-	setup_clear_cpu_cap(X86_FEATURE_FPU);
-	return 1;
-}
-__setup("no387", no_387);
-
-/*
- * Disable all xstate CPU features:
- */
-static int __init x86_noxsave_setup(char *s)
-{
-	if (strlen(s))
-		return 0;
-
-	fpu__xstate_clear_all_cpu_caps();
-
-	return 1;
-}
-__setup("noxsave", x86_noxsave_setup);
-
-/*
- * Disable the XSAVEOPT instruction specifically:
- */
-static int __init x86_noxsaveopt_setup(char *s)
-{
-	setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
-
-	return 1;
-}
-__setup("noxsaveopt", x86_noxsaveopt_setup);
-
-/*
- * Disable the XSAVES instruction:
- */
-static int __init x86_noxsaves_setup(char *s)
-{
-	setup_clear_cpu_cap(X86_FEATURE_XSAVES);
-
-	return 1;
-}
-__setup("noxsaves", x86_noxsaves_setup);
-
-/*
- * Disable FX save/restore and SSE support:
- */
-static int __init x86_nofxsr_setup(char *s)
-{
-	setup_clear_cpu_cap(X86_FEATURE_FXSR);
-	setup_clear_cpu_cap(X86_FEATURE_FXSR_OPT);
-	setup_clear_cpu_cap(X86_FEATURE_XMM);
-
-	return 1;
-}
-__setup("nofxsr", x86_nofxsr_setup);
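fpu__init_parse_early_param() scans boot_command_line by hand because fpu__init_system() runs before parse_early_param(), which is why the __setup() handlers removed above could fire too late. A rough userspace sketch of what a cmdline_find_option_bool()-style whole-word scan does (simplified; the real kernel helper also copes with quoting):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Simplified flag lookup: exact whole-word match in a space-separated line. */
static bool find_option_bool(const char *cmdline, const char *option)
{
	size_t len = strlen(option);
	const char *p = cmdline;

	while ((p = strstr(p, option)) != NULL) {
		bool starts = (p == cmdline) || (p[-1] == ' ');
		bool ends = (p[len] == '\0') || (p[len] == ' ');

		if (starts && ends)
			return true;
		p += len;	/* partial match, keep scanning */
	}
	return false;
}

int main(void)
{
	const char *cl = "ro quiet eagerfpu=off noxsaveopt";

	printf("eagerfpu=off: %d\n", find_option_bool(cl, "eagerfpu=off"));	/* 1 */
	printf("noxsave:      %d\n", find_option_bool(cl, "noxsave"));		/* 0: not a whole word */
	return 0;
}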
arch/x86/kernel/fpu/xstate.c
@@ -52,6 +52,7 @@ void fpu__xstate_clear_all_cpu_caps(void)
	setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
	setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
	setup_clear_cpu_cap(X86_FEATURE_MPX);
+	setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
 }

 /*
@@ -632,8 +633,7 @@ void __init fpu__init_system_xstate(void)
		BUG();
	}

-	/* Support only the state known to the OS: */
-	xfeatures_mask = xfeatures_mask & XCNTXT_MASK;
+	xfeatures_mask &= fpu__get_supported_xfeatures_mask();

	/* Enable xstate instructions to be able to continue with initialization: */
	fpu__init_cpu_xstate();
arch/x86/kernel/reboot.c
@@ -182,6 +182,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
			DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"),
		},
	},
+	{	/* Handle problems with rebooting on the iMac10,1. */
+		.callback = set_pci_reboot,
+		.ident = "Apple iMac10,1",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "iMac10,1"),
+		},
+	},

	/* ASRock */
	{	/* Handle problems with rebooting on ASRock Q1900DC-ITX */
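The quirk machinery behind this table is just a linear scan: match the DMI identification strings, then run the callback that selects a reboot method. A minimal userspace sketch of the same pattern (hypothetical structures, not the kernel's dmi_system_id API):

#include <stdio.h>
#include <string.h>

struct quirk {
	const char *vendor, *product;
	void (*apply)(void);
};

static void set_pci_reboot_sketch(void) { puts("reboot via PCI CF9"); }

static const struct quirk table[] = {
	{ "Apple Inc.", "iMac10,1", set_pci_reboot_sketch },
	{ NULL, NULL, NULL },	/* sentinel */
};

/* Walk the table and apply the first entry whose strings match. */
static void apply_quirks(const char *vendor, const char *product)
{
	for (const struct quirk *q = table; q->vendor; q++)
		if (!strcmp(q->vendor, vendor) && !strcmp(q->product, product))
			q->apply();
}

int main(void)
{
	apply_quirks("Apple Inc.", "iMac10,1");	/* prints the PCI CF9 line */
	return 0;
}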
arch/x86/kernel/verify_cpu.S
@@ -48,31 +48,31 @@ verify_cpu:
	pushfl
	popl	%eax
	cmpl	%eax,%ebx
-	jz	verify_cpu_no_longmode	# cpu has no cpuid
+	jz	.Lverify_cpu_no_longmode	# cpu has no cpuid
 #endif

	movl	$0x0,%eax		# See if cpuid 1 is implemented
	cpuid
	cmpl	$0x1,%eax
-	jb	verify_cpu_no_longmode	# no cpuid 1
+	jb	.Lverify_cpu_no_longmode	# no cpuid 1

	xor	%di,%di
	cmpl	$0x68747541,%ebx	# AuthenticAMD
-	jnz	verify_cpu_noamd
+	jnz	.Lverify_cpu_noamd
	cmpl	$0x69746e65,%edx
-	jnz	verify_cpu_noamd
+	jnz	.Lverify_cpu_noamd
	cmpl	$0x444d4163,%ecx
-	jnz	verify_cpu_noamd
+	jnz	.Lverify_cpu_noamd
	mov	$1,%di			# cpu is from AMD
-	jmp	verify_cpu_check
+	jmp	.Lverify_cpu_check

-verify_cpu_noamd:
+.Lverify_cpu_noamd:
	cmpl	$0x756e6547,%ebx	# GenuineIntel?
-	jnz	verify_cpu_check
+	jnz	.Lverify_cpu_check
	cmpl	$0x49656e69,%edx
-	jnz	verify_cpu_check
+	jnz	.Lverify_cpu_check
	cmpl	$0x6c65746e,%ecx
-	jnz	verify_cpu_check
+	jnz	.Lverify_cpu_check

	# only call IA32_MISC_ENABLE when:
	# family > 6 || (family == 6 && model >= 0xd)
@@ -83,59 +83,59 @@ verify_cpu_noamd:
	andl	$0x0ff00f00, %eax	# mask family and extended family
	shrl	$8, %eax
	cmpl	$6, %eax
-	ja	verify_cpu_clear_xd	# family > 6, ok
-	jb	verify_cpu_check	# family < 6, skip
+	ja	.Lverify_cpu_clear_xd	# family > 6, ok
+	jb	.Lverify_cpu_check	# family < 6, skip

	andl	$0x000f00f0, %ecx	# mask model and extended model
	shrl	$4, %ecx
	cmpl	$0xd, %ecx
-	jb	verify_cpu_check	# family == 6, model < 0xd, skip
+	jb	.Lverify_cpu_check	# family == 6, model < 0xd, skip

-verify_cpu_clear_xd:
+.Lverify_cpu_clear_xd:
	movl	$MSR_IA32_MISC_ENABLE, %ecx
	rdmsr
	btrl	$2, %edx		# clear MSR_IA32_MISC_ENABLE_XD_DISABLE
-	jnc	verify_cpu_check	# only write MSR if bit was changed
+	jnc	.Lverify_cpu_check	# only write MSR if bit was changed
	wrmsr

-verify_cpu_check:
+.Lverify_cpu_check:
	movl	$0x1,%eax		# Does the cpu have what it takes
	cpuid
	andl	$REQUIRED_MASK0,%edx
	xorl	$REQUIRED_MASK0,%edx
-	jnz	verify_cpu_no_longmode
+	jnz	.Lverify_cpu_no_longmode

	movl	$0x80000000,%eax	# See if extended cpuid is implemented
	cpuid
	cmpl	$0x80000001,%eax
-	jb	verify_cpu_no_longmode	# no extended cpuid
+	jb	.Lverify_cpu_no_longmode	# no extended cpuid

	movl	$0x80000001,%eax	# Does the cpu have what it takes
	cpuid
	andl	$REQUIRED_MASK1,%edx
	xorl	$REQUIRED_MASK1,%edx
-	jnz	verify_cpu_no_longmode
+	jnz	.Lverify_cpu_no_longmode

-verify_cpu_sse_test:
+.Lverify_cpu_sse_test:
	movl	$1,%eax
	cpuid
	andl	$SSE_MASK,%edx
	cmpl	$SSE_MASK,%edx
-	je	verify_cpu_sse_ok
+	je	.Lverify_cpu_sse_ok
	test	%di,%di
-	jz	verify_cpu_no_longmode	# only try to force SSE on AMD
+	jz	.Lverify_cpu_no_longmode	# only try to force SSE on AMD
	movl	$MSR_K7_HWCR,%ecx
	rdmsr
	btr	$15,%eax		# enable SSE
	wrmsr
	xor	%di,%di			# don't loop
-	jmp	verify_cpu_sse_test	# try again
+	jmp	.Lverify_cpu_sse_test	# try again

-verify_cpu_no_longmode:
+.Lverify_cpu_no_longmode:
	popf				# Restore caller passed flags
	movl	$1,%eax
	ret
-verify_cpu_sse_ok:
+.Lverify_cpu_sse_ok:
	popf				# Restore caller passed flags
	xorl	%eax, %eax
	ret
arch/x86/mm/init_64.c
@@ -814,8 +814,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
		if (phys_addr < (phys_addr_t)0x40000000)
			return;

-		if (IS_ALIGNED(addr, PAGE_SIZE) &&
-		    IS_ALIGNED(next, PAGE_SIZE)) {
+		if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) {
			/*
			 * Do not free direct mapping pages since they were
			 * freed when offlining, or simplely not in use.
arch/x86/mm/pageattr.c
@@ -66,6 +66,9 @@ void update_page_count(int level, unsigned long pages)

 static void split_page_count(int level)
 {
+	if (direct_pages_count[level] == 0)
+		return;
+
	direct_pages_count[level]--;
	direct_pages_count[level - 1] += PTRS_PER_PTE;
 }
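The new check matters because direct_pages_count[] holds unsigned counters: splitting a level whose count is already zero wrapped the counter around, which is what produced bogus DirectMap lines in /proc/meminfo. A toy reproduction of the underflow the guard prevents:

#include <stdio.h>

int main(void)
{
	unsigned long direct_pages_count[4] = { 0 };	/* per-level counters */
	int level = 1;

	/* Without the check: decrementing an empty level wraps around. */
	direct_pages_count[level]--;		/* 0 - 1 == ULONG_MAX */
	direct_pages_count[level - 1] += 512;	/* PTRS_PER_PTE on x86-64 */

	printf("level 1 pages: %lu\n", direct_pages_count[level]);
	return 0;
}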
arch/x86/mm/tlb.c
@@ -161,7 +161,10 @@ void flush_tlb_current_task(void)
	preempt_disable();

	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+
+	/* This is an implicit full barrier that synchronizes with switch_mm. */
	local_flush_tlb();
+
	trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
		flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
@@ -188,17 +191,29 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
	unsigned long base_pages_to_flush = TLB_FLUSH_ALL;

	preempt_disable();
-	if (current->active_mm != mm)
+	if (current->active_mm != mm) {
+		/* Synchronize with switch_mm. */
+		smp_mb();
+
		goto out;
+	}

	if (!current->mm) {
		leave_mm(smp_processor_id());
+
+		/* Synchronize with switch_mm. */
+		smp_mb();
+
		goto out;
	}

	if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
		base_pages_to_flush = (end - start) >> PAGE_SHIFT;

+	/*
+	 * Both branches below are implicit full barriers (MOV to CR or
+	 * INVLPG) that synchronize with switch_mm.
+	 */
	if (base_pages_to_flush > tlb_single_page_flush_ceiling) {
		base_pages_to_flush = TLB_FLUSH_ALL;
		count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
@@ -228,10 +243,18 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
	preempt_disable();

	if (current->active_mm == mm) {
-		if (current->mm)
+		if (current->mm) {
+			/*
+			 * Implicit full barrier (INVLPG) that synchronizes
+			 * with switch_mm.
+			 */
			__flush_tlb_one(start);
-		else
+		} else {
			leave_mm(smp_processor_id());
+
+			/* Synchronize with switch_mm. */
+			smp_mb();
+		}
	}

	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
drivers/lguest/core.c
@@ -22,7 +22,8 @@

 unsigned long switcher_addr;
 struct page **lg_switcher_pages;
-static struct vm_struct *switcher_vma;
+static struct vm_struct *switcher_text_vma;
+static struct vm_struct *switcher_stacks_vma;

 /* This One Big lock protects all inter-guest data structures. */
 DEFINE_MUTEX(lguest_lock);
@@ -82,55 +83,81 @@ static __init int map_switcher(void)
		}
	}

+	/*
+	 * Copy in the compiled-in Switcher code (from x86/switcher_32.S).
+	 * It goes in the first page, which we map in momentarily.
+	 */
+	memcpy(kmap(lg_switcher_pages[0]), start_switcher_text,
+	       end_switcher_text - start_switcher_text);
+	kunmap(lg_switcher_pages[0]);
+
	/*
	 * We place the Switcher underneath the fixmap area, which is the
	 * highest virtual address we can get.  This is important, since we
	 * tell the Guest it can't access this memory, so we want its ceiling
	 * as high as possible.
	 */
-	switcher_addr = FIXADDR_START - (TOTAL_SWITCHER_PAGES+1)*PAGE_SIZE;
+	switcher_addr = FIXADDR_START - TOTAL_SWITCHER_PAGES*PAGE_SIZE;

	/*
-	 * Now we reserve the "virtual memory area" we want.  We might
-	 * not get it in theory, but in practice it's worked so far.
-	 * The end address needs +1 because __get_vm_area allocates an
-	 * extra guard page, so we need space for that.
+	 * Now we reserve the "virtual memory area"s we want.  We might
+	 * not get them in theory, but in practice it's worked so far.
+	 *
+	 * We want the switcher text to be read-only and executable, and
+	 * the stacks to be read-write and non-executable.
	 */
-	switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE,
-				     VM_ALLOC, switcher_addr, switcher_addr
-				     + (TOTAL_SWITCHER_PAGES+1) * PAGE_SIZE);
-	if (!switcher_vma) {
+	switcher_text_vma = __get_vm_area(PAGE_SIZE, VM_ALLOC|VM_NO_GUARD,
+					  switcher_addr,
+					  switcher_addr + PAGE_SIZE);
+
+	if (!switcher_text_vma) {
		err = -ENOMEM;
		printk("lguest: could not map switcher pages high\n");
		goto free_pages;
	}

+	switcher_stacks_vma = __get_vm_area(SWITCHER_STACK_PAGES * PAGE_SIZE,
+					    VM_ALLOC|VM_NO_GUARD,
+					    switcher_addr + PAGE_SIZE,
+					    switcher_addr + TOTAL_SWITCHER_PAGES * PAGE_SIZE);
+	if (!switcher_stacks_vma) {
+		err = -ENOMEM;
+		printk("lguest: could not map switcher pages high\n");
+		goto free_text_vma;
+	}
+
	/*
	 * This code actually sets up the pages we've allocated to appear at
	 * switcher_addr. map_vm_area() takes the vma we allocated above, the
-	 * kind of pages we're mapping (kernel pages), and a pointer to our
-	 * array of struct pages.
+	 * kind of pages we're mapping (kernel text pages and kernel writable
+	 * pages respectively), and a pointer to our array of struct pages.
	 */
-	err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, lg_switcher_pages);
+	err = map_vm_area(switcher_text_vma, PAGE_KERNEL_RX, lg_switcher_pages);
+	if (err) {
+		printk("lguest: text map_vm_area failed: %i\n", err);
+		goto free_vmas;
+	}
+
+	err = map_vm_area(switcher_stacks_vma, PAGE_KERNEL,
+			  lg_switcher_pages + SWITCHER_TEXT_PAGES);
	if (err) {
-		printk("lguest: map_vm_area failed: %i\n", err);
-		goto free_vma;
+		printk("lguest: stacks map_vm_area failed: %i\n", err);
+		goto free_vmas;
	}

-	/*
-	 * Now the Switcher is mapped at the right address, we can't fail!
-	 * Copy in the compiled-in Switcher code (from x86/switcher_32.S).
-	 */
-	memcpy(switcher_vma->addr, start_switcher_text,
-	       end_switcher_text - start_switcher_text);
-
	printk(KERN_INFO "lguest: mapped switcher at %p\n",
-	       switcher_vma->addr);
+	       switcher_text_vma->addr);
	/* And we succeeded... */
	return 0;

-free_vma:
-	vunmap(switcher_vma->addr);
+free_vmas:
+	/* Undoes map_vm_area and __get_vm_area */
+	vunmap(switcher_stacks_vma->addr);
+free_text_vma:
+	vunmap(switcher_text_vma->addr);
 free_pages:
	i = TOTAL_SWITCHER_PAGES;
 free_some_pages:
@@ -148,7 +175,8 @@ static void unmap_switcher(void)
	unsigned int i;

	/* vunmap() undoes *both* map_vm_area() and __get_vm_area(). */
-	vunmap(switcher_vma->addr);
+	vunmap(switcher_text_vma->addr);
+	vunmap(switcher_stacks_vma->addr);
	/* Now we just need to free the pages we copied the switcher into */
	for (i = 0; i < TOTAL_SWITCHER_PAGES; i++)
		__free_pages(lg_switcher_pages[i], 0);
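With VM_NO_GUARD in play, the address arithmetic above works out cleanly; a userspace sketch of the resulting layout (the FIXADDR_START value and CPU count here are made up purely for illustration):

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define FIXADDR_START	0xfff15000UL	/* made-up value for illustration */

int main(void)
{
	unsigned long nr_cpu_ids = 4;			/* hypothetical CPU count */
	unsigned long text_pages = 1;			/* SWITCHER_TEXT_PAGES */
	unsigned long stack_pages = 2 * nr_cpu_ids;	/* SWITCHER_STACK_PAGES */
	unsigned long total = text_pages + stack_pages;

	/* VM_NO_GUARD drops the implicit guard page, hence no "+1" slack. */
	unsigned long switcher_addr = FIXADDR_START - total * PAGE_SIZE;

	printf("text:   %#lx, %lu page  (R/O + exec)\n",
	       switcher_addr, text_pages);
	printf("stacks: %#lx, %lu pages (R/W, no exec)\n",
	       switcher_addr + text_pages * PAGE_SIZE, stack_pages);
	return 0;
}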
tools/testing/selftests/x86/Makefile
@@ -4,9 +4,11 @@ include ../lib.mk

 .PHONY: all all_32 all_64 warn_32bit_failure clean

-TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs ldt_gdt syscall_nt ptrace_syscall
+TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall
 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault sigreturn test_syscall_vdso unwind_vdso \
-			test_FCMOV test_FCOMI test_FISTTP
+			test_FCMOV test_FCOMI test_FISTTP \
+			ldt_gdt \
+			vdso_restorer

 TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
 BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32)
tools/testing/selftests/x86/vdso_restorer.c (new file, 88 lines)
@@ -0,0 +1,88 @@
+/*
+ * vdso_restorer.c - tests vDSO-based signal restore
+ * Copyright (c) 2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * This makes sure that sa_restorer == NULL keeps working on 32-bit
+ * configurations.  Modern glibc doesn't use it under any circumstances,
+ * so it's easy to overlook breakage.
+ *
+ * 64-bit userspace has never supported sa_restorer == NULL, so this is
+ * 32-bit only.
+ */
+
+#define _GNU_SOURCE
+
+#include <err.h>
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <syscall.h>
+#include <sys/syscall.h>
+
+/* Open-code this -- the headers are too messy to easily use them. */
+struct real_sigaction {
+	void *handler;
+	unsigned long flags;
+	void *restorer;
+	unsigned int mask[2];
+};
+
+static volatile sig_atomic_t handler_called;
+
+static void handler_with_siginfo(int sig, siginfo_t *info, void *ctx_void)
+{
+	handler_called = 1;
+}
+
+static void handler_without_siginfo(int sig)
+{
+	handler_called = 1;
+}
+
+int main()
+{
+	int nerrs = 0;
+	struct real_sigaction sa;
+
+	memset(&sa, 0, sizeof(sa));
+	sa.handler = handler_with_siginfo;
+	sa.flags = SA_SIGINFO;
+	sa.restorer = NULL;	/* request kernel-provided restorer */
+
+	if (syscall(SYS_rt_sigaction, SIGUSR1, &sa, NULL, 8) != 0)
+		err(1, "raw rt_sigaction syscall");
+
+	raise(SIGUSR1);
+
+	if (handler_called) {
+		printf("[OK]\tSA_SIGINFO handler returned successfully\n");
+	} else {
+		printf("[FAIL]\tSA_SIGINFO handler was not called\n");
+		nerrs++;
+	}
+
+	sa.flags = 0;
+	sa.handler = handler_without_siginfo;
+	if (syscall(SYS_sigaction, SIGUSR1, &sa, 0) != 0)
+		err(1, "raw sigaction syscall");
+	handler_called = 0;
+
+	raise(SIGUSR1);
+
+	if (handler_called) {
+		printf("[OK]\t!SA_SIGINFO handler returned successfully\n");
+	} else {
+		printf("[FAIL]\t!SA_SIGINFO handler was not called\n");
+		nerrs++;
+	}
+}