Merge branch 'x86/process' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into HEAD

Required for KVM support of the CPUID faulting feature.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
Paolo Bonzini 2017-04-21 11:55:06 +02:00
commit 8afd74c296
23 changed files with 259 additions and 88 deletions

View File

@ -302,8 +302,8 @@ extern int ignore_sigio_fd(int fd);
extern void maybe_sigio_broken(int fd, int read); extern void maybe_sigio_broken(int fd, int read);
extern void sigio_broken(int fd, int read); extern void sigio_broken(int fd, int read);
/* sys-x86_64/prctl.c */ /* prctl.c */
extern int os_arch_prctl(int pid, int code, unsigned long *addr); extern int os_arch_prctl(int pid, int option, unsigned long *arg2);
/* tty.c */ /* tty.c */
extern int get_pty(void); extern int get_pty(void);

View File

@ -390,3 +390,4 @@
381 i386 pkey_alloc sys_pkey_alloc 381 i386 pkey_alloc sys_pkey_alloc
382 i386 pkey_free sys_pkey_free 382 i386 pkey_free sys_pkey_free
383 i386 statx sys_statx 383 i386 statx sys_statx
384 i386 arch_prctl sys_arch_prctl compat_sys_arch_prctl

View File

@ -187,6 +187,7 @@
* Reuse free bits when adding new feature flags! * Reuse free bits when adding new feature flags!
*/ */
#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */ #define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ #define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ #define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */

View File

@ -45,6 +45,8 @@
#define MSR_IA32_PERFCTR1 0x000000c2 #define MSR_IA32_PERFCTR1 0x000000c2
#define MSR_FSB_FREQ 0x000000cd #define MSR_FSB_FREQ 0x000000cd
#define MSR_PLATFORM_INFO 0x000000ce #define MSR_PLATFORM_INFO 0x000000ce
#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31
#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT)
#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 #define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
#define NHM_C3_AUTO_DEMOTE (1UL << 25) #define NHM_C3_AUTO_DEMOTE (1UL << 25)
@ -127,6 +129,7 @@
/* DEBUGCTLMSR bits (others vary by model): */ /* DEBUGCTLMSR bits (others vary by model): */
#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ #define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
#define DEBUGCTLMSR_BTF_SHIFT 1
#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
#define DEBUGCTLMSR_TR (1UL << 6) #define DEBUGCTLMSR_TR (1UL << 6)
#define DEBUGCTLMSR_BTS (1UL << 7) #define DEBUGCTLMSR_BTS (1UL << 7)
@ -552,10 +555,12 @@
#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39 #define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39
#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT) #define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT)
/* MISC_FEATURE_ENABLES non-architectural features */ /* MISC_FEATURES_ENABLES non-architectural features */
#define MSR_MISC_FEATURE_ENABLES 0x00000140 #define MSR_MISC_FEATURES_ENABLES 0x00000140
#define MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT 1 #define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT 0
#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT BIT_ULL(MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT)
#define MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT 1
#define MSR_IA32_TSC_DEADLINE 0x000006E0 #define MSR_IA32_TSC_DEADLINE 0x000006E0

View File

@ -884,6 +884,8 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
extern int get_tsc_mode(unsigned long adr); extern int get_tsc_mode(unsigned long adr);
extern int set_tsc_mode(unsigned int val); extern int set_tsc_mode(unsigned int val);
DECLARE_PER_CPU(u64, msr_misc_features_shadow);
/* Register/unregister a process' MPX related resource */ /* Register/unregister a process' MPX related resource */
#define MPX_ENABLE_MANAGEMENT() mpx_enable_management() #define MPX_ENABLE_MANAGEMENT() mpx_enable_management()
#define MPX_DISABLE_MANAGEMENT() mpx_disable_management() #define MPX_DISABLE_MANAGEMENT() mpx_disable_management()

View File

@ -9,6 +9,7 @@ void syscall_init(void);
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
void entry_SYSCALL_64(void); void entry_SYSCALL_64(void);
long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2);
#endif #endif
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
@ -30,6 +31,7 @@ void x86_report_nx(void);
extern int reboot_force; extern int reboot_force;
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); long do_arch_prctl_common(struct task_struct *task, int option,
unsigned long cpuid_enabled);
#endif /* _ASM_X86_PROTO_H */ #endif /* _ASM_X86_PROTO_H */

View File

@ -87,6 +87,7 @@ struct thread_info {
#define TIF_SECCOMP 8 /* secure computing */ #define TIF_SECCOMP 8 /* secure computing */
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
#define TIF_UPROBE 12 /* breakpointed or singlestepping */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */
#define TIF_NOCPUID 15 /* CPUID is not accessible in userland */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_IA32 17 /* IA32 compatibility process */ #define TIF_IA32 17 /* IA32 compatibility process */
#define TIF_NOHZ 19 /* in adaptive nohz mode */ #define TIF_NOHZ 19 /* in adaptive nohz mode */
@ -110,6 +111,7 @@ struct thread_info {
#define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
#define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_NOCPUID (1 << TIF_NOCPUID)
#define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_IA32 (1 << TIF_IA32) #define _TIF_IA32 (1 << TIF_IA32)
#define _TIF_NOHZ (1 << TIF_NOHZ) #define _TIF_NOHZ (1 << TIF_NOHZ)
@ -138,7 +140,7 @@ struct thread_info {
/* flags to check in __switch_to() */ /* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \ #define _TIF_WORK_CTXSW \
(_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
@ -239,6 +241,8 @@ static inline int arch_within_stack_frames(const void * const stack,
extern void arch_task_cache_init(void); extern void arch_task_cache_init(void);
extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
extern void arch_release_task_struct(struct task_struct *tsk); extern void arch_release_task_struct(struct task_struct *tsk);
extern void arch_setup_new_exec(void);
#define arch_setup_new_exec arch_setup_new_exec
#endif /* !__ASSEMBLY__ */ #endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_THREAD_INFO_H */ #endif /* _ASM_X86_THREAD_INFO_H */

View File

@ -110,6 +110,16 @@ static inline void cr4_clear_bits(unsigned long mask)
} }
} }
/*
 * Invert every CR4 bit selected by @mask.
 *
 * The new value is derived from the per-CPU shadow (cpu_tlbstate.cr4),
 * stored back into that shadow first, and then handed to __write_cr4().
 */
static inline void cr4_toggle_bits(unsigned long mask)
{
	unsigned long toggled = this_cpu_read(cpu_tlbstate.cr4) ^ mask;

	this_cpu_write(cpu_tlbstate.cr4, toggled);
	__write_cr4(toggled);
}
/* Read the CR4 shadow. */ /* Read the CR4 shadow. */
static inline unsigned long cr4_read_shadow(void) static inline unsigned long cr4_read_shadow(void)
{ {

View File

@ -1,10 +1,13 @@
#ifndef _ASM_X86_PRCTL_H #ifndef _ASM_X86_PRCTL_H
#define _ASM_X86_PRCTL_H #define _ASM_X86_PRCTL_H
#define ARCH_SET_GS 0x1001 #define ARCH_SET_GS 0x1001
#define ARCH_SET_FS 0x1002 #define ARCH_SET_FS 0x1002
#define ARCH_GET_FS 0x1003 #define ARCH_GET_FS 0x1003
#define ARCH_GET_GS 0x1004 #define ARCH_GET_GS 0x1004
#define ARCH_GET_CPUID 0x1011
#define ARCH_SET_CPUID 0x1012
#define ARCH_MAP_VDSO_X32 0x2001 #define ARCH_MAP_VDSO_X32 0x2001
#define ARCH_MAP_VDSO_32 0x2002 #define ARCH_MAP_VDSO_32 0x2002

View File

@ -90,16 +90,12 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)
return; return;
} }
if (ring3mwait_disabled) { if (ring3mwait_disabled)
msr_clear_bit(MSR_MISC_FEATURE_ENABLES,
MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT);
return; return;
}
msr_set_bit(MSR_MISC_FEATURE_ENABLES,
MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT);
set_cpu_cap(c, X86_FEATURE_RING3MWAIT); set_cpu_cap(c, X86_FEATURE_RING3MWAIT);
this_cpu_or(msr_misc_features_shadow,
1UL << MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT);
if (c == &boot_cpu_data) if (c == &boot_cpu_data)
ELF_HWCAP2 |= HWCAP2_RING3MWAIT; ELF_HWCAP2 |= HWCAP2_RING3MWAIT;
@ -488,6 +484,34 @@ static void intel_bsp_resume(struct cpuinfo_x86 *c)
init_intel_energy_perf(c); init_intel_energy_perf(c);
} }
/*
 * Set X86_FEATURE_CPUID_FAULT on @c when MSR_PLATFORM_INFO can be read
 * and has its CPUID-fault bit set. If the safe read returns non-zero,
 * nothing is changed.
 */
static void init_cpuid_fault(struct cpuinfo_x86 *c)
{
	u64 platform_info;

	if (rdmsrl_safe(MSR_PLATFORM_INFO, &platform_info))
		return;

	if (platform_info & MSR_PLATFORM_INFO_CPUID_FAULT)
		set_cpu_cap(c, X86_FEATURE_CPUID_FAULT);
}
/*
 * Rebuild this CPU's MSR_MISC_FEATURES_ENABLES state from scratch:
 * reset the per-CPU shadow, run the feature probes, then write the
 * resulting shadow value to the MSR.
 */
static void init_intel_misc_features(struct cpuinfo_x86 *c)
{
u64 msr;
/* Non-zero return from the safe read: leave shadow and MSR untouched */
if (rdmsrl_safe(MSR_MISC_FEATURES_ENABLES, &msr))
return;
/* Clear all MISC features */
this_cpu_write(msr_misc_features_shadow, 0);
/* Check features and update capabilities and shadow control bits */
init_cpuid_fault(c);
probe_xeon_phi_r3mwait(c);
/* The value read above is discarded; only the rebuilt shadow is written */
msr = this_cpu_read(msr_misc_features_shadow);
wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
}
static void init_intel(struct cpuinfo_x86 *c) static void init_intel(struct cpuinfo_x86 *c)
{ {
unsigned int l2 = 0; unsigned int l2 = 0;
@ -602,7 +626,7 @@ static void init_intel(struct cpuinfo_x86 *c)
init_intel_energy_perf(c); init_intel_energy_perf(c);
probe_xeon_phi_r3mwait(c); init_intel_misc_features(c);
} }
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32

View File

@ -37,6 +37,7 @@
#include <asm/vm86.h> #include <asm/vm86.h>
#include <asm/switch_to.h> #include <asm/switch_to.h>
#include <asm/desc.h> #include <asm/desc.h>
#include <asm/prctl.h>
/* /*
* per-CPU TSS segments. Threads are completely 'soft' on Linux, * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@ -124,11 +125,6 @@ void flush_thread(void)
fpu__clear(&tsk->thread.fpu); fpu__clear(&tsk->thread.fpu);
} }
static void hard_disable_TSC(void)
{
cr4_set_bits(X86_CR4_TSD);
}
void disable_TSC(void) void disable_TSC(void)
{ {
preempt_disable(); preempt_disable();
@ -137,15 +133,10 @@ void disable_TSC(void)
* Must flip the CPU state synchronously with * Must flip the CPU state synchronously with
* TIF_NOTSC in the current running context. * TIF_NOTSC in the current running context.
*/ */
hard_disable_TSC(); cr4_set_bits(X86_CR4_TSD);
preempt_enable(); preempt_enable();
} }
static void hard_enable_TSC(void)
{
cr4_clear_bits(X86_CR4_TSD);
}
static void enable_TSC(void) static void enable_TSC(void)
{ {
preempt_disable(); preempt_disable();
@ -154,7 +145,7 @@ static void enable_TSC(void)
* Must flip the CPU state synchronously with * Must flip the CPU state synchronously with
* TIF_NOTSC in the current running context. * TIF_NOTSC in the current running context.
*/ */
hard_enable_TSC(); cr4_clear_bits(X86_CR4_TSD);
preempt_enable(); preempt_enable();
} }
@ -182,54 +173,129 @@ int set_tsc_mode(unsigned int val)
return 0; return 0;
} }
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, DEFINE_PER_CPU(u64, msr_misc_features_shadow);
struct tss_struct *tss)
static void set_cpuid_faulting(bool on)
{ {
struct thread_struct *prev, *next; u64 msrval;
prev = &prev_p->thread; msrval = this_cpu_read(msr_misc_features_shadow);
next = &next_p->thread; msrval &= ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT;
msrval |= (on << MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT);
this_cpu_write(msr_misc_features_shadow, msrval);
wrmsrl(MSR_MISC_FEATURES_ENABLES, msrval);
}
if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^ static void disable_cpuid(void)
test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) { {
unsigned long debugctl = get_debugctlmsr(); preempt_disable();
if (!test_and_set_thread_flag(TIF_NOCPUID)) {
debugctl &= ~DEBUGCTLMSR_BTF; /*
if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) * Must flip the CPU state synchronously with
debugctl |= DEBUGCTLMSR_BTF; * TIF_NOCPUID in the current running context.
*/
update_debugctlmsr(debugctl); set_cpuid_faulting(true);
} }
preempt_enable();
}
if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ static void enable_cpuid(void)
test_tsk_thread_flag(next_p, TIF_NOTSC)) { {
/* prev and next are different */ preempt_disable();
if (test_tsk_thread_flag(next_p, TIF_NOTSC)) if (test_and_clear_thread_flag(TIF_NOCPUID)) {
hard_disable_TSC(); /*
else * Must flip the CPU state synchronously with
hard_enable_TSC(); * TIF_NOCPUID in the current running context.
*/
set_cpuid_faulting(false);
} }
preempt_enable();
}
if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { static int get_cpuid_mode(void)
{
return !test_thread_flag(TIF_NOCPUID);
}
/*
 * Turn the CPUID instruction on or off via enable_cpuid()/disable_cpuid().
 *
 * @task:          not consulted by this function.
 * @cpuid_enabled: non-zero selects enable_cpuid(), zero disable_cpuid().
 *
 * Returns 0 on success, or -ENODEV when X86_FEATURE_CPUID_FAULT is not
 * available.
 */
static int set_cpuid_mode(struct task_struct *task, unsigned long cpuid_enabled)
{
	if (!static_cpu_has(X86_FEATURE_CPUID_FAULT))
		return -ENODEV;

	if (!cpuid_enabled) {
		disable_cpuid();
		return 0;
	}

	enable_cpuid();
	return 0;
}
/*
 * Called immediately after a successful exec.
 *
 * Only action taken here: undo a TIF_NOCPUID setting left on the current
 * thread, so CPUID is enabled again after the exec.
 */
void arch_setup_new_exec(void)
{
/* If cpuid was previously disabled for this task, re-enable it. */
if (test_thread_flag(TIF_NOCPUID))
enable_cpuid();
}
static inline void switch_to_bitmap(struct tss_struct *tss,
struct thread_struct *prev,
struct thread_struct *next,
unsigned long tifp, unsigned long tifn)
{
if (tifn & _TIF_IO_BITMAP) {
/* /*
* Copy the relevant range of the IO bitmap. * Copy the relevant range of the IO bitmap.
* Normally this is 128 bytes or less: * Normally this is 128 bytes or less:
*/ */
memcpy(tss->io_bitmap, next->io_bitmap_ptr, memcpy(tss->io_bitmap, next->io_bitmap_ptr,
max(prev->io_bitmap_max, next->io_bitmap_max)); max(prev->io_bitmap_max, next->io_bitmap_max));
/* /*
* Make sure that the TSS limit is correct for the CPU * Make sure that the TSS limit is correct for the CPU
* to notice the IO bitmap. * to notice the IO bitmap.
*/ */
refresh_tss_limit(); refresh_tss_limit();
} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { } else if (tifp & _TIF_IO_BITMAP) {
/* /*
* Clear any possible leftover bits: * Clear any possible leftover bits:
*/ */
memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
} }
}
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
struct tss_struct *tss)
{
struct thread_struct *prev, *next;
unsigned long tifp, tifn;
prev = &prev_p->thread;
next = &next_p->thread;
tifn = READ_ONCE(task_thread_info(next_p)->flags);
tifp = READ_ONCE(task_thread_info(prev_p)->flags);
switch_to_bitmap(tss, prev, next, tifp, tifn);
propagate_user_return_notify(prev_p, next_p); propagate_user_return_notify(prev_p, next_p);
if ((tifp & _TIF_BLOCKSTEP || tifn & _TIF_BLOCKSTEP) &&
arch_has_block_step()) {
unsigned long debugctl, msk;
rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
debugctl &= ~DEBUGCTLMSR_BTF;
msk = tifn & _TIF_BLOCKSTEP;
debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT;
wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}
if ((tifp ^ tifn) & _TIF_NOTSC)
cr4_toggle_bits(X86_CR4_TSD);
if ((tifp ^ tifn) & _TIF_NOCPUID)
set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
} }
/* /*
@ -550,3 +616,16 @@ out:
put_task_stack(p); put_task_stack(p);
return ret; return ret;
} }
/*
 * Handle the arch_prctl() options dealt with in this file.
 *
 * @task:          passed through to set_cpuid_mode().
 * @option:        ARCH_GET_CPUID or ARCH_SET_CPUID.
 * @cpuid_enabled: argument for ARCH_SET_CPUID; unused for ARCH_GET_CPUID.
 *
 * Returns the result of get_cpuid_mode() or set_cpuid_mode() for the two
 * known options, and -EINVAL for any other option.
 */
long do_arch_prctl_common(struct task_struct *task, int option,
			  unsigned long cpuid_enabled)
{
	if (option == ARCH_GET_CPUID)
		return get_cpuid_mode();

	if (option == ARCH_SET_CPUID)
		return set_cpuid_mode(task, cpuid_enabled);

	return -EINVAL;
}

View File

@ -37,6 +37,7 @@
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/io.h> #include <linux/io.h>
#include <linux/kdebug.h> #include <linux/kdebug.h>
#include <linux/syscalls.h>
#include <asm/pgtable.h> #include <asm/pgtable.h>
#include <asm/ldt.h> #include <asm/ldt.h>
@ -56,6 +57,7 @@
#include <asm/switch_to.h> #include <asm/switch_to.h>
#include <asm/vm86.h> #include <asm/vm86.h>
#include <asm/intel_rdt.h> #include <asm/intel_rdt.h>
#include <asm/proto.h>
void __show_regs(struct pt_regs *regs, int all) void __show_regs(struct pt_regs *regs, int all)
{ {
@ -304,3 +306,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
return prev_p; return prev_p;
} }
/*
 * arch_prctl() syscall entry: no option is handled locally, everything is
 * forwarded to do_arch_prctl_common() for the current task and its result
 * is returned unchanged.
 */
SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
{
return do_arch_prctl_common(current, option, arg2);
}

View File

@ -37,6 +37,7 @@
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/io.h> #include <linux/io.h>
#include <linux/ftrace.h> #include <linux/ftrace.h>
#include <linux/syscalls.h>
#include <asm/pgtable.h> #include <asm/pgtable.h>
#include <asm/processor.h> #include <asm/processor.h>
@ -204,7 +205,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
(struct user_desc __user *)tls, 0); (struct user_desc __user *)tls, 0);
else else
#endif #endif
err = do_arch_prctl(p, ARCH_SET_FS, tls); err = do_arch_prctl_64(p, ARCH_SET_FS, tls);
if (err) if (err)
goto out; goto out;
} }
@ -547,70 +548,72 @@ static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
} }
#endif #endif
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
{ {
int ret = 0; int ret = 0;
int doit = task == current; int doit = task == current;
int cpu; int cpu;
switch (code) { switch (option) {
case ARCH_SET_GS: case ARCH_SET_GS:
if (addr >= TASK_SIZE_MAX) if (arg2 >= TASK_SIZE_MAX)
return -EPERM; return -EPERM;
cpu = get_cpu(); cpu = get_cpu();
task->thread.gsindex = 0; task->thread.gsindex = 0;
task->thread.gsbase = addr; task->thread.gsbase = arg2;
if (doit) { if (doit) {
load_gs_index(0); load_gs_index(0);
ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr); ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2);
} }
put_cpu(); put_cpu();
break; break;
case ARCH_SET_FS: case ARCH_SET_FS:
/* Not strictly needed for fs, but do it for symmetry /* Not strictly needed for fs, but do it for symmetry
with gs */ with gs */
if (addr >= TASK_SIZE_MAX) if (arg2 >= TASK_SIZE_MAX)
return -EPERM; return -EPERM;
cpu = get_cpu(); cpu = get_cpu();
task->thread.fsindex = 0; task->thread.fsindex = 0;
task->thread.fsbase = addr; task->thread.fsbase = arg2;
if (doit) { if (doit) {
/* set the selector to 0 to not confuse __switch_to */ /* set the selector to 0 to not confuse __switch_to */
loadsegment(fs, 0); loadsegment(fs, 0);
ret = wrmsrl_safe(MSR_FS_BASE, addr); ret = wrmsrl_safe(MSR_FS_BASE, arg2);
} }
put_cpu(); put_cpu();
break; break;
case ARCH_GET_FS: { case ARCH_GET_FS: {
unsigned long base; unsigned long base;
if (doit) if (doit)
rdmsrl(MSR_FS_BASE, base); rdmsrl(MSR_FS_BASE, base);
else else
base = task->thread.fsbase; base = task->thread.fsbase;
ret = put_user(base, (unsigned long __user *)addr); ret = put_user(base, (unsigned long __user *)arg2);
break; break;
} }
case ARCH_GET_GS: { case ARCH_GET_GS: {
unsigned long base; unsigned long base;
if (doit) if (doit)
rdmsrl(MSR_KERNEL_GS_BASE, base); rdmsrl(MSR_KERNEL_GS_BASE, base);
else else
base = task->thread.gsbase; base = task->thread.gsbase;
ret = put_user(base, (unsigned long __user *)addr); ret = put_user(base, (unsigned long __user *)arg2);
break; break;
} }
#ifdef CONFIG_CHECKPOINT_RESTORE #ifdef CONFIG_CHECKPOINT_RESTORE
# ifdef CONFIG_X86_X32_ABI # ifdef CONFIG_X86_X32_ABI
case ARCH_MAP_VDSO_X32: case ARCH_MAP_VDSO_X32:
return prctl_map_vdso(&vdso_image_x32, addr); return prctl_map_vdso(&vdso_image_x32, arg2);
# endif # endif
# if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION # if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
case ARCH_MAP_VDSO_32: case ARCH_MAP_VDSO_32:
return prctl_map_vdso(&vdso_image_32, addr); return prctl_map_vdso(&vdso_image_32, arg2);
# endif # endif
case ARCH_MAP_VDSO_64: case ARCH_MAP_VDSO_64:
return prctl_map_vdso(&vdso_image_64, addr); return prctl_map_vdso(&vdso_image_64, arg2);
#endif #endif
default: default:
@ -621,11 +624,24 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
return ret; return ret;
} }
long sys_arch_prctl(int code, unsigned long addr) SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
{ {
return do_arch_prctl(current, code, addr); long ret;
ret = do_arch_prctl_64(current, option, arg2);
if (ret == -EINVAL)
ret = do_arch_prctl_common(current, option, arg2);
return ret;
} }
#ifdef CONFIG_IA32_EMULATION
/*
 * Compat arch_prctl() entry, built only with CONFIG_IA32_EMULATION.
 * It goes straight to do_arch_prctl_common() and does not call
 * do_arch_prctl_64().
 */
COMPAT_SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
{
return do_arch_prctl_common(current, option, arg2);
}
#endif
unsigned long KSTK_ESP(struct task_struct *task) unsigned long KSTK_ESP(struct task_struct *task)
{ {
return task_pt_regs(task)->sp; return task_pt_regs(task)->sp;

View File

@ -396,12 +396,12 @@ static int putreg(struct task_struct *child,
if (value >= TASK_SIZE_MAX) if (value >= TASK_SIZE_MAX)
return -EIO; return -EIO;
/* /*
* When changing the segment base, use do_arch_prctl * When changing the segment base, use do_arch_prctl_64
* to set either thread.fs or thread.fsindex and the * to set either thread.fs or thread.fsindex and the
* corresponding GDT slot. * corresponding GDT slot.
*/ */
if (child->thread.fsbase != value) if (child->thread.fsbase != value)
return do_arch_prctl(child, ARCH_SET_FS, value); return do_arch_prctl_64(child, ARCH_SET_FS, value);
return 0; return 0;
case offsetof(struct user_regs_struct,gs_base): case offsetof(struct user_regs_struct,gs_base):
/* /*
@ -410,7 +410,7 @@ static int putreg(struct task_struct *child,
if (value >= TASK_SIZE_MAX) if (value >= TASK_SIZE_MAX)
return -EIO; return -EIO;
if (child->thread.gsbase != value) if (child->thread.gsbase != value)
return do_arch_prctl(child, ARCH_SET_GS, value); return do_arch_prctl_64(child, ARCH_SET_GS, value);
return 0; return 0;
#endif #endif
} }
@ -869,7 +869,7 @@ long arch_ptrace(struct task_struct *child, long request,
Works just like arch_prctl, except that the arguments Works just like arch_prctl, except that the arguments
are reversed. */ are reversed. */
case PTRACE_ARCH_PRCTL: case PTRACE_ARCH_PRCTL:
ret = do_arch_prctl(child, data, addr); ret = do_arch_prctl_64(child, data, addr);
break; break;
#endif #endif

View File

@ -16,7 +16,7 @@ obj-y = bug.o bugs_$(BITS).o delay.o fault.o ldt.o \
ifeq ($(CONFIG_X86_32),y) ifeq ($(CONFIG_X86_32),y)
obj-y += checksum_32.o obj-y += checksum_32.o syscalls_32.o
obj-$(CONFIG_ELF_CORE) += elfcore.o obj-$(CONFIG_ELF_CORE) += elfcore.o
subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o

View File

@ -78,7 +78,7 @@ static inline int ptrace_set_thread_area(struct task_struct *child, int idx,
return -ENOSYS; return -ENOSYS;
} }
extern long arch_prctl(struct task_struct *task, int code, extern long arch_prctl(struct task_struct *task, int option,
unsigned long __user *addr); unsigned long __user *addr);
#endif #endif

View File

@ -6,7 +6,7 @@
#include <sys/ptrace.h> #include <sys/ptrace.h>
#include <asm/ptrace.h> #include <asm/ptrace.h>
int os_arch_prctl(int pid, int code, unsigned long *addr) int os_arch_prctl(int pid, int option, unsigned long *arg2)
{ {
return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) addr, code); return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) arg2, option);
} }

View File

@ -0,0 +1,7 @@
#include <linux/syscalls.h>
#include <os.h>
/*
 * arch_prctl() stub: both arguments are ignored and every call fails
 * with -EINVAL.
 */
SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
{
return -EINVAL;
}

View File

@ -7,13 +7,15 @@
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/sched/mm.h> #include <linux/sched/mm.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <asm/prctl.h> /* XXX This should get the constants from libc */ #include <asm/prctl.h> /* XXX This should get the constants from libc */
#include <os.h> #include <os.h>
long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr) long arch_prctl(struct task_struct *task, int option,
unsigned long __user *arg2)
{ {
unsigned long *ptr = addr, tmp; unsigned long *ptr = arg2, tmp;
long ret; long ret;
int pid = task->mm->context.id.u.pid; int pid = task->mm->context.id.u.pid;
@ -30,7 +32,7 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
* arch_prctl is run on the host, then the registers are read * arch_prctl is run on the host, then the registers are read
* back. * back.
*/ */
switch (code) { switch (option) {
case ARCH_SET_FS: case ARCH_SET_FS:
case ARCH_SET_GS: case ARCH_SET_GS:
ret = restore_registers(pid, &current->thread.regs.regs); ret = restore_registers(pid, &current->thread.regs.regs);
@ -50,11 +52,11 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
ptr = &tmp; ptr = &tmp;
} }
ret = os_arch_prctl(pid, code, ptr); ret = os_arch_prctl(pid, option, ptr);
if (ret) if (ret)
return ret; return ret;
switch (code) { switch (option) {
case ARCH_SET_FS: case ARCH_SET_FS:
current->thread.arch.fs = (unsigned long) ptr; current->thread.arch.fs = (unsigned long) ptr;
ret = save_registers(pid, &current->thread.regs.regs); ret = save_registers(pid, &current->thread.regs.regs);
@ -63,19 +65,19 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
ret = save_registers(pid, &current->thread.regs.regs); ret = save_registers(pid, &current->thread.regs.regs);
break; break;
case ARCH_GET_FS: case ARCH_GET_FS:
ret = put_user(tmp, addr); ret = put_user(tmp, arg2);
break; break;
case ARCH_GET_GS: case ARCH_GET_GS:
ret = put_user(tmp, addr); ret = put_user(tmp, arg2);
break; break;
} }
return ret; return ret;
} }
long sys_arch_prctl(int code, unsigned long addr) SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
{ {
return arch_prctl(current, code, (unsigned long __user *) addr); return arch_prctl(current, option, (unsigned long __user *) arg2);
} }
void arch_switch_to(struct task_struct *to) void arch_switch_to(struct task_struct *to)

View File

@ -1320,6 +1320,7 @@ void setup_new_exec(struct linux_binprm * bprm)
else else
set_dumpable(current->mm, suid_dumpable); set_dumpable(current->mm, suid_dumpable);
arch_setup_new_exec();
perf_event_exec(); perf_event_exec();
__set_task_comm(current, kbasename(bprm->filename), true); __set_task_comm(current, kbasename(bprm->filename), true);

View File

@ -723,6 +723,8 @@ asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid,
asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32, asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32,
int, const char __user *); int, const char __user *);
asmlinkage long compat_sys_arch_prctl(int option, unsigned long arg2);
/* /*
* For most but not all architectures, "am I in a compat syscall?" and * For most but not all architectures, "am I in a compat syscall?" and
* "am I a compat task?" are the same question. For architectures on which * "am I a compat task?" are the same question. For architectures on which

View File

@ -101,6 +101,10 @@ static inline void check_object_size(const void *ptr, unsigned long n,
{ } { }
#endif /* CONFIG_HARDENED_USERCOPY */ #endif /* CONFIG_HARDENED_USERCOPY */
#ifndef arch_setup_new_exec
static inline void arch_setup_new_exec(void) { }
#endif
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* _LINUX_THREAD_INFO_H */ #endif /* _LINUX_THREAD_INFO_H */

View File

@ -148,6 +148,7 @@ cat << EOF
#define __IGNORE_sysfs #define __IGNORE_sysfs
#define __IGNORE_uselib #define __IGNORE_uselib
#define __IGNORE__sysctl #define __IGNORE__sysctl
#define __IGNORE_arch_prctl
/* ... including the "new" 32-bit uid syscalls */ /* ... including the "new" 32-bit uid syscalls */
#define __IGNORE_lchown32 #define __IGNORE_lchown32