diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 1a50e09c945b..03c3eb77bfce 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -178,7 +178,7 @@ notrace static cycle_t vread_tsc(void) /* * GCC likes to generate cmov here, but this branch is extremely - * predictable (it's just a funciton of time and the likely is + * predictable (it's just a function of time and the likely is * very likely) and there's a data dependence, so force GCC * to generate a branch instead. I don't barrier() because * we don't actually need a barrier, and if this function diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 69dd11887dd1..6c3b7c1780c9 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -649,7 +649,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event) /* * return the type of control flow change at address "from" - * intruction is not necessarily a branch (in case of interrupt). + * instruction is not necessarily a branch (in case of interrupt). * * The branch type returned also includes the priv level of the * target of the control flow change (X86_BR_USER, X86_BR_KERNEL). diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 68155cafa8a1..ba6ef18528c9 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -272,7 +272,7 @@ struct cpu_hw_events { * events to select for counter rescheduling. * * Care must be taken as the rescheduling algorithm is O(n!) which - * will increase scheduling cycles for an over-commited system + * will increase scheduling cycles for an over-committed system * dramatically. The number of such EVENT_CONSTRAINT_OVERLAP() macros * and its counter masks must be kept at a minimum. */ diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 0899cfc8dfe8..98f25bbafac4 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -643,8 +643,8 @@ static inline void entering_irq(void) static inline void entering_ack_irq(void) { - ack_APIC_irq(); entering_irq(); + ack_APIC_irq(); } static inline void ipi_entering_ack_irq(void) diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 21b66dbf3601..a4820d4df617 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -52,7 +52,7 @@ int ftrace_int3_handler(struct pt_regs *regs); * this screws up the trace output when tracing a ia32 task. * Instead of reporting bogus syscalls, just do not trace them. * - * If the user realy wants these, then they should use the + * If the user really wants these, then they should use the * raw syscall tracepoints with filtering. 
*/ #define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS 1 diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 1815b736269d..b90e1053049b 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -141,6 +141,7 @@ struct irq_alloc_info { struct irq_cfg { unsigned int dest_apicid; u8 vector; + u8 old_vector; }; extern struct irq_cfg *irq_cfg(unsigned int irq); @@ -168,20 +169,6 @@ extern atomic_t irq_mis_count; extern void elcr_set_level_irq(unsigned int irq); -/* SMP */ -extern __visible void smp_apic_timer_interrupt(struct pt_regs *); -extern __visible void smp_spurious_interrupt(struct pt_regs *); -extern __visible void smp_x86_platform_ipi(struct pt_regs *); -extern __visible void smp_error_interrupt(struct pt_regs *); -#ifdef CONFIG_X86_IO_APIC -extern asmlinkage void smp_irq_move_cleanup_interrupt(void); -#endif -#ifdef CONFIG_SMP -extern __visible void smp_reschedule_interrupt(struct pt_regs *); -extern __visible void smp_call_function_interrupt(struct pt_regs *); -extern __visible void smp_call_function_single_interrupt(struct pt_regs *); -#endif - extern char irq_entries_start[]; #ifdef CONFIG_TRACING #define trace_irq_entries_start irq_entries_start diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 93fb7c1cffda..7a79ee2778b3 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -42,14 +42,6 @@ struct saved_msrs { struct saved_msr *array; }; -static inline unsigned long long native_read_tscp(unsigned int *aux) -{ - unsigned long low, high; - asm volatile(".byte 0x0f,0x01,0xf9" - : "=a" (low), "=d" (high), "=c" (*aux)); - return low | ((u64)high << 32); -} - /* * both i386 and x86_64 returns 64-bit value in edx:eax, but gcc's "A" * constraint has different meanings. For i386, "A" means exactly diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index cad82c9c2fde..ceec86eb68e9 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -25,7 +25,7 @@ * This should be totally fair - if anything is waiting, a process that wants a * lock will go to the back of the queue. When the currently active lock is * released, if there's a writer at the front of the queue, then that and only - * that will be woken up; if there's a bunch of consequtive readers at the + * that will be woken up; if there's a bunch of consecutive readers at the * front, then they'll all be woken up, but no other readers will be. 
*/ diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index ca6ba3607705..90dbbd9666d4 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -87,9 +87,9 @@ int strcmp(const char *cs, const char *ct); * * Low level memory copy function that catches machine checks * - * Return true for success, false for fail + * Return 0 for success, -EFAULT for fail */ -bool memcpy_mcsafe(void *dst, const void *src, size_t cnt); +int memcpy_mcsafe(void *dst, const void *src, size_t cnt); #endif /* __KERNEL__ */ diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 8b2d4bea9962..39171b3646bb 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -62,4 +62,6 @@ void xen_arch_register_cpu(int num); void xen_arch_unregister_cpu(int num); #endif +extern void xen_set_iopl_mask(unsigned mask); + #endif /* _ASM_X86_XEN_HYPERVISOR_H */ diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index e75907601a41..8c2f1ef6ca23 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -956,7 +956,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void) /* * Note that the LAPIC address is obtained from the MADT (32-bit value) - * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value). + * and (optionally) overridden by a LAPIC_ADDR_OVR entry (64-bit value). */ count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE, @@ -984,7 +984,7 @@ static int __init acpi_parse_madt_lapic_entries(void) /* * Note that the LAPIC address is obtained from the MADT (32-bit value) - * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value). + * and (optionally) overridden by a LAPIC_ADDR_OVR entry (64-bit value). */ count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE, diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index 222a57076039..cefacbad1531 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c @@ -221,7 +221,7 @@ static int apbt_cpuhp_notify(struct notifier_block *n, unsigned long cpu = (unsigned long)hcpu; struct apbt_dev *adev = &per_cpu(cpu_apbt_dev, cpu); - switch (action & 0xf) { + switch (action & ~CPU_TASKS_FROZEN) { case CPU_DEAD: dw_apb_clockevent_pause(adev->timer); if (system_state == SYSTEM_RUNNING) { diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 531b9611c51d..d356987a04e9 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1611,7 +1611,7 @@ void __init enable_IR_x2apic(void) legacy_pic->mask_all(); mask_ioapic_entries(); - /* If irq_remapping_prepare() succeded, try to enable it */ + /* If irq_remapping_prepare() succeeded, try to enable it */ if (ir_stat >= 0) ir_stat = try_to_enable_IR(); /* ir_stat contains the remap mode or an error code */ diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 3b670df4ba7b..ad59d70bcb1a 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -213,6 +213,7 @@ update: */ cpumask_and(d->old_domain, d->old_domain, cpu_online_mask); d->move_in_progress = !cpumask_empty(d->old_domain); + d->cfg.old_vector = d->move_in_progress ? d->cfg.vector : 0; d->cfg.vector = vector; cpumask_copy(d->domain, vector_cpumask); success: @@ -655,46 +656,97 @@ void irq_complete_move(struct irq_cfg *cfg) } /* - * Called with @desc->lock held and interrupts disabled. 
+ * Called from fixup_irqs() with @desc->lock held and interrupts disabled. */ void irq_force_complete_move(struct irq_desc *desc) { struct irq_data *irqdata = irq_desc_get_irq_data(desc); struct apic_chip_data *data = apic_chip_data(irqdata); struct irq_cfg *cfg = data ? &data->cfg : NULL; + unsigned int cpu; if (!cfg) return; - __irq_complete_move(cfg, cfg->vector); - /* * This is tricky. If the cleanup of @data->old_domain has not been * done yet, then the following setaffinity call will fail with * -EBUSY. This can leave the interrupt in a stale state. * - * The cleanup cannot make progress because we hold @desc->lock. So in - * case @data->old_domain is not yet cleaned up, we need to drop the - * lock and acquire it again. @desc cannot go away, because the - * hotplug code holds the sparse irq lock. + * All CPUs are stuck in stop machine with interrupts disabled so + * calling __irq_complete_move() would be completely pointless. */ raw_spin_lock(&vector_lock); - /* Clean out all offline cpus (including ourself) first. */ + /* + * Clean out all offline cpus (including the outgoing one) from the + * old_domain mask. + */ cpumask_and(data->old_domain, data->old_domain, cpu_online_mask); - while (!cpumask_empty(data->old_domain)) { + + /* + * If move_in_progress is cleared and the old_domain mask is empty, + * then there is nothing to cleanup. fixup_irqs() will take care of + * the stale vectors on the outgoing cpu. + */ + if (!data->move_in_progress && cpumask_empty(data->old_domain)) { raw_spin_unlock(&vector_lock); - raw_spin_unlock(&desc->lock); - cpu_relax(); - raw_spin_lock(&desc->lock); - /* - * Reevaluate apic_chip_data. It might have been cleared after - * we dropped @desc->lock. - */ - data = apic_chip_data(irqdata); - if (!data) - return; - raw_spin_lock(&vector_lock); + return; } + + /* + * 1) The interrupt is in move_in_progress state. That means that we + * have not seen an interrupt since the io_apic was reprogrammed to + * the new vector. + * + * 2) The interrupt has fired on the new vector, but the cleanup IPIs + * have not been processed yet. + */ + if (data->move_in_progress) { + /* + * In theory there is a race: + * + * set_ioapic(new_vector) <-- Interrupt is raised before update + * is effective, i.e. it's raised on + * the old vector. + * + * So if the target cpu cannot handle that interrupt before + * the old vector is cleaned up, we get a spurious interrupt + * and in the worst case the ioapic irq line becomes stale. + * + * But in case of cpu hotplug this should be a non issue + * because if the affinity update happens right before all + * cpus rendezvous in stop machine, there is no way that the + * interrupt can be blocked on the target cpu because all cpus + * loop first with interrupts enabled in stop machine, so the + * old vector is not yet cleaned up when the interrupt fires. + * + * So the only way to run into this issue is if the delivery + * of the interrupt on the apic/system bus would be delayed + * beyond the point where the target cpu disables interrupts + * in stop machine. I doubt that it can happen, but at least + * there is a theoretical chance. Virtualization might be + * able to expose this, but AFAICT the IOAPIC emulation is not + * as stupid as the real hardware. + * + * Anyway, there is nothing we can do about that at this point + * w/o refactoring the whole fixup_irq() business completely. + * We print at least the irq number and the old vector number, + * so we have the necessary information when a problem in that + * area arises. 
+ */ + pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n", + irqdata->irq, cfg->old_vector); + } + /* + * If old_domain is not empty, then other cpus still have the irq + * descriptor set in their vector array. Clean it up. + */ + for_each_cpu(cpu, data->old_domain) + per_cpu(vector_irq, cpu)[cfg->old_vector] = VECTOR_UNUSED; + + /* Cleanup the left overs of the (half finished) move */ + cpumask_clear(data->old_domain); + data->move_in_progress = 0; raw_spin_unlock(&vector_lock); } #endif diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 624db00583f4..8f4942e2bcbb 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -792,7 +792,8 @@ static int uv_scir_cpu_notify(struct notifier_block *self, unsigned long action, { long cpu = (long)hcpu; - switch (action) { + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_DOWN_FAILED: case CPU_ONLINE: uv_heartbeat_enable(cpu); break; @@ -860,7 +861,7 @@ int uv_set_vga_state(struct pci_dev *pdev, bool decode, */ void uv_cpu_init(void) { - /* CPU 0 initilization will be done via uv_system_init. */ + /* CPU 0 initialization will be done via uv_system_init. */ if (!uv_blade_info) return; diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 052c9c3026cc..9307f182fe30 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -1088,7 +1088,7 @@ static int apm_get_battery_status(u_short which, u_short *status, * @device: identity of device * @enable: on/off * - * Activate or deactive power management on either a specific device + * Activate or deactivate power management on either a specific device * or the entire system (%APM_DEVICE_ALL). */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 5026a13356c4..e51021c9207a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -85,7 +85,7 @@ static void init_amd_k5(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_32 /* * General Systems BIOSen alias the cpu frequency registers - * of the Elan at 0x000df000. Unfortuantly, one of the Linux + * of the Elan at 0x000df000. Unfortunately, one of the Linux * drivers subsequently pokes it, and changes the CPU speed. * Workaround : Remove the unneeded alias. */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 06ad72383b4e..9988caf42161 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -968,7 +968,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) if (this_cpu->c_identify) this_cpu->c_identify(c); - /* Clear/Set all flags overriden by options, after probe */ + /* Clear/Set all flags overridden by options, after probe */ for (i = 0; i < NCAPINTS; i++) { c->x86_capability[i] &= ~cpu_caps_cleared[i]; c->x86_capability[i] |= cpu_caps_set[i]; @@ -1028,7 +1028,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) setup_pku(c); /* - * Clear/Set all flags overriden by options, need do it + * Clear/Set all flags overridden by options, need do it * before following smp all cpus cap AND. */ for (i = 0; i < NCAPINTS; i++) { diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index fcbcb2f678ca..19f57360dfd2 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -42,7 +42,7 @@ EXPORT_SYMBOL_GPL(mtrr_state); * "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD * Opteron Processors" (26094 Rev. 
3.30 February 2006), section * "13.2.1.2 SYSCFG Register": "The MtrrFixDramModEn bit should be set - * to 1 during BIOS initalization of the fixed MTRRs, then cleared to + * to 1 during BIOS initialization of the fixed MTRRs, then cleared to * 0 for operation." */ static inline void k8_check_syscfg_dram_mod_en(void) diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 21bf92490a7b..8a121991e5ba 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -287,7 +287,7 @@ static __init void early_pci_serial_init(char *s) } /* - * Lastly, initalize the hardware + * Lastly, initialize the hardware */ if (*s) { if (strcmp(s, "nocfg") == 0) diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 0bc3490420c5..8bd1c003942a 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -8,7 +8,7 @@ /* * The xstateregs_active() routine is the same as the regset_fpregs_active() routine, * as the "regset->n" for the xstate regset will be updated based on the feature - * capabilites supported by the xsave. + * capabilities supported by the xsave. */ int regset_fpregs_active(struct task_struct *target, const struct user_regset *regset) { diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index be0ebbb6d1d1..a1f0e4a5c47e 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -717,7 +717,7 @@ static int hpet_cpuhp_notify(struct notifier_block *n, struct hpet_work_struct work; struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu); - switch (action & 0xf) { + switch (action & ~CPU_TASKS_FROZEN) { case CPU_ONLINE: INIT_DELAYED_WORK_ONSTACK(&work.work, hpet_work); init_completion(&work.complete); diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 37dae792dbbe..589b3193f102 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -96,9 +96,14 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) SYSCALL_DEFINE1(iopl, unsigned int, level) { struct pt_regs *regs = current_pt_regs(); - unsigned int old = (regs->flags >> 12) & 3; struct thread_struct *t = ¤t->thread; + /* + * Careful: the IOPL bits in regs->flags are undefined under Xen PV + * and changing them has no effect. + */ + unsigned int old = t->iopl >> X86_EFLAGS_IOPL_BIT; + if (level > 3) return -EINVAL; /* Trying to gain more privileges? 
*/ @@ -106,8 +111,9 @@ SYSCALL_DEFINE1(iopl, unsigned int, level) if (!capable(CAP_SYS_RAWIO)) return -EPERM; } - regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12); - t->iopl = level << 12; + regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | + (level << X86_EFLAGS_IOPL_BIT); + t->iopl = level << X86_EFLAGS_IOPL_BIT; set_iopl_mask(t->iopl); return 0; diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 0f8a6bbaaa44..2af478e3fd4e 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -271,7 +271,7 @@ static int bzImage64_probe(const char *buf, unsigned long len) int ret = -ENOEXEC; struct setup_header *header; - /* kernel should be atleast two sectors long */ + /* kernel should be at least two sectors long */ if (len < 2 * 512) { pr_err("File is too short to be a bzImage\n"); return ret; diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index ed15cd486d06..2da6ee9ae69b 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -609,9 +609,9 @@ static struct notifier_block kgdb_notifier = { }; /** - * kgdb_arch_init - Perform any architecture specific initalization. + * kgdb_arch_init - Perform any architecture specific initialization. * - * This function will handle the initalization of any architecture + * This function will handle the initialization of any architecture * specific callbacks. */ int kgdb_arch_init(void) diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 72cef58693c7..1d39bfbd26bb 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -226,7 +226,7 @@ static void kvm_setup_secondary_clock(void) * registered memory location. If the guest happens to shutdown, this memory * won't be valid. In cases like kexec, in which you install a new kernel, this * means a random memory location will be kept being written. So before any - * kind of shutdown from our side, we unregister the clock by writting anything + * kind of shutdown from our side, we unregister the clock by writing anything * that does not have the 'enable' bit set in the msr */ #ifdef CONFIG_KEXEC_CORE diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index dfa2781610e8..6cbab31ac23a 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -48,6 +48,7 @@ #include #include #include +#include asmlinkage extern void ret_from_fork(void); @@ -413,6 +414,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) __switch_to_xtra(prev_p, next_p, tss); +#ifdef CONFIG_XEN + /* + * On Xen PV, IOPL bits in pt_regs->flags have no effect, and + * current_pt_regs()->flags may not match the current task's + * intended IOPL. We need to switch it manually. 
+ */ + if (unlikely(static_cpu_has(X86_FEATURE_XENPV) && + prev->iopl != next->iopl)) + xen_set_iopl_mask(next->iopl); +#endif + if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) { /* * AMD CPUs have a misfeature: SYSRET sets the SS selector but diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 643dbdccf4bc..b2c99f811c3f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -274,11 +274,6 @@ int topology_update_package_map(unsigned int apicid, unsigned int cpu) if (test_and_set_bit(pkg, physical_package_map)) goto found; - if (pkg < __max_logical_packages) { - set_bit(pkg, logical_package_map); - physical_to_logical_pkg[pkg] = pkg; - goto found; - } new = find_first_zero_bit(logical_package_map, __max_logical_packages); if (new >= __max_logical_packages) { physical_to_logical_pkg[pkg] = -1; @@ -317,9 +312,27 @@ static void __init smp_init_package_map(void) /* * Today neither Intel nor AMD support heterogenous systems. That * might change in the future.... + * + * While ideally we'd want '* smp_num_siblings' in the below @ncpus + * computation, this won't actually work since some Intel BIOSes + * report inconsistent HT data when they disable HT. + * + * In particular, they reduce the APIC-IDs to only include the cores, + * but leave the CPUID topology to say there are (2) siblings. + * This means we don't know how many threads there will be until + * after the APIC enumeration. + * + * By not including this we'll sometimes over-estimate the number of + * logical packages by the amount of !present siblings, but this is + * still better than MAX_LOCAL_APIC. + * + * We use total_cpus not nr_cpu_ids because nr_cpu_ids can be limited + * on the command line leading to a similar issue as the HT disable + * problem because the hyperthreads are usually enumerated after the + * primary cores. */ - ncpus = boot_cpu_data.x86_max_cores * smp_num_siblings; - __max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus); + ncpus = boot_cpu_data.x86_max_cores; + __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus); /* * Possibly larger than what we need as the number of apic ids per diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 56380440d862..c9c4c7ce3eb2 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -881,7 +881,7 @@ void tsc_restore_sched_clock_state(void) local_irq_save(flags); /* - * We're comming out of suspend, there's no concurrency yet; don't + * We're coming out of suspend, there's no concurrency yet; don't * bother being nice about the RCU stuff, just write to both * data fields. 
*/ @@ -1306,11 +1306,15 @@ void __init tsc_init(void) unsigned long calibrate_delay_is_known(void) { int sibling, cpu = smp_processor_id(); + struct cpumask *mask = topology_core_cpumask(cpu); if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC)) return 0; - sibling = cpumask_any_but(topology_core_cpumask(cpu), cpu); + if (!mask) + return 0; + + sibling = cpumask_any_but(mask, cpu); if (sibling < nr_cpu_ids) return cpu_data(sibling).loops_per_jiffy; return 0; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 6bdfbc23ecaa..70e95d097ef1 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -479,7 +479,7 @@ static bool spte_is_locklessly_modifiable(u64 spte) static bool spte_has_volatile_bits(u64 spte) { /* - * Always atomicly update spte if it can be updated + * Always atomically update spte if it can be updated * out of mmu-lock, it can ensure dirty bit is not lost, * also, it can help us to get a stable is_writable_pte() * to ensure tlb flush is not missed. @@ -550,7 +550,7 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte) /* * For the spte updated out of mmu-lock is safe, since - * we always atomicly update it, see the comments in + * we always atomically update it, see the comments in * spte_has_volatile_bits(). */ if (spte_is_locklessly_modifiable(old_spte) && diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index efc243e4dabf..ee1c8a93871c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5528,7 +5528,7 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) return kvm_set_cr4(vcpu, val); } -/* called to set cr0 as approriate for clts instruction exit. */ +/* called to set cr0 as appropriate for clts instruction exit. */ static void handle_clts(struct kvm_vcpu *vcpu) { if (is_guest_mode(vcpu)) { @@ -7267,7 +7267,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) /* The value to write might be 32 or 64 bits, depending on L1's long * mode, and eventually we need to write that into a field of several * possible lengths. The code below first zero-extends the value to 64 - * bit (field_value), and then copies only the approriate number of + * bit (field_value), and then copies only the appropriate number of * bits into the vmcs12 field. */ u64 field_value = 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e260ccbc8f55..742d0f7d3556 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1562,7 +1562,7 @@ static cycle_t read_tsc(void) /* * GCC likes to generate cmov here, but this branch is extremely - * predictable (it's just a funciton of time and the likely is + * predictable (it's just a function of time and the likely is * very likely) and there's a data dependence, so force GCC * to generate a branch instead. I don't barrier() because * we don't actually need a barrier, and if this function diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index cbb8ee5830ff..2ec0b0abbfaa 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -1,6 +1,7 @@ /* Copyright 2002 Andi Kleen */ #include +#include #include #include @@ -268,16 +269,16 @@ ENTRY(memcpy_mcsafe) decl %ecx jnz .L_copy_trailing_bytes - /* Copy successful. Return true */ + /* Copy successful. 
Return zero */ .L_done_memcpy_trap: xorq %rax, %rax ret ENDPROC(memcpy_mcsafe) .section .fixup, "ax" - /* Return false for any failure */ + /* Return -EFAULT for any failure */ .L_memcpy_mcsafe_fail: - mov $1, %rax + mov $-EFAULT, %rax ret .previous diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index c9c81227ea37..e1229ecd2a82 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -9,7 +9,7 @@ /* * ISO C memset - set a memory block to a byte value. This function uses fast * string to get better performance than the original function. The code is - * simpler and shorter than the orignal function as well. + * simpler and shorter than the original function as well. * * rdi destination * rsi value (char) diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index a0a0b9861902..80476878eb4c 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -728,14 +728,14 @@ static inline unsigned long bd_entry_virt_space(struct mm_struct *mm) /* * This covers 32-bit emulation as well as 32-bit kernels - * running on 64-bit harware. + * running on 64-bit hardware. */ if (!is_64bit_mm(mm)) return (4ULL * GB) / MPX_BD_NR_ENTRIES_32; /* * 'x86_virt_bits' returns what the hardware is capable - * of, and returns the full >32-bit adddress space when + * of, and returns the full >32-bit address space when * running 32-bit kernels on 64-bit hardware. */ virt_space = (1ULL << boot_cpu_data.x86_virt_bits); diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 04e2e7144bee..faec01e7a17d 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -149,7 +149,7 @@ enum { PAT_WT = 4, /* Write Through */ PAT_WP = 5, /* Write Protected */ PAT_WB = 6, /* Write Back (default) */ - PAT_UC_MINUS = 7, /* UC, but can be overriden by MTRR */ + PAT_UC_MINUS = 7, /* UC, but can be overridden by MTRR */ }; #define CM(c) (_PAGE_CACHE_MODE_ ## c) diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 1d2e6392f5fa..0e07e0968c3a 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -437,7 +437,8 @@ static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action, void *data) { int cpu = (unsigned long)data; - switch (action) { + + switch (action & ~CPU_TASKS_FROZEN) { case CPU_DOWN_FAILED: case CPU_ONLINE: smp_call_function_single(cpu, nmi_cpu_up, NULL, 0); diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bma023.c b/arch/x86/platform/intel-mid/device_libs/platform_bma023.c index 0ae7f2ae2296..c26cf393d35a 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_bma023.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_bma023.c @@ -1,5 +1,5 @@ /* - * platform_bma023.c: bma023 platform data initilization file + * platform_bma023.c: bma023 platform data initialization file * * (C) Copyright 2013 Intel Corporation * diff --git a/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c b/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c index 69a783689d21..c259fb6c8f4f 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c @@ -1,5 +1,5 @@ /* - * platform_emc1403.c: emc1403 platform data initilization file + * platform_emc1403.c: emc1403 platform data initialization file * * (C) Copyright 2013 Intel Corporation * Author: Sathyanarayanan Kuppuswamy diff --git a/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c b/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c index dccae6b0413f..52534ec29765 100644 --- 
a/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c @@ -1,5 +1,5 @@ /* - * platform_gpio_keys.c: gpio_keys platform data initilization file + * platform_gpio_keys.c: gpio_keys platform data initialization file * * (C) Copyright 2013 Intel Corporation * Author: Sathyanarayanan Kuppuswamy diff --git a/arch/x86/platform/intel-mid/device_libs/platform_lis331.c b/arch/x86/platform/intel-mid/device_libs/platform_lis331.c index 54226de7541a..a35cf912de43 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_lis331.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_lis331.c @@ -1,5 +1,5 @@ /* - * platform_lis331.c: lis331 platform data initilization file + * platform_lis331.c: lis331 platform data initialization file * * (C) Copyright 2013 Intel Corporation * Author: Sathyanarayanan Kuppuswamy diff --git a/arch/x86/platform/intel-mid/device_libs/platform_max7315.c b/arch/x86/platform/intel-mid/device_libs/platform_max7315.c index 2c8acbc1e9ad..6e075afa7877 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_max7315.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_max7315.c @@ -1,5 +1,5 @@ /* - * platform_max7315.c: max7315 platform data initilization file + * platform_max7315.c: max7315 platform data initialization file * * (C) Copyright 2013 Intel Corporation * Author: Sathyanarayanan Kuppuswamy diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c b/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c index cfe9a47a1e87..ee22864bbc2f 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c @@ -1,5 +1,5 @@ /* - * platform_mpu3050.c: mpu3050 platform data initilization file + * platform_mpu3050.c: mpu3050 platform data initialization file * * (C) Copyright 2013 Intel Corporation * Author: Sathyanarayanan Kuppuswamy diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic.c b/arch/x86/platform/intel-mid/device_libs/platform_msic.c index 9f4a775a69d6..e421106c11cf 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_msic.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_msic.c @@ -1,5 +1,5 @@ /* - * platform_msic.c: MSIC platform data initilization file + * platform_msic.c: MSIC platform data initialization file * * (C) Copyright 2013 Intel Corporation * Author: Sathyanarayanan Kuppuswamy diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c index 29629397d2b3..cb3490ecb341 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c @@ -1,5 +1,5 @@ /* - * platform_msic_audio.c: MSIC audio platform data initilization file + * platform_msic_audio.c: MSIC audio platform data initialization file * * (C) Copyright 2013 Intel Corporation * Author: Sathyanarayanan Kuppuswamy diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c index f446c33df1a8..4f72193939a6 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c @@ -1,5 +1,5 @@ /* - * platform_msic_battery.c: MSIC battery platform data initilization file + * platform_msic_battery.c: MSIC battery platform data initialization file * * (C) Copyright 2013 Intel Corporation * 
Author: Sathyanarayanan Kuppuswamy diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c index 2a4f7b1dd917..70de5b531ba0 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c @@ -1,5 +1,5 @@ /* - * platform_msic_gpio.c: MSIC GPIO platform data initilization file + * platform_msic_gpio.c: MSIC GPIO platform data initialization file * * (C) Copyright 2013 Intel Corporation * Author: Sathyanarayanan Kuppuswamy diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c index 6497111ddb54..3d7c2011b6cf 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c @@ -1,5 +1,5 @@ /* - * platform_msic_ocd.c: MSIC OCD platform data initilization file + * platform_msic_ocd.c: MSIC OCD platform data initialization file * * (C) Copyright 2013 Intel Corporation * Author: Sathyanarayanan Kuppuswamy diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c index 83a3459bc337..038f618fbc52 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c @@ -1,5 +1,5 @@ /* - * platform_msic_power_btn.c: MSIC power btn platform data initilization file + * platform_msic_power_btn.c: MSIC power btn platform data initialization file * * (C) Copyright 2013 Intel Corporation * Author: Sathyanarayanan Kuppuswamy diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c index a351878b96bc..114a5755b1e4 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c @@ -1,5 +1,5 @@ /* - * platform_msic_thermal.c: msic_thermal platform data initilization file + * platform_msic_thermal.c: msic_thermal platform data initialization file * * (C) Copyright 2013 Intel Corporation * Author: Sathyanarayanan Kuppuswamy diff --git a/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c b/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c index 65c2a9a19db4..e30cb62e3300 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c @@ -1,5 +1,5 @@ /* - * platform_pmic_gpio.c: PMIC GPIO platform data initilization file + * platform_pmic_gpio.c: PMIC GPIO platform data initialization file * * (C) Copyright 2013 Intel Corporation * Author: Sathyanarayanan Kuppuswamy diff --git a/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c b/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c index 740fc757050c..b1526b95fd43 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c @@ -1,5 +1,5 @@ /* - * platform_tc35876x.c: tc35876x platform data initilization file + * platform_tc35876x.c: tc35876x platform data initialization file * * (C) Copyright 2013 Intel Corporation * Author: Sathyanarayanan Kuppuswamy diff --git a/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c b/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c index 33be0b3be6e1..4f41372ce400 100644 --- 
a/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c @@ -1,5 +1,5 @@ /* - * platform_tca6416.c: tca6416 platform data initilization file + * platform_tca6416.c: tca6416 platform data initialization file * * (C) Copyright 2013 Intel Corporation * Author: Sathyanarayanan Kuppuswamy diff --git a/arch/x86/purgatory/stack.S b/arch/x86/purgatory/stack.S index 3cefba1fefc8..50a4147f91fb 100644 --- a/arch/x86/purgatory/stack.S +++ b/arch/x86/purgatory/stack.S @@ -8,7 +8,7 @@ */ /* A stack for the loaded kernel. - * Seperate and in the data section so it can be prepopulated. + * Separate and in the data section so it can be prepopulated. */ .data .balign 4096 diff --git a/arch/x86/video/fbdev.c b/arch/x86/video/fbdev.c index d5644bbe8cba..9fd24846d094 100644 --- a/arch/x86/video/fbdev.c +++ b/arch/x86/video/fbdev.c @@ -14,26 +14,24 @@ int fb_is_primary_device(struct fb_info *info) { struct device *device = info->device; - struct pci_dev *pci_dev = NULL; struct pci_dev *default_device = vga_default_device(); - struct resource *res = NULL; + struct pci_dev *pci_dev; + struct resource *res; - if (device) - pci_dev = to_pci_dev(device); - - if (!pci_dev) + if (!device || !dev_is_pci(device)) return 0; + pci_dev = to_pci_dev(device); + if (default_device) { if (pci_dev == default_device) return 1; - else - return 0; + return 0; } - res = &pci_dev->resource[PCI_ROM_RESOURCE]; + res = pci_dev->resource + PCI_ROM_RESOURCE; - if (res && res->flags & IORESOURCE_ROM_SHADOW) + if (res->flags & IORESOURCE_ROM_SHADOW) return 1; return 0; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 2379a5a88504..880862c7d9dd 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -962,7 +962,7 @@ static void xen_load_sp0(struct tss_struct *tss, tss->x86_tss.sp0 = thread->sp0; } -static void xen_set_iopl_mask(unsigned mask) +void xen_set_iopl_mask(unsigned mask) { struct physdev_set_iopl set_iopl; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index c913ca4f6958..478a2de543a5 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1256,7 +1256,7 @@ static void __init xen_pagetable_cleanhighmap(void) xen_cleanhighmap(addr, addr + size); xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base)); #ifdef DEBUG - /* This is superflous and is not neccessary, but you know what + /* This is superfluous and is not necessary, but you know what * lets do it. The MODULES_VADDR -> MODULES_END should be clear of * anything at this stage. */ xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1); @@ -1474,7 +1474,7 @@ static void xen_write_cr3(unsigned long cr3) /* * At the start of the day - when Xen launches a guest, it has already * built pagetables for the guest. We diligently look over them - * in xen_setup_kernel_pagetable and graft as appropiate them in the + * in xen_setup_kernel_pagetable and graft as appropriate them in the * init_level4_pgt and its friends. Then when we are happy we load * the new init_level4_pgt - and continue on. * @@ -2792,7 +2792,7 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, struct remap_data *rmd = data; pte_t pte = pte_mkspecial(mfn_pte(*rmd->mfn, rmd->prot)); - /* If we have a contigious range, just update the mfn itself, + /* If we have a contiguous range, just update the mfn itself, else update pointer to be "next mfn". 
*/ if (rmd->contiguous) (*rmd->mfn)++; @@ -2833,7 +2833,7 @@ static int do_remap_gfn(struct vm_area_struct *vma, rmd.mfn = gfn; rmd.prot = prot; - /* We use the err_ptr to indicate if there we are doing a contigious + /* We use the err_ptr to indicate if there we are doing a contiguous * mapping or a discontigious mapping. */ rmd.contiguous = !err_ptr; diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index de93b20fa0d2..7f8d8abf4c1a 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -26,7 +26,7 @@ (1 << XENFEAT_auto_translated_physmap) | \ (1 << XENFEAT_supervisor_mode_kernel) | \ (1 << XENFEAT_hvm_callback_vector)) -/* The XENFEAT_writable_page_tables is not stricly neccessary as we set that +/* The XENFEAT_writable_page_tables is not stricly necessary as we set that * up regardless whether this CONFIG option is enabled or not, but it * clarifies what the right flags need to be. */ diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 7ec5b86735f3..4630eeae18e0 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -65,7 +65,6 @@ static inline bool trigger_allbutself_cpu_backtrace(void) #endif #ifdef CONFIG_LOCKUP_DETECTOR -int hw_nmi_is_cpu_stuck(struct pt_regs *); u64 hw_nmi_get_sample_period(int watchdog_thresh); extern int nmi_watchdog_enabled; extern int soft_watchdog_enabled; diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index d5ce7d7aae3e..b47ebd170690 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -5,7 +5,7 @@ include ../lib.mk .PHONY: all all_32 all_64 warn_32bit_failure clean TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall \ - check_initial_reg_state sigreturn ldt_gdt + check_initial_reg_state sigreturn ldt_gdt iopl TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer diff --git a/tools/testing/selftests/x86/iopl.c b/tools/testing/selftests/x86/iopl.c new file mode 100644 index 000000000000..c496ca97bc18 --- /dev/null +++ b/tools/testing/selftests/x86/iopl.c @@ -0,0 +1,135 @@ +/* + * iopl.c - Test case for a Linux on Xen 64-bit bug + * Copyright (c) 2015 Andrew Lutomirski + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int nerrs = 0; + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); + +} + +static jmp_buf jmpbuf; + +static void sigsegv(int sig, siginfo_t *si, void *ctx_void) +{ + siglongjmp(jmpbuf, 1); +} + +int main(void) +{ + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) + err(1, "sched_setaffinity to CPU 0"); + + /* Probe for iopl support. Note that iopl(0) works even as nonroot. */ + if (iopl(3) != 0) { + printf("[OK]\tiopl(3) failed (%d) -- try running as root\n", + errno); + return 0; + } + + /* Restore our original state prior to starting the test. 
+ */ if (iopl(0) != 0) err(1, "iopl(0)"); + pid_t child = fork(); + if (child == -1) + err(1, "fork"); + + if (child == 0) { + printf("\tchild: set IOPL to 3\n"); + if (iopl(3) != 0) + err(1, "iopl"); + + printf("[RUN]\tchild: write to 0x80\n"); + asm volatile ("outb %%al, $0x80" : : "a" (0)); + + return 0; + } else { + int status; + if (waitpid(child, &status, 0) != child || + !WIFEXITED(status)) { + printf("[FAIL]\tChild died\n"); + nerrs++; + } else if (WEXITSTATUS(status) != 0) { + printf("[FAIL]\tChild failed\n"); + nerrs++; + } else { + printf("[OK]\tChild succeeded\n"); + } + } + + printf("[RUN]\tparent: write to 0x80 (should fail)\n"); + + sethandler(SIGSEGV, sigsegv, 0); + if (sigsetjmp(jmpbuf, 1) != 0) { + printf("[OK]\twrite was denied\n"); + } else { + asm volatile ("outb %%al, $0x80" : : "a" (0)); + printf("[FAIL]\twrite was allowed\n"); + nerrs++; + } + + /* Test the capability checks. */ + printf("\tiopl(3)\n"); + if (iopl(3) != 0) + err(1, "iopl(3)"); + + printf("\tDrop privileges\n"); + if (setresuid(1, 1, 1) != 0) { + printf("[WARN]\tDropping privileges failed\n"); + goto done; + } + + printf("[RUN]\tiopl(3) unprivileged but with IOPL==3\n"); + if (iopl(3) != 0) { + printf("[FAIL]\tiopl(3) should work if iopl is already 3 even if unprivileged\n"); + nerrs++; + } + + printf("[RUN]\tiopl(0) unprivileged\n"); + if (iopl(0) != 0) { + printf("[FAIL]\tiopl(0) should work if iopl is already 3 even if unprivileged\n"); + nerrs++; + } + + printf("[RUN]\tiopl(3) unprivileged\n"); + if (iopl(3) == 0) { + printf("[FAIL]\tiopl(3) should fail when unprivileged if iopl==0\n"); + nerrs++; + } else { + printf("[OK]\tFailed as expected\n"); + } + +done: + return nerrs ? 1 : 0; +} +
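
A note on the memcpy_mcsafe() hunks above (arch/x86/include/asm/string_64.h and arch/x86/lib/memcpy_64.S): the function now returns 0 on success and -EFAULT when a machine check is taken during the copy, instead of the old true/false convention. The following is only an illustrative sketch of how a caller would consume the new return value; the copy_from_pmem_mcsafe() wrapper name and its surrounding context are assumptions for illustration, not part of this patch series.

/*
 * Illustrative sketch, not part of the patch. Assumes the post-patch
 * prototype from <asm/string_64.h> (pulled in via <linux/string.h> on
 * x86-64):
 *
 *     int memcpy_mcsafe(void *dst, const void *src, size_t cnt);
 */
#include <linux/string.h>
#include <linux/errno.h>

/* Hypothetical helper name, for illustration only. */
static int copy_from_pmem_mcsafe(void *dst, const void *src, size_t len)
{
	int rc = memcpy_mcsafe(dst, src, len);

	/* rc is 0 on success, -EFAULT if poison was consumed mid-copy. */
	if (rc)
		return rc;

	return 0;
}

With the int convention the error can be propagated directly as an error code, which is what the fixup section in memcpy_64.S now supports by returning $-EFAULT instead of 1.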