Merge branch 'core-smp-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull core SMP updates from Ingo Molnar:
 "Two main change is generic vCPU pinning and physical CPU SMP-call
  support, for Xen to be able to perform certain calls on specific
  physical CPUs - by Juergen Gross"

* 'core-smp-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  smp: Allocate smp_call_on_cpu() workqueue on stack too
  hwmon: Use smp_call_on_cpu() for dell-smm i8k
  dcdbas: Make use of smp_call_on_cpu()
  xen: Add xen_pin_vcpu() to support calling functions on a dedicated pCPU
  smp: Add function to execute a function synchronously on a CPU
  virt, sched: Add generic vCPU pinning support
  xen: Sync xen header
This commit is contained in:
Linus Torvalds 2016-10-03 11:02:39 -07:00
commit d7a0dab82f
11 changed files with 274 additions and 59 deletions

View File

@ -8860,6 +8860,7 @@ S: Supported
F: Documentation/virtual/paravirt_ops.txt
F: arch/*/kernel/paravirt*
F: arch/*/include/asm/paravirt.h
F: include/linux/hypervisor.h
PARIDE DRIVERS FOR PARALLEL PORT IDE DEVICES
M: Tim Waugh <tim@cyberelk.net>

View File

@ -43,6 +43,9 @@ struct hypervisor_x86 {
/* X2APIC detection (run once per boot) */
bool (*x2apic_available)(void);
/* pin current vcpu to specified physical cpu (run rarely) */
void (*pin_vcpu)(int);
};
extern const struct hypervisor_x86 *x86_hyper;
@ -56,6 +59,7 @@ extern const struct hypervisor_x86 x86_hyper_kvm;
extern void init_hypervisor(struct cpuinfo_x86 *c);
extern void init_hypervisor_platform(void);
extern bool hypervisor_x2apic_available(void);
extern void hypervisor_pin_vcpu(int cpu);
#else
static inline void init_hypervisor(struct cpuinfo_x86 *c) { }
static inline void init_hypervisor_platform(void) { }

View File

@ -86,3 +86,14 @@ bool __init hypervisor_x2apic_available(void)
x86_hyper->x2apic_available &&
x86_hyper->x2apic_available();
}
void hypervisor_pin_vcpu(int cpu)
{
if (!x86_hyper)
return;
if (x86_hyper->pin_vcpu)
x86_hyper->pin_vcpu(cpu);
else
WARN_ONCE(1, "vcpu pinning requested but not supported!\n");
}

View File

@ -1925,6 +1925,45 @@ static void xen_set_cpu_features(struct cpuinfo_x86 *c)
}
}
static void xen_pin_vcpu(int cpu)
{
static bool disable_pinning;
struct sched_pin_override pin_override;
int ret;
if (disable_pinning)
return;
pin_override.pcpu = cpu;
ret = HYPERVISOR_sched_op(SCHEDOP_pin_override, &pin_override);
/* Ignore errors when removing override. */
if (cpu < 0)
return;
switch (ret) {
case -ENOSYS:
pr_warn("Unable to pin on physical cpu %d. In case of problems consider vcpu pinning.\n",
cpu);
disable_pinning = true;
break;
case -EPERM:
WARN(1, "Trying to pin vcpu without having privilege to do so\n");
disable_pinning = true;
break;
case -EINVAL:
case -EBUSY:
pr_warn("Physical cpu %d not available for pinning. Check Xen cpu configuration.\n",
cpu);
break;
case 0:
break;
default:
WARN(1, "rc %d while trying to pin vcpu\n", ret);
disable_pinning = true;
}
}
const struct hypervisor_x86 x86_hyper_xen = {
.name = "Xen",
.detect = xen_platform,
@ -1933,6 +1972,7 @@ const struct hypervisor_x86 x86_hyper_xen = {
#endif
.x2apic_available = xen_x2apic_para_available,
.set_cpu_features = xen_set_cpu_features,
.pin_vcpu = xen_pin_vcpu,
};
EXPORT_SYMBOL(x86_hyper_xen);

View File

@ -23,6 +23,7 @@
#include <linux/platform_device.h>
#include <linux/dma-mapping.h>
#include <linux/errno.h>
#include <linux/cpu.h>
#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/kernel.h>
@ -238,33 +239,14 @@ static ssize_t host_control_on_shutdown_store(struct device *dev,
return count;
}
/**
* dcdbas_smi_request: generate SMI request
*
* Called with smi_data_lock.
*/
int dcdbas_smi_request(struct smi_cmd *smi_cmd)
static int raise_smi(void *par)
{
cpumask_var_t old_mask;
int ret = 0;
struct smi_cmd *smi_cmd = par;
if (smi_cmd->magic != SMI_CMD_MAGIC) {
dev_info(&dcdbas_pdev->dev, "%s: invalid magic value\n",
__func__);
return -EBADR;
}
/* SMI requires CPU 0 */
if (!alloc_cpumask_var(&old_mask, GFP_KERNEL))
return -ENOMEM;
cpumask_copy(old_mask, &current->cpus_allowed);
set_cpus_allowed_ptr(current, cpumask_of(0));
if (smp_processor_id() != 0) {
dev_dbg(&dcdbas_pdev->dev, "%s: failed to get CPU 0\n",
__func__);
ret = -EBUSY;
goto out;
return -EBUSY;
}
/* generate SMI */
@ -280,9 +262,28 @@ int dcdbas_smi_request(struct smi_cmd *smi_cmd)
: "memory"
);
out:
set_cpus_allowed_ptr(current, old_mask);
free_cpumask_var(old_mask);
return 0;
}
/**
* dcdbas_smi_request: generate SMI request
*
* Called with smi_data_lock.
*/
int dcdbas_smi_request(struct smi_cmd *smi_cmd)
{
int ret;
if (smi_cmd->magic != SMI_CMD_MAGIC) {
dev_info(&dcdbas_pdev->dev, "%s: invalid magic value\n",
__func__);
return -EBADR;
}
/* SMI requires CPU 0 */
get_online_cpus();
ret = smp_call_on_cpu(0, raise_smi, smi_cmd, true);
put_online_cpus();
return ret;
}

View File

@ -21,6 +21,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/cpu.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/types.h>
@ -36,6 +37,7 @@
#include <linux/io.h>
#include <linux/sched.h>
#include <linux/ctype.h>
#include <linux/smp.h>
#include <linux/i8k.h>
@ -134,11 +136,11 @@ static inline const char *i8k_get_dmi_data(int field)
/*
* Call the System Management Mode BIOS. Code provided by Jonathan Buzzard.
*/
static int i8k_smm(struct smm_regs *regs)
static int i8k_smm_func(void *par)
{
int rc;
struct smm_regs *regs = par;
int eax = regs->eax;
cpumask_var_t old_mask;
#ifdef DEBUG
int ebx = regs->ebx;
@ -149,16 +151,8 @@ static int i8k_smm(struct smm_regs *regs)
#endif
/* SMM requires CPU 0 */
if (!alloc_cpumask_var(&old_mask, GFP_KERNEL))
return -ENOMEM;
cpumask_copy(old_mask, &current->cpus_allowed);
rc = set_cpus_allowed_ptr(current, cpumask_of(0));
if (rc)
goto out;
if (smp_processor_id() != 0) {
rc = -EBUSY;
goto out;
}
if (smp_processor_id() != 0)
return -EBUSY;
#if defined(CONFIG_X86_64)
asm volatile("pushq %%rax\n\t"
@ -216,10 +210,6 @@ static int i8k_smm(struct smm_regs *regs)
if (rc != 0 || (regs->eax & 0xffff) == 0xffff || regs->eax == eax)
rc = -EINVAL;
out:
set_cpus_allowed_ptr(current, old_mask);
free_cpumask_var(old_mask);
#ifdef DEBUG
rettime = ktime_get();
delta = ktime_sub(rettime, calltime);
@ -231,6 +221,20 @@ out:
return rc;
}
/*
* Call the System Management Mode BIOS.
*/
static int i8k_smm(struct smm_regs *regs)
{
int ret;
get_online_cpus();
ret = smp_call_on_cpu(0, i8k_smm_func, regs, true);
put_online_cpus();
return ret;
}
/*
* Read the fan status.
*/

View File

@ -0,0 +1,17 @@
#ifndef __LINUX_HYPEVISOR_H
#define __LINUX_HYPEVISOR_H
/*
* Generic Hypervisor support
* Juergen Gross <jgross@suse.com>
*/
#ifdef CONFIG_HYPERVISOR_GUEST
#include <asm/hypervisor.h>
#else
static inline void hypervisor_pin_vcpu(int cpu)
{
}
#endif
#endif /* __LINUX_HYPEVISOR_H */

View File

@ -196,6 +196,9 @@ extern void arch_enable_nonboot_cpus_end(void);
void smp_setup_processor_id(void);
int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par,
bool phys);
/* SMP core functions */
int smpcfd_prepare_cpu(unsigned int cpu);
int smpcfd_dead_cpu(unsigned int cpu);

View File

@ -3,6 +3,24 @@
*
* Scheduler state interactions
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Copyright (c) 2005, Keir Fraser <keir@xensource.com>
*/
@ -11,19 +29,31 @@
#include <xen/interface/event_channel.h>
/*
* Guest Scheduler Operations
*
* The SCHEDOP interface provides mechanisms for a guest to interact
* with the scheduler, including yield, blocking and shutting itself
* down.
*/
/*
* The prototype for this hypercall is:
* long sched_op_new(int cmd, void *arg)
* long HYPERVISOR_sched_op(enum sched_op cmd, void *arg, ...)
*
* @cmd == SCHEDOP_??? (scheduler operation).
* @arg == Operation-specific extra argument(s), as described below.
* ... == Additional Operation-specific extra arguments, described below.
*
* **NOTE**:
* Versions of Xen prior to 3.0.2 provide only the following legacy version
* Versions of Xen prior to 3.0.2 provided only the following legacy version
* of this hypercall, supporting only the commands yield, block and shutdown:
* long sched_op(int cmd, unsigned long arg)
* @cmd == SCHEDOP_??? (scheduler operation).
* @arg == 0 (SCHEDOP_yield and SCHEDOP_block)
* == SHUTDOWN_* code (SCHEDOP_shutdown)
*
* This legacy version is available to new guests as:
* long HYPERVISOR_sched_op_compat(enum sched_op cmd, unsigned long arg)
*/
/*
@ -44,12 +74,17 @@
/*
* Halt execution of this domain (all VCPUs) and notify the system controller.
* @arg == pointer to sched_shutdown structure.
*
* If the sched_shutdown_t reason is SHUTDOWN_suspend then
* x86 PV guests must also set RDX (EDX for 32-bit guests) to the MFN
* of the guest's start info page. RDX/EDX is the third hypercall
* argument.
*
* In addition, which reason is SHUTDOWN_suspend this hypercall
* returns 1 if suspend was cancelled or the domain was merely
* checkpointed, and 0 if it is resuming in a new domain.
*/
#define SCHEDOP_shutdown 2
struct sched_shutdown {
unsigned int reason; /* SHUTDOWN_* */
};
DEFINE_GUEST_HANDLE_STRUCT(sched_shutdown);
/*
* Poll a set of event-channel ports. Return when one or more are pending. An
@ -57,12 +92,6 @@ DEFINE_GUEST_HANDLE_STRUCT(sched_shutdown);
* @arg == pointer to sched_poll structure.
*/
#define SCHEDOP_poll 3
struct sched_poll {
GUEST_HANDLE(evtchn_port_t) ports;
unsigned int nr_ports;
uint64_t timeout;
};
DEFINE_GUEST_HANDLE_STRUCT(sched_poll);
/*
* Declare a shutdown for another domain. The main use of this function is
@ -71,15 +100,11 @@ DEFINE_GUEST_HANDLE_STRUCT(sched_poll);
* @arg == pointer to sched_remote_shutdown structure.
*/
#define SCHEDOP_remote_shutdown 4
struct sched_remote_shutdown {
domid_t domain_id; /* Remote domain ID */
unsigned int reason; /* SHUTDOWN_xxx reason */
};
/*
* Latch a shutdown code, so that when the domain later shuts down it
* reports this code to the control tools.
* @arg == as for SCHEDOP_shutdown.
* @arg == sched_shutdown, as for SCHEDOP_shutdown.
*/
#define SCHEDOP_shutdown_code 5
@ -92,10 +117,47 @@ struct sched_remote_shutdown {
* With id != 0 and timeout != 0, poke watchdog timer and set new timeout.
*/
#define SCHEDOP_watchdog 6
/*
* Override the current vcpu affinity by pinning it to one physical cpu or
* undo this override restoring the previous affinity.
* @arg == pointer to sched_pin_override structure.
*
* A negative pcpu value will undo a previous pin override and restore the
* previous cpu affinity.
* This call is allowed for the hardware domain only and requires the cpu
* to be part of the domain's cpupool.
*/
#define SCHEDOP_pin_override 7
struct sched_shutdown {
unsigned int reason; /* SHUTDOWN_* => shutdown reason */
};
DEFINE_GUEST_HANDLE_STRUCT(sched_shutdown);
struct sched_poll {
GUEST_HANDLE(evtchn_port_t) ports;
unsigned int nr_ports;
uint64_t timeout;
};
DEFINE_GUEST_HANDLE_STRUCT(sched_poll);
struct sched_remote_shutdown {
domid_t domain_id; /* Remote domain ID */
unsigned int reason; /* SHUTDOWN_* => shutdown reason */
};
DEFINE_GUEST_HANDLE_STRUCT(sched_remote_shutdown);
struct sched_watchdog {
uint32_t id; /* watchdog ID */
uint32_t timeout; /* timeout */
};
DEFINE_GUEST_HANDLE_STRUCT(sched_watchdog);
struct sched_pin_override {
int32_t pcpu;
};
DEFINE_GUEST_HANDLE_STRUCT(sched_pin_override);
/*
* Reason codes for SCHEDOP_shutdown. These may be interpreted by control
@ -107,6 +169,7 @@ struct sched_watchdog {
#define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */
#define SHUTDOWN_crash 3 /* Tell controller we've crashed. */
#define SHUTDOWN_watchdog 4 /* Restart because watchdog time expired. */
/*
* Domain asked to perform 'soft reset' for it. The expected behavior is to
* reset internal Xen state for the domain returning it to the point where it
@ -115,5 +178,6 @@ struct sched_watchdog {
* interfaces again.
*/
#define SHUTDOWN_soft_reset 5
#define SHUTDOWN_MAX 5 /* Maximum valid shutdown reason. */
#endif /* __XEN_PUBLIC_SCHED_H__ */

View File

@ -14,6 +14,7 @@
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/hypervisor.h>
#include "smpboot.h"
@ -724,3 +725,54 @@ void wake_up_all_idle_cpus(void)
preempt_enable();
}
EXPORT_SYMBOL_GPL(wake_up_all_idle_cpus);
/**
* smp_call_on_cpu - Call a function on a specific cpu
*
* Used to call a function on a specific cpu and wait for it to return.
* Optionally make sure the call is done on a specified physical cpu via vcpu
* pinning in order to support virtualized environments.
*/
struct smp_call_on_cpu_struct {
struct work_struct work;
struct completion done;
int (*func)(void *);
void *data;
int ret;
int cpu;
};
static void smp_call_on_cpu_callback(struct work_struct *work)
{
struct smp_call_on_cpu_struct *sscs;
sscs = container_of(work, struct smp_call_on_cpu_struct, work);
if (sscs->cpu >= 0)
hypervisor_pin_vcpu(sscs->cpu);
sscs->ret = sscs->func(sscs->data);
if (sscs->cpu >= 0)
hypervisor_pin_vcpu(-1);
complete(&sscs->done);
}
int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par, bool phys)
{
struct smp_call_on_cpu_struct sscs = {
.done = COMPLETION_INITIALIZER_ONSTACK(sscs.done),
.func = func,
.data = par,
.cpu = phys ? cpu : -1,
};
INIT_WORK_ONSTACK(&sscs.work, smp_call_on_cpu_callback);
if (cpu >= nr_cpu_ids || !cpu_online(cpu))
return -ENXIO;
queue_work_on(cpu, system_wq, &sscs.work);
wait_for_completion(&sscs.done);
return sscs.ret;
}
EXPORT_SYMBOL_GPL(smp_call_on_cpu);

View File

@ -6,6 +6,7 @@
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/smp.h>
#include <linux/hypervisor.h>
int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
int wait)
@ -82,3 +83,20 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
preempt_enable();
}
EXPORT_SYMBOL(on_each_cpu_cond);
int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par, bool phys)
{
int ret;
if (cpu != 0)
return -ENXIO;
if (phys)
hypervisor_pin_vcpu(0);
ret = func(par);
if (phys)
hypervisor_pin_vcpu(-1);
return ret;
}
EXPORT_SYMBOL_GPL(smp_call_on_cpu);