linux/arch/arm64/kernel/topology.c
Jeremy Linton e156ab71a9 arm64: topology: Avoid checking numa mask for scheduler MC selection
The numa mask subset check can often lead to system hang or crash during
CPU hotplug and system suspend operation if NUMA is disabled. This is
mostly observed on HMP systems where the CPU compute capacities are
different and ends up in different scheduler domains. Since
cpumask_of_node is returned instead core_sibling, the scheduler is
confused with incorrect cpumasks(e.g. one CPU in two different sched
domains at the same time) on CPU hotplug.

Lets disable the NUMA siblings checks for the time being, as NUMA in
socket machines have LLC's that will assure that the scheduler topology
isn't "borken".

The NUMA check exists to assure that if a LLC within a socket crosses
NUMA nodes/chiplets the scheduler domains remain consistent. This code will
likely have to be re-enabled in the near future once the NUMA mask story
is sorted.  At the moment its not necessary because the NUMA in socket
machines LLC's are contained within the NUMA domains.

Further, as a defensive mechanism during hot-plug, lets assure that the
LLC siblings are also masked.

Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2018-06-07 17:42:11 +01:00

384 lines
8.5 KiB
C

/*
* arch/arm64/kernel/topology.c
*
* Copyright (C) 2011,2013,2014 Linaro Limited.
*
* Based on the arm32 version written by Vincent Guittot in turn based on
* arch/sh/kernel/topology.c
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
#include <linux/acpi.h>
#include <linux/arch_topology.h>
#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/node.h>
#include <linux/nodemask.h>
#include <linux/of.h>
#include <linux/sched.h>
#include <linux/sched/topology.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/string.h>
#include <asm/cpu.h>
#include <asm/cputype.h>
#include <asm/topology.h>
static int __init get_cpu_for_node(struct device_node *node)
{
struct device_node *cpu_node;
int cpu;
cpu_node = of_parse_phandle(node, "cpu", 0);
if (!cpu_node)
return -1;
cpu = of_cpu_node_to_id(cpu_node);
if (cpu >= 0)
topology_parse_cpu_capacity(cpu_node, cpu);
else
pr_crit("Unable to find CPU node for %pOF\n", cpu_node);
of_node_put(cpu_node);
return cpu;
}
static int __init parse_core(struct device_node *core, int package_id,
int core_id)
{
char name[10];
bool leaf = true;
int i = 0;
int cpu;
struct device_node *t;
do {
snprintf(name, sizeof(name), "thread%d", i);
t = of_get_child_by_name(core, name);
if (t) {
leaf = false;
cpu = get_cpu_for_node(t);
if (cpu >= 0) {
cpu_topology[cpu].package_id = package_id;
cpu_topology[cpu].core_id = core_id;
cpu_topology[cpu].thread_id = i;
} else {
pr_err("%pOF: Can't get CPU for thread\n",
t);
of_node_put(t);
return -EINVAL;
}
of_node_put(t);
}
i++;
} while (t);
cpu = get_cpu_for_node(core);
if (cpu >= 0) {
if (!leaf) {
pr_err("%pOF: Core has both threads and CPU\n",
core);
return -EINVAL;
}
cpu_topology[cpu].package_id = package_id;
cpu_topology[cpu].core_id = core_id;
} else if (leaf) {
pr_err("%pOF: Can't get CPU for leaf core\n", core);
return -EINVAL;
}
return 0;
}
static int __init parse_cluster(struct device_node *cluster, int depth)
{
char name[10];
bool leaf = true;
bool has_cores = false;
struct device_node *c;
static int package_id __initdata;
int core_id = 0;
int i, ret;
/*
* First check for child clusters; we currently ignore any
* information about the nesting of clusters and present the
* scheduler with a flat list of them.
*/
i = 0;
do {
snprintf(name, sizeof(name), "cluster%d", i);
c = of_get_child_by_name(cluster, name);
if (c) {
leaf = false;
ret = parse_cluster(c, depth + 1);
of_node_put(c);
if (ret != 0)
return ret;
}
i++;
} while (c);
/* Now check for cores */
i = 0;
do {
snprintf(name, sizeof(name), "core%d", i);
c = of_get_child_by_name(cluster, name);
if (c) {
has_cores = true;
if (depth == 0) {
pr_err("%pOF: cpu-map children should be clusters\n",
c);
of_node_put(c);
return -EINVAL;
}
if (leaf) {
ret = parse_core(c, package_id, core_id++);
} else {
pr_err("%pOF: Non-leaf cluster with core %s\n",
cluster, name);
ret = -EINVAL;
}
of_node_put(c);
if (ret != 0)
return ret;
}
i++;
} while (c);
if (leaf && !has_cores)
pr_warn("%pOF: empty cluster\n", cluster);
if (leaf)
package_id++;
return 0;
}
static int __init parse_dt_topology(void)
{
struct device_node *cn, *map;
int ret = 0;
int cpu;
cn = of_find_node_by_path("/cpus");
if (!cn) {
pr_err("No CPU information found in DT\n");
return 0;
}
/*
* When topology is provided cpu-map is essentially a root
* cluster with restricted subnodes.
*/
map = of_get_child_by_name(cn, "cpu-map");
if (!map)
goto out;
ret = parse_cluster(map, 0);
if (ret != 0)
goto out_map;
topology_normalize_cpu_scale();
/*
* Check that all cores are in the topology; the SMP code will
* only mark cores described in the DT as possible.
*/
for_each_possible_cpu(cpu)
if (cpu_topology[cpu].package_id == -1)
ret = -EINVAL;
out_map:
of_node_put(map);
out:
of_node_put(cn);
return ret;
}
/*
* cpu topology table
*/
struct cpu_topology cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);
const struct cpumask *cpu_coregroup_mask(int cpu)
{
const cpumask_t *core_mask = &cpu_topology[cpu].core_sibling;
if (cpu_topology[cpu].llc_id != -1) {
if (cpumask_subset(&cpu_topology[cpu].llc_siblings, core_mask))
core_mask = &cpu_topology[cpu].llc_siblings;
}
return core_mask;
}
static void update_siblings_masks(unsigned int cpuid)
{
struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
int cpu;
/* update core and thread sibling masks */
for_each_possible_cpu(cpu) {
cpu_topo = &cpu_topology[cpu];
if (cpuid_topo->llc_id == cpu_topo->llc_id) {
cpumask_set_cpu(cpu, &cpuid_topo->llc_siblings);
cpumask_set_cpu(cpuid, &cpu_topo->llc_siblings);
}
if (cpuid_topo->package_id != cpu_topo->package_id)
continue;
cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
if (cpu != cpuid)
cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
if (cpuid_topo->core_id != cpu_topo->core_id)
continue;
cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
if (cpu != cpuid)
cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
}
}
void store_cpu_topology(unsigned int cpuid)
{
struct cpu_topology *cpuid_topo = &cpu_topology[cpuid];
u64 mpidr;
if (cpuid_topo->package_id != -1)
goto topology_populated;
mpidr = read_cpuid_mpidr();
/* Uniprocessor systems can rely on default topology values */
if (mpidr & MPIDR_UP_BITMASK)
return;
/* Create cpu topology mapping based on MPIDR. */
if (mpidr & MPIDR_MT_BITMASK) {
/* Multiprocessor system : Multi-threads per core */
cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0);
cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);
cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 2) |
MPIDR_AFFINITY_LEVEL(mpidr, 3) << 8;
} else {
/* Multiprocessor system : Single-thread per core */
cpuid_topo->thread_id = -1;
cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0);
cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 1) |
MPIDR_AFFINITY_LEVEL(mpidr, 2) << 8 |
MPIDR_AFFINITY_LEVEL(mpidr, 3) << 16;
}
pr_debug("CPU%u: cluster %d core %d thread %d mpidr %#016llx\n",
cpuid, cpuid_topo->package_id, cpuid_topo->core_id,
cpuid_topo->thread_id, mpidr);
topology_populated:
update_siblings_masks(cpuid);
}
static void __init reset_cpu_topology(void)
{
unsigned int cpu;
for_each_possible_cpu(cpu) {
struct cpu_topology *cpu_topo = &cpu_topology[cpu];
cpu_topo->thread_id = -1;
cpu_topo->core_id = 0;
cpu_topo->package_id = -1;
cpu_topo->llc_id = -1;
cpumask_clear(&cpu_topo->llc_siblings);
cpumask_set_cpu(cpu, &cpu_topo->llc_siblings);
cpumask_clear(&cpu_topo->core_sibling);
cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
cpumask_clear(&cpu_topo->thread_sibling);
cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
}
}
#ifdef CONFIG_ACPI
/*
* Propagate the topology information of the processor_topology_node tree to the
* cpu_topology array.
*/
static int __init parse_acpi_topology(void)
{
bool is_threaded;
int cpu, topology_id;
is_threaded = read_cpuid_mpidr() & MPIDR_MT_BITMASK;
for_each_possible_cpu(cpu) {
int i, cache_id;
topology_id = find_acpi_cpu_topology(cpu, 0);
if (topology_id < 0)
return topology_id;
if (is_threaded) {
cpu_topology[cpu].thread_id = topology_id;
topology_id = find_acpi_cpu_topology(cpu, 1);
cpu_topology[cpu].core_id = topology_id;
} else {
cpu_topology[cpu].thread_id = -1;
cpu_topology[cpu].core_id = topology_id;
}
topology_id = find_acpi_cpu_topology_package(cpu);
cpu_topology[cpu].package_id = topology_id;
i = acpi_find_last_cache_level(cpu);
if (i > 0) {
/*
* this is the only part of cpu_topology that has
* a direct relationship with the cache topology
*/
cache_id = find_acpi_cpu_cache_topology(cpu, i);
if (cache_id > 0)
cpu_topology[cpu].llc_id = cache_id;
}
}
return 0;
}
#else
static inline int __init parse_acpi_topology(void)
{
return -EINVAL;
}
#endif
void __init init_cpu_topology(void)
{
reset_cpu_topology();
/*
* Discard anything that was parsed if we hit an error so we
* don't use partial information.
*/
if (!acpi_disabled && parse_acpi_topology())
reset_cpu_topology();
else if (of_have_populated_dt() && parse_dt_topology())
reset_cpu_topology();
}