In commit 8c27226119 ("powerpc/numa: Enable USE_PERCPU_NUMA_NODE_ID"), we
switched to the generic implementation of cpu_to_node(), which uses a percpu
variable to hold the NUMA node for each CPU.

Unfortunately we neglected to notice that we use cpu_to_node() in the
allocation of our percpu areas, leading to a chicken and egg problem. In
practice what happens is when we are setting up the percpu areas,
cpu_to_node() reports that all CPUs are on node 0, so we allocate all percpu
areas on node 0.

This is visible in the dmesg output, as all pcpu allocs being in group 0:

  pcpu-alloc: [0] 00 01 02 03 [0] 04 05 06 07
  pcpu-alloc: [0] 08 09 10 11 [0] 12 13 14 15
  pcpu-alloc: [0] 16 17 18 19 [0] 20 21 22 23
  pcpu-alloc: [0] 24 25 26 27 [0] 28 29 30 31
  pcpu-alloc: [0] 32 33 34 35 [0] 36 37 38 39
  pcpu-alloc: [0] 40 41 42 43 [0] 44 45 46 47

To fix it we need an early_cpu_to_node() which can run prior to percpu being
setup. We already have the numa_cpu_lookup_table we can use, so just plumb it
in.

With the patch dmesg output shows two groups, 0 and 1:

  pcpu-alloc: [0] 00 01 02 03 [0] 04 05 06 07
  pcpu-alloc: [0] 08 09 10 11 [0] 12 13 14 15
  pcpu-alloc: [0] 16 17 18 19 [0] 20 21 22 23
  pcpu-alloc: [1] 24 25 26 27 [1] 28 29 30 31
  pcpu-alloc: [1] 32 33 34 35 [1] 36 37 38 39
  pcpu-alloc: [1] 40 41 42 43 [1] 44 45 46 47

We can also check the data_offset in the paca of various CPUs, with the fix
we see:

  CPU 0:  data_offset = 0x0ffe8b0000
  CPU 24: data_offset = 0x1ffe5b0000

And we can see from dmesg that CPU 24 has an allocation on node 1:

  node 0: [mem 0x0000000000000000-0x0000000fffffffff]
  node 1: [mem 0x0000001000000000-0x0000001fffffffff]

Cc: stable@vger.kernel.org # v3.16+
Fixes: 8c27226119 ("powerpc/numa: Enable USE_PERCPU_NUMA_NODE_ID")
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
#ifndef _ASM_POWERPC_TOPOLOGY_H
#define _ASM_POWERPC_TOPOLOGY_H
#ifdef __KERNEL__


struct device;
struct device_node;

#ifdef CONFIG_NUMA

/*
 * If zone_reclaim_mode is enabled, a RECLAIM_DISTANCE of 10 will mean that
 * all zones on all nodes will be eligible for zone_reclaim().
 */
#define RECLAIM_DISTANCE 10

#include <asm/mmzone.h>

#define parent_node(node)	(node)

#define cpumask_of_node(node) ((node) == -1 ?				\
			       cpu_all_mask :				\
			       node_to_cpumask_map[node])

struct pci_bus;
#ifdef CONFIG_PCI
extern int pcibus_to_node(struct pci_bus *bus);
#else
static inline int pcibus_to_node(struct pci_bus *bus)
{
	return -1;
}
#endif

#define cpumask_of_pcibus(bus)	(pcibus_to_node(bus) == -1 ?		\
				 cpu_all_mask :				\
				 cpumask_of_node(pcibus_to_node(bus)))

extern int __node_distance(int, int);
#define node_distance(a, b) __node_distance(a, b)

extern void __init dump_numa_cpu_topology(void);

extern int sysfs_add_device_to_node(struct device *dev, int nid);
extern void sysfs_remove_device_from_node(struct device *dev, int nid);

static inline int early_cpu_to_node(int cpu)
{
	int nid;

	nid = numa_cpu_lookup_table[cpu];

	/*
	 * Fall back to node 0 if nid is unset (it should be, except bugs).
	 * This allows callers to safely do NODE_DATA(early_cpu_to_node(cpu)).
	 */
	return (nid < 0) ? 0 : nid;
}
#else

static inline int early_cpu_to_node(int cpu) { return 0; }

static inline void dump_numa_cpu_topology(void) {}

static inline int sysfs_add_device_to_node(struct device *dev, int nid)
{
	return 0;
}

static inline void sysfs_remove_device_from_node(struct device *dev,
						 int nid)
{
}
#endif /* CONFIG_NUMA */

#if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
extern int start_topology_update(void);
extern int stop_topology_update(void);
extern int prrn_is_enabled(void);
#else
static inline int start_topology_update(void)
{
	return 0;
}
static inline int stop_topology_update(void)
{
	return 0;
}
static inline int prrn_is_enabled(void)
{
	return 0;
}
#endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */

#include <asm-generic/topology.h>

#ifdef CONFIG_SMP
#include <asm/cputable.h>

#ifdef CONFIG_PPC64
#include <asm/smp.h>

#define topology_physical_package_id(cpu)	(cpu_to_chip_id(cpu))
#define topology_sibling_cpumask(cpu)	(per_cpu(cpu_sibling_map, cpu))
#define topology_core_cpumask(cpu)	(per_cpu(cpu_core_map, cpu))
#define topology_core_id(cpu)		(cpu_to_core_id(cpu))
#endif
#endif

#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_TOPOLOGY_H */
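For completeness, a hedged sketch of how the arch's per-CPU first-chunk setup
can use the new helper. The callback names (pcpu_cpu_distance, pcpu_fc_alloc)
and the bootmem call are illustrative assumptions based on the generic percpu
first-chunk API, not a verbatim copy of the actual plumbing in the arch setup
code:

  /* Illustrative only: making percpu first-chunk setup NUMA aware. */
  static int pcpu_cpu_distance(unsigned int from, unsigned int to)
  {
  	/* Group CPUs by the node reported by the early lookup table. */
  	if (early_cpu_to_node(from) == early_cpu_to_node(to))
  		return LOCAL_DISTANCE;
  	else
  		return REMOTE_DISTANCE;
  }

  static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
  {
  	/*
  	 * Allocate each CPU's chunk from its own node. Because
  	 * early_cpu_to_node() falls back to node 0 when the lookup table
  	 * is unset, NODE_DATA() always receives a valid node id.
  	 */
  	return __alloc_bootmem_node(NODE_DATA(early_cpu_to_node(cpu)),
  				    size, align, __pa(MAX_DMA_ADDRESS));
  }

This is what produces the two pcpu-alloc groups in the dmesg output above:
the distance callback groups CPUs by their real node, and the allocation
callback places each group's chunk in that node's memory.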