diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 9ab78ad826e2..869ffc4d4484 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -89,8 +89,6 @@ int __init parse_acpi_topology(void)
 		return 0;
 
 	for_each_possible_cpu(cpu) {
-		int i, cache_id;
-
 		topology_id = find_acpi_cpu_topology(cpu, 0);
 		if (topology_id < 0)
 			return topology_id;
@@ -107,18 +105,6 @@ int __init parse_acpi_topology(void)
 		cpu_topology[cpu].cluster_id = topology_id;
 		topology_id = find_acpi_cpu_topology_package(cpu);
 		cpu_topology[cpu].package_id = topology_id;
-
-		i = acpi_find_last_cache_level(cpu);
-
-		if (i > 0) {
-			/*
-			 * this is the only part of cpu_topology that has
-			 * a direct relationship with the cache topology
-			 */
-			cache_id = find_acpi_cpu_cache_topology(cpu, i);
-			if (cache_id > 0)
-				cpu_topology[cpu].llc_id = cache_id;
-		}
 	}
 
 	return 0;
diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c
index 701f61c01359..dd3222a15c9c 100644
--- a/drivers/acpi/pptt.c
+++ b/drivers/acpi/pptt.c
@@ -437,7 +437,8 @@ static void cache_setup_acpi_cpu(struct acpi_table_header *table,
 		pr_debug("found = %p %p\n", found_cache, cpu_node);
 		if (found_cache)
 			update_cache_properties(this_leaf, found_cache,
-						cpu_node, table->revision);
+						ACPI_TO_POINTER(ACPI_PTR_DIFF(cpu_node, table)),
+						table->revision);
 
 		index++;
 	}
@@ -690,43 +691,6 @@ int find_acpi_cpu_topology(unsigned int cpu, int level)
 	return find_acpi_cpu_topology_tag(cpu, level, 0);
 }
 
-/**
- * find_acpi_cpu_cache_topology() - Determine a unique cache topology value
- * @cpu: Kernel logical CPU number
- * @level: The cache level for which we would like a unique ID
- *
- * Determine a unique ID for each unified cache in the system
- *
- * Return: -ENOENT if the PPTT doesn't exist, or the CPU cannot be found.
- * Otherwise returns a value which represents a unique topological feature.
- */
-int find_acpi_cpu_cache_topology(unsigned int cpu, int level)
-{
-	struct acpi_table_header *table;
-	struct acpi_pptt_cache *found_cache;
-	acpi_status status;
-	u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu);
-	struct acpi_pptt_processor *cpu_node = NULL;
-	int ret = -1;
-
-	status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
-	if (ACPI_FAILURE(status)) {
-		acpi_pptt_warn_missing();
-		return -ENOENT;
-	}
-
-	found_cache = acpi_find_cache_node(table, acpi_cpu_id,
-					   CACHE_TYPE_UNIFIED,
-					   level,
-					   &cpu_node);
-	if (found_cache)
-		ret = ACPI_PTR_DIFF(cpu_node, table);
-
-	acpi_put_table(table);
-
-	return ret;
-}
-
 /**
  * find_acpi_cpu_topology_package() - Determine a unique CPU package value
  * @cpu: Kernel logical CPU number
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 579c851a2bd7..441e14ac33a4 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -7,6 +7,7 @@
  */
 
 #include <linux/acpi.h>
+#include <linux/cacheinfo.h>
 #include <linux/cpu.h>
 #include <linux/cpufreq.h>
 #include <linux/device.h>
@@ -496,7 +497,7 @@ static int __init get_cpu_for_node(struct device_node *node)
 }
 
 static int __init parse_core(struct device_node *core, int package_id,
-			     int core_id)
+			     int cluster_id, int core_id)
 {
 	char name[20];
 	bool leaf = true;
@@ -512,6 +513,7 @@ static int __init parse_core(struct device_node *core, int package_id,
 			cpu = get_cpu_for_node(t);
 			if (cpu >= 0) {
 				cpu_topology[cpu].package_id = package_id;
+				cpu_topology[cpu].cluster_id = cluster_id;
 				cpu_topology[cpu].core_id = core_id;
 				cpu_topology[cpu].thread_id = i;
 			} else if (cpu != -ENODEV) {
@@ -533,6 +535,7 @@ static int __init parse_core(struct device_node *core, int package_id,
 		}
 
 		cpu_topology[cpu].package_id = package_id;
+		cpu_topology[cpu].cluster_id = cluster_id;
 		cpu_topology[cpu].core_id = core_id;
 	} else if (leaf && cpu != -ENODEV) {
 		pr_err("%pOF: Can't get CPU for leaf core\n", core);
@@ -542,13 +545,13 @@ static int __init parse_core(struct device_node *core, int package_id,
 	return 0;
 }
 
-static int __init parse_cluster(struct device_node *cluster, int depth)
+static int __init parse_cluster(struct device_node *cluster, int package_id,
+				int cluster_id, int depth)
 {
 	char name[20];
 	bool leaf = true;
 	bool has_cores = false;
 	struct device_node *c;
-	static int package_id __initdata;
 	int core_id = 0;
 	int i, ret;
 
@@ -563,7 +566,9 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
 		c = of_get_child_by_name(cluster, name);
 		if (c) {
 			leaf = false;
-			ret = parse_cluster(c, depth + 1);
+			ret = parse_cluster(c, package_id, i, depth + 1);
+			if (depth > 0)
+				pr_warn("Topology for clusters of clusters not yet supported\n");
 			of_node_put(c);
 			if (ret != 0)
 				return ret;
@@ -587,7 +592,8 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
 			}
 
 			if (leaf) {
-				ret = parse_core(c, package_id, core_id++);
+				ret = parse_core(c, package_id, cluster_id,
+						 core_id++);
 			} else {
 				pr_err("%pOF: Non-leaf cluster with core %s\n",
 				       cluster, name);
@@ -604,12 +610,35 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
 	if (leaf && !has_cores)
 		pr_warn("%pOF: empty cluster\n", cluster);
 
-	if (leaf)
-		package_id++;
-
 	return 0;
 }
 
+static int __init parse_socket(struct device_node *socket)
+{
+	char name[20];
+	struct device_node *c;
+	bool has_socket = false;
+	int package_id = 0, ret;
+
+	do {
+		snprintf(name, sizeof(name), "socket%d", package_id);
+		c = of_get_child_by_name(socket, name);
+		if (c) {
+			has_socket = true;
+			ret = parse_cluster(c, package_id, -1, 0);
+			of_node_put(c);
+			if (ret != 0)
+				return ret;
+		}
+		package_id++;
+	} while (c);
+
+	if (!has_socket)
+		ret = parse_cluster(socket, 0, -1, 0);
+
+	return ret;
+}
+
 static int __init parse_dt_topology(void)
 {
 	struct device_node *cn, *map;
@@ -630,7 +659,7 @@ static int __init parse_dt_topology(void)
 	if (!map)
 		goto out;
 
-	ret = parse_cluster(map, 0);
+	ret = parse_socket(map);
 	if (ret != 0)
 		goto out_map;
 
@@ -641,8 +670,10 @@ static int __init parse_dt_topology(void)
 	 * only mark cores described in the DT as possible.
 	 */
 	for_each_possible_cpu(cpu)
-		if (cpu_topology[cpu].package_id == -1)
+		if (cpu_topology[cpu].package_id < 0) {
 			ret = -EINVAL;
+			break;
+		}
 
 out_map:
 	of_node_put(map);
@@ -667,7 +698,8 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
 		/* not numa in package, lets use the package siblings */
 		core_mask = &cpu_topology[cpu].core_sibling;
 	}
-	if (cpu_topology[cpu].llc_id != -1) {
+
+	if (last_level_cache_is_valid(cpu)) {
 		if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
 			core_mask = &cpu_topology[cpu].llc_sibling;
 	}
@@ -686,6 +718,14 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
 
 const struct cpumask *cpu_clustergroup_mask(int cpu)
 {
+	/*
+	 * Forbid cpu_clustergroup_mask() to span more or the same CPUs as
+	 * cpu_coregroup_mask().
+	 */
+	if (cpumask_subset(cpu_coregroup_mask(cpu),
+			   &cpu_topology[cpu].cluster_sibling))
+		return get_cpu_mask(cpu);
+
 	return &cpu_topology[cpu].cluster_sibling;
 }
 
@@ -698,7 +738,7 @@ void update_siblings_masks(unsigned int cpuid)
 	for_each_online_cpu(cpu) {
 		cpu_topo = &cpu_topology[cpu];
 
-		if (cpu_topo->llc_id != -1 && cpuid_topo->llc_id == cpu_topo->llc_id) {
+		if (last_level_cache_is_shared(cpu, cpuid)) {
 			cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
 			cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
 		}
@@ -706,15 +746,17 @@ void update_siblings_masks(unsigned int cpuid)
 		if (cpuid_topo->package_id != cpu_topo->package_id)
 			continue;
 
-		if (cpuid_topo->cluster_id == cpu_topo->cluster_id &&
-		    cpuid_topo->cluster_id != -1) {
+		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
+		cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
+
+		if (cpuid_topo->cluster_id != cpu_topo->cluster_id)
+			continue;
+
+		if (cpuid_topo->cluster_id >= 0) {
 			cpumask_set_cpu(cpu, &cpuid_topo->cluster_sibling);
 			cpumask_set_cpu(cpuid, &cpu_topo->cluster_sibling);
 		}
 
-		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
-		cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
-
 		if (cpuid_topo->core_id != cpu_topo->core_id)
 			continue;
 
@@ -750,7 +792,6 @@ void __init reset_cpu_topology(void)
 		cpu_topo->core_id = -1;
 		cpu_topo->cluster_id = -1;
 		cpu_topo->package_id = -1;
-		cpu_topo->llc_id = -1;
 
 		clear_cpu_topology(cpu);
 	}
@@ -780,15 +821,28 @@ __weak int __init parse_acpi_topology(void)
 #if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
 void __init init_cpu_topology(void)
 {
-	reset_cpu_topology();
+	int ret, cpu;
 
-	/*
-	 * Discard anything that was parsed if we hit an error so we
-	 * don't use partial information.
-	 */
-	if (parse_acpi_topology())
-		reset_cpu_topology();
-	else if (of_have_populated_dt() && parse_dt_topology())
+	reset_cpu_topology();
+	ret = parse_acpi_topology();
+	if (!ret)
+		ret = of_have_populated_dt() && parse_dt_topology();
+
+	if (ret) {
+		/*
+		 * Discard anything that was parsed if we hit an error so we
+		 * don't use partial information.
+		 */
 		reset_cpu_topology();
+		return;
+	}
+
+	for_each_possible_cpu(cpu) {
+		ret = detect_cache_attributes(cpu);
+		if (ret) {
+			pr_info("Early cacheinfo failed, ret = %d\n", ret);
+			break;
+		}
+	}
 }
 #endif
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index dad296229161..65d566ff24c4 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -14,7 +14,7 @@
 #include <linux/cpu.h>
 #include <linux/device.h>
 #include <linux/init.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/smp.h>
@@ -25,19 +25,60 @@ static DEFINE_PER_CPU(struct cpu_cacheinfo, ci_cpu_cacheinfo);
 #define ci_cacheinfo(cpu)	(&per_cpu(ci_cpu_cacheinfo, cpu))
 #define cache_leaves(cpu)	(ci_cacheinfo(cpu)->num_leaves)
 #define per_cpu_cacheinfo(cpu)	(ci_cacheinfo(cpu)->info_list)
+#define per_cpu_cacheinfo_idx(cpu, idx)		\
+				(per_cpu_cacheinfo(cpu) + (idx))
 
 struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu)
 {
 	return ci_cacheinfo(cpu);
 }
 
-#ifdef CONFIG_OF
 static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf,
 					   struct cacheinfo *sib_leaf)
 {
+	/*
+	 * For non DT/ACPI systems, assume unique level 1 caches,
+	 * system-wide shared caches for all other levels. This will be used
+	 * only if arch specific code has not populated shared_cpu_map
+	 */
+	if (!(IS_ENABLED(CONFIG_OF) || IS_ENABLED(CONFIG_ACPI)))
+		return !(this_leaf->level == 1);
+
+	if ((sib_leaf->attributes & CACHE_ID) &&
+	    (this_leaf->attributes & CACHE_ID))
+		return sib_leaf->id == this_leaf->id;
+
 	return sib_leaf->fw_token == this_leaf->fw_token;
 }
 
+bool last_level_cache_is_valid(unsigned int cpu)
+{
+	struct cacheinfo *llc;
+
+	if (!cache_leaves(cpu))
+		return false;
+
+	llc = per_cpu_cacheinfo_idx(cpu, cache_leaves(cpu) - 1);
+
+	return (llc->attributes & CACHE_ID) || !!llc->fw_token;
+
+}
+
+bool last_level_cache_is_shared(unsigned int cpu_x, unsigned int cpu_y)
+{
+	struct cacheinfo *llc_x, *llc_y;
+
+	if (!last_level_cache_is_valid(cpu_x) ||
+	    !last_level_cache_is_valid(cpu_y))
+		return false;
+
+	llc_x = per_cpu_cacheinfo_idx(cpu_x, cache_leaves(cpu_x) - 1);
+	llc_y = per_cpu_cacheinfo_idx(cpu_y, cache_leaves(cpu_y) - 1);
+
+	return cache_leaves_are_shared(llc_x, llc_y);
+}
+
+#ifdef CONFIG_OF
 /* OF properties to query for a given cache type */
 struct cache_type_info {
 	const char *size_prop;
@@ -157,27 +198,16 @@ static int cache_setup_of_node(unsigned int cpu)
 {
 	struct device_node *np;
 	struct cacheinfo *this_leaf;
-	struct device *cpu_dev = get_cpu_device(cpu);
-	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 	unsigned int index = 0;
 
-	/* skip if fw_token is already populated */
-	if (this_cpu_ci->info_list->fw_token) {
-		return 0;
-	}
-
-	if (!cpu_dev) {
-		pr_err("No cpu device for CPU %d\n", cpu);
-		return -ENODEV;
-	}
-	np = cpu_dev->of_node;
+	np = of_cpu_device_node_get(cpu);
 	if (!np) {
 		pr_err("Failed to find cpu%d device node\n", cpu);
 		return -ENOENT;
 	}
 
 	while (index < cache_leaves(cpu)) {
-		this_leaf = this_cpu_ci->info_list + index;
+		this_leaf = per_cpu_cacheinfo_idx(cpu, index);
 		if (this_leaf->level != 1)
 			np = of_find_next_cache_node(np);
 		else
@@ -196,16 +226,6 @@ static int cache_setup_of_node(unsigned int cpu)
 }
 #else
 static inline int cache_setup_of_node(unsigned int cpu) { return 0; }
-static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf,
-					   struct cacheinfo *sib_leaf)
-{
-	/*
-	 * For non-DT/ACPI systems, assume unique level 1 caches, system-wide
-	 * shared caches for all other levels. This will be used only if
-	 * arch specific code has not populated shared_cpu_map
-	 */
-	return !(this_leaf->level == 1);
-}
 #endif
 
 int __weak cache_setup_acpi(unsigned int cpu)
@@ -215,6 +235,18 @@ int __weak cache_setup_acpi(unsigned int cpu)
 
 unsigned int coherency_max_size;
 
+static int cache_setup_properties(unsigned int cpu)
+{
+	int ret = 0;
+
+	if (of_have_populated_dt())
+		ret = cache_setup_of_node(cpu);
+	else if (!acpi_disabled)
+		ret = cache_setup_acpi(cpu);
+
+	return ret;
+}
+
 static int cache_shared_cpu_map_setup(unsigned int cpu)
 {
 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
@@ -225,21 +257,21 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
 	if (this_cpu_ci->cpu_map_populated)
 		return 0;
 
-	if (of_have_populated_dt())
-		ret = cache_setup_of_node(cpu);
-	else if (!acpi_disabled)
-		ret = cache_setup_acpi(cpu);
-
-	if (ret)
-		return ret;
+	/*
+	 * skip setting up cache properties if LLC is valid, just need
+	 * to update the shared cpu_map if the cache attributes were
+	 * populated early before all the cpus are brought online
+	 */
+	if (!last_level_cache_is_valid(cpu)) {
+		ret = cache_setup_properties(cpu);
+		if (ret)
+			return ret;
+	}
 
 	for (index = 0; index < cache_leaves(cpu); index++) {
 		unsigned int i;
 
-		this_leaf = this_cpu_ci->info_list + index;
-		/* skip if shared_cpu_map is already populated */
-		if (!cpumask_empty(&this_leaf->shared_cpu_map))
-			continue;
+		this_leaf = per_cpu_cacheinfo_idx(cpu, index);
 
 		cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
 		for_each_online_cpu(i) {
@@ -247,7 +279,8 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
 			if (i == cpu || !sib_cpu_ci->info_list)
 				continue;/* skip if itself or no cacheinfo */
-			sib_leaf = sib_cpu_ci->info_list + index;
+
+			sib_leaf = per_cpu_cacheinfo_idx(i, index);
 			if (cache_leaves_are_shared(this_leaf, sib_leaf)) {
 				cpumask_set_cpu(cpu, &sib_leaf->shared_cpu_map);
 				cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
 			}
@@ -263,23 +296,19 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
 
 static void cache_shared_cpu_map_remove(unsigned int cpu)
 {
-	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 	struct cacheinfo *this_leaf, *sib_leaf;
 	unsigned int sibling, index;
 
 	for (index = 0; index < cache_leaves(cpu); index++) {
-		this_leaf = this_cpu_ci->info_list + index;
+		this_leaf = per_cpu_cacheinfo_idx(cpu, index);
 		for_each_cpu(sibling, &this_leaf->shared_cpu_map) {
-			struct cpu_cacheinfo *sib_cpu_ci;
+			struct cpu_cacheinfo *sib_cpu_ci =
+						get_cpu_cacheinfo(sibling);
 
-			if (sibling == cpu) /* skip itself */
-				continue;
+			if (sibling == cpu || !sib_cpu_ci->info_list)
+				continue;/* skip if itself or no cacheinfo */
 
-			sib_cpu_ci = get_cpu_cacheinfo(sibling);
-			if (!sib_cpu_ci->info_list)
-				continue;
-
-			sib_leaf = sib_cpu_ci->info_list + index;
+			sib_leaf = per_cpu_cacheinfo_idx(sibling, index);
 			cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map);
 			cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map);
 		}
@@ -310,17 +339,28 @@ int __weak populate_cache_leaves(unsigned int cpu)
 	return -ENOENT;
 }
 
-static int detect_cache_attributes(unsigned int cpu)
+int detect_cache_attributes(unsigned int cpu)
 {
 	int ret;
 
+	/* Since early detection of the cacheinfo is allowed via this
+	 * function and this also gets called as CPU hotplug callbacks via
+	 * cacheinfo_cpu_online, the initialisation can be skipped and only
+	 * CPU maps can be updated as the CPU online status would be update
+	 * if called via cacheinfo_cpu_online path.
+	 */
+	if (per_cpu_cacheinfo(cpu))
+		goto update_cpu_map;
+
 	if (init_cache_level(cpu) || !cache_leaves(cpu))
 		return -ENOENT;
 
 	per_cpu_cacheinfo(cpu) = kcalloc(cache_leaves(cpu),
 					 sizeof(struct cacheinfo), GFP_KERNEL);
-	if (per_cpu_cacheinfo(cpu) == NULL)
+	if (per_cpu_cacheinfo(cpu) == NULL) {
+		cache_leaves(cpu) = 0;
 		return -ENOMEM;
+	}
 
 	/*
 	 * populate_cache_leaves() may completely setup the cache leaves and
@@ -329,6 +369,8 @@ static int detect_cache_attributes(unsigned int cpu)
 	ret = populate_cache_leaves(cpu);
 	if (ret)
 		goto free_ci;
+
+update_cpu_map:
 	/*
 	 * For systems using DT for cache hierarchy, fw_token
 	 * and shared_cpu_map will be set up here only if they are
@@ -614,7 +656,6 @@ static int cache_add_dev(unsigned int cpu)
 	int rc;
 	struct device *ci_dev, *parent;
 	struct cacheinfo *this_leaf;
-	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 	const struct attribute_group **cache_groups;
 
 	rc = cpu_cache_sysfs_init(cpu);
@@ -623,7 +664,7 @@ static int cache_add_dev(unsigned int cpu)
 
 	parent = per_cpu_cache_dev(cpu);
 	for (i = 0; i < cache_leaves(cpu); i++) {
-		this_leaf = this_cpu_ci->info_list + i;
+		this_leaf = per_cpu_cacheinfo_idx(cpu, i);
 		if (this_leaf->disable_sysfs)
 			continue;
 		if (this_leaf->type == CACHE_TYPE_NOCACHE)
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 4f82a5bc6d98..7b96a8bff6d2 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1429,7 +1429,6 @@ int find_acpi_cpu_topology(unsigned int cpu, int level);
 int find_acpi_cpu_topology_cluster(unsigned int cpu);
 int find_acpi_cpu_topology_package(unsigned int cpu);
 int find_acpi_cpu_topology_hetero_id(unsigned int cpu);
-int find_acpi_cpu_cache_topology(unsigned int cpu, int level);
 #else
 static inline int acpi_pptt_cpu_is_thread(unsigned int cpu)
 {
@@ -1451,10 +1450,6 @@ static inline int find_acpi_cpu_topology_hetero_id(unsigned int cpu)
 {
 	return -EINVAL;
 }
-static inline int find_acpi_cpu_cache_topology(unsigned int cpu, int level)
-{
-	return -EINVAL;
-}
 #endif
 
 #ifdef CONFIG_ACPI_PCC
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index 58cbe18d825c..a07b510e7dc5 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -68,7 +68,6 @@ struct cpu_topology {
 	int core_id;
 	int cluster_id;
 	int package_id;
-	int llc_id;
 	cpumask_t thread_sibling;
 	cpumask_t core_sibling;
 	cpumask_t cluster_sibling;
diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index 4ff37cb763ae..00b7a6ae8617 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -82,6 +82,9 @@ struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu);
 int init_cache_level(unsigned int cpu);
 int populate_cache_leaves(unsigned int cpu);
 int cache_setup_acpi(unsigned int cpu);
+bool last_level_cache_is_valid(unsigned int cpu);
+bool last_level_cache_is_shared(unsigned int cpu_x, unsigned int cpu_y);
+int detect_cache_attributes(unsigned int cpu);
 #ifndef CONFIG_ACPI_PPTT
 /*
  * acpi_find_last_cache_level is only called on ACPI enabled