mirror of
https://github.com/torvalds/linux.git
synced 2024-11-10 14:11:52 +00:00
memory tier: consolidate the initialization of memory tiers
The current memory tier initialization process is distributed across two different functions, memory_tier_init() and memory_tier_late_init(). This design is hard to maintain. Thus, this patch is proposed to reduce the possible code paths by consolidating the different initialization paths into one. The earlier discussion with Jonathan and Ying is listed here: https://lore.kernel.org/lkml/20240405150244.00004b49@Huawei.com/ If we want to put these two initializations together, they must be placed together in the later function, because only at that time will the HMAT information be ready, so that the abstract distance (adist) between nodes can be calculated and memory tiering can be established based on the adist. So we move the node initialization that was done in memory_tier_init() into memory_tier_late_init(). Moreover, it's natural to keep memory_tier initialization in drivers at device_initcall() level. If we simply move the set_node_memory_tier() call from memory_tier_init() to late_initcall(), it will result in HMAT not registering the mt_adistance_algorithm callback function, because set_node_memory_tier() is not performed during the memory tiering initialization phase, leading to a lack of correct default_dram information. Therefore, we introduced a nodemask to pass the information about the default DRAM nodes. The reason for not choosing to reuse default_dram_type->nodes is that it is not clean enough. So in the end, we use an __initdata variable, which is released once initialization is complete, containing the nodes that have both CPUs and memory for HMAT to iterate through. 
Link: https://lkml.kernel.org/r/20240704072646.437579-1-horen.chuang@linux.dev Signed-off-by: Ho-Ren (Jack) Chuang <horenchuang@bytedance.com> Suggested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Reviewed-by: "Huang, Ying" <ying.huang@intel.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Cc: Alistair Popple <apopple@nvidia.com> Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Dave Jiang <dave.jiang@intel.com> Cc: Gregory Price <gourry.memverge@gmail.com> Cc: Len Brown <lenb@kernel.org> Cc: Michal Hocko <mhocko@suse.com> Cc: Rafael J. Wysocki <rafael@kernel.org> Cc: Ravi Jonnalagadda <ravis.opensrc@micron.com> Cc: SeongJae Park <sj@kernel.org> Cc: Tejun Heo <tj@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
a8585ac686
commit
823430c8e9
@ -940,10 +940,7 @@ static int hmat_set_default_dram_perf(void)
|
|||||||
struct memory_target *target;
|
struct memory_target *target;
|
||||||
struct access_coordinate *attrs;
|
struct access_coordinate *attrs;
|
||||||
|
|
||||||
if (!default_dram_type)
|
for_each_node_mask(nid, default_dram_nodes) {
|
||||||
return -EIO;
|
|
||||||
|
|
||||||
for_each_node_mask(nid, default_dram_type->nodes) {
|
|
||||||
pxm = node_to_pxm(nid);
|
pxm = node_to_pxm(nid);
|
||||||
target = find_mem_target(pxm);
|
target = find_mem_target(pxm);
|
||||||
if (!target)
|
if (!target)
|
||||||
|
@ -38,6 +38,7 @@ struct access_coordinate;
|
|||||||
#ifdef CONFIG_NUMA
|
#ifdef CONFIG_NUMA
|
||||||
extern bool numa_demotion_enabled;
|
extern bool numa_demotion_enabled;
|
||||||
extern struct memory_dev_type *default_dram_type;
|
extern struct memory_dev_type *default_dram_type;
|
||||||
|
extern nodemask_t default_dram_nodes;
|
||||||
struct memory_dev_type *alloc_memory_type(int adistance);
|
struct memory_dev_type *alloc_memory_type(int adistance);
|
||||||
void put_memory_type(struct memory_dev_type *memtype);
|
void put_memory_type(struct memory_dev_type *memtype);
|
||||||
void init_node_memory_type(int node, struct memory_dev_type *default_type);
|
void init_node_memory_type(int node, struct memory_dev_type *default_type);
|
||||||
@ -76,6 +77,7 @@ static inline bool node_is_toptier(int node)
|
|||||||
|
|
||||||
#define numa_demotion_enabled false
|
#define numa_demotion_enabled false
|
||||||
#define default_dram_type NULL
|
#define default_dram_type NULL
|
||||||
|
#define default_dram_nodes NODE_MASK_NONE
|
||||||
/*
|
/*
|
||||||
* CONFIG_NUMA implementation returns non NULL error.
|
* CONFIG_NUMA implementation returns non NULL error.
|
||||||
*/
|
*/
|
||||||
|
@ -43,6 +43,7 @@ static LIST_HEAD(memory_tiers);
|
|||||||
static LIST_HEAD(default_memory_types);
|
static LIST_HEAD(default_memory_types);
|
||||||
static struct node_memory_type_map node_memory_types[MAX_NUMNODES];
|
static struct node_memory_type_map node_memory_types[MAX_NUMNODES];
|
||||||
struct memory_dev_type *default_dram_type;
|
struct memory_dev_type *default_dram_type;
|
||||||
|
nodemask_t default_dram_nodes __initdata = NODE_MASK_NONE;
|
||||||
|
|
||||||
static const struct bus_type memory_tier_subsys = {
|
static const struct bus_type memory_tier_subsys = {
|
||||||
.name = "memory_tiering",
|
.name = "memory_tiering",
|
||||||
@ -671,28 +672,35 @@ EXPORT_SYMBOL_GPL(mt_put_memory_types);
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* This is invoked via `late_initcall()` to initialize memory tiers for
|
* This is invoked via `late_initcall()` to initialize memory tiers for
|
||||||
* CPU-less memory nodes after driver initialization, which is
|
* memory nodes, both with and without CPUs. After the initialization of
|
||||||
* expected to provide `adistance` algorithms.
|
* firmware and devices, adistance algorithms are expected to be provided.
|
||||||
*/
|
*/
|
||||||
static int __init memory_tier_late_init(void)
|
static int __init memory_tier_late_init(void)
|
||||||
{
|
{
|
||||||
int nid;
|
int nid;
|
||||||
|
struct memory_tier *memtier;
|
||||||
|
|
||||||
|
get_online_mems();
|
||||||
guard(mutex)(&memory_tier_lock);
|
guard(mutex)(&memory_tier_lock);
|
||||||
|
|
||||||
|
/* Assign each uninitialized N_MEMORY node to a memory tier. */
|
||||||
for_each_node_state(nid, N_MEMORY) {
|
for_each_node_state(nid, N_MEMORY) {
|
||||||
/*
|
/*
|
||||||
* Some device drivers may have initialized memory tiers
|
* Some device drivers may have initialized
|
||||||
* between `memory_tier_init()` and `memory_tier_late_init()`,
|
* memory tiers, potentially bringing memory nodes
|
||||||
* potentially bringing online memory nodes and
|
* online and configuring memory tiers.
|
||||||
* configuring memory tiers. Exclude them here.
|
* Exclude them here.
|
||||||
*/
|
*/
|
||||||
if (node_memory_types[nid].memtype)
|
if (node_memory_types[nid].memtype)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
set_node_memory_tier(nid);
|
memtier = set_node_memory_tier(nid);
|
||||||
|
if (IS_ERR(memtier))
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
establish_demotion_targets();
|
establish_demotion_targets();
|
||||||
|
put_online_mems();
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -875,8 +883,7 @@ static int __meminit memtier_hotplug_callback(struct notifier_block *self,
|
|||||||
|
|
||||||
static int __init memory_tier_init(void)
|
static int __init memory_tier_init(void)
|
||||||
{
|
{
|
||||||
int ret, node;
|
int ret;
|
||||||
struct memory_tier *memtier;
|
|
||||||
|
|
||||||
ret = subsys_virtual_register(&memory_tier_subsys, NULL);
|
ret = subsys_virtual_register(&memory_tier_subsys, NULL);
|
||||||
if (ret)
|
if (ret)
|
||||||
@ -887,7 +894,8 @@ static int __init memory_tier_init(void)
|
|||||||
GFP_KERNEL);
|
GFP_KERNEL);
|
||||||
WARN_ON(!node_demotion);
|
WARN_ON(!node_demotion);
|
||||||
#endif
|
#endif
|
||||||
mutex_lock(&memory_tier_lock);
|
|
||||||
|
guard(mutex)(&memory_tier_lock);
|
||||||
/*
|
/*
|
||||||
* For now we can have 4 faster memory tiers with smaller adistance
|
* For now we can have 4 faster memory tiers with smaller adistance
|
||||||
* than default DRAM tier.
|
* than default DRAM tier.
|
||||||
@ -897,29 +905,9 @@ static int __init memory_tier_init(void)
|
|||||||
if (IS_ERR(default_dram_type))
|
if (IS_ERR(default_dram_type))
|
||||||
panic("%s() failed to allocate default DRAM tier\n", __func__);
|
panic("%s() failed to allocate default DRAM tier\n", __func__);
|
||||||
|
|
||||||
/*
|
/* Record nodes with memory and CPU to set default DRAM performance. */
|
||||||
* Look at all the existing N_MEMORY nodes and add them to
|
nodes_and(default_dram_nodes, node_states[N_MEMORY],
|
||||||
* default memory tier or to a tier if we already have memory
|
node_states[N_CPU]);
|
||||||
* types assigned.
|
|
||||||
*/
|
|
||||||
for_each_node_state(node, N_MEMORY) {
|
|
||||||
if (!node_state(node, N_CPU))
|
|
||||||
/*
|
|
||||||
* Defer memory tier initialization on
|
|
||||||
* CPUless numa nodes. These will be initialized
|
|
||||||
* after firmware and devices are initialized.
|
|
||||||
*/
|
|
||||||
continue;
|
|
||||||
|
|
||||||
memtier = set_node_memory_tier(node);
|
|
||||||
if (IS_ERR(memtier))
|
|
||||||
/*
|
|
||||||
* Continue with memtiers we are able to setup
|
|
||||||
*/
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
establish_demotion_targets();
|
|
||||||
mutex_unlock(&memory_tier_lock);
|
|
||||||
|
|
||||||
hotplug_memory_notifier(memtier_hotplug_callback, MEMTIER_HOTPLUG_PRI);
|
hotplug_memory_notifier(memtier_hotplug_callback, MEMTIER_HOTPLUG_PRI);
|
||||||
return 0;
|
return 0;
|
||||||
|
Loading…
Reference in New Issue
Block a user