diff options
author | Rakie Kim <rakie.kim@sk.com> | 2025-04-17 16:28:37 +0900 |
---|---|---|
committer | Andrew Morton <akpm@linux-foundation.org> | 2025-05-11 17:48:35 -0700 |
commit | dec92bf95f5a710287b588cb203ff27abcd24706 (patch) | |
tree | c32ab5d4b537dc804118c4ca12bc5a783fe7fe64 | |
parent | cf8cecf2bc221cea99fadc2dc62cef2d4c970dff (diff) |
mm/mempolicy: support memory hotplug in weighted interleave
The weighted interleave policy distributes page allocations across
multiple NUMA nodes based on their performance weight, thereby improving
memory bandwidth utilization. The weight values for each node are
configured through sysfs.
Previously, sysfs entries for configuring weighted interleave were created
for all possible nodes (N_POSSIBLE) at initialization, including nodes
that might not have memory. However, not all nodes in N_POSSIBLE are
usable at runtime, as some may remain memoryless or offline. This led to
sysfs entries being created for unusable nodes, causing potential
misconfiguration issues.
To address this issue, this patch modifies the sysfs creation logic to:
1) Limit sysfs entries to nodes that are online and have memory, avoiding
the creation of sysfs entries for nodes that cannot be used.
2) Support memory hotplug by dynamically adding and removing sysfs entries
based on whether a node transitions into or out of the N_MEMORY state.
Additionally, the patch ensures that sysfs attributes are properly managed
when nodes go offline, preventing stale or redundant entries from
persisting in the system.
By making these changes, the weighted interleave policy now manages its
sysfs entries more efficiently, ensuring that only relevant nodes are
considered for interleaving, and dynamically adapting to memory hotplug
events.
[dan.carpenter@linaro.org: fix error code in sysfs_wi_node_add()]
Link: https://lkml.kernel.org/r/aBjL7Bwc0QBzgajK@stanley.mountain
Link: https://lkml.kernel.org/r/20250417072839.711-4-rakie.kim@sk.com
Co-developed-by: Honggyu Kim <honggyu.kim@sk.com>
Signed-off-by: Honggyu Kim <honggyu.kim@sk.com>
Co-developed-by: Yunjeong Mun <yunjeong.mun@sk.com>
Signed-off-by: Yunjeong Mun <yunjeong.mun@sk.com>
Signed-off-by: Rakie Kim <rakie.kim@sk.com>
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Joshua Hahn <joshua.hahnjy@gmail.com>
Reviewed-by: Gregory Price <gourry@gourry.net>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: "Huang, Ying" <ying.huang@linux.alibaba.com>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Dan Carpenter <dan.carpenter@linaro.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-rw-r--r-- | mm/mempolicy.c | 107 |
1 files changed, 84 insertions, 23 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index c99bae7d8219..9a2b4b36f558 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -113,6 +113,7 @@ #include <asm/tlbflush.h> #include <asm/tlb.h> #include <linux/uaccess.h> +#include <linux/memory.h> #include "internal.h" @@ -3429,6 +3430,7 @@ struct iw_node_attr { struct sysfs_wi_group { struct kobject wi_kobj; + struct mutex kobj_lock; struct iw_node_attr *nattrs[]; }; @@ -3478,13 +3480,24 @@ static ssize_t node_store(struct kobject *kobj, struct kobj_attribute *attr, static void sysfs_wi_node_delete(int nid) { - if (!wi_group->nattrs[nid]) + struct iw_node_attr *attr; + + if (nid < 0 || nid >= nr_node_ids) + return; + + mutex_lock(&wi_group->kobj_lock); + attr = wi_group->nattrs[nid]; + if (!attr) { + mutex_unlock(&wi_group->kobj_lock); return; + } + + wi_group->nattrs[nid] = NULL; + mutex_unlock(&wi_group->kobj_lock); - sysfs_remove_file(&wi_group->wi_kobj, - &wi_group->nattrs[nid]->kobj_attr.attr); - kfree(wi_group->nattrs[nid]->kobj_attr.attr.name); - kfree(wi_group->nattrs[nid]); + sysfs_remove_file(&wi_group->wi_kobj, &attr->kobj_attr.attr); + kfree(attr->kobj_attr.attr.name); + kfree(attr); } static void sysfs_wi_node_delete_all(void) @@ -3526,35 +3539,77 @@ static const struct kobj_type wi_ktype = { static int sysfs_wi_node_add(int nid) { - struct iw_node_attr *node_attr; + int ret; char *name; + struct iw_node_attr *new_attr; - node_attr = kzalloc(sizeof(*node_attr), GFP_KERNEL); - if (!node_attr) + if (nid < 0 || nid >= nr_node_ids) { + pr_err("invalid node id: %d\n", nid); + return -EINVAL; + } + + new_attr = kzalloc(sizeof(*new_attr), GFP_KERNEL); + if (!new_attr) return -ENOMEM; name = kasprintf(GFP_KERNEL, "node%d", nid); if (!name) { - kfree(node_attr); + kfree(new_attr); return -ENOMEM; } - sysfs_attr_init(&node_attr->kobj_attr.attr); - node_attr->kobj_attr.attr.name = name; - node_attr->kobj_attr.attr.mode = 0644; - node_attr->kobj_attr.show = node_show; - node_attr->kobj_attr.store = node_store; - node_attr->nid = nid; + sysfs_attr_init(&new_attr->kobj_attr.attr); + new_attr->kobj_attr.attr.name = name; + new_attr->kobj_attr.attr.mode = 0644; + new_attr->kobj_attr.show = node_show; + new_attr->kobj_attr.store = node_store; + new_attr->nid = nid; - if (sysfs_create_file(&wi_group->wi_kobj, &node_attr->kobj_attr.attr)) { - kfree(node_attr->kobj_attr.attr.name); - kfree(node_attr); - pr_err("failed to add attribute to weighted_interleave\n"); - return -ENOMEM; + mutex_lock(&wi_group->kobj_lock); + if (wi_group->nattrs[nid]) { + mutex_unlock(&wi_group->kobj_lock); + ret = -EEXIST; + goto out; } - wi_group->nattrs[nid] = node_attr; + ret = sysfs_create_file(&wi_group->wi_kobj, &new_attr->kobj_attr.attr); + if (ret) { + mutex_unlock(&wi_group->kobj_lock); + goto out; + } + wi_group->nattrs[nid] = new_attr; + mutex_unlock(&wi_group->kobj_lock); return 0; + +out: + kfree(new_attr->kobj_attr.attr.name); + kfree(new_attr); + return ret; +} + +static int wi_node_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + int err; + struct memory_notify *arg = data; + int nid = arg->status_change_nid; + + if (nid < 0) + return NOTIFY_OK; + + switch (action) { + case MEM_ONLINE: + err = sysfs_wi_node_add(nid); + if (err) + pr_err("failed to add sysfs for node%d during hotplug: %d\n", + nid, err); + break; + case MEM_OFFLINE: + sysfs_wi_node_delete(nid); + break; + } + + return NOTIFY_OK; } static int __init add_weighted_interleave_group(struct kobject *mempolicy_kobj) @@ -3565,20 +3620,26 @@ static int __init add_weighted_interleave_group(struct kobject *mempolicy_kobj) GFP_KERNEL); if (!wi_group) return -ENOMEM; + mutex_init(&wi_group->kobj_lock); err = kobject_init_and_add(&wi_group->wi_kobj, &wi_ktype, mempolicy_kobj, "weighted_interleave"); if (err) goto err_put_kobj; - for_each_node_state(nid, N_POSSIBLE) { + for_each_online_node(nid) { + if (!node_state(nid, N_MEMORY)) + continue; + err = sysfs_wi_node_add(nid); if (err) { - pr_err("failed to add sysfs [node%d]\n", nid); + pr_err("failed to add sysfs for node%d during init: %d\n", + nid, err); goto err_cleanup_kobj; } } + hotplug_memory_notifier(wi_node_notifier, DEFAULT_CALLBACK_PRI); return 0; err_cleanup_kobj: |