Patch series "improve the creation of persister".
The main modification is to create a persistent thread only when a NUMA node has an online CPU at least, instead of on all NUMA nodes.
In the current code, the number of NUMA nodes is obtained when the module is loaded, and the same number of persistent threads are created when the file system is mounted. However, we should ignore NUMA nodes that do not have an online CPU. So we should decide how many threads to create according to the online CPU condition when initializing persist.
Patch4 is a bugfix about dep_init().
Gou Hao (4): eulerfs: add socket_num to eufs_sb_info eulerfs: create persister on node when it has online cpu eulerfs: record persister num eulerfs: fix potential sbi->persisters free error
fs/eulerfs/dep.c | 48 +++++++++++++++++++++++++++--------------- fs/eulerfs/euler.h | 2 -- fs/eulerfs/euler_def.h | 1 + fs/eulerfs/super.c | 21 ++++-------------- 4 files changed, 36 insertions(+), 36 deletions(-)
uniontech inclusion category: bugfix bugzilla: NA CVE: NA
----------------------
We should not get the socket num in init_eufs_fs, becuse system usually supports cpu hotplug,so we should get socket num in dep_int.
Socket num is not necessarily the same when file system mount,so it shoule be eufs_sb_info's member.
Signed-off-by: Gou Hao gouhao@uniontech.com --- fs/eulerfs/dep.c | 28 ++++++++++++++++++++++------ fs/eulerfs/euler.h | 2 -- fs/eulerfs/euler_def.h | 1 + fs/eulerfs/super.c | 15 +-------------- 4 files changed, 24 insertions(+), 22 deletions(-)
diff --git a/fs/eulerfs/dep.c b/fs/eulerfs/dep.c index ec014bbf3700..4f8bad75c0c3 100644 --- a/fs/eulerfs/dep.c +++ b/fs/eulerfs/dep.c @@ -677,7 +677,7 @@ static int persister(void *data) /* more than a second */ (HZ * persist_period)); int idx = 0; - int num_persisters = num_sockets * persisters_per_socket; + int num_persisters = sbi->socket_num * persisters_per_socket;
eufs_info("sb=%px cpu=%d cpumask=%*pbl period=%d\n", data, smp_processor_id(), cpumask_pr_args(mask), period); @@ -700,6 +700,19 @@ static int persister(void *data) return 0; }
+static inline int get_socket_num(void) +{ + int num = 0; + int cpu, sock; + for_each_possible_cpu(cpu) { + sock = cpu_to_node(cpu); + if (sock > num) + num = sock; + } + num += 1; + return num; +} + int dep_init(struct super_block *sb) { struct eufs_sb_info *sbi = EUFS_SB(sb); @@ -707,6 +720,7 @@ int dep_init(struct super_block *sb) int i, j; char name[BDEVNAME_SIZE]; int err; + int socket_num = get_socket_num();
sbi->persistee_list = alloc_percpu(struct llist_head); if (!sbi->persistee_list) { @@ -714,12 +728,14 @@ int dep_init(struct super_block *sb) goto cleanup; }
+ sbi->socket_num = socket_num; + /* init each llist */ for_each_possible_cpu(cpu) init_llist_head(per_cpu_ptr(sbi->persistee_list, cpu));
sbi->persisters = kmalloc(sizeof(struct task_struct *) * - persisters_per_socket * num_sockets, + persisters_per_socket * socket_num, GFP_KERNEL); if (!sbi->persisters) { err = -ENOMEM; @@ -727,7 +743,7 @@ int dep_init(struct super_block *sb) }
sbi->need_sync = kzalloc( - sizeof(bool) * persisters_per_socket * num_sockets, GFP_KERNEL); + sizeof(bool) * persisters_per_socket * socket_num, GFP_KERNEL); if (!sbi->need_sync) { err = -ENOMEM; goto cleanup; @@ -736,7 +752,7 @@ int dep_init(struct super_block *sb) init_waitqueue_head(&sbi->sync_wq);
bdevname(sb->s_bdev, name); - for (i = 0; i < num_sockets; ++i) { + for (i = 0; i < socket_num; ++i) { for (j = 0; j < persisters_per_socket; ++j) { int idx = i * persisters_per_socket + j;
@@ -768,11 +784,11 @@ int dep_init(struct super_block *sb) void dep_fini(struct super_block *sb) { struct eufs_sb_info *sbi = EUFS_SB(sb); - + int socket_num = sbi->socket_num; if (sbi->persisters) { int i;
- for (i = 0; i < persisters_per_socket * num_sockets; ++i) { + for (i = 0; i < persisters_per_socket * socket_num; ++i) { if (sbi->persisters[i]) { kthread_stop(sbi->persisters[i]); sbi->persisters[i] = NULL; diff --git a/fs/eulerfs/euler.h b/fs/eulerfs/euler.h index 0abb7602bb63..71201f9a61b2 100644 --- a/fs/eulerfs/euler.h +++ b/fs/eulerfs/euler.h @@ -57,8 +57,6 @@ #include "inode.h" #include "nvalloc.h"
-extern int num_sockets; - /* Function Prototypes */ extern __printf(2, 3) void eufs_error_mng(struct super_block *sb, const char *fmt, ...); diff --git a/fs/eulerfs/euler_def.h b/fs/eulerfs/euler_def.h index 727f1c4cf181..cca190c43439 100644 --- a/fs/eulerfs/euler_def.h +++ b/fs/eulerfs/euler_def.h @@ -179,6 +179,7 @@ struct eufs_sb_info { bool *need_sync; /* for fssync */ wait_queue_head_t sync_wq; /* for fssync's thread */ struct mutex sync_mutex; /* serialize fssync request */ + int socket_num; /* End of Persister */
/* The word `draining` is reserved for volatility quota limitation */ diff --git a/fs/eulerfs/super.c b/fs/eulerfs/super.c index 43fc717002d7..db6ad3497e1b 100644 --- a/fs/eulerfs/super.c +++ b/fs/eulerfs/super.c @@ -72,8 +72,6 @@ module_param(wear_alloc_threshold, int, 0444); MODULE_PARM_DESC(wear_alloc_threshold, "Wear leveling threshold for allocation");
-int num_sockets; - static struct super_operations eufs_sops;
void eufs_error_mng(struct super_block *sb, const char *fmt, ...) @@ -688,7 +686,7 @@ static int eufs_sync_fs(struct super_block *sb, int sync) { struct eufs_sb_info *sbi = EUFS_SB(sb); int i; - int num_persisters = num_sockets * persisters_per_socket; + int num_persisters = sbi->socket_num * persisters_per_socket; int wait_flag;
if (!sync) @@ -752,7 +750,6 @@ static struct file_system_type eufs_fs_type = { static int __init init_eufs_fs(void) { int rc = 0; - int cpu;
BUILD_BUG_ON(sizeof(struct eufs_renamej) != 2 * CACHELINE_SIZE);
@@ -772,16 +769,6 @@ static int __init init_eufs_fs(void) if (rc) goto out4;
- num_sockets = 0; - for_each_possible_cpu(cpu) { - int sock = cpu_to_node(cpu); - - if (sock > num_sockets) - num_sockets = sock; - } - num_sockets += 1; - eufs_info("Num socket: %d\n", num_sockets); - eufs_show_params();
return 0;
uniontech inclusion category: bugfix bugzilla: NA CVE: NA
---------------------
If a node doesn't have oneline cpu, we shouldn't create presister on it.
Signed-off-by: Gou Hao gouhao@uniontech.com --- fs/eulerfs/dep.c | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-)
diff --git a/fs/eulerfs/dep.c b/fs/eulerfs/dep.c index 4f8bad75c0c3..e85130f7eddc 100644 --- a/fs/eulerfs/dep.c +++ b/fs/eulerfs/dep.c @@ -700,27 +700,15 @@ static int persister(void *data) return 0; }
-static inline int get_socket_num(void) -{ - int num = 0; - int cpu, sock; - for_each_possible_cpu(cpu) { - sock = cpu_to_node(cpu); - if (sock > num) - num = sock; - } - num += 1; - return num; -}
int dep_init(struct super_block *sb) { struct eufs_sb_info *sbi = EUFS_SB(sb); - int cpu; - int i, j; + int cpu, j; char name[BDEVNAME_SIZE]; int err; - int socket_num = get_socket_num(); + int socket_num, node; + unsigned long node_mask;
sbi->persistee_list = alloc_percpu(struct llist_head); if (!sbi->persistee_list) { @@ -728,11 +716,18 @@ int dep_init(struct super_block *sb) goto cleanup; }
- sbi->socket_num = socket_num; - - /* init each llist */ - for_each_possible_cpu(cpu) + socket_num = node_mask = 0; + for_each_online_cpu(cpu) { + /* init each llist */ init_llist_head(per_cpu_ptr(sbi->persistee_list, cpu)); + node = cpu_to_node(cpu); + node_mask |= 1 << node; + if (node > socket_num) + socket_num++; + } + socket_num++; + + sbi->socket_num = socket_num;
sbi->persisters = kmalloc(sizeof(struct task_struct *) * persisters_per_socket * socket_num, @@ -752,23 +747,28 @@ int dep_init(struct super_block *sb) init_waitqueue_head(&sbi->sync_wq);
bdevname(sb->s_bdev, name); - for (i = 0; i < socket_num; ++i) { + + node = -1; + while (1) { + node = find_next_bit(&node_mask, BITS_PER_LONG, node + 1); + if (node >= BITS_PER_LONG) + break; for (j = 0; j < persisters_per_socket; ++j) { - int idx = i * persisters_per_socket + j; + int idx = node * persisters_per_socket + j;
sbi->persisters[idx] = kthread_create_on_node( - persister, sb, i, "hmfs/%s-%d.%d", name, i, j); + persister, sb, node, "hmfs/%s-%d.%d", name, node, j);
if (IS_ERR(sbi->persisters[idx])) { err = PTR_ERR(sbi->persisters[idx]); pr_err("create persister %s-%d.%d error %d", - name, i, j, err); + name, node, j, err); sbi->persisters[idx] = NULL; goto cleanup; }
set_cpus_allowed_ptr(sbi->persisters[idx], - cpumask_of_node(i)); + cpumask_of_node(node));
wake_up_process(sbi->persisters[idx]); }
uniontech inclusion category: bugfix bugzilla: NA CVE: NA
------------------
We record the socket_num for calculate persister num, why don't we direct record persister num?
Signed-off-by: Gou Hao gouhao@uniontech.com --- fs/eulerfs/dep.c | 24 +++++++++++------------- fs/eulerfs/euler_def.h | 2 +- fs/eulerfs/super.c | 8 ++++---- 3 files changed, 16 insertions(+), 18 deletions(-)
diff --git a/fs/eulerfs/dep.c b/fs/eulerfs/dep.c index e85130f7eddc..da826a18fa31 100644 --- a/fs/eulerfs/dep.c +++ b/fs/eulerfs/dep.c @@ -677,7 +677,7 @@ static int persister(void *data) /* more than a second */ (HZ * persist_period)); int idx = 0; - int num_persisters = sbi->socket_num * persisters_per_socket; + int num_persisters = sbi->persister_num;
eufs_info("sb=%px cpu=%d cpumask=%*pbl period=%d\n", data, smp_processor_id(), cpumask_pr_args(mask), period); @@ -707,7 +707,7 @@ int dep_init(struct super_block *sb) int cpu, j; char name[BDEVNAME_SIZE]; int err; - int socket_num, node; + int node_num, node; unsigned long node_mask;
sbi->persistee_list = alloc_percpu(struct llist_head); @@ -716,29 +716,27 @@ int dep_init(struct super_block *sb) goto cleanup; }
- socket_num = node_mask = 0; + node_num = node_mask = 0; for_each_online_cpu(cpu) { /* init each llist */ init_llist_head(per_cpu_ptr(sbi->persistee_list, cpu)); node = cpu_to_node(cpu); node_mask |= 1 << node; - if (node > socket_num) - socket_num++; + if (node > node_num) + node_num++; } - socket_num++; + node_num++;
- sbi->socket_num = socket_num; + sbi->persister_num = node_num * persisters_per_socket;
sbi->persisters = kmalloc(sizeof(struct task_struct *) * - persisters_per_socket * socket_num, - GFP_KERNEL); + sbi->persister_num, GFP_KERNEL); if (!sbi->persisters) { err = -ENOMEM; goto cleanup; }
- sbi->need_sync = kzalloc( - sizeof(bool) * persisters_per_socket * socket_num, GFP_KERNEL); + sbi->need_sync = kzalloc(sizeof(bool) * sbi->persister_num, GFP_KERNEL); if (!sbi->need_sync) { err = -ENOMEM; goto cleanup; @@ -784,11 +782,11 @@ int dep_init(struct super_block *sb) void dep_fini(struct super_block *sb) { struct eufs_sb_info *sbi = EUFS_SB(sb); - int socket_num = sbi->socket_num; + int persister_num = sbi->persister_num; if (sbi->persisters) { int i;
- for (i = 0; i < persisters_per_socket * socket_num; ++i) { + for (i = 0; i < persister_num; ++i) { if (sbi->persisters[i]) { kthread_stop(sbi->persisters[i]); sbi->persisters[i] = NULL; diff --git a/fs/eulerfs/euler_def.h b/fs/eulerfs/euler_def.h index cca190c43439..4513ff1d3887 100644 --- a/fs/eulerfs/euler_def.h +++ b/fs/eulerfs/euler_def.h @@ -179,7 +179,7 @@ struct eufs_sb_info { bool *need_sync; /* for fssync */ wait_queue_head_t sync_wq; /* for fssync's thread */ struct mutex sync_mutex; /* serialize fssync request */ - int socket_num; + int persister_num; /* End of Persister */
/* The word `draining` is reserved for volatility quota limitation */ diff --git a/fs/eulerfs/super.c b/fs/eulerfs/super.c index db6ad3497e1b..6f888debbb46 100644 --- a/fs/eulerfs/super.c +++ b/fs/eulerfs/super.c @@ -686,7 +686,7 @@ static int eufs_sync_fs(struct super_block *sb, int sync) { struct eufs_sb_info *sbi = EUFS_SB(sb); int i; - int num_persisters = sbi->socket_num * persisters_per_socket; + int persister_num = sbi->persister_num; int wait_flag;
if (!sync) @@ -694,16 +694,16 @@ static int eufs_sync_fs(struct super_block *sb, int sync)
mutex_lock(&sbi->sync_mutex);
- for (i = 0; i < num_persisters; i++) + for (i = 0; i < persister_num; i++) sbi->need_sync[i] = true;
/* FIXME: Persisters may miss the wake-up message. */ - for (i = 0; i < num_persisters; ++i) + for (i = 0; i < persister_num; ++i) wake_up_process(sbi->persisters[i]);
do { wait_flag = false; - for (i = 0; i < num_persisters; i++) { + for (i = 0; i < persister_num; i++) { if (sbi->need_sync[i] == false) continue; wait_flag = true;
uniontech inclusion category: bugfix bugzilla: NA CVE: NA
-------------------
After alloc the sbi->persisters memory, dep_init will call dep_fini when error happened.Because sbi->persisters is not set to 0, kthread_stop may happened error.
Signed-off-by: Gou Hao gouhao@uniontech.com --- fs/eulerfs/dep.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/eulerfs/dep.c b/fs/eulerfs/dep.c index da826a18fa31..62fd1376f980 100644 --- a/fs/eulerfs/dep.c +++ b/fs/eulerfs/dep.c @@ -728,8 +728,8 @@ int dep_init(struct super_block *sb) node_num++;
sbi->persister_num = node_num * persisters_per_socket; - - sbi->persisters = kmalloc(sizeof(struct task_struct *) * + + sbi->persisters = kzalloc(sizeof(struct task_struct *) * sbi->persister_num, GFP_KERNEL); if (!sbi->persisters) { err = -ENOMEM;