 lib/dlock-list.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 59 insertions(+), 15 deletions(-)
diff --git a/lib/dlock-list.c b/lib/dlock-list.c
index f64ea4cc5e79..e2860944ec9f 100644
--- a/lib/dlock-list.c
+++ b/lib/dlock-list.c
@@ -25,31 +25,65 @@
  * The distributed and locked list is a distributed set of lists each of
  * which is protected by its own spinlock, but acts like a single
  * consolidated list to the callers. For scaling purpose, the number of
- * lists used is equal to the number of possible CPUs in the system to
- * minimize contention.
+ * lists used is equal to the number of possible cores in the system to
+ * minimize contention. All threads of the same CPU core will share the
+ * same list.
  *
- * However, it is possible that individual CPU numbers may be equal to
- * or greater than the number of possible CPUs when there are holes in
- * the CPU number list. As a result, we need to map the CPU number to a
- * list index.
+ * We need to map each CPU number to a list index.
  */
 static DEFINE_PER_CPU_READ_MOSTLY(int, cpu2idx);
+static int nr_dlock_lists __read_mostly;
 
 /*
- * Initialize cpu2idx mapping table
+ * Initialize cpu2idx mapping table & nr_dlock_lists.
  *
  * It is possible that a dlock-list can be allocated before the cpu2idx is
  * initialized. In this case, all the cpus are mapped to the first entry
  * before initialization.
  *
+ * All the sibling CPUs of a sibling group will map to the same dlock list so
+ * as to reduce the number of dlock lists to be maintained while minimizing
+ * cacheline contention.
+ *
+ * As the sibling masks are set up in the core initcall phase, this function
+ * has to be done in the postcore phase to get the right data.
  */
 static int __init cpu2idx_init(void)
 {
 	int idx, cpu;
+	struct cpumask *sibling_mask;
+	static struct cpumask mask __initdata;
 
+	cpumask_clear(&mask);
 	idx = 0;
-	for_each_possible_cpu(cpu)
-		per_cpu(cpu2idx, cpu) = idx++;
+	for_each_possible_cpu(cpu) {
+		int scpu;
+
+		if (cpumask_test_cpu(cpu, &mask))
+			continue;
+		per_cpu(cpu2idx, cpu) = idx;
+		cpumask_set_cpu(cpu, &mask);
+
+		sibling_mask = topology_sibling_cpumask(cpu);
+		if (sibling_mask) {
+			for_each_cpu(scpu, sibling_mask) {
+				per_cpu(cpu2idx, scpu) = idx;
+				cpumask_set_cpu(scpu, &mask);
+			}
+		}
+		idx++;
+	}
+
+	/*
+	 * nr_dlock_lists can only be set after cpu2idx is properly
+	 * initialized.
+	 */
+	smp_mb();
+	nr_dlock_lists = idx;
+	WARN_ON(nr_dlock_lists > nr_cpu_ids);
+
+	pr_info("dlock-list: %d head entries per dlock list.\n",
+		nr_dlock_lists);
 	return 0;
 }
 postcore_initcall(cpu2idx_init);
@@ -67,19 +101,23 @@ postcore_initcall(cpu2idx_init);
  *
  * Dynamically allocated locks need to have their own special lock class
  * to avoid lockdep warning.
+ *
+ * Since nr_dlock_lists will always be <= nr_cpu_ids, having more lists
+ * than necessary allocated is not a problem other than some wasted memory.
+ * The extra lists will not be ever used as all the cpu2idx entries will be
+ * 0 before initialization.
  */
 int __alloc_dlock_list_heads(struct dlock_list_heads *dlist,
 			     struct lock_class_key *key)
 {
-	int idx;
+	int idx, cnt = nr_dlock_lists ? nr_dlock_lists : nr_cpu_ids;
 
-	dlist->heads = kcalloc(nr_cpu_ids, sizeof(struct dlock_list_head),
-			       GFP_KERNEL);
+	dlist->heads = kcalloc(cnt, sizeof(struct dlock_list_head), GFP_KERNEL);
 
 	if (!dlist->heads)
 		return -ENOMEM;
 
-	for (idx = 0; idx < nr_cpu_ids; idx++) {
+	for (idx = 0; idx < cnt; idx++) {
 		struct dlock_list_head *head = &dlist->heads[idx];
 
 		INIT_LIST_HEAD(&head->list);
@@ -117,7 +155,10 @@ bool dlock_lists_empty(struct dlock_list_heads *dlist)
 {
 	int idx;
 
-	for (idx = 0; idx < nr_cpu_ids; idx++)
+	/* Shouldn't be called before nr_dlock_lists is initialized */
+	WARN_ON_ONCE(!nr_dlock_lists);
+
+	for (idx = 0; idx < nr_dlock_lists; idx++)
 		if (!list_empty(&dlist->heads[idx].list))
 			return false;
 	return true;
@@ -199,6 +240,9 @@ struct dlock_list_node *__dlock_list_next_list(struct dlock_list_iter *iter)
 	struct dlock_list_node *next;
 	struct dlock_list_head *head;
 
+	/* Shouldn't be called before nr_dlock_lists is initialized */
+	WARN_ON_ONCE(!nr_dlock_lists);
+
 restart:
 	if (iter->entry) {
 		spin_unlock(&iter->entry->lock);
@@ -209,7 +253,7 @@ next_list:
 	/*
 	 * Try next list
 	 */
-	if (++iter->index >= nr_cpu_ids)
+	if (++iter->index >= nr_dlock_lists)
 		return NULL;	/* All the entries iterated */
 
 	if (list_empty(&iter->head[iter->index].list))
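
To see what the new cpu2idx_init() loop computes, here is a minimal userspace C sketch of the sibling-grouping logic. The four-CPU topology (two hyperthreads per core) and the plain arrays standing in for cpumasks are made-up illustrations; the kernel derives the real grouping from topology_sibling_cpumask().

/*
 * Userspace sketch of the cpu2idx_init() sibling-grouping logic.
 * The topology below is hypothetical: CPUs 0/2 share core 0 and
 * CPUs 1/3 share core 1, as on a typical 2-core/2-thread machine.
 */
#include <stdio.h>

#define NR_CPUS 4

/* sibling[cpu] lists the CPUs sharing a core with 'cpu' (incl. itself) */
static const int sibling[NR_CPUS][2] = {
	{ 0, 2 },
	{ 1, 3 },
	{ 2, 0 },
	{ 3, 1 },
};

static int cpu2idx[NR_CPUS];
static int nr_dlock_lists;

int main(void)
{
	int seen[NR_CPUS] = { 0 };	/* stands in for the __initdata cpumask */
	int idx = 0;

	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		if (seen[cpu])
			continue;	/* already covered by a sibling */
		/* assign the same list index to the whole sibling group */
		for (int i = 0; i < 2; i++) {
			int scpu = sibling[cpu][i];

			cpu2idx[scpu] = idx;
			seen[scpu] = 1;
		}
		idx++;
	}
	nr_dlock_lists = idx;

	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		printf("cpu %d -> list %d\n", cpu, cpu2idx[cpu]);
	printf("nr_dlock_lists = %d\n", nr_dlock_lists);
	return 0;
}

On this example topology the sketch prints two lists for four CPUs (cpu 0 and 2 map to list 0, cpu 1 and 3 to list 1), which is exactly the halving of list heads the patch is after on an SMT-2 system.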
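The comment added above __alloc_dlock_list_heads() reasons that over-allocating before cpu2idx_init() has run is harmless. A small sketch of that fallback decision, using an example value in place of the kernel's nr_cpu_ids:

#include <stdio.h>

static int nr_dlock_lists;	/* 0 until cpu2idx_init() has run */
static int nr_cpu_ids = 8;	/* example value; the kernel supplies the real one */

/* Number of list heads to allocate, mirroring the patch's expression */
static int alloc_count(void)
{
	return nr_dlock_lists ? nr_dlock_lists : nr_cpu_ids;
}

int main(void)
{
	/*
	 * Before initialization every cpu2idx entry is still 0, so only
	 * heads[0] can ever be reached; the extra heads are merely some
	 * wasted memory, never dangling references.
	 */
	printf("before cpu2idx_init: %d heads\n", alloc_count());

	nr_dlock_lists = 4;	/* as if init found 4 sibling groups */
	printf("after cpu2idx_init:  %d heads\n", alloc_count());
	return 0;
}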