-rw-r--r--  fs/aio.c                   | 126
-rw-r--r--  include/linux/mm_types.h   |   5
-rw-r--r--  kernel/fork.c              |   2

3 files changed, 112 insertions, 21 deletions
diff --git a/fs/aio.c b/fs/aio.c
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -61,6 +61,12 @@ struct aio_ring {
 
 #define AIO_RING_PAGES	8
 
+struct kioctx_table {
+	struct rcu_head	rcu;
+	unsigned	nr;
+	struct kioctx	*table[];
+};
+
 struct kioctx_cpu {
 	unsigned		reqs_available;
 };
@@ -69,9 +75,7 @@ struct kioctx {
 	struct percpu_ref	users;
 	atomic_t		dead;
 
-	/* This needs improving */
 	unsigned long		user_id;
-	struct hlist_node	list;
 
 	struct __percpu kioctx_cpu *cpu;
@@ -129,6 +133,8 @@ struct kioctx {
 	} ____cacheline_aligned_in_smp;
 
 	struct page		*internal_pages[AIO_RING_PAGES];
+
+	unsigned		id;
 };
 
 /*------ sysctl variables----*/
@@ -225,7 +231,7 @@ static int aio_setup_ring(struct kioctx *ctx)
 
 	ring = kmap_atomic(ctx->ring_pages[0]);
 	ring->nr = nr_events;	/* user copy */
-	ring->id = ctx->user_id;
+	ring->id = ~0U;
 	ring->head = ring->tail = 0;
 	ring->magic = AIO_RING_MAGIC;
 	ring->compat_features = AIO_RING_COMPAT_FEATURES;
@@ -361,6 +367,58 @@ static void free_ioctx_ref(struct percpu_ref *ref)
 	schedule_work(&ctx->free_work);
 }
 
+static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
+{
+	unsigned i, new_nr;
+	struct kioctx_table *table, *old;
+	struct aio_ring *ring;
+
+	spin_lock(&mm->ioctx_lock);
+	table = rcu_dereference(mm->ioctx_table);
+
+	while (1) {
+		if (table)
+			for (i = 0; i < table->nr; i++)
+				if (!table->table[i]) {
+					ctx->id = i;
+					table->table[i] = ctx;
+					spin_unlock(&mm->ioctx_lock);
+
+					ring = kmap_atomic(ctx->ring_pages[0]);
+					ring->id = ctx->id;
+					kunmap_atomic(ring);
+					return 0;
+				}
+
+		new_nr = (table ? table->nr : 1) * 4;
+
+		spin_unlock(&mm->ioctx_lock);
+
+		table = kzalloc(sizeof(*table) + sizeof(struct kioctx *) *
+				new_nr, GFP_KERNEL);
+		if (!table)
+			return -ENOMEM;
+
+		table->nr = new_nr;
+
+		spin_lock(&mm->ioctx_lock);
+		old = rcu_dereference(mm->ioctx_table);
+
+		if (!old) {
+			rcu_assign_pointer(mm->ioctx_table, table);
+		} else if (table->nr > old->nr) {
+			memcpy(table->table, old->table,
+			       old->nr * sizeof(struct kioctx *));
+
+			rcu_assign_pointer(mm->ioctx_table, table);
+			kfree_rcu(old, rcu);
+		} else {
+			kfree(table);
+			table = old;
+		}
+	}
+}
+
 /* ioctx_alloc
  *	Allocates and initializes an ioctx.  Returns an ERR_PTR if it failed.
  */
@@ -419,6 +477,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 	ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4);
 	BUG_ON(!ctx->req_batch);
 
+	err = ioctx_add_table(ctx, mm);
+	if (err)
+		goto out_cleanup_noerr;
+
 	/* limit the number of system wide aios */
 	spin_lock(&aio_nr_lock);
 	if (aio_nr + nr_events > aio_max_nr ||
@@ -431,17 +493,13 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 
 	percpu_ref_get(&ctx->users);	/* io_setup() will drop this ref */
 
-	/* now link into global list. */
-	spin_lock(&mm->ioctx_lock);
-	hlist_add_head_rcu(&ctx->list, &mm->ioctx_list);
-	spin_unlock(&mm->ioctx_lock);
-
 	pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
 		 ctx, ctx->user_id, mm, ctx->nr_events);
 	return ctx;
 
 out_cleanup:
 	err = -EAGAIN;
+out_cleanup_noerr:
 	aio_free_ring(ctx);
 out_freepcpu:
 	free_percpu(ctx->cpu);
@@ -461,7 +519,16 @@ out_freectx:
 static void kill_ioctx(struct kioctx *ctx)
 {
 	if (!atomic_xchg(&ctx->dead, 1)) {
-		hlist_del_rcu(&ctx->list);
+		struct mm_struct *mm = current->mm;
+		struct kioctx_table *table;
+
+		spin_lock(&mm->ioctx_lock);
+		table = rcu_dereference(mm->ioctx_table);
+
+		WARN_ON(ctx != table->table[ctx->id]);
+		table->table[ctx->id] = NULL;
+		spin_unlock(&mm->ioctx_lock);
+
 		/* percpu_ref_kill() will do the necessary call_rcu() */
 		wake_up_all(&ctx->wait);
@@ -510,10 +577,25 @@ EXPORT_SYMBOL(wait_on_sync_kiocb);
  */
 void exit_aio(struct mm_struct *mm)
 {
+	struct kioctx_table *table;
 	struct kioctx *ctx;
-	struct hlist_node *n;
+	unsigned i = 0;
+
+	while (1) {
+		rcu_read_lock();
+		table = rcu_dereference(mm->ioctx_table);
+
+		do {
+			if (!table || i >= table->nr) {
+				rcu_read_unlock();
+				return;
+			}
+
+			ctx = table->table[i++];
+		} while (!ctx);
+
+		rcu_read_unlock();
 
-	hlist_for_each_entry_safe(ctx, n, &mm->ioctx_list, list) {
 		/*
 		 * We don't need to bother with munmap() here -
 		 * exit_mmap(mm) is coming and it'll unmap everything.
@@ -607,19 +689,27 @@ static void kiocb_free(struct kiocb *req)
 
 static struct kioctx *lookup_ioctx(unsigned long ctx_id)
 {
+	struct aio_ring __user *ring  = (void __user *)ctx_id;
 	struct mm_struct *mm = current->mm;
 	struct kioctx *ctx, *ret = NULL;
+	struct kioctx_table *table;
+	unsigned id;
+
+	if (get_user(id, &ring->id))
+		return NULL;
 
 	rcu_read_lock();
+	table = rcu_dereference(mm->ioctx_table);
 
-	hlist_for_each_entry_rcu(ctx, &mm->ioctx_list, list) {
-		if (ctx->user_id == ctx_id) {
-			percpu_ref_get(&ctx->users);
-			ret = ctx;
-			break;
-		}
-	}
+	if (!table || id >= table->nr)
+		goto out;
+
+	ctx = table->table[id];
+	if (ctx->user_id == ctx_id) {
+		percpu_ref_get(&ctx->users);
+		ret = ctx;
+	}
+out:
 	rcu_read_unlock();
 	return ret;
 }
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index ace9a5f01c64..0d6daacc173a 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -322,6 +322,7 @@ struct mm_rss_stat {
 	atomic_long_t count[NR_MM_COUNTERS];
 };
 
+struct kioctx_table;
 struct mm_struct {
 	struct vm_area_struct * mmap;		/* list of VMAs */
 	struct rb_root mm_rb;
@@ -385,8 +386,8 @@ struct mm_struct {
 	struct core_state *core_state; /* coredumping support */
 
 #ifdef CONFIG_AIO
-	spinlock_t	ioctx_lock;
-	struct hlist_head	ioctx_list;
+	spinlock_t			ioctx_lock;
+	struct kioctx_table __rcu	*ioctx_table;
 #endif
 #ifdef CONFIG_MM_OWNER
 	/*
diff --git a/kernel/fork.c b/kernel/fork.c
index 987b28a1f01b..019ef85deb18 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -524,7 +524,7 @@ static void mm_init_aio(struct mm_struct *mm)
 {
 #ifdef CONFIG_AIO
 	spin_lock_init(&mm->ioctx_lock);
-	INIT_HLIST_HEAD(&mm->ioctx_list);
+	mm->ioctx_table = NULL;
 #endif
 }
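
As context for the lookup_ioctx() change above: the aio_context_t handle returned by io_setup() is the user-space address of the mmap'ed aio_ring, and with this patch the kernel reads the ring's id field back from user space (get_user(id, &ring->id)) and uses it to index mm->ioctx_table, instead of walking an hlist keyed on that address. The stand-alone user-space sketch below is not part of the patch; it only illustrates that relationship, assuming the raw io_setup()/io_destroy() syscalls and a hypothetical aio_ring_hdr struct that mirrors the leading fields of the kernel's struct aio_ring.

/*
 * Illustrative sketch only: peek at the ring header that backs an
 * aio_context_t.  The field layout below is copied from the start of
 * struct aio_ring in fs/aio.c; aio_ring_hdr is a made-up name.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

struct aio_ring_hdr {
	unsigned id;		/* after this patch: index into mm->ioctx_table */
	unsigned nr;		/* number of io_events the ring can hold */
	unsigned head;
	unsigned tail;
	unsigned magic;
	unsigned compat_features;
	unsigned incompat_features;
	unsigned header_length;	/* size of the ring header */
};

int main(void)
{
	unsigned long ctx = 0;	/* aio_context_t */

	if (syscall(SYS_io_setup, 128, &ctx) < 0) {
		perror("io_setup");
		return 1;
	}

	/* ctx is the user address of the mmap'ed ring; read its header. */
	struct aio_ring_hdr *ring = (struct aio_ring_hdr *)ctx;
	printf("ring id=%u nr=%u magic=%#x\n", ring->id, ring->nr, ring->magic);

	syscall(SYS_io_destroy, ctx);
	return 0;
}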