diff options
-rw-r--r-- | fs/inode.c | 200 | ||||
-rw-r--r-- | include/linux/fs.h | 9 |
2 files changed, 132 insertions, 77 deletions
diff --git a/fs/inode.c b/fs/inode.c index 5f010f944917..cb7969ab3633 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -56,8 +56,7 @@ static unsigned int i_hash_mask __read_mostly; static unsigned int i_hash_shift __read_mostly; -static struct hlist_head *inode_hashtable __read_mostly; -static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock); +static struct hlist_bl_head *inode_hashtable __read_mostly; static unsigned long hash(struct super_block *sb, unsigned long hashval) { @@ -69,7 +68,7 @@ static unsigned long hash(struct super_block *sb, unsigned long hashval) return tmp & i_hash_mask; } -static inline struct hlist_head *i_hash_head(struct super_block *sb, +static inline struct hlist_bl_head *i_hash_head(struct super_block *sb, unsigned int hashval) { return inode_hashtable + hash(sb, hashval); @@ -433,7 +432,7 @@ EXPORT_SYMBOL(address_space_init_once); void inode_init_once(struct inode *inode) { memset(inode, 0, sizeof(*inode)); - INIT_HLIST_NODE(&inode->i_hash); + INIT_HLIST_BL_NODE(&inode->i_hash); INIT_LIST_HEAD(&inode->i_devices); INIT_LIST_HEAD(&inode->i_io_list); INIT_LIST_HEAD(&inode->i_wb_list); @@ -522,6 +521,17 @@ static inline void inode_sb_list_del(struct inode *inode) } } +/* + * Ensure that we store the hash head in the inode when we insert the inode into + * the hlist_bl_head... + */ +static inline void +__insert_inode_hash_head(struct inode *inode, struct hlist_bl_head *b) +{ + hlist_bl_add_head_rcu(&inode->i_hash, b); + inode->i_hash_head = b; +} + /** * __insert_inode_hash - hash an inode * @inode: unhashed inode @@ -532,13 +542,13 @@ static inline void inode_sb_list_del(struct inode *inode) */ void __insert_inode_hash(struct inode *inode, unsigned long hashval) { - struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval); + struct hlist_bl_head *b = i_hash_head(inode->i_sb, hashval); - spin_lock(&inode_hash_lock); + hlist_bl_lock(b); spin_lock(&inode->i_lock); - hlist_add_head_rcu(&inode->i_hash, b); + __insert_inode_hash_head(inode, b); spin_unlock(&inode->i_lock); - spin_unlock(&inode_hash_lock); + hlist_bl_unlock(b); } EXPORT_SYMBOL(__insert_inode_hash); @@ -550,11 +560,44 @@ EXPORT_SYMBOL(__insert_inode_hash); */ void __remove_inode_hash(struct inode *inode) { - spin_lock(&inode_hash_lock); - spin_lock(&inode->i_lock); - hlist_del_init_rcu(&inode->i_hash); - spin_unlock(&inode->i_lock); - spin_unlock(&inode_hash_lock); + struct hlist_bl_head *b = inode->i_hash_head; + + /* + * There are some callers that come through here without synchronisation + * and potentially with multiple references to the inode. Hence we have + * to handle the case that we might race with a remove and insert to a + * different list. Coda, in particular, seems to have a userspace API + * that can directly trigger "unhash/rehash to different list" behaviour + * without any serialisation at all. + * + * Hence we have to handle the situation where the inode->i_hash_head + * might point to a different list than what we expect, indicating that + * we raced with another unhash and potentially a new insertion. This + * means we have to retest the head once we have everything locked up + * and loop again if it doesn't match. + */ + while (b) { + hlist_bl_lock(b); + spin_lock(&inode->i_lock); + if (b != inode->i_hash_head) { + hlist_bl_unlock(b); + b = inode->i_hash_head; + spin_unlock(&inode->i_lock); + continue; + } + /* + * Need to set the pprev pointer to NULL after list removal so + * that both RCU traversals and hlist_bl_unhashed() work + * correctly at this point. + */ + hlist_bl_del_rcu(&inode->i_hash); + inode->i_hash.pprev = NULL; + inode->i_hash_head = NULL; + spin_unlock(&inode->i_lock); + hlist_bl_unlock(b); + break; + } + } EXPORT_SYMBOL(__remove_inode_hash); @@ -904,26 +947,28 @@ long prune_icache_sb(struct super_block *sb, struct shrink_control *sc) return freed; } -static void __wait_on_freeing_inode(struct inode *inode); +static void __wait_on_freeing_inode(struct hlist_bl_head *b, + struct inode *inode); /* * Called with the inode lock held. */ static struct inode *find_inode(struct super_block *sb, - struct hlist_head *head, + struct hlist_bl_head *b, int (*test)(struct inode *, void *), void *data) { + struct hlist_bl_node *node; struct inode *inode = NULL; repeat: - hlist_for_each_entry(inode, head, i_hash) { + hlist_bl_for_each_entry(inode, node, b, i_hash) { if (inode->i_sb != sb) continue; if (!test(inode, data)) continue; spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE)) { - __wait_on_freeing_inode(inode); + __wait_on_freeing_inode(b, inode); goto repeat; } if (unlikely(inode->i_state & I_CREATING)) { @@ -942,19 +987,20 @@ repeat: * iget_locked for details. */ static struct inode *find_inode_fast(struct super_block *sb, - struct hlist_head *head, unsigned long ino) + struct hlist_bl_head *b, unsigned long ino) { + struct hlist_bl_node *node; struct inode *inode = NULL; repeat: - hlist_for_each_entry(inode, head, i_hash) { + hlist_bl_for_each_entry(inode, node, b, i_hash) { if (inode->i_ino != ino) continue; if (inode->i_sb != sb) continue; spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE)) { - __wait_on_freeing_inode(inode); + __wait_on_freeing_inode(b, inode); goto repeat; } if (unlikely(inode->i_state & I_CREATING)) { @@ -1162,25 +1208,25 @@ EXPORT_SYMBOL(unlock_two_nondirectories); * return it locked, hashed, and with the I_NEW flag set. The file system gets * to fill it in before unlocking it via unlock_new_inode(). * - * Note both @test and @set are called with the inode_hash_lock held, so can't - * sleep. + * Note both @test and @set are called with the inode hash chain lock held, + * so can't sleep. */ struct inode *inode_insert5(struct inode *inode, unsigned long hashval, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *data) { - struct hlist_head *head = i_hash_head(inode->i_sb, hashval); + struct hlist_bl_head *b = i_hash_head(inode->i_sb, hashval); struct inode *old; again: - spin_lock(&inode_hash_lock); - old = find_inode(inode->i_sb, head, test, data); + hlist_bl_lock(b); + old = find_inode(inode->i_sb, b, test, data); if (unlikely(old)) { /* * Uhhuh, somebody else created the same inode under us. * Use the old inode instead of the preallocated one. */ - spin_unlock(&inode_hash_lock); + hlist_bl_unlock(b); if (IS_ERR(old)) return NULL; wait_on_inode(old); @@ -1202,7 +1248,7 @@ again: */ spin_lock(&inode->i_lock); inode->i_state |= I_NEW; - hlist_add_head_rcu(&inode->i_hash, head); + __insert_inode_hash_head(inode, b); spin_unlock(&inode->i_lock); /* @@ -1212,7 +1258,7 @@ again: if (list_empty(&inode->i_sb_list)) inode_sb_list_add(inode); unlock: - spin_unlock(&inode_hash_lock); + hlist_bl_unlock(b); return inode; } @@ -1273,12 +1319,12 @@ EXPORT_SYMBOL(iget5_locked); */ struct inode *iget_locked(struct super_block *sb, unsigned long ino) { - struct hlist_head *head = i_hash_head(sb, ino); + struct hlist_bl_head *b = i_hash_head(sb, ino); struct inode *inode; again: - spin_lock(&inode_hash_lock); - inode = find_inode_fast(sb, head, ino); - spin_unlock(&inode_hash_lock); + hlist_bl_lock(b); + inode = find_inode_fast(sb, b, ino); + hlist_bl_unlock(b); if (inode) { if (IS_ERR(inode)) return NULL; @@ -1294,17 +1340,17 @@ again: if (inode) { struct inode *old; - spin_lock(&inode_hash_lock); + hlist_bl_lock(b); /* We released the lock, so.. */ - old = find_inode_fast(sb, head, ino); + old = find_inode_fast(sb, b, ino); if (!old) { inode->i_ino = ino; spin_lock(&inode->i_lock); inode->i_state = I_NEW; - hlist_add_head_rcu(&inode->i_hash, head); + __insert_inode_hash_head(inode, b); spin_unlock(&inode->i_lock); inode_sb_list_add(inode); - spin_unlock(&inode_hash_lock); + hlist_bl_unlock(b); /* Return the locked inode with I_NEW set, the * caller is responsible for filling in the contents @@ -1317,7 +1363,7 @@ again: * us. Use the old inode instead of the one we just * allocated. */ - spin_unlock(&inode_hash_lock); + hlist_bl_unlock(b); destroy_inode(inode); if (IS_ERR(old)) return NULL; @@ -1341,10 +1387,11 @@ EXPORT_SYMBOL(iget_locked); */ static int test_inode_iunique(struct super_block *sb, unsigned long ino) { - struct hlist_head *b = i_hash_head(sb, ino); + struct hlist_bl_head *b = i_hash_head(sb, ino); + struct hlist_bl_node *node; struct inode *inode; - hlist_for_each_entry_rcu(inode, b, i_hash) { + hlist_bl_for_each_entry_rcu(inode, node, b, i_hash) { if (inode->i_ino == ino && inode->i_sb == sb) return 0; } @@ -1428,12 +1475,12 @@ EXPORT_SYMBOL(igrab); struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data) { - struct hlist_head *head = i_hash_head(sb, hashval); + struct hlist_bl_head *b = i_hash_head(sb, hashval); struct inode *inode; - spin_lock(&inode_hash_lock); - inode = find_inode(sb, head, test, data); - spin_unlock(&inode_hash_lock); + hlist_bl_lock(b); + inode = find_inode(sb, b, test, data); + hlist_bl_unlock(b); return IS_ERR(inode) ? NULL : inode; } @@ -1483,12 +1530,12 @@ EXPORT_SYMBOL(ilookup5); */ struct inode *ilookup(struct super_block *sb, unsigned long ino) { - struct hlist_head *head = i_hash_head(sb, ino); + struct hlist_bl_head *b = i_hash_head(sb, ino); struct inode *inode; again: - spin_lock(&inode_hash_lock); - inode = find_inode_fast(sb, head, ino); - spin_unlock(&inode_hash_lock); + hlist_bl_lock(b); + inode = find_inode_fast(sb, b, ino); + hlist_bl_unlock(b); if (inode) { if (IS_ERR(inode)) @@ -1532,12 +1579,13 @@ struct inode *find_inode_nowait(struct super_block *sb, void *), void *data) { - struct hlist_head *head = i_hash_head(sb, hashval); + struct hlist_bl_head *b = i_hash_head(sb, hashval); + struct hlist_bl_node *node; struct inode *inode, *ret_inode = NULL; int mval; - spin_lock(&inode_hash_lock); - hlist_for_each_entry(inode, head, i_hash) { + hlist_bl_lock(b); + hlist_bl_for_each_entry(inode, node, b, i_hash) { if (inode->i_sb != sb) continue; mval = match(inode, hashval, data); @@ -1548,7 +1596,7 @@ struct inode *find_inode_nowait(struct super_block *sb, goto out; } out: - spin_unlock(&inode_hash_lock); + hlist_bl_unlock(b); return ret_inode; } EXPORT_SYMBOL(find_inode_nowait); @@ -1577,13 +1625,14 @@ EXPORT_SYMBOL(find_inode_nowait); struct inode *find_inode_rcu(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data) { - struct hlist_head *head = i_hash_head(sb, hashval); + struct hlist_bl_head *b = i_hash_head(sb, hashval); + struct hlist_bl_node *node; struct inode *inode; RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "suspicious find_inode_rcu() usage"); - hlist_for_each_entry_rcu(inode, head, i_hash) { + hlist_bl_for_each_entry_rcu(inode, node, b, i_hash) { if (inode->i_sb == sb && !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)) && test(inode, data)) @@ -1615,13 +1664,14 @@ EXPORT_SYMBOL(find_inode_rcu); struct inode *find_inode_by_ino_rcu(struct super_block *sb, unsigned long ino) { - struct hlist_head *head = i_hash_head(sb, ino); + struct hlist_bl_head *b = i_hash_head(sb, ino); + struct hlist_bl_node *node; struct inode *inode; RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "suspicious find_inode_by_ino_rcu() usage"); - hlist_for_each_entry_rcu(inode, head, i_hash) { + hlist_bl_for_each_entry_rcu(inode, node, b, i_hash) { if (inode->i_ino == ino && inode->i_sb == sb && !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE))) @@ -1635,39 +1685,42 @@ int insert_inode_locked(struct inode *inode) { struct super_block *sb = inode->i_sb; ino_t ino = inode->i_ino; - struct hlist_head *head = i_hash_head(sb, ino); + struct hlist_bl_head *b = i_hash_head(sb, ino); while (1) { - struct inode *old = NULL; - spin_lock(&inode_hash_lock); - hlist_for_each_entry(old, head, i_hash) { - if (old->i_ino != ino) + struct hlist_bl_node *node; + struct inode *old = NULL, *t; + + hlist_bl_lock(b); + hlist_bl_for_each_entry(t, node, b, i_hash) { + if (t->i_ino != ino) continue; - if (old->i_sb != sb) + if (t->i_sb != sb) continue; - spin_lock(&old->i_lock); - if (old->i_state & (I_FREEING|I_WILL_FREE)) { - spin_unlock(&old->i_lock); + spin_lock(&t->i_lock); + if (t->i_state & (I_FREEING|I_WILL_FREE)) { + spin_unlock(&t->i_lock); continue; } + old = t; break; } if (likely(!old)) { spin_lock(&inode->i_lock); inode->i_state |= I_NEW | I_CREATING; - hlist_add_head_rcu(&inode->i_hash, head); + __insert_inode_hash_head(inode, b); spin_unlock(&inode->i_lock); - spin_unlock(&inode_hash_lock); + hlist_bl_unlock(b); return 0; } if (unlikely(old->i_state & I_CREATING)) { spin_unlock(&old->i_lock); - spin_unlock(&inode_hash_lock); + hlist_bl_unlock(b); return -EBUSY; } __iget(old); spin_unlock(&old->i_lock); - spin_unlock(&inode_hash_lock); + hlist_bl_unlock(b); wait_on_inode(old); if (unlikely(!inode_unhashed(old))) { iput(old); @@ -2223,17 +2276,18 @@ EXPORT_SYMBOL(inode_needs_sync); * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list * will DTRT. */ -static void __wait_on_freeing_inode(struct inode *inode) +static void __wait_on_freeing_inode(struct hlist_bl_head *b, + struct inode *inode) { wait_queue_head_t *wq; DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); wq = bit_waitqueue(&inode->i_state, __I_NEW); prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); spin_unlock(&inode->i_lock); - spin_unlock(&inode_hash_lock); + hlist_bl_unlock(b); schedule(); finish_wait(wq, &wait.wq_entry); - spin_lock(&inode_hash_lock); + hlist_bl_lock(b); } static __initdata unsigned long ihash_entries; @@ -2259,7 +2313,7 @@ void __init inode_init_early(void) inode_hashtable = alloc_large_system_hash("Inode-cache", - sizeof(struct hlist_head), + sizeof(struct hlist_bl_head), ihash_entries, 14, HASH_EARLY | HASH_ZERO, @@ -2285,7 +2339,7 @@ void __init inode_init(void) inode_hashtable = alloc_large_system_hash("Inode-cache", - sizeof(struct hlist_head), + sizeof(struct hlist_bl_head), ihash_entries, 14, HASH_ZERO, diff --git a/include/linux/fs.h b/include/linux/fs.h index 9eced4cc286e..612ac13ace17 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -645,7 +645,8 @@ struct inode { unsigned long dirtied_when; /* jiffies of first dirtying */ unsigned long dirtied_time_when; - struct hlist_node i_hash; + struct hlist_bl_node i_hash; + struct hlist_bl_head *i_hash_head; struct list_head i_io_list; /* backing dev IO list */ #ifdef CONFIG_CGROUP_WRITEBACK struct bdi_writeback *i_wb; /* the associated cgroup wb */ @@ -711,7 +712,7 @@ static inline unsigned int i_blocksize(const struct inode *node) static inline int inode_unhashed(struct inode *inode) { - return hlist_unhashed(&inode->i_hash); + return hlist_bl_unhashed(&inode->i_hash); } /* @@ -722,7 +723,7 @@ static inline int inode_unhashed(struct inode *inode) */ static inline void inode_fake_hash(struct inode *inode) { - hlist_add_fake(&inode->i_hash); + hlist_bl_add_fake(&inode->i_hash); } /* @@ -3112,7 +3113,7 @@ static inline void insert_inode_hash(struct inode *inode) extern void __remove_inode_hash(struct inode *); static inline void remove_inode_hash(struct inode *inode) { - if (!inode_unhashed(inode) && !hlist_fake(&inode->i_hash)) + if (!inode_unhashed(inode) && !hlist_bl_fake(&inode->i_hash)) __remove_inode_hash(inode); } |