summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_inode.c')
-rw-r--r--fs/xfs/xfs_inode.c207
1 files changed, 207 insertions, 0 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index b9696d762c8f..baee8c894447 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1881,6 +1881,167 @@ xfs_inactive(
}
/*
+ * In-Core Unlinked List Lookups
+ * ====================================
+ *
+ * Every inode is supposed to be reachable from some other piece of metadata
+ * with the exception of the root directory. Inodes with a connection to a
+ * file descriptor but not linked from anywhere in the on-disk directory tree
+ * are collectively known as unlinked inodes, though the filesystem itself
+ * maintains links to these inodes so that on-disk metadata are consistent.
+ *
+ * XFS implements a per-AG on-disk hash table of unlinked inodes. The AGI
+ * header contains a number of buckets that point to an inode, and each inode
+ * record has a pointer to the next inode in the hash chain. This
+ * singly-linked list causes scaling problems in the iunlink remove function
+ * because we must walk that list to find the inode that points to the inode
+ * being removed from the unlinked hash bucket list.
+ *
+ * What if we modelled the unlinked list as a collection of records capturing
+ * "X.next_unlinked = Y" relations? If we indexed those records on Y, we'd
+ * have a fast way to look up unlinked list predecessors, which avoids the
+ * slow list walk. That's exactly what we do here (in-core) with a per-AG
+ * rhashtable.
+ */
+
+/* Capture a "X.next_unlinked = Y" relationship. */
+struct xfs_iunlink {
+ struct rhash_head iu_rhash_head;
+ xfs_agino_t iu_agino;
+ xfs_agino_t iu_next_unlinked;
+};
+
+/* Unlinked list predecessor lookup hashtable construction */
+static int
+xfs_iunlink_obj_cmpfn(
+ struct rhashtable_compare_arg *arg,
+ const void *obj)
+{
+ const xfs_agino_t *key = arg->key;
+ const struct xfs_iunlink *iu = obj;
+
+ if (iu->iu_next_unlinked != *key)
+ return 1;
+ return 0;
+}
+
+static const struct rhashtable_params xfs_iunlink_hash_params = {
+ .min_size = XFS_AGI_UNLINKED_BUCKETS,
+ .nelem_hint = 512,
+ .key_len = sizeof(xfs_agino_t),
+ .key_offset = offsetof(struct xfs_iunlink, iu_next_unlinked),
+ .head_offset = offsetof(struct xfs_iunlink, iu_rhash_head),
+ .automatic_shrinking = true,
+ .obj_cmpfn = xfs_iunlink_obj_cmpfn,
+};
+
+/*
+ * Return X, where X.next_unlinked == @agino. Returns NULLAGINO if no such
+ * relation is found.
+ */
+xfs_agino_t
+xfs_iunlink_lookup_backref(
+ struct xfs_perag *pag,
+ xfs_agino_t agino)
+{
+ struct xfs_iunlink *iu;
+
+ iu = rhashtable_lookup_fast(&pag->pagi_unlinked_hash, &agino,
+ xfs_iunlink_hash_params);
+ return iu ? iu->iu_agino : NULLAGINO;
+}
+
+/* Remember that @prev_agino.next_unlinked = @this_agino. */
+int
+xfs_iunlink_add_backref(
+ struct xfs_perag *pag,
+ xfs_agino_t prev_agino,
+ xfs_agino_t this_agino)
+{
+ struct xfs_iunlink *iu;
+
+ iu = kmem_zalloc(sizeof(*iu), KM_SLEEP | KM_NOFS);
+ iu->iu_agino = prev_agino;
+ iu->iu_next_unlinked = this_agino;
+
+ return rhashtable_insert_fast(&pag->pagi_unlinked_hash,
+ &iu->iu_rhash_head, xfs_iunlink_hash_params);
+}
+
+/*
+ * Replace X.next_unlinked = @agino with X.next_unlinked = @next_unlinked.
+ * If @next_unlinked is NULLAGINO, we drop the backref and exit.
+ */
+int
+xfs_iunlink_change_backref(
+ struct xfs_perag *pag,
+ xfs_agino_t agino,
+ xfs_agino_t next_unlinked)
+{
+ struct xfs_iunlink *iu;
+ int error;
+
+ iu = rhashtable_lookup_fast(&pag->pagi_unlinked_hash, &agino,
+ xfs_iunlink_hash_params);
+ if (!iu)
+ return -ENOENT;
+
+ error = rhashtable_remove_fast(&pag->pagi_unlinked_hash,
+ &iu->iu_rhash_head, xfs_iunlink_hash_params);
+ if (error)
+ return error;
+
+ /*
+ * If there is no new next entry just free our item and return.
+ */
+ if (next_unlinked == NULLAGINO) {
+ kmem_free(iu);
+ return 0;
+ }
+
+ /*
+ * Else update the hash table to the new entry.
+ */
+ iu->iu_next_unlinked = next_unlinked;
+ return rhashtable_insert_fast(&pag->pagi_unlinked_hash,
+ &iu->iu_rhash_head, xfs_iunlink_hash_params);
+}
+
+/* Set up the in-core predecessor structures. */
+int
+xfs_iunlink_init(
+ struct xfs_perag *pag)
+{
+ return rhashtable_init(&pag->pagi_unlinked_hash,
+ &xfs_iunlink_hash_params);
+}
+
+/* Free the in-core predecessor structures. */
+static void
+xfs_iunlink_free_item(
+ void *ptr,
+ void *arg)
+{
+ struct xfs_iunlink *iu = ptr;
+ bool *freed_anything = arg;
+
+ *freed_anything = true;
+ kmem_free(iu);
+}
+
+void
+xfs_iunlink_destroy(
+ struct xfs_perag *pag)
+{
+ bool freed_anything = false;
+
+ rhashtable_free_and_destroy(&pag->pagi_unlinked_hash,
+ xfs_iunlink_free_item, &freed_anything);
+
+ ASSERT(freed_anything == false || XFS_FORCED_SHUTDOWN(pag->pag_mount));
+}
+
+/*
* Point the AGI unlinked bucket at an inode and log the results. The caller
* is responsible for validating the old value.
*/
@@ -2055,6 +2216,14 @@ xfs_iunlink(
if (error)
goto out;
ASSERT(old_agino == NULLAGINO);
+
+ /*
+ * agino has been unlinked, add a backref from the next inode
+ * back to agino.
+ */
+ error = xfs_iunlink_add_backref(pag, agino, next_agino);
+ if (error)
+ goto out;
}
/* Point the head of the list to point to this inode. */
@@ -2127,6 +2296,17 @@ xfs_iunlink_map_prev(
ASSERT(head_agino != target_agino);
+ /* See if our backref cache can find it faster. */
+ next_agino = xfs_iunlink_lookup_backref(pag, target_agino);
+ if (next_agino != NULLAGINO) {
+ next_ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, next_agino);
+ error = xfs_iunlink_map_ino(tp, next_ino, imap, &last_dip,
+ &last_ibp);
+ if (error)
+ return error;
+ goto out;
+ }
+
next_agino = head_agino;
while (next_agino != target_agino) {
xfs_agino_t unlinked_agino;
@@ -2156,6 +2336,7 @@ xfs_iunlink_map_prev(
next_agino = unlinked_agino;
}
+out:
/* Should never happen, but don't return garbage. */
if (next_ino == NULLFSINO)
return -EFSCORRUPTED;
@@ -2218,6 +2399,20 @@ xfs_iunlink_remove(
if (error)
goto out;
+ /*
+ * If there was a backref pointing from the next inode back to this
+ * one, remove it because we've removed this inode from the list.
+ *
+ * Later, if this inode was in the middle of the list we'll update
+ * this inode's backref to point from the next inode.
+ */
+ if (next_agino != NULLAGINO) {
+ error = xfs_iunlink_change_backref(pag, next_agino,
+ NULLAGINO);
+ if (error)
+ goto out;
+ }
+
if (head_agino == agino) {
/* Point the head of the list to the next unlinked inode. */
error = xfs_iunlink_update_bucket(tp, agno, agibp, bucket_index,
@@ -2236,6 +2431,18 @@ xfs_iunlink_remove(
/* Point the previous inode on the list to the next inode. */
xfs_iunlink_update_dinode(tp, agno, last_ibp, last_dip, &imap,
prev_ino, next_agino);
+
+ /*
+ * Now we deal with the backref for this inode. If this inode
+ * pointed at a real inode, change the backref that pointed to
+ * us to point to our old next. If this inode was the end of
+ * the list, delete the backref that pointed to us. Note that
+ * change_backref takes care of deleting the backref if
+ * next_agino is NULLAGINO.
+ */
+ error = xfs_iunlink_change_backref(pag, agino, next_agino);
+ if (error)
+ goto out;
}
pag->pagi_unlinked_count--;
out: