summaryrefslogtreecommitdiff
path: root/fs/ext4/fast_commit.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/fast_commit.c')
-rw-r--r--fs/ext4/fast_commit.c473
1 files changed, 271 insertions, 202 deletions
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 0f32b445582a..3d72565ec6e8 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -65,21 +65,11 @@
*
* Fast Commit Ineligibility
* -------------------------
- * Not all operations are supported by fast commits today (e.g extended
- * attributes). Fast commit ineligibility is marked by calling one of the
- * two following functions:
- *
- * - ext4_fc_mark_ineligible(): This makes next fast commit operation to fall
- * back to full commit. This is useful in case of transient errors.
*
- * - ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() - This makes all
- * the fast commits happening between ext4_fc_start_ineligible() and
- * ext4_fc_stop_ineligible() and one fast commit after the call to
- * ext4_fc_stop_ineligible() to fall back to full commits. It is important to
- * make one more fast commit to fall back to full commit after stop call so
- * that it guaranteed that the fast commit ineligible operation contained
- * within ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() is
- * followed by at least 1 full commit.
+ * Not all operations are supported by fast commits today (e.g extended
+ * attributes). Fast commit ineligibility is marked by calling
+ * ext4_fc_mark_ineligible(): This makes next fast commit operation to fall back
+ * to full commit.
*
* Atomicity of commits
* --------------------
@@ -166,15 +156,13 @@
* fast commit recovery even if that area is invalidated by later full
* commits.
*
- * 1) Make fast commit atomic updates more fine grained. Today, a fast commit
- * eligible update must be protected within ext4_fc_start_update() and
- * ext4_fc_stop_update(). These routines are called at much higher
- * routines. This can be made more fine grained by combining with
- * ext4_journal_start().
+ * 1) Fast commit's commit path locks the entire file system during fast
+ * commit. This has significant performance penalty. Instead of that, we
+ * should use ext4_fc_start/stop_update functions to start inode level
+ * updates from ext4_journal_start/stop. Once we do that we can drop file
+ * system locking during commit path.
*
- * 2) Same above for ext4_fc_start_ineligible() and ext4_fc_stop_ineligible()
- *
- * 3) Handle more ineligible cases.
+ * 2) Handle more ineligible cases.
*/
#include <trace/events/ext4.h>
@@ -211,6 +199,7 @@ void ext4_fc_init_inode(struct inode *inode)
ext4_fc_reset_inode(inode);
ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING);
INIT_LIST_HEAD(&ei->i_fc_list);
+ INIT_LIST_HEAD(&ei->i_fc_dilist);
init_waitqueue_head(&ei->i_fc_wait);
atomic_set(&ei->i_fc_updates, 0);
}
@@ -291,6 +280,8 @@ void ext4_fc_stop_update(struct inode *inode)
void ext4_fc_del(struct inode *inode)
{
struct ext4_inode_info *ei = EXT4_I(inode);
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ struct ext4_fc_dentry_update *fc_dentry;
if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
@@ -298,7 +289,7 @@ void ext4_fc_del(struct inode *inode)
restart:
spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
- if (list_empty(&ei->i_fc_list)) {
+ if (list_empty(&ei->i_fc_list) && list_empty(&ei->i_fc_dilist)) {
spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
return;
}
@@ -307,63 +298,64 @@ restart:
ext4_fc_wait_committing_inode(inode);
goto restart;
}
- list_del_init(&ei->i_fc_list);
- spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
-}
-/*
- * Mark file system as fast commit ineligible. This means that next commit
- * operation would result in a full jbd2 commit.
- */
-void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
+ if (!list_empty(&ei->i_fc_list))
+ list_del_init(&ei->i_fc_list);
- if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
- (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
+ /*
+ * Since this inode is getting removed, let's also remove all FC
+ * dentry create references, since it is not needed to log it anyways.
+ */
+ if (list_empty(&ei->i_fc_dilist)) {
+ spin_unlock(&sbi->s_fc_lock);
return;
+ }
- ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
- WARN_ON(reason >= EXT4_FC_REASON_MAX);
- sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
-}
+ fc_dentry = list_first_entry(&ei->i_fc_dilist, struct ext4_fc_dentry_update, fcd_dilist);
+ WARN_ON(fc_dentry->fcd_op != EXT4_FC_TAG_CREAT);
+ list_del_init(&fc_dentry->fcd_list);
+ list_del_init(&fc_dentry->fcd_dilist);
-/*
- * Start a fast commit ineligible update. Any commits that happen while
- * such an operation is in progress fall back to full commits.
- */
-void ext4_fc_start_ineligible(struct super_block *sb, int reason)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
+ WARN_ON(!list_empty(&ei->i_fc_dilist));
+ spin_unlock(&sbi->s_fc_lock);
- if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
- (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
- return;
+ if (fc_dentry->fcd_name.name &&
+ fc_dentry->fcd_name.len > DNAME_INLINE_LEN)
+ kfree(fc_dentry->fcd_name.name);
+ kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
- WARN_ON(reason >= EXT4_FC_REASON_MAX);
- sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
- atomic_inc(&sbi->s_fc_ineligible_updates);
+ return;
}
/*
- * Stop a fast commit ineligible update. We set EXT4_MF_FC_INELIGIBLE flag here
- * to ensure that after stopping the ineligible update, at least one full
- * commit takes place.
+ * Mark file system as fast commit ineligible, and record latest
+ * ineligible transaction tid. This means until the recorded
+ * transaction, commit operation would result in a full jbd2 commit.
*/
-void ext4_fc_stop_ineligible(struct super_block *sb)
+void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle)
{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ tid_t tid;
+
if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
return;
ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
- atomic_dec(&EXT4_SB(sb)->s_fc_ineligible_updates);
-}
-
-static inline int ext4_fc_is_ineligible(struct super_block *sb)
-{
- return (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE) ||
- atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates));
+ if (handle && !IS_ERR(handle))
+ tid = handle->h_transaction->t_tid;
+ else {
+ read_lock(&sbi->s_journal->j_state_lock);
+ tid = sbi->s_journal->j_running_transaction ?
+ sbi->s_journal->j_running_transaction->t_tid : 0;
+ read_unlock(&sbi->s_journal->j_state_lock);
+ }
+ spin_lock(&sbi->s_fc_lock);
+ if (sbi->s_fc_ineligible_tid < tid)
+ sbi->s_fc_ineligible_tid = tid;
+ spin_unlock(&sbi->s_fc_lock);
+ WARN_ON(reason >= EXT4_FC_REASON_MAX);
+ sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
}
/*
@@ -387,13 +379,6 @@ static int ext4_fc_track_template(
tid_t tid = 0;
int ret;
- if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
- (sbi->s_mount_state & EXT4_FC_REPLAY))
- return -EOPNOTSUPP;
-
- if (ext4_fc_is_ineligible(inode->i_sb))
- return -EINVAL;
-
tid = handle->h_transaction->t_tid;
mutex_lock(&ei->i_fc_lock);
if (tid == ei->i_sync_tid) {
@@ -411,7 +396,8 @@ static int ext4_fc_track_template(
spin_lock(&sbi->s_fc_lock);
if (list_empty(&EXT4_I(inode)->i_fc_list))
list_add_tail(&EXT4_I(inode)->i_fc_list,
- (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) ?
+ (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
+ sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) ?
&sbi->s_fc_q[FC_Q_STAGING] :
&sbi->s_fc_q[FC_Q_MAIN]);
spin_unlock(&sbi->s_fc_lock);
@@ -437,7 +423,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
mutex_unlock(&ei->i_fc_lock);
node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
if (!node) {
- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
+ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
mutex_lock(&ei->i_fc_lock);
return -ENOMEM;
}
@@ -450,7 +436,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
if (!node->fcd_name.name) {
kmem_cache_free(ext4_fc_dentry_cachep, node);
ext4_fc_mark_ineligible(inode->i_sb,
- EXT4_FC_REASON_NOMEM);
+ EXT4_FC_REASON_NOMEM, NULL);
mutex_lock(&ei->i_fc_lock);
return -ENOMEM;
}
@@ -462,13 +448,28 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
node->fcd_name.name = node->fcd_iname;
}
node->fcd_name.len = dentry->d_name.len;
-
+ INIT_LIST_HEAD(&node->fcd_dilist);
spin_lock(&sbi->s_fc_lock);
- if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING))
+ if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
+ sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING)
list_add_tail(&node->fcd_list,
&sbi->s_fc_dentry_q[FC_Q_STAGING]);
else
list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]);
+
+ /*
+ * This helps us keep a track of all fc_dentry updates which is part of
+ * this ext4 inode. So in case the inode is getting unlinked, before
+ * even we get a chance to fsync, we could remove all fc_dentry
+ * references while evicting the inode in ext4_fc_del().
+ * Also with this, we don't need to loop over all the inodes in
+ * sbi->s_fc_q to get the corresponding inode in
+ * ext4_fc_commit_dentry_updates().
+ */
+ if (dentry_update->op == EXT4_FC_TAG_CREAT) {
+ WARN_ON(!list_empty(&ei->i_fc_dilist));
+ list_add_tail(&node->fcd_dilist, &ei->i_fc_dilist);
+ }
spin_unlock(&sbi->s_fc_lock);
mutex_lock(&ei->i_fc_lock);
@@ -486,12 +487,22 @@ void __ext4_fc_track_unlink(handle_t *handle,
ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
(void *)&args, 0);
- trace_ext4_fc_track_unlink(inode, dentry, ret);
+ trace_ext4_fc_track_unlink(handle, inode, dentry, ret);
}
void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry)
{
- __ext4_fc_track_unlink(handle, d_inode(dentry), dentry);
+ struct inode *inode = d_inode(dentry);
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+ if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
+ (sbi->s_mount_state & EXT4_FC_REPLAY))
+ return;
+
+ if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
+ return;
+
+ __ext4_fc_track_unlink(handle, inode, dentry);
}
void __ext4_fc_track_link(handle_t *handle,
@@ -505,12 +516,22 @@ void __ext4_fc_track_link(handle_t *handle,
ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
(void *)&args, 0);
- trace_ext4_fc_track_link(inode, dentry, ret);
+ trace_ext4_fc_track_link(handle, inode, dentry, ret);
}
void ext4_fc_track_link(handle_t *handle, struct dentry *dentry)
{
- __ext4_fc_track_link(handle, d_inode(dentry), dentry);
+ struct inode *inode = d_inode(dentry);
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+ if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
+ (sbi->s_mount_state & EXT4_FC_REPLAY))
+ return;
+
+ if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
+ return;
+
+ __ext4_fc_track_link(handle, inode, dentry);
}
void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
@@ -524,12 +545,22 @@ void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
(void *)&args, 0);
- trace_ext4_fc_track_create(inode, dentry, ret);
+ trace_ext4_fc_track_create(handle, inode, dentry, ret);
}
void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
{
- __ext4_fc_track_create(handle, d_inode(dentry), dentry);
+ struct inode *inode = d_inode(dentry);
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+ if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
+ (sbi->s_mount_state & EXT4_FC_REPLAY))
+ return;
+
+ if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
+ return;
+
+ __ext4_fc_track_create(handle, inode, dentry);
}
/* __track_fn for inode tracking */
@@ -545,6 +576,7 @@ static int __track_inode(struct inode *inode, void *arg, bool update)
void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
{
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
int ret;
if (S_ISDIR(inode->i_mode))
@@ -552,12 +584,19 @@ void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
if (ext4_should_journal_data(inode)) {
ext4_fc_mark_ineligible(inode->i_sb,
- EXT4_FC_REASON_INODE_JOURNAL_DATA);
+ EXT4_FC_REASON_INODE_JOURNAL_DATA, handle);
return;
}
+ if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
+ (sbi->s_mount_state & EXT4_FC_REPLAY))
+ return;
+
+ if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
+ return;
+
ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1);
- trace_ext4_fc_track_inode(inode, ret);
+ trace_ext4_fc_track_inode(handle, inode, ret);
}
struct __track_range_args {
@@ -595,18 +634,26 @@ static int __track_range(struct inode *inode, void *arg, bool update)
void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start,
ext4_lblk_t end)
{
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct __track_range_args args;
int ret;
if (S_ISDIR(inode->i_mode))
return;
+ if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
+ (sbi->s_mount_state & EXT4_FC_REPLAY))
+ return;
+
+ if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
+ return;
+
args.start = start;
args.end = end;
ret = ext4_fc_track_template(handle, inode, __track_range, &args, 1);
- trace_ext4_fc_track_range(inode, start, end, ret);
+ trace_ext4_fc_track_range(handle, inode, start, end, ret);
}
static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail)
@@ -796,7 +843,6 @@ static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc,
ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc);
dst += sizeof(fcd);
ext4_fc_memcpy(sb, dst, fc_dentry->fcd_name.name, dlen, crc);
- dst += dlen;
return true;
}
@@ -930,7 +976,6 @@ static int ext4_fc_submit_inode_data_all(journal_t *journal)
int ret = 0;
spin_lock(&sbi->s_fc_lock);
- ext4_set_mount_flag(sb, EXT4_MF_FC_COMMITTING);
list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
while (atomic_read(&ei->i_fc_updates)) {
@@ -990,7 +1035,7 @@ __releases(&sbi->s_fc_lock)
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_fc_dentry_update *fc_dentry, *fc_dentry_n;
struct inode *inode;
- struct ext4_inode_info *ei, *ei_n;
+ struct ext4_inode_info *ei;
int ret;
if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN]))
@@ -1006,21 +1051,16 @@ __releases(&sbi->s_fc_lock)
spin_lock(&sbi->s_fc_lock);
continue;
}
-
- inode = NULL;
- list_for_each_entry_safe(ei, ei_n, &sbi->s_fc_q[FC_Q_MAIN],
- i_fc_list) {
- if (ei->vfs_inode.i_ino == fc_dentry->fcd_ino) {
- inode = &ei->vfs_inode;
- break;
- }
- }
/*
- * If we don't find inode in our list, then it was deleted,
- * in which case, we don't need to record it's create tag.
+ * With fcd_dilist we need not loop in sbi->s_fc_q to get the
+ * corresponding inode pointer
*/
- if (!inode)
- continue;
+ WARN_ON(list_empty(&fc_dentry->fcd_dilist));
+ ei = list_first_entry(&fc_dentry->fcd_dilist,
+ struct ext4_inode_info, i_fc_dilist);
+ inode = &ei->vfs_inode;
+ WARN_ON(inode->i_ino != fc_dentry->fcd_ino);
+
spin_unlock(&sbi->s_fc_lock);
/*
@@ -1123,6 +1163,33 @@ out:
return ret;
}
+static void ext4_fc_update_stats(struct super_block *sb, int status,
+ u64 commit_time, int nblks, tid_t commit_tid)
+{
+ struct ext4_fc_stats *stats = &EXT4_SB(sb)->s_fc_stats;
+
+ jbd_debug(1, "Fast commit ended with status = %d for tid %u",
+ status, commit_tid);
+ if (status == EXT4_FC_STATUS_OK) {
+ stats->fc_num_commits++;
+ stats->fc_numblks += nblks;
+ if (likely(stats->s_fc_avg_commit_time))
+ stats->s_fc_avg_commit_time =
+ (commit_time +
+ stats->s_fc_avg_commit_time * 3) / 4;
+ else
+ stats->s_fc_avg_commit_time = commit_time;
+ } else if (status == EXT4_FC_STATUS_FAILED ||
+ status == EXT4_FC_STATUS_INELIGIBLE) {
+ if (status == EXT4_FC_STATUS_FAILED)
+ stats->fc_failed_commits++;
+ stats->fc_ineligible_commits++;
+ } else {
+ stats->fc_skipped_commits++;
+ }
+ trace_ext4_fc_commit_stop(sb, nblks, status, commit_tid);
+}
+
/*
* The main commit entry point. Performs a fast commit for transaction
* commit_tid if needed. If it's not possible to perform a fast commit
@@ -1135,18 +1202,15 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
struct ext4_sb_info *sbi = EXT4_SB(sb);
int nblks = 0, ret, bsize = journal->j_blocksize;
int subtid = atomic_read(&sbi->s_fc_subtid);
- int reason = EXT4_FC_REASON_OK, fc_bufs_before = 0;
+ int status = EXT4_FC_STATUS_OK, fc_bufs_before = 0;
ktime_t start_time, commit_time;
- trace_ext4_fc_commit_start(sb);
+ if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
+ return jbd2_complete_transaction(journal, commit_tid);
- start_time = ktime_get();
+ trace_ext4_fc_commit_start(sb, commit_tid);
- if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
- (ext4_fc_is_ineligible(sb))) {
- reason = EXT4_FC_REASON_INELIGIBLE;
- goto out;
- }
+ start_time = ktime_get();
restart_fc:
ret = jbd2_fc_begin_commit(journal, commit_tid);
@@ -1155,74 +1219,61 @@ restart_fc:
if (atomic_read(&sbi->s_fc_subtid) <= subtid &&
commit_tid > journal->j_commit_sequence)
goto restart_fc;
- reason = EXT4_FC_REASON_ALREADY_COMMITTED;
- goto out;
+ ext4_fc_update_stats(sb, EXT4_FC_STATUS_SKIPPED, 0, 0,
+ commit_tid);
+ return 0;
} else if (ret) {
- sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
- reason = EXT4_FC_REASON_FC_START_FAILED;
- goto out;
+ /*
+ * Commit couldn't start. Just update stats and perform a
+ * full commit.
+ */
+ ext4_fc_update_stats(sb, EXT4_FC_STATUS_FAILED, 0, 0,
+ commit_tid);
+ return jbd2_complete_transaction(journal, commit_tid);
+ }
+
+ /*
+ * After establishing journal barrier via jbd2_fc_begin_commit(), check
+ * if we are fast commit ineligible.
+ */
+ if (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE)) {
+ status = EXT4_FC_STATUS_INELIGIBLE;
+ goto fallback;
}
fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize;
ret = ext4_fc_perform_commit(journal);
if (ret < 0) {
- sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
- reason = EXT4_FC_REASON_FC_FAILED;
- goto out;
+ status = EXT4_FC_STATUS_FAILED;
+ goto fallback;
}
nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before;
ret = jbd2_fc_wait_bufs(journal, nblks);
if (ret < 0) {
- sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
- reason = EXT4_FC_REASON_FC_FAILED;
- goto out;
+ status = EXT4_FC_STATUS_FAILED;
+ goto fallback;
}
atomic_inc(&sbi->s_fc_subtid);
- jbd2_fc_end_commit(journal);
-out:
- /* Has any ineligible update happened since we started? */
- if (reason == EXT4_FC_REASON_OK && ext4_fc_is_ineligible(sb)) {
- sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
- reason = EXT4_FC_REASON_INELIGIBLE;
- }
-
- spin_lock(&sbi->s_fc_lock);
- if (reason != EXT4_FC_REASON_OK &&
- reason != EXT4_FC_REASON_ALREADY_COMMITTED) {
- sbi->s_fc_stats.fc_ineligible_commits++;
- } else {
- sbi->s_fc_stats.fc_num_commits++;
- sbi->s_fc_stats.fc_numblks += nblks;
- }
- spin_unlock(&sbi->s_fc_lock);
- nblks = (reason == EXT4_FC_REASON_OK) ? nblks : 0;
- trace_ext4_fc_commit_stop(sb, nblks, reason);
- commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
+ ret = jbd2_fc_end_commit(journal);
/*
- * weight the commit time higher than the average time so we don't
- * react too strongly to vast changes in the commit time
+ * weight the commit time higher than the average time so we
+ * don't react too strongly to vast changes in the commit time
*/
- if (likely(sbi->s_fc_avg_commit_time))
- sbi->s_fc_avg_commit_time = (commit_time +
- sbi->s_fc_avg_commit_time * 3) / 4;
- else
- sbi->s_fc_avg_commit_time = commit_time;
- jbd_debug(1,
- "Fast commit ended with blks = %d, reason = %d, subtid - %d",
- nblks, reason, subtid);
- if (reason == EXT4_FC_REASON_FC_FAILED)
- return jbd2_fc_end_commit_fallback(journal);
- if (reason == EXT4_FC_REASON_FC_START_FAILED ||
- reason == EXT4_FC_REASON_INELIGIBLE)
- return jbd2_complete_transaction(journal, commit_tid);
- return 0;
+ commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
+ ext4_fc_update_stats(sb, status, commit_time, nblks, commit_tid);
+ return ret;
+
+fallback:
+ ret = jbd2_fc_end_commit_fallback(journal);
+ ext4_fc_update_stats(sb, status, 0, 0, commit_tid);
+ return ret;
}
/*
* Fast commit cleanup routine. This is called after every fast commit and
* full commit. full is true if we are called after a full commit.
*/
-static void ext4_fc_cleanup(journal_t *journal, int full)
+static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
{
struct super_block *sb = journal->j_private;
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -1232,6 +1283,7 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
if (full && sbi->s_fc_bh)
sbi->s_fc_bh = NULL;
+ trace_ext4_fc_cleanup(journal, full, tid);
jbd2_fc_release_bufs(journal);
spin_lock(&sbi->s_fc_lock);
@@ -1240,7 +1292,8 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
list_del_init(&iter->i_fc_list);
ext4_clear_inode_state(&iter->vfs_inode,
EXT4_STATE_FC_COMMITTING);
- ext4_fc_reset_inode(&iter->vfs_inode);
+ if (iter->i_sync_tid <= tid)
+ ext4_fc_reset_inode(&iter->vfs_inode);
/* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
smp_mb();
#if (BITS_PER_LONG < 64)
@@ -1255,6 +1308,7 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
struct ext4_fc_dentry_update,
fcd_list);
list_del_init(&fc_dentry->fcd_list);
+ list_del_init(&fc_dentry->fcd_dilist);
spin_unlock(&sbi->s_fc_lock);
if (fc_dentry->fcd_name.name &&
@@ -1269,8 +1323,10 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
&sbi->s_fc_q[FC_Q_MAIN]);
- ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
- ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+ if (tid >= sbi->s_fc_ineligible_tid) {
+ sbi->s_fc_ineligible_tid = 0;
+ ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+ }
if (full)
sbi->s_fc_bytes = 0;
@@ -1435,14 +1491,15 @@ static int ext4_fc_record_modified_inode(struct super_block *sb, int ino)
if (state->fc_modified_inodes[i] == ino)
return 0;
if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) {
- state->fc_modified_inodes_size +=
- EXT4_FC_REPLAY_REALLOC_INCREMENT;
state->fc_modified_inodes = krealloc(
- state->fc_modified_inodes, sizeof(int) *
- state->fc_modified_inodes_size,
- GFP_KERNEL);
+ state->fc_modified_inodes,
+ sizeof(int) * (state->fc_modified_inodes_size +
+ EXT4_FC_REPLAY_REALLOC_INCREMENT),
+ GFP_KERNEL);
if (!state->fc_modified_inodes)
return -ENOMEM;
+ state->fc_modified_inodes_size +=
+ EXT4_FC_REPLAY_REALLOC_INCREMENT;
}
state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino;
return 0;
@@ -1474,7 +1531,9 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl,
}
inode = NULL;
- ext4_fc_record_modified_inode(sb, ino);
+ ret = ext4_fc_record_modified_inode(sb, ino);
+ if (ret)
+ goto out;
raw_fc_inode = (struct ext4_inode *)
(val + offsetof(struct ext4_fc_inode, fc_raw_inode));
@@ -1606,16 +1665,23 @@ out:
}
/*
- * Record physical disk regions which are in use as per fast commit area. Our
- * simple replay phase allocator excludes these regions from allocation.
+ * Record physical disk regions which are in use as per fast commit area,
+ * and used by inodes during replay phase. Our simple replay phase
+ * allocator excludes these regions from allocation.
*/
-static int ext4_fc_record_regions(struct super_block *sb, int ino,
- ext4_lblk_t lblk, ext4_fsblk_t pblk, int len)
+int ext4_fc_record_regions(struct super_block *sb, int ino,
+ ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay)
{
struct ext4_fc_replay_state *state;
struct ext4_fc_alloc_region *region;
state = &EXT4_SB(sb)->s_fc_replay_state;
+ /*
+ * during replay phase, the fc_regions_valid may not same as
+ * fc_regions_used, update it when do new additions.
+ */
+ if (replay && state->fc_regions_used != state->fc_regions_valid)
+ state->fc_regions_used = state->fc_regions_valid;
if (state->fc_regions_used == state->fc_regions_size) {
state->fc_regions_size +=
EXT4_FC_REPLAY_REALLOC_INCREMENT;
@@ -1633,6 +1699,9 @@ static int ext4_fc_record_regions(struct super_block *sb, int ino,
region->pblk = pblk;
region->len = len;
+ if (replay)
+ state->fc_regions_valid++;
+
return 0;
}
@@ -1664,6 +1733,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
}
ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
+ if (ret)
+ goto out;
start = le32_to_cpu(ex->ee_block);
start_pblk = ext4_ext_pblock(ex);
@@ -1681,18 +1752,14 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
map.m_pblk = 0;
ret = ext4_map_blocks(NULL, inode, &map, 0);
- if (ret < 0) {
- iput(inode);
- return 0;
- }
+ if (ret < 0)
+ goto out;
if (ret == 0) {
/* Range is not mapped */
path = ext4_find_extent(inode, cur, NULL, 0);
- if (IS_ERR(path)) {
- iput(inode);
- return 0;
- }
+ if (IS_ERR(path))
+ goto out;
memset(&newex, 0, sizeof(newex));
newex.ee_block = cpu_to_le32(cur);
ext4_ext_store_pblock(
@@ -1706,10 +1773,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
up_write((&EXT4_I(inode)->i_data_sem));
ext4_ext_drop_refs(path);
kfree(path);
- if (ret) {
- iput(inode);
- return 0;
- }
+ if (ret)
+ goto out;
goto next;
}
@@ -1722,10 +1787,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
ext4_ext_is_unwritten(ex),
start_pblk + cur - start);
- if (ret) {
- iput(inode);
- return 0;
- }
+ if (ret)
+ goto out;
/*
* Mark the old blocks as free since they aren't used
* anymore. We maintain an array of all the modified
@@ -1745,10 +1808,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
ext4_ext_is_unwritten(ex), map.m_pblk);
ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
ext4_ext_is_unwritten(ex), map.m_pblk);
- if (ret) {
- iput(inode);
- return 0;
- }
+ if (ret)
+ goto out;
/*
* We may have split the extent tree while toggling the state.
* Try to shrink the extent tree now.
@@ -1760,6 +1821,7 @@ next:
}
ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >>
sb->s_blocksize_bits);
+out:
iput(inode);
return 0;
}
@@ -1789,6 +1851,8 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
}
ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
+ if (ret)
+ goto out;
jbd_debug(1, "DEL_RANGE, inode %ld, lblk %d, len %d\n",
inode->i_ino, le32_to_cpu(lrange.fc_lblk),
@@ -1798,10 +1862,8 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
map.m_len = remaining;
ret = ext4_map_blocks(NULL, inode, &map, 0);
- if (ret < 0) {
- iput(inode);
- return 0;
- }
+ if (ret < 0)
+ goto out;
if (ret > 0) {
remaining -= ret;
cur += ret;
@@ -1812,16 +1874,18 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
}
}
- ret = ext4_punch_hole(inode,
- le32_to_cpu(lrange.fc_lblk) << sb->s_blocksize_bits,
- le32_to_cpu(lrange.fc_len) << sb->s_blocksize_bits);
+ down_write(&EXT4_I(inode)->i_data_sem);
+ ret = ext4_ext_remove_space(inode, le32_to_cpu(lrange.fc_lblk),
+ le32_to_cpu(lrange.fc_lblk) +
+ le32_to_cpu(lrange.fc_len) - 1);
+ up_write(&EXT4_I(inode)->i_data_sem);
if (ret)
- jbd_debug(1, "ext4_punch_hole returned %d", ret);
+ goto out;
ext4_ext_replay_shrink_inode(inode,
i_size_read(inode) >> sb->s_blocksize_bits);
ext4_mark_inode_dirty(NULL, inode);
+out:
iput(inode);
-
return 0;
}
@@ -1892,8 +1956,8 @@ bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t blk)
if (state->fc_regions[i].ino == 0 ||
state->fc_regions[i].len == 0)
continue;
- if (blk >= state->fc_regions[i].pblk &&
- blk < state->fc_regions[i].pblk + state->fc_regions[i].len)
+ if (in_range(blk, state->fc_regions[i].pblk,
+ state->fc_regions[i].len))
return true;
}
return false;
@@ -1977,7 +2041,7 @@ static int ext4_fc_replay_scan(journal_t *journal,
ret = ext4_fc_record_regions(sb,
le32_to_cpu(ext.fc_ino),
le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex),
- ext4_ext_get_actual_len(ex));
+ ext4_ext_get_actual_len(ex), 0);
if (ret < 0)
break;
ret = JBD2_FC_REPLAY_CONTINUE;
@@ -2173,7 +2237,7 @@ int ext4_fc_info_show(struct seq_file *seq, void *v)
"fc stats:\n%ld commits\n%ld ineligible\n%ld numblks\n%lluus avg_commit_time\n",
stats->fc_num_commits, stats->fc_ineligible_commits,
stats->fc_numblks,
- div_u64(sbi->s_fc_avg_commit_time, 1000));
+ div_u64(stats->s_fc_avg_commit_time, 1000));
seq_puts(seq, "Ineligible reasons:\n");
for (i = 0; i < EXT4_FC_REASON_MAX; i++)
seq_printf(seq, "\"%s\":\t%d\n", fc_ineligible_reasons[i],
@@ -2192,3 +2256,8 @@ int __init ext4_fc_init_dentry_cache(void)
return 0;
}
+
+void ext4_fc_destroy_dentry_cache(void)
+{
+ kmem_cache_destroy(ext4_fc_dentry_cachep);
+}