From 37975f1503715c51c1a2484b8aba2700a23e6b56 Mon Sep 17 00:00:00 2001
From: Fabian Frederick <fabf@skynet.be>
Date: Thu, 9 Oct 2014 22:49:21 +0200
Subject: GFS2: directly return gfs2_dir_check()

No need to store gfs2_dir_check result and test it before returning.

Signed-off-by: Fabian Frederick <fabf@skynet.be>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index c4ed823d150e..b41b5c7898da 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1045,11 +1045,7 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
 	if (error)
 		return error;
 
-	error = gfs2_dir_check(&dip->i_inode, name, ip);
-	if (error)
-		return error;
-
-	return 0;
+	return gfs2_dir_check(&dip->i_inode, name, ip);
 }
 
 /**
-- 
cgit v1.2.3


From 0e27c18c30f6850ecd0fb67143f202f0426d76d7 Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Wed, 29 Oct 2014 08:02:28 -0500
Subject: GFS2: Set of distributed preferences for rgrps

This patch tries to use the journal numbers to evenly distribute
which node prefers which resource group for block allocations. This
is to help performance.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/incore.h |  1 +
 fs/gfs2/rgrp.c   | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 62 insertions(+), 5 deletions(-)

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 39e7e9959b74..1b899187be5a 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -97,6 +97,7 @@ struct gfs2_rgrpd {
 #define GFS2_RDF_CHECK		0x10000000 /* check for unlinked inodes */
 #define GFS2_RDF_UPTODATE	0x20000000 /* rg is up to date */
 #define GFS2_RDF_ERROR		0x40000000 /* error in rg */
+#define GFS2_RDF_PREFERRED	0x80000000 /* This rgrp is preferred */
 #define GFS2_RDF_MASK		0xf0000000 /* mask for internal flags */
 	spinlock_t rd_rsspin;           /* protects reservation related vars */
 	struct rb_root rd_rstree;       /* multi-block reservation tree */
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 7474c413ffd1..f4e4a0c5babe 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -936,7 +936,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
 	rgd->rd_gl->gl_vm.start = rgd->rd_addr * bsize;
 	rgd->rd_gl->gl_vm.end = rgd->rd_gl->gl_vm.start + (rgd->rd_length * bsize) - 1;
 	rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;
-	rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
+	rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED);
 	if (rgd->rd_data > sdp->sd_max_rg_data)
 		sdp->sd_max_rg_data = rgd->rd_data;
 	spin_lock(&sdp->sd_rindex_spin);
@@ -954,6 +954,36 @@ fail:
 	return error;
 }
 
+/**
+ * set_rgrp_preferences - Run all the rgrps, selecting some we prefer to use
+ * @sdp: the GFS2 superblock
+ *
+ * The purpose of this function is to select a subset of the resource groups
+ * and mark them as PREFERRED. We do it in such a way that each node prefers
+ * to use a unique set of rgrps to minimize glock contention.
+ */
+static void set_rgrp_preferences(struct gfs2_sbd *sdp)
+{
+	struct gfs2_rgrpd *rgd, *first;
+	int i;
+
+	/* Skip an initial number of rgrps, based on this node's journal ID.
+	   That should start each node out on its own set. */
+	rgd = gfs2_rgrpd_get_first(sdp);
+	for (i = 0; i < sdp->sd_lockstruct.ls_jid; i++)
+		rgd = gfs2_rgrpd_get_next(rgd);
+	first = rgd;
+
+	do {
+		rgd->rd_flags |= GFS2_RDF_PREFERRED;
+		for (i = 0; i < sdp->sd_journals; i++) {
+			rgd = gfs2_rgrpd_get_next(rgd);
+			if (rgd == first)
+				break;
+		}
+	} while (rgd != first);
+}
+
 /**
  * gfs2_ri_update - Pull in a new resource index from the disk
  * @ip: pointer to the rindex inode
@@ -973,6 +1003,8 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
 	if (error < 0)
 		return error;
 
+	set_rgrp_preferences(sdp);
+
 	sdp->sd_rindex_uptodate = 1;
 	return 0;
 }
@@ -1890,6 +1922,25 @@ static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *b
 	return false;
 }
 
+/**
+ * fast_to_acquire - determine if a resource group will be fast to acquire
+ *
+ * If this is one of our preferred rgrps, it should be quicker to acquire,
+ * because we tried to set ourselves up as dlm lock master.
+ */
+static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
+{
+	struct gfs2_glock *gl = rgd->rd_gl;
+
+	if (gl->gl_state != LM_ST_UNLOCKED && list_empty(&gl->gl_holders) &&
+	    !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
+	    !test_bit(GLF_DEMOTE, &gl->gl_flags))
+		return 1;
+	if (rgd->rd_flags & GFS2_RDF_PREFERRED)
+		return 1;
+	return 0;
+}
+
 /**
  * gfs2_inplace_reserve - Reserve space in the filesystem
  * @ip: the inode to reserve space for
@@ -1932,10 +1983,15 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a
 			rg_locked = 0;
 			if (skip && skip--)
 				goto next_rgrp;
-			if (!gfs2_rs_active(rs) && (loops < 2) &&
-			     gfs2_rgrp_used_recently(rs, 1000) &&
-			     gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
-				goto next_rgrp;
+			if (!gfs2_rs_active(rs)) {
+				if (loops == 0 &&
+				    !fast_to_acquire(rs->rs_rbm.rgd))
+					goto next_rgrp;
+				if ((loops < 2) &&
+				    gfs2_rgrp_used_recently(rs, 1000) &&
+				    gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
+					goto next_rgrp;
+			}
 			error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
 						   LM_ST_EXCLUSIVE, flags,
 						   &rs->rs_rgd_gh);
-- 
cgit v1.2.3


From 33ad5d54284adf110f6e78aa9c4f42d3d17d7f68 Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Wed, 29 Oct 2014 08:02:29 -0500
Subject: GFS2: Only increase rs_sizehint

If an application does a sequence of (1) big write, (2) little write
we don't necessarily want to reset the size hint based on the smaller
size. The fact that they did any big writes implies they may do more,
and therefore we should try to allocate bigger block reservations, even
if the last few were small writes. Therefore this patch changes function
gfs2_size_hint so that the size hint can only grow; it cannot shrink.
This is especially important where there are multiple writers.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/file.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 80dd44dca028..5ebe56831794 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -337,7 +337,8 @@ static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size)
 	size_t blks = (size + sdp->sd_sb.sb_bsize - 1) >> sdp->sd_sb.sb_bsize_shift;
 	int hint = min_t(size_t, INT_MAX, blks);
 
-	atomic_set(&ip->i_res->rs_sizehint, hint);
+	if (hint > atomic_read(&ip->i_res->rs_sizehint))
+		atomic_set(&ip->i_res->rs_sizehint, hint);
 }
 
 /**
-- 
cgit v1.2.3


From 1a8550332a7f0111306b6b34e44a7c696ef68c4d Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Wed, 29 Oct 2014 08:02:30 -0500
Subject: GFS2: If we use up our block reservation, request more next time

If we run out of blocks for a given multi-block allocation, we obviously
did not reserve enough. We should reserve more blocks for the next
reservation to reduce fragmentation. This patch increases the size hint
for reservations when they run out.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/rgrp.c | 3 +++
 fs/gfs2/rgrp.h | 1 +
 2 files changed, 4 insertions(+)

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index f4e4a0c5babe..9150207f365c 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -2251,6 +2251,9 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip,
 			trace_gfs2_rs(rs, TRACE_RS_CLAIM);
 			if (rs->rs_free && !ret)
 				goto out;
+			/* We used up our block reservation, so we should
+			   reserve more blocks next time. */
+			atomic_add(RGRP_RSRV_ADDBLKS, &rs->rs_sizehint);
 		}
 		__rs_deltree(rs);
 	}
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 5d8f085f7ade..b104f4af3afd 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -20,6 +20,7 @@
  */
 #define RGRP_RSRV_MINBYTES 8
 #define RGRP_RSRV_MINBLKS ((u32)(RGRP_RSRV_MINBYTES * GFS2_NBBY))
+#define RGRP_RSRV_ADDBLKS 64
 
 struct gfs2_rgrpd;
 struct gfs2_sbd;
-- 
cgit v1.2.3


From 9c9f1159a54c6163dd0b9d2085985d3c98dad11f Mon Sep 17 00:00:00 2001
From: Andrew Price <anprice@redhat.com>
Date: Wed, 12 Nov 2014 17:24:03 +0000
Subject: GFS2: Use inode_newsize_ok and get_write_access in fallocate

gfs2_fallocate wasn't checking inode_newsize_ok nor get_write_access.
Split out the context setup and inode locking pieces into a separate
function to make it more clear and add these missing calls.

inode_newsize_ok is called conditional on FALLOC_FL_KEEP_SIZE as there
is no need to enforce a file size limit if it isn't going to change.

Signed-off-by: Andrew Price <anprice@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/file.c | 66 ++++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 44 insertions(+), 22 deletions(-)

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 5ebe56831794..3786579dd348 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -797,8 +797,7 @@ static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len,
 	}
 }
 
-static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
-			   loff_t len)
+static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 {
 	struct inode *inode = file_inode(file);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -812,14 +811,9 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
 	loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1);
 	loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
 	loff_t max_chunk_size = UINT_MAX & bsize_mask;
-	struct gfs2_holder gh;
 
 	next = (next + 1) << sdp->sd_sb.sb_bsize_shift;
 
-	/* We only support the FALLOC_FL_KEEP_SIZE mode */
-	if (mode & ~FALLOC_FL_KEEP_SIZE)
-		return -EOPNOTSUPP;
-
 	offset &= bsize_mask;
 
 	len = next - offset;
@@ -830,17 +824,6 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
 	if (bytes == 0)
 		bytes = sdp->sd_sb.sb_bsize;
 
-	error = gfs2_rs_alloc(ip);
-	if (error)
-		return error;
-
-	mutex_lock(&inode->i_mutex);
-
-	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
-	error = gfs2_glock_nq(&gh);
-	if (unlikely(error))
-		goto out_uninit;
-
 	gfs2_size_hint(file, offset, len);
 
 	while (len > 0) {
@@ -853,8 +836,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
 		}
 		error = gfs2_quota_lock_check(ip);
 		if (error)
-			goto out_unlock;
-
+			return error;
 retry:
 		gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
 
@@ -898,18 +880,58 @@ retry:
 
 	if (error == 0)
 		error = generic_write_sync(file, pos, count);
-	goto out_unlock;
+	return error;
 
 out_trans_fail:
 	gfs2_inplace_release(ip);
 out_qunlock:
 	gfs2_quota_unlock(ip);
+	return error;
+}
+
+static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
+{
+	struct inode *inode = file_inode(file);
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_holder gh;
+	int ret;
+
+	if (mode & ~FALLOC_FL_KEEP_SIZE)
+		return -EOPNOTSUPP;
+
+	mutex_lock(&inode->i_mutex);
+
+	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+	ret = gfs2_glock_nq(&gh);
+	if (ret)
+		goto out_uninit;
+
+	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+	    (offset + len) > inode->i_size) {
+		ret = inode_newsize_ok(inode, offset + len);
+		if (ret)
+			goto out_unlock;
+	}
+
+	ret = get_write_access(inode);
+	if (ret)
+		goto out_unlock;
+
+	ret = gfs2_rs_alloc(ip);
+	if (ret)
+		goto out_putw;
+
+	ret = __gfs2_fallocate(file, mode, offset, len);
+	if (ret)
+		gfs2_rs_deltree(ip->i_res);
+out_putw:
+	put_write_access(inode);
 out_unlock:
 	gfs2_glock_dq(&gh);
 out_uninit:
 	gfs2_holder_uninit(&gh);
 	mutex_unlock(&inode->i_mutex);
-	return error;
+	return ret;
 }
 
 #ifdef CONFIG_GFS2_FS_LOCKING_DLM
-- 
cgit v1.2.3


From 1885867b84d58e3704ed175e0abac2f38889f34d Mon Sep 17 00:00:00 2001
From: Andrew Price <anprice@redhat.com>
Date: Wed, 12 Nov 2014 17:24:04 +0000
Subject: GFS2: Update i_size properly on fallocate

This addresses an issue caught by fsx where the inode size was not being
updated to the expected value after fallocate(2) with mode 0.

The problem was caused by the offset and len parameters being converted
to multiples of the file system's block size, so i_size would be rounded
up to the nearest block size multiple instead of the requested size.

This replaces the per-chunk i_size updates with a single i_size_write on
successful completion of the operation.  With this patch gfs2 gets
through a complete run of fsx.

For clarity, the check for (error == 0) following the loop is removed as
all failures before that point jump to out_* labels or return.

Signed-off-by: Andrew Price <anprice@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/file.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 3786579dd348..118216419cef 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -729,7 +729,6 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct buffer_head *dibh;
 	int error;
-	loff_t size = len;
 	unsigned int nr_blks;
 	sector_t lblock = offset >> inode->i_blkbits;
 
@@ -763,11 +762,6 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
 			goto out;
 		}
 	}
-	if (offset + size > inode->i_size && !(mode & FALLOC_FL_KEEP_SIZE))
-		i_size_write(inode, offset + size);
-
-	mark_inode_dirty(inode);
-
 out:
 	brelse(dibh);
 	return error;
@@ -878,9 +872,12 @@ retry:
 		gfs2_quota_unlock(ip);
 	}
 
-	if (error == 0)
-		error = generic_write_sync(file, pos, count);
-	return error;
+	if (!(mode & FALLOC_FL_KEEP_SIZE) && (pos + count) > inode->i_size) {
+		i_size_write(inode, pos + count);
+		mark_inode_dirty(inode);
+	}
+
+	return generic_write_sync(file, pos, count);
 
 out_trans_fail:
 	gfs2_inplace_release(ip);
-- 
cgit v1.2.3


From 98f1a696a1b2f6cf0c1ce6850b1516beda9c1269 Mon Sep 17 00:00:00 2001
From: Andrew Price <anprice@redhat.com>
Date: Wed, 12 Nov 2014 17:24:05 +0000
Subject: GFS2: Update timestamps on fallocate

gfs2_fallocate() wasn't updating ctime and mtime when modifying the
inode. Add a call to file_update_time() to do that.

Signed-off-by: Andrew Price <anprice@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/file.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 118216419cef..6e600abf694a 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -874,7 +874,8 @@ retry:
 
 	if (!(mode & FALLOC_FL_KEEP_SIZE) && (pos + count) > inode->i_size) {
 		i_size_write(inode, pos + count);
-		mark_inode_dirty(inode);
+		/* Marks the inode as dirty */
+		file_update_time(file);
 	}
 
 	return generic_write_sync(file, pos, count);
-- 
cgit v1.2.3


From 48b6bca6b7b8309697fc8a101793befe92d249d9 Mon Sep 17 00:00:00 2001
From: Benjamin Marzinski <bmarzins@redhat.com>
Date: Thu, 13 Nov 2014 20:42:03 -0600
Subject: fs: add freeze_super/thaw_super fs hooks

Currently, freezing a filesystem involves calling freeze_super, which locks
sb->s_umount and then calls the fs-specific freeze_fs hook. This makes it
hard for gfs2 (and potentially other cluster filesystems) to use the vfs
freezing code to do freezes on all the cluster nodes.

In order to communicate that a freeze has been requested, and to make sure
that only one node is trying to freeze at a time, gfs2 uses a glock
(sd_freeze_gl). The problem is that there is no hook for gfs2 to acquire
this lock before calling freeze_super. This means that two nodes can
attempt to freeze the filesystem by both calling freeze_super, acquiring
the sb->s_umount lock, and then attempting to grab the cluster glock
sd_freeze_gl. Only one will succeed, and the other will be stuck in
freeze_super, making it impossible to finish freezing the node.

To solve this problem, this patch adds the freeze_super and thaw_super
hooks.  If a filesystem implements these hooks, they are called instead of
the vfs freeze_super and thaw_super functions. This means that every
filesystem that implements these hooks must call the vfs freeze_super and
thaw_super functions itself within the hook function to make use of the vfs
freezing code.

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/block_dev.c     | 10 ++++++++--
 fs/ioctl.c         |  6 +++++-
 include/linux/fs.h |  2 ++
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 1d9c9f3754f8..b48c41bf0f86 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -235,7 +235,10 @@ struct super_block *freeze_bdev(struct block_device *bdev)
 	sb = get_active_super(bdev);
 	if (!sb)
 		goto out;
-	error = freeze_super(sb);
+	if (sb->s_op->freeze_super)
+		error = sb->s_op->freeze_super(sb);
+	else
+		error = freeze_super(sb);
 	if (error) {
 		deactivate_super(sb);
 		bdev->bd_fsfreeze_count--;
@@ -272,7 +275,10 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb)
 	if (!sb)
 		goto out;
 
-	error = thaw_super(sb);
+	if (sb->s_op->thaw_super)
+		error = sb->s_op->thaw_super(sb);
+	else
+		error = thaw_super(sb);
 	if (error) {
 		bdev->bd_fsfreeze_count++;
 		mutex_unlock(&bdev->bd_fsfreeze_mutex);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 8ac3fad36192..77c9a7812542 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -518,10 +518,12 @@ static int ioctl_fsfreeze(struct file *filp)
 		return -EPERM;
 
 	/* If filesystem doesn't support freeze feature, return. */
-	if (sb->s_op->freeze_fs == NULL)
+	if (sb->s_op->freeze_fs == NULL && sb->s_op->freeze_super == NULL)
 		return -EOPNOTSUPP;
 
 	/* Freeze */
+	if (sb->s_op->freeze_super)
+		return sb->s_op->freeze_super(sb);
 	return freeze_super(sb);
 }
 
@@ -533,6 +535,8 @@ static int ioctl_fsthaw(struct file *filp)
 		return -EPERM;
 
 	/* Thaw */
+	if (sb->s_op->thaw_super)
+		return sb->s_op->thaw_super(sb);
 	return thaw_super(sb);
 }
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9ab779e8a63c..b4a1d73c0d5d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1577,7 +1577,9 @@ struct super_operations {
 	void (*evict_inode) (struct inode *);
 	void (*put_super) (struct super_block *);
 	int (*sync_fs)(struct super_block *sb, int wait);
+	int (*freeze_super) (struct super_block *);
 	int (*freeze_fs) (struct super_block *);
+	int (*thaw_super) (struct super_block *);
 	int (*unfreeze_fs) (struct super_block *);
 	int (*statfs) (struct dentry *, struct kstatfs *);
 	int (*remount_fs) (struct super_block *, int *, char *);
-- 
cgit v1.2.3


From 2e60d7683c8d2ea21317f6d9f4cd3bf5428ce162 Mon Sep 17 00:00:00 2001
From: Benjamin Marzinski <bmarzins@redhat.com>
Date: Thu, 13 Nov 2014 20:42:04 -0600
Subject: GFS2: update freeze code to use freeze/thaw_super on all nodes

The current gfs2 freezing code is considerably more complicated than it
should be because it doesn't use the vfs freezing code on any node except
the one that begins the freeze.  This is because it needs to acquire a
cluster glock before calling the vfs code to prevent a deadlock, and
without the new freeze_super and thaw_super hooks, that was impossible. To
deal with the issue, gfs2 had to do some hacky locking tricks to make sure
that a frozen node couldn't be holding on a lock it needed to do the
unfreeze ioctl.

This patch makes use of the new hooks to simply the gfs2 locking code. Now,
all the nodes in the cluster freeze and thaw in exactly the same way. Every
node in the cluster caches the freeze glock in the shared state.  The new
freeze_super hook allows the freezing node to grab this freeze glock in
the exclusive state without first calling the vfs freeze_super function.
All the nodes in the cluster see this lock change, and call the vfs
freeze_super function. The vfs locking code guarantees that the nodes can't
get stuck holding the glocks necessary to unfreeze the system.  To
unfreeze, the freezing node uses the new thaw_super hook to drop the freeze
glock. Again, all the nodes notice this, reacquire the glock in shared mode
and call the vfs thaw_super function.

Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glops.c      |  26 ++++++------
 fs/gfs2/glops.h      |   2 +
 fs/gfs2/incore.h     |  18 ++++++---
 fs/gfs2/inode.c      |  40 +++++-------------
 fs/gfs2/log.c        |  42 +++++++++----------
 fs/gfs2/main.c       |  11 ++++-
 fs/gfs2/ops_fstype.c |  18 +++------
 fs/gfs2/super.c      | 112 ++++++++++++++++++++++++++++++++++-----------------
 fs/gfs2/super.h      |   1 +
 fs/gfs2/trans.c      |  17 ++++++--
 10 files changed, 161 insertions(+), 126 deletions(-)

diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 1cc0bba6313f..fe91951c3361 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -28,6 +28,8 @@
 #include "trans.h"
 #include "dir.h"
 
+struct workqueue_struct *gfs2_freeze_wq;
+
 static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh)
 {
 	fs_err(gl->gl_sbd, "AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page state 0x%lx\n",
@@ -94,11 +96,8 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
          * on the stack */
 	tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
 	tr.tr_ip = _RET_IP_;
-	sb_start_intwrite(sdp->sd_vfs);
-	if (gfs2_log_reserve(sdp, tr.tr_reserved) < 0) {
-		sb_end_intwrite(sdp->sd_vfs);
+	if (gfs2_log_reserve(sdp, tr.tr_reserved) < 0)
 		return;
-	}
 	WARN_ON_ONCE(current->journal_info);
 	current->journal_info = &tr;
 
@@ -469,20 +468,19 @@ static void inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
 
 static void freeze_go_sync(struct gfs2_glock *gl)
 {
+	int error = 0;
 	struct gfs2_sbd *sdp = gl->gl_sbd;
-	DEFINE_WAIT(wait);
 
 	if (gl->gl_state == LM_ST_SHARED &&
 	    test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
-		atomic_set(&sdp->sd_log_freeze, 1);
-		wake_up(&sdp->sd_logd_waitq);
-		do {
-			prepare_to_wait(&sdp->sd_log_frozen_wait, &wait,
-					TASK_UNINTERRUPTIBLE);
-			if (atomic_read(&sdp->sd_log_freeze))
-				io_schedule();
-		} while(atomic_read(&sdp->sd_log_freeze));
-		finish_wait(&sdp->sd_log_frozen_wait, &wait);
+		atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE);
+		error = freeze_super(sdp->sd_vfs);
+		if (error) {
+			printk(KERN_INFO "GFS2: couldn't freeze filesystem: %d\n", error);
+			gfs2_assert_withdraw(sdp, 0);
+		}
+		queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work);
+		gfs2_log_flush(sdp, NULL, FREEZE_FLUSH);
 	}
 }
 
diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h
index 7455d2629bcb..8ed1857c1a8d 100644
--- a/fs/gfs2/glops.h
+++ b/fs/gfs2/glops.h
@@ -12,6 +12,8 @@
 
 #include "incore.h"
 
+extern struct workqueue_struct *gfs2_freeze_wq;
+
 extern const struct gfs2_glock_operations gfs2_meta_glops;
 extern const struct gfs2_glock_operations gfs2_inode_glops;
 extern const struct gfs2_glock_operations gfs2_rgrp_glops;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 1b899187be5a..7a2dbbc0d634 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -588,6 +588,12 @@ enum {
 	SDF_SKIP_DLM_UNLOCK	= 8,
 };
 
+enum gfs2_freeze_state {
+	SFS_UNFROZEN		= 0,
+	SFS_STARTING_FREEZE	= 1,
+	SFS_FROZEN		= 2,
+};
+
 #define GFS2_FSNAME_LEN		256
 
 struct gfs2_inum_host {
@@ -685,6 +691,7 @@ struct gfs2_sbd {
 	struct gfs2_holder sd_live_gh;
 	struct gfs2_glock *sd_rename_gl;
 	struct gfs2_glock *sd_freeze_gl;
+	struct work_struct sd_freeze_work;
 	wait_queue_head_t sd_glock_wait;
 	atomic_t sd_glock_disposal;
 	struct completion sd_locking_init;
@@ -789,6 +796,9 @@ struct gfs2_sbd {
 	wait_queue_head_t sd_log_flush_wait;
 	int sd_log_error;
 
+	atomic_t sd_reserving_log;
+	wait_queue_head_t sd_reserving_log_wait;
+
 	unsigned int sd_log_flush_head;
 	u64 sd_log_flush_wrapped;
 
@@ -798,12 +808,8 @@ struct gfs2_sbd {
 
 	/* For quiescing the filesystem */
 	struct gfs2_holder sd_freeze_gh;
-	struct gfs2_holder sd_freeze_root_gh;
-	struct gfs2_holder sd_thaw_gh;
-	atomic_t sd_log_freeze;
-	atomic_t sd_frozen_root;
-	wait_queue_head_t sd_frozen_root_wait;
-	wait_queue_head_t sd_log_frozen_wait;
+	atomic_t sd_freeze_state;
+	struct mutex sd_freeze_mutex;
 
 	char sd_fsname[GFS2_FSNAME_LEN];
 	char sd_table_name[GFS2_FSNAME_LEN];
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index b41b5c7898da..04065e5af4b6 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1618,26 +1618,18 @@ int gfs2_permission(struct inode *inode, int mask)
 {
 	struct gfs2_inode *ip;
 	struct gfs2_holder i_gh;
-	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	int error;
 	int unlock = 0;
-	int frozen_root = 0;
 
 
 	ip = GFS2_I(inode);
 	if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
-		if (unlikely(gfs2_glock_is_held_excl(sdp->sd_freeze_gl) &&
-			     inode == sdp->sd_root_dir->d_inode &&
-			     atomic_inc_not_zero(&sdp->sd_frozen_root)))
-			frozen_root = 1;
-		else {
-			if (mask & MAY_NOT_BLOCK)
-				return -ECHILD;
-			error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
-			if (error)
-				return error;
-			unlock = 1;
-		}
+		if (mask & MAY_NOT_BLOCK)
+			return -ECHILD;
+		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
+		if (error)
+			return error;
+		unlock = 1;
 	}
 
 	if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
@@ -1646,8 +1638,6 @@ int gfs2_permission(struct inode *inode, int mask)
 		error = generic_permission(inode, mask);
 	if (unlock)
 		gfs2_glock_dq_uninit(&i_gh);
-	else if (frozen_root && atomic_dec_and_test(&sdp->sd_frozen_root))
-		wake_up(&sdp->sd_frozen_root_wait);
 
 	return error;
 }
@@ -1820,29 +1810,19 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	struct inode *inode = dentry->d_inode;
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_holder gh;
-	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	int error;
 	int unlock = 0;
-	int frozen_root = 0;
 
 	if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
-		if (unlikely(gfs2_glock_is_held_excl(sdp->sd_freeze_gl) &&
-			     inode == sdp->sd_root_dir->d_inode &&
-			     atomic_inc_not_zero(&sdp->sd_frozen_root)))
-			frozen_root = 1;
-		else {
-			error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
-			if (error)
-				return error;
-			unlock = 1;
-		}
+		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
+		if (error)
+			return error;
+		unlock = 1;
 	}
 
 	generic_fillattr(inode, stat);
 	if (unlock)
 		gfs2_glock_dq_uninit(&gh);
-	else if (frozen_root && atomic_dec_and_test(&sdp->sd_frozen_root))
-		wake_up(&sdp->sd_frozen_root_wait);
 
 	return 0;
 }
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 3966fadbcebd..536e7a6252cd 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -339,6 +339,7 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
 
 int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
 {
+	int ret = 0;
 	unsigned reserved_blks = 7 * (4096 / sdp->sd_vfs->s_blocksize);
 	unsigned wanted = blks + reserved_blks;
 	DEFINE_WAIT(wait);
@@ -362,9 +363,13 @@ retry:
 		} while(free_blocks <= wanted);
 		finish_wait(&sdp->sd_log_waitq, &wait);
 	}
+	atomic_inc(&sdp->sd_reserving_log);
 	if (atomic_cmpxchg(&sdp->sd_log_blks_free, free_blocks,
-				free_blocks - blks) != free_blocks)
+				free_blocks - blks) != free_blocks) {
+		if (atomic_dec_and_test(&sdp->sd_reserving_log))
+			wake_up(&sdp->sd_reserving_log_wait);
 		goto retry;
+	}
 	trace_gfs2_log_blocks(sdp, -blks);
 
 	/*
@@ -377,9 +382,11 @@ retry:
 	down_read(&sdp->sd_log_flush_lock);
 	if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) {
 		gfs2_log_release(sdp, blks);
-		return -EROFS;
+		ret = -EROFS;
 	}
-	return 0;
+	if (atomic_dec_and_test(&sdp->sd_reserving_log))
+		wake_up(&sdp->sd_reserving_log_wait);
+	return ret;
 }
 
 /**
@@ -652,9 +659,12 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
 	u32 hash;
 	int rw = WRITE_FLUSH_FUA | REQ_META;
 	struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
+	enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
 	lh = page_address(page);
 	clear_page(lh);
 
+	gfs2_assert_withdraw(sdp, (state != SFS_FROZEN));
+
 	tail = current_tail(sdp);
 
 	lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
@@ -695,6 +705,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
 		    enum gfs2_flush_type type)
 {
 	struct gfs2_trans *tr;
+	enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
 
 	down_write(&sdp->sd_log_flush_lock);
 
@@ -713,8 +724,12 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
 		INIT_LIST_HEAD(&tr->tr_ail1_list);
 		INIT_LIST_HEAD(&tr->tr_ail2_list);
 		tr->tr_first = sdp->sd_log_flush_head;
+		if (unlikely (state == SFS_FROZEN))
+			gfs2_assert_withdraw(sdp, !tr->tr_num_buf_new && !tr->tr_num_databuf_new);
 	}
 
+	if (unlikely(state == SFS_FROZEN))
+		gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
 	gfs2_assert_withdraw(sdp,
 			sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);
 
@@ -745,8 +760,6 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
 	spin_unlock(&sdp->sd_ail_lock);
 	gfs2_log_unlock(sdp);
 
-	if (atomic_read(&sdp->sd_log_freeze))
-		type = FREEZE_FLUSH;
 	if (type != NORMAL_FLUSH) {
 		if (!sdp->sd_log_idle) {
 			for (;;) {
@@ -763,21 +776,8 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
 		}
 		if (type == SHUTDOWN_FLUSH || type == FREEZE_FLUSH)
 			gfs2_log_shutdown(sdp);
-		if (type == FREEZE_FLUSH) {
-			int error;
-
-			atomic_set(&sdp->sd_log_freeze, 0);
-			wake_up(&sdp->sd_log_frozen_wait);
-			error = gfs2_glock_nq_init(sdp->sd_freeze_gl,
-						   LM_ST_SHARED, 0,
-						   &sdp->sd_thaw_gh);
-			if (error) {
-				printk(KERN_INFO "GFS2: couln't get freeze lock : %d\n", error);
-				gfs2_assert_withdraw(sdp, 0);
-			}
-			else
-				gfs2_glock_dq_uninit(&sdp->sd_thaw_gh);
-		}
+		if (type == FREEZE_FLUSH)
+			atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
 	}
 
 	trace_gfs2_log_flush(sdp, 0);
@@ -888,7 +888,7 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp)
 
 static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
 {
-	return (atomic_read(&sdp->sd_log_pinned) >= atomic_read(&sdp->sd_log_thresh1) || atomic_read(&sdp->sd_log_freeze));
+	return (atomic_read(&sdp->sd_log_pinned) >= atomic_read(&sdp->sd_log_thresh1));
 }
 
 static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp)
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 82b6ac829656..241a399bf83d 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -30,6 +30,7 @@
 #include "quota.h"
 #include "recovery.h"
 #include "dir.h"
+#include "glops.h"
 
 struct workqueue_struct *gfs2_control_wq;
 
@@ -161,9 +162,14 @@ static int __init init_gfs2_fs(void)
 	if (!gfs2_control_wq)
 		goto fail_recovery;
 
+	gfs2_freeze_wq = alloc_workqueue("freeze_workqueue", 0, 0);
+
+	if (!gfs2_freeze_wq)
+		goto fail_control;
+
 	gfs2_page_pool = mempool_create_page_pool(64, 0);
 	if (!gfs2_page_pool)
-		goto fail_control;
+		goto fail_freeze;
 
 	gfs2_register_debugfs();
 
@@ -171,6 +177,8 @@ static int __init init_gfs2_fs(void)
 
 	return 0;
 
+fail_freeze:
+	destroy_workqueue(gfs2_freeze_wq);
 fail_control:
 	destroy_workqueue(gfs2_control_wq);
 fail_recovery:
@@ -224,6 +232,7 @@ static void __exit exit_gfs2_fs(void)
 	unregister_filesystem(&gfs2meta_fs_type);
 	destroy_workqueue(gfs_recovery_wq);
 	destroy_workqueue(gfs2_control_wq);
+	destroy_workqueue(gfs2_freeze_wq);
 	list_lru_destroy(&gfs2_qd_lru);
 
 	rcu_barrier();
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index d3eae244076e..b5803acb8818 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -129,11 +129,11 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
 
 	init_rwsem(&sdp->sd_log_flush_lock);
 	atomic_set(&sdp->sd_log_in_flight, 0);
+	atomic_set(&sdp->sd_reserving_log, 0);
+	init_waitqueue_head(&sdp->sd_reserving_log_wait);
 	init_waitqueue_head(&sdp->sd_log_flush_wait);
-	init_waitqueue_head(&sdp->sd_log_frozen_wait);
-	atomic_set(&sdp->sd_log_freeze, 0);
-	atomic_set(&sdp->sd_frozen_root, 0);
-	init_waitqueue_head(&sdp->sd_frozen_root_wait);
+	atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
+	mutex_init(&sdp->sd_freeze_mutex);
 
 	return sdp;
 }
@@ -760,15 +760,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
 	set_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags);
 	gfs2_glock_dq_uninit(&ji_gh);
 	jindex = 0;
-	if (!sdp->sd_args.ar_spectator) {
-		error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, 0,
-					   &sdp->sd_thaw_gh);
-		if (error) {
-			fs_err(sdp, "can't acquire freeze glock: %d\n", error);
-			goto fail_jinode_gh;
-		}
-	}
-	gfs2_glock_dq_uninit(&sdp->sd_thaw_gh);
+	INIT_WORK(&sdp->sd_freeze_work, gfs2_freeze_func);
 	return 0;
 
 fail_jinode_gh:
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index a346f56c4c6d..5b327f837de7 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -26,6 +26,7 @@
 #include <linux/wait.h>
 #include <linux/writeback.h>
 #include <linux/backing-dev.h>
+#include <linux/kernel.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -399,7 +400,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
 {
 	struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
 	struct gfs2_glock *j_gl = ip->i_gl;
-	struct gfs2_holder thaw_gh;
+	struct gfs2_holder freeze_gh;
 	struct gfs2_log_header_host head;
 	int error;
 
@@ -408,7 +409,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
 		return error;
 
 	error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, 0,
-				   &thaw_gh);
+				   &freeze_gh);
 	if (error)
 		goto fail_threads;
 
@@ -434,13 +435,13 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
 
 	set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
 
-	gfs2_glock_dq_uninit(&thaw_gh);
+	gfs2_glock_dq_uninit(&freeze_gh);
 
 	return 0;
 
 fail:
-	thaw_gh.gh_flags |= GL_NOCACHE;
-	gfs2_glock_dq_uninit(&thaw_gh);
+	freeze_gh.gh_flags |= GL_NOCACHE;
+	gfs2_glock_dq_uninit(&freeze_gh);
 fail_threads:
 	kthread_stop(sdp->sd_quotad_process);
 	kthread_stop(sdp->sd_logd_process);
@@ -580,14 +581,15 @@ int gfs2_statfs_sync(struct super_block *sb, int type)
 	struct buffer_head *m_bh, *l_bh;
 	int error;
 
+	sb_start_write(sb);
 	error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
 				   &gh);
 	if (error)
-		return error;
+		goto out;
 
 	error = gfs2_meta_inode_buffer(m_ip, &m_bh);
 	if (error)
-		goto out;
+		goto out_unlock;
 
 	spin_lock(&sdp->sd_statfs_spin);
 	gfs2_statfs_change_in(m_sc, m_bh->b_data +
@@ -615,8 +617,10 @@ out_bh2:
 	brelse(l_bh);
 out_bh:
 	brelse(m_bh);
-out:
+out_unlock:
 	gfs2_glock_dq_uninit(&gh);
+out:
+	sb_end_write(sb);
 	return error;
 }
 
@@ -643,14 +647,8 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp,
 	struct lfcc *lfcc;
 	LIST_HEAD(list);
 	struct gfs2_log_header_host lh;
-	struct gfs2_inode *dip = GFS2_I(sdp->sd_root_dir->d_inode);
 	int error;
 
-	error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0,
-				   &sdp->sd_freeze_root_gh);
-	if (error)
-		return error;
-	atomic_set(&sdp->sd_frozen_root, 1);
 	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
 		lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL);
 		if (!lfcc) {
@@ -692,11 +690,6 @@ out:
 		gfs2_glock_dq_uninit(&lfcc->gh);
 		kfree(lfcc);
 	}
-	if (error) {
-		atomic_dec(&sdp->sd_frozen_root);
-		wait_event(sdp->sd_frozen_root_wait, atomic_read(&sdp->sd_frozen_root) == 0);
-		gfs2_glock_dq_uninit(&sdp->sd_freeze_root_gh);
-	}
 	return error;
 }
 
@@ -834,18 +827,14 @@ out:
 
 static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
 {
-	struct gfs2_holder thaw_gh;
+	struct gfs2_holder freeze_gh;
 	int error;
 
 	error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, GL_NOCACHE,
-				   &thaw_gh);
+				   &freeze_gh);
 	if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
 		return error;
 
-	down_write(&sdp->sd_log_flush_lock);
-	clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
-	up_write(&sdp->sd_log_flush_lock);
-
 	kthread_stop(sdp->sd_quotad_process);
 	kthread_stop(sdp->sd_logd_process);
 
@@ -853,11 +842,16 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
 	gfs2_quota_sync(sdp->sd_vfs, 0);
 	gfs2_statfs_sync(sdp->sd_vfs, 0);
 
+	down_write(&sdp->sd_log_flush_lock);
+	clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
+	up_write(&sdp->sd_log_flush_lock);
+
 	gfs2_log_flush(sdp, NULL, SHUTDOWN_FLUSH);
+	wait_event(sdp->sd_reserving_log_wait, atomic_read(&sdp->sd_reserving_log) == 0);
 	gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks);
 
-	if (thaw_gh.gh_gl)
-		gfs2_glock_dq_uninit(&thaw_gh);
+	if (freeze_gh.gh_gl)
+		gfs2_glock_dq_uninit(&freeze_gh);
 
 	gfs2_quota_cleanup(sdp);
 
@@ -943,11 +937,41 @@ static int gfs2_sync_fs(struct super_block *sb, int wait)
 	struct gfs2_sbd *sdp = sb->s_fs_info;
 
 	gfs2_quota_sync(sb, -1);
-	if (wait && sdp && !atomic_read(&sdp->sd_log_freeze))
+	if (wait && sdp)
 		gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
 	return 0;
 }
 
+void gfs2_freeze_func(struct work_struct *work)
+{
+	int error;
+	struct gfs2_holder freeze_gh;
+	struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_freeze_work);
+	struct super_block *sb = sdp->sd_vfs;
+
+	atomic_inc(&sb->s_active);
+	error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, 0,
+				   &freeze_gh);
+	if (error) {
+		printk(KERN_INFO "GFS2: couln't get freeze lock : %d\n", error);
+		gfs2_assert_withdraw(sdp, 0);
+	}
+	else {
+		atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
+		error = thaw_super(sb);
+		if (error) {
+			printk(KERN_INFO "GFS2: couldn't thaw filesystem: %d\n",
+			       error);
+			gfs2_assert_withdraw(sdp, 0);
+		}
+		if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
+			freeze_gh.gh_flags |= GL_NOCACHE;
+		gfs2_glock_dq_uninit(&freeze_gh);
+	}
+	deactivate_super(sb);
+	return;
+}
+
 /**
  * gfs2_freeze - prevent further writes to the filesystem
  * @sb: the VFS structure for the filesystem
@@ -957,10 +981,16 @@ static int gfs2_sync_fs(struct super_block *sb, int wait)
 static int gfs2_freeze(struct super_block *sb)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;
-	int error;
+	int error = 0;
 
-	if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
-		return -EINVAL;
+	mutex_lock(&sdp->sd_freeze_mutex);
+	if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN)
+		goto out;
+
+	if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
+		error = -EINVAL;
+		goto out;
+	}
 
 	for (;;) {
 		error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh);
@@ -980,7 +1010,10 @@ static int gfs2_freeze(struct super_block *sb)
 		fs_err(sdp, "retrying...\n");
 		msleep(1000);
 	}
-	return 0;
+	error = 0;
+out:
+	mutex_unlock(&sdp->sd_freeze_mutex);
+	return error;
 }
 
 /**
@@ -993,10 +1026,15 @@ static int gfs2_unfreeze(struct super_block *sb)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;
 
+	mutex_lock(&sdp->sd_freeze_mutex);
+        if (atomic_read(&sdp->sd_freeze_state) != SFS_FROZEN ||
+	    sdp->sd_freeze_gh.gh_gl == NULL) {
+		mutex_unlock(&sdp->sd_freeze_mutex);
+                return 0;
+	}
+
 	gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
-	atomic_dec(&sdp->sd_frozen_root);
-	wait_event(sdp->sd_frozen_root_wait, atomic_read(&sdp->sd_frozen_root) == 0);
-	gfs2_glock_dq_uninit(&sdp->sd_freeze_root_gh);
+	mutex_unlock(&sdp->sd_freeze_mutex);
 	return 0;
 }
 
@@ -1618,8 +1656,8 @@ const struct super_operations gfs2_super_ops = {
 	.evict_inode		= gfs2_evict_inode,
 	.put_super		= gfs2_put_super,
 	.sync_fs		= gfs2_sync_fs,
-	.freeze_fs 		= gfs2_freeze,
-	.unfreeze_fs		= gfs2_unfreeze,
+	.freeze_super		= gfs2_freeze,
+	.thaw_super		= gfs2_unfreeze,
 	.statfs			= gfs2_statfs,
 	.remount_fs		= gfs2_remount_fs,
 	.drop_inode		= gfs2_drop_inode,
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index 90e3322ffa10..73c97dccae21 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -45,6 +45,7 @@ extern void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc,
 extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
 			  struct buffer_head *l_bh);
 extern int gfs2_statfs_sync(struct super_block *sb, int type);
+extern void gfs2_freeze_func(struct work_struct *work);
 
 extern struct file_system_type gfs2_fs_type;
 extern struct file_system_type gfs2meta_fs_type;
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 42bfd3361979..88bff2430669 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -89,14 +89,17 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
 {
 	struct gfs2_trans *tr = current->journal_info;
 	s64 nbuf;
+	int alloced = tr->tr_alloced;
+
 	BUG_ON(!tr);
 	current->journal_info = NULL;
 
 	if (!tr->tr_touched) {
 		gfs2_log_release(sdp, tr->tr_reserved);
-		if (tr->tr_alloced)
+		if (alloced) {
 			kfree(tr);
-		sb_end_intwrite(sdp->sd_vfs);
+			sb_end_intwrite(sdp->sd_vfs);
+		}
 		return;
 	}
 
@@ -109,13 +112,14 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
 		gfs2_print_trans(tr);
 
 	gfs2_log_commit(sdp, tr);
-	if (tr->tr_alloced && !tr->tr_attached)
+	if (alloced && !tr->tr_attached)
 			kfree(tr);
 	up_read(&sdp->sd_log_flush_lock);
 
 	if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS)
 		gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
-	sb_end_intwrite(sdp->sd_vfs);
+	if (alloced)
+		sb_end_intwrite(sdp->sd_vfs);
 }
 
 static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl,
@@ -192,6 +196,7 @@ static void meta_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
 {
 	struct gfs2_meta_header *mh;
 	struct gfs2_trans *tr;
+	enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
 
 	tr = current->journal_info;
 	tr->tr_touched = 1;
@@ -205,6 +210,10 @@ static void meta_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
 		       (unsigned long long)bd->bd_bh->b_blocknr);
 		BUG();
 	}
+	if (unlikely(state == SFS_FROZEN)) {
+		printk(KERN_INFO "GFS2:adding buf while frozen\n");
+		gfs2_assert_withdraw(sdp, 0);
+	}
 	gfs2_pin(sdp, bd->bd_bh);
 	mh->__pad0 = cpu_to_be64(0);
 	mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
-- 
cgit v1.2.3


From 30badc9543490f41497c42f004db02f1e8a29341 Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Tue, 18 Nov 2014 11:31:23 +0100
Subject: GFS2: Deletion of unnecessary checks before two function calls

The functions iput() and put_pid() test whether their argument is NULL
and then return immediately. Thus the test around the call is not needed.

This issue was detected by using the Coccinelle software.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glock.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 8f0c19d1d943..a23524aa3eac 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -836,8 +836,7 @@ void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder *
 	gh->gh_flags = flags;
 	gh->gh_iflags = 0;
 	gh->gh_ip = _RET_IP_;
-	if (gh->gh_owner_pid)
-		put_pid(gh->gh_owner_pid);
+	put_pid(gh->gh_owner_pid);
 	gh->gh_owner_pid = get_pid(task_pid(current));
 }
 
-- 
cgit v1.2.3


From 571a4b57975aaaf50479a59882f1d44ae2737084 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Wed, 19 Nov 2014 19:34:49 +0000
Subject: GFS2: bugger off early if O_CREAT open finds a directory

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 04065e5af4b6..f41b2fd12416 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -624,6 +624,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	inode = gfs2_dir_search(dir, &dentry->d_name, !S_ISREG(mode) || excl);
 	error = PTR_ERR(inode);
 	if (!IS_ERR(inode)) {
+		if (S_ISDIR(inode->i_mode)) {
+			iput(inode);
+			inode = ERR_PTR(-EISDIR);
+			goto fail_gunlock;
+		}
 		d = d_splice_alias(inode, dentry);
 		error = PTR_ERR(d);
 		if (IS_ERR(d)) {
-- 
cgit v1.2.3


From 44bb31bac555b0faf2e7068f3a953d5b23883dc9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Wed, 19 Nov 2014 19:35:24 +0000
Subject: GFS2: gfs2_create_inode(): don't bother with d_splice_alias()

dentry is always hashed and negative, inode - non-error, non-NULL and
non-directory.  In such conditions d_splice_alias() is equivalent to
"d_instantiate(dentry, inode) and return NULL", which simplifies the
downstream code and is consistent with the "have to create a new object"
case.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index f41b2fd12416..9e8545bbc55c 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -596,7 +596,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	struct gfs2_inode *dip = GFS2_I(dir), *ip;
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
 	struct gfs2_glock *io_gl;
-	struct dentry *d;
 	int error, free_vfs_inode = 0;
 	u32 aflags = 0;
 	unsigned blocks = 1;
@@ -629,22 +628,13 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 			inode = ERR_PTR(-EISDIR);
 			goto fail_gunlock;
 		}
-		d = d_splice_alias(inode, dentry);
-		error = PTR_ERR(d);
-		if (IS_ERR(d)) {
-			inode = ERR_CAST(d);
-			goto fail_gunlock;
-		}
+		d_instantiate(dentry, inode);
 		error = 0;
 		if (file) {
-			if (S_ISREG(inode->i_mode)) {
-				WARN_ON(d != NULL);
+			if (S_ISREG(inode->i_mode))
 				error = finish_open(file, dentry, gfs2_open_common, opened);
-			} else {
-				error = finish_no_open(file, d);
-			}
-		} else {
-			dput(d);
+			else
+				error = finish_no_open(file, NULL);
 		}
 		gfs2_glock_dq_uninit(ghs);
 		return error;
-- 
cgit v1.2.3


From 3cdcf63ed2d169c82d70a506f3569e484cd9e7a0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Thu, 20 Nov 2014 05:18:38 +0000
Subject: GFS2: use kvfree() instead of open-coding it

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/dir.c   | 40 ++++++++--------------------------------
 fs/gfs2/quota.c |  9 ++-------
 2 files changed, 10 insertions(+), 39 deletions(-)

diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 5d4261ff5d23..c247fed4a9a6 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -365,22 +365,15 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip)
 
 	ret = gfs2_dir_read_data(ip, hc, hsize);
 	if (ret < 0) {
-		if (is_vmalloc_addr(hc))
-			vfree(hc);
-		else
-			kfree(hc);
+		kvfree(hc);
 		return ERR_PTR(ret);
 	}
 
 	spin_lock(&inode->i_lock);
-	if (ip->i_hash_cache) {
-		if (is_vmalloc_addr(hc))
-			vfree(hc);
-		else
-			kfree(hc);
-	} else {
+	if (ip->i_hash_cache)
+		kvfree(hc);
+	else
 		ip->i_hash_cache = hc;
-	}
 	spin_unlock(&inode->i_lock);
 
 	return ip->i_hash_cache;
@@ -396,10 +389,7 @@ void gfs2_dir_hash_inval(struct gfs2_inode *ip)
 {
 	__be64 *hc = ip->i_hash_cache;
 	ip->i_hash_cache = NULL;
-	if (is_vmalloc_addr(hc))
-		vfree(hc);
-	else
-		kfree(hc);
+	kvfree(hc);
 }
 
 static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent)
@@ -1168,10 +1158,7 @@ fail:
 	gfs2_dinode_out(dip, dibh->b_data);
 	brelse(dibh);
 out_kfree:
-	if (is_vmalloc_addr(hc2))
-		vfree(hc2);
-	else
-		kfree(hc2);
+	kvfree(hc2);
 	return error;
 }
 
@@ -1302,14 +1289,6 @@ static void *gfs2_alloc_sort_buffer(unsigned size)
 	return ptr;
 }
 
-static void gfs2_free_sort_buffer(void *ptr)
-{
-	if (is_vmalloc_addr(ptr))
-		vfree(ptr);
-	else
-		kfree(ptr);
-}
-
 static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx,
 			      int *copied, unsigned *depth,
 			      u64 leaf_no)
@@ -1393,7 +1372,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx,
 out_free:
 	for(i = 0; i < leaf; i++)
 		brelse(larr[i]);
-	gfs2_free_sort_buffer(larr);
+	kvfree(larr);
 out:
 	return error;
 }
@@ -2004,10 +1983,7 @@ out_rlist:
 	gfs2_rlist_free(&rlist);
 	gfs2_quota_unhold(dip);
 out:
-	if (is_vmalloc_addr(ht))
-		vfree(ht);
-	else
-		kfree(ht);
+	kvfree(ht);
 	return error;
 }
 
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 64b29f7f6b4c..c8b148bbdc8b 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1360,13 +1360,8 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
 
 	gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count));
 
-	if (sdp->sd_quota_bitmap) {
-		if (is_vmalloc_addr(sdp->sd_quota_bitmap))
-			vfree(sdp->sd_quota_bitmap);
-		else
-			kfree(sdp->sd_quota_bitmap);
-		sdp->sd_quota_bitmap = NULL;
-	}
+	kvfree(sdp->sd_quota_bitmap);
+	sdp->sd_quota_bitmap = NULL;
 }
 
 static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error)
-- 
cgit v1.2.3


From 9265f1d0c7593e3e7e1e94a4a83a6dea34230a35 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Thu, 20 Nov 2014 05:19:47 +0000
Subject: GFS2: gfs2_dir_get_hash_table(): avoiding deferred vfree() is easy
 here...

vfree() is allowed under spinlock these days, but it's cheaper when
it doesn't step into deferred case and here it's very easy to avoid.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/dir.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index c247fed4a9a6..c5a34f09e228 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -370,11 +370,12 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip)
 	}
 
 	spin_lock(&inode->i_lock);
-	if (ip->i_hash_cache)
-		kvfree(hc);
-	else
+	if (likely(!ip->i_hash_cache)) {
 		ip->i_hash_cache = hc;
+		hc = NULL;
+	}
 	spin_unlock(&inode->i_lock);
+	kvfree(hc);
 
 	return ip->i_hash_cache;
 }
-- 
cgit v1.2.3


From ec7d879c457611e540cb465c25f3040facbd1185 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Wed, 19 Nov 2014 19:35:58 +0000
Subject: GFS2: gfs2_atomic_open(): simplify the use of finish_no_open()

In ->atomic_open(inode, dentry, file, opened) calling finish_no_open(file, NULL)
is equivalent to dget(dentry); return finish_no_open(file, dentry);

No need to open-code that...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 9e8545bbc55c..9054002ebe70 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1245,11 +1245,8 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry,
 	if (d != NULL)
 		dentry = d;
 	if (dentry->d_inode) {
-		if (!(*opened & FILE_OPENED)) {
-			if (d == NULL)
-				dget(dentry);
-			return finish_no_open(file, dentry);
-		}
+		if (!(*opened & FILE_OPENED))
+			return finish_no_open(file, d);
 		dput(d);
 		return 0;
 	}
-- 
cgit v1.2.3