summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/dir.c1
-rw-r--r--fs/afs/inode.c8
-rw-r--r--fs/afs/internal.h1
-rw-r--r--fs/afs/write.c5
-rw-r--r--fs/aio.c2
-rw-r--r--fs/cifs/cifsacl.c1
-rw-r--r--fs/cifs/smb2ops.c88
-rw-r--r--fs/efivarfs/super.c1
-rw-r--r--fs/ext4/ext4.h9
-rw-r--r--fs/ext4/super.c15
-rw-r--r--fs/gfs2/glops.c13
-rw-r--r--fs/io_uring.c62
-rw-r--r--fs/jbd2/journal.c34
-rw-r--r--fs/jbd2/transaction.c31
-rw-r--r--fs/libfs.c6
-rw-r--r--fs/notify/fsnotify.c12
-rw-r--r--fs/ocfs2/super.c1
-rw-r--r--fs/proc/self.c7
-rw-r--r--fs/super.c49
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c8
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c2
-rw-r--r--fs/xfs/scrub/bmap.c10
-rw-r--r--fs/xfs/scrub/btree.c45
-rw-r--r--fs/xfs/scrub/dir.c21
-rw-r--r--fs/xfs/scrub/refcount.c8
-rw-r--r--fs/xfs/xfs_iomap.c29
-rw-r--r--fs/xfs/xfs_iwalk.c27
-rw-r--r--fs/xfs/xfs_mount.c11
-rw-r--r--fs/xfs/xfs_pnfs.c2
29 files changed, 327 insertions, 182 deletions
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 1bb5b9d7f0a2..9068d5578a26 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -823,6 +823,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
vp->cb_break_before = afs_calc_vnode_cb_break(vnode);
vp->vnode = vnode;
vp->put_vnode = true;
+ vp->speculative = true; /* vnode not locked */
}
}
}
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 0fe8844b4bee..b0d7b892090d 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -294,6 +294,13 @@ void afs_vnode_commit_status(struct afs_operation *op, struct afs_vnode_param *v
op->flags &= ~AFS_OPERATION_DIR_CONFLICT;
}
} else if (vp->scb.have_status) {
+ if (vp->dv_before + vp->dv_delta != vp->scb.status.data_version &&
+ vp->speculative)
+ /* Ignore the result of a speculative bulk status fetch
+ * if it splits around a modification op, thereby
+ * appearing to regress the data version.
+ */
+ goto out;
afs_apply_status(op, vp);
if (vp->scb.have_cb)
afs_apply_callback(op, vp);
@@ -305,6 +312,7 @@ void afs_vnode_commit_status(struct afs_operation *op, struct afs_vnode_param *v
}
}
+out:
write_sequnlock(&vnode->cb_lock);
if (vp->scb.have_status)
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 14d5d75f4b6e..0d150a29e39e 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -755,6 +755,7 @@ struct afs_vnode_param {
bool update_ctime:1; /* Need to update the ctime */
bool set_size:1; /* Must update i_size */
bool op_unlinked:1; /* True if file was unlinked by op */
+ bool speculative:1; /* T if speculative status fetch (no vnode lock) */
};
/*
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 50371207f327..c9195fc67fd8 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -169,11 +169,14 @@ int afs_write_end(struct file *file, struct address_space *mapping,
unsigned int f, from = pos & (PAGE_SIZE - 1);
unsigned int t, to = from + copied;
loff_t i_size, maybe_i_size;
- int ret;
+ int ret = 0;
_enter("{%llx:%llu},{%lx}",
vnode->fid.vid, vnode->fid.vnode, page->index);
+ if (copied == 0)
+ goto out;
+
maybe_i_size = pos + copied;
i_size = i_size_read(&vnode->vfs_inode);
diff --git a/fs/aio.c b/fs/aio.c
index c45c20d87538..6a21d8919409 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1572,7 +1572,7 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb,
* we return to userspace.
*/
if (S_ISREG(file_inode(file)->i_mode)) {
- __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true);
+ sb_start_write(file_inode(file)->i_sb);
__sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
}
req->ki_flags |= IOCB_WRITE;
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 23b21e943652..ef4784e72b1d 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -1266,6 +1266,7 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
cifs_dbg(VFS, "%s: error %d getting sec desc\n", __func__, rc);
} else if (mode_from_special_sid) {
rc = parse_sec_desc(cifs_sb, pntsd, acllen, fattr, true);
+ kfree(pntsd);
} else {
/* get approximated mode from ACL */
rc = parse_sec_desc(cifs_sb, pntsd, acllen, fattr, false);
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 504766cb6c19..dab94f67c988 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -264,7 +264,7 @@ smb2_revert_current_mid(struct TCP_Server_Info *server, const unsigned int val)
}
static struct mid_q_entry *
-smb2_find_mid(struct TCP_Server_Info *server, char *buf)
+__smb2_find_mid(struct TCP_Server_Info *server, char *buf, bool dequeue)
{
struct mid_q_entry *mid;
struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
@@ -281,6 +281,10 @@ smb2_find_mid(struct TCP_Server_Info *server, char *buf)
(mid->mid_state == MID_REQUEST_SUBMITTED) &&
(mid->command == shdr->Command)) {
kref_get(&mid->refcount);
+ if (dequeue) {
+ list_del_init(&mid->qhead);
+ mid->mid_flags |= MID_DELETED;
+ }
spin_unlock(&GlobalMid_Lock);
return mid;
}
@@ -289,6 +293,18 @@ smb2_find_mid(struct TCP_Server_Info *server, char *buf)
return NULL;
}
+static struct mid_q_entry *
+smb2_find_mid(struct TCP_Server_Info *server, char *buf)
+{
+ return __smb2_find_mid(server, buf, false);
+}
+
+static struct mid_q_entry *
+smb2_find_dequeue_mid(struct TCP_Server_Info *server, char *buf)
+{
+ return __smb2_find_mid(server, buf, true);
+}
+
static void
smb2_dump_detail(void *buf, struct TCP_Server_Info *server)
{
@@ -4356,7 +4372,8 @@ init_read_bvec(struct page **pages, unsigned int npages, unsigned int data_size,
static int
handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
char *buf, unsigned int buf_len, struct page **pages,
- unsigned int npages, unsigned int page_data_size)
+ unsigned int npages, unsigned int page_data_size,
+ bool is_offloaded)
{
unsigned int data_offset;
unsigned int data_len;
@@ -4378,7 +4395,8 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
if (server->ops->is_session_expired &&
server->ops->is_session_expired(buf)) {
- cifs_reconnect(server);
+ if (!is_offloaded)
+ cifs_reconnect(server);
return -1;
}
@@ -4402,7 +4420,10 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
cifs_dbg(FYI, "%s: server returned error %d\n",
__func__, rdata->result);
/* normal error on read response */
- dequeue_mid(mid, false);
+ if (is_offloaded)
+ mid->mid_state = MID_RESPONSE_RECEIVED;
+ else
+ dequeue_mid(mid, false);
return 0;
}
@@ -4426,7 +4447,10 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
cifs_dbg(FYI, "%s: data offset (%u) beyond end of smallbuf\n",
__func__, data_offset);
rdata->result = -EIO;
- dequeue_mid(mid, rdata->result);
+ if (is_offloaded)
+ mid->mid_state = MID_RESPONSE_MALFORMED;
+ else
+ dequeue_mid(mid, rdata->result);
return 0;
}
@@ -4442,21 +4466,30 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
cifs_dbg(FYI, "%s: data offset (%u) beyond 1st page of response\n",
__func__, data_offset);
rdata->result = -EIO;
- dequeue_mid(mid, rdata->result);
+ if (is_offloaded)
+ mid->mid_state = MID_RESPONSE_MALFORMED;
+ else
+ dequeue_mid(mid, rdata->result);
return 0;
}
if (data_len > page_data_size - pad_len) {
/* data_len is corrupt -- discard frame */
rdata->result = -EIO;
- dequeue_mid(mid, rdata->result);
+ if (is_offloaded)
+ mid->mid_state = MID_RESPONSE_MALFORMED;
+ else
+ dequeue_mid(mid, rdata->result);
return 0;
}
rdata->result = init_read_bvec(pages, npages, page_data_size,
cur_off, &bvec);
if (rdata->result != 0) {
- dequeue_mid(mid, rdata->result);
+ if (is_offloaded)
+ mid->mid_state = MID_RESPONSE_MALFORMED;
+ else
+ dequeue_mid(mid, rdata->result);
return 0;
}
@@ -4471,7 +4504,10 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
/* read response payload cannot be in both buf and pages */
WARN_ONCE(1, "buf can not contain only a part of read data");
rdata->result = -EIO;
- dequeue_mid(mid, rdata->result);
+ if (is_offloaded)
+ mid->mid_state = MID_RESPONSE_MALFORMED;
+ else
+ dequeue_mid(mid, rdata->result);
return 0;
}
@@ -4482,7 +4518,10 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
if (length < 0)
return length;
- dequeue_mid(mid, false);
+ if (is_offloaded)
+ mid->mid_state = MID_RESPONSE_RECEIVED;
+ else
+ dequeue_mid(mid, false);
return length;
}
@@ -4511,15 +4550,34 @@ static void smb2_decrypt_offload(struct work_struct *work)
}
dw->server->lstrp = jiffies;
- mid = smb2_find_mid(dw->server, dw->buf);
+ mid = smb2_find_dequeue_mid(dw->server, dw->buf);
if (mid == NULL)
cifs_dbg(FYI, "mid not found\n");
else {
mid->decrypted = true;
rc = handle_read_data(dw->server, mid, dw->buf,
dw->server->vals->read_rsp_size,
- dw->ppages, dw->npages, dw->len);
- mid->callback(mid);
+ dw->ppages, dw->npages, dw->len,
+ true);
+ if (rc >= 0) {
+#ifdef CONFIG_CIFS_STATS2
+ mid->when_received = jiffies;
+#endif
+ mid->callback(mid);
+ } else {
+ spin_lock(&GlobalMid_Lock);
+ if (dw->server->tcpStatus == CifsNeedReconnect) {
+ mid->mid_state = MID_RETRY_NEEDED;
+ spin_unlock(&GlobalMid_Lock);
+ mid->callback(mid);
+ } else {
+ mid->mid_state = MID_REQUEST_SUBMITTED;
+ mid->mid_flags &= ~(MID_DELETED);
+ list_add_tail(&mid->qhead,
+ &dw->server->pending_mid_q);
+ spin_unlock(&GlobalMid_Lock);
+ }
+ }
cifs_mid_q_entry_release(mid);
}
@@ -4622,7 +4680,7 @@ non_offloaded_decrypt:
(*mid)->decrypted = true;
rc = handle_read_data(server, *mid, buf,
server->vals->read_rsp_size,
- pages, npages, len);
+ pages, npages, len, false);
}
free_pages:
@@ -4765,7 +4823,7 @@ smb3_handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid)
char *buf = server->large_buf ? server->bigbuf : server->smallbuf;
return handle_read_data(server, mid, buf, server->pdu_size,
- NULL, 0, 0);
+ NULL, 0, 0, false);
}
static int
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 15880a68faad..f943fd0b0699 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -21,6 +21,7 @@ LIST_HEAD(efivarfs_list);
static void efivarfs_evict_inode(struct inode *inode)
{
clear_inode(inode);
+ kfree(inode->i_private);
}
static const struct super_operations efivarfs_ops = {
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1b399cafb15a..65ecaf96d0a4 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1231,13 +1231,13 @@ struct ext4_inode_info {
blocks */
#define EXT4_MOUNT2_HURD_COMPAT 0x00000004 /* Support HURD-castrated
file systems */
-#define EXT4_MOUNT2_DAX_NEVER 0x00000008 /* Do not allow Direct Access */
-#define EXT4_MOUNT2_DAX_INODE 0x00000010 /* For printing options only */
-
#define EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM 0x00000008 /* User explicitly
specified journal checksum */
#define EXT4_MOUNT2_JOURNAL_FAST_COMMIT 0x00000010 /* Journal fast commit */
+#define EXT4_MOUNT2_DAX_NEVER 0x00000020 /* Do not allow Direct Access */
+#define EXT4_MOUNT2_DAX_INODE 0x00000040 /* For printing options only */
+
#define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \
~EXT4_MOUNT_##opt
@@ -2695,7 +2695,8 @@ void ext4_insert_dentry(struct inode *inode,
struct ext4_filename *fname);
static inline void ext4_update_dx_flag(struct inode *inode)
{
- if (!ext4_has_feature_dir_index(inode->i_sb)) {
+ if (!ext4_has_feature_dir_index(inode->i_sb) &&
+ ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) {
/* ext4_iget() should have caught this... */
WARN_ON_ONCE(ext4_has_feature_metadata_csum(inode->i_sb));
ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c3b864588a0b..94472044f4c1 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -289,18 +289,7 @@ void ext4_superblock_csum_set(struct super_block *sb)
if (!ext4_has_metadata_csum(sb))
return;
- /*
- * Locking the superblock prevents the scenario
- * where:
- * 1) a first thread pauses during checksum calculation.
- * 2) a second thread updates the superblock, recalculates
- * the checksum, and updates s_checksum
- * 3) the first thread resumes and finishes its checksum calculation
- * and updates s_checksum with a potentially stale or torn value.
- */
- lock_buffer(EXT4_SB(sb)->s_sbh);
es->s_checksum = ext4_superblock_csum(sb, es);
- unlock_buffer(EXT4_SB(sb)->s_sbh);
}
ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
@@ -2649,10 +2638,6 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
} else if (test_opt2(sb, DAX_INODE)) {
SEQ_OPTS_PUTS("dax=inode");
}
-
- if (test_opt2(sb, JOURNAL_FAST_COMMIT))
- SEQ_OPTS_PUTS("fast_commit");
-
ext4_show_quota_options(seq, sb);
return 0;
}
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 6c1432d78dce..67f2921ae8d4 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -571,7 +571,18 @@ static int freeze_go_sync(struct gfs2_glock *gl)
int error = 0;
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
- if (gl->gl_req == LM_ST_EXCLUSIVE && !gfs2_withdrawn(sdp)) {
+ /*
+ * We need to check gl_state == LM_ST_SHARED here and not gl_req ==
+ * LM_ST_EXCLUSIVE. That's because when any node does a freeze,
+ * all the nodes should have the freeze glock in SH mode and they all
+ * call do_xmote: One for EX and the others for UN. They ALL must
+ * freeze locally, and they ALL must queue freeze work. The freeze_work
+ * calls freeze_func, which tries to reacquire the freeze glock in SH,
+ * effectively waiting for the thaw on the node who holds it in EX.
+ * Once thawed, the work func acquires the freeze glock in
+ * SH and everybody goes back to thawed.
+ */
+ if (gl->gl_state == LM_ST_SHARED && !gfs2_withdrawn(sdp)) {
atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE);
error = freeze_super(sdp->sd_vfs);
if (error) {
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 8018c7076b25..a8c136a1cf4e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -205,6 +205,7 @@ struct fixed_file_ref_node {
struct list_head file_list;
struct fixed_file_data *file_data;
struct llist_node llist;
+ bool done;
};
struct fixed_file_data {
@@ -478,6 +479,7 @@ struct io_sr_msg {
struct io_open {
struct file *file;
int dfd;
+ bool ignore_nonblock;
struct filename *filename;
struct open_how how;
unsigned long nofile;
@@ -2577,7 +2579,6 @@ static bool io_resubmit_prep(struct io_kiocb *req, int error)
}
end_req:
req_set_fail_links(req);
- io_req_complete(req, ret);
return false;
}
#endif
@@ -3547,8 +3548,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
* we return to userspace.
*/
if (req->flags & REQ_F_ISREG) {
- __sb_start_write(file_inode(req->file)->i_sb,
- SB_FREEZE_WRITE, true);
+ sb_start_write(file_inode(req->file)->i_sb);
__sb_writers_release(file_inode(req->file)->i_sb,
SB_FREEZE_WRITE);
}
@@ -3796,6 +3796,7 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
return ret;
}
req->open.nofile = rlimit(RLIMIT_NOFILE);
+ req->open.ignore_nonblock = false;
req->flags |= REQ_F_NEED_CLEANUP;
return 0;
}
@@ -3839,7 +3840,7 @@ static int io_openat2(struct io_kiocb *req, bool force_nonblock)
struct file *file;
int ret;
- if (force_nonblock)
+ if (force_nonblock && !req->open.ignore_nonblock)
return -EAGAIN;
ret = build_open_flags(&req->open.how, &op);
@@ -3854,6 +3855,21 @@ static int io_openat2(struct io_kiocb *req, bool force_nonblock)
if (IS_ERR(file)) {
put_unused_fd(ret);
ret = PTR_ERR(file);
+ /*
+ * A work-around to ensure that /proc/self works that way
+ * that it should - if we get -EOPNOTSUPP back, then assume
+ * that proc_self_get_link() failed us because we're in async
+ * context. We should be safe to retry this from the task
+ * itself with force_nonblock == false set, as it should not
+ * block on lookup. Would be nice to know this upfront and
+ * avoid the async dance, but doesn't seem feasible.
+ */
+ if (ret == -EOPNOTSUPP && io_wq_current_is_worker()) {
+ req->open.ignore_nonblock = true;
+ refcount_inc(&req->refs);
+ io_req_task_queue(req);
+ return 0;
+ }
} else {
fsnotify_open(file);
fd_install(ret, file);
@@ -6958,9 +6974,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
return -ENXIO;
spin_lock(&data->lock);
- if (!list_empty(&data->ref_list))
- ref_node = list_first_entry(&data->ref_list,
- struct fixed_file_ref_node, node);
+ ref_node = data->node;
spin_unlock(&data->lock);
if (ref_node)
percpu_ref_kill(&ref_node->refs);
@@ -7309,10 +7323,6 @@ static void __io_file_put_work(struct fixed_file_ref_node *ref_node)
kfree(pfile);
}
- spin_lock(&file_data->lock);
- list_del(&ref_node->node);
- spin_unlock(&file_data->lock);
-
percpu_ref_exit(&ref_node->refs);
kfree(ref_node);
percpu_ref_put(&file_data->refs);
@@ -7339,17 +7349,32 @@ static void io_file_put_work(struct work_struct *work)
static void io_file_data_ref_zero(struct percpu_ref *ref)
{
struct fixed_file_ref_node *ref_node;
+ struct fixed_file_data *data;
struct io_ring_ctx *ctx;
- bool first_add;
+ bool first_add = false;
int delay = HZ;
ref_node = container_of(ref, struct fixed_file_ref_node, refs);
- ctx = ref_node->file_data->ctx;
+ data = ref_node->file_data;
+ ctx = data->ctx;
+
+ spin_lock(&data->lock);
+ ref_node->done = true;
+
+ while (!list_empty(&data->ref_list)) {
+ ref_node = list_first_entry(&data->ref_list,
+ struct fixed_file_ref_node, node);
+ /* recycle ref nodes in order */
+ if (!ref_node->done)
+ break;
+ list_del(&ref_node->node);
+ first_add |= llist_add(&ref_node->llist, &ctx->file_put_llist);
+ }
+ spin_unlock(&data->lock);
- if (percpu_ref_is_dying(&ctx->file_data->refs))
+ if (percpu_ref_is_dying(&data->refs))
delay = 0;
- first_add = llist_add(&ref_node->llist, &ctx->file_put_llist);
if (!delay)
mod_delayed_work(system_wq, &ctx->file_put_work, 0);
else if (first_add)
@@ -7373,6 +7398,7 @@ static struct fixed_file_ref_node *alloc_fixed_file_ref_node(
INIT_LIST_HEAD(&ref_node->node);
INIT_LIST_HEAD(&ref_node->file_list);
ref_node->file_data = ctx->file_data;
+ ref_node->done = false;
return ref_node;
}
@@ -7468,7 +7494,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
file_data->node = ref_node;
spin_lock(&file_data->lock);
- list_add(&ref_node->node, &file_data->ref_list);
+ list_add_tail(&ref_node->node, &file_data->ref_list);
spin_unlock(&file_data->lock);
percpu_ref_get(&file_data->refs);
return ret;
@@ -7627,7 +7653,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
if (needs_switch) {
percpu_ref_kill(&data->node->refs);
spin_lock(&data->lock);
- list_add(&ref_node->node, &data->ref_list);
+ list_add_tail(&ref_node->node, &data->ref_list);
data->node = ref_node;
spin_unlock(&data->lock);
percpu_ref_get(&ctx->file_data->refs);
@@ -9226,6 +9252,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
* to a power-of-two, if it isn't already. We do NOT impose
* any cq vs sq ring sizing.
*/
+ p->cq_entries = roundup_pow_of_two(p->cq_entries);
if (p->cq_entries < p->sq_entries)
return -EINVAL;
if (p->cq_entries > IORING_MAX_CQ_ENTRIES) {
@@ -9233,7 +9260,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
return -EINVAL;
p->cq_entries = IORING_MAX_CQ_ENTRIES;
}
- p->cq_entries = roundup_pow_of_two(p->cq_entries);
} else {
p->cq_entries = 2 * p->sq_entries;
}
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 0c3d5e3b24b2..188f79d76988 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -566,12 +566,14 @@ static int __jbd2_journal_force_commit(journal_t *journal)
}
/**
- * Force and wait upon a commit if the calling process is not within
- * transaction. This is used for forcing out undo-protected data which contains
- * bitmaps, when the fs is running out of space.
+ * jbd2_journal_force_commit_nested - Force and wait upon a commit if the
+ * calling process is not within transaction.
*
* @journal: journal to force
* Returns true if progress was made.
+ *
+ * This is used for forcing out undo-protected data which contains
+ * bitmaps, when the fs is running out of space.
*/
int jbd2_journal_force_commit_nested(journal_t *journal)
{
@@ -582,7 +584,7 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
}
/**
- * int journal_force_commit() - force any uncommitted transactions
+ * jbd2_journal_force_commit() - force any uncommitted transactions
* @journal: journal to force
*
* Caller want unconditional commit. We can only force the running transaction
@@ -1881,7 +1883,7 @@ static int load_superblock(journal_t *journal)
/**
- * int jbd2_journal_load() - Read journal from disk.
+ * jbd2_journal_load() - Read journal from disk.
* @journal: Journal to act on.
*
* Given a journal_t structure which tells us which disk blocks contain
@@ -1951,7 +1953,7 @@ recovery_error:
}
/**
- * void jbd2_journal_destroy() - Release a journal_t structure.
+ * jbd2_journal_destroy() - Release a journal_t structure.
* @journal: Journal to act on.
*
* Release a journal_t structure once it is no longer in use by the
@@ -2028,7 +2030,7 @@ int jbd2_journal_destroy(journal_t *journal)
/**
- *int jbd2_journal_check_used_features() - Check if features specified are used.
+ * jbd2_journal_check_used_features() - Check if features specified are used.
* @journal: Journal to check.
* @compat: bitmask of compatible features
* @ro: bitmask of features that force read-only mount
@@ -2063,7 +2065,7 @@ int jbd2_journal_check_used_features(journal_t *journal, unsigned long compat,
}
/**
- * int jbd2_journal_check_available_features() - Check feature set in journalling layer
+ * jbd2_journal_check_available_features() - Check feature set in journalling layer
* @journal: Journal to check.
* @compat: bitmask of compatible features
* @ro: bitmask of features that force read-only mount
@@ -2126,7 +2128,7 @@ jbd2_journal_initialize_fast_commit(journal_t *journal)
}
/**
- * int jbd2_journal_set_features() - Mark a given journal feature in the superblock
+ * jbd2_journal_set_features() - Mark a given journal feature in the superblock
* @journal: Journal to act on.
* @compat: bitmask of compatible features
* @ro: bitmask of features that force read-only mount
@@ -2217,7 +2219,7 @@ int jbd2_journal_set_features(journal_t *journal, unsigned long compat,
}
/*
- * jbd2_journal_clear_features () - Clear a given journal feature in the
+ * jbd2_journal_clear_features() - Clear a given journal feature in the
* superblock
* @journal: Journal to act on.
* @compat: bitmask of compatible features
@@ -2246,7 +2248,7 @@ void jbd2_journal_clear_features(journal_t *journal, unsigned long compat,
EXPORT_SYMBOL(jbd2_journal_clear_features);
/**
- * int jbd2_journal_flush () - Flush journal
+ * jbd2_journal_flush() - Flush journal
* @journal: Journal to act on.
*
* Flush all data for a given journal to disk and empty the journal.
@@ -2321,7 +2323,7 @@ out:
}
/**
- * int jbd2_journal_wipe() - Wipe journal contents
+ * jbd2_journal_wipe() - Wipe journal contents
* @journal: Journal to act on.
* @write: flag (see below)
*
@@ -2362,7 +2364,7 @@ int jbd2_journal_wipe(journal_t *journal, int write)
}
/**
- * void jbd2_journal_abort () - Shutdown the journal immediately.
+ * jbd2_journal_abort () - Shutdown the journal immediately.
* @journal: the journal to shutdown.
* @errno: an error number to record in the journal indicating
* the reason for the shutdown.
@@ -2453,7 +2455,7 @@ void jbd2_journal_abort(journal_t *journal, int errno)
}
/**
- * int jbd2_journal_errno () - returns the journal's error state.
+ * jbd2_journal_errno() - returns the journal's error state.
* @journal: journal to examine.
*
* This is the errno number set with jbd2_journal_abort(), the last
@@ -2477,7 +2479,7 @@ int jbd2_journal_errno(journal_t *journal)
}
/**
- * int jbd2_journal_clear_err () - clears the journal's error state
+ * jbd2_journal_clear_err() - clears the journal's error state
* @journal: journal to act on.
*
* An error must be cleared or acked to take a FS out of readonly
@@ -2497,7 +2499,7 @@ int jbd2_journal_clear_err(journal_t *journal)
}
/**
- * void jbd2_journal_ack_err() - Ack journal err.
+ * jbd2_journal_ack_err() - Ack journal err.
* @journal: journal to act on.
*
* An error must be cleared or acked to take a FS out of readonly
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index d54f04674e8e..9396666b7314 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -519,7 +519,7 @@ EXPORT_SYMBOL(jbd2__journal_start);
/**
- * handle_t *jbd2_journal_start() - Obtain a new handle.
+ * jbd2_journal_start() - Obtain a new handle.
* @journal: Journal to start transaction on.
* @nblocks: number of block buffer we might modify
*
@@ -566,7 +566,7 @@ void jbd2_journal_free_reserved(handle_t *handle)
EXPORT_SYMBOL(jbd2_journal_free_reserved);
/**
- * int jbd2_journal_start_reserved() - start reserved handle
+ * jbd2_journal_start_reserved() - start reserved handle
* @handle: handle to start
* @type: for handle statistics
* @line_no: for handle statistics
@@ -620,7 +620,7 @@ int jbd2_journal_start_reserved(handle_t *handle, unsigned int type,
EXPORT_SYMBOL(jbd2_journal_start_reserved);
/**
- * int jbd2_journal_extend() - extend buffer credits.
+ * jbd2_journal_extend() - extend buffer credits.
* @handle: handle to 'extend'
* @nblocks: nr blocks to try to extend by.
* @revoke_records: number of revoke records to try to extend by.
@@ -745,7 +745,7 @@ static void stop_this_handle(handle_t *handle)
}
/**
- * int jbd2_journal_restart() - restart a handle .
+ * jbd2__journal_restart() - restart a handle .
* @handle: handle to restart
* @nblocks: nr credits requested
* @revoke_records: number of revoke record credits requested
@@ -815,7 +815,7 @@ int jbd2_journal_restart(handle_t *handle, int nblocks)
EXPORT_SYMBOL(jbd2_journal_restart);
/**
- * void jbd2_journal_lock_updates () - establish a transaction barrier.
+ * jbd2_journal_lock_updates () - establish a transaction barrier.
* @journal: Journal to establish a barrier on.
*
* This locks out any further updates from being started, and blocks
@@ -874,7 +874,7 @@ void jbd2_journal_lock_updates(journal_t *journal)
}
/**
- * void jbd2_journal_unlock_updates (journal_t* journal) - release barrier
+ * jbd2_journal_unlock_updates () - release barrier
* @journal: Journal to release the barrier on.
*
* Release a transaction barrier obtained with jbd2_journal_lock_updates().
@@ -1182,7 +1182,8 @@ out:
}
/**
- * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
+ * jbd2_journal_get_write_access() - notify intent to modify a buffer
+ * for metadata (not data) update.
* @handle: transaction to add buffer modifications to
* @bh: bh to be used for metadata writes
*
@@ -1226,7 +1227,7 @@ int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh)
* unlocked buffer beforehand. */
/**
- * int jbd2_journal_get_create_access () - notify intent to use newly created bh
+ * jbd2_journal_get_create_access () - notify intent to use newly created bh
* @handle: transaction to new buffer to
* @bh: new buffer.
*
@@ -1306,7 +1307,7 @@ out:
}
/**
- * int jbd2_journal_get_undo_access() - Notify intent to modify metadata with
+ * jbd2_journal_get_undo_access() - Notify intent to modify metadata with
* non-rewindable consequences
* @handle: transaction
* @bh: buffer to undo
@@ -1383,7 +1384,7 @@ out:
}
/**
- * void jbd2_journal_set_triggers() - Add triggers for commit writeout
+ * jbd2_journal_set_triggers() - Add triggers for commit writeout
* @bh: buffer to trigger on
* @type: struct jbd2_buffer_trigger_type containing the trigger(s).
*
@@ -1425,7 +1426,7 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh,
}
/**
- * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata
+ * jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata
* @handle: transaction to add buffer to.
* @bh: buffer to mark
*
@@ -1593,7 +1594,7 @@ out:
}
/**
- * void jbd2_journal_forget() - bforget() for potentially-journaled buffers.
+ * jbd2_journal_forget() - bforget() for potentially-journaled buffers.
* @handle: transaction handle
* @bh: bh to 'forget'
*
@@ -1762,7 +1763,7 @@ drop:
}
/**
- * int jbd2_journal_stop() - complete a transaction
+ * jbd2_journal_stop() - complete a transaction
* @handle: transaction to complete.
*
* All done for a particular handle.
@@ -2080,7 +2081,7 @@ out:
}
/**
- * int jbd2_journal_try_to_free_buffers() - try to free page buffers.
+ * jbd2_journal_try_to_free_buffers() - try to free page buffers.
* @journal: journal for operation
* @page: to try and free
*
@@ -2411,7 +2412,7 @@ zap_buffer_unlocked:
}
/**
- * void jbd2_journal_invalidatepage()
+ * jbd2_journal_invalidatepage()
* @journal: journal to use for flush...
* @page: page to flush
* @offset: start of the range to invalidate
diff --git a/fs/libfs.c b/fs/libfs.c
index fc34361c1489..7124c2e8df2f 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -959,7 +959,7 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
size_t len, loff_t *ppos)
{
struct simple_attr *attr;
- u64 val;
+ unsigned long long val;
size_t size;
ssize_t ret;
@@ -977,7 +977,9 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
goto out;
attr->set_buf[size] = '\0';
- val = simple_strtoll(attr->set_buf, NULL, 0);
+ ret = kstrtoull(attr->set_buf, 0, &val);
+ if (ret)
+ goto out;
ret = attr->set(attr->data, val);
if (ret == 0)
ret = len; /* on success, claim we got the whole input */
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index a960ec3a569a..8d3ad5ef2925 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -178,6 +178,7 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data,
struct inode *inode = d_inode(dentry);
struct dentry *parent;
bool parent_watched = dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED;
+ bool parent_needed, parent_interested;
__u32 p_mask;
struct inode *p_inode = NULL;
struct name_snapshot name;
@@ -193,7 +194,8 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data,
return 0;
parent = NULL;
- if (!parent_watched && !fsnotify_event_needs_parent(inode, mnt, mask))
+ parent_needed = fsnotify_event_needs_parent(inode, mnt, mask);
+ if (!parent_watched && !parent_needed)
goto notify;
/* Does parent inode care about events on children? */
@@ -205,17 +207,17 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data,
/*
* Include parent/name in notification either if some notification
- * groups require parent info (!parent_watched case) or the parent is
- * interested in this event.
+ * groups require parent info or the parent is interested in this event.
*/
- if (!parent_watched || (mask & p_mask & ALL_FSNOTIFY_EVENTS)) {
+ parent_interested = mask & p_mask & ALL_FSNOTIFY_EVENTS;
+ if (parent_needed || parent_interested) {
/* When notifying parent, child should be passed as data */
WARN_ON_ONCE(inode != fsnotify_data_inode(data, data_type));
/* Notify both parent and child with child name info */
take_dentry_name_snapshot(&name, dentry);
file_name = &name.name;
- if (parent_watched)
+ if (parent_interested)
mask |= FS_EVENT_ON_CHILD;
}
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 1d91dd1e8711..2febc76e9de7 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1713,6 +1713,7 @@ static void ocfs2_inode_init_once(void *data)
oi->ip_blkno = 0ULL;
oi->ip_clusters = 0;
+ oi->ip_next_orphan = NULL;
ocfs2_resv_init_once(&oi->ip_la_data_resv);
diff --git a/fs/proc/self.c b/fs/proc/self.c
index 72cd69bcaf4a..cc71ce3466dc 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -16,6 +16,13 @@ static const char *proc_self_get_link(struct dentry *dentry,
pid_t tgid = task_tgid_nr_ns(current, ns);
char *name;
+ /*
+ * Not currently supported. Once we can inherit all of struct pid,
+ * we can allow this.
+ */
+ if (current->flags & PF_KTHREAD)
+ return ERR_PTR(-EOPNOTSUPP);
+
if (!tgid)
return ERR_PTR(-ENOENT);
/* max length of unsigned int in decimal + NULL term */
diff --git a/fs/super.c b/fs/super.c
index a51c2083cd6b..98bb0629ee10 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1631,55 +1631,6 @@ int super_setup_bdi(struct super_block *sb)
}
EXPORT_SYMBOL(super_setup_bdi);
-/*
- * This is an internal function, please use sb_end_{write,pagefault,intwrite}
- * instead.
- */
-void __sb_end_write(struct super_block *sb, int level)
-{
- percpu_up_read(sb->s_writers.rw_sem + level-1);
-}
-EXPORT_SYMBOL(__sb_end_write);
-
-/*
- * This is an internal function, please use sb_start_{write,pagefault,intwrite}
- * instead.
- */
-int __sb_start_write(struct super_block *sb, int level, bool wait)
-{
- bool force_trylock = false;
- int ret = 1;
-
-#ifdef CONFIG_LOCKDEP
- /*
- * We want lockdep to tell us about possible deadlocks with freezing
- * but it's it bit tricky to properly instrument it. Getting a freeze
- * protection works as getting a read lock but there are subtle
- * problems. XFS for example gets freeze protection on internal level
- * twice in some cases, which is OK only because we already hold a
- * freeze protection also on higher level. Due to these cases we have
- * to use wait == F (trylock mode) which must not fail.
- */
- if (wait) {
- int i;
-
- for (i = 0; i < level - 1; i++)
- if (percpu_rwsem_is_held(sb->s_writers.rw_sem + i)) {
- force_trylock = true;
- break;
- }
- }
-#endif
- if (wait && !force_trylock)
- percpu_down_read(sb->s_writers.rw_sem + level-1);
- else
- ret = percpu_down_read_trylock(sb->s_writers.rw_sem + level-1);
-
- WARN_ON(force_trylock && !ret);
- return ret;
-}
-EXPORT_SYMBOL(__sb_start_write);
-
/**
* sb_wait_write - wait until all writers to given file system finish
* @sb: the super for which we wait
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index bb128db220ac..d6ef69ab1c67 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -515,7 +515,7 @@ xfs_attr_copy_value(
*========================================================================*/
/*
- * Query whether the requested number of additional bytes of extended
+ * Query whether the total requested number of attr fork bytes of extended
* attribute space will be able to fit inline.
*
* Returns zero if not, else the di_forkoff fork offset to be used in the
@@ -535,6 +535,12 @@ xfs_attr_shortform_bytesfit(
int maxforkoff;
int offset;
+ /*
+ * Check if the new size could fit at all first:
+ */
+ if (bytes > XFS_LITINO(mp))
+ return 0;
+
/* rounded down */
offset = (XFS_LITINO(mp) - bytes) >> 3;
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 340c83f76c80..2668ebe02865 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -1514,7 +1514,7 @@ xfs_rmap_convert_shared(
* record for our insertion point. This will also give us the record for
* start block contiguity tests.
*/
- error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, flags,
+ error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, oldext,
&PREV, &i);
if (error)
goto done;
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index 955302e7cdde..fed56d213a3f 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -113,6 +113,8 @@ xchk_bmap_get_rmap(
if (info->whichfork == XFS_ATTR_FORK)
rflags |= XFS_RMAP_ATTR_FORK;
+ if (irec->br_state == XFS_EXT_UNWRITTEN)
+ rflags |= XFS_RMAP_UNWRITTEN;
/*
* CoW staging extents are owned (on disk) by the refcountbt, so
@@ -216,13 +218,13 @@ xchk_bmap_xref_rmap(
* which doesn't track unwritten state.
*/
if (owner != XFS_RMAP_OWN_COW &&
- irec->br_state == XFS_EXT_UNWRITTEN &&
- !(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
+ !!(irec->br_state == XFS_EXT_UNWRITTEN) !=
+ !!(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
- if (info->whichfork == XFS_ATTR_FORK &&
- !(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
+ if (!!(info->whichfork == XFS_ATTR_FORK) !=
+ !!(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c
index f52a7b8256f9..debf392e0515 100644
--- a/fs/xfs/scrub/btree.c
+++ b/fs/xfs/scrub/btree.c
@@ -452,32 +452,41 @@ xchk_btree_check_minrecs(
int level,
struct xfs_btree_block *block)
{
- unsigned int numrecs;
- int ok_level;
-
- numrecs = be16_to_cpu(block->bb_numrecs);
+ struct xfs_btree_cur *cur = bs->cur;
+ unsigned int root_level = cur->bc_nlevels - 1;
+ unsigned int numrecs = be16_to_cpu(block->bb_numrecs);
/* More records than minrecs means the block is ok. */
- if (numrecs >= bs->cur->bc_ops->get_minrecs(bs->cur, level))
+ if (numrecs >= cur->bc_ops->get_minrecs(cur, level))
return;
/*
- * Certain btree blocks /can/ have fewer than minrecs records. Any
- * level greater than or equal to the level of the highest dedicated
- * btree block are allowed to violate this constraint.
- *
- * For a btree rooted in a block, the btree root can have fewer than
- * minrecs records. If the btree is rooted in an inode and does not
- * store records in the root, the direct children of the root and the
- * root itself can have fewer than minrecs records.
+ * For btrees rooted in the inode, it's possible that the root block
+ * contents spilled into a regular ondisk block because there wasn't
+ * enough space in the inode root. The number of records in that
+ * child block might be less than the standard minrecs, but that's ok
+ * provided that there's only one direct child of the root.
*/
- ok_level = bs->cur->bc_nlevels - 1;
- if (bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
- ok_level--;
- if (level >= ok_level)
+ if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+ level == cur->bc_nlevels - 2) {
+ struct xfs_btree_block *root_block;
+ struct xfs_buf *root_bp;
+ int root_maxrecs;
+
+ root_block = xfs_btree_get_block(cur, root_level, &root_bp);
+ root_maxrecs = cur->bc_ops->get_dmaxrecs(cur, root_level);
+ if (be16_to_cpu(root_block->bb_numrecs) != 1 ||
+ numrecs <= root_maxrecs)
+ xchk_btree_set_corrupt(bs->sc, cur, level);
return;
+ }
- xchk_btree_set_corrupt(bs->sc, bs->cur, level);
+ /*
+ * Otherwise, only the root level is allowed to have fewer than minrecs
+ * records or keyptrs.
+ */
+ if (level < root_level)
+ xchk_btree_set_corrupt(bs->sc, cur, level);
}
/*
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index 7c432997edad..b045e95c2ea7 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -558,14 +558,27 @@ xchk_directory_leaf1_bestfree(
/* Check all the bestfree entries. */
for (i = 0; i < bestcount; i++, bestp++) {
best = be16_to_cpu(*bestp);
- if (best == NULLDATAOFF)
- continue;
error = xfs_dir3_data_read(sc->tp, sc->ip,
- i * args->geo->fsbcount, 0, &dbp);
+ xfs_dir2_db_to_da(args->geo, i),
+ XFS_DABUF_MAP_HOLE_OK,
+ &dbp);
if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk,
&error))
break;
- xchk_directory_check_freesp(sc, lblk, dbp, best);
+
+ if (!dbp) {
+ if (best != NULLDATAOFF) {
+ xchk_fblock_set_corrupt(sc, XFS_DATA_FORK,
+ lblk);
+ break;
+ }
+ continue;
+ }
+
+ if (best == NULLDATAOFF)
+ xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+ else
+ xchk_directory_check_freesp(sc, lblk, dbp, best);
xfs_trans_brelse(sc->tp, dbp);
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
break;
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
index beaeb6fa3119..dd672e6bbc75 100644
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -170,7 +170,6 @@ xchk_refcountbt_process_rmap_fragments(
*/
INIT_LIST_HEAD(&worklist);
rbno = NULLAGBLOCK;
- nr = 1;
/* Make sure the fragments actually /are/ in agbno order. */
bno = 0;
@@ -184,15 +183,14 @@ xchk_refcountbt_process_rmap_fragments(
* Find all the rmaps that start at or before the refc extent,
* and put them on the worklist.
*/
+ nr = 0;
list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
- if (frag->rm.rm_startblock > refchk->bno)
- goto done;
+ if (frag->rm.rm_startblock > refchk->bno || nr > target_nr)
+ break;
bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
if (bno < rbno)
rbno = bno;
list_move_tail(&frag->list, &worklist);
- if (nr == target_nr)
- break;
nr++;
}
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 3abb8b9d6f4c..7b9ff824e82d 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -706,6 +706,23 @@ relock:
return 0;
}
+/*
+ * Check that the imap we are going to return to the caller spans the entire
+ * range that the caller requested for the IO.
+ */
+static bool
+imap_spans_range(
+ struct xfs_bmbt_irec *imap,
+ xfs_fileoff_t offset_fsb,
+ xfs_fileoff_t end_fsb)
+{
+ if (imap->br_startoff > offset_fsb)
+ return false;
+ if (imap->br_startoff + imap->br_blockcount < end_fsb)
+ return false;
+ return true;
+}
+
static int
xfs_direct_write_iomap_begin(
struct inode *inode,
@@ -766,6 +783,18 @@ xfs_direct_write_iomap_begin(
if (imap_needs_alloc(inode, flags, &imap, nimaps))
goto allocate_blocks;
+ /*
+ * NOWAIT IO needs to span the entire requested IO with a single map so
+ * that we avoid partial IO failures due to the rest of the IO range not
+ * covered by this map triggering an EAGAIN condition when it is
+ * subsequently mapped and aborting the IO.
+ */
+ if ((flags & IOMAP_NOWAIT) &&
+ !imap_spans_range(&imap, offset_fsb, end_fsb)) {
+ error = -EAGAIN;
+ goto out_unlock;
+ }
+
xfs_iunlock(ip, lockmode);
trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags);
diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c
index 233dcc8784db..2a45138831e3 100644
--- a/fs/xfs/xfs_iwalk.c
+++ b/fs/xfs/xfs_iwalk.c
@@ -55,6 +55,9 @@ struct xfs_iwalk_ag {
/* Where do we start the traversal? */
xfs_ino_t startino;
+ /* What was the last inode number we saw when iterating the inobt? */
+ xfs_ino_t lastino;
+
/* Array of inobt records we cache. */
struct xfs_inobt_rec_incore *recs;
@@ -301,6 +304,9 @@ xfs_iwalk_ag_start(
if (XFS_IS_CORRUPT(mp, *has_more != 1))
return -EFSCORRUPTED;
+ iwag->lastino = XFS_AGINO_TO_INO(mp, agno,
+ irec->ir_startino + XFS_INODES_PER_CHUNK - 1);
+
/*
* If the LE lookup yielded an inobt record before the cursor position,
* skip it and see if there's another one after it.
@@ -347,15 +353,17 @@ xfs_iwalk_run_callbacks(
struct xfs_mount *mp = iwag->mp;
struct xfs_trans *tp = iwag->tp;
struct xfs_inobt_rec_incore *irec;
- xfs_agino_t restart;
+ xfs_agino_t next_agino;
int error;
+ next_agino = XFS_INO_TO_AGINO(mp, iwag->lastino) + 1;
+
ASSERT(iwag->nr_recs > 0);
/* Delete cursor but remember the last record we cached... */
xfs_iwalk_del_inobt(tp, curpp, agi_bpp, 0);
irec = &iwag->recs[iwag->nr_recs - 1];
- restart = irec->ir_startino + XFS_INODES_PER_CHUNK - 1;
+ ASSERT(next_agino == irec->ir_startino + XFS_INODES_PER_CHUNK);
error = xfs_iwalk_ag_recs(iwag);
if (error)
@@ -372,7 +380,7 @@ xfs_iwalk_run_callbacks(
if (error)
return error;
- return xfs_inobt_lookup(*curpp, restart, XFS_LOOKUP_GE, has_more);
+ return xfs_inobt_lookup(*curpp, next_agino, XFS_LOOKUP_GE, has_more);
}
/* Walk all inodes in a single AG, from @iwag->startino to the end of the AG. */
@@ -396,6 +404,7 @@ xfs_iwalk_ag(
while (!error && has_more) {
struct xfs_inobt_rec_incore *irec;
+ xfs_ino_t rec_fsino;
cond_resched();
if (xfs_pwork_want_abort(&iwag->pwork))
@@ -407,6 +416,15 @@ xfs_iwalk_ag(
if (error || !has_more)
break;
+ /* Make sure that we always move forward. */
+ rec_fsino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino);
+ if (iwag->lastino != NULLFSINO &&
+ XFS_IS_CORRUPT(mp, iwag->lastino >= rec_fsino)) {
+ error = -EFSCORRUPTED;
+ goto out;
+ }
+ iwag->lastino = rec_fsino + XFS_INODES_PER_CHUNK - 1;
+
/* No allocated inodes in this chunk; skip it. */
if (iwag->skip_empty && irec->ir_freecount == irec->ir_count) {
error = xfs_btree_increment(cur, 0, &has_more);
@@ -535,6 +553,7 @@ xfs_iwalk(
.trim_start = 1,
.skip_empty = 1,
.pwork = XFS_PWORK_SINGLE_THREADED,
+ .lastino = NULLFSINO,
};
xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino);
int error;
@@ -623,6 +642,7 @@ xfs_iwalk_threaded(
iwag->data = data;
iwag->startino = startino;
iwag->sz_recs = xfs_iwalk_prefetch(inode_records);
+ iwag->lastino = NULLFSINO;
xfs_pwork_queue(&pctl, &iwag->pwork);
startino = XFS_AGINO_TO_INO(mp, agno + 1, 0);
if (flags & XFS_INOBT_WALK_SAME_AG)
@@ -696,6 +716,7 @@ xfs_inobt_walk(
.startino = startino,
.sz_recs = xfs_inobt_walk_prefetch(inobt_records),
.pwork = XFS_PWORK_SINGLE_THREADED,
+ .lastino = NULLFSINO,
};
xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino);
int error;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 150ee5cb8645..7110507a2b6b 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -194,20 +194,25 @@ xfs_initialize_perag(
}
pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
- if (!pag)
+ if (!pag) {
+ error = -ENOMEM;
goto out_unwind_new_pags;
+ }
pag->pag_agno = index;
pag->pag_mount = mp;
spin_lock_init(&pag->pag_ici_lock);
INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
- if (xfs_buf_hash_init(pag))
+
+ error = xfs_buf_hash_init(pag);
+ if (error)
goto out_free_pag;
init_waitqueue_head(&pag->pagb_wait);
spin_lock_init(&pag->pagb_lock);
pag->pagb_count = 0;
pag->pagb_tree = RB_ROOT;
- if (radix_tree_preload(GFP_NOFS))
+ error = radix_tree_preload(GFP_NOFS);
+ if (error)
goto out_hash_destroy;
spin_lock(&mp->m_perag_lock);
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index b101feb2aab4..f3082a957d5e 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -134,7 +134,7 @@ xfs_fs_map_blocks(
goto out_unlock;
error = invalidate_inode_pages2(inode->i_mapping);
if (WARN_ON_ONCE(error))
- return error;
+ goto out_unlock;
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + length);
offset_fsb = XFS_B_TO_FSBT(mp, offset);