diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/afs/dir.c | 1 | ||||
-rw-r--r-- | fs/afs/inode.c | 8 | ||||
-rw-r--r-- | fs/afs/internal.h | 1 | ||||
-rw-r--r-- | fs/afs/write.c | 5 | ||||
-rw-r--r-- | fs/aio.c | 2 | ||||
-rw-r--r-- | fs/cifs/cifsacl.c | 1 | ||||
-rw-r--r-- | fs/cifs/smb2ops.c | 88 | ||||
-rw-r--r-- | fs/efivarfs/super.c | 1 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 9 | ||||
-rw-r--r-- | fs/ext4/super.c | 15 | ||||
-rw-r--r-- | fs/gfs2/glops.c | 13 | ||||
-rw-r--r-- | fs/io_uring.c | 62 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 34 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 31 | ||||
-rw-r--r-- | fs/libfs.c | 6 | ||||
-rw-r--r-- | fs/notify/fsnotify.c | 12 | ||||
-rw-r--r-- | fs/ocfs2/super.c | 1 | ||||
-rw-r--r-- | fs/proc/self.c | 7 | ||||
-rw-r--r-- | fs/super.c | 49 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_attr_leaf.c | 8 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_rmap.c | 2 | ||||
-rw-r--r-- | fs/xfs/scrub/bmap.c | 10 | ||||
-rw-r--r-- | fs/xfs/scrub/btree.c | 45 | ||||
-rw-r--r-- | fs/xfs/scrub/dir.c | 21 | ||||
-rw-r--r-- | fs/xfs/scrub/refcount.c | 8 | ||||
-rw-r--r-- | fs/xfs/xfs_iomap.c | 29 | ||||
-rw-r--r-- | fs/xfs/xfs_iwalk.c | 27 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.c | 11 | ||||
-rw-r--r-- | fs/xfs/xfs_pnfs.c | 2 |
29 files changed, 327 insertions, 182 deletions
diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 1bb5b9d7f0a2..9068d5578a26 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -823,6 +823,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, vp->cb_break_before = afs_calc_vnode_cb_break(vnode); vp->vnode = vnode; vp->put_vnode = true; + vp->speculative = true; /* vnode not locked */ } } } diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 0fe8844b4bee..b0d7b892090d 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -294,6 +294,13 @@ void afs_vnode_commit_status(struct afs_operation *op, struct afs_vnode_param *v op->flags &= ~AFS_OPERATION_DIR_CONFLICT; } } else if (vp->scb.have_status) { + if (vp->dv_before + vp->dv_delta != vp->scb.status.data_version && + vp->speculative) + /* Ignore the result of a speculative bulk status fetch + * if it splits around a modification op, thereby + * appearing to regress the data version. + */ + goto out; afs_apply_status(op, vp); if (vp->scb.have_cb) afs_apply_callback(op, vp); @@ -305,6 +312,7 @@ void afs_vnode_commit_status(struct afs_operation *op, struct afs_vnode_param *v } } +out: write_sequnlock(&vnode->cb_lock); if (vp->scb.have_status) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 14d5d75f4b6e..0d150a29e39e 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -755,6 +755,7 @@ struct afs_vnode_param { bool update_ctime:1; /* Need to update the ctime */ bool set_size:1; /* Must update i_size */ bool op_unlinked:1; /* True if file was unlinked by op */ + bool speculative:1; /* T if speculative status fetch (no vnode lock) */ }; /* diff --git a/fs/afs/write.c b/fs/afs/write.c index 50371207f327..c9195fc67fd8 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -169,11 +169,14 @@ int afs_write_end(struct file *file, struct address_space *mapping, unsigned int f, from = pos & (PAGE_SIZE - 1); unsigned int t, to = from + copied; loff_t i_size, maybe_i_size; - int ret; + int ret = 0; _enter("{%llx:%llu},{%lx}", vnode->fid.vid, vnode->fid.vnode, page->index); + if (copied == 0) + goto out; + maybe_i_size = pos + copied; i_size = i_size_read(&vnode->vfs_inode); @@ -1572,7 +1572,7 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb, * we return to userspace. */ if (S_ISREG(file_inode(file)->i_mode)) { - __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true); + sb_start_write(file_inode(file)->i_sb); __sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE); } req->ki_flags |= IOCB_WRITE; diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 23b21e943652..ef4784e72b1d 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -1266,6 +1266,7 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, cifs_dbg(VFS, "%s: error %d getting sec desc\n", __func__, rc); } else if (mode_from_special_sid) { rc = parse_sec_desc(cifs_sb, pntsd, acllen, fattr, true); + kfree(pntsd); } else { /* get approximated mode from ACL */ rc = parse_sec_desc(cifs_sb, pntsd, acllen, fattr, false); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 504766cb6c19..dab94f67c988 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -264,7 +264,7 @@ smb2_revert_current_mid(struct TCP_Server_Info *server, const unsigned int val) } static struct mid_q_entry * -smb2_find_mid(struct TCP_Server_Info *server, char *buf) +__smb2_find_mid(struct TCP_Server_Info *server, char *buf, bool dequeue) { struct mid_q_entry *mid; struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf; @@ -281,6 +281,10 @@ smb2_find_mid(struct TCP_Server_Info *server, char *buf) (mid->mid_state == MID_REQUEST_SUBMITTED) && (mid->command == shdr->Command)) { kref_get(&mid->refcount); + if (dequeue) { + list_del_init(&mid->qhead); + mid->mid_flags |= MID_DELETED; + } spin_unlock(&GlobalMid_Lock); return mid; } @@ -289,6 +293,18 @@ smb2_find_mid(struct TCP_Server_Info *server, char *buf) return NULL; } +static struct mid_q_entry * +smb2_find_mid(struct TCP_Server_Info *server, char *buf) +{ + return __smb2_find_mid(server, buf, false); +} + +static struct mid_q_entry * +smb2_find_dequeue_mid(struct TCP_Server_Info *server, char *buf) +{ + return __smb2_find_mid(server, buf, true); +} + static void smb2_dump_detail(void *buf, struct TCP_Server_Info *server) { @@ -4356,7 +4372,8 @@ init_read_bvec(struct page **pages, unsigned int npages, unsigned int data_size, static int handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, char *buf, unsigned int buf_len, struct page **pages, - unsigned int npages, unsigned int page_data_size) + unsigned int npages, unsigned int page_data_size, + bool is_offloaded) { unsigned int data_offset; unsigned int data_len; @@ -4378,7 +4395,8 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, if (server->ops->is_session_expired && server->ops->is_session_expired(buf)) { - cifs_reconnect(server); + if (!is_offloaded) + cifs_reconnect(server); return -1; } @@ -4402,7 +4420,10 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, cifs_dbg(FYI, "%s: server returned error %d\n", __func__, rdata->result); /* normal error on read response */ - dequeue_mid(mid, false); + if (is_offloaded) + mid->mid_state = MID_RESPONSE_RECEIVED; + else + dequeue_mid(mid, false); return 0; } @@ -4426,7 +4447,10 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, cifs_dbg(FYI, "%s: data offset (%u) beyond end of smallbuf\n", __func__, data_offset); rdata->result = -EIO; - dequeue_mid(mid, rdata->result); + if (is_offloaded) + mid->mid_state = MID_RESPONSE_MALFORMED; + else + dequeue_mid(mid, rdata->result); return 0; } @@ -4442,21 +4466,30 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, cifs_dbg(FYI, "%s: data offset (%u) beyond 1st page of response\n", __func__, data_offset); rdata->result = -EIO; - dequeue_mid(mid, rdata->result); + if (is_offloaded) + mid->mid_state = MID_RESPONSE_MALFORMED; + else + dequeue_mid(mid, rdata->result); return 0; } if (data_len > page_data_size - pad_len) { /* data_len is corrupt -- discard frame */ rdata->result = -EIO; - dequeue_mid(mid, rdata->result); + if (is_offloaded) + mid->mid_state = MID_RESPONSE_MALFORMED; + else + dequeue_mid(mid, rdata->result); return 0; } rdata->result = init_read_bvec(pages, npages, page_data_size, cur_off, &bvec); if (rdata->result != 0) { - dequeue_mid(mid, rdata->result); + if (is_offloaded) + mid->mid_state = MID_RESPONSE_MALFORMED; + else + dequeue_mid(mid, rdata->result); return 0; } @@ -4471,7 +4504,10 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, /* read response payload cannot be in both buf and pages */ WARN_ONCE(1, "buf can not contain only a part of read data"); rdata->result = -EIO; - dequeue_mid(mid, rdata->result); + if (is_offloaded) + mid->mid_state = MID_RESPONSE_MALFORMED; + else + dequeue_mid(mid, rdata->result); return 0; } @@ -4482,7 +4518,10 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, if (length < 0) return length; - dequeue_mid(mid, false); + if (is_offloaded) + mid->mid_state = MID_RESPONSE_RECEIVED; + else + dequeue_mid(mid, false); return length; } @@ -4511,15 +4550,34 @@ static void smb2_decrypt_offload(struct work_struct *work) } dw->server->lstrp = jiffies; - mid = smb2_find_mid(dw->server, dw->buf); + mid = smb2_find_dequeue_mid(dw->server, dw->buf); if (mid == NULL) cifs_dbg(FYI, "mid not found\n"); else { mid->decrypted = true; rc = handle_read_data(dw->server, mid, dw->buf, dw->server->vals->read_rsp_size, - dw->ppages, dw->npages, dw->len); - mid->callback(mid); + dw->ppages, dw->npages, dw->len, + true); + if (rc >= 0) { +#ifdef CONFIG_CIFS_STATS2 + mid->when_received = jiffies; +#endif + mid->callback(mid); + } else { + spin_lock(&GlobalMid_Lock); + if (dw->server->tcpStatus == CifsNeedReconnect) { + mid->mid_state = MID_RETRY_NEEDED; + spin_unlock(&GlobalMid_Lock); + mid->callback(mid); + } else { + mid->mid_state = MID_REQUEST_SUBMITTED; + mid->mid_flags &= ~(MID_DELETED); + list_add_tail(&mid->qhead, + &dw->server->pending_mid_q); + spin_unlock(&GlobalMid_Lock); + } + } cifs_mid_q_entry_release(mid); } @@ -4622,7 +4680,7 @@ non_offloaded_decrypt: (*mid)->decrypted = true; rc = handle_read_data(server, *mid, buf, server->vals->read_rsp_size, - pages, npages, len); + pages, npages, len, false); } free_pages: @@ -4765,7 +4823,7 @@ smb3_handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid) char *buf = server->large_buf ? server->bigbuf : server->smallbuf; return handle_read_data(server, mid, buf, server->pdu_size, - NULL, 0, 0); + NULL, 0, 0, false); } static int diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 15880a68faad..f943fd0b0699 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -21,6 +21,7 @@ LIST_HEAD(efivarfs_list); static void efivarfs_evict_inode(struct inode *inode) { clear_inode(inode); + kfree(inode->i_private); } static const struct super_operations efivarfs_ops = { diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1b399cafb15a..65ecaf96d0a4 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1231,13 +1231,13 @@ struct ext4_inode_info { blocks */ #define EXT4_MOUNT2_HURD_COMPAT 0x00000004 /* Support HURD-castrated file systems */ -#define EXT4_MOUNT2_DAX_NEVER 0x00000008 /* Do not allow Direct Access */ -#define EXT4_MOUNT2_DAX_INODE 0x00000010 /* For printing options only */ - #define EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM 0x00000008 /* User explicitly specified journal checksum */ #define EXT4_MOUNT2_JOURNAL_FAST_COMMIT 0x00000010 /* Journal fast commit */ +#define EXT4_MOUNT2_DAX_NEVER 0x00000020 /* Do not allow Direct Access */ +#define EXT4_MOUNT2_DAX_INODE 0x00000040 /* For printing options only */ + #define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \ ~EXT4_MOUNT_##opt @@ -2695,7 +2695,8 @@ void ext4_insert_dentry(struct inode *inode, struct ext4_filename *fname); static inline void ext4_update_dx_flag(struct inode *inode) { - if (!ext4_has_feature_dir_index(inode->i_sb)) { + if (!ext4_has_feature_dir_index(inode->i_sb) && + ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) { /* ext4_iget() should have caught this... */ WARN_ON_ONCE(ext4_has_feature_metadata_csum(inode->i_sb)); ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c3b864588a0b..94472044f4c1 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -289,18 +289,7 @@ void ext4_superblock_csum_set(struct super_block *sb) if (!ext4_has_metadata_csum(sb)) return; - /* - * Locking the superblock prevents the scenario - * where: - * 1) a first thread pauses during checksum calculation. - * 2) a second thread updates the superblock, recalculates - * the checksum, and updates s_checksum - * 3) the first thread resumes and finishes its checksum calculation - * and updates s_checksum with a potentially stale or torn value. - */ - lock_buffer(EXT4_SB(sb)->s_sbh); es->s_checksum = ext4_superblock_csum(sb, es); - unlock_buffer(EXT4_SB(sb)->s_sbh); } ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, @@ -2649,10 +2638,6 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, } else if (test_opt2(sb, DAX_INODE)) { SEQ_OPTS_PUTS("dax=inode"); } - - if (test_opt2(sb, JOURNAL_FAST_COMMIT)) - SEQ_OPTS_PUTS("fast_commit"); - ext4_show_quota_options(seq, sb); return 0; } diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 6c1432d78dce..67f2921ae8d4 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -571,7 +571,18 @@ static int freeze_go_sync(struct gfs2_glock *gl) int error = 0; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - if (gl->gl_req == LM_ST_EXCLUSIVE && !gfs2_withdrawn(sdp)) { + /* + * We need to check gl_state == LM_ST_SHARED here and not gl_req == + * LM_ST_EXCLUSIVE. That's because when any node does a freeze, + * all the nodes should have the freeze glock in SH mode and they all + * call do_xmote: One for EX and the others for UN. They ALL must + * freeze locally, and they ALL must queue freeze work. The freeze_work + * calls freeze_func, which tries to reacquire the freeze glock in SH, + * effectively waiting for the thaw on the node who holds it in EX. + * Once thawed, the work func acquires the freeze glock in + * SH and everybody goes back to thawed. + */ + if (gl->gl_state == LM_ST_SHARED && !gfs2_withdrawn(sdp)) { atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE); error = freeze_super(sdp->sd_vfs); if (error) { diff --git a/fs/io_uring.c b/fs/io_uring.c index 8018c7076b25..a8c136a1cf4e 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -205,6 +205,7 @@ struct fixed_file_ref_node { struct list_head file_list; struct fixed_file_data *file_data; struct llist_node llist; + bool done; }; struct fixed_file_data { @@ -478,6 +479,7 @@ struct io_sr_msg { struct io_open { struct file *file; int dfd; + bool ignore_nonblock; struct filename *filename; struct open_how how; unsigned long nofile; @@ -2577,7 +2579,6 @@ static bool io_resubmit_prep(struct io_kiocb *req, int error) } end_req: req_set_fail_links(req); - io_req_complete(req, ret); return false; } #endif @@ -3547,8 +3548,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, * we return to userspace. */ if (req->flags & REQ_F_ISREG) { - __sb_start_write(file_inode(req->file)->i_sb, - SB_FREEZE_WRITE, true); + sb_start_write(file_inode(req->file)->i_sb); __sb_writers_release(file_inode(req->file)->i_sb, SB_FREEZE_WRITE); } @@ -3796,6 +3796,7 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe return ret; } req->open.nofile = rlimit(RLIMIT_NOFILE); + req->open.ignore_nonblock = false; req->flags |= REQ_F_NEED_CLEANUP; return 0; } @@ -3839,7 +3840,7 @@ static int io_openat2(struct io_kiocb *req, bool force_nonblock) struct file *file; int ret; - if (force_nonblock) + if (force_nonblock && !req->open.ignore_nonblock) return -EAGAIN; ret = build_open_flags(&req->open.how, &op); @@ -3854,6 +3855,21 @@ static int io_openat2(struct io_kiocb *req, bool force_nonblock) if (IS_ERR(file)) { put_unused_fd(ret); ret = PTR_ERR(file); + /* + * A work-around to ensure that /proc/self works that way + * that it should - if we get -EOPNOTSUPP back, then assume + * that proc_self_get_link() failed us because we're in async + * context. We should be safe to retry this from the task + * itself with force_nonblock == false set, as it should not + * block on lookup. Would be nice to know this upfront and + * avoid the async dance, but doesn't seem feasible. + */ + if (ret == -EOPNOTSUPP && io_wq_current_is_worker()) { + req->open.ignore_nonblock = true; + refcount_inc(&req->refs); + io_req_task_queue(req); + return 0; + } } else { fsnotify_open(file); fd_install(ret, file); @@ -6958,9 +6974,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx) return -ENXIO; spin_lock(&data->lock); - if (!list_empty(&data->ref_list)) - ref_node = list_first_entry(&data->ref_list, - struct fixed_file_ref_node, node); + ref_node = data->node; spin_unlock(&data->lock); if (ref_node) percpu_ref_kill(&ref_node->refs); @@ -7309,10 +7323,6 @@ static void __io_file_put_work(struct fixed_file_ref_node *ref_node) kfree(pfile); } - spin_lock(&file_data->lock); - list_del(&ref_node->node); - spin_unlock(&file_data->lock); - percpu_ref_exit(&ref_node->refs); kfree(ref_node); percpu_ref_put(&file_data->refs); @@ -7339,17 +7349,32 @@ static void io_file_put_work(struct work_struct *work) static void io_file_data_ref_zero(struct percpu_ref *ref) { struct fixed_file_ref_node *ref_node; + struct fixed_file_data *data; struct io_ring_ctx *ctx; - bool first_add; + bool first_add = false; int delay = HZ; ref_node = container_of(ref, struct fixed_file_ref_node, refs); - ctx = ref_node->file_data->ctx; + data = ref_node->file_data; + ctx = data->ctx; + + spin_lock(&data->lock); + ref_node->done = true; + + while (!list_empty(&data->ref_list)) { + ref_node = list_first_entry(&data->ref_list, + struct fixed_file_ref_node, node); + /* recycle ref nodes in order */ + if (!ref_node->done) + break; + list_del(&ref_node->node); + first_add |= llist_add(&ref_node->llist, &ctx->file_put_llist); + } + spin_unlock(&data->lock); - if (percpu_ref_is_dying(&ctx->file_data->refs)) + if (percpu_ref_is_dying(&data->refs)) delay = 0; - first_add = llist_add(&ref_node->llist, &ctx->file_put_llist); if (!delay) mod_delayed_work(system_wq, &ctx->file_put_work, 0); else if (first_add) @@ -7373,6 +7398,7 @@ static struct fixed_file_ref_node *alloc_fixed_file_ref_node( INIT_LIST_HEAD(&ref_node->node); INIT_LIST_HEAD(&ref_node->file_list); ref_node->file_data = ctx->file_data; + ref_node->done = false; return ref_node; } @@ -7468,7 +7494,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, file_data->node = ref_node; spin_lock(&file_data->lock); - list_add(&ref_node->node, &file_data->ref_list); + list_add_tail(&ref_node->node, &file_data->ref_list); spin_unlock(&file_data->lock); percpu_ref_get(&file_data->refs); return ret; @@ -7627,7 +7653,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, if (needs_switch) { percpu_ref_kill(&data->node->refs); spin_lock(&data->lock); - list_add(&ref_node->node, &data->ref_list); + list_add_tail(&ref_node->node, &data->ref_list); data->node = ref_node; spin_unlock(&data->lock); percpu_ref_get(&ctx->file_data->refs); @@ -9226,6 +9252,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, * to a power-of-two, if it isn't already. We do NOT impose * any cq vs sq ring sizing. */ + p->cq_entries = roundup_pow_of_two(p->cq_entries); if (p->cq_entries < p->sq_entries) return -EINVAL; if (p->cq_entries > IORING_MAX_CQ_ENTRIES) { @@ -9233,7 +9260,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, return -EINVAL; p->cq_entries = IORING_MAX_CQ_ENTRIES; } - p->cq_entries = roundup_pow_of_two(p->cq_entries); } else { p->cq_entries = 2 * p->sq_entries; } diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 0c3d5e3b24b2..188f79d76988 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -566,12 +566,14 @@ static int __jbd2_journal_force_commit(journal_t *journal) } /** - * Force and wait upon a commit if the calling process is not within - * transaction. This is used for forcing out undo-protected data which contains - * bitmaps, when the fs is running out of space. + * jbd2_journal_force_commit_nested - Force and wait upon a commit if the + * calling process is not within transaction. * * @journal: journal to force * Returns true if progress was made. + * + * This is used for forcing out undo-protected data which contains + * bitmaps, when the fs is running out of space. */ int jbd2_journal_force_commit_nested(journal_t *journal) { @@ -582,7 +584,7 @@ int jbd2_journal_force_commit_nested(journal_t *journal) } /** - * int journal_force_commit() - force any uncommitted transactions + * jbd2_journal_force_commit() - force any uncommitted transactions * @journal: journal to force * * Caller want unconditional commit. We can only force the running transaction @@ -1881,7 +1883,7 @@ static int load_superblock(journal_t *journal) /** - * int jbd2_journal_load() - Read journal from disk. + * jbd2_journal_load() - Read journal from disk. * @journal: Journal to act on. * * Given a journal_t structure which tells us which disk blocks contain @@ -1951,7 +1953,7 @@ recovery_error: } /** - * void jbd2_journal_destroy() - Release a journal_t structure. + * jbd2_journal_destroy() - Release a journal_t structure. * @journal: Journal to act on. * * Release a journal_t structure once it is no longer in use by the @@ -2028,7 +2030,7 @@ int jbd2_journal_destroy(journal_t *journal) /** - *int jbd2_journal_check_used_features() - Check if features specified are used. + * jbd2_journal_check_used_features() - Check if features specified are used. * @journal: Journal to check. * @compat: bitmask of compatible features * @ro: bitmask of features that force read-only mount @@ -2063,7 +2065,7 @@ int jbd2_journal_check_used_features(journal_t *journal, unsigned long compat, } /** - * int jbd2_journal_check_available_features() - Check feature set in journalling layer + * jbd2_journal_check_available_features() - Check feature set in journalling layer * @journal: Journal to check. * @compat: bitmask of compatible features * @ro: bitmask of features that force read-only mount @@ -2126,7 +2128,7 @@ jbd2_journal_initialize_fast_commit(journal_t *journal) } /** - * int jbd2_journal_set_features() - Mark a given journal feature in the superblock + * jbd2_journal_set_features() - Mark a given journal feature in the superblock * @journal: Journal to act on. * @compat: bitmask of compatible features * @ro: bitmask of features that force read-only mount @@ -2217,7 +2219,7 @@ int jbd2_journal_set_features(journal_t *journal, unsigned long compat, } /* - * jbd2_journal_clear_features () - Clear a given journal feature in the + * jbd2_journal_clear_features() - Clear a given journal feature in the * superblock * @journal: Journal to act on. * @compat: bitmask of compatible features @@ -2246,7 +2248,7 @@ void jbd2_journal_clear_features(journal_t *journal, unsigned long compat, EXPORT_SYMBOL(jbd2_journal_clear_features); /** - * int jbd2_journal_flush () - Flush journal + * jbd2_journal_flush() - Flush journal * @journal: Journal to act on. * * Flush all data for a given journal to disk and empty the journal. @@ -2321,7 +2323,7 @@ out: } /** - * int jbd2_journal_wipe() - Wipe journal contents + * jbd2_journal_wipe() - Wipe journal contents * @journal: Journal to act on. * @write: flag (see below) * @@ -2362,7 +2364,7 @@ int jbd2_journal_wipe(journal_t *journal, int write) } /** - * void jbd2_journal_abort () - Shutdown the journal immediately. + * jbd2_journal_abort () - Shutdown the journal immediately. * @journal: the journal to shutdown. * @errno: an error number to record in the journal indicating * the reason for the shutdown. @@ -2453,7 +2455,7 @@ void jbd2_journal_abort(journal_t *journal, int errno) } /** - * int jbd2_journal_errno () - returns the journal's error state. + * jbd2_journal_errno() - returns the journal's error state. * @journal: journal to examine. * * This is the errno number set with jbd2_journal_abort(), the last @@ -2477,7 +2479,7 @@ int jbd2_journal_errno(journal_t *journal) } /** - * int jbd2_journal_clear_err () - clears the journal's error state + * jbd2_journal_clear_err() - clears the journal's error state * @journal: journal to act on. * * An error must be cleared or acked to take a FS out of readonly @@ -2497,7 +2499,7 @@ int jbd2_journal_clear_err(journal_t *journal) } /** - * void jbd2_journal_ack_err() - Ack journal err. + * jbd2_journal_ack_err() - Ack journal err. * @journal: journal to act on. * * An error must be cleared or acked to take a FS out of readonly diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index d54f04674e8e..9396666b7314 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -519,7 +519,7 @@ EXPORT_SYMBOL(jbd2__journal_start); /** - * handle_t *jbd2_journal_start() - Obtain a new handle. + * jbd2_journal_start() - Obtain a new handle. * @journal: Journal to start transaction on. * @nblocks: number of block buffer we might modify * @@ -566,7 +566,7 @@ void jbd2_journal_free_reserved(handle_t *handle) EXPORT_SYMBOL(jbd2_journal_free_reserved); /** - * int jbd2_journal_start_reserved() - start reserved handle + * jbd2_journal_start_reserved() - start reserved handle * @handle: handle to start * @type: for handle statistics * @line_no: for handle statistics @@ -620,7 +620,7 @@ int jbd2_journal_start_reserved(handle_t *handle, unsigned int type, EXPORT_SYMBOL(jbd2_journal_start_reserved); /** - * int jbd2_journal_extend() - extend buffer credits. + * jbd2_journal_extend() - extend buffer credits. * @handle: handle to 'extend' * @nblocks: nr blocks to try to extend by. * @revoke_records: number of revoke records to try to extend by. @@ -745,7 +745,7 @@ static void stop_this_handle(handle_t *handle) } /** - * int jbd2_journal_restart() - restart a handle . + * jbd2__journal_restart() - restart a handle . * @handle: handle to restart * @nblocks: nr credits requested * @revoke_records: number of revoke record credits requested @@ -815,7 +815,7 @@ int jbd2_journal_restart(handle_t *handle, int nblocks) EXPORT_SYMBOL(jbd2_journal_restart); /** - * void jbd2_journal_lock_updates () - establish a transaction barrier. + * jbd2_journal_lock_updates () - establish a transaction barrier. * @journal: Journal to establish a barrier on. * * This locks out any further updates from being started, and blocks @@ -874,7 +874,7 @@ void jbd2_journal_lock_updates(journal_t *journal) } /** - * void jbd2_journal_unlock_updates (journal_t* journal) - release barrier + * jbd2_journal_unlock_updates () - release barrier * @journal: Journal to release the barrier on. * * Release a transaction barrier obtained with jbd2_journal_lock_updates(). @@ -1182,7 +1182,8 @@ out: } /** - * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update. + * jbd2_journal_get_write_access() - notify intent to modify a buffer + * for metadata (not data) update. * @handle: transaction to add buffer modifications to * @bh: bh to be used for metadata writes * @@ -1226,7 +1227,7 @@ int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh) * unlocked buffer beforehand. */ /** - * int jbd2_journal_get_create_access () - notify intent to use newly created bh + * jbd2_journal_get_create_access () - notify intent to use newly created bh * @handle: transaction to new buffer to * @bh: new buffer. * @@ -1306,7 +1307,7 @@ out: } /** - * int jbd2_journal_get_undo_access() - Notify intent to modify metadata with + * jbd2_journal_get_undo_access() - Notify intent to modify metadata with * non-rewindable consequences * @handle: transaction * @bh: buffer to undo @@ -1383,7 +1384,7 @@ out: } /** - * void jbd2_journal_set_triggers() - Add triggers for commit writeout + * jbd2_journal_set_triggers() - Add triggers for commit writeout * @bh: buffer to trigger on * @type: struct jbd2_buffer_trigger_type containing the trigger(s). * @@ -1425,7 +1426,7 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh, } /** - * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata + * jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata * @handle: transaction to add buffer to. * @bh: buffer to mark * @@ -1593,7 +1594,7 @@ out: } /** - * void jbd2_journal_forget() - bforget() for potentially-journaled buffers. + * jbd2_journal_forget() - bforget() for potentially-journaled buffers. * @handle: transaction handle * @bh: bh to 'forget' * @@ -1762,7 +1763,7 @@ drop: } /** - * int jbd2_journal_stop() - complete a transaction + * jbd2_journal_stop() - complete a transaction * @handle: transaction to complete. * * All done for a particular handle. @@ -2080,7 +2081,7 @@ out: } /** - * int jbd2_journal_try_to_free_buffers() - try to free page buffers. + * jbd2_journal_try_to_free_buffers() - try to free page buffers. * @journal: journal for operation * @page: to try and free * @@ -2411,7 +2412,7 @@ zap_buffer_unlocked: } /** - * void jbd2_journal_invalidatepage() + * jbd2_journal_invalidatepage() * @journal: journal to use for flush... * @page: page to flush * @offset: start of the range to invalidate diff --git a/fs/libfs.c b/fs/libfs.c index fc34361c1489..7124c2e8df2f 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -959,7 +959,7 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos) { struct simple_attr *attr; - u64 val; + unsigned long long val; size_t size; ssize_t ret; @@ -977,7 +977,9 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, goto out; attr->set_buf[size] = '\0'; - val = simple_strtoll(attr->set_buf, NULL, 0); + ret = kstrtoull(attr->set_buf, 0, &val); + if (ret) + goto out; ret = attr->set(attr->data, val); if (ret == 0) ret = len; /* on success, claim we got the whole input */ diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index a960ec3a569a..8d3ad5ef2925 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -178,6 +178,7 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, struct inode *inode = d_inode(dentry); struct dentry *parent; bool parent_watched = dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED; + bool parent_needed, parent_interested; __u32 p_mask; struct inode *p_inode = NULL; struct name_snapshot name; @@ -193,7 +194,8 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, return 0; parent = NULL; - if (!parent_watched && !fsnotify_event_needs_parent(inode, mnt, mask)) + parent_needed = fsnotify_event_needs_parent(inode, mnt, mask); + if (!parent_watched && !parent_needed) goto notify; /* Does parent inode care about events on children? */ @@ -205,17 +207,17 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, /* * Include parent/name in notification either if some notification - * groups require parent info (!parent_watched case) or the parent is - * interested in this event. + * groups require parent info or the parent is interested in this event. */ - if (!parent_watched || (mask & p_mask & ALL_FSNOTIFY_EVENTS)) { + parent_interested = mask & p_mask & ALL_FSNOTIFY_EVENTS; + if (parent_needed || parent_interested) { /* When notifying parent, child should be passed as data */ WARN_ON_ONCE(inode != fsnotify_data_inode(data, data_type)); /* Notify both parent and child with child name info */ take_dentry_name_snapshot(&name, dentry); file_name = &name.name; - if (parent_watched) + if (parent_interested) mask |= FS_EVENT_ON_CHILD; } diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 1d91dd1e8711..2febc76e9de7 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1713,6 +1713,7 @@ static void ocfs2_inode_init_once(void *data) oi->ip_blkno = 0ULL; oi->ip_clusters = 0; + oi->ip_next_orphan = NULL; ocfs2_resv_init_once(&oi->ip_la_data_resv); diff --git a/fs/proc/self.c b/fs/proc/self.c index 72cd69bcaf4a..cc71ce3466dc 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -16,6 +16,13 @@ static const char *proc_self_get_link(struct dentry *dentry, pid_t tgid = task_tgid_nr_ns(current, ns); char *name; + /* + * Not currently supported. Once we can inherit all of struct pid, + * we can allow this. + */ + if (current->flags & PF_KTHREAD) + return ERR_PTR(-EOPNOTSUPP); + if (!tgid) return ERR_PTR(-ENOENT); /* max length of unsigned int in decimal + NULL term */ diff --git a/fs/super.c b/fs/super.c index a51c2083cd6b..98bb0629ee10 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1631,55 +1631,6 @@ int super_setup_bdi(struct super_block *sb) } EXPORT_SYMBOL(super_setup_bdi); -/* - * This is an internal function, please use sb_end_{write,pagefault,intwrite} - * instead. - */ -void __sb_end_write(struct super_block *sb, int level) -{ - percpu_up_read(sb->s_writers.rw_sem + level-1); -} -EXPORT_SYMBOL(__sb_end_write); - -/* - * This is an internal function, please use sb_start_{write,pagefault,intwrite} - * instead. - */ -int __sb_start_write(struct super_block *sb, int level, bool wait) -{ - bool force_trylock = false; - int ret = 1; - -#ifdef CONFIG_LOCKDEP - /* - * We want lockdep to tell us about possible deadlocks with freezing - * but it's it bit tricky to properly instrument it. Getting a freeze - * protection works as getting a read lock but there are subtle - * problems. XFS for example gets freeze protection on internal level - * twice in some cases, which is OK only because we already hold a - * freeze protection also on higher level. Due to these cases we have - * to use wait == F (trylock mode) which must not fail. - */ - if (wait) { - int i; - - for (i = 0; i < level - 1; i++) - if (percpu_rwsem_is_held(sb->s_writers.rw_sem + i)) { - force_trylock = true; - break; - } - } -#endif - if (wait && !force_trylock) - percpu_down_read(sb->s_writers.rw_sem + level-1); - else - ret = percpu_down_read_trylock(sb->s_writers.rw_sem + level-1); - - WARN_ON(force_trylock && !ret); - return ret; -} -EXPORT_SYMBOL(__sb_start_write); - /** * sb_wait_write - wait until all writers to given file system finish * @sb: the super for which we wait diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index bb128db220ac..d6ef69ab1c67 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -515,7 +515,7 @@ xfs_attr_copy_value( *========================================================================*/ /* - * Query whether the requested number of additional bytes of extended + * Query whether the total requested number of attr fork bytes of extended * attribute space will be able to fit inline. * * Returns zero if not, else the di_forkoff fork offset to be used in the @@ -535,6 +535,12 @@ xfs_attr_shortform_bytesfit( int maxforkoff; int offset; + /* + * Check if the new size could fit at all first: + */ + if (bytes > XFS_LITINO(mp)) + return 0; + /* rounded down */ offset = (XFS_LITINO(mp) - bytes) >> 3; diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 340c83f76c80..2668ebe02865 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -1514,7 +1514,7 @@ xfs_rmap_convert_shared( * record for our insertion point. This will also give us the record for * start block contiguity tests. */ - error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, flags, + error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, oldext, &PREV, &i); if (error) goto done; diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 955302e7cdde..fed56d213a3f 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -113,6 +113,8 @@ xchk_bmap_get_rmap( if (info->whichfork == XFS_ATTR_FORK) rflags |= XFS_RMAP_ATTR_FORK; + if (irec->br_state == XFS_EXT_UNWRITTEN) + rflags |= XFS_RMAP_UNWRITTEN; /* * CoW staging extents are owned (on disk) by the refcountbt, so @@ -216,13 +218,13 @@ xchk_bmap_xref_rmap( * which doesn't track unwritten state. */ if (owner != XFS_RMAP_OWN_COW && - irec->br_state == XFS_EXT_UNWRITTEN && - !(rmap.rm_flags & XFS_RMAP_UNWRITTEN)) + !!(irec->br_state == XFS_EXT_UNWRITTEN) != + !!(rmap.rm_flags & XFS_RMAP_UNWRITTEN)) xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, irec->br_startoff); - if (info->whichfork == XFS_ATTR_FORK && - !(rmap.rm_flags & XFS_RMAP_ATTR_FORK)) + if (!!(info->whichfork == XFS_ATTR_FORK) != + !!(rmap.rm_flags & XFS_RMAP_ATTR_FORK)) xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, irec->br_startoff); if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK) diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c index f52a7b8256f9..debf392e0515 100644 --- a/fs/xfs/scrub/btree.c +++ b/fs/xfs/scrub/btree.c @@ -452,32 +452,41 @@ xchk_btree_check_minrecs( int level, struct xfs_btree_block *block) { - unsigned int numrecs; - int ok_level; - - numrecs = be16_to_cpu(block->bb_numrecs); + struct xfs_btree_cur *cur = bs->cur; + unsigned int root_level = cur->bc_nlevels - 1; + unsigned int numrecs = be16_to_cpu(block->bb_numrecs); /* More records than minrecs means the block is ok. */ - if (numrecs >= bs->cur->bc_ops->get_minrecs(bs->cur, level)) + if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) return; /* - * Certain btree blocks /can/ have fewer than minrecs records. Any - * level greater than or equal to the level of the highest dedicated - * btree block are allowed to violate this constraint. - * - * For a btree rooted in a block, the btree root can have fewer than - * minrecs records. If the btree is rooted in an inode and does not - * store records in the root, the direct children of the root and the - * root itself can have fewer than minrecs records. + * For btrees rooted in the inode, it's possible that the root block + * contents spilled into a regular ondisk block because there wasn't + * enough space in the inode root. The number of records in that + * child block might be less than the standard minrecs, but that's ok + * provided that there's only one direct child of the root. */ - ok_level = bs->cur->bc_nlevels - 1; - if (bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) - ok_level--; - if (level >= ok_level) + if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && + level == cur->bc_nlevels - 2) { + struct xfs_btree_block *root_block; + struct xfs_buf *root_bp; + int root_maxrecs; + + root_block = xfs_btree_get_block(cur, root_level, &root_bp); + root_maxrecs = cur->bc_ops->get_dmaxrecs(cur, root_level); + if (be16_to_cpu(root_block->bb_numrecs) != 1 || + numrecs <= root_maxrecs) + xchk_btree_set_corrupt(bs->sc, cur, level); return; + } - xchk_btree_set_corrupt(bs->sc, bs->cur, level); + /* + * Otherwise, only the root level is allowed to have fewer than minrecs + * records or keyptrs. + */ + if (level < root_level) + xchk_btree_set_corrupt(bs->sc, cur, level); } /* diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index 7c432997edad..b045e95c2ea7 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -558,14 +558,27 @@ xchk_directory_leaf1_bestfree( /* Check all the bestfree entries. */ for (i = 0; i < bestcount; i++, bestp++) { best = be16_to_cpu(*bestp); - if (best == NULLDATAOFF) - continue; error = xfs_dir3_data_read(sc->tp, sc->ip, - i * args->geo->fsbcount, 0, &dbp); + xfs_dir2_db_to_da(args->geo, i), + XFS_DABUF_MAP_HOLE_OK, + &dbp); if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) break; - xchk_directory_check_freesp(sc, lblk, dbp, best); + + if (!dbp) { + if (best != NULLDATAOFF) { + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, + lblk); + break; + } + continue; + } + + if (best == NULLDATAOFF) + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + else + xchk_directory_check_freesp(sc, lblk, dbp, best); xfs_trans_brelse(sc->tp, dbp); if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) break; diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index beaeb6fa3119..dd672e6bbc75 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -170,7 +170,6 @@ xchk_refcountbt_process_rmap_fragments( */ INIT_LIST_HEAD(&worklist); rbno = NULLAGBLOCK; - nr = 1; /* Make sure the fragments actually /are/ in agbno order. */ bno = 0; @@ -184,15 +183,14 @@ xchk_refcountbt_process_rmap_fragments( * Find all the rmaps that start at or before the refc extent, * and put them on the worklist. */ + nr = 0; list_for_each_entry_safe(frag, n, &refchk->fragments, list) { - if (frag->rm.rm_startblock > refchk->bno) - goto done; + if (frag->rm.rm_startblock > refchk->bno || nr > target_nr) + break; bno = frag->rm.rm_startblock + frag->rm.rm_blockcount; if (bno < rbno) rbno = bno; list_move_tail(&frag->list, &worklist); - if (nr == target_nr) - break; nr++; } diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 3abb8b9d6f4c..7b9ff824e82d 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -706,6 +706,23 @@ relock: return 0; } +/* + * Check that the imap we are going to return to the caller spans the entire + * range that the caller requested for the IO. + */ +static bool +imap_spans_range( + struct xfs_bmbt_irec *imap, + xfs_fileoff_t offset_fsb, + xfs_fileoff_t end_fsb) +{ + if (imap->br_startoff > offset_fsb) + return false; + if (imap->br_startoff + imap->br_blockcount < end_fsb) + return false; + return true; +} + static int xfs_direct_write_iomap_begin( struct inode *inode, @@ -766,6 +783,18 @@ xfs_direct_write_iomap_begin( if (imap_needs_alloc(inode, flags, &imap, nimaps)) goto allocate_blocks; + /* + * NOWAIT IO needs to span the entire requested IO with a single map so + * that we avoid partial IO failures due to the rest of the IO range not + * covered by this map triggering an EAGAIN condition when it is + * subsequently mapped and aborting the IO. + */ + if ((flags & IOMAP_NOWAIT) && + !imap_spans_range(&imap, offset_fsb, end_fsb)) { + error = -EAGAIN; + goto out_unlock; + } + xfs_iunlock(ip, lockmode); trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap); return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags); diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c index 233dcc8784db..2a45138831e3 100644 --- a/fs/xfs/xfs_iwalk.c +++ b/fs/xfs/xfs_iwalk.c @@ -55,6 +55,9 @@ struct xfs_iwalk_ag { /* Where do we start the traversal? */ xfs_ino_t startino; + /* What was the last inode number we saw when iterating the inobt? */ + xfs_ino_t lastino; + /* Array of inobt records we cache. */ struct xfs_inobt_rec_incore *recs; @@ -301,6 +304,9 @@ xfs_iwalk_ag_start( if (XFS_IS_CORRUPT(mp, *has_more != 1)) return -EFSCORRUPTED; + iwag->lastino = XFS_AGINO_TO_INO(mp, agno, + irec->ir_startino + XFS_INODES_PER_CHUNK - 1); + /* * If the LE lookup yielded an inobt record before the cursor position, * skip it and see if there's another one after it. @@ -347,15 +353,17 @@ xfs_iwalk_run_callbacks( struct xfs_mount *mp = iwag->mp; struct xfs_trans *tp = iwag->tp; struct xfs_inobt_rec_incore *irec; - xfs_agino_t restart; + xfs_agino_t next_agino; int error; + next_agino = XFS_INO_TO_AGINO(mp, iwag->lastino) + 1; + ASSERT(iwag->nr_recs > 0); /* Delete cursor but remember the last record we cached... */ xfs_iwalk_del_inobt(tp, curpp, agi_bpp, 0); irec = &iwag->recs[iwag->nr_recs - 1]; - restart = irec->ir_startino + XFS_INODES_PER_CHUNK - 1; + ASSERT(next_agino == irec->ir_startino + XFS_INODES_PER_CHUNK); error = xfs_iwalk_ag_recs(iwag); if (error) @@ -372,7 +380,7 @@ xfs_iwalk_run_callbacks( if (error) return error; - return xfs_inobt_lookup(*curpp, restart, XFS_LOOKUP_GE, has_more); + return xfs_inobt_lookup(*curpp, next_agino, XFS_LOOKUP_GE, has_more); } /* Walk all inodes in a single AG, from @iwag->startino to the end of the AG. */ @@ -396,6 +404,7 @@ xfs_iwalk_ag( while (!error && has_more) { struct xfs_inobt_rec_incore *irec; + xfs_ino_t rec_fsino; cond_resched(); if (xfs_pwork_want_abort(&iwag->pwork)) @@ -407,6 +416,15 @@ xfs_iwalk_ag( if (error || !has_more) break; + /* Make sure that we always move forward. */ + rec_fsino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino); + if (iwag->lastino != NULLFSINO && + XFS_IS_CORRUPT(mp, iwag->lastino >= rec_fsino)) { + error = -EFSCORRUPTED; + goto out; + } + iwag->lastino = rec_fsino + XFS_INODES_PER_CHUNK - 1; + /* No allocated inodes in this chunk; skip it. */ if (iwag->skip_empty && irec->ir_freecount == irec->ir_count) { error = xfs_btree_increment(cur, 0, &has_more); @@ -535,6 +553,7 @@ xfs_iwalk( .trim_start = 1, .skip_empty = 1, .pwork = XFS_PWORK_SINGLE_THREADED, + .lastino = NULLFSINO, }; xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino); int error; @@ -623,6 +642,7 @@ xfs_iwalk_threaded( iwag->data = data; iwag->startino = startino; iwag->sz_recs = xfs_iwalk_prefetch(inode_records); + iwag->lastino = NULLFSINO; xfs_pwork_queue(&pctl, &iwag->pwork); startino = XFS_AGINO_TO_INO(mp, agno + 1, 0); if (flags & XFS_INOBT_WALK_SAME_AG) @@ -696,6 +716,7 @@ xfs_inobt_walk( .startino = startino, .sz_recs = xfs_inobt_walk_prefetch(inobt_records), .pwork = XFS_PWORK_SINGLE_THREADED, + .lastino = NULLFSINO, }; xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino); int error; diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 150ee5cb8645..7110507a2b6b 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -194,20 +194,25 @@ xfs_initialize_perag( } pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL); - if (!pag) + if (!pag) { + error = -ENOMEM; goto out_unwind_new_pags; + } pag->pag_agno = index; pag->pag_mount = mp; spin_lock_init(&pag->pag_ici_lock); INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); - if (xfs_buf_hash_init(pag)) + + error = xfs_buf_hash_init(pag); + if (error) goto out_free_pag; init_waitqueue_head(&pag->pagb_wait); spin_lock_init(&pag->pagb_lock); pag->pagb_count = 0; pag->pagb_tree = RB_ROOT; - if (radix_tree_preload(GFP_NOFS)) + error = radix_tree_preload(GFP_NOFS); + if (error) goto out_hash_destroy; spin_lock(&mp->m_perag_lock); diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index b101feb2aab4..f3082a957d5e 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -134,7 +134,7 @@ xfs_fs_map_blocks( goto out_unlock; error = invalidate_inode_pages2(inode->i_mapping); if (WARN_ON_ONCE(error)) - return error; + goto out_unlock; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + length); offset_fsb = XFS_B_TO_FSBT(mp, offset); |