From eda4623cf989d033c07355be1469e44f321ce8ae Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Tue, 27 May 2025 16:27:57 +0100
Subject: io_uring/zcrx: init id for xa_find

xa_find() interprets id as the lower bound and thus expects it initialised.

Reported-by: syzbot+c3ff04150c30d3df0f57@syzkaller.appspotmail.com
Fixes: 76f1cc98b23ce ("io_uring/zcrx: add support for multiple ifqs")
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/faea44ef63131e6968f635e1b6b7ca6056f1f533.1748359655.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/zcrx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 9a568d049204..0c5b7d8f8d67 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -630,12 +630,13 @@ ifq_free:
 void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx)
 {
 	struct io_zcrx_ifq *ifq;
-	unsigned long id;
 
 	lockdep_assert_held(&ctx->uring_lock);
 
 	while (1) {
 		scoped_guard(mutex, &ctx->mmap_lock) {
+			unsigned long id = 0;
+
 			ifq = xa_find(&ctx->zcrx_ctxs, &id, ULONG_MAX, XA_PRESENT);
 			if (ifq)
 				xa_erase(&ctx->zcrx_ctxs, id);
-- 
cgit v1.2.3


From 0ec33c81d9c7342f03864101ddb2e717a0cce03e Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Tue, 27 May 2025 18:07:33 +0100
Subject: io_uring/zcrx: fix area release on registration failure

On area registration failure there might be no ifq set and it's not safe
to access area->ifq in the release path without checking it first.

Cc: stable@vger.kernel.org
Fixes: f12ecf5e1c5ec ("io_uring/zcrx: fix late dma unmap for a dead dev")
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/bc02878678a5fec28bc77d33355cdba735418484.1748365640.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/zcrx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 0c5b7d8f8d67..21c816c3bfe0 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -366,7 +366,8 @@ static void io_free_rbuf_ring(struct io_zcrx_ifq *ifq)
 
 static void io_zcrx_free_area(struct io_zcrx_area *area)
 {
-	io_zcrx_unmap_area(area->ifq, area);
+	if (area->ifq)
+		io_zcrx_unmap_area(area->ifq, area);
 	io_release_area_mem(&area->mem);
 
 	kvfree(area->freelist);
-- 
cgit v1.2.3


From 2c7f023219966777be0687e15b57689894304cd3 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 28 May 2025 13:45:44 -0600
Subject: io_uring/net: only consider msg_inq if larger than 1

Currently retry and general validity of msg_inq is gated on it being
larger than zero, but it's entirely possible for this to be slightly
inaccurate. In particular, if FIN is received, it'll return 1.

Just use larger than 1 as the check. This covers both the FIN case, and
at the same time, it doesn't make much sense to retry a recv immediately
if there's even just a single 1 byte of valid data in the socket.

Leave the SOCK_NONEMPTY flagging when larger than 0 still, as an app may
use that for the final receive.

Cc: stable@vger.kernel.org
Reported-by: Christian Mazakas <christian.mazakas@gmail.com>
Fixes: 7c71a0af81ba ("io_uring/net: improve recv bundles")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/net.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/io_uring/net.c b/io_uring/net.c
index d13f3e8f6c72..e16633fd6630 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -832,7 +832,7 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
 		 * If more is available AND it was a full transfer, retry and
 		 * append to this one
 		 */
-		if (!sr->retry && kmsg->msg.msg_inq > 0 && this_ret > 0 &&
+		if (!sr->retry && kmsg->msg.msg_inq > 1 && this_ret > 0 &&
 		    !iov_iter_count(&kmsg->msg.msg_iter)) {
 			req->cqe.flags = cflags & ~CQE_F_MASK;
 			sr->len = kmsg->msg.msg_inq;
@@ -1070,7 +1070,7 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
 			arg.mode |= KBUF_MODE_FREE;
 		}
 
-		if (kmsg->msg.msg_inq > 0)
+		if (kmsg->msg.msg_inq > 1)
 			arg.max_len = min_not_zero(sr->len, kmsg->msg.msg_inq);
 
 		ret = io_buffers_peek(req, &arg);
-- 
cgit v1.2.3


From e931d3a9d5200bae9d938be2582072b2898e37f7 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Fri, 23 May 2025 23:37:39 +0100
Subject: MAINTAINERS: remove myself from io_uring

Disassociate my name from the project over disagreements on development
practices.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/814ec73b73323a8e1c87643d193a73f467fb191f.1748034476.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index e45559690b28..cd6ef0839df6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12492,7 +12492,6 @@ F:	include/linux/iosys-map.h
 
 IO_URING
 M:	Jens Axboe <axboe@kernel.dk>
-M:	Pavel Begunkov <asml.silence@gmail.com>
 L:	io-uring@vger.kernel.org
 S:	Maintained
 T:	git git://git.kernel.dk/linux-block
-- 
cgit v1.2.3


From 607d09d1a01e9f29e91733e3a08b63ed240aacb2 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 3 Jun 2025 07:42:28 -0600
Subject: io_uring/kbuf: limit legacy provided buffer lists to USHRT_MAX

The buffer ID for a provided buffer is an unsigned short, and hence
there can only be 64k added to any given buffer list before having
duplicate BIDs. Cap the legacy provided buffers at 64k in the list.
This is mostly to prevent silly stall reports from syzbot, which
likes to dump tons of buffers into a list and then have kernels with
lockdep and kasan churning through them and hitting long wait times
for buffer pruning at ring exit time.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/kbuf.c | 17 +++++++++++++++--
 io_uring/kbuf.h |  3 +++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index 8cce3ebd813f..2ea65f3cef72 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -108,6 +108,7 @@ bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags)
 	buf = req->kbuf;
 	bl = io_buffer_get_list(ctx, buf->bgid);
 	list_add(&buf->list, &bl->buf_list);
+	bl->nbufs++;
 	req->flags &= ~REQ_F_BUFFER_SELECTED;
 
 	io_ring_submit_unlock(ctx, issue_flags);
@@ -122,6 +123,7 @@ static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len,
 
 		kbuf = list_first_entry(&bl->buf_list, struct io_buffer, list);
 		list_del(&kbuf->list);
+		bl->nbufs--;
 		if (*len == 0 || *len > kbuf->len)
 			*len = kbuf->len;
 		if (list_empty(&bl->buf_list))
@@ -390,6 +392,7 @@ static int io_remove_buffers_legacy(struct io_ring_ctx *ctx,
 	for (i = 0; i < nbufs && !list_empty(&bl->buf_list); i++) {
 		nxt = list_first_entry(&bl->buf_list, struct io_buffer, list);
 		list_del(&nxt->list);
+		bl->nbufs--;
 		kfree(nxt);
 		cond_resched();
 	}
@@ -491,14 +494,24 @@ static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
 {
 	struct io_buffer *buf;
 	u64 addr = pbuf->addr;
-	int i, bid = pbuf->bid;
+	int ret = -ENOMEM, i, bid = pbuf->bid;
 
 	for (i = 0; i < pbuf->nbufs; i++) {
+		/*
+		 * Nonsensical to have more than sizeof(bid) buffers in a
+		 * buffer list, as the application then has no way of knowing
+		 * which duplicate bid refers to what buffer.
+		 */
+		if (bl->nbufs == USHRT_MAX) {
+			ret = -EOVERFLOW;
+			break;
+		}
 		buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
 		if (!buf)
 			break;
 
 		list_add_tail(&buf->list, &bl->buf_list);
+		bl->nbufs++;
 		buf->addr = addr;
 		buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT);
 		buf->bid = bid;
@@ -508,7 +521,7 @@ static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
 		cond_resched();
 	}
 
-	return i ? 0 : -ENOMEM;
+	return i ? 0 : ret;
 }
 
 static int __io_manage_buffers_legacy(struct io_kiocb *req,
diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h
index 4d2c209d1a41..5d83c7adc739 100644
--- a/io_uring/kbuf.h
+++ b/io_uring/kbuf.h
@@ -21,6 +21,9 @@ struct io_buffer_list {
 		struct list_head buf_list;
 		struct io_uring_buf_ring *buf_ring;
 	};
+	/* count of classic/legacy buffers in buffer list */
+	int nbufs;
+
 	__u16 bgid;
 
 	/* below is for ring provided buffers */
-- 
cgit v1.2.3


From 6a8118a77eec5fc4dfec69cc6bdc52229943f6ef Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 4 Jun 2025 08:49:32 -0600
Subject: io_uring/futex: get rid of struct io_futex addr union

Rather than use a union of a u32 and struct futex_waitv user address,
consolidate it into a single void __user pointer instead. This also
makes prep easier to use as the store happens to the member that will
be used.

No functional changes in this patch.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/futex.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/io_uring/futex.c b/io_uring/futex.c
index fa374afbaa51..5a3991b0d1a7 100644
--- a/io_uring/futex.c
+++ b/io_uring/futex.c
@@ -14,10 +14,7 @@
 
 struct io_futex {
 	struct file	*file;
-	union {
-		u32 __user			*uaddr;
-		struct futex_waitv __user	*uwaitv;
-	};
+	void __user	*uaddr;
 	unsigned long	futex_val;
 	unsigned long	futex_mask;
 	unsigned long	futexv_owned;
@@ -186,7 +183,7 @@ int io_futexv_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	if (!futexv)
 		return -ENOMEM;
 
-	ret = futex_parse_waitv(futexv, iof->uwaitv, iof->futex_nr,
+	ret = futex_parse_waitv(futexv, iof->uaddr, iof->futex_nr,
 				io_futex_wakev_fn, req);
 	if (ret) {
 		kfree(futexv);
-- 
cgit v1.2.3


From 079afb081c4288e94d5e4223d3eb6306d853c68b Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 4 Jun 2025 10:25:42 -0600
Subject: io_uring/futex: mark wait requests as inflight

Inflight marking is used so that do_exit() -> io_uring_files_cancel()
will find requests with files that reference an io_uring instance,
so they can get appropriately canceled before the files go away.
However, it's also called before the mm goes away.

Mark futex/futexv wait requests as being inflight, so that
io_uring_files_cancel() will prune them. This ensures that the mm stays
alive, which is important as an exiting mm will also free the futex
private hash buckets. An io_uring futex request with FUTEX2_PRIVATE
set relies on those being alive until the request has completed. A
recent commit added these futex private hashes, which get killed when
the mm goes away.

Fixes: 80367ad01d93 ("futex: Add basic infrastructure for local task local hash")
Link: https://lore.kernel.org/io-uring/38053.1749045482@localhost/
Reported-by: Robert Morris <rtm@csail.mit.edu>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/futex.c    | 4 ++++
 io_uring/io_uring.c | 7 ++++++-
 io_uring/io_uring.h | 1 +
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/io_uring/futex.c b/io_uring/futex.c
index 5a3991b0d1a7..692462d50c8c 100644
--- a/io_uring/futex.c
+++ b/io_uring/futex.c
@@ -145,6 +145,8 @@ int io_futex_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	    !futex_validate_input(iof->futex_flags, iof->futex_mask))
 		return -EINVAL;
 
+	/* Mark as inflight, so file exit cancelation will find it */
+	io_req_track_inflight(req);
 	return 0;
 }
 
@@ -190,6 +192,8 @@ int io_futexv_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return ret;
 	}
 
+	/* Mark as inflight, so file exit cancelation will find it */
+	io_req_track_inflight(req);
 	iof->futexv_owned = 0;
 	iof->futexv_unqueued = 0;
 	req->flags |= REQ_F_ASYNC_DATA;
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index c7a9cecf528e..cf759c172083 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -408,7 +408,12 @@ static void io_clean_op(struct io_kiocb *req)
 	req->flags &= ~IO_REQ_CLEAN_FLAGS;
 }
 
-static inline void io_req_track_inflight(struct io_kiocb *req)
+/*
+ * Mark the request as inflight, so that file cancelation will find it.
+ * Can be used if the file is an io_uring instance, or if the request itself
+ * relies on ->mm being alive for the duration of the request.
+ */
+inline void io_req_track_inflight(struct io_kiocb *req)
 {
 	if (!(req->flags & REQ_F_INFLIGHT)) {
 		req->flags |= REQ_F_INFLIGHT;
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 0ea7a435d1de..d59c12277d58 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -83,6 +83,7 @@ void io_add_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
 bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags);
 void __io_commit_cqring_flush(struct io_ring_ctx *ctx);
 
+void io_req_track_inflight(struct io_kiocb *req);
 struct file *io_file_get_normal(struct io_kiocb *req, int fd);
 struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
 			       unsigned issue_flags);
-- 
cgit v1.2.3