 include/uapi/linux/io_uring.h |  8
 io_uring/fdinfo.c             | 34
 io_uring/io_uring.c           | 37
 io_uring/io_uring.h           | 14
 io_uring/opdef.c              | 26
 io_uring/opdef.h              |  2
 io_uring/register.c           |  2
 io_uring/uring_cmd.c          | 17
 8 files changed, 114 insertions(+), 26 deletions(-)
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 263bed13473e..04797a9b76bc 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -231,6 +231,12 @@ enum io_uring_sqe_flags_bit {
*/
#define IORING_SETUP_CQE_MIXED (1U << 18)
+/*
+ * Allow both 64b and 128b SQEs in the same SQ. A 128b SQE must use one
+ * of the explicit 128b opcodes.
+ */
+#define IORING_SETUP_SQE_MIXED (1U << 19)
+
enum io_uring_op {
IORING_OP_NOP,
IORING_OP_READV,
@@ -295,6 +301,8 @@ enum io_uring_op {
IORING_OP_READV_FIXED,
IORING_OP_WRITEV_FIXED,
IORING_OP_PIPE,
+ IORING_OP_NOP128,
+ IORING_OP_URING_CMD128,
/* this goes last, obviously */
IORING_OP_LAST,
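For context, a minimal userspace sketch of opting into the new flag, assuming a liburing new enough to pass setup flags straight through. The fallback #define mirrors the uapi value added above; everything else is standard liburing API.

#include <liburing.h>
#include <stdio.h>

#ifndef IORING_SETUP_SQE_MIXED
#define IORING_SETUP_SQE_MIXED	(1U << 19)	/* matches the uapi addition above */
#endif

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int ret;

	ret = io_uring_queue_init(8, &ring, IORING_SETUP_SQE_MIXED);
	if (ret < 0) {
		/* older kernels reject unknown setup flags with -EINVAL */
		fprintf(stderr, "queue_init: %d\n", ret);
		return 1;
	}

	/* plain 64B SQEs keep working unchanged on a mixed SQ */
	sqe = io_uring_get_sqe(&ring);
	if (sqe) {
		io_uring_prep_nop(sqe);
		io_uring_submit(&ring);
		io_uring_wait_cqe(&ring, &cqe);
		io_uring_cqe_seen(&ring, cqe);
	}
	io_uring_queue_exit(&ring);
	return 0;
}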
diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
index ff3364531c77..1a806ad16840 100644
--- a/io_uring/fdinfo.c
+++ b/io_uring/fdinfo.c
@@ -14,6 +14,7 @@
#include "fdinfo.h"
#include "cancel.h"
#include "rsrc.h"
+#include "opdef.h"
#ifdef CONFIG_NET_RX_BUSY_POLL
static __cold void common_tracking_show_fdinfo(struct io_ring_ctx *ctx,
@@ -66,7 +67,6 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
unsigned int cq_head = READ_ONCE(r->cq.head);
unsigned int cq_tail = READ_ONCE(r->cq.tail);
unsigned int sq_shift = 0;
- unsigned int sq_entries;
int sq_pid = -1, sq_cpu = -1;
u64 sq_total_time = 0, sq_work_time = 0;
unsigned int i;
@@ -89,26 +89,45 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
seq_printf(m, "CqTail:\t%u\n", cq_tail);
seq_printf(m, "CachedCqTail:\t%u\n", data_race(ctx->cached_cq_tail));
seq_printf(m, "SQEs:\t%u\n", sq_tail - sq_head);
- sq_entries = min(sq_tail - sq_head, ctx->sq_entries);
- for (i = 0; i < sq_entries; i++) {
- unsigned int entry = i + sq_head;
+ while (sq_head < sq_tail) {
struct io_uring_sqe *sqe;
unsigned int sq_idx;
+ bool sqe128 = false;
+ u8 opcode;
if (ctx->flags & IORING_SETUP_NO_SQARRAY)
break;
- sq_idx = READ_ONCE(ctx->sq_array[entry & sq_mask]);
+ sq_idx = READ_ONCE(ctx->sq_array[sq_head & sq_mask]);
if (sq_idx > sq_mask)
continue;
+
sqe = &ctx->sq_sqes[sq_idx << sq_shift];
+ opcode = READ_ONCE(sqe->opcode);
+ if (sq_shift) {
+ sqe128 = true;
+ } else if (io_issue_defs[opcode].is_128) {
+ if (!(ctx->flags & IORING_SETUP_SQE_MIXED)) {
+ seq_printf(m,
+ "%5u: invalid sqe, 128B entry on non-mixed sq\n",
+ sq_idx);
+ break;
+ }
+ if ((++sq_head & sq_mask) == 0) {
+ seq_printf(m,
+ "%5u: corrupted sqe, wrapping 128B entry\n",
+ sq_idx);
+ break;
+ }
+ sqe128 = true;
+ }
seq_printf(m, "%5u: opcode:%s, fd:%d, flags:%x, off:%llu, "
"addr:0x%llx, rw_flags:0x%x, buf_index:%d "
"user_data:%llu",
- sq_idx, io_uring_get_opcode(sqe->opcode), sqe->fd,
+ sq_idx, io_uring_get_opcode(opcode), sqe->fd,
sqe->flags, (unsigned long long) sqe->off,
(unsigned long long) sqe->addr, sqe->rw_flags,
sqe->buf_index, sqe->user_data);
- if (sq_shift) {
+ if (sqe128) {
u64 *sqeb = (void *) (sqe + 1);
int size = sizeof(struct io_uring_sqe) / sizeof(u64);
int j;
@@ -120,6 +139,7 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
}
}
seq_printf(m, "\n");
+ sq_head++;
}
seq_printf(m, "CQEs:\t%u\n", cq_tail - cq_head);
while (cq_head < cq_tail) {
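The walk above is the core of mixed-SQ parsing: a 128B opcode consumes the following slot as the upper half of its SQE, and an entry whose second half would wrap to slot 0 is treated as corrupt. A standalone sketch of the same index math, ignoring the sq_array indirection for brevity; io_op_is_128() is a made-up stand-in for the kernel's io_issue_defs[opcode].is_128 lookup.

/* Sketch of the fdinfo-style walk; illustrative, not kernel code. */
static bool io_op_is_128(unsigned char opcode)
{
	return opcode == IORING_OP_NOP128 ||
	       opcode == IORING_OP_URING_CMD128;
}

static unsigned walk_mixed_sq(const struct io_uring_sqe *sqes,
			      unsigned head, unsigned tail, unsigned mask)
{
	unsigned entries = 0;

	while (head < tail) {
		const struct io_uring_sqe *sqe = &sqes[head & mask];

		if (io_op_is_128(sqe->opcode)) {
			/* second half may not wrap back to slot 0 */
			if (((head + 1) & mask) == 0)
				break;
			head++;	/* skip the slot holding bytes 64..127 */
		}
		head++;
		entries++;
	}
	return entries;
}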
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index e4ede0bad36f..be44d636fe1f 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2164,7 +2164,7 @@ static __cold int io_init_fail_req(struct io_kiocb *req, int err)
}
static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
- const struct io_uring_sqe *sqe)
+ const struct io_uring_sqe *sqe, unsigned int *left)
__must_hold(&ctx->uring_lock)
{
const struct io_issue_def *def;
@@ -2190,6 +2190,24 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
opcode = array_index_nospec(opcode, IORING_OP_LAST);
def = &io_issue_defs[opcode];
+ if (def->is_128 && !(ctx->flags & IORING_SETUP_SQE128)) {
+ /*
+ * A 128b op on a non-128b SQ requires mixed SQE support as
+ * well as 2 contiguous entries.
+ */
+ if (!(ctx->flags & IORING_SETUP_SQE_MIXED) || *left < 2 ||
+ !(ctx->cached_sq_head & (ctx->sq_entries - 1)))
+ return io_init_fail_req(req, -EINVAL);
+ /*
+ * A 128b operation on a mixed SQ uses two entries, so we have
+ * to increment the head and cached refs, and decrement what's
+ * left.
+ */
+ current->io_uring->cached_refs++;
+ ctx->cached_sq_head++;
+ (*left)--;
+ }
+
if (unlikely(sqe_flags & ~SQE_COMMON_FLAGS)) {
/* enforce forwards compatibility on users */
if (sqe_flags & ~SQE_VALID_FLAGS)
@@ -2299,13 +2317,13 @@ static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
}
static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
- const struct io_uring_sqe *sqe)
+ const struct io_uring_sqe *sqe, unsigned int *left)
__must_hold(&ctx->uring_lock)
{
struct io_submit_link *link = &ctx->submit_state.link;
int ret;
- ret = io_init_req(ctx, req, sqe);
+ ret = io_init_req(ctx, req, sqe, left);
if (unlikely(ret))
return io_submit_fail_init(sqe, req, ret);
@@ -2457,7 +2475,7 @@ int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
* Continue submitting even for sqe failure if the
* ring was setup with IORING_SETUP_SUBMIT_ALL
*/
- if (unlikely(io_submit_sqe(ctx, req, sqe)) &&
+ if (unlikely(io_submit_sqe(ctx, req, sqe, &left)) &&
!(ctx->flags & IORING_SETUP_SUBMIT_ALL)) {
left--;
break;
@@ -2802,6 +2820,10 @@ unsigned long rings_size(unsigned int flags, unsigned int sq_entries,
if (cq_entries < 2)
return SIZE_MAX;
}
+ if (flags & IORING_SETUP_SQE_MIXED) {
+ if (sq_entries < 2)
+ return SIZE_MAX;
+ }
#ifdef CONFIG_SMP
off = ALIGN(off, SMP_CACHE_BYTES);
@@ -3726,6 +3748,13 @@ static int io_uring_sanitise_params(struct io_uring_params *p)
if ((flags & (IORING_SETUP_CQE32|IORING_SETUP_CQE_MIXED)) ==
(IORING_SETUP_CQE32|IORING_SETUP_CQE_MIXED))
return -EINVAL;
+ /*
+ * Nonsensical to ask for both SQE128 and mixed SQE support, as posting
+ * 64b SQEs on a ring setup with SQE128 isn't supported.
+ */
+ if ((flags & (IORING_SETUP_SQE128|IORING_SETUP_SQE_MIXED)) ==
+ (IORING_SETUP_SQE128|IORING_SETUP_SQE_MIXED))
+ return -EINVAL;
return 0;
}
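To make the -EINVAL conditions in io_init_req() concrete: by the time it runs, io_get_sqe() has already advanced cached_sq_head past the current slot, so a zero result for cached_sq_head & (sq_entries - 1) means the first half sits in the last slot and the second half would wrap. A worked example with illustrative values:

/* Illustrative only: 8-entry SQ, mask 7. */
unsigned sq_entries = 8, mask = sq_entries - 1;
unsigned cached_sq_head = 8;	/* slot 7 was just consumed */

/* second half of a 128B SQE would land in slot 0: reject */
bool wraps = !(cached_sq_head & mask);	/* true -> -EINVAL */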
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 78777bf1ea4b..44b8091c7fcd 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -54,7 +54,8 @@
IORING_SETUP_REGISTERED_FD_ONLY |\
IORING_SETUP_NO_SQARRAY |\
IORING_SETUP_HYBRID_IOPOLL |\
- IORING_SETUP_CQE_MIXED)
+ IORING_SETUP_CQE_MIXED |\
+ IORING_SETUP_SQE_MIXED)
#define IORING_ENTER_FLAGS (IORING_ENTER_GETEVENTS |\
IORING_ENTER_SQ_WAKEUP |\
@@ -565,17 +566,6 @@ static inline void io_req_queue_tw_complete(struct io_kiocb *req, s32 res)
io_req_task_work_add(req);
}
-/*
- * IORING_SETUP_SQE128 contexts allocate twice the normal SQE size for each
- * slot.
- */
-static inline size_t uring_sqe_size(struct io_ring_ctx *ctx)
-{
- if (ctx->flags & IORING_SETUP_SQE128)
- return 2 * sizeof(struct io_uring_sqe);
- return sizeof(struct io_uring_sqe);
-}
-
static inline bool io_file_can_poll(struct io_kiocb *req)
{
if (req->flags & REQ_F_CAN_POLL)
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index 932319633eac..df52d760240e 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -575,6 +575,24 @@ const struct io_issue_def io_issue_defs[] = {
.prep = io_pipe_prep,
.issue = io_pipe,
},
+ [IORING_OP_NOP128] = {
+ .audit_skip = 1,
+ .iopoll = 1,
+ .is_128 = 1,
+ .prep = io_nop_prep,
+ .issue = io_nop,
+ },
+ [IORING_OP_URING_CMD128] = {
+ .buffer_select = 1,
+ .needs_file = 1,
+ .plug = 1,
+ .iopoll = 1,
+ .iopoll_queue = 1,
+ .is_128 = 1,
+ .async_size = sizeof(struct io_async_cmd),
+ .prep = io_uring_cmd_prep,
+ .issue = io_uring_cmd,
+ },
};
const struct io_cold_def io_cold_defs[] = {
@@ -825,6 +843,14 @@ const struct io_cold_def io_cold_defs[] = {
[IORING_OP_PIPE] = {
.name = "PIPE",
},
+ [IORING_OP_NOP128] = {
+ .name = "NOP128",
+ },
+ [IORING_OP_URING_CMD128] = {
+ .name = "URING_CMD128",
+ .sqe_copy = io_uring_cmd_sqe_copy,
+ .cleanup = io_uring_cmd_cleanup,
+ },
};
const char *io_uring_get_opcode(u8 opcode)
diff --git a/io_uring/opdef.h b/io_uring/opdef.h
index c2f0907ed78c..aa37846880ff 100644
--- a/io_uring/opdef.h
+++ b/io_uring/opdef.h
@@ -27,6 +27,8 @@ struct io_issue_def {
unsigned iopoll_queue : 1;
/* vectored opcode, set if 1) vectored, and 2) handler needs to know */
unsigned vectored : 1;
+ /* set to 1 if this opcode uses 128b sqes in a mixed sq */
+ unsigned is_128 : 1;
/* size of async data needed, if any */
unsigned short async_size;
diff --git a/io_uring/register.c b/io_uring/register.c
index 43eb02004824..1a3e05be6e7b 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -394,7 +394,7 @@ static void io_register_free_rings(struct io_ring_ctx *ctx,
#define RESIZE_FLAGS (IORING_SETUP_CQSIZE | IORING_SETUP_CLAMP)
#define COPY_FLAGS (IORING_SETUP_NO_SQARRAY | IORING_SETUP_SQE128 | \
IORING_SETUP_CQE32 | IORING_SETUP_NO_MMAP | \
- IORING_SETUP_CQE_MIXED)
+ IORING_SETUP_CQE_MIXED | IORING_SETUP_SQE_MIXED)
static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
{
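COPY_FLAGS is the set of setup flags carried over unchanged on a ring resize, so with this change a mixed SQ stays mixed across IORING_REGISTER_RESIZE_RINGS. A hedged sketch, assuming a liburing that provides the io_uring_resize_rings() helper; only the new sizes are supplied, the copied flags come from the existing ring:

struct io_uring_params p = {
	.sq_entries = 64,
	.cq_entries = 128,
};

/* flags in COPY_FLAGS, including IORING_SETUP_SQE_MIXED, are preserved */
int ret = io_uring_resize_rings(&ring, &p);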
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index 1225f8124e4b..9d67a2a721aa 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -216,6 +216,18 @@ int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return 0;
}
+/*
+ * IORING_SETUP_SQE128 contexts, and IORING_OP_URING_CMD128 requests on a
+ * mixed SQ, use twice the normal SQE size.
+ */
+static inline size_t uring_sqe_size(struct io_kiocb *req)
+{
+ if (req->ctx->flags & IORING_SETUP_SQE128 ||
+ req->opcode == IORING_OP_URING_CMD128)
+ return 2 * sizeof(struct io_uring_sqe);
+ return sizeof(struct io_uring_sqe);
+}
+
void io_uring_cmd_sqe_copy(struct io_kiocb *req)
{
struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
@@ -224,7 +236,7 @@ void io_uring_cmd_sqe_copy(struct io_kiocb *req)
/* Should not happen, as REQ_F_SQE_COPIED covers this */
if (WARN_ON_ONCE(ioucmd->sqe == ac->sqes))
return;
- memcpy(ac->sqes, ioucmd->sqe, uring_sqe_size(req->ctx));
+ memcpy(ac->sqes, ioucmd->sqe, uring_sqe_size(req));
ioucmd->sqe = ac->sqes;
}
@@ -242,7 +254,8 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
if (ret)
return ret;
- if (ctx->flags & IORING_SETUP_SQE128)
+ if (ctx->flags & IORING_SETUP_SQE128 ||
+ req->opcode == IORING_OP_URING_CMD128)
issue_flags |= IO_URING_F_SQE128;
if (ctx->flags & (IORING_SETUP_CQE32 | IORING_SETUP_CQE_MIXED))
issue_flags |= IO_URING_F_CQE32;
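Since IO_URING_F_SQE128 is now also set for IORING_OP_URING_CMD128 on a mixed SQ, a ->uring_cmd() handler can keep keying payload size off that one flag. A hedged sketch: struct my_cmd and my_handle_cmd() are hypothetical, while io_uring_sqe_cmd() is the existing accessor for the SQE's command area.

static int my_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
{
	const struct my_cmd *cmd;

	/*
	 * The full 80-byte command payload is only present when the
	 * core flagged a big SQE: either an SQE128 ring, or an
	 * IORING_OP_URING_CMD128 request on a mixed SQ.
	 */
	if (!(issue_flags & IO_URING_F_SQE128))
		return -EOPNOTSUPP;

	cmd = io_uring_sqe_cmd(ioucmd->sqe);
	/* complete synchronously; a negative return fails the request */
	return my_handle_cmd(cmd);
}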