Diffstat (limited to 'io_uring')
 io_uring/io_uring.c  |  9
 io_uring/kbuf.c      |  6
 io_uring/openclose.c |  6
 io_uring/rw.c        | 58
 4 files changed, 41 insertions(+), 38 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index f4591b912ea8..bb201503f0db 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2643,14 +2643,10 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
static void io_mem_free(void *ptr)
{
- struct page *page;
-
if (!ptr)
return;
- page = virt_to_head_page(ptr);
- if (put_page_testzero(page))
- free_compound_page(page);
+ folio_put(virt_to_folio(ptr));
}
static void io_pages_free(struct page ***pages, int npages)
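Illustrative sketch, not part of the patch: the conversion above (and the identical one in kbuf.c below) replaces the open-coded "drop the reference, free the compound page on the last put" sequence with folio_put(), which performs exactly that internally. virt_to_folio() resolves the kernel virtual address of the ring allocation to its folio, assuming the memory came from a compound allocation such as alloc_pages() with __GFP_COMP.

#include <linux/mm.h>

/* Old style: open-coded put and free of the head page. */
static void old_style_free(void *ptr)
{
	struct page *page = virt_to_head_page(ptr);

	if (put_page_testzero(page))
		free_compound_page(page);
}

/* New style: folio_put() drops the reference and frees on zero. */
static void new_style_free(void *ptr)
{
	folio_put(virt_to_folio(ptr));
}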
@@ -3470,6 +3466,8 @@ static unsigned long io_uring_mmu_get_unmapped_area(struct file *filp,
* - use the kernel virtual address of the shared io_uring context
* (instead of the userspace-provided address, which has to be 0UL
* anyway).
+ * - use the same pgoff which the get_unmapped_area() uses to
+ * calculate the page colouring.
* For architectures without such aliasing requirements, the
* architecture will return any suitable mapping because addr is 0.
*/
@@ -3478,6 +3476,7 @@ static unsigned long io_uring_mmu_get_unmapped_area(struct file *filp,
pgoff = 0; /* has been translated to ptr above */
#ifdef SHM_COLOUR
addr = (uintptr_t) ptr;
+ pgoff = addr >> PAGE_SHIFT;
#else
addr = 0UL;
#endif
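Rough illustration, not the actual architecture code: SHM_COLOUR is defined on architectures with virtually indexed, aliasing caches (parisc, for example), where two mappings of the same memory must agree in the low "colour" bits of their virtual addresses. get_unmapped_area() derives the required colour from pgoff, so the hunk above passes pgoff = kernel_vaddr >> PAGE_SHIFT alongside addr so the user mapping lands on the same colour as the kernel mapping of the rings. The constants below (a PAGE_SHIFT of 12, the parisc SHM_COLOUR value) are assumptions for the sketch only.

#define PAGE_SHIFT	12
#define SHM_COLOUR	0x00400000UL

/* Colour of a virtual address: its offset within the aliasing window. */
static inline unsigned long colour_of(unsigned long vaddr)
{
	return vaddr & (SHM_COLOUR - 1);
}

/*
 * With pgoff = kernel_vaddr >> PAGE_SHIFT, the colour the arch code
 * computes for the user mapping matches the kernel address's colour.
 */
static inline unsigned long required_colour(unsigned long pgoff)
{
	return colour_of(pgoff << PAGE_SHIFT);
}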
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index 2f0181521c98..556f4df25b0f 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -218,11 +218,7 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx,
if (bl->is_mapped) {
i = bl->buf_ring->tail - bl->head;
if (bl->is_mmap) {
- struct page *page;
-
- page = virt_to_head_page(bl->buf_ring);
- if (put_page_testzero(page))
- free_compound_page(page);
+ folio_put(virt_to_folio(bl->buf_ring));
bl->buf_ring = NULL;
bl->is_mmap = 0;
} else if (bl->buf_nr_pages) {
diff --git a/io_uring/openclose.c b/io_uring/openclose.c
index 10ca57f5bd24..e3fae26e025d 100644
--- a/io_uring/openclose.c
+++ b/io_uring/openclose.c
@@ -35,9 +35,11 @@ static bool io_openat_force_async(struct io_open *open)
{
/*
* Don't bother trying for O_TRUNC, O_CREAT, or O_TMPFILE open,
- * it'll always -EAGAIN
+ * it'll always -EAGAIN. Note that we test for __O_TMPFILE because
+ * O_TMPFILE includes O_DIRECTORY, which isn't a flag we need to force
+ * async for.
*/
- return open->how.flags & (O_TRUNC | O_CREAT | O_TMPFILE);
+ return open->how.flags & (O_TRUNC | O_CREAT | __O_TMPFILE);
}
static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
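Background for the hunk above: in the uapi headers O_TMPFILE is defined as (__O_TMPFILE | O_DIRECTORY), so masking open->how.flags with O_TMPFILE also matches a plain O_DIRECTORY open and would force those onto the async path for no reason; masking with __O_TMPFILE restricts the check to real tmpfile opens. A small userspace check (assumes glibc with _GNU_SOURCE) that shows the overlap:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>

int main(void)
{
	int dir_only = O_DIRECTORY;	/* an open with O_DIRECTORY but not O_TMPFILE */

	/* Fires, because O_TMPFILE contains the O_DIRECTORY bit... */
	printf("masking with O_TMPFILE matches: %s\n",
	       (dir_only & O_TMPFILE) ? "yes" : "no");

	/* ...while masking with just the tmpfile bit does not. */
	printf("masking with __O_TMPFILE matches: %s\n",
	       (dir_only & (O_TMPFILE & ~O_DIRECTORY)) ? "yes" : "no");
	return 0;
}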
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 1bce2208b65c..b3435033fadf 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -105,6 +105,7 @@ int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
} else {
rw->kiocb.ki_ioprio = get_current_ioprio();
}
+ rw->kiocb.dio_complete = NULL;
rw->addr = READ_ONCE(sqe->addr);
rw->len = READ_ONCE(sqe->len);
@@ -220,17 +221,12 @@ static bool io_rw_should_reissue(struct io_kiocb *req)
}
#endif
-static void kiocb_end_write(struct io_kiocb *req)
+static void io_req_end_write(struct io_kiocb *req)
{
- /*
- * Tell lockdep we inherited freeze protection from submission
- * thread.
- */
if (req->flags & REQ_F_ISREG) {
- struct super_block *sb = file_inode(req->file)->i_sb;
+ struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
- __sb_writers_acquired(sb, SB_FREEZE_WRITE);
- sb_end_write(sb);
+ kiocb_end_write(&rw->kiocb);
}
}
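The removed open-coded sequence (grab freeze protection with sb_start_write() at submission, then fool lockdep with __sb_writers_release() because the protection may be dropped by a different thread at completion) now lives in the generic kiocb_start_write()/kiocb_end_write() helpers, leaving io_uring with only the thin io_req_end_write() wrapper that digs the kiocb out of the request. A sketch of what the helpers boil down to; the authoritative definitions are in include/linux/fs.h:

#include <linux/fs.h>

static void sketch_kiocb_start_write(struct kiocb *iocb)
{
	struct inode *inode = file_inode(iocb->ki_filp);

	sb_start_write(inode->i_sb);
	/* Completion may run in another thread; tell lockdep the "lock"
	 * was released so it doesn't warn when we return to userspace. */
	__sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
}

static void sketch_kiocb_end_write(struct kiocb *iocb)
{
	struct inode *inode = file_inode(iocb->ki_filp);

	/* Tell lockdep we inherited freeze protection from the submitter. */
	__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
	sb_end_write(inode->i_sb);
}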
@@ -243,7 +239,7 @@ static void io_req_io_end(struct io_kiocb *req)
struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
if (rw->kiocb.ki_flags & IOCB_WRITE) {
- kiocb_end_write(req);
+ io_req_end_write(req);
fsnotify_modify(req->file);
} else {
fsnotify_access(req->file);
@@ -285,6 +281,15 @@ static inline int io_fixup_rw_res(struct io_kiocb *req, long res)
void io_req_rw_complete(struct io_kiocb *req, struct io_tw_state *ts)
{
+ struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+ struct kiocb *kiocb = &rw->kiocb;
+
+ if ((kiocb->ki_flags & IOCB_DIO_CALLER_COMP) && kiocb->dio_complete) {
+ long res = kiocb->dio_complete(rw->kiocb.private);
+
+ io_req_set_res(req, io_fixup_rw_res(req, res), 0);
+ }
+
io_req_io_end(req);
if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)) {
@@ -300,9 +305,11 @@ static void io_complete_rw(struct kiocb *kiocb, long res)
struct io_rw *rw = container_of(kiocb, struct io_rw, kiocb);
struct io_kiocb *req = cmd_to_io_kiocb(rw);
- if (__io_complete_rw_common(req, res))
- return;
- io_req_set_res(req, io_fixup_rw_res(req, res), 0);
+ if (!kiocb->dio_complete || !(kiocb->ki_flags & IOCB_DIO_CALLER_COMP)) {
+ if (__io_complete_rw_common(req, res))
+ return;
+ io_req_set_res(req, io_fixup_rw_res(req, res), 0);
+ }
req->io_task_work.func = io_req_rw_complete;
__io_req_task_work_add(req, IOU_F_TWQ_LAZY_WAKE);
}
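The two hunks above implement the issuer side of IOCB_DIO_CALLER_COMP: when the lower layer has stashed a callback in kiocb->dio_complete, io_complete_rw() skips the usual completion bookkeeping (the res it was handed is a placeholder) and only queues task work; io_req_rw_complete() then calls dio_complete() from task context and uses its return value as the real result. A hypothetical sketch of the producer side, with names like my_dio and my_dio_deferred_complete invented for illustration, not taken from the kernel:

struct my_dio {
	struct kiocb	*iocb;
	ssize_t		bytes_done;
	int		error;
};

/* Runs later in the issuer's task context via kiocb->dio_complete(). */
static ssize_t my_dio_deferred_complete(void *data)
{
	struct my_dio *dio = data;

	/* Heavier completion work is safe here, outside IRQ context. */
	return dio->error ? dio->error : dio->bytes_done;
}

static void my_dio_end_io(struct my_dio *dio)
{
	struct kiocb *iocb = dio->iocb;

	if (iocb->ki_flags & IOCB_DIO_CALLER_COMP) {
		/* Defer the result; the placeholder res passed here is ignored. */
		iocb->private = dio;
		iocb->dio_complete = my_dio_deferred_complete;
		iocb->ki_complete(iocb, 0);
	} else {
		iocb->ki_complete(iocb, dio->error ? dio->error : dio->bytes_done);
	}
}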
@@ -313,7 +320,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
struct io_kiocb *req = cmd_to_io_kiocb(rw);
if (kiocb->ki_flags & IOCB_WRITE)
- kiocb_end_write(req);
+ io_req_end_write(req);
if (unlikely(res != req->cqe.res)) {
if (res == -EAGAIN && io_rw_should_reissue(req)) {
req->flags |= REQ_F_REISSUE | REQ_F_PARTIAL_IO;
@@ -902,19 +909,18 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
return ret;
}
+ if (req->flags & REQ_F_ISREG)
+ kiocb_start_write(kiocb);
+ kiocb->ki_flags |= IOCB_WRITE;
+
/*
- * Open-code file_start_write here to grab freeze protection,
- * which will be released by another thread in
- * io_complete_rw(). Fool lockdep by telling it the lock got
- * released so that it doesn't complain about the held lock when
- * we return to userspace.
+ * For non-polled IO, set IOCB_DIO_CALLER_COMP, stating that our handler
+ * groks deferring the completion to task context. This isn't
+ * necessary and useful for polled IO as that can always complete
+ * directly.
*/
- if (req->flags & REQ_F_ISREG) {
- sb_start_write(file_inode(req->file)->i_sb);
- __sb_writers_release(file_inode(req->file)->i_sb,
- SB_FREEZE_WRITE);
- }
- kiocb->ki_flags |= IOCB_WRITE;
+ if (!(kiocb->ki_flags & IOCB_HIPRI))
+ kiocb->ki_flags |= IOCB_DIO_CALLER_COMP;
if (likely(req->file->f_op->write_iter))
ret2 = call_write_iter(req->file, kiocb, &s->iter);
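Nothing changes for userspace here: an O_DIRECT write submitted through io_uring on a non-polled ring may now have its final DIO completion run from the submitting task's context instead of IRQ context, but the CQE it produces is indistinguishable. A minimal liburing example of such a write; the file name and the 4096-byte O_DIRECT alignment are assumptions about the test setup, not requirements of the patch:

#define _GNU_SOURCE
#include <fcntl.h>
#include <liburing.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	void *buf;
	int fd;

	fd = open("testfile", O_WRONLY | O_CREAT | O_DIRECT, 0644);
	if (fd < 0 || posix_memalign(&buf, 4096, 4096))
		return 1;
	memset(buf, 'A', 4096);

	io_uring_queue_init(8, &ring, 0);
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_write(sqe, fd, buf, 4096, 0);
	io_uring_submit(&ring);

	/* The kernel may finish the DIO from task context (IOCB_DIO_CALLER_COMP);
	 * the completion seen here looks the same either way. */
	if (io_uring_wait_cqe(&ring, &cqe) == 0) {
		printf("write res=%d\n", cqe->res);
		io_uring_cqe_seen(&ring, cqe);
	}
	io_uring_queue_exit(&ring);
	return 0;
}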
@@ -961,7 +967,7 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
io->bytes_done += ret2;
if (kiocb->ki_flags & IOCB_WRITE)
- kiocb_end_write(req);
+ io_req_end_write(req);
return ret ? ret : -EAGAIN;
}
done:
@@ -972,7 +978,7 @@ copy_iov:
ret = io_setup_async_rw(req, iovec, s, false);
if (!ret) {
if (kiocb->ki_flags & IOCB_WRITE)
- kiocb_end_write(req);
+ io_req_end_write(req);
return -EAGAIN;
}
return ret;