summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--rust/kernel/fs.rs3
-rw-r--r--rust/kernel/fs/kiocb.rs68
-rw-r--r--rust/kernel/iov.rs314
-rw-r--r--rust/kernel/lib.rs1
-rw-r--r--rust/kernel/miscdevice.rs63
-rw-r--r--samples/rust/rust_misc_device.rs36
6 files changed, 482 insertions, 3 deletions
diff --git a/rust/kernel/fs.rs b/rust/kernel/fs.rs
index 0121b38c59e6..6ba6bdf143cb 100644
--- a/rust/kernel/fs.rs
+++ b/rust/kernel/fs.rs
@@ -6,3 +6,6 @@
pub mod file;
pub use self::file::{File, LocalFile};
+
+mod kiocb;
+pub use self::kiocb::Kiocb;
diff --git a/rust/kernel/fs/kiocb.rs b/rust/kernel/fs/kiocb.rs
new file mode 100644
index 000000000000..84c936cd69b0
--- /dev/null
+++ b/rust/kernel/fs/kiocb.rs
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2024 Google LLC.
+
+//! Kernel IO callbacks.
+//!
+//! C headers: [`include/linux/fs.h`](srctree/include/linux/fs.h)
+
+use core::marker::PhantomData;
+use core::ptr::NonNull;
+use kernel::types::ForeignOwnable;
+
+/// Wrapper for the kernel's `struct kiocb`.
+///
+/// Currently this abstractions is incomplete and is essentially just a tuple containing a
+/// reference to a file and a file position.
+///
+/// The type `T` represents the filesystem or driver specific data associated with the file.
+///
+/// # Invariants
+///
+/// `inner` points at a valid `struct kiocb` whose file has the type `T` as its private data.
+pub struct Kiocb<'a, T> {
+ inner: NonNull<bindings::kiocb>,
+ _phantom: PhantomData<&'a T>,
+}
+
+impl<'a, T: ForeignOwnable> Kiocb<'a, T> {
+ /// Create a `Kiocb` from a raw pointer.
+ ///
+ /// # Safety
+ ///
+ /// The pointer must reference a valid `struct kiocb` for the duration of `'a`. The private
+ /// data of the file must be `T`.
+ pub unsafe fn from_raw(kiocb: *mut bindings::kiocb) -> Self {
+ Self {
+ // SAFETY: If a pointer is valid it is not null.
+ inner: unsafe { NonNull::new_unchecked(kiocb) },
+ _phantom: PhantomData,
+ }
+ }
+
+ /// Access the underlying `struct kiocb` directly.
+ pub fn as_raw(&self) -> *mut bindings::kiocb {
+ self.inner.as_ptr()
+ }
+
+ /// Get the filesystem or driver specific data associated with the file.
+ pub fn file(&self) -> <T as ForeignOwnable>::Borrowed<'a> {
+ // SAFETY: We have shared access to this kiocb and hence the underlying file, so we can
+ // read the file's private data.
+ let private = unsafe { (*(*self.as_raw()).ki_filp).private_data };
+ // SAFETY: The kiocb has shared access to the private data.
+ unsafe { <T as ForeignOwnable>::borrow(private) }
+ }
+
+ /// Gets the current value of `ki_pos`.
+ pub fn ki_pos(&self) -> i64 {
+ // SAFETY: We have shared access to the kiocb, so we can read its `ki_pos` field.
+ unsafe { (*self.as_raw()).ki_pos }
+ }
+
+ /// Gets a mutable reference to the `ki_pos` field.
+ pub fn ki_pos_mut(&mut self) -> &mut i64 {
+ // SAFETY: We have exclusive access to the kiocb, so we can write to `ki_pos`.
+ unsafe { &mut (*self.as_raw()).ki_pos }
+ }
+}
diff --git a/rust/kernel/iov.rs b/rust/kernel/iov.rs
new file mode 100644
index 000000000000..43bae8923c46
--- /dev/null
+++ b/rust/kernel/iov.rs
@@ -0,0 +1,314 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2025 Google LLC.
+
+//! IO vectors.
+//!
+//! C headers: [`include/linux/iov_iter.h`](srctree/include/linux/iov_iter.h),
+//! [`include/linux/uio.h`](srctree/include/linux/uio.h)
+
+use crate::{
+ alloc::{Allocator, Flags},
+ bindings,
+ prelude::*,
+ types::Opaque,
+};
+use core::{marker::PhantomData, mem::MaybeUninit, ptr, slice};
+
+const ITER_SOURCE: bool = bindings::ITER_SOURCE != 0;
+const ITER_DEST: bool = bindings::ITER_DEST != 0;
+
+// Compile-time assertion for the above constants.
+const _: () = {
+ build_assert!(
+ ITER_SOURCE != ITER_DEST,
+ "ITER_DEST and ITER_SOURCE should be different."
+ );
+};
+
+/// An IO vector that acts as a source of data.
+///
+/// The data may come from many different sources. This includes both things in kernel-space and
+/// reading from userspace. It's not necessarily the case that the data source is immutable, so
+/// rewinding the IO vector to read the same data twice is not guaranteed to result in the same
+/// bytes. It's also possible that the data source is mapped in a thread-local manner using e.g.
+/// `kmap_local_page()`, so this type is not `Send` to ensure that the mapping is read from the
+/// right context in that scenario.
+///
+/// # Invariants
+///
+/// Must hold a valid `struct iov_iter` with `data_source` set to `ITER_SOURCE`. For the duration
+/// of `'data`, it must be safe to read from this IO vector using the standard C methods for this
+/// purpose.
+#[repr(transparent)]
+pub struct IovIterSource<'data> {
+ iov: Opaque<bindings::iov_iter>,
+ /// Represent to the type system that this value contains a pointer to readable data it does
+ /// not own.
+ _source: PhantomData<&'data [u8]>,
+}
+
+impl<'data> IovIterSource<'data> {
+ /// Obtain an `IovIterSource` from a raw pointer.
+ ///
+ /// # Safety
+ ///
+ /// * The referenced `struct iov_iter` must be valid and must only be accessed through the
+ /// returned reference for the duration of `'iov`.
+ /// * The referenced `struct iov_iter` must have `data_source` set to `ITER_SOURCE`.
+ /// * For the duration of `'data`, it must be safe to read from this IO vector using the
+ /// standard C methods for this purpose.
+ #[track_caller]
+ #[inline]
+ pub unsafe fn from_raw<'iov>(ptr: *mut bindings::iov_iter) -> &'iov mut IovIterSource<'data> {
+ // SAFETY: The caller ensures that `ptr` is valid.
+ let data_source = unsafe { (*ptr).data_source };
+ assert_eq!(data_source, ITER_SOURCE);
+
+ // SAFETY: The caller ensures the type invariants for the right durations, and
+ // `IovIterSource` is layout compatible with `struct iov_iter`.
+ unsafe { &mut *ptr.cast::<IovIterSource<'data>>() }
+ }
+
+ /// Access this as a raw `struct iov_iter`.
+ #[inline]
+ pub fn as_raw(&mut self) -> *mut bindings::iov_iter {
+ self.iov.get()
+ }
+
+ /// Returns the number of bytes available in this IO vector.
+ ///
+ /// Note that this may overestimate the number of bytes. For example, reading from userspace
+ /// memory could fail with `EFAULT`, which will be treated as the end of the IO vector.
+ #[inline]
+ pub fn len(&self) -> usize {
+ // SAFETY: We have shared access to this IO vector, so we can read its `count` field.
+ unsafe {
+ (*self.iov.get())
+ .__bindgen_anon_1
+ .__bindgen_anon_1
+ .as_ref()
+ .count
+ }
+ }
+
+ /// Returns whether there are any bytes left in this IO vector.
+ ///
+ /// This may return `true` even if there are no more bytes available. For example, reading from
+ /// userspace memory could fail with `EFAULT`, which will be treated as the end of the IO vector.
+ #[inline]
+ pub fn is_empty(&self) -> bool {
+ self.len() == 0
+ }
+
+ /// Advance this IO vector by `bytes` bytes.
+ ///
+ /// If `bytes` is larger than the size of this IO vector, it is advanced to the end.
+ #[inline]
+ pub fn advance(&mut self, bytes: usize) {
+ // SAFETY: By the type invariants, `self.iov` is a valid IO vector.
+ unsafe { bindings::iov_iter_advance(self.as_raw(), bytes) };
+ }
+
+ /// Advance this IO vector backwards by `bytes` bytes.
+ ///
+ /// # Safety
+ ///
+ /// The IO vector must not be reverted to before its beginning.
+ #[inline]
+ pub unsafe fn revert(&mut self, bytes: usize) {
+ // SAFETY: By the type invariants, `self.iov` is a valid IO vector, and the caller
+ // ensures that `bytes` is in bounds.
+ unsafe { bindings::iov_iter_revert(self.as_raw(), bytes) };
+ }
+
+ /// Read data from this IO vector.
+ ///
+ /// Returns the number of bytes that have been copied.
+ #[inline]
+ pub fn copy_from_iter(&mut self, out: &mut [u8]) -> usize {
+ // SAFETY: `Self::copy_from_iter_raw` guarantees that it will not write any uninitialized
+ // bytes in the provided buffer, so `out` is still a valid `u8` slice after this call.
+ let out = unsafe { &mut *(ptr::from_mut(out) as *mut [MaybeUninit<u8>]) };
+
+ self.copy_from_iter_raw(out).len()
+ }
+
+ /// Read data from this IO vector and append it to a vector.
+ ///
+ /// Returns the number of bytes that have been copied.
+ #[inline]
+ pub fn copy_from_iter_vec<A: Allocator>(
+ &mut self,
+ out: &mut Vec<u8, A>,
+ flags: Flags,
+ ) -> Result<usize> {
+ out.reserve(self.len(), flags)?;
+ let len = self.copy_from_iter_raw(out.spare_capacity_mut()).len();
+ // SAFETY:
+ // - `len` is the length of a subslice of the spare capacity, so `len` is at most the
+ // length of the spare capacity.
+ // - `Self::copy_from_iter_raw` guarantees that the first `len` bytes of the spare capacity
+ // have been initialized.
+ unsafe { out.inc_len(len) };
+ Ok(len)
+ }
+
+ /// Read data from this IO vector into potentially uninitialized memory.
+ ///
+ /// Returns the sub-slice of the output that has been initialized. If the returned slice is
+ /// shorter than the input buffer, then the entire IO vector has been read.
+ ///
+ /// This will never write uninitialized bytes to the provided buffer.
+ #[inline]
+ pub fn copy_from_iter_raw(&mut self, out: &mut [MaybeUninit<u8>]) -> &mut [u8] {
+ let capacity = out.len();
+ let out = out.as_mut_ptr().cast::<u8>();
+
+ // GUARANTEES: The C API guarantees that it does not write uninitialized bytes to the
+ // provided buffer.
+ // SAFETY:
+ // * By the type invariants, it is still valid to read from this IO vector.
+ // * `out` is valid for writing for `capacity` bytes because it comes from a slice of
+ // that length.
+ let len = unsafe { bindings::_copy_from_iter(out.cast(), capacity, self.as_raw()) };
+
+ // SAFETY: The underlying C api guarantees that initialized bytes have been written to the
+ // first `len` bytes of the spare capacity.
+ unsafe { slice::from_raw_parts_mut(out, len) }
+ }
+}
+
+/// An IO vector that acts as a destination for data.
+///
+/// IO vectors support many different types of destinations. This includes both buffers in
+/// kernel-space and writing to userspace. It's possible that the destination buffer is mapped in a
+/// thread-local manner using e.g. `kmap_local_page()`, so this type is not `Send` to ensure that
+/// the mapping is written to the right context in that scenario.
+///
+/// # Invariants
+///
+/// Must hold a valid `struct iov_iter` with `data_source` set to `ITER_DEST`. For the duration of
+/// `'data`, it must be safe to write to this IO vector using the standard C methods for this
+/// purpose.
+#[repr(transparent)]
+pub struct IovIterDest<'data> {
+ iov: Opaque<bindings::iov_iter>,
+ /// Represent to the type system that this value contains a pointer to writable data it does
+ /// not own.
+ _source: PhantomData<&'data mut [u8]>,
+}
+
+impl<'data> IovIterDest<'data> {
+ /// Obtain an `IovIterDest` from a raw pointer.
+ ///
+ /// # Safety
+ ///
+ /// * The referenced `struct iov_iter` must be valid and must only be accessed through the
+ /// returned reference for the duration of `'iov`.
+ /// * The referenced `struct iov_iter` must have `data_source` set to `ITER_DEST`.
+ /// * For the duration of `'data`, it must be safe to write to this IO vector using the
+ /// standard C methods for this purpose.
+ #[track_caller]
+ #[inline]
+ pub unsafe fn from_raw<'iov>(ptr: *mut bindings::iov_iter) -> &'iov mut IovIterDest<'data> {
+ // SAFETY: The caller ensures that `ptr` is valid.
+ let data_source = unsafe { (*ptr).data_source };
+ assert_eq!(data_source, ITER_DEST);
+
+ // SAFETY: The caller ensures the type invariants for the right durations, and
+ // `IovIterSource` is layout compatible with `struct iov_iter`.
+ unsafe { &mut *ptr.cast::<IovIterDest<'data>>() }
+ }
+
+ /// Access this as a raw `struct iov_iter`.
+ #[inline]
+ pub fn as_raw(&mut self) -> *mut bindings::iov_iter {
+ self.iov.get()
+ }
+
+ /// Returns the number of bytes available in this IO vector.
+ ///
+ /// Note that this may overestimate the number of bytes. For example, reading from userspace
+ /// memory could fail with EFAULT, which will be treated as the end of the IO vector.
+ #[inline]
+ pub fn len(&self) -> usize {
+ // SAFETY: We have shared access to this IO vector, so we can read its `count` field.
+ unsafe {
+ (*self.iov.get())
+ .__bindgen_anon_1
+ .__bindgen_anon_1
+ .as_ref()
+ .count
+ }
+ }
+
+ /// Returns whether there are any bytes left in this IO vector.
+ ///
+ /// This may return `true` even if there are no more bytes available. For example, reading from
+ /// userspace memory could fail with EFAULT, which will be treated as the end of the IO vector.
+ #[inline]
+ pub fn is_empty(&self) -> bool {
+ self.len() == 0
+ }
+
+ /// Advance this IO vector by `bytes` bytes.
+ ///
+ /// If `bytes` is larger than the size of this IO vector, it is advanced to the end.
+ #[inline]
+ pub fn advance(&mut self, bytes: usize) {
+ // SAFETY: By the type invariants, `self.iov` is a valid IO vector.
+ unsafe { bindings::iov_iter_advance(self.as_raw(), bytes) };
+ }
+
+ /// Advance this IO vector backwards by `bytes` bytes.
+ ///
+ /// # Safety
+ ///
+ /// The IO vector must not be reverted to before its beginning.
+ #[inline]
+ pub unsafe fn revert(&mut self, bytes: usize) {
+ // SAFETY: By the type invariants, `self.iov` is a valid IO vector, and the caller
+ // ensures that `bytes` is in bounds.
+ unsafe { bindings::iov_iter_revert(self.as_raw(), bytes) };
+ }
+
+ /// Write data to this IO vector.
+ ///
+ /// Returns the number of bytes that were written. If this is shorter than the provided slice,
+ /// then no more bytes can be written.
+ #[inline]
+ pub fn copy_to_iter(&mut self, input: &[u8]) -> usize {
+ // SAFETY:
+ // * By the type invariants, it is still valid to write to this IO vector.
+ // * `input` is valid for `input.len()` bytes.
+ unsafe { bindings::_copy_to_iter(input.as_ptr().cast(), input.len(), self.as_raw()) }
+ }
+
+ /// Utility for implementing `read_iter` given the full contents of the file.
+ ///
+ /// The full contents of the file being read from is represented by `contents`. This call will
+ /// write the appropriate sub-slice of `contents` and update the file position in `ppos` so
+ /// that the file will appear to contain `contents` even if takes multiple reads to read the
+ /// entire file.
+ #[inline]
+ pub fn simple_read_from_buffer(&mut self, ppos: &mut i64, contents: &[u8]) -> Result<usize> {
+ if *ppos < 0 {
+ return Err(EINVAL);
+ }
+ let Ok(pos) = usize::try_from(*ppos) else {
+ return Ok(0);
+ };
+ if pos >= contents.len() {
+ return Ok(0);
+ }
+
+ // BOUNDS: We just checked that `pos < contents.len()` above.
+ let num_written = self.copy_to_iter(&contents[pos..]);
+
+ // OVERFLOW: `pos+num_written <= contents.len() <= isize::MAX <= i64::MAX`.
+ *ppos = (pos + num_written) as i64;
+
+ Ok(num_written)
+ }
+}
diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
index ed53169e795c..99dbb7b2812e 100644
--- a/rust/kernel/lib.rs
+++ b/rust/kernel/lib.rs
@@ -92,6 +92,7 @@ pub mod fs;
pub mod init;
pub mod io;
pub mod ioctl;
+pub mod iov;
pub mod jump_label;
#[cfg(CONFIG_KUNIT)]
pub mod kunit;
diff --git a/rust/kernel/miscdevice.rs b/rust/kernel/miscdevice.rs
index 6373fe183b27..35630fc63875 100644
--- a/rust/kernel/miscdevice.rs
+++ b/rust/kernel/miscdevice.rs
@@ -13,7 +13,8 @@ use crate::{
device::Device,
error::{to_result, Error, Result, VTABLE_DEFAULT_ERROR},
ffi::{c_int, c_long, c_uint, c_ulong},
- fs::File,
+ fs::{File, Kiocb},
+ iov::{IovIterDest, IovIterSource},
mm::virt::VmaNew,
prelude::*,
seq_file::SeqFile,
@@ -141,6 +142,16 @@ pub trait MiscDevice: Sized {
build_error!(VTABLE_DEFAULT_ERROR)
}
+ /// Read from this miscdevice.
+ fn read_iter(_kiocb: Kiocb<'_, Self::Ptr>, _iov: &mut IovIterDest<'_>) -> Result<usize> {
+ build_error!(VTABLE_DEFAULT_ERROR)
+ }
+
+ /// Write to this miscdevice.
+ fn write_iter(_kiocb: Kiocb<'_, Self::Ptr>, _iov: &mut IovIterSource<'_>) -> Result<usize> {
+ build_error!(VTABLE_DEFAULT_ERROR)
+ }
+
/// Handler for ioctls.
///
/// The `cmd` argument is usually manipulated using the utilities in [`kernel::ioctl`].
@@ -247,6 +258,46 @@ impl<T: MiscDevice> MiscdeviceVTable<T> {
/// # Safety
///
+ /// `kiocb` must be correspond to a valid file that is associated with a
+ /// `MiscDeviceRegistration<T>`. `iter` must be a valid `struct iov_iter` for writing.
+ unsafe extern "C" fn read_iter(
+ kiocb: *mut bindings::kiocb,
+ iter: *mut bindings::iov_iter,
+ ) -> isize {
+ // SAFETY: The caller provides a valid `struct kiocb` associated with a
+ // `MiscDeviceRegistration<T>` file.
+ let kiocb = unsafe { Kiocb::from_raw(kiocb) };
+ // SAFETY: This is a valid `struct iov_iter` for writing.
+ let iov = unsafe { IovIterDest::from_raw(iter) };
+
+ match T::read_iter(kiocb, iov) {
+ Ok(res) => res as isize,
+ Err(err) => err.to_errno() as isize,
+ }
+ }
+
+ /// # Safety
+ ///
+ /// `kiocb` must be correspond to a valid file that is associated with a
+ /// `MiscDeviceRegistration<T>`. `iter` must be a valid `struct iov_iter` for writing.
+ unsafe extern "C" fn write_iter(
+ kiocb: *mut bindings::kiocb,
+ iter: *mut bindings::iov_iter,
+ ) -> isize {
+ // SAFETY: The caller provides a valid `struct kiocb` associated with a
+ // `MiscDeviceRegistration<T>` file.
+ let kiocb = unsafe { Kiocb::from_raw(kiocb) };
+ // SAFETY: This is a valid `struct iov_iter` for reading.
+ let iov = unsafe { IovIterSource::from_raw(iter) };
+
+ match T::write_iter(kiocb, iov) {
+ Ok(res) => res as isize,
+ Err(err) => err.to_errno() as isize,
+ }
+ }
+
+ /// # Safety
+ ///
/// `file` must be a valid file that is associated with a `MiscDeviceRegistration<T>`.
/// `vma` must be a vma that is currently being mmap'ed with this file.
unsafe extern "C" fn mmap(
@@ -341,6 +392,16 @@ impl<T: MiscDevice> MiscdeviceVTable<T> {
open: Some(Self::open),
release: Some(Self::release),
mmap: if T::HAS_MMAP { Some(Self::mmap) } else { None },
+ read_iter: if T::HAS_READ_ITER {
+ Some(Self::read_iter)
+ } else {
+ None
+ },
+ write_iter: if T::HAS_WRITE_ITER {
+ Some(Self::write_iter)
+ } else {
+ None
+ },
unlocked_ioctl: if T::HAS_IOCTL {
Some(Self::ioctl)
} else {
diff --git a/samples/rust/rust_misc_device.rs b/samples/rust/rust_misc_device.rs
index 911b9320d3c0..d69bc33dbd99 100644
--- a/samples/rust/rust_misc_device.rs
+++ b/samples/rust/rust_misc_device.rs
@@ -100,8 +100,9 @@ use core::pin::Pin;
use kernel::{
c_str,
device::Device,
- fs::File,
+ fs::{File, Kiocb},
ioctl::{_IO, _IOC_SIZE, _IOR, _IOW},
+ iov::{IovIterDest, IovIterSource},
miscdevice::{MiscDevice, MiscDeviceOptions, MiscDeviceRegistration},
new_mutex,
prelude::*,
@@ -143,6 +144,7 @@ impl kernel::InPlaceModule for RustMiscDeviceModule {
struct Inner {
value: i32,
+ buffer: KVVec<u8>,
}
#[pin_data(PinnedDrop)]
@@ -164,7 +166,10 @@ impl MiscDevice for RustMiscDevice {
KBox::try_pin_init(
try_pin_init! {
RustMiscDevice {
- inner <- new_mutex!( Inner{ value: 0_i32 } ),
+ inner <- new_mutex!(Inner {
+ value: 0_i32,
+ buffer: KVVec::new(),
+ }),
dev: dev,
}
},
@@ -172,6 +177,33 @@ impl MiscDevice for RustMiscDevice {
)
}
+ fn read_iter(mut kiocb: Kiocb<'_, Self::Ptr>, iov: &mut IovIterDest<'_>) -> Result<usize> {
+ let me = kiocb.file();
+ dev_info!(me.dev, "Reading from Rust Misc Device Sample\n");
+
+ let inner = me.inner.lock();
+ // Read the buffer contents, taking the file position into account.
+ let read = iov.simple_read_from_buffer(kiocb.ki_pos_mut(), &inner.buffer)?;
+
+ Ok(read)
+ }
+
+ fn write_iter(mut kiocb: Kiocb<'_, Self::Ptr>, iov: &mut IovIterSource<'_>) -> Result<usize> {
+ let me = kiocb.file();
+ dev_info!(me.dev, "Writing to Rust Misc Device Sample\n");
+
+ let mut inner = me.inner.lock();
+
+ // Replace buffer contents.
+ inner.buffer.clear();
+ let len = iov.copy_from_iter_vec(&mut inner.buffer, GFP_KERNEL)?;
+
+ // Set position to zero so that future `read` calls will see the new contents.
+ *kiocb.ki_pos_mut() = 0;
+
+ Ok(len)
+ }
+
fn ioctl(me: Pin<&RustMiscDevice>, _file: &File, cmd: u32, arg: usize) -> Result<isize> {
dev_info!(me.dev, "IOCTLing Rust Misc Device Sample\n");