doc: update porting, vfs documentation to describe mmap_prepare()

Now that we have established .mmap_prepare() as the preferred means by
which filesystems establish state upon memory mapping of a file, update the
VFS and porting documentation to reflect this.

As part of this change, additionally update the VFS documentation to
contain the current state of the file_operations struct.

Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Link: https://lore.kernel.org/20250723123036.35472-1-lorenzo.stoakes@oracle.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
Lorenzo Stoakes 2025-07-23 13:30:36 +01:00 committed by Christian Brauner
parent 738a6cf8fc
commit 425c8bb39b
No known key found for this signature in database
GPG key ID: 91C61BC06578DCA2
2 changed files with 30 additions and 4 deletions

View file

@ -1249,3 +1249,15 @@ Using try_lookup_noperm() will require linux/namei.h to be included.
Calling conventions for ->d_automount() have changed; we should *not* grab
an extra reference to new mount - it should be returned with refcount 1.
---
**highly recommended**
The file operations mmap() callback is deprecated in favour of
mmap_prepare(). This passes a pointer to a vm_area_desc to the callback
rather than a VMA, as the VMA at this stage is not yet valid.
The vm_area_desc provides the minimum required information for a filesystem
to initialise state upon memory mapping of a file-backed region, and output
parameters for the filesystem to set this state.

View file

@ -1071,12 +1071,14 @@ This describes how the VFS can manipulate an open file. As of kernel
struct file_operations { struct file_operations {
struct module *owner; struct module *owner;
fop_flags_t fop_flags;
loff_t (*llseek) (struct file *, loff_t, int); loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
int (*iopoll)(struct kiocb *kiocb, bool spin); int (*iopoll)(struct kiocb *kiocb, struct io_comp_batch *,
unsigned int flags);
int (*iterate_shared) (struct file *, struct dir_context *); int (*iterate_shared) (struct file *, struct dir_context *);
__poll_t (*poll) (struct file *, struct poll_table_struct *); __poll_t (*poll) (struct file *, struct poll_table_struct *);
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
@ -1093,18 +1095,24 @@ This describes how the VFS can manipulate an open file. As of kernel
int (*flock) (struct file *, int, struct file_lock *); int (*flock) (struct file *, int, struct file_lock *);
ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
int (*setlease)(struct file *, long, struct file_lock **, void **); void (*splice_eof)(struct file *file);
int (*setlease)(struct file *, int, struct file_lease **, void **);
long (*fallocate)(struct file *file, int mode, loff_t offset, long (*fallocate)(struct file *file, int mode, loff_t offset,
loff_t len); loff_t len);
void (*show_fdinfo)(struct seq_file *m, struct file *f); void (*show_fdinfo)(struct seq_file *m, struct file *f);
#ifndef CONFIG_MMU #ifndef CONFIG_MMU
unsigned (*mmap_capabilities)(struct file *); unsigned (*mmap_capabilities)(struct file *);
#endif #endif
ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int); ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
loff_t, size_t, unsigned int);
loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in, loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out, struct file *file_out, loff_t pos_out,
loff_t len, unsigned int remap_flags); loff_t len, unsigned int remap_flags);
int (*fadvise)(struct file *, loff_t, loff_t, int); int (*fadvise)(struct file *, loff_t, loff_t, int);
int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *,
unsigned int poll_flags);
int (*mmap_prepare)(struct vm_area_desc *);
}; };
Again, all methods are called without any locks being held, unless
@ -1144,7 +1152,8 @@ otherwise noted.
used on 64 bit kernels. used on 64 bit kernels.
``mmap`` ``mmap``
called by the mmap(2) system call called by the mmap(2) system call. Deprecated in favour of
``mmap_prepare``.
``open`` ``open``
called by the VFS when an inode should be opened. When the VFS called by the VFS when an inode should be opened. When the VFS
@ -1221,6 +1230,11 @@ otherwise noted.
``fadvise`` ``fadvise``
possibly called by the fadvise64() system call. possibly called by the fadvise64() system call.
``mmap_prepare``
Called by the mmap(2) system call. Allows a filesystem to set up a
file-backed memory mapping, most notably establishing relevant
private state and VMA callbacks.
Note that the file operations are implemented by the specific
filesystem in which the inode resides. When opening a device node
(character or block special) most filesystems will call special