linux/fs/omfs/file.c
Linus Torvalds 7031769e10 vfs-6.17-rc1.mmap_prepare

Merge tag 'vfs-6.17-rc1.mmap_prepare' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull mmap_prepare updates from Christian Brauner:
 "Last cycle we introduce f_op->mmap_prepare() in c84bf6dd2b ("mm:
  introduce new .mmap_prepare() file callback").

  This is preferred to the existing f_op->mmap() hook as it does require
  a VMA to be established yet, thus allowing the mmap logic to invoke
  this hook far, far earlier, prior to inserting a VMA into the virtual
  address space, or performing any other heavy handed operations.

  This allows for much simpler unwinding on error, and for there to be a
  single attempt at merging a VMA rather than having to possibly
  reattempt a merge based on potentially altered VMA state.

  Far more importantly, it prevents inappropriate manipulation of
  incompletely initialised VMA state, which is something that has been
  the cause of bugs and complexity in the past.

  The intent is to gradually deprecate f_op->mmap, and in that vein this
  series converts the majority of file systems to using f_op->mmap_prepare.

  Prerequisite steps are taken: first, ensuring all checks for mmap
  capabilities use the file_has_valid_mmap_hooks() helper rather than
  directly checking f_op->mmap (which is no longer a valid check on its
  own; see the helper sketch below), and second, updating
  daxdev_mapping_supported() to not require a VMA parameter, allowing
  ext4 and xfs to be converted.

  Commit bb666b7c27 ("mm: add mmap_prepare() compatibility layer for
  nested file systems") handles the nasty edge case of nested file
  systems such as overlayfs by introducing a compatibility shim that
  allows f_op->mmap_prepare() to be invoked from an f_op->mmap()
  callback (see the shim sketch below).

  This allows nested file systems to continue to function correctly with
  all other file systems, regardless of which callback is used. Once all
  file systems are finally converted, this shim can be removed.

  As a result, ecryptfs, fuse, and overlayfs remain unaltered so they
  can nest all other file systems.

  We additionally do not update resctrl, as that requires an update to
  remap_pfn_range() (or an alternative to it) which we defer to a later
  series. Likewise, we do not update cramfs, which needs mixed mapping
  insertion and has the same issue, nor procfs, hugetlbfs, sysfs, or
  kernfs, all of which require VMAs for internal state and hooks. We
  shall return to all of these later"
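
To make the shape of the new hook concrete, here is a minimal sketch of an
.mmap_prepare() implementation, modeled on what generic_file_mmap_prepare()
does for filemap-backed files. The vm_area_desc field names follow
c84bf6dd2b; treat the exact layout as an assumption, not verbatim kernel
code:

	/* Sketch only: a filemap-backed .mmap_prepare(). No VMA exists
	 * yet; the hook validates the file and fills in the mutable
	 * fields of the descriptor, and the mmap core later creates and
	 * inserts the VMA in a single pass.
	 */
	static int example_mmap_prepare(struct vm_area_desc *desc)
	{
		struct file *file = desc->file;

		/* cannot fault pages in without a read_folio method */
		if (!file->f_mapping->a_ops->read_folio)
			return -ENOEXEC;

		file_accessed(file);
		desc->vm_ops = &generic_file_vm_ops;	/* set ops via the desc */
		return 0;
	}

Because the hook runs before any VMA is created, an error return here
unwinds trivially: nothing has been inserted into the address space yet.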
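
The helper-based capability check matters because, with two possible hooks,
a bare f_op->mmap test would wrongly reject mmap_prepare-only file systems.
A plausible shape for such a helper (an assumption for illustration; the
real file_has_valid_mmap_hooks()/can_mmap_file() helpers may differ in
detail):

	/* Assumed shape: either hook makes a file mappable, and the two
	 * hooks are mutually exclusive.
	 */
	static inline bool example_can_mmap_file(struct file *file)
	{
		if (WARN_ON_ONCE(file->f_op->mmap && file->f_op->mmap_prepare))
			return false;
		return file->f_op->mmap || file->f_op->mmap_prepare;
	}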
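
The nested-file-system shim can be pictured as follows. This is a
conceptual sketch of what bb666b7c27 provides, not the exact kernel code;
the vma_to_desc()/set_vma_from_desc() helpers are assumed names for
illustration:

	/* Conceptual sketch: invoke an underlying file's ->mmap_prepare()
	 * from a legacy ->mmap() context, where a VMA already exists.
	 */
	static int example_compat_mmap_prepare(struct file *file,
					       struct vm_area_struct *vma)
	{
		struct vm_area_desc desc;
		int err;

		vma_to_desc(vma, &desc);	/* snapshot VMA state (assumed helper) */
		err = file->f_op->mmap_prepare(&desc);
		if (err)
			return err;
		set_vma_from_desc(vma, &desc);	/* write mutable fields back (assumed helper) */
		return 0;
	}

A stacking file system such as overlayfs can keep its .mmap hook and call a
bridge like this when the underlying file only implements .mmap_prepare,
which is why ecryptfs, fuse, and overlayfs are left unconverted for now.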

* tag 'vfs-6.17-rc1.mmap_prepare' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  doc: update porting, vfs documentation to describe mmap_prepare()
  fs: replace mmap hook with .mmap_prepare for simple mappings
  fs: convert most other generic_file_*mmap() users to .mmap_prepare()
  fs: convert simple use of generic_file_*_mmap() to .mmap_prepare()
  mm/filemap: introduce generic_file_*_mmap_prepare() helpers
  fs/xfs: transition from deprecated .mmap hook to .mmap_prepare
  fs/ext4: transition from deprecated .mmap hook to .mmap_prepare
  fs/dax: make it possible to check dev dax support without a VMA
  fs: consistently use can_mmap_file() helper
  mm/nommu: use file_has_valid_mmap_hooks() helper
  mm: rename call_mmap/mmap_prepare to vfs_mmap/mmap_prepare
2025-07-28 13:43:25 -07:00


// SPDX-License-Identifier: GPL-2.0-only
/*
 * OMFS (as used by RIO Karma) file operations.
 * Copyright (C) 2005 Bob Copeland <me@bobcopeland.com>
 */

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include "omfs.h"

static u32 omfs_max_extents(struct omfs_sb_info *sbi, int offset)
{
	return (sbi->s_sys_blocksize - offset -
		sizeof(struct omfs_extent)) /
		sizeof(struct omfs_extent_entry);
}
void omfs_make_empty_table(struct buffer_head *bh, int offset)
{
	struct omfs_extent *oe = (struct omfs_extent *) &bh->b_data[offset];

	oe->e_next = ~cpu_to_be64(0ULL);
	oe->e_extent_count = cpu_to_be32(1);
	oe->e_fill = cpu_to_be32(0x22);
	oe->e_entry[0].e_cluster = ~cpu_to_be64(0ULL);
	oe->e_entry[0].e_blocks = ~cpu_to_be64(0ULL);
}
int omfs_shrink_inode(struct inode *inode)
{
	struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb);
	struct omfs_extent *oe;
	struct omfs_extent_entry *entry;
	struct buffer_head *bh;
	u64 next, last;
	u32 extent_count;
	u32 max_extents;
	int ret;

	/* traverse extent table, freeing each entry that is greater
	 * than inode->i_size
	 */
	next = inode->i_ino;

	/* only support truncate -> 0 for now */
	ret = -EIO;
	if (inode->i_size != 0)
		goto out;

	bh = omfs_bread(inode->i_sb, next);
	if (!bh)
		goto out;

	oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]);
	max_extents = omfs_max_extents(sbi, OMFS_EXTENT_START);

	for (;;) {
		if (omfs_is_bad(sbi, (struct omfs_header *) bh->b_data, next))
			goto out_brelse;

		extent_count = be32_to_cpu(oe->e_extent_count);
		if (extent_count > max_extents)
			goto out_brelse;

		last = next;
		next = be64_to_cpu(oe->e_next);
		entry = oe->e_entry;

		/* ignore last entry as it is the terminator */
		for (; extent_count > 1; extent_count--) {
			u64 start, count;

			start = be64_to_cpu(entry->e_cluster);
			count = be64_to_cpu(entry->e_blocks);

			omfs_clear_range(inode->i_sb, start, (int) count);
			entry++;
		}
		omfs_make_empty_table(bh, (char *) oe - bh->b_data);
		mark_buffer_dirty(bh);
		brelse(bh);

		if (last != inode->i_ino)
			omfs_clear_range(inode->i_sb, last, sbi->s_mirrors);

		if (next == ~0)
			break;

		bh = omfs_bread(inode->i_sb, next);
		if (!bh)
			goto out;
		oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]);
		max_extents = omfs_max_extents(sbi, OMFS_EXTENT_CONT);
	}
	ret = 0;
out:
	return ret;
out_brelse:
	brelse(bh);
	return ret;
}
static void omfs_truncate(struct inode *inode)
{
	omfs_shrink_inode(inode);
	mark_inode_dirty(inode);
}

/*
 * Add new blocks to the current extent, or create new entries/continuations
 * as necessary.
 */
static int omfs_grow_extent(struct inode *inode, struct omfs_extent *oe,
			u64 *ret_block)
{
	struct omfs_extent_entry *terminator;
	struct omfs_extent_entry *entry = oe->e_entry;
	struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb);
	u32 extent_count = be32_to_cpu(oe->e_extent_count);
	u64 new_block = 0;
	u32 max_count;
	int new_count;
	int ret = 0;

	/* reached the end of the extent table with no blocks mapped.
	 * there are three possibilities for adding: grow last extent,
	 * add a new extent to the current extent table, and add a
	 * continuation inode.  in the last two cases we need an
	 * allocator for sbi->s_clustersize
	 */

	/* TODO: handle holes */

	/* should always have a terminator */
	if (extent_count < 1)
		return -EIO;

	/* trivially grow current extent, if next block is not taken */
	terminator = entry + extent_count - 1;
	if (extent_count > 1) {
		entry = terminator - 1;
		new_block = be64_to_cpu(entry->e_cluster) +
			be64_to_cpu(entry->e_blocks);

		if (omfs_allocate_block(inode->i_sb, new_block)) {
			be64_add_cpu(&entry->e_blocks, 1);
			terminator->e_blocks = ~(cpu_to_be64(
				be64_to_cpu(~terminator->e_blocks) + 1));
			goto out;
		}
	}
	max_count = omfs_max_extents(sbi, OMFS_EXTENT_START);

	/* TODO: add a continuation block here */
	if (be32_to_cpu(oe->e_extent_count) > max_count - 1)
		return -EIO;

	/* try to allocate a new cluster */
	ret = omfs_allocate_range(inode->i_sb, 1, sbi->s_clustersize,
		&new_block, &new_count);
	if (ret)
		goto out_fail;

	/* copy terminator down an entry */
	entry = terminator;
	terminator++;
	memcpy(terminator, entry, sizeof(struct omfs_extent_entry));

	entry->e_cluster = cpu_to_be64(new_block);
	entry->e_blocks = cpu_to_be64((u64) new_count);

	terminator->e_blocks = ~(cpu_to_be64(
		be64_to_cpu(~terminator->e_blocks) + (u64) new_count));

	/* write in new entry */
	be32_add_cpu(&oe->e_extent_count, 1);

out:
	*ret_block = new_block;
out_fail:
	return ret;
}
/*
 * Scans across the extent table for a given file block number.
 * If the block is not found, returns 0.
 */
static sector_t find_block(struct inode *inode, struct omfs_extent_entry *ent,
			sector_t block, int count, int *left)
{
	/* count > 1 because of terminator */
	sector_t searched = 0;

	for (; count > 1; count--) {
		int numblocks = clus_to_blk(OMFS_SB(inode->i_sb),
			be64_to_cpu(ent->e_blocks));

		if (block >= searched &&
		    block < searched + numblocks) {
			/*
			 * found it at cluster + (block - searched)
			 * numblocks - (block - searched) is remainder
			 */
			*left = numblocks - (block - searched);
			return clus_to_blk(OMFS_SB(inode->i_sb),
				be64_to_cpu(ent->e_cluster)) +
				block - searched;
		}
		searched += numblocks;
		ent++;
	}
	return 0;
}
static int omfs_get_block(struct inode *inode, sector_t block,
			  struct buffer_head *bh_result, int create)
{
	struct buffer_head *bh;
	sector_t next, offset;
	int ret;
	u64 new_block;
	u32 max_extents;
	int extent_count;
	struct omfs_extent *oe;
	struct omfs_extent_entry *entry;
	struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb);
	int max_blocks = bh_result->b_size >> inode->i_blkbits;
	int remain;

	ret = -EIO;
	bh = omfs_bread(inode->i_sb, inode->i_ino);
	if (!bh)
		goto out;

	oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]);
	max_extents = omfs_max_extents(sbi, OMFS_EXTENT_START);
	next = inode->i_ino;

	for (;;) {
		if (omfs_is_bad(sbi, (struct omfs_header *) bh->b_data, next))
			goto out_brelse;

		extent_count = be32_to_cpu(oe->e_extent_count);
		next = be64_to_cpu(oe->e_next);
		entry = oe->e_entry;

		if (extent_count > max_extents)
			goto out_brelse;

		offset = find_block(inode, entry, block, extent_count, &remain);
		if (offset > 0) {
			ret = 0;
			map_bh(bh_result, inode->i_sb, offset);
			if (remain > max_blocks)
				remain = max_blocks;
			bh_result->b_size = (remain << inode->i_blkbits);
			goto out_brelse;
		}
		if (next == ~0)
			break;

		brelse(bh);
		bh = omfs_bread(inode->i_sb, next);
		if (!bh)
			goto out;
		oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]);
		max_extents = omfs_max_extents(sbi, OMFS_EXTENT_CONT);
	}
	if (create) {
		ret = omfs_grow_extent(inode, oe, &new_block);
		if (ret == 0) {
			mark_buffer_dirty(bh);
			mark_inode_dirty(inode);
			map_bh(bh_result, inode->i_sb,
					clus_to_blk(sbi, new_block));
		}
	}

out_brelse:
	brelse(bh);
out:
	return ret;
}
static int omfs_read_folio(struct file *file, struct folio *folio)
{
	return block_read_full_folio(folio, omfs_get_block);
}

static void omfs_readahead(struct readahead_control *rac)
{
	mpage_readahead(rac, omfs_get_block);
}

static int
omfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	return mpage_writepages(mapping, wbc, omfs_get_block);
}

static void omfs_write_failed(struct address_space *mapping, loff_t to)
{
	struct inode *inode = mapping->host;

	if (to > inode->i_size) {
		truncate_pagecache(inode, inode->i_size);
		omfs_truncate(inode);
	}
}

static int omfs_write_begin(const struct kiocb *iocb,
			    struct address_space *mapping,
			    loff_t pos, unsigned len,
			    struct folio **foliop, void **fsdata)
{
	int ret;

	ret = block_write_begin(mapping, pos, len, foliop, omfs_get_block);
	if (unlikely(ret))
		omfs_write_failed(mapping, pos + len);

	return ret;
}

static sector_t omfs_bmap(struct address_space *mapping, sector_t block)
{
	return generic_block_bmap(mapping, block, omfs_get_block);
}

const struct file_operations omfs_file_operations = {
	.llseek = generic_file_llseek,
	.read_iter = generic_file_read_iter,
	.write_iter = generic_file_write_iter,
	.mmap_prepare = generic_file_mmap_prepare,
	.fsync = generic_file_fsync,
	.splice_read = filemap_splice_read,
};
static int omfs_setattr(struct mnt_idmap *idmap,
			struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = d_inode(dentry);
	int error;

	error = setattr_prepare(&nop_mnt_idmap, dentry, attr);
	if (error)
		return error;

	if ((attr->ia_valid & ATTR_SIZE) &&
	    attr->ia_size != i_size_read(inode)) {
		error = inode_newsize_ok(inode, attr->ia_size);
		if (error)
			return error;
		truncate_setsize(inode, attr->ia_size);
		omfs_truncate(inode);
	}

	setattr_copy(&nop_mnt_idmap, inode, attr);
	mark_inode_dirty(inode);
	return 0;
}

const struct inode_operations omfs_file_inops = {
	.setattr = omfs_setattr,
};

const struct address_space_operations omfs_aops = {
	.dirty_folio = block_dirty_folio,
	.invalidate_folio = block_invalidate_folio,
	.read_folio = omfs_read_folio,
	.readahead = omfs_readahead,
	.writepages = omfs_writepages,
	.write_begin = omfs_write_begin,
	.write_end = generic_write_end,
	.bmap = omfs_bmap,
	.migrate_folio = buffer_migrate_folio,
};