mirror of
https://github.com/torvalds/linux.git
synced 2025-08-15 14:11:42 +02:00

-----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCaINCgQAKCRCRxhvAZXjc os+nAP9LFHUwWO6EBzHJJGEVjJvvzsbzqeYrRFamYiMc5ulPJwD+KW4RIgJa/MWO pcYE40CacaekD8rFWwYUyszpgmv6ewc= =wCwp -----END PGP SIGNATURE----- Merge tag 'vfs-6.17-rc1.mmap_prepare' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs Pull mmap_prepare updates from Christian Brauner: "Last cycle we introduced f_op->mmap_prepare() in c84bf6dd2b
("mm: introduce new .mmap_prepare() file callback"). This is preferred to the existing f_op->mmap() hook as it does not require a VMA to be established yet, thus allowing the mmap logic to invoke this hook far, far earlier, prior to inserting a VMA into the virtual address space, or performing any other heavy handed operations. This allows for much simpler unwinding on error, and for there to be a single attempt at merging a VMA rather than having to possibly reattempt a merge based on potentially altered VMA state. Far more importantly, it prevents inappropriate manipulation of incompletely initialised VMA state, which is something that has been the cause of bugs and complexity in the past. The intent is to gradually deprecate f_op->mmap, and in that vein this series converts the majority of file systems to using f_op->mmap_prepare. Prerequisite steps are taken - firstly ensuring all checks for mmap capabilities use the file_has_valid_mmap_hooks() helper rather than directly checking for f_op->mmap (which is now not a valid check) and secondly updating daxdev_mapping_supported() to not require a VMA parameter to allow ext4 and xfs to be converted. Commit bb666b7c27
("mm: add mmap_prepare() compatibility layer for nested file systems") handles the nasty edge-case of nested file systems like overlayfs, which introduces a compatibility shim to allow f_op->mmap_prepare() to be invoked from an f_op->mmap() callback. This allows for nested filesystems to continue to function correctly with all file systems regardless of which callback is used. Once we finally convert all file systems, this shim can be removed. As a result, ecryptfs, fuse, and overlayfs remain unaltered so they can nest all other file systems. We additionally do not update resctrl - as this requires an update to remap_pfn_range() (or an alternative to it) which we defer to a later series, equally we do not update cramfs which needs a mixed mapping insertion with the same issue, nor do we update procfs, hugetlbfs, sysfs or kernfs all of which require VMAs for internal state and hooks. We shall return to all of these later" * tag 'vfs-6.17-rc1.mmap_prepare' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: doc: update porting, vfs documentation to describe mmap_prepare() fs: replace mmap hook with .mmap_prepare for simple mappings fs: convert most other generic_file_*mmap() users to .mmap_prepare() fs: convert simple use of generic_file_*_mmap() to .mmap_prepare() mm/filemap: introduce generic_file_*_mmap_prepare() helpers fs/xfs: transition from deprecated .mmap hook to .mmap_prepare fs/ext4: transition from deprecated .mmap hook to .mmap_prepare fs/dax: make it possible to check dev dax support without a VMA fs: consistently use can_mmap_file() helper mm/nommu: use file_has_valid_mmap_hooks() helper mm: rename call_mmap/mmap_prepare to vfs_mmap/mmap_prepare
380 lines
9.1 KiB
C
380 lines
9.1 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* OMFS (as used by RIO Karma) file operations.
|
|
* Copyright (C) 2005 Bob Copeland <me@bobcopeland.com>
|
|
*/
|
|
|
|
#include <linux/module.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/buffer_head.h>
|
|
#include <linux/mpage.h>
|
|
#include "omfs.h"
|
|
|
|
static u32 omfs_max_extents(struct omfs_sb_info *sbi, int offset)
|
|
{
|
|
return (sbi->s_sys_blocksize - offset -
|
|
sizeof(struct omfs_extent)) /
|
|
sizeof(struct omfs_extent_entry);
|
|
}
|
|
|
|
void omfs_make_empty_table(struct buffer_head *bh, int offset)
|
|
{
|
|
struct omfs_extent *oe = (struct omfs_extent *) &bh->b_data[offset];
|
|
|
|
oe->e_next = ~cpu_to_be64(0ULL);
|
|
oe->e_extent_count = cpu_to_be32(1),
|
|
oe->e_fill = cpu_to_be32(0x22),
|
|
oe->e_entry[0].e_cluster = ~cpu_to_be64(0ULL);
|
|
oe->e_entry[0].e_blocks = ~cpu_to_be64(0ULL);
|
|
}
|
|
|
|
int omfs_shrink_inode(struct inode *inode)
|
|
{
|
|
struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb);
|
|
struct omfs_extent *oe;
|
|
struct omfs_extent_entry *entry;
|
|
struct buffer_head *bh;
|
|
u64 next, last;
|
|
u32 extent_count;
|
|
u32 max_extents;
|
|
int ret;
|
|
|
|
/* traverse extent table, freeing each entry that is greater
|
|
* than inode->i_size;
|
|
*/
|
|
next = inode->i_ino;
|
|
|
|
/* only support truncate -> 0 for now */
|
|
ret = -EIO;
|
|
if (inode->i_size != 0)
|
|
goto out;
|
|
|
|
bh = omfs_bread(inode->i_sb, next);
|
|
if (!bh)
|
|
goto out;
|
|
|
|
oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]);
|
|
max_extents = omfs_max_extents(sbi, OMFS_EXTENT_START);
|
|
|
|
for (;;) {
|
|
|
|
if (omfs_is_bad(sbi, (struct omfs_header *) bh->b_data, next))
|
|
goto out_brelse;
|
|
|
|
extent_count = be32_to_cpu(oe->e_extent_count);
|
|
|
|
if (extent_count > max_extents)
|
|
goto out_brelse;
|
|
|
|
last = next;
|
|
next = be64_to_cpu(oe->e_next);
|
|
entry = oe->e_entry;
|
|
|
|
/* ignore last entry as it is the terminator */
|
|
for (; extent_count > 1; extent_count--) {
|
|
u64 start, count;
|
|
start = be64_to_cpu(entry->e_cluster);
|
|
count = be64_to_cpu(entry->e_blocks);
|
|
|
|
omfs_clear_range(inode->i_sb, start, (int) count);
|
|
entry++;
|
|
}
|
|
omfs_make_empty_table(bh, (char *) oe - bh->b_data);
|
|
mark_buffer_dirty(bh);
|
|
brelse(bh);
|
|
|
|
if (last != inode->i_ino)
|
|
omfs_clear_range(inode->i_sb, last, sbi->s_mirrors);
|
|
|
|
if (next == ~0)
|
|
break;
|
|
|
|
bh = omfs_bread(inode->i_sb, next);
|
|
if (!bh)
|
|
goto out;
|
|
oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]);
|
|
max_extents = omfs_max_extents(sbi, OMFS_EXTENT_CONT);
|
|
}
|
|
ret = 0;
|
|
out:
|
|
return ret;
|
|
out_brelse:
|
|
brelse(bh);
|
|
return ret;
|
|
}
|
|
|
|
static void omfs_truncate(struct inode *inode)
|
|
{
|
|
omfs_shrink_inode(inode);
|
|
mark_inode_dirty(inode);
|
|
}
|
|
|
|
/*
|
|
* Add new blocks to the current extent, or create new entries/continuations
|
|
* as necessary.
|
|
*/
|
|
static int omfs_grow_extent(struct inode *inode, struct omfs_extent *oe,
|
|
u64 *ret_block)
|
|
{
|
|
struct omfs_extent_entry *terminator;
|
|
struct omfs_extent_entry *entry = oe->e_entry;
|
|
struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb);
|
|
u32 extent_count = be32_to_cpu(oe->e_extent_count);
|
|
u64 new_block = 0;
|
|
u32 max_count;
|
|
int new_count;
|
|
int ret = 0;
|
|
|
|
/* reached the end of the extent table with no blocks mapped.
|
|
* there are three possibilities for adding: grow last extent,
|
|
* add a new extent to the current extent table, and add a
|
|
* continuation inode. in last two cases need an allocator for
|
|
* sbi->s_cluster_size
|
|
*/
|
|
|
|
/* TODO: handle holes */
|
|
|
|
/* should always have a terminator */
|
|
if (extent_count < 1)
|
|
return -EIO;
|
|
|
|
/* trivially grow current extent, if next block is not taken */
|
|
terminator = entry + extent_count - 1;
|
|
if (extent_count > 1) {
|
|
entry = terminator-1;
|
|
new_block = be64_to_cpu(entry->e_cluster) +
|
|
be64_to_cpu(entry->e_blocks);
|
|
|
|
if (omfs_allocate_block(inode->i_sb, new_block)) {
|
|
be64_add_cpu(&entry->e_blocks, 1);
|
|
terminator->e_blocks = ~(cpu_to_be64(
|
|
be64_to_cpu(~terminator->e_blocks) + 1));
|
|
goto out;
|
|
}
|
|
}
|
|
max_count = omfs_max_extents(sbi, OMFS_EXTENT_START);
|
|
|
|
/* TODO: add a continuation block here */
|
|
if (be32_to_cpu(oe->e_extent_count) > max_count-1)
|
|
return -EIO;
|
|
|
|
/* try to allocate a new cluster */
|
|
ret = omfs_allocate_range(inode->i_sb, 1, sbi->s_clustersize,
|
|
&new_block, &new_count);
|
|
if (ret)
|
|
goto out_fail;
|
|
|
|
/* copy terminator down an entry */
|
|
entry = terminator;
|
|
terminator++;
|
|
memcpy(terminator, entry, sizeof(struct omfs_extent_entry));
|
|
|
|
entry->e_cluster = cpu_to_be64(new_block);
|
|
entry->e_blocks = cpu_to_be64((u64) new_count);
|
|
|
|
terminator->e_blocks = ~(cpu_to_be64(
|
|
be64_to_cpu(~terminator->e_blocks) + (u64) new_count));
|
|
|
|
/* write in new entry */
|
|
be32_add_cpu(&oe->e_extent_count, 1);
|
|
|
|
out:
|
|
*ret_block = new_block;
|
|
out_fail:
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Scans across the directory table for a given file block number.
|
|
* If block not found, return 0.
|
|
*/
|
|
static sector_t find_block(struct inode *inode, struct omfs_extent_entry *ent,
|
|
sector_t block, int count, int *left)
|
|
{
|
|
/* count > 1 because of terminator */
|
|
sector_t searched = 0;
|
|
for (; count > 1; count--) {
|
|
int numblocks = clus_to_blk(OMFS_SB(inode->i_sb),
|
|
be64_to_cpu(ent->e_blocks));
|
|
|
|
if (block >= searched &&
|
|
block < searched + numblocks) {
|
|
/*
|
|
* found it at cluster + (block - searched)
|
|
* numblocks - (block - searched) is remainder
|
|
*/
|
|
*left = numblocks - (block - searched);
|
|
return clus_to_blk(OMFS_SB(inode->i_sb),
|
|
be64_to_cpu(ent->e_cluster)) +
|
|
block - searched;
|
|
}
|
|
searched += numblocks;
|
|
ent++;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int omfs_get_block(struct inode *inode, sector_t block,
|
|
struct buffer_head *bh_result, int create)
|
|
{
|
|
struct buffer_head *bh;
|
|
sector_t next, offset;
|
|
int ret;
|
|
u64 new_block;
|
|
u32 max_extents;
|
|
int extent_count;
|
|
struct omfs_extent *oe;
|
|
struct omfs_extent_entry *entry;
|
|
struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb);
|
|
int max_blocks = bh_result->b_size >> inode->i_blkbits;
|
|
int remain;
|
|
|
|
ret = -EIO;
|
|
bh = omfs_bread(inode->i_sb, inode->i_ino);
|
|
if (!bh)
|
|
goto out;
|
|
|
|
oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]);
|
|
max_extents = omfs_max_extents(sbi, OMFS_EXTENT_START);
|
|
next = inode->i_ino;
|
|
|
|
for (;;) {
|
|
|
|
if (omfs_is_bad(sbi, (struct omfs_header *) bh->b_data, next))
|
|
goto out_brelse;
|
|
|
|
extent_count = be32_to_cpu(oe->e_extent_count);
|
|
next = be64_to_cpu(oe->e_next);
|
|
entry = oe->e_entry;
|
|
|
|
if (extent_count > max_extents)
|
|
goto out_brelse;
|
|
|
|
offset = find_block(inode, entry, block, extent_count, &remain);
|
|
if (offset > 0) {
|
|
ret = 0;
|
|
map_bh(bh_result, inode->i_sb, offset);
|
|
if (remain > max_blocks)
|
|
remain = max_blocks;
|
|
bh_result->b_size = (remain << inode->i_blkbits);
|
|
goto out_brelse;
|
|
}
|
|
if (next == ~0)
|
|
break;
|
|
|
|
brelse(bh);
|
|
bh = omfs_bread(inode->i_sb, next);
|
|
if (!bh)
|
|
goto out;
|
|
oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]);
|
|
max_extents = omfs_max_extents(sbi, OMFS_EXTENT_CONT);
|
|
}
|
|
if (create) {
|
|
ret = omfs_grow_extent(inode, oe, &new_block);
|
|
if (ret == 0) {
|
|
mark_buffer_dirty(bh);
|
|
mark_inode_dirty(inode);
|
|
map_bh(bh_result, inode->i_sb,
|
|
clus_to_blk(sbi, new_block));
|
|
}
|
|
}
|
|
out_brelse:
|
|
brelse(bh);
|
|
out:
|
|
return ret;
|
|
}
|
|
|
|
static int omfs_read_folio(struct file *file, struct folio *folio)
|
|
{
|
|
return block_read_full_folio(folio, omfs_get_block);
|
|
}
|
|
|
|
static void omfs_readahead(struct readahead_control *rac)
|
|
{
|
|
mpage_readahead(rac, omfs_get_block);
|
|
}
|
|
|
|
static int
|
|
omfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
|
|
{
|
|
return mpage_writepages(mapping, wbc, omfs_get_block);
|
|
}
|
|
|
|
static void omfs_write_failed(struct address_space *mapping, loff_t to)
|
|
{
|
|
struct inode *inode = mapping->host;
|
|
|
|
if (to > inode->i_size) {
|
|
truncate_pagecache(inode, inode->i_size);
|
|
omfs_truncate(inode);
|
|
}
|
|
}
|
|
|
|
static int omfs_write_begin(const struct kiocb *iocb,
|
|
struct address_space *mapping,
|
|
loff_t pos, unsigned len,
|
|
struct folio **foliop, void **fsdata)
|
|
{
|
|
int ret;
|
|
|
|
ret = block_write_begin(mapping, pos, len, foliop, omfs_get_block);
|
|
if (unlikely(ret))
|
|
omfs_write_failed(mapping, pos + len);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static sector_t omfs_bmap(struct address_space *mapping, sector_t block)
|
|
{
|
|
return generic_block_bmap(mapping, block, omfs_get_block);
|
|
}
|
|
|
|
const struct file_operations omfs_file_operations = {
|
|
.llseek = generic_file_llseek,
|
|
.read_iter = generic_file_read_iter,
|
|
.write_iter = generic_file_write_iter,
|
|
.mmap_prepare = generic_file_mmap_prepare,
|
|
.fsync = generic_file_fsync,
|
|
.splice_read = filemap_splice_read,
|
|
};
|
|
|
|
static int omfs_setattr(struct mnt_idmap *idmap,
|
|
struct dentry *dentry, struct iattr *attr)
|
|
{
|
|
struct inode *inode = d_inode(dentry);
|
|
int error;
|
|
|
|
error = setattr_prepare(&nop_mnt_idmap, dentry, attr);
|
|
if (error)
|
|
return error;
|
|
|
|
if ((attr->ia_valid & ATTR_SIZE) &&
|
|
attr->ia_size != i_size_read(inode)) {
|
|
error = inode_newsize_ok(inode, attr->ia_size);
|
|
if (error)
|
|
return error;
|
|
truncate_setsize(inode, attr->ia_size);
|
|
omfs_truncate(inode);
|
|
}
|
|
|
|
setattr_copy(&nop_mnt_idmap, inode, attr);
|
|
mark_inode_dirty(inode);
|
|
return 0;
|
|
}
|
|
|
|
const struct inode_operations omfs_file_inops = {
|
|
.setattr = omfs_setattr,
|
|
};
|
|
|
|
const struct address_space_operations omfs_aops = {
|
|
.dirty_folio = block_dirty_folio,
|
|
.invalidate_folio = block_invalidate_folio,
|
|
.read_folio = omfs_read_folio,
|
|
.readahead = omfs_readahead,
|
|
.writepages = omfs_writepages,
|
|
.write_begin = omfs_write_begin,
|
|
.write_end = generic_write_end,
|
|
.bmap = omfs_bmap,
|
|
.migrate_folio = buffer_migrate_folio,
|
|
};
|
|
|