replace collect_mounts()/drop_collected_mounts() with a safer variant

collect_mounts() has several problems - one can't iterate over the results
directly, so it has to be done with callback passed to iterate_mounts();
it has an oopsable race with d_invalidate(); it creates temporary clones
of mounts invisibly for sync umount (IOW, you can have non-lazy umount
succeed leaving filesystem not mounted anywhere and yet still busy).

A saner approach is to give caller an array of struct path that would pin
every mount in a subtree, without cloning any mounts.

        * collect_mounts()/drop_collected_mounts()/iterate_mounts() are gone
        * collect_paths(where, preallocated, size) gives either ERR_PTR(-E...) or
a pointer to array of struct path, one for each chunk of tree visible under
'where' (i.e. the first element is a copy of where, followed by (mount,root)
for everything mounted under it - the same set collect_mounts() would give).
Unlike collect_mounts(), the mounts are *not* cloned - we just get pinning
references to the roots of subtrees in the caller's namespace.
        Array is terminated by {NULL, NULL} struct path.  If it fits into
preallocated array (on-stack, normally), that's where it goes; otherwise
it's allocated by kmalloc_array().  Passing 0 as size means that 'preallocated'
is ignored (and expected to be NULL).
        * drop_collected_paths(paths, preallocated) is given the array returned
by an earlier call of collect_paths() and the preallocated array passed to that
call.  All mount/dentry references are dropped and array is kfree'd if it's not
equal to 'preallocated'.
        * instead of iterate_mounts(), users should just iterate over array
of struct path - nothing exotic is needed for that.  Existing users (all in
audit_tree.c) are converted.

[folded a fix for braino reported by Venkat Rao Bagalkote <venkat88@linux.ibm.com>]

Fixes: 80b5dce8c5 ("vfs: Add a function to lazily unmount all mounts from any dentry")
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
Al Viro 2025-06-17 00:09:51 -04:00
parent 19272b37aa
commit 7484e15dbb
5 changed files with 105 additions and 74 deletions

View file

@ -1249,3 +1249,12 @@ Using try_lookup_noperm() will require linux/namei.h to be included.
Calling conventions for ->d_automount() have changed; we should *not* grab Calling conventions for ->d_automount() have changed; we should *not* grab
an extra reference to new mount - it should be returned with refcount 1. an extra reference to new mount - it should be returned with refcount 1.
---
collect_mounts()/drop_collected_mounts()/iterate_mounts() are gone now.
Replacement is collect_paths()/drop_collected_paths(), with no special
iterator needed. Instead of a cloned mount tree, the new interface returns
an array of struct path, one for each mount collect_mounts() would've
created. These struct path point to locations in the caller's namespace
that would be roots of the cloned mounts.

View file

@ -2310,21 +2310,62 @@ out:
return dst_mnt; return dst_mnt;
} }
/* Caller should check returned pointer for errors */ static inline bool extend_array(struct path **res, struct path **to_free,
unsigned n, unsigned *count, unsigned new_count)
struct vfsmount *collect_mounts(const struct path *path)
{ {
struct mount *tree; struct path *p;
namespace_lock();
if (!check_mnt(real_mount(path->mnt))) if (likely(n < *count))
tree = ERR_PTR(-EINVAL); return true;
else p = kmalloc_array(new_count, sizeof(struct path), GFP_KERNEL);
tree = copy_tree(real_mount(path->mnt), path->dentry, if (p && *count)
CL_COPY_ALL | CL_PRIVATE); memcpy(p, *res, *count * sizeof(struct path));
namespace_unlock(); *count = new_count;
if (IS_ERR(tree)) kfree(*to_free);
return ERR_CAST(tree); *to_free = *res = p;
return &tree->mnt; return p;
}
struct path *collect_paths(const struct path *path,
struct path *prealloc, unsigned count)
{
struct mount *root = real_mount(path->mnt);
struct mount *child;
struct path *res = prealloc, *to_free = NULL;
unsigned n = 0;
guard(rwsem_read)(&namespace_sem);
if (!check_mnt(root))
return ERR_PTR(-EINVAL);
if (!extend_array(&res, &to_free, 0, &count, 32))
return ERR_PTR(-ENOMEM);
res[n++] = *path;
list_for_each_entry(child, &root->mnt_mounts, mnt_child) {
if (!is_subdir(child->mnt_mountpoint, path->dentry))
continue;
for (struct mount *m = child; m; m = next_mnt(m, child)) {
if (!extend_array(&res, &to_free, n, &count, 2 * count))
return ERR_PTR(-ENOMEM);
res[n].mnt = &m->mnt;
res[n].dentry = m->mnt.mnt_root;
n++;
}
}
if (!extend_array(&res, &to_free, n, &count, count + 1))
return ERR_PTR(-ENOMEM);
memset(res + n, 0, (count - n) * sizeof(struct path));
for (struct path *p = res; p->mnt; p++)
path_get(p);
return res;
}
void drop_collected_paths(struct path *paths, struct path *prealloc)
{
for (struct path *p = paths; p->mnt; p++)
path_put(p);
if (paths != prealloc)
kfree(paths);
} }
static void free_mnt_ns(struct mnt_namespace *); static void free_mnt_ns(struct mnt_namespace *);
@ -2401,15 +2442,6 @@ void dissolve_on_fput(struct vfsmount *mnt)
free_mnt_ns(ns); free_mnt_ns(ns);
} }
void drop_collected_mounts(struct vfsmount *mnt)
{
namespace_lock();
lock_mount_hash();
umount_tree(real_mount(mnt), 0);
unlock_mount_hash();
namespace_unlock();
}
static bool __has_locked_children(struct mount *mnt, struct dentry *dentry) static bool __has_locked_children(struct mount *mnt, struct dentry *dentry)
{ {
struct mount *child; struct mount *child;
@ -2511,21 +2543,6 @@ struct vfsmount *clone_private_mount(const struct path *path)
} }
EXPORT_SYMBOL_GPL(clone_private_mount); EXPORT_SYMBOL_GPL(clone_private_mount);
int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
struct vfsmount *root)
{
struct mount *mnt;
int res = f(root, arg);
if (res)
return res;
list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) {
res = f(&mnt->mnt, arg);
if (res)
return res;
}
return 0;
}
static void lock_mnt_tree(struct mount *mnt) static void lock_mnt_tree(struct mount *mnt)
{ {
struct mount *p; struct mount *p;
@ -6262,7 +6279,11 @@ void put_mnt_ns(struct mnt_namespace *ns)
{ {
if (!refcount_dec_and_test(&ns->ns.count)) if (!refcount_dec_and_test(&ns->ns.count))
return; return;
drop_collected_mounts(&ns->root->mnt); namespace_lock();
lock_mount_hash();
umount_tree(ns->root, 0);
unlock_mount_hash();
namespace_unlock();
free_mnt_ns(ns); free_mnt_ns(ns);
} }

View file

@ -28,8 +28,6 @@
#define CL_SHARED_TO_SLAVE 0x20 #define CL_SHARED_TO_SLAVE 0x20
#define CL_COPY_MNT_NS_FILE 0x40 #define CL_COPY_MNT_NS_FILE 0x40
#define CL_COPY_ALL (CL_COPY_UNBINDABLE | CL_COPY_MNT_NS_FILE)
static inline void set_mnt_shared(struct mount *mnt) static inline void set_mnt_shared(struct mount *mnt)
{ {
mnt->mnt.mnt_flags &= ~MNT_SHARED_MASK; mnt->mnt.mnt_flags &= ~MNT_SHARED_MASK;

View file

@ -116,10 +116,8 @@ extern int may_umount_tree(struct vfsmount *);
extern int may_umount(struct vfsmount *); extern int may_umount(struct vfsmount *);
int do_mount(const char *, const char __user *, int do_mount(const char *, const char __user *,
const char *, unsigned long, void *); const char *, unsigned long, void *);
extern struct vfsmount *collect_mounts(const struct path *); extern struct path *collect_paths(const struct path *, struct path *, unsigned);
extern void drop_collected_mounts(struct vfsmount *); extern void drop_collected_paths(struct path *, struct path *);
extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *,
struct vfsmount *);
extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num); extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num);
extern int cifs_root_data(char **dev, char **opts); extern int cifs_root_data(char **dev, char **opts);

View file

@ -668,12 +668,6 @@ int audit_remove_tree_rule(struct audit_krule *rule)
return 0; return 0;
} }
static int compare_root(struct vfsmount *mnt, void *arg)
{
return inode_to_key(d_backing_inode(mnt->mnt_root)) ==
(unsigned long)arg;
}
void audit_trim_trees(void) void audit_trim_trees(void)
{ {
struct list_head cursor; struct list_head cursor;
@ -683,8 +677,9 @@ void audit_trim_trees(void)
while (cursor.next != &tree_list) { while (cursor.next != &tree_list) {
struct audit_tree *tree; struct audit_tree *tree;
struct path path; struct path path;
struct vfsmount *root_mnt;
struct audit_node *node; struct audit_node *node;
struct path *paths;
struct path array[16];
int err; int err;
tree = container_of(cursor.next, struct audit_tree, list); tree = container_of(cursor.next, struct audit_tree, list);
@ -696,9 +691,9 @@ void audit_trim_trees(void)
if (err) if (err)
goto skip_it; goto skip_it;
root_mnt = collect_mounts(&path); paths = collect_paths(&path, array, 16);
path_put(&path); path_put(&path);
if (IS_ERR(root_mnt)) if (IS_ERR(paths))
goto skip_it; goto skip_it;
spin_lock(&hash_lock); spin_lock(&hash_lock);
@ -706,14 +701,17 @@ void audit_trim_trees(void)
struct audit_chunk *chunk = find_chunk(node); struct audit_chunk *chunk = find_chunk(node);
/* this could be NULL if the watch is dying else where... */ /* this could be NULL if the watch is dying else where... */
node->index |= 1U<<31; node->index |= 1U<<31;
if (iterate_mounts(compare_root, for (struct path *p = paths; p->dentry; p++) {
(void *)(chunk->key), struct inode *inode = p->dentry->d_inode;
root_mnt)) if (inode_to_key(inode) == chunk->key) {
node->index &= ~(1U<<31); node->index &= ~(1U<<31);
break;
}
}
} }
spin_unlock(&hash_lock); spin_unlock(&hash_lock);
trim_marked(tree); trim_marked(tree);
drop_collected_mounts(root_mnt); drop_collected_paths(paths, array);
skip_it: skip_it:
put_tree(tree); put_tree(tree);
mutex_lock(&audit_filter_mutex); mutex_lock(&audit_filter_mutex);
@ -742,9 +740,14 @@ void audit_put_tree(struct audit_tree *tree)
put_tree(tree); put_tree(tree);
} }
static int tag_mount(struct vfsmount *mnt, void *arg) static int tag_mounts(struct path *paths, struct audit_tree *tree)
{ {
return tag_chunk(d_backing_inode(mnt->mnt_root), arg); for (struct path *p = paths; p->dentry; p++) {
int err = tag_chunk(p->dentry->d_inode, tree);
if (err)
return err;
}
return 0;
} }
/* /*
@ -801,7 +804,8 @@ int audit_add_tree_rule(struct audit_krule *rule)
{ {
struct audit_tree *seed = rule->tree, *tree; struct audit_tree *seed = rule->tree, *tree;
struct path path; struct path path;
struct vfsmount *mnt; struct path array[16];
struct path *paths;
int err; int err;
rule->tree = NULL; rule->tree = NULL;
@ -828,16 +832,16 @@ int audit_add_tree_rule(struct audit_krule *rule)
err = kern_path(tree->pathname, 0, &path); err = kern_path(tree->pathname, 0, &path);
if (err) if (err)
goto Err; goto Err;
mnt = collect_mounts(&path); paths = collect_paths(&path, array, 16);
path_put(&path); path_put(&path);
if (IS_ERR(mnt)) { if (IS_ERR(paths)) {
err = PTR_ERR(mnt); err = PTR_ERR(paths);
goto Err; goto Err;
} }
get_tree(tree); get_tree(tree);
err = iterate_mounts(tag_mount, tree, mnt); err = tag_mounts(paths, tree);
drop_collected_mounts(mnt); drop_collected_paths(paths, array);
if (!err) { if (!err) {
struct audit_node *node; struct audit_node *node;
@ -872,20 +876,21 @@ int audit_tag_tree(char *old, char *new)
struct list_head cursor, barrier; struct list_head cursor, barrier;
int failed = 0; int failed = 0;
struct path path1, path2; struct path path1, path2;
struct vfsmount *tagged; struct path array[16];
struct path *paths;
int err; int err;
err = kern_path(new, 0, &path2); err = kern_path(new, 0, &path2);
if (err) if (err)
return err; return err;
tagged = collect_mounts(&path2); paths = collect_paths(&path2, array, 16);
path_put(&path2); path_put(&path2);
if (IS_ERR(tagged)) if (IS_ERR(paths))
return PTR_ERR(tagged); return PTR_ERR(paths);
err = kern_path(old, 0, &path1); err = kern_path(old, 0, &path1);
if (err) { if (err) {
drop_collected_mounts(tagged); drop_collected_paths(paths, array);
return err; return err;
} }
@ -914,7 +919,7 @@ int audit_tag_tree(char *old, char *new)
continue; continue;
} }
failed = iterate_mounts(tag_mount, tree, tagged); failed = tag_mounts(paths, tree);
if (failed) { if (failed) {
put_tree(tree); put_tree(tree);
mutex_lock(&audit_filter_mutex); mutex_lock(&audit_filter_mutex);
@ -955,7 +960,7 @@ int audit_tag_tree(char *old, char *new)
list_del(&cursor); list_del(&cursor);
mutex_unlock(&audit_filter_mutex); mutex_unlock(&audit_filter_mutex);
path_put(&path1); path_put(&path1);
drop_collected_mounts(tagged); drop_collected_paths(paths, array);
return failed; return failed;
} }