Skip to content

Commit 8ed936b

Browse files
Eric W. BiedermanAl Viro
authored andcommitted
vfs: Lazily remove mounts on unlinked files and directories.
With the introduction of mount namespaces and bind mounts it became possible to access files and directories that on some paths are mount points but are not mount points on other paths. It is very confusing when rm -rf somedir returns -EBUSY simply because somedir is mounted somewhere else. With the addition of user namespaces allowing unprivileged mounts this condition has gone from annoying to allowing a DOS attack on other users in the system. The possibility for mischief is removed by updating the vfs to support rename, unlink and rmdir on a dentry that is a mountpoint and by lazily unmounting mountpoints on deleted dentries. In particular this change allows rename, unlink and rmdir system calls on a dentry without a mountpoint in the current mount namespace to succeed, and it allows rename, unlink, and rmdir performed on a distributed filesystem to update the vfs cache even if when there is a mount in some namespace on the original dentry. There are two common patterns of maintaining mounts: Mounts on trusted paths with the parent directory of the mount point and all ancestory directories up to / owned by root and modifiable only by root (i.e. /media/xxx, /dev, /dev/pts, /proc, /sys, /sys/fs/cgroup/{cpu, cpuacct, ...}, /usr, /usr/local). Mounts on unprivileged directories maintained by fusermount. In the case of mounts in trusted directories owned by root and modifiable only by root the current parent directory permissions are sufficient to ensure a mount point on a trusted path is not removed or renamed by anyone other than root, even if there is a context where the there are no mount points to prevent this. In the case of mounts in directories owned by less privileged users races with users modifying the path of a mount point are already a danger. fusermount already uses a combination of chdir, /proc/<pid>/fd/NNN, and UMOUNT_NOFOLLOW to prevent these races. The removable of global rename, unlink, and rmdir protection really adds nothing new to consider only a widening of the attack window, and fusermount is already safe against unprivileged users modifying the directory simultaneously. In principle for perfect userspace programs returning -EBUSY for unlink, rmdir, and rename of dentires that have mounts in the local namespace is actually unnecessary. Unfortunately not all userspace programs are perfect so retaining -EBUSY for unlink, rmdir and rename of dentries that have mounts in the current mount namespace plays an important role of maintaining consistency with historical behavior and making imperfect userspace applications hard to exploit. v2: Remove spurious old_dentry. v3: Optimized shrink_submounts_and_drop Removed unsued afs label v4: Simplified the changes to check_submounts_and_drop Do not rename check_submounts_and_drop shrink_submounts_and_drop Document what why we need atomicity in check_submounts_and_drop Rely on the parent inode mutex to make d_revalidate and d_invalidate an atomic unit. v5: Refcount the mountpoint to detach in case of simultaneous renames. Reviewed-by: Miklos Szeredi <[email protected]> Signed-off-by: "Eric W. Biederman" <[email protected]> Signed-off-by: Al Viro <[email protected]>
1 parent 80b5dce commit 8ed936b

File tree

2 files changed

+39
-33
lines changed

2 files changed

+39
-33
lines changed

fs/dcache.c

Lines changed: 33 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1343,36 +1343,39 @@ void shrink_dcache_for_umount(struct super_block *sb)
13431343
}
13441344
}
13451345

1346-
static enum d_walk_ret check_and_collect(void *_data, struct dentry *dentry)
1346+
struct detach_data {
1347+
struct select_data select;
1348+
struct dentry *mountpoint;
1349+
};
1350+
static enum d_walk_ret detach_and_collect(void *_data, struct dentry *dentry)
13471351
{
1348-
struct select_data *data = _data;
1352+
struct detach_data *data = _data;
13491353

13501354
if (d_mountpoint(dentry)) {
1351-
data->found = -EBUSY;
1355+
__dget_dlock(dentry);
1356+
data->mountpoint = dentry;
13521357
return D_WALK_QUIT;
13531358
}
13541359

1355-
return select_collect(_data, dentry);
1360+
return select_collect(&data->select, dentry);
13561361
}
13571362

13581363
static void check_and_drop(void *_data)
13591364
{
1360-
struct select_data *data = _data;
1365+
struct detach_data *data = _data;
13611366

1362-
if (d_mountpoint(data->start))
1363-
data->found = -EBUSY;
1364-
if (!data->found)
1365-
__d_drop(data->start);
1367+
if (!data->mountpoint && !data->select.found)
1368+
__d_drop(data->select.start);
13661369
}
13671370

13681371
/**
1369-
* check_submounts_and_drop - prune dcache, check for submounts and drop
1372+
* check_submounts_and_drop - detach submounts, prune dcache, and drop
13701373
*
1371-
* All done as a single atomic operation relative to has_unlinked_ancestor().
1372-
* Returns 0 if successfully unhashed @parent. If there were submounts then
1373-
* return -EBUSY.
1374+
* The final d_drop is done as an atomic operation relative to
1375+
* rename_lock ensuring there are no races with d_set_mounted. This
1376+
* ensures there are no unhashed dentries on the path to a mountpoint.
13741377
*
1375-
* @dentry: dentry to prune and drop
1378+
* @dentry: dentry to detach, prune and drop
13761379
*/
13771380
int check_submounts_and_drop(struct dentry *dentry)
13781381
{
@@ -1385,19 +1388,24 @@ int check_submounts_and_drop(struct dentry *dentry)
13851388
}
13861389

13871390
for (;;) {
1388-
struct select_data data;
1391+
struct detach_data data;
13891392

1390-
INIT_LIST_HEAD(&data.dispose);
1391-
data.start = dentry;
1392-
data.found = 0;
1393+
data.mountpoint = NULL;
1394+
INIT_LIST_HEAD(&data.select.dispose);
1395+
data.select.start = dentry;
1396+
data.select.found = 0;
1397+
1398+
d_walk(dentry, &data, detach_and_collect, check_and_drop);
13931399

1394-
d_walk(dentry, &data, check_and_collect, check_and_drop);
1395-
ret = data.found;
1400+
if (data.select.found)
1401+
shrink_dentry_list(&data.select.dispose);
13961402

1397-
if (!list_empty(&data.dispose))
1398-
shrink_dentry_list(&data.dispose);
1403+
if (data.mountpoint) {
1404+
detach_mounts(data.mountpoint);
1405+
dput(data.mountpoint);
1406+
}
13991407

1400-
if (ret <= 0)
1408+
if (!data.mountpoint && !data.select.found)
14011409
break;
14021410

14031411
cond_resched();
@@ -2639,10 +2647,8 @@ static struct dentry *__d_unalias(struct inode *inode,
26392647
goto out_err;
26402648
m2 = &alias->d_parent->d_inode->i_mutex;
26412649
out_unalias:
2642-
if (likely(!d_mountpoint(alias))) {
2643-
__d_move(alias, dentry, false);
2644-
ret = alias;
2645-
}
2650+
__d_move(alias, dentry, false);
2651+
ret = alias;
26462652
out_err:
26472653
spin_unlock(&inode->i_lock);
26482654
if (m2)

fs/namei.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3567,8 +3567,6 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
35673567
error = -EBUSY;
35683568
if (is_local_mountpoint(dentry))
35693569
goto out;
3570-
if (d_mountpoint(dentry))
3571-
goto out;
35723570

35733571
error = security_inode_rmdir(dir, dentry);
35743572
if (error)
@@ -3581,6 +3579,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
35813579

35823580
dentry->d_inode->i_flags |= S_DEAD;
35833581
dont_mount(dentry);
3582+
detach_mounts(dentry);
35843583

35853584
out:
35863585
mutex_unlock(&dentry->d_inode->i_mutex);
@@ -3683,7 +3682,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate
36833682
return -EPERM;
36843683

36853684
mutex_lock(&target->i_mutex);
3686-
if (is_local_mountpoint(dentry) || d_mountpoint(dentry))
3685+
if (is_local_mountpoint(dentry))
36873686
error = -EBUSY;
36883687
else {
36893688
error = security_inode_unlink(dir, dentry);
@@ -3692,8 +3691,10 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate
36923691
if (error)
36933692
goto out;
36943693
error = dir->i_op->unlink(dir, dentry);
3695-
if (!error)
3694+
if (!error) {
36963695
dont_mount(dentry);
3696+
detach_mounts(dentry);
3697+
}
36973698
}
36983699
}
36993700
out:
@@ -4130,8 +4131,6 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
41304131
error = -EBUSY;
41314132
if (is_local_mountpoint(old_dentry) || is_local_mountpoint(new_dentry))
41324133
goto out;
4133-
if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry))
4134-
goto out;
41354134

41364135
if (max_links && new_dir != old_dir) {
41374136
error = -EMLINK;
@@ -4168,6 +4167,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
41684167
if (is_dir)
41694168
target->i_flags |= S_DEAD;
41704169
dont_mount(new_dentry);
4170+
detach_mounts(new_dentry);
41714171
}
41724172
if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) {
41734173
if (!(flags & RENAME_EXCHANGE))

0 commit comments

Comments
 (0)