summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- fs/file.c 95
-rw-r--r-- include/linux/fdtable.h 8
-rw-r--r-- kernel/fork.c 32
3 files changed, 52 insertions, 83 deletions
diff --git a/fs/file.c b/fs/file.c
index 5125607d040a..eb093e736972 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -272,59 +272,45 @@ static inline bool fd_is_open(unsigned int fd, const struct fdtable *fdt)
return test_bit(fd, fdt->open_fds);
}
-static unsigned int count_open_files(struct fdtable *fdt)
-{
- unsigned int size = fdt->max_fds;
- unsigned int i;
-
- /* Find the last open fd */
- for (i = size / BITS_PER_LONG; i > 0; ) {
- if (fdt->open_fds[--i])
- break;
- }
- i = (i + 1) * BITS_PER_LONG;
- return i;
-}
-
/*
* Note that a sane fdtable size always has to be a multiple of
* BITS_PER_LONG, since we have bitmaps that are sized by this.
*
- * 'max_fds' will normally already be properly aligned, but it
- * turns out that in the close_range() -> __close_range() ->
- * unshare_fd() -> dup_fd() -> sane_fdtable_size() we can end
- * up having a 'max_fds' value that isn't already aligned.
- *
- * Rather than make close_range() have to worry about this,
- * just make that BITS_PER_LONG alignment be part of a sane
- * fdtable size. Becuase that's really what it is.
+ * punch_hole is optional - when close_range() is asked to unshare
+ * and close, we don't need to copy descriptors in that range, so
+ * a smaller cloned descriptor table might suffice if the last
+ * currently opened descriptor falls into that range.
*/
-static unsigned int sane_fdtable_size(struct fdtable *fdt, unsigned int max_fds)
+static unsigned int sane_fdtable_size(struct fdtable *fdt, struct fd_range *punch_hole)
{
- unsigned int count;
-
- count = count_open_files(fdt);
- if (max_fds < NR_OPEN_DEFAULT)
- max_fds = NR_OPEN_DEFAULT;
- return ALIGN(min(count, max_fds), BITS_PER_LONG);
+ unsigned int last = find_last_bit(fdt->open_fds, fdt->max_fds);
+
+ if (last == fdt->max_fds)
+ return NR_OPEN_DEFAULT;
+ if (punch_hole && punch_hole->to >= last && punch_hole->from <= last) {
+ last = find_last_bit(fdt->open_fds, punch_hole->from);
+ if (last == punch_hole->from)
+ return NR_OPEN_DEFAULT;
+ }
+ return ALIGN(last + 1, BITS_PER_LONG);
}
/*
- * Allocate a new files structure and copy contents from the
- * passed in files structure.
- * errorp will be valid only when the returned files_struct is NULL.
+ * Allocate a new descriptor table and copy contents from the passed in
+ * instance. Returns a pointer to cloned table on success, ERR_PTR()
+ * on failure. For 'punch_hole' see sane_fdtable_size().
*/
-struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int *errorp)
+struct files_struct *dup_fd(struct files_struct *oldf, struct fd_range *punch_hole)
{
struct files_struct *newf;
struct file **old_fds, **new_fds;
unsigned int open_files, i;
struct fdtable *old_fdt, *new_fdt;
+ int error;
- *errorp = -ENOMEM;
newf = kmem_cache_alloc(files_cachep, GFP_KERNEL);
if (!newf)
- goto out;
+ return ERR_PTR(-ENOMEM);
atomic_set(&newf->count, 1);
@@ -341,7 +327,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int
spin_lock(&oldf->file_lock);
old_fdt = files_fdtable(oldf);
- open_files = sane_fdtable_size(old_fdt, max_fds);
+ open_files = sane_fdtable_size(old_fdt, punch_hole);
/*
* Check whether we need to allocate a larger fd array and fd set.
@@ -354,14 +340,14 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int
new_fdt = alloc_fdtable(open_files - 1);
if (!new_fdt) {
- *errorp = -ENOMEM;
+ error = -ENOMEM;
goto out_release;
}
/* beyond sysctl_nr_open; nothing to do */
if (unlikely(new_fdt->max_fds < open_files)) {
__free_fdtable(new_fdt);
- *errorp = -EMFILE;
+ error = -EMFILE;
goto out_release;
}
@@ -372,7 +358,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int
*/
spin_lock(&oldf->file_lock);
old_fdt = files_fdtable(oldf);
- open_files = sane_fdtable_size(old_fdt, max_fds);
+ open_files = sane_fdtable_size(old_fdt, punch_hole);
}
copy_fd_bitmaps(new_fdt, old_fdt, open_files / BITS_PER_LONG);
@@ -406,8 +392,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int
out_release:
kmem_cache_free(files_cachep, newf);
-out:
- return NULL;
+ return ERR_PTR(error);
}
static struct fdtable *close_files(struct files_struct * files)
@@ -748,37 +733,25 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
if (fd > max_fd)
return -EINVAL;
- if (flags & CLOSE_RANGE_UNSHARE) {
- int ret;
- unsigned int max_unshare_fds = NR_OPEN_MAX;
+ if ((flags & CLOSE_RANGE_UNSHARE) && atomic_read(&cur_fds->count) > 1) {
+ struct fd_range range = {fd, max_fd}, *punch_hole = &range;
/*
* If the caller requested all fds to be made cloexec we always
* copy all of the file descriptors since they still want to
* use them.
*/
- if (!(flags & CLOSE_RANGE_CLOEXEC)) {
- /*
- * If the requested range is greater than the current
- * maximum, we're closing everything so only copy all
- * file descriptors beneath the lowest file descriptor.
- */
- rcu_read_lock();
- if (max_fd >= last_fd(files_fdtable(cur_fds)))
- max_unshare_fds = fd;
- rcu_read_unlock();
- }
-
- ret = unshare_fd(CLONE_FILES, max_unshare_fds, &fds);
- if (ret)
- return ret;
+ if (flags & CLOSE_RANGE_CLOEXEC)
+ punch_hole = NULL;
+ fds = dup_fd(cur_fds, punch_hole);
+ if (IS_ERR(fds))
+ return PTR_ERR(fds);
/*
* We used to share our file descriptor table, and have now
* created a private one, make sure we're using it below.
*/
- if (fds)
- swap(cur_fds, fds);
+ swap(cur_fds, fds);
}
if (flags & CLOSE_RANGE_CLOEXEC)
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index 2944d4aa413b..b1c5722f2b3c 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -22,7 +22,6 @@
* as this is the granularity returned by copy_fdset().
*/
#define NR_OPEN_DEFAULT BITS_PER_LONG
-#define NR_OPEN_MAX ~0U
struct fdtable {
unsigned int max_fds;
@@ -106,7 +105,10 @@ struct task_struct;
void put_files_struct(struct files_struct *fs);
int unshare_files(void);
-struct files_struct *dup_fd(struct files_struct *, unsigned, int *) __latent_entropy;
+struct fd_range {
+ unsigned int from, to;
+};
+struct files_struct *dup_fd(struct files_struct *, struct fd_range *) __latent_entropy;
void do_close_on_exec(struct files_struct *);
int iterate_fd(struct files_struct *, unsigned,
int (*)(const void *, struct file *, unsigned),
@@ -115,8 +117,6 @@ int iterate_fd(struct files_struct *, unsigned,
extern int close_fd(unsigned int fd);
extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags);
extern struct file *file_close_fd(unsigned int fd);
-extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
- struct files_struct **new_fdp);
extern struct kmem_cache *files_cachep;
diff --git a/kernel/fork.c b/kernel/fork.c
index 60c0b4868fd4..89ceb4a68af2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1756,33 +1756,30 @@ static int copy_files(unsigned long clone_flags, struct task_struct *tsk,
int no_files)
{
struct files_struct *oldf, *newf;
- int error = 0;
/*
* A background process may not have any files ...
*/
oldf = current->files;
if (!oldf)
- goto out;
+ return 0;
if (no_files) {
tsk->files = NULL;
- goto out;
+ return 0;
}
if (clone_flags & CLONE_FILES) {
atomic_inc(&oldf->count);
- goto out;
+ return 0;
}
- newf = dup_fd(oldf, NR_OPEN_MAX, &error);
- if (!newf)
- goto out;
+ newf = dup_fd(oldf, NULL);
+ if (IS_ERR(newf))
+ return PTR_ERR(newf);
tsk->files = newf;
- error = 0;
-out:
- return error;
+ return 0;
}
static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
@@ -3238,17 +3235,16 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
/*
* Unshare file descriptor table if it is being shared
*/
-int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
- struct files_struct **new_fdp)
+static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
{
struct files_struct *fd = current->files;
- int error = 0;
if ((unshare_flags & CLONE_FILES) &&
(fd && atomic_read(&fd->count) > 1)) {
- *new_fdp = dup_fd(fd, max_fds, &error);
- if (!*new_fdp)
- return error;
+ fd = dup_fd(fd, NULL);
+ if (IS_ERR(fd))
+ return PTR_ERR(fd);
+ *new_fdp = fd;
}
return 0;
@@ -3306,7 +3302,7 @@ int ksys_unshare(unsigned long unshare_flags)
err = unshare_fs(unshare_flags, &new_fs);
if (err)
goto bad_unshare_out;
- err = unshare_fd(unshare_flags, NR_OPEN_MAX, &new_fd);
+ err = unshare_fd(unshare_flags, &new_fd);
if (err)
goto bad_unshare_cleanup_fs;
err = unshare_userns(unshare_flags, &new_cred);
@@ -3398,7 +3394,7 @@ int unshare_files(void)
struct files_struct *old, *copy = NULL;
int error;
- error = unshare_fd(CLONE_FILES, NR_OPEN_MAX, &copy);
+ error = unshare_fd(CLONE_FILES, &copy);
if (error || !copy)
return error;