summaryrefslogtreecommitdiff
path: root/mm/shmem.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/shmem.c')
-rw-r--r--mm/shmem.c129
1 files changed, 123 insertions, 6 deletions
diff --git a/mm/shmem.c b/mm/shmem.c
index b2abca3f7f33..eb6b36d89722 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -114,11 +114,13 @@ struct shmem_options {
kuid_t uid;
kgid_t gid;
umode_t mode;
+ bool full_inums;
int huge;
int seen;
#define SHMEM_SEEN_BLOCKS 1
#define SHMEM_SEEN_INODES 2
#define SHMEM_SEEN_HUGE 4
+#define SHMEM_SEEN_INUMS 8
};
#ifdef CONFIG_TMPFS
@@ -260,18 +262,76 @@ bool vma_is_shmem(struct vm_area_struct *vma)
static LIST_HEAD(shmem_swaplist);
static DEFINE_MUTEX(shmem_swaplist_mutex);
-static int shmem_reserve_inode(struct super_block *sb)
+/*
+ * shmem_reserve_inode() performs bookkeeping to reserve a shmem inode, and
+ * produces a novel ino for the newly allocated inode.
+ *
+ * It may also be called when making a hard link to permit the space needed by
+ * each dentry. However, in that case, no new inode number is needed since that
+ * internally draws from another pool of inode numbers (currently global
+ * get_next_ino()). This case is indicated by passing NULL as inop.
+ */
+#define SHMEM_INO_BATCH 1024
+static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
{
struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
- if (sbinfo->max_inodes) {
+ ino_t ino;
+
+ if (!(sb->s_flags & SB_KERNMOUNT)) {
spin_lock(&sbinfo->stat_lock);
if (!sbinfo->free_inodes) {
spin_unlock(&sbinfo->stat_lock);
return -ENOSPC;
}
sbinfo->free_inodes--;
+ if (inop) {
+ ino = sbinfo->next_ino++;
+ if (unlikely(is_zero_ino(ino)))
+ ino = sbinfo->next_ino++;
+ if (unlikely(!sbinfo->full_inums &&
+ ino > UINT_MAX)) {
+ /*
+ * Emulate get_next_ino uint wraparound for
+ * compatibility
+ */
+ if (IS_ENABLED(CONFIG_64BIT))
+ pr_warn("%s: inode number overflow on device %d, consider using inode64 mount option\n",
+ __func__, MINOR(sb->s_dev));
+ sbinfo->next_ino = 1;
+ ino = sbinfo->next_ino++;
+ }
+ *inop = ino;
+ }
spin_unlock(&sbinfo->stat_lock);
+ } else if (inop) {
+ /*
+ * __shmem_file_setup, one of our callers, is lock-free: it
+ * doesn't hold stat_lock in shmem_reserve_inode since
+ * max_inodes is always 0, and is called from potentially
+ * unknown contexts. As such, use a per-cpu batched allocator
+ * which doesn't require the per-sb stat_lock unless we are at
+ * the batch boundary.
+ *
+ * We don't need to worry about inode{32,64} since SB_KERNMOUNT
+ * shmem mounts are not exposed to userspace, so we don't need
+ * to worry about things like glibc compatibility.
+ */
+ ino_t *next_ino;
+ next_ino = per_cpu_ptr(sbinfo->ino_batch, get_cpu());
+ ino = *next_ino;
+ if (unlikely(ino % SHMEM_INO_BATCH == 0)) {
+ spin_lock(&sbinfo->stat_lock);
+ ino = sbinfo->next_ino;
+ sbinfo->next_ino += SHMEM_INO_BATCH;
+ spin_unlock(&sbinfo->stat_lock);
+ if (unlikely(is_zero_ino(ino)))
+ ino++;
+ }
+ *inop = ino;
+ *next_ino = ++ino;
+ put_cpu();
}
+
return 0;
}
@@ -2222,13 +2282,14 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
struct inode *inode;
struct shmem_inode_info *info;
struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
+ ino_t ino;
- if (shmem_reserve_inode(sb))
+ if (shmem_reserve_inode(sb, &ino))
return NULL;
inode = new_inode(sb);
if (inode) {
- inode->i_ino = get_next_ino();
+ inode->i_ino = ino;
inode_init_owner(inode, dir, mode);
inode->i_blocks = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
@@ -2932,7 +2993,7 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr
* first link must skip that, to get the accounting right.
*/
if (inode->i_nlink) {
- ret = shmem_reserve_inode(inode->i_sb);
+ ret = shmem_reserve_inode(inode->i_sb, NULL);
if (ret)
goto out;
}
@@ -3347,6 +3408,8 @@ enum shmem_param {
Opt_nr_inodes,
Opt_size,
Opt_uid,
+ Opt_inode32,
+ Opt_inode64,
};
static const struct constant_table shmem_param_enums_huge[] = {
@@ -3366,6 +3429,8 @@ const struct fs_parameter_spec shmem_fs_parameters[] = {
fsparam_string("nr_inodes", Opt_nr_inodes),
fsparam_string("size", Opt_size),
fsparam_u32 ("uid", Opt_uid),
+ fsparam_flag ("inode32", Opt_inode32),
+ fsparam_flag ("inode64", Opt_inode64),
{}
};
@@ -3437,6 +3502,18 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
break;
}
goto unsupported_parameter;
+ case Opt_inode32:
+ ctx->full_inums = false;
+ ctx->seen |= SHMEM_SEEN_INUMS;
+ break;
+ case Opt_inode64:
+ if (sizeof(ino_t) < 8) {
+ return invalfc(fc,
+ "Cannot use inode64 with <64bit inums in kernel\n");
+ }
+ ctx->full_inums = true;
+ ctx->seen |= SHMEM_SEEN_INUMS;
+ break;
}
return 0;
@@ -3528,8 +3605,16 @@ static int shmem_reconfigure(struct fs_context *fc)
}
}
+ if ((ctx->seen & SHMEM_SEEN_INUMS) && !ctx->full_inums &&
+ sbinfo->next_ino > UINT_MAX) {
+ err = "Current inum too high to switch to 32-bit inums";
+ goto out;
+ }
+
if (ctx->seen & SHMEM_SEEN_HUGE)
sbinfo->huge = ctx->huge;
+ if (ctx->seen & SHMEM_SEEN_INUMS)
+ sbinfo->full_inums = ctx->full_inums;
if (ctx->seen & SHMEM_SEEN_BLOCKS)
sbinfo->max_blocks = ctx->blocks;
if (ctx->seen & SHMEM_SEEN_INODES) {
@@ -3569,6 +3654,29 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID))
seq_printf(seq, ",gid=%u",
from_kgid_munged(&init_user_ns, sbinfo->gid));
+
+ /*
+ * Showing inode{64,32} might be useful even if it's the system default,
+ * since then people don't have to resort to checking both here and
+ * /proc/config.gz to confirm 64-bit inums were successfully applied
+ * (which may not even exist if IKCONFIG_PROC isn't enabled).
+ *
+ * We hide it when inode64 isn't the default and we are using 32-bit
+ * inodes, since that probably just means the feature isn't even under
+ * consideration.
+ *
+ * As such:
+ *
+ * +-----------------+-----------------+
+ * | TMPFS_INODE64=y | TMPFS_INODE64=n |
+ * +------------------+-----------------+-----------------+
+ * | full_inums=true | show | show |
+ * | full_inums=false | show | hide |
+ * +------------------+-----------------+-----------------+
+ *
+ */
+ if (IS_ENABLED(CONFIG_TMPFS_INODE64) || sbinfo->full_inums)
+ seq_printf(seq, ",inode%d", (sbinfo->full_inums ? 64 : 32));
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* Rightly or wrongly, show huge mount option unmasked by shmem_huge */
if (sbinfo->huge)
@@ -3584,6 +3692,7 @@ static void shmem_put_super(struct super_block *sb)
{
struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
+ free_percpu(sbinfo->ino_batch);
percpu_counter_destroy(&sbinfo->used_blocks);
mpol_put(sbinfo->mpol);
kfree(sbinfo);
@@ -3616,6 +3725,8 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
ctx->blocks = shmem_default_max_blocks();
if (!(ctx->seen & SHMEM_SEEN_INODES))
ctx->inodes = shmem_default_max_inodes();
+ if (!(ctx->seen & SHMEM_SEEN_INUMS))
+ ctx->full_inums = IS_ENABLED(CONFIG_TMPFS_INODE64);
} else {
sb->s_flags |= SB_NOUSER;
}
@@ -3626,8 +3737,14 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
#endif
sbinfo->max_blocks = ctx->blocks;
sbinfo->free_inodes = sbinfo->max_inodes = ctx->inodes;
+ if (sb->s_flags & SB_KERNMOUNT) {
+ sbinfo->ino_batch = alloc_percpu(ino_t);
+ if (!sbinfo->ino_batch)
+ goto failed;
+ }
sbinfo->uid = ctx->uid;
sbinfo->gid = ctx->gid;
+ sbinfo->full_inums = ctx->full_inums;
sbinfo->mode = ctx->mode;
sbinfo->huge = ctx->huge;
sbinfo->mpol = ctx->mpol;
@@ -4128,7 +4245,7 @@ EXPORT_SYMBOL_GPL(shmem_file_setup_with_mnt);
/**
* shmem_zero_setup - setup a shared anonymous mapping
- * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
+ * @vma: the vma to be mmapped is prepared by do_mmap
*/
int shmem_zero_setup(struct vm_area_struct *vma)
{