summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-04-27 16:52:33 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-04-27 16:52:33 -0700
commit888d3c9f7f3ae44101a3fd76528d3dd6f96e9fd0 (patch)
tree833fa91e015ea12e4eb4e0aa1111bcc08832fa91 /fs
parentb6a7828502dc769e1a5329027bc5048222fa210a (diff)
parente3184de9d46c2eebdb776face2e2662c6733331d (diff)
Merge tag 'sysctl-6.4-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/mcgrof/linux
Pull sysctl updates from Luis Chamberlain: "This only does a few sysctl moves from the kernel/sysctl.c file, the rest of the work has been put towards deprecating two API calls which incur recursion and prevent us from simplifying the registration process / saving memory per move. Most of the changes have been soaking on linux-next since v6.3-rc3. I've slowed down the kernel/sysctl.c moves due to Matthew Wilcox's feedback that we should see if we could *save* memory with these moves instead of incurring more memory. We currently incur more memory since when we move a syctl from kernel/sysclt.c out to its own file we end up having to add a new empty sysctl used to register it. To achieve saving memory we want to allow syctls to be passed without requiring the end element being empty, and just have our registration process rely on ARRAY_SIZE(). Without this, supporting both styles of sysctls would make the sysctl registration pretty brittle, hard to read and maintain as can be seen from Meng Tang's efforts to do just this [0]. Fortunately, in order to use ARRAY_SIZE() for all sysctl registrations also implies doing the work to deprecate two API calls which use recursion in order to support sysctl declarations with subdirectories. And so during this development cycle quite a bit of effort went into this deprecation effort. I've annotated the following two APIs are deprecated and in few kernel releases we should be good to remove them: - register_sysctl_table() - register_sysctl_paths() During this merge window we should be able to deprecate and unexport register_sysctl_paths(), we can probably do that towards the end of this merge window. Deprecating register_sysctl_table() will take a bit more time but this pull request goes with a few example of how to do this. As it turns out each of the conversions to move away from either of these two API calls *also* saves memory. And so long term, all these changes *will* prove to have saved a bit of memory on boot. The way I see it then is if remove a user of one deprecated call, it gives us enough savings to move one kernel/sysctl.c out from the generic arrays as we end up with about the same amount of bytes. Since deprecating register_sysctl_table() and register_sysctl_paths() does not require maintainer coordination except the final unexport you'll see quite a bit of these changes from other pull requests, I've just kept the stragglers after rc3" Link: https://lkml.kernel.org/r/ZAD+cpbrqlc5vmry@bombadil.infradead.org [0] * tag 'sysctl-6.4-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/mcgrof/linux: (29 commits) fs: fix sysctls.c built mm: compaction: remove incorrect #ifdef checks mm: compaction: move compaction sysctl to its own file mm: memory-failure: Move memory failure sysctls to its own file arm: simplify two-level sysctl registration for ctl_isa_vars ia64: simplify one-level sysctl registration for kdump_ctl_table utsname: simplify one-level sysctl registration for uts_kern_table ntfs: simplfy one-level sysctl registration for ntfs_sysctls coda: simplify one-level sysctl registration for coda_table fs/cachefiles: simplify one-level sysctl registration for cachefiles_sysctls xfs: simplify two-level sysctl registration for xfs_table nfs: simplify two-level sysctl registration for nfs_cb_sysctls nfs: simplify two-level sysctl registration for nfs4_cb_sysctls lockd: simplify two-level sysctl registration for nlm_sysctls proc_sysctl: enhance documentation xen: simplify sysctl registration for balloon md: simplify sysctl registration hv: simplify sysctl registration scsi: simplify sysctl registration with register_sysctl() csky: simplify alignment sysctl registration ...
Diffstat (limited to 'fs')
-rw-r--r--fs/Makefile3
-rw-r--r--fs/cachefiles/error_inject.c11
-rw-r--r--fs/coda/sysctl.c11
-rw-r--r--fs/lockd/svc.c20
-rw-r--r--fs/nfs/nfs4sysctl.c21
-rw-r--r--fs/nfs/sysctl.c20
-rw-r--r--fs/ntfs/sysctl.c12
-rw-r--r--fs/proc/proc_sysctl.c88
-rw-r--r--fs/userfaultfd.c20
-rw-r--r--fs/xfs/xfs_sysctl.c20
10 files changed, 86 insertions, 140 deletions
diff --git a/fs/Makefile b/fs/Makefile
index 05f89b5c962f..8d4736fcc766 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -6,7 +6,6 @@
# Rewritten to use lists instead of if-statements.
#
-obj-$(CONFIG_SYSCTL) += sysctls.o
obj-y := open.o read_write.o file_table.o super.o \
char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
@@ -50,7 +49,7 @@ obj-$(CONFIG_FS_MBCACHE) += mbcache.o
obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o
obj-$(CONFIG_NFS_COMMON) += nfs_common/
obj-$(CONFIG_COREDUMP) += coredump.o
-obj-$(CONFIG_SYSCTL) += drop_caches.o
+obj-$(CONFIG_SYSCTL) += drop_caches.o sysctls.o
obj-$(CONFIG_FHANDLE) += fhandle.o
obj-y += iomap/
diff --git a/fs/cachefiles/error_inject.c b/fs/cachefiles/error_inject.c
index 58f8aec964e4..18de8a876b02 100644
--- a/fs/cachefiles/error_inject.c
+++ b/fs/cachefiles/error_inject.c
@@ -22,18 +22,9 @@ static struct ctl_table cachefiles_sysctls[] = {
{}
};
-static struct ctl_table cachefiles_sysctls_root[] = {
- {
- .procname = "cachefiles",
- .mode = 0555,
- .child = cachefiles_sysctls,
- },
- {}
-};
-
int __init cachefiles_register_error_injection(void)
{
- cachefiles_sysctl = register_sysctl_table(cachefiles_sysctls_root);
+ cachefiles_sysctl = register_sysctl("cachefiles", cachefiles_sysctls);
if (!cachefiles_sysctl)
return -ENOMEM;
return 0;
diff --git a/fs/coda/sysctl.c b/fs/coda/sysctl.c
index fda3b702b1c5..a247c14aaab7 100644
--- a/fs/coda/sysctl.c
+++ b/fs/coda/sysctl.c
@@ -39,19 +39,10 @@ static struct ctl_table coda_table[] = {
{}
};
-static struct ctl_table fs_table[] = {
- {
- .procname = "coda",
- .mode = 0555,
- .child = coda_table
- },
- {}
-};
-
void coda_sysctl_init(void)
{
if ( !fs_table_header )
- fs_table_header = register_sysctl_table(fs_table);
+ fs_table_header = register_sysctl("coda", coda_table);
}
void coda_sysctl_clean(void)
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 9a47303b2cba..bb94949bc223 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -510,24 +510,6 @@ static struct ctl_table nlm_sysctls[] = {
{ }
};
-static struct ctl_table nlm_sysctl_dir[] = {
- {
- .procname = "nfs",
- .mode = 0555,
- .child = nlm_sysctls,
- },
- { }
-};
-
-static struct ctl_table nlm_sysctl_root[] = {
- {
- .procname = "fs",
- .mode = 0555,
- .child = nlm_sysctl_dir,
- },
- { }
-};
-
#endif /* CONFIG_SYSCTL */
/*
@@ -644,7 +626,7 @@ static int __init init_nlm(void)
#ifdef CONFIG_SYSCTL
err = -ENOMEM;
- nlm_sysctl_table = register_sysctl_table(nlm_sysctl_root);
+ nlm_sysctl_table = register_sysctl("fs/nfs", nlm_sysctls);
if (nlm_sysctl_table == NULL)
goto err_sysctl;
#endif
diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c
index c394e4447100..e776200e9a11 100644
--- a/fs/nfs/nfs4sysctl.c
+++ b/fs/nfs/nfs4sysctl.c
@@ -37,27 +37,10 @@ static struct ctl_table nfs4_cb_sysctls[] = {
{ }
};
-static struct ctl_table nfs4_cb_sysctl_dir[] = {
- {
- .procname = "nfs",
- .mode = 0555,
- .child = nfs4_cb_sysctls,
- },
- { }
-};
-
-static struct ctl_table nfs4_cb_sysctl_root[] = {
- {
- .procname = "fs",
- .mode = 0555,
- .child = nfs4_cb_sysctl_dir,
- },
- { }
-};
-
int nfs4_register_sysctl(void)
{
- nfs4_callback_sysctl_table = register_sysctl_table(nfs4_cb_sysctl_root);
+ nfs4_callback_sysctl_table = register_sysctl("fs/nfs",
+ nfs4_cb_sysctls);
if (nfs4_callback_sysctl_table == NULL)
return -ENOMEM;
return 0;
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index 7aea195ddb35..f39e2089bc4c 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -32,27 +32,9 @@ static struct ctl_table nfs_cb_sysctls[] = {
{ }
};
-static struct ctl_table nfs_cb_sysctl_dir[] = {
- {
- .procname = "nfs",
- .mode = 0555,
- .child = nfs_cb_sysctls,
- },
- { }
-};
-
-static struct ctl_table nfs_cb_sysctl_root[] = {
- {
- .procname = "fs",
- .mode = 0555,
- .child = nfs_cb_sysctl_dir,
- },
- { }
-};
-
int nfs_register_sysctl(void)
{
- nfs_callback_sysctl_table = register_sysctl_table(nfs_cb_sysctl_root);
+ nfs_callback_sysctl_table = register_sysctl("fs/nfs", nfs_cb_sysctls);
if (nfs_callback_sysctl_table == NULL)
return -ENOMEM;
return 0;
diff --git a/fs/ntfs/sysctl.c b/fs/ntfs/sysctl.c
index a030d00af90c..174fe536a1c0 100644
--- a/fs/ntfs/sysctl.c
+++ b/fs/ntfs/sysctl.c
@@ -31,16 +31,6 @@ static struct ctl_table ntfs_sysctls[] = {
{}
};
-/* Define the parent directory /proc/sys/fs. */
-static struct ctl_table sysctls_root[] = {
- {
- .procname = "fs",
- .mode = 0555,
- .child = ntfs_sysctls
- },
- {}
-};
-
/* Storage for the sysctls header. */
static struct ctl_table_header *sysctls_root_table;
@@ -54,7 +44,7 @@ int ntfs_sysctl(int add)
{
if (add) {
BUG_ON(sysctls_root_table);
- sysctls_root_table = register_sysctl_table(sysctls_root);
+ sysctls_root_table = register_sysctl("fs", ntfs_sysctls);
if (!sysctls_root_table)
return -ENOMEM;
} else {
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 5851eb5bc726..7ad07435828f 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -1283,11 +1283,43 @@ out:
return err;
}
+/* Find the directory for the ctl_table. If one is not found create it. */
+static struct ctl_dir *sysctl_mkdir_p(struct ctl_dir *dir, const char *path)
+{
+ const char *name, *nextname;
+
+ for (name = path; name; name = nextname) {
+ int namelen;
+ nextname = strchr(name, '/');
+ if (nextname) {
+ namelen = nextname - name;
+ nextname++;
+ } else {
+ namelen = strlen(name);
+ }
+ if (namelen == 0)
+ continue;
+
+ /*
+ * namelen ensures if name is "foo/bar/yay" only foo is
+ * registered first. We traverse as if using mkdir -p and
+ * return a ctl_dir for the last directory entry.
+ */
+ dir = get_subdir(dir, name, namelen);
+ if (IS_ERR(dir))
+ break;
+ }
+ return dir;
+}
+
/**
* __register_sysctl_table - register a leaf sysctl table
* @set: Sysctl tree to register on
* @path: The path to the directory the sysctl table is in.
- * @table: the top-level table structure
+ * @table: the top-level table structure without any child. This table
+ * should not be free'd after registration. So it should not be
+ * used on stack. It can either be a global or dynamically allocated
+ * by the caller and free'd later after sysctl unregistration.
*
* Register a sysctl table hierarchy. @table should be a filled in ctl_table
* array. A completely 0 filled entry terminates the table.
@@ -1308,9 +1340,12 @@ out:
* proc_handler - the text handler routine (described below)
*
* extra1, extra2 - extra pointers usable by the proc handler routines
+ * XXX: we should eventually modify these to use long min / max [0]
+ * [0] https://lkml.kernel.org/87zgpte9o4.fsf@email.froward.int.ebiederm.org
*
* Leaf nodes in the sysctl tree will be represented by a single file
- * under /proc; non-leaf nodes will be represented by directories.
+ * under /proc; non-leaf nodes (where child is not NULL) are not allowed,
+ * sysctl_check_table() verifies this.
*
* There must be a proc_handler routine for any terminal nodes.
* Several default handlers are available to cover common cases -
@@ -1331,7 +1366,6 @@ struct ctl_table_header *__register_sysctl_table(
{
struct ctl_table_root *root = set->dir.header.root;
struct ctl_table_header *header;
- const char *name, *nextname;
struct ctl_dir *dir;
struct ctl_table *entry;
struct ctl_node *node;
@@ -1352,28 +1386,13 @@ struct ctl_table_header *__register_sysctl_table(
spin_lock(&sysctl_lock);
dir = &set->dir;
- /* Reference moved down the diretory tree get_subdir */
+ /* Reference moved down the directory tree get_subdir */
dir->header.nreg++;
spin_unlock(&sysctl_lock);
- /* Find the directory for the ctl_table */
- for (name = path; name; name = nextname) {
- int namelen;
- nextname = strchr(name, '/');
- if (nextname) {
- namelen = nextname - name;
- nextname++;
- } else {
- namelen = strlen(name);
- }
- if (namelen == 0)
- continue;
-
- dir = get_subdir(dir, name, namelen);
- if (IS_ERR(dir))
- goto fail;
- }
-
+ dir = sysctl_mkdir_p(dir, path);
+ if (IS_ERR(dir))
+ goto fail;
spin_lock(&sysctl_lock);
if (insert_header(dir, header))
goto fail_put_dir_locked;
@@ -1394,8 +1413,15 @@ fail:
/**
* register_sysctl - register a sysctl table
- * @path: The path to the directory the sysctl table is in.
- * @table: the table structure
+ * @path: The path to the directory the sysctl table is in. If the path
+ * doesn't exist we will create it for you.
+ * @table: the table structure. The calller must ensure the life of the @table
+ * will be kept during the lifetime use of the syctl. It must not be freed
+ * until unregister_sysctl_table() is called with the given returned table
+ * with this registration. If your code is non modular then you don't need
+ * to call unregister_sysctl_table() and can instead use something like
+ * register_sysctl_init() which does not care for the result of the syctl
+ * registration.
*
* Register a sysctl table. @table should be a filled in ctl_table
* array. A completely 0 filled entry terminates the table.
@@ -1411,8 +1437,11 @@ EXPORT_SYMBOL(register_sysctl);
/**
* __register_sysctl_init() - register sysctl table to path
- * @path: path name for sysctl base
- * @table: This is the sysctl table that needs to be registered to the path
+ * @path: path name for sysctl base. If that path doesn't exist we will create
+ * it for you.
+ * @table: This is the sysctl table that needs to be registered to the path.
+ * The caller must ensure the life of the @table will be kept during the
+ * lifetime use of the sysctl.
* @table_name: The name of sysctl table, only used for log printing when
* registration fails
*
@@ -1424,10 +1453,7 @@ EXPORT_SYMBOL(register_sysctl);
* register_sysctl() failing on init are extremely low, and so for both reasons
* this function does not return any error as it is used by initialization code.
*
- * Context: Can only be called after your respective sysctl base path has been
- * registered. So for instance, most base directories are registered early on
- * init before init levels are processed through proc_sys_init() and
- * sysctl_init_bases().
+ * Context: if your base directory does not exist it will be created for you.
*/
void __init __register_sysctl_init(const char *path, struct ctl_table *table,
const char *table_name)
@@ -1557,6 +1583,7 @@ out:
*
* Register a sysctl table hierarchy. @table should be a filled in ctl_table
* array. A completely 0 filled entry terminates the table.
+ * We are slowly deprecating this call so avoid its use.
*
* See __register_sysctl_table for more details.
*/
@@ -1628,6 +1655,7 @@ err_register_leaves:
*
* Register a sysctl table hierarchy. @table should be a filled in ctl_table
* array. A completely 0 filled entry terminates the table.
+ * We are slowly deprecating this caller so avoid future uses of it.
*
* See __register_sysctl_paths for more details.
*/
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 40f9e1a2ebdd..28e942feacc8 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -32,7 +32,22 @@
#include <linux/swapops.h>
#include <linux/miscdevice.h>
-int sysctl_unprivileged_userfaultfd __read_mostly;
+static int sysctl_unprivileged_userfaultfd __read_mostly;
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table vm_userfaultfd_table[] = {
+ {
+ .procname = "unprivileged_userfaultfd",
+ .data = &sysctl_unprivileged_userfaultfd,
+ .maxlen = sizeof(sysctl_unprivileged_userfaultfd),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ { }
+};
+#endif
static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly;
@@ -2180,6 +2195,9 @@ static int __init userfaultfd_init(void)
0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC,
init_once_userfaultfd_ctx);
+#ifdef CONFIG_SYSCTL
+ register_sysctl_init("vm", vm_userfaultfd_table);
+#endif
return 0;
}
__initcall(userfaultfd_init);
diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c
index 546a6cd96729..fade33735393 100644
--- a/fs/xfs/xfs_sysctl.c
+++ b/fs/xfs/xfs_sysctl.c
@@ -210,28 +210,10 @@ static struct ctl_table xfs_table[] = {
{}
};
-static struct ctl_table xfs_dir_table[] = {
- {
- .procname = "xfs",
- .mode = 0555,
- .child = xfs_table
- },
- {}
-};
-
-static struct ctl_table xfs_root_table[] = {
- {
- .procname = "fs",
- .mode = 0555,
- .child = xfs_dir_table
- },
- {}
-};
-
int
xfs_sysctl_register(void)
{
- xfs_table_header = register_sysctl_table(xfs_root_table);
+ xfs_table_header = register_sysctl("fs/xfs", xfs_table);
if (!xfs_table_header)
return -ENOMEM;
return 0;