From 36885d7b1121c779e4060d45472fe53a5b21e09f Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 10 Jun 2011 02:36:05 -0300 Subject: sysctl: remove impossible condition check Remove checks for conditions that will never happen. If procname is NULL the loop would already had bailed out, so there's no need to check it again. At the same time this also compacts the function find_in_table() by refactoring it to be easier to read. Signed-off-by: Lucas De Marchi Reviewed-by: Jesper Juhl Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index a6b6217..d82f4a8 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -59,17 +59,11 @@ out: static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name) { - int len; for ( ; p->procname; p++) { - - if (!p->procname) - continue; - - len = strlen(p->procname); - if (len != name->len) + if (strlen(p->procname) != name->len) continue; - if (memcmp(p->procname, name->name, len) != 0) + if (memcmp(p->procname, name->name, name->len) != 0) continue; /* I have a match */ @@ -266,10 +260,6 @@ static int scan(struct ctl_table_header *head, ctl_table *table, for (; table->procname; table++, (*pos)++) { int res; - /* Can't do anything without a proc name */ - if (!table->procname) - continue; - if (*pos < file->f_pos) continue; -- cgit v0.10.2 From 0ce8974d504913a0f0ae2d97b20a5ac665431a41 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 6 Jan 2012 03:13:27 -0800 Subject: sysctl: Consolidate !CONFIG_SYSCTL handling - In sysctl.h move functions only available if CONFIG_SYSCL is defined inside of #ifdef CONFIG_SYSCTL - Move the stub function definitions for !CONFIG_SYSCTL into sysctl.h and make them static inlines. Signed-off-by: Eric W. Biederman diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index bb9127d..cf3ee7f 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -937,30 +937,8 @@ enum struct ctl_table; struct nsproxy; struct ctl_table_root; - -struct ctl_table_set { - struct list_head list; - struct ctl_table_set *parent; - int (*is_seen)(struct ctl_table_set *); -}; - -extern void setup_sysctl_set(struct ctl_table_set *p, - struct ctl_table_set *parent, - int (*is_seen)(struct ctl_table_set *)); - struct ctl_table_header; -extern void sysctl_head_get(struct ctl_table_header *); -extern void sysctl_head_put(struct ctl_table_header *); -extern int sysctl_is_seen(struct ctl_table_header *); -extern struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *); -extern struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev); -extern struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, - struct ctl_table_header *prev); -extern void sysctl_head_finish(struct ctl_table_header *prev); -extern int sysctl_perm(struct ctl_table_root *root, - struct ctl_table *table, int op); - typedef struct ctl_table ctl_table; typedef int proc_handler (struct ctl_table *ctl, int write, @@ -1023,8 +1001,6 @@ static inline void *proc_sys_poll_event(struct ctl_table_poll *poll) return (void *)(unsigned long)atomic_read(&poll->event); } -void proc_sys_poll_notify(struct ctl_table_poll *poll); - #define __CTL_TABLE_POLL_INITIALIZER(name) { \ .event = ATOMIC_INIT(0), \ .wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.wait) } @@ -1047,15 +1023,6 @@ struct ctl_table void *extra2; }; -struct ctl_table_root { - struct list_head root_list; - struct ctl_table_set default_set; - struct ctl_table_set *(*lookup)(struct ctl_table_root *root, - struct nsproxy *namespaces); - int (*permissions)(struct ctl_table_root *root, - struct nsproxy *namespaces, struct ctl_table *table); -}; - /* struct ctl_table_header is used to maintain dynamic lists of struct ctl_table trees. */ struct ctl_table_header @@ -1078,11 +1045,45 @@ struct ctl_table_header struct ctl_table_header *parent; }; +struct ctl_table_set { + struct list_head list; + struct ctl_table_set *parent; + int (*is_seen)(struct ctl_table_set *); +}; + +struct ctl_table_root { + struct list_head root_list; + struct ctl_table_set default_set; + struct ctl_table_set *(*lookup)(struct ctl_table_root *root, + struct nsproxy *namespaces); + int (*permissions)(struct ctl_table_root *root, + struct nsproxy *namespaces, struct ctl_table *table); +}; + /* struct ctl_path describes where in the hierarchy a table is added */ struct ctl_path { const char *procname; }; +#ifdef CONFIG_SYSCTL + +void proc_sys_poll_notify(struct ctl_table_poll *poll); + +extern void setup_sysctl_set(struct ctl_table_set *p, + struct ctl_table_set *parent, + int (*is_seen)(struct ctl_table_set *)); + +extern void sysctl_head_get(struct ctl_table_header *); +extern void sysctl_head_put(struct ctl_table_header *); +extern int sysctl_is_seen(struct ctl_table_header *); +extern struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *); +extern struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev); +extern struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, + struct ctl_table_header *prev); +extern void sysctl_head_finish(struct ctl_table_header *prev); +extern int sysctl_perm(struct ctl_table_root *root, + struct ctl_table *table, int op); + void register_sysctl_root(struct ctl_table_root *root); struct ctl_table_header *__register_sysctl_paths( struct ctl_table_root *root, struct nsproxy *namespaces, @@ -1094,6 +1095,34 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, void unregister_sysctl_table(struct ctl_table_header * table); int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table); +#else /* CONFIG_SYSCTL */ +static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table) +{ + return NULL; +} + +static inline struct ctl_table_header *register_sysctl_paths( + const struct ctl_path *path, struct ctl_table *table) +{ + return NULL; +} + +static inline void unregister_sysctl_table(struct ctl_table_header * table) +{ +} + +static inline void setup_sysctl_set(struct ctl_table_set *p, + struct ctl_table_set *parent, + int (*is_seen)(struct ctl_table_set *)) +{ +} + +static inline void sysctl_head_put(struct ctl_table_header *head) +{ +} + +#endif /* CONFIG_SYSCTL */ + #endif /* __KERNEL__ */ #endif /* _LINUX_SYSCTL_H */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index f487f25..d5bbddd 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2017,32 +2017,6 @@ void setup_sysctl_set(struct ctl_table_set *p, p->is_seen = is_seen; } -#else /* !CONFIG_SYSCTL */ -struct ctl_table_header *register_sysctl_table(struct ctl_table * table) -{ - return NULL; -} - -struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, - struct ctl_table *table) -{ - return NULL; -} - -void unregister_sysctl_table(struct ctl_table_header * table) -{ -} - -void setup_sysctl_set(struct ctl_table_set *p, - struct ctl_table_set *parent, - int (*is_seen)(struct ctl_table_set *)) -{ -} - -void sysctl_head_put(struct ctl_table_header *head) -{ -} - #endif /* CONFIG_SYSCTL */ /* -- cgit v0.10.2 From de4e83bd6b5e16d491ec068cd22801d5d063b07a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 6 Jan 2012 03:34:20 -0800 Subject: sysctl: Register the base sysctl table like any other sysctl table. Simplify the code by treating the base sysctl table like any other sysctl table and register it with register_sysctl_table. To ensure this table is registered early enough to avoid problems call sysctl_init from proc_sys_init. Rename sysctl_net.c:sysctl_init() to net_sysctl_init() to avoid name conflicts now that kernel/sysctl.c:sysctl_init() is no longer static. Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index d82f4a8..9d29d28 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -468,5 +468,6 @@ int __init proc_sys_init(void) proc_sys_root->proc_iops = &proc_sys_dir_operations; proc_sys_root->proc_fops = &proc_sys_dir_file_operations; proc_sys_root->nlink = 0; - return 0; + + return sysctl_init(); } diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index cf3ee7f..5e3532e 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1095,6 +1095,7 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, void unregister_sysctl_table(struct ctl_table_header * table); int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table); +extern int sysctl_init(void); #else /* CONFIG_SYSCTL */ static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table) { diff --git a/kernel/sysctl.c b/kernel/sysctl.c index d5bbddd..ad46024 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -192,7 +192,7 @@ static int sysrq_sysctl_handler(ctl_table *table, int write, #endif -static struct ctl_table root_table[]; +static struct ctl_table root_table[1]; static struct ctl_table_root sysctl_table_root; static struct ctl_table_header root_table_header = { {{.count = 1, @@ -222,7 +222,7 @@ int sysctl_legacy_va_layout; /* The default sysctl tables: */ -static struct ctl_table root_table[] = { +static struct ctl_table sysctl_base_table[] = { { .procname = "kernel", .mode = 0555, @@ -1747,17 +1747,12 @@ static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table) } } -static __init int sysctl_init(void) +int __init sysctl_init(void) { - sysctl_set_parent(NULL, root_table); -#ifdef CONFIG_SYSCTL_SYSCALL_CHECK - sysctl_check_table(current->nsproxy, root_table); -#endif + register_sysctl_table(sysctl_base_table); return 0; } -core_initcall(sysctl_init); - static struct ctl_table *is_branch_in(struct ctl_table *branch, struct ctl_table *table) { diff --git a/net/sysctl_net.c b/net/sysctl_net.c index e758139..a6bbee2 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -90,7 +90,7 @@ static struct pernet_operations sysctl_pernet_ops = { .exit = sysctl_net_exit, }; -static __init int sysctl_init(void) +static __init int net_sysctl_init(void) { int ret; ret = register_pernet_subsys(&sysctl_pernet_ops); @@ -102,7 +102,7 @@ static __init int sysctl_init(void) out: return ret; } -subsys_initcall(sysctl_init); +subsys_initcall(net_sysctl_init); struct ctl_table_header *register_net_sysctl_table(struct net *net, const struct ctl_path *path, struct ctl_table *table) -- cgit v0.10.2 From 1f87f0b52b1d6581168cb80f86746bc4df918d01 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 6 Jan 2012 04:07:15 -0800 Subject: sysctl: Move the implementation into fs/proc/proc_sysctl.c Move the core sysctl code from kernel/sysctl.c and kernel/sysctl_check.c into fs/proc/proc_sysctl.c. Currently sysctl maintenance is hampered by the sysctl implementation being split across 3 files with artificial layering between them. Consolidate the entire sysctl implementation into 1 file so that it is easier to see what is going on and hopefully allowing for simpler maintenance. For functions that are now only used in fs/proc/proc_sysctl.c remove their declarations from sysctl.h and make them static in fs/proc/proc_sysctl.c Signed-off-by: Eric W. Biederman diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 2925775..3b5ecd9 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -10,12 +10,15 @@ */ #include +struct ctl_table_header; extern struct proc_dir_entry proc_root; #ifdef CONFIG_PROC_SYSCTL extern int proc_sys_init(void); +extern void sysctl_head_put(struct ctl_table_header *head); #else static inline void proc_sys_init(void) { } +static inline void sysctl_head_put(struct ctl_table_header *head) { } #endif #ifdef CONFIG_NET extern int proc_net_init(void); diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 9d29d28..06e6f10 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "internal.h" static const struct dentry_operations proc_sys_dentry_operations; @@ -24,6 +25,209 @@ void proc_sys_poll_notify(struct ctl_table_poll *poll) wake_up_interruptible(&poll->wait); } +static struct ctl_table root_table[1]; +static struct ctl_table_root sysctl_table_root; +static struct ctl_table_header root_table_header = { + {{.count = 1, + .ctl_table = root_table, + .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, + .root = &sysctl_table_root, + .set = &sysctl_table_root.default_set, +}; +static struct ctl_table_root sysctl_table_root = { + .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list), + .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry), +}; + +static DEFINE_SPINLOCK(sysctl_lock); + +/* called under sysctl_lock */ +static int use_table(struct ctl_table_header *p) +{ + if (unlikely(p->unregistering)) + return 0; + p->used++; + return 1; +} + +/* called under sysctl_lock */ +static void unuse_table(struct ctl_table_header *p) +{ + if (!--p->used) + if (unlikely(p->unregistering)) + complete(p->unregistering); +} + +/* called under sysctl_lock, will reacquire if has to wait */ +static void start_unregistering(struct ctl_table_header *p) +{ + /* + * if p->used is 0, nobody will ever touch that entry again; + * we'll eliminate all paths to it before dropping sysctl_lock + */ + if (unlikely(p->used)) { + struct completion wait; + init_completion(&wait); + p->unregistering = &wait; + spin_unlock(&sysctl_lock); + wait_for_completion(&wait); + spin_lock(&sysctl_lock); + } else { + /* anything non-NULL; we'll never dereference it */ + p->unregistering = ERR_PTR(-EINVAL); + } + /* + * do not remove from the list until nobody holds it; walking the + * list in do_sysctl() relies on that. + */ + list_del_init(&p->ctl_entry); +} + +static void sysctl_head_get(struct ctl_table_header *head) +{ + spin_lock(&sysctl_lock); + head->count++; + spin_unlock(&sysctl_lock); +} + +void sysctl_head_put(struct ctl_table_header *head) +{ + spin_lock(&sysctl_lock); + if (!--head->count) + kfree_rcu(head, rcu); + spin_unlock(&sysctl_lock); +} + +static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) +{ + if (!head) + BUG(); + spin_lock(&sysctl_lock); + if (!use_table(head)) + head = ERR_PTR(-ENOENT); + spin_unlock(&sysctl_lock); + return head; +} + +static void sysctl_head_finish(struct ctl_table_header *head) +{ + if (!head) + return; + spin_lock(&sysctl_lock); + unuse_table(head); + spin_unlock(&sysctl_lock); +} + +static struct ctl_table_set * +lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces) +{ + struct ctl_table_set *set = &root->default_set; + if (root->lookup) + set = root->lookup(root, namespaces); + return set; +} + +static struct list_head * +lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces) +{ + struct ctl_table_set *set = lookup_header_set(root, namespaces); + return &set->list; +} + +static struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, + struct ctl_table_header *prev) +{ + struct ctl_table_root *root; + struct list_head *header_list; + struct ctl_table_header *head; + struct list_head *tmp; + + spin_lock(&sysctl_lock); + if (prev) { + head = prev; + tmp = &prev->ctl_entry; + unuse_table(prev); + goto next; + } + tmp = &root_table_header.ctl_entry; + for (;;) { + head = list_entry(tmp, struct ctl_table_header, ctl_entry); + + if (!use_table(head)) + goto next; + spin_unlock(&sysctl_lock); + return head; + next: + root = head->root; + tmp = tmp->next; + header_list = lookup_header_list(root, namespaces); + if (tmp != header_list) + continue; + + do { + root = list_entry(root->root_list.next, + struct ctl_table_root, root_list); + if (root == &sysctl_table_root) + goto out; + header_list = lookup_header_list(root, namespaces); + } while (list_empty(header_list)); + tmp = header_list->next; + } +out: + spin_unlock(&sysctl_lock); + return NULL; +} + +static struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev) +{ + return __sysctl_head_next(current->nsproxy, prev); +} + +void register_sysctl_root(struct ctl_table_root *root) +{ + spin_lock(&sysctl_lock); + list_add_tail(&root->root_list, &sysctl_table_root.root_list); + spin_unlock(&sysctl_lock); +} + +/* + * sysctl_perm does NOT grant the superuser all rights automatically, because + * some sysctl variables are readonly even to root. + */ + +static int test_perm(int mode, int op) +{ + if (!current_euid()) + mode >>= 6; + else if (in_egroup_p(0)) + mode >>= 3; + if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0) + return 0; + return -EACCES; +} + +static int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) +{ + int mode; + + if (root->permissions) + mode = root->permissions(root, current->nsproxy, table); + else + mode = table->mode; + + return test_perm(mode, op); +} + +static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table) +{ + for (; table->procname; table++) { + table->parent = parent; + if (table->child) + sysctl_set_parent(table, table->child); + } +} + + static struct inode *proc_sys_make_inode(struct super_block *sb, struct ctl_table_header *head, struct ctl_table *table) { @@ -435,6 +639,21 @@ static int proc_sys_delete(const struct dentry *dentry) return !!PROC_I(dentry->d_inode)->sysctl->unregistering; } +static int sysctl_is_seen(struct ctl_table_header *p) +{ + struct ctl_table_set *set = p->set; + int res; + spin_lock(&sysctl_lock); + if (p->unregistering) + res = 0; + else if (!set->is_seen) + res = 1; + else + res = set->is_seen(set); + spin_unlock(&sysctl_lock); + return res; +} + static int proc_sys_compare(const struct dentry *parent, const struct inode *pinode, const struct dentry *dentry, const struct inode *inode, @@ -460,6 +679,409 @@ static const struct dentry_operations proc_sys_dentry_operations = { .d_compare = proc_sys_compare, }; +static struct ctl_table *is_branch_in(struct ctl_table *branch, + struct ctl_table *table) +{ + struct ctl_table *p; + const char *s = branch->procname; + + /* branch should have named subdirectory as its first element */ + if (!s || !branch->child) + return NULL; + + /* ... and nothing else */ + if (branch[1].procname) + return NULL; + + /* table should contain subdirectory with the same name */ + for (p = table; p->procname; p++) { + if (!p->child) + continue; + if (p->procname && strcmp(p->procname, s) == 0) + return p; + } + return NULL; +} + +/* see if attaching q to p would be an improvement */ +static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) +{ + struct ctl_table *to = p->ctl_table, *by = q->ctl_table; + struct ctl_table *next; + int is_better = 0; + int not_in_parent = !p->attached_by; + + while ((next = is_branch_in(by, to)) != NULL) { + if (by == q->attached_by) + is_better = 1; + if (to == p->attached_by) + not_in_parent = 1; + by = by->child; + to = next->child; + } + + if (is_better && not_in_parent) { + q->attached_by = by; + q->attached_to = to; + q->parent = p; + } +} + +#ifdef CONFIG_SYSCTL_SYSCALL_CHECK +static int sysctl_depth(struct ctl_table *table) +{ + struct ctl_table *tmp; + int depth; + + depth = 0; + for (tmp = table; tmp->parent; tmp = tmp->parent) + depth++; + + return depth; +} + +static struct ctl_table *sysctl_parent(struct ctl_table *table, int n) +{ + int i; + + for (i = 0; table && i < n; i++) + table = table->parent; + + return table; +} + + +static void sysctl_print_path(struct ctl_table *table) +{ + struct ctl_table *tmp; + int depth, i; + depth = sysctl_depth(table); + if (table->procname) { + for (i = depth; i >= 0; i--) { + tmp = sysctl_parent(table, i); + printk("/%s", tmp->procname?tmp->procname:""); + } + } + printk(" "); +} + +static struct ctl_table *sysctl_check_lookup(struct nsproxy *namespaces, + struct ctl_table *table) +{ + struct ctl_table_header *head; + struct ctl_table *ref, *test; + int depth, cur_depth; + + depth = sysctl_depth(table); + + for (head = __sysctl_head_next(namespaces, NULL); head; + head = __sysctl_head_next(namespaces, head)) { + cur_depth = depth; + ref = head->ctl_table; +repeat: + test = sysctl_parent(table, cur_depth); + for (; ref->procname; ref++) { + int match = 0; + if (cur_depth && !ref->child) + continue; + + if (test->procname && ref->procname && + (strcmp(test->procname, ref->procname) == 0)) + match++; + + if (match) { + if (cur_depth != 0) { + cur_depth--; + ref = ref->child; + goto repeat; + } + goto out; + } + } + } + ref = NULL; +out: + sysctl_head_finish(head); + return ref; +} + +static void set_fail(const char **fail, struct ctl_table *table, const char *str) +{ + if (*fail) { + printk(KERN_ERR "sysctl table check failed: "); + sysctl_print_path(table); + printk(" %s\n", *fail); + dump_stack(); + } + *fail = str; +} + +static void sysctl_check_leaf(struct nsproxy *namespaces, + struct ctl_table *table, const char **fail) +{ + struct ctl_table *ref; + + ref = sysctl_check_lookup(namespaces, table); + if (ref && (ref != table)) + set_fail(fail, table, "Sysctl already exists"); +} + +static int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table) +{ + int error = 0; + for (; table->procname; table++) { + const char *fail = NULL; + + if (table->parent) { + if (!table->parent->procname) + set_fail(&fail, table, "Parent without procname"); + } + if (table->child) { + if (table->data) + set_fail(&fail, table, "Directory with data?"); + if (table->maxlen) + set_fail(&fail, table, "Directory with maxlen?"); + if ((table->mode & (S_IRUGO|S_IXUGO)) != table->mode) + set_fail(&fail, table, "Writable sysctl directory"); + if (table->proc_handler) + set_fail(&fail, table, "Directory with proc_handler"); + if (table->extra1) + set_fail(&fail, table, "Directory with extra1"); + if (table->extra2) + set_fail(&fail, table, "Directory with extra2"); + } else { + if ((table->proc_handler == proc_dostring) || + (table->proc_handler == proc_dointvec) || + (table->proc_handler == proc_dointvec_minmax) || + (table->proc_handler == proc_dointvec_jiffies) || + (table->proc_handler == proc_dointvec_userhz_jiffies) || + (table->proc_handler == proc_dointvec_ms_jiffies) || + (table->proc_handler == proc_doulongvec_minmax) || + (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) { + if (!table->data) + set_fail(&fail, table, "No data"); + if (!table->maxlen) + set_fail(&fail, table, "No maxlen"); + } +#ifdef CONFIG_PROC_SYSCTL + if (!table->proc_handler) + set_fail(&fail, table, "No proc_handler"); +#endif + sysctl_check_leaf(namespaces, table, &fail); + } + if (table->mode > 0777) + set_fail(&fail, table, "bogus .mode"); + if (fail) { + set_fail(&fail, table, NULL); + error = -EINVAL; + } + if (table->child) + error |= sysctl_check_table(namespaces, table->child); + } + return error; +} +#endif /* CONFIG_SYSCTL_SYSCALL_CHECK */ + +/** + * __register_sysctl_paths - register a sysctl hierarchy + * @root: List of sysctl headers to register on + * @namespaces: Data to compute which lists of sysctl entries are visible + * @path: The path to the directory the sysctl table is in. + * @table: the top-level table structure + * + * Register a sysctl table hierarchy. @table should be a filled in ctl_table + * array. A completely 0 filled entry terminates the table. + * + * The members of the &struct ctl_table structure are used as follows: + * + * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not + * enter a sysctl file + * + * data - a pointer to data for use by proc_handler + * + * maxlen - the maximum size in bytes of the data + * + * mode - the file permissions for the /proc/sys file, and for sysctl(2) + * + * child - a pointer to the child sysctl table if this entry is a directory, or + * %NULL. + * + * proc_handler - the text handler routine (described below) + * + * de - for internal use by the sysctl routines + * + * extra1, extra2 - extra pointers usable by the proc handler routines + * + * Leaf nodes in the sysctl tree will be represented by a single file + * under /proc; non-leaf nodes will be represented by directories. + * + * sysctl(2) can automatically manage read and write requests through + * the sysctl table. The data and maxlen fields of the ctl_table + * struct enable minimal validation of the values being written to be + * performed, and the mode field allows minimal authentication. + * + * There must be a proc_handler routine for any terminal nodes + * mirrored under /proc/sys (non-terminals are handled by a built-in + * directory handler). Several default handlers are available to + * cover common cases - + * + * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(), + * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), + * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax() + * + * It is the handler's job to read the input buffer from user memory + * and process it. The handler should return 0 on success. + * + * This routine returns %NULL on a failure to register, and a pointer + * to the table header on success. + */ +struct ctl_table_header *__register_sysctl_paths( + struct ctl_table_root *root, + struct nsproxy *namespaces, + const struct ctl_path *path, struct ctl_table *table) +{ + struct ctl_table_header *header; + struct ctl_table *new, **prevp; + unsigned int n, npath; + struct ctl_table_set *set; + + /* Count the path components */ + for (npath = 0; path[npath].procname; ++npath) + ; + + /* + * For each path component, allocate a 2-element ctl_table array. + * The first array element will be filled with the sysctl entry + * for this, the second will be the sentinel (procname == 0). + * + * We allocate everything in one go so that we don't have to + * worry about freeing additional memory in unregister_sysctl_table. + */ + header = kzalloc(sizeof(struct ctl_table_header) + + (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL); + if (!header) + return NULL; + + new = (struct ctl_table *) (header + 1); + + /* Now connect the dots */ + prevp = &header->ctl_table; + for (n = 0; n < npath; ++n, ++path) { + /* Copy the procname */ + new->procname = path->procname; + new->mode = 0555; + + *prevp = new; + prevp = &new->child; + + new += 2; + } + *prevp = table; + header->ctl_table_arg = table; + + INIT_LIST_HEAD(&header->ctl_entry); + header->used = 0; + header->unregistering = NULL; + header->root = root; + sysctl_set_parent(NULL, header->ctl_table); + header->count = 1; +#ifdef CONFIG_SYSCTL_SYSCALL_CHECK + if (sysctl_check_table(namespaces, header->ctl_table)) { + kfree(header); + return NULL; + } +#endif + spin_lock(&sysctl_lock); + header->set = lookup_header_set(root, namespaces); + header->attached_by = header->ctl_table; + header->attached_to = root_table; + header->parent = &root_table_header; + for (set = header->set; set; set = set->parent) { + struct ctl_table_header *p; + list_for_each_entry(p, &set->list, ctl_entry) { + if (p->unregistering) + continue; + try_attach(p, header); + } + } + header->parent->count++; + list_add_tail(&header->ctl_entry, &header->set->list); + spin_unlock(&sysctl_lock); + + return header; +} + +/** + * register_sysctl_table_path - register a sysctl table hierarchy + * @path: The path to the directory the sysctl table is in. + * @table: the top-level table structure + * + * Register a sysctl table hierarchy. @table should be a filled in ctl_table + * array. A completely 0 filled entry terminates the table. + * + * See __register_sysctl_paths for more details. + */ +struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, + struct ctl_table *table) +{ + return __register_sysctl_paths(&sysctl_table_root, current->nsproxy, + path, table); +} +EXPORT_SYMBOL(register_sysctl_paths); + +/** + * register_sysctl_table - register a sysctl table hierarchy + * @table: the top-level table structure + * + * Register a sysctl table hierarchy. @table should be a filled in ctl_table + * array. A completely 0 filled entry terminates the table. + * + * See register_sysctl_paths for more details. + */ +struct ctl_table_header *register_sysctl_table(struct ctl_table *table) +{ + static const struct ctl_path null_path[] = { {} }; + + return register_sysctl_paths(null_path, table); +} +EXPORT_SYMBOL(register_sysctl_table); + +/** + * unregister_sysctl_table - unregister a sysctl table hierarchy + * @header: the header returned from register_sysctl_table + * + * Unregisters the sysctl table and all children. proc entries may not + * actually be removed until they are no longer used by anyone. + */ +void unregister_sysctl_table(struct ctl_table_header * header) +{ + might_sleep(); + + if (header == NULL) + return; + + spin_lock(&sysctl_lock); + start_unregistering(header); + if (!--header->parent->count) { + WARN_ON(1); + kfree_rcu(header->parent, rcu); + } + if (!--header->count) + kfree_rcu(header, rcu); + spin_unlock(&sysctl_lock); +} +EXPORT_SYMBOL(unregister_sysctl_table); + +void setup_sysctl_set(struct ctl_table_set *p, + struct ctl_table_set *parent, + int (*is_seen)(struct ctl_table_set *)) +{ + INIT_LIST_HEAD(&p->list); + p->parent = parent ? parent : &sysctl_table_root.default_set; + p->is_seen = is_seen; +} + + int __init proc_sys_init(void) { struct proc_dir_entry *proc_sys_root; diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 5e3532e..08cabbf 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1073,17 +1073,6 @@ extern void setup_sysctl_set(struct ctl_table_set *p, struct ctl_table_set *parent, int (*is_seen)(struct ctl_table_set *)); -extern void sysctl_head_get(struct ctl_table_header *); -extern void sysctl_head_put(struct ctl_table_header *); -extern int sysctl_is_seen(struct ctl_table_header *); -extern struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *); -extern struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev); -extern struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, - struct ctl_table_header *prev); -extern void sysctl_head_finish(struct ctl_table_header *prev); -extern int sysctl_perm(struct ctl_table_root *root, - struct ctl_table *table, int op); - void register_sysctl_root(struct ctl_table_root *root); struct ctl_table_header *__register_sysctl_paths( struct ctl_table_root *root, struct nsproxy *namespaces, @@ -1093,7 +1082,6 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, struct ctl_table *table); void unregister_sysctl_table(struct ctl_table_header * table); -int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table); extern int sysctl_init(void); #else /* CONFIG_SYSCTL */ @@ -1118,10 +1106,6 @@ static inline void setup_sysctl_set(struct ctl_table_set *p, { } -static inline void sysctl_head_put(struct ctl_table_header *head) -{ -} - #endif /* CONFIG_SYSCTL */ #endif /* __KERNEL__ */ diff --git a/kernel/Makefile b/kernel/Makefile index 2d9de86..cb41b95 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -27,7 +27,6 @@ obj-y += power/ obj-$(CONFIG_FREEZER) += freezer.o obj-$(CONFIG_PROFILING) += profile.o -obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += time/ obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ad46024..b774909 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -192,20 +192,6 @@ static int sysrq_sysctl_handler(ctl_table *table, int write, #endif -static struct ctl_table root_table[1]; -static struct ctl_table_root sysctl_table_root; -static struct ctl_table_header root_table_header = { - {{.count = 1, - .ctl_table = root_table, - .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, - .root = &sysctl_table_root, - .set = &sysctl_table_root.default_set, -}; -static struct ctl_table_root sysctl_table_root = { - .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list), - .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry), -}; - static struct ctl_table kern_table[]; static struct ctl_table vm_table[]; static struct ctl_table fs_table[]; @@ -1559,459 +1545,12 @@ static struct ctl_table dev_table[] = { { } }; -static DEFINE_SPINLOCK(sysctl_lock); - -/* called under sysctl_lock */ -static int use_table(struct ctl_table_header *p) -{ - if (unlikely(p->unregistering)) - return 0; - p->used++; - return 1; -} - -/* called under sysctl_lock */ -static void unuse_table(struct ctl_table_header *p) -{ - if (!--p->used) - if (unlikely(p->unregistering)) - complete(p->unregistering); -} - -/* called under sysctl_lock, will reacquire if has to wait */ -static void start_unregistering(struct ctl_table_header *p) -{ - /* - * if p->used is 0, nobody will ever touch that entry again; - * we'll eliminate all paths to it before dropping sysctl_lock - */ - if (unlikely(p->used)) { - struct completion wait; - init_completion(&wait); - p->unregistering = &wait; - spin_unlock(&sysctl_lock); - wait_for_completion(&wait); - spin_lock(&sysctl_lock); - } else { - /* anything non-NULL; we'll never dereference it */ - p->unregistering = ERR_PTR(-EINVAL); - } - /* - * do not remove from the list until nobody holds it; walking the - * list in do_sysctl() relies on that. - */ - list_del_init(&p->ctl_entry); -} - -void sysctl_head_get(struct ctl_table_header *head) -{ - spin_lock(&sysctl_lock); - head->count++; - spin_unlock(&sysctl_lock); -} - -void sysctl_head_put(struct ctl_table_header *head) -{ - spin_lock(&sysctl_lock); - if (!--head->count) - kfree_rcu(head, rcu); - spin_unlock(&sysctl_lock); -} - -struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) -{ - if (!head) - BUG(); - spin_lock(&sysctl_lock); - if (!use_table(head)) - head = ERR_PTR(-ENOENT); - spin_unlock(&sysctl_lock); - return head; -} - -void sysctl_head_finish(struct ctl_table_header *head) -{ - if (!head) - return; - spin_lock(&sysctl_lock); - unuse_table(head); - spin_unlock(&sysctl_lock); -} - -static struct ctl_table_set * -lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces) -{ - struct ctl_table_set *set = &root->default_set; - if (root->lookup) - set = root->lookup(root, namespaces); - return set; -} - -static struct list_head * -lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces) -{ - struct ctl_table_set *set = lookup_header_set(root, namespaces); - return &set->list; -} - -struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, - struct ctl_table_header *prev) -{ - struct ctl_table_root *root; - struct list_head *header_list; - struct ctl_table_header *head; - struct list_head *tmp; - - spin_lock(&sysctl_lock); - if (prev) { - head = prev; - tmp = &prev->ctl_entry; - unuse_table(prev); - goto next; - } - tmp = &root_table_header.ctl_entry; - for (;;) { - head = list_entry(tmp, struct ctl_table_header, ctl_entry); - - if (!use_table(head)) - goto next; - spin_unlock(&sysctl_lock); - return head; - next: - root = head->root; - tmp = tmp->next; - header_list = lookup_header_list(root, namespaces); - if (tmp != header_list) - continue; - - do { - root = list_entry(root->root_list.next, - struct ctl_table_root, root_list); - if (root == &sysctl_table_root) - goto out; - header_list = lookup_header_list(root, namespaces); - } while (list_empty(header_list)); - tmp = header_list->next; - } -out: - spin_unlock(&sysctl_lock); - return NULL; -} - -struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev) -{ - return __sysctl_head_next(current->nsproxy, prev); -} - -void register_sysctl_root(struct ctl_table_root *root) -{ - spin_lock(&sysctl_lock); - list_add_tail(&root->root_list, &sysctl_table_root.root_list); - spin_unlock(&sysctl_lock); -} - -/* - * sysctl_perm does NOT grant the superuser all rights automatically, because - * some sysctl variables are readonly even to root. - */ - -static int test_perm(int mode, int op) -{ - if (!current_euid()) - mode >>= 6; - else if (in_egroup_p(0)) - mode >>= 3; - if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0) - return 0; - return -EACCES; -} - -int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) -{ - int mode; - - if (root->permissions) - mode = root->permissions(root, current->nsproxy, table); - else - mode = table->mode; - - return test_perm(mode, op); -} - -static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table) -{ - for (; table->procname; table++) { - table->parent = parent; - if (table->child) - sysctl_set_parent(table, table->child); - } -} - int __init sysctl_init(void) { register_sysctl_table(sysctl_base_table); return 0; } -static struct ctl_table *is_branch_in(struct ctl_table *branch, - struct ctl_table *table) -{ - struct ctl_table *p; - const char *s = branch->procname; - - /* branch should have named subdirectory as its first element */ - if (!s || !branch->child) - return NULL; - - /* ... and nothing else */ - if (branch[1].procname) - return NULL; - - /* table should contain subdirectory with the same name */ - for (p = table; p->procname; p++) { - if (!p->child) - continue; - if (p->procname && strcmp(p->procname, s) == 0) - return p; - } - return NULL; -} - -/* see if attaching q to p would be an improvement */ -static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) -{ - struct ctl_table *to = p->ctl_table, *by = q->ctl_table; - struct ctl_table *next; - int is_better = 0; - int not_in_parent = !p->attached_by; - - while ((next = is_branch_in(by, to)) != NULL) { - if (by == q->attached_by) - is_better = 1; - if (to == p->attached_by) - not_in_parent = 1; - by = by->child; - to = next->child; - } - - if (is_better && not_in_parent) { - q->attached_by = by; - q->attached_to = to; - q->parent = p; - } -} - -/** - * __register_sysctl_paths - register a sysctl hierarchy - * @root: List of sysctl headers to register on - * @namespaces: Data to compute which lists of sysctl entries are visible - * @path: The path to the directory the sysctl table is in. - * @table: the top-level table structure - * - * Register a sysctl table hierarchy. @table should be a filled in ctl_table - * array. A completely 0 filled entry terminates the table. - * - * The members of the &struct ctl_table structure are used as follows: - * - * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not - * enter a sysctl file - * - * data - a pointer to data for use by proc_handler - * - * maxlen - the maximum size in bytes of the data - * - * mode - the file permissions for the /proc/sys file, and for sysctl(2) - * - * child - a pointer to the child sysctl table if this entry is a directory, or - * %NULL. - * - * proc_handler - the text handler routine (described below) - * - * de - for internal use by the sysctl routines - * - * extra1, extra2 - extra pointers usable by the proc handler routines - * - * Leaf nodes in the sysctl tree will be represented by a single file - * under /proc; non-leaf nodes will be represented by directories. - * - * sysctl(2) can automatically manage read and write requests through - * the sysctl table. The data and maxlen fields of the ctl_table - * struct enable minimal validation of the values being written to be - * performed, and the mode field allows minimal authentication. - * - * There must be a proc_handler routine for any terminal nodes - * mirrored under /proc/sys (non-terminals are handled by a built-in - * directory handler). Several default handlers are available to - * cover common cases - - * - * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(), - * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), - * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax() - * - * It is the handler's job to read the input buffer from user memory - * and process it. The handler should return 0 on success. - * - * This routine returns %NULL on a failure to register, and a pointer - * to the table header on success. - */ -struct ctl_table_header *__register_sysctl_paths( - struct ctl_table_root *root, - struct nsproxy *namespaces, - const struct ctl_path *path, struct ctl_table *table) -{ - struct ctl_table_header *header; - struct ctl_table *new, **prevp; - unsigned int n, npath; - struct ctl_table_set *set; - - /* Count the path components */ - for (npath = 0; path[npath].procname; ++npath) - ; - - /* - * For each path component, allocate a 2-element ctl_table array. - * The first array element will be filled with the sysctl entry - * for this, the second will be the sentinel (procname == 0). - * - * We allocate everything in one go so that we don't have to - * worry about freeing additional memory in unregister_sysctl_table. - */ - header = kzalloc(sizeof(struct ctl_table_header) + - (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL); - if (!header) - return NULL; - - new = (struct ctl_table *) (header + 1); - - /* Now connect the dots */ - prevp = &header->ctl_table; - for (n = 0; n < npath; ++n, ++path) { - /* Copy the procname */ - new->procname = path->procname; - new->mode = 0555; - - *prevp = new; - prevp = &new->child; - - new += 2; - } - *prevp = table; - header->ctl_table_arg = table; - - INIT_LIST_HEAD(&header->ctl_entry); - header->used = 0; - header->unregistering = NULL; - header->root = root; - sysctl_set_parent(NULL, header->ctl_table); - header->count = 1; -#ifdef CONFIG_SYSCTL_SYSCALL_CHECK - if (sysctl_check_table(namespaces, header->ctl_table)) { - kfree(header); - return NULL; - } -#endif - spin_lock(&sysctl_lock); - header->set = lookup_header_set(root, namespaces); - header->attached_by = header->ctl_table; - header->attached_to = root_table; - header->parent = &root_table_header; - for (set = header->set; set; set = set->parent) { - struct ctl_table_header *p; - list_for_each_entry(p, &set->list, ctl_entry) { - if (p->unregistering) - continue; - try_attach(p, header); - } - } - header->parent->count++; - list_add_tail(&header->ctl_entry, &header->set->list); - spin_unlock(&sysctl_lock); - - return header; -} - -/** - * register_sysctl_table_path - register a sysctl table hierarchy - * @path: The path to the directory the sysctl table is in. - * @table: the top-level table structure - * - * Register a sysctl table hierarchy. @table should be a filled in ctl_table - * array. A completely 0 filled entry terminates the table. - * - * See __register_sysctl_paths for more details. - */ -struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, - struct ctl_table *table) -{ - return __register_sysctl_paths(&sysctl_table_root, current->nsproxy, - path, table); -} - -/** - * register_sysctl_table - register a sysctl table hierarchy - * @table: the top-level table structure - * - * Register a sysctl table hierarchy. @table should be a filled in ctl_table - * array. A completely 0 filled entry terminates the table. - * - * See register_sysctl_paths for more details. - */ -struct ctl_table_header *register_sysctl_table(struct ctl_table *table) -{ - static const struct ctl_path null_path[] = { {} }; - - return register_sysctl_paths(null_path, table); -} - -/** - * unregister_sysctl_table - unregister a sysctl table hierarchy - * @header: the header returned from register_sysctl_table - * - * Unregisters the sysctl table and all children. proc entries may not - * actually be removed until they are no longer used by anyone. - */ -void unregister_sysctl_table(struct ctl_table_header * header) -{ - might_sleep(); - - if (header == NULL) - return; - - spin_lock(&sysctl_lock); - start_unregistering(header); - if (!--header->parent->count) { - WARN_ON(1); - kfree_rcu(header->parent, rcu); - } - if (!--header->count) - kfree_rcu(header, rcu); - spin_unlock(&sysctl_lock); -} - -int sysctl_is_seen(struct ctl_table_header *p) -{ - struct ctl_table_set *set = p->set; - int res; - spin_lock(&sysctl_lock); - if (p->unregistering) - res = 0; - else if (!set->is_seen) - res = 1; - else - res = set->is_seen(set); - spin_unlock(&sysctl_lock); - return res; -} - -void setup_sysctl_set(struct ctl_table_set *p, - struct ctl_table_set *parent, - int (*is_seen)(struct ctl_table_set *)) -{ - INIT_LIST_HEAD(&p->list); - p->parent = parent ? parent : &sysctl_table_root.default_set; - p->is_seen = is_seen; -} - #endif /* CONFIG_SYSCTL */ /* @@ -2977,6 +2516,3 @@ EXPORT_SYMBOL(proc_dointvec_ms_jiffies); EXPORT_SYMBOL(proc_dostring); EXPORT_SYMBOL(proc_doulongvec_minmax); EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax); -EXPORT_SYMBOL(register_sysctl_table); -EXPORT_SYMBOL(register_sysctl_paths); -EXPORT_SYMBOL(unregister_sysctl_table); diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c deleted file mode 100644 index 362da65..0000000 --- a/kernel/sysctl_check.c +++ /dev/null @@ -1,160 +0,0 @@ -#include -#include -#include "../fs/xfs/xfs_sysctl.h" -#include -#include -#include - - -static int sysctl_depth(struct ctl_table *table) -{ - struct ctl_table *tmp; - int depth; - - depth = 0; - for (tmp = table; tmp->parent; tmp = tmp->parent) - depth++; - - return depth; -} - -static struct ctl_table *sysctl_parent(struct ctl_table *table, int n) -{ - int i; - - for (i = 0; table && i < n; i++) - table = table->parent; - - return table; -} - - -static void sysctl_print_path(struct ctl_table *table) -{ - struct ctl_table *tmp; - int depth, i; - depth = sysctl_depth(table); - if (table->procname) { - for (i = depth; i >= 0; i--) { - tmp = sysctl_parent(table, i); - printk("/%s", tmp->procname?tmp->procname:""); - } - } - printk(" "); -} - -static struct ctl_table *sysctl_check_lookup(struct nsproxy *namespaces, - struct ctl_table *table) -{ - struct ctl_table_header *head; - struct ctl_table *ref, *test; - int depth, cur_depth; - - depth = sysctl_depth(table); - - for (head = __sysctl_head_next(namespaces, NULL); head; - head = __sysctl_head_next(namespaces, head)) { - cur_depth = depth; - ref = head->ctl_table; -repeat: - test = sysctl_parent(table, cur_depth); - for (; ref->procname; ref++) { - int match = 0; - if (cur_depth && !ref->child) - continue; - - if (test->procname && ref->procname && - (strcmp(test->procname, ref->procname) == 0)) - match++; - - if (match) { - if (cur_depth != 0) { - cur_depth--; - ref = ref->child; - goto repeat; - } - goto out; - } - } - } - ref = NULL; -out: - sysctl_head_finish(head); - return ref; -} - -static void set_fail(const char **fail, struct ctl_table *table, const char *str) -{ - if (*fail) { - printk(KERN_ERR "sysctl table check failed: "); - sysctl_print_path(table); - printk(" %s\n", *fail); - dump_stack(); - } - *fail = str; -} - -static void sysctl_check_leaf(struct nsproxy *namespaces, - struct ctl_table *table, const char **fail) -{ - struct ctl_table *ref; - - ref = sysctl_check_lookup(namespaces, table); - if (ref && (ref != table)) - set_fail(fail, table, "Sysctl already exists"); -} - -int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table) -{ - int error = 0; - for (; table->procname; table++) { - const char *fail = NULL; - - if (table->parent) { - if (!table->parent->procname) - set_fail(&fail, table, "Parent without procname"); - } - if (table->child) { - if (table->data) - set_fail(&fail, table, "Directory with data?"); - if (table->maxlen) - set_fail(&fail, table, "Directory with maxlen?"); - if ((table->mode & (S_IRUGO|S_IXUGO)) != table->mode) - set_fail(&fail, table, "Writable sysctl directory"); - if (table->proc_handler) - set_fail(&fail, table, "Directory with proc_handler"); - if (table->extra1) - set_fail(&fail, table, "Directory with extra1"); - if (table->extra2) - set_fail(&fail, table, "Directory with extra2"); - } else { - if ((table->proc_handler == proc_dostring) || - (table->proc_handler == proc_dointvec) || - (table->proc_handler == proc_dointvec_minmax) || - (table->proc_handler == proc_dointvec_jiffies) || - (table->proc_handler == proc_dointvec_userhz_jiffies) || - (table->proc_handler == proc_dointvec_ms_jiffies) || - (table->proc_handler == proc_doulongvec_minmax) || - (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) { - if (!table->data) - set_fail(&fail, table, "No data"); - if (!table->maxlen) - set_fail(&fail, table, "No maxlen"); - } -#ifdef CONFIG_PROC_SYSCTL - if (!table->proc_handler) - set_fail(&fail, table, "No proc_handler"); -#endif - sysctl_check_leaf(namespaces, table, &fail); - } - if (table->mode > 0777) - set_fail(&fail, table, "bogus .mode"); - if (fail) { - set_fail(&fail, table, NULL); - error = -EINVAL; - } - if (table->child) - error |= sysctl_check_table(namespaces, table->child); - } - return error; -} -- cgit v0.10.2 From a15e20982e2fbb06e85da584a0f150784042c17d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 8 Jan 2012 00:16:29 -0800 Subject: sysctl: Make the directories have nlink == 1 I goofed when I made sysctl directories have nlink == 0. nlink == 0 means the directory has been deleted. nlink == 1 meands a directory does not count subdirectories. Use the default nlink == 1 for sysctl directories. Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 06e6f10..f6aa751 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -253,7 +253,6 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, inode->i_fop = &proc_sys_file_operations; } else { inode->i_mode |= S_IFDIR; - clear_nlink(inode); inode->i_op = &proc_sys_dir_operations; inode->i_fop = &proc_sys_dir_file_operations; } -- cgit v0.10.2 From 97324cd804b7b9fb6044e114329335db79810425 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 9 Jan 2012 22:19:13 -0800 Subject: sysctl: Implement retire_sysctl_set This adds a small helper retire_sysctl_set to remove the intimate knowledge about the how a sysctl_set is implemented from net/sysct_net.c Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index f6aa751..9d8223c 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1080,6 +1080,10 @@ void setup_sysctl_set(struct ctl_table_set *p, p->is_seen = is_seen; } +void retire_sysctl_set(struct ctl_table_set *set) +{ + WARN_ON(!list_empty(&set->list)); +} int __init proc_sys_init(void) { diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 08cabbf..475ff0e 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1072,6 +1072,7 @@ void proc_sys_poll_notify(struct ctl_table_poll *poll); extern void setup_sysctl_set(struct ctl_table_set *p, struct ctl_table_set *parent, int (*is_seen)(struct ctl_table_set *)); +extern void retire_sysctl_set(struct ctl_table_set *set); void register_sysctl_root(struct ctl_table_root *root); struct ctl_table_header *__register_sysctl_paths( diff --git a/net/sysctl_net.c b/net/sysctl_net.c index a6bbee2..ffd67a6 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -82,7 +82,7 @@ static int __net_init sysctl_net_init(struct net *net) static void __net_exit sysctl_net_exit(struct net *net) { - WARN_ON(!list_empty(&net->sysctls.list)); + retire_sysctl_set(&net->sysctls); } static struct pernet_operations sysctl_pernet_ops = { -- cgit v0.10.2 From bd295b56cfae85f2dd6c2b03951480c91e6d08f3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 22 Jan 2012 21:10:21 -0800 Subject: sysctl: Remove the unnecessary sysctl_set parent concept. In sysctl_net register the two networking roots in the proper order. In register_sysctl walk the sysctl sets in the reverse order of the sysctl roots. Remove parent from ctl_table_set and setup_sysctl_set as it is no longer needed. Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 9d8223c..86d32a3 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -995,13 +995,20 @@ struct ctl_table_header *__register_sysctl_paths( header->attached_by = header->ctl_table; header->attached_to = root_table; header->parent = &root_table_header; - for (set = header->set; set; set = set->parent) { + set = header->set; + root = header->root; + for (;;) { struct ctl_table_header *p; list_for_each_entry(p, &set->list, ctl_entry) { if (p->unregistering) continue; try_attach(p, header); } + if (root == &sysctl_table_root) + break; + root = list_entry(root->root_list.prev, + struct ctl_table_root, root_list); + set = lookup_header_set(root, namespaces); } header->parent->count++; list_add_tail(&header->ctl_entry, &header->set->list); @@ -1072,11 +1079,9 @@ void unregister_sysctl_table(struct ctl_table_header * header) EXPORT_SYMBOL(unregister_sysctl_table); void setup_sysctl_set(struct ctl_table_set *p, - struct ctl_table_set *parent, int (*is_seen)(struct ctl_table_set *)) { INIT_LIST_HEAD(&p->list); - p->parent = parent ? parent : &sysctl_table_root.default_set; p->is_seen = is_seen; } diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 475ff0e..43c36ac 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1047,7 +1047,6 @@ struct ctl_table_header struct ctl_table_set { struct list_head list; - struct ctl_table_set *parent; int (*is_seen)(struct ctl_table_set *); }; @@ -1070,7 +1069,6 @@ struct ctl_path { void proc_sys_poll_notify(struct ctl_table_poll *poll); extern void setup_sysctl_set(struct ctl_table_set *p, - struct ctl_table_set *parent, int (*is_seen)(struct ctl_table_set *)); extern void retire_sysctl_set(struct ctl_table_set *set); @@ -1102,7 +1100,6 @@ static inline void unregister_sysctl_table(struct ctl_table_header * table) } static inline void setup_sysctl_set(struct ctl_table_set *p, - struct ctl_table_set *parent, int (*is_seen)(struct ctl_table_set *)) { } diff --git a/net/sysctl_net.c b/net/sysctl_net.c index ffd67a6..07c6b87 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -75,7 +75,6 @@ static struct ctl_table_root net_sysctl_ro_root = { static int __net_init sysctl_net_init(struct net *net) { setup_sysctl_set(&net->sysctls, - &net_sysctl_ro_root.default_set, is_seen); return 0; } @@ -96,9 +95,9 @@ static __init int net_sysctl_init(void) ret = register_pernet_subsys(&sysctl_pernet_ops); if (ret) goto out; - register_sysctl_root(&net_sysctl_root); - setup_sysctl_set(&net_sysctl_ro_root.default_set, NULL, NULL); + setup_sysctl_set(&net_sysctl_ro_root.default_set, NULL); register_sysctl_root(&net_sysctl_ro_root); + register_sysctl_root(&net_sysctl_root); out: return ret; } -- cgit v0.10.2 From f05e53a7fbb28c951c0c8cf3963fa8019ae1d4d3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 21 Jan 2012 10:03:13 -0800 Subject: sysctl: Create local copies of directory names used in paths Creating local copies of directory names is a good idea for two reasons. - The dynamic names used by callers must be copied into new strings by the callers today to ensure the strings do not change between register and unregister of the sysctl table. - Sysctl directories have a potentially different lifetime than the time between register and unregister of any particular sysctl table. Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 86d32a3..bcf60fb 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -943,10 +943,12 @@ struct ctl_table_header *__register_sysctl_paths( struct ctl_table *new, **prevp; unsigned int n, npath; struct ctl_table_set *set; + size_t path_bytes = 0; + char *new_name; /* Count the path components */ for (npath = 0; path[npath].procname; ++npath) - ; + path_bytes += strlen(path[npath].procname) + 1; /* * For each path component, allocate a 2-element ctl_table array. @@ -956,24 +958,27 @@ struct ctl_table_header *__register_sysctl_paths( * We allocate everything in one go so that we don't have to * worry about freeing additional memory in unregister_sysctl_table. */ - header = kzalloc(sizeof(struct ctl_table_header) + + header = kzalloc(sizeof(struct ctl_table_header) + path_bytes + (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL); if (!header) return NULL; new = (struct ctl_table *) (header + 1); + new_name = (char *)(new + (2 * npath)); /* Now connect the dots */ prevp = &header->ctl_table; for (n = 0; n < npath; ++n, ++path) { /* Copy the procname */ - new->procname = path->procname; + strcpy(new_name, path->procname); + new->procname = new_name; new->mode = 0555; *prevp = new; prevp = &new->child; new += 2; + new_name += strlen(new_name) + 1; } *prevp = table; header->ctl_table_arg = table; -- cgit v0.10.2 From 6e9d5164153ad6539edd31e7afb02a3e79124cad Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 21 Jan 2012 10:26:26 -0800 Subject: sysctl: Add support for register sysctl tables with a normal cstring path. Make __register_sysctl_table the core sysctl registration operation and make it take a char * string as path. Now that binary paths have been banished into the real of backwards compatibility in kernel/binary_sysctl.c where they can be safely ignored there is no longer a need to use struct ctl_path to represent path names when registering ctl_tables. Start the transition to using normal char * strings to represent pathnames when registering sysctl tables. Normal strings are easier to deal with both in the internal sysctl implementation and for programmers registering sysctl tables. __register_sysctl_paths is turned into a backwards compatibility wrapper that converts a ctl_path array into a normal char * string. Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index bcf60fb..5704ff0 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -882,7 +882,7 @@ static int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *tabl #endif /* CONFIG_SYSCTL_SYSCALL_CHECK */ /** - * __register_sysctl_paths - register a sysctl hierarchy + * __register_sysctl_table - register a sysctl table * @root: List of sysctl headers to register on * @namespaces: Data to compute which lists of sysctl entries are visible * @path: The path to the directory the sysctl table is in. @@ -934,21 +934,34 @@ static int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *tabl * This routine returns %NULL on a failure to register, and a pointer * to the table header on success. */ -struct ctl_table_header *__register_sysctl_paths( +struct ctl_table_header *__register_sysctl_table( struct ctl_table_root *root, struct nsproxy *namespaces, - const struct ctl_path *path, struct ctl_table *table) + const char *path, struct ctl_table *table) { struct ctl_table_header *header; struct ctl_table *new, **prevp; - unsigned int n, npath; + const char *name, *nextname; + unsigned int npath = 0; struct ctl_table_set *set; size_t path_bytes = 0; char *new_name; /* Count the path components */ - for (npath = 0; path[npath].procname; ++npath) - path_bytes += strlen(path[npath].procname) + 1; + for (name = path; name; name = nextname) { + int namelen; + nextname = strchr(name, '/'); + if (nextname) { + namelen = nextname - name; + nextname++; + } else { + namelen = strlen(name); + } + if (namelen == 0) + continue; + path_bytes += namelen + 1; + npath++; + } /* * For each path component, allocate a 2-element ctl_table array. @@ -968,9 +981,20 @@ struct ctl_table_header *__register_sysctl_paths( /* Now connect the dots */ prevp = &header->ctl_table; - for (n = 0; n < npath; ++n, ++path) { - /* Copy the procname */ - strcpy(new_name, path->procname); + for (name = path; name; name = nextname) { + int namelen; + nextname = strchr(name, '/'); + if (nextname) { + namelen = nextname - name; + nextname++; + } else { + namelen = strlen(name); + } + if (namelen == 0) + continue; + memcpy(new_name, name, namelen); + new_name[namelen] = '\0'; + new->procname = new_name; new->mode = 0555; @@ -978,7 +1002,7 @@ struct ctl_table_header *__register_sysctl_paths( prevp = &new->child; new += 2; - new_name += strlen(new_name) + 1; + new_name += namelen + 1; } *prevp = table; header->ctl_table_arg = table; @@ -1022,6 +1046,56 @@ struct ctl_table_header *__register_sysctl_paths( return header; } +static char *append_path(const char *path, char *pos, const char *name) +{ + int namelen; + namelen = strlen(name); + if (((pos - path) + namelen + 2) >= PATH_MAX) + return NULL; + memcpy(pos, name, namelen); + pos[namelen] = '/'; + pos[namelen + 1] = '\0'; + pos += namelen + 1; + return pos; +} + +/** + * __register_sysctl_paths - register a sysctl table hierarchy + * @root: List of sysctl headers to register on + * @namespaces: Data to compute which lists of sysctl entries are visible + * @path: The path to the directory the sysctl table is in. + * @table: the top-level table structure + * + * Register a sysctl table hierarchy. @table should be a filled in ctl_table + * array. A completely 0 filled entry terminates the table. + * + * See __register_sysctl_table for more details. + */ +struct ctl_table_header *__register_sysctl_paths( + struct ctl_table_root *root, + struct nsproxy *namespaces, + const struct ctl_path *path, struct ctl_table *table) +{ + struct ctl_table_header *header = NULL; + const struct ctl_path *component; + char *new_path, *pos; + + pos = new_path = kmalloc(PATH_MAX, GFP_KERNEL); + if (!new_path) + return NULL; + + pos[0] = '\0'; + for (component = path; component->procname; component++) { + pos = append_path(new_path, pos, component->procname); + if (!pos) + goto out; + } + header = __register_sysctl_table(root, namespaces, new_path, table); +out: + kfree(new_path); + return header; +} + /** * register_sysctl_table_path - register a sysctl table hierarchy * @path: The path to the directory the sysctl table is in. diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 43c36ac..a514e0f 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1073,6 +1073,9 @@ extern void setup_sysctl_set(struct ctl_table_set *p, extern void retire_sysctl_set(struct ctl_table_set *set); void register_sysctl_root(struct ctl_table_root *root); +struct ctl_table_header *__register_sysctl_table( + struct ctl_table_root *root, struct nsproxy *namespaces, + const char *path, struct ctl_table *table); struct ctl_table_header *__register_sysctl_paths( struct ctl_table_root *root, struct nsproxy *namespaces, const struct ctl_path *path, struct ctl_table *table); -- cgit v0.10.2 From ec6a52668d0bbc6d648e978c327150254bf1ce7f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 21 Jan 2012 12:35:23 -0800 Subject: sysctl: Add ctl_table chains into cstring paths For any component of table passed to __register_sysctl_paths that actually serves as a path, add that to the cstring path that is passed to __register_sysctl_table. The result is that for most calls to __register_sysctl_paths we only pass a table to __register_sysctl_table that contains no child directories. Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 5704ff0..9b91dee 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1076,6 +1076,7 @@ struct ctl_table_header *__register_sysctl_paths( struct nsproxy *namespaces, const struct ctl_path *path, struct ctl_table *table) { + struct ctl_table *ctl_table_arg = table; struct ctl_table_header *header = NULL; const struct ctl_path *component; char *new_path, *pos; @@ -1090,7 +1091,15 @@ struct ctl_table_header *__register_sysctl_paths( if (!pos) goto out; } + while (table->procname && table->child && !table[1].procname) { + pos = append_path(new_path, pos, table->procname); + if (!pos) + goto out; + table = table->child; + } header = __register_sysctl_table(root, namespaces, new_path, table); + if (header) + header->ctl_table_arg = ctl_table_arg; out: kfree(new_path); return header; -- cgit v0.10.2 From f728019bb72e655680c02ad1829323054a8e875f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 22 Jan 2012 18:22:05 -0800 Subject: sysctl: register only tables of sysctl files Split the registration of a complex ctl_table array which may have arbitrary numbers of directories (->child != NULL) and tables of files into a series of simpler registrations that only register tables of files. Graphically: register('dir', { + file-a + file-b + subdir1 + file-c + subdir2 + file-d + file-e }) is transformed into: wrapper->subheaders[0] = register('dir', {file1-a, file1-b}) wrapper->subheaders[1] = register('dir/subdir1', {file-c}) wrapper->subheaders[2] = register('dir/subdir2', {file-d, file-e}) return wrapper This guarantees that __register_sysctl_table will only see a simple ctl_table array with all entries having (->child == NULL). Care was taken to pass the original simple ctl_table arrays to __register_sysctl_table whenever possible. This change is derived from a similar patch written by Lucrian Grijincu. Inspired-by: Lucian Adrian Grijincu Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 9b91dee..6bab2ae 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -882,7 +882,7 @@ static int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *tabl #endif /* CONFIG_SYSCTL_SYSCALL_CHECK */ /** - * __register_sysctl_table - register a sysctl table + * __register_sysctl_table - register a leaf sysctl table * @root: List of sysctl headers to register on * @namespaces: Data to compute which lists of sysctl entries are visible * @path: The path to the directory the sysctl table is in. @@ -900,29 +900,19 @@ static int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *tabl * * maxlen - the maximum size in bytes of the data * - * mode - the file permissions for the /proc/sys file, and for sysctl(2) + * mode - the file permissions for the /proc/sys file * - * child - a pointer to the child sysctl table if this entry is a directory, or - * %NULL. + * child - must be %NULL. * * proc_handler - the text handler routine (described below) * - * de - for internal use by the sysctl routines - * * extra1, extra2 - extra pointers usable by the proc handler routines * * Leaf nodes in the sysctl tree will be represented by a single file * under /proc; non-leaf nodes will be represented by directories. * - * sysctl(2) can automatically manage read and write requests through - * the sysctl table. The data and maxlen fields of the ctl_table - * struct enable minimal validation of the values being written to be - * performed, and the mode field allows minimal authentication. - * - * There must be a proc_handler routine for any terminal nodes - * mirrored under /proc/sys (non-terminals are handled by a built-in - * directory handler). Several default handlers are available to - * cover common cases - + * There must be a proc_handler routine for any terminal nodes. + * Several default handlers are available to cover common cases - * * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(), * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), @@ -1059,6 +1049,100 @@ static char *append_path(const char *path, char *pos, const char *name) return pos; } +static int count_subheaders(struct ctl_table *table) +{ + int has_files = 0; + int nr_subheaders = 0; + struct ctl_table *entry; + + /* special case: no directory and empty directory */ + if (!table || !table->procname) + return 1; + + for (entry = table; entry->procname; entry++) { + if (entry->child) + nr_subheaders += count_subheaders(entry->child); + else + has_files = 1; + } + return nr_subheaders + has_files; +} + +static int register_leaf_sysctl_tables(const char *path, char *pos, + struct ctl_table_header ***subheader, + struct ctl_table_root *root, struct nsproxy *namespaces, + struct ctl_table *table) +{ + struct ctl_table *ctl_table_arg = NULL; + struct ctl_table *entry, *files; + int nr_files = 0; + int nr_dirs = 0; + int err = -ENOMEM; + + for (entry = table; entry->procname; entry++) { + if (entry->child) + nr_dirs++; + else + nr_files++; + } + + files = table; + /* If there are mixed files and directories we need a new table */ + if (nr_dirs && nr_files) { + struct ctl_table *new; + files = kzalloc(sizeof(struct ctl_table) * (nr_files + 1), + GFP_KERNEL); + if (!files) + goto out; + + ctl_table_arg = files; + for (new = files, entry = table; entry->procname; entry++) { + if (entry->child) + continue; + *new = *entry; + new++; + } + } + + /* Register everything except a directory full of subdirectories */ + if (nr_files || !nr_dirs) { + struct ctl_table_header *header; + header = __register_sysctl_table(root, namespaces, path, files); + if (!header) { + kfree(ctl_table_arg); + goto out; + } + + /* Remember if we need to free the file table */ + header->ctl_table_arg = ctl_table_arg; + **subheader = header; + (*subheader)++; + } + + /* Recurse into the subdirectories. */ + for (entry = table; entry->procname; entry++) { + char *child_pos; + + if (!entry->child) + continue; + + err = -ENAMETOOLONG; + child_pos = append_path(path, pos, entry->procname); + if (!child_pos) + goto out; + + err = register_leaf_sysctl_tables(path, child_pos, subheader, + root, namespaces, entry->child); + pos[0] = '\0'; + if (err) + goto out; + } + err = 0; +out: + /* On failure our caller will unregister all registered subheaders */ + return err; +} + /** * __register_sysctl_paths - register a sysctl table hierarchy * @root: List of sysctl headers to register on @@ -1077,7 +1161,8 @@ struct ctl_table_header *__register_sysctl_paths( const struct ctl_path *path, struct ctl_table *table) { struct ctl_table *ctl_table_arg = table; - struct ctl_table_header *header = NULL; + int nr_subheaders = count_subheaders(table); + struct ctl_table_header *header = NULL, **subheaders, **subheader; const struct ctl_path *component; char *new_path, *pos; @@ -1097,12 +1182,39 @@ struct ctl_table_header *__register_sysctl_paths( goto out; table = table->child; } - header = __register_sysctl_table(root, namespaces, new_path, table); - if (header) + if (nr_subheaders == 1) { + header = __register_sysctl_table(root, namespaces, new_path, table); + if (header) + header->ctl_table_arg = ctl_table_arg; + } else { + header = kzalloc(sizeof(*header) + + sizeof(*subheaders)*nr_subheaders, GFP_KERNEL); + if (!header) + goto out; + + subheaders = (struct ctl_table_header **) (header + 1); + subheader = subheaders; header->ctl_table_arg = ctl_table_arg; + + if (register_leaf_sysctl_tables(new_path, pos, &subheader, + root, namespaces, table)) + goto err_register_leaves; + } + out: kfree(new_path); return header; + +err_register_leaves: + while (subheader > subheaders) { + struct ctl_table_header *subh = *(--subheader); + struct ctl_table *table = subh->ctl_table_arg; + unregister_sysctl_table(subh); + kfree(table); + } + kfree(header); + header = NULL; + goto out; } /** @@ -1149,11 +1261,28 @@ EXPORT_SYMBOL(register_sysctl_table); */ void unregister_sysctl_table(struct ctl_table_header * header) { + int nr_subheaders; might_sleep(); if (header == NULL) return; + nr_subheaders = count_subheaders(header->ctl_table_arg); + if (unlikely(nr_subheaders > 1)) { + struct ctl_table_header **subheaders; + int i; + + subheaders = (struct ctl_table_header **)(header + 1); + for (i = nr_subheaders -1; i >= 0; i--) { + struct ctl_table_header *subh = subheaders[i]; + struct ctl_table *table = subh->ctl_table_arg; + unregister_sysctl_table(subh); + kfree(table); + } + kfree(header); + return; + } + spin_lock(&sysctl_lock); start_unregistering(header); if (!--header->parent->count) { diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index a514e0f..25c7dfe 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1015,7 +1015,7 @@ struct ctl_table void *data; int maxlen; umode_t mode; - struct ctl_table *child; + struct ctl_table *child; /* Deprecated */ struct ctl_table *parent; /* Automatically set */ proc_handler *proc_handler; /* Callback for text formatting */ struct ctl_table_poll *poll; -- cgit v0.10.2 From 7c60c48f58a78195acc1f71c9a9d01958c02ab89 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 21 Jan 2012 13:34:05 -0800 Subject: sysctl: Improve the sysctl sanity checks - Stop validating subdirectories now that we only register leaf tables - Cleanup and improve the duplicate filename check. * Run the duplicate filename check under the sysctl_lock to guarantee we never add duplicate names. * Reduce the duplicate filename check to nearly O(M*N) where M is the number of entries in tthe table we are registering and N is the number of entries in the directory before we got there. - Move the duplicate filename check into it's own function and call it directtly from __register_sysctl_table - Kill the config option as the sanity checks are now cheap enough the config option is unnecessary. The original reason for the config option was because we had a huge table used to verify the proc filename to binary sysctl mapping. That table has now evolved into the binary_sysctl translation layer and is no longer part of the sysctl_check code. - Tighten up the permission checks. Guarnateeing that files only have read or write permissions. - Removed redudant check for parents having a procname as now everything has a procname. - Generalize the backtrace logic so that we print a backtrace from any failure of __register_sysctl_table that was not caused by a memmory allocation failure. The backtrace allows us to track down who erroneously registered a sysctl table. Bechmark before (CONFIG_SYSCTL_CHECK=y): make-dummies 0 999 -> 12s rmmod dummy -> 0.08s Bechmark before (CONFIG_SYSCTL_CHECK=n): make-dummies 0 999 -> 0.7s rmmod dummy -> 0.06s make-dummies 0 99999 -> 1m13s rmmod dummy -> 0.38s Benchmark after: make-dummies 0 999 -> 0.65s rmmod dummy -> 0.055s make-dummies 0 9999 -> 1m10s rmmod dummy -> 0.39s The sysctl sanity checks now impose no measurable cost. Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 6bab2ae..a492ff6 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -726,160 +726,106 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) } } -#ifdef CONFIG_SYSCTL_SYSCALL_CHECK -static int sysctl_depth(struct ctl_table *table) +static int sysctl_check_table_dups(const char *path, struct ctl_table *old, + struct ctl_table *table) { - struct ctl_table *tmp; - int depth; - - depth = 0; - for (tmp = table; tmp->parent; tmp = tmp->parent) - depth++; + struct ctl_table *entry, *test; + int error = 0; - return depth; + for (entry = old; entry->procname; entry++) { + for (test = table; test->procname; test++) { + if (strcmp(entry->procname, test->procname) == 0) { + printk(KERN_ERR "sysctl duplicate entry: %s/%s\n", + path, test->procname); + error = -EEXIST; + } + } + } + return error; } -static struct ctl_table *sysctl_parent(struct ctl_table *table, int n) +static int sysctl_check_dups(struct nsproxy *namespaces, + struct ctl_table_header *header, + const char *path, struct ctl_table *table) { - int i; + struct ctl_table_root *root; + struct ctl_table_set *set; + struct ctl_table_header *dir_head, *head; + struct ctl_table *dir_table; + int error = 0; - for (i = 0; table && i < n; i++) - table = table->parent; + /* No dups if we are the only member of our directory */ + if (header->attached_by != table) + return 0; - return table; -} + dir_head = header->parent; + dir_table = header->attached_to; + error = sysctl_check_table_dups(path, dir_table, table); -static void sysctl_print_path(struct ctl_table *table) -{ - struct ctl_table *tmp; - int depth, i; - depth = sysctl_depth(table); - if (table->procname) { - for (i = depth; i >= 0; i--) { - tmp = sysctl_parent(table, i); - printk("/%s", tmp->procname?tmp->procname:""); - } - } - printk(" "); -} + root = &sysctl_table_root; + do { + set = lookup_header_set(root, namespaces); -static struct ctl_table *sysctl_check_lookup(struct nsproxy *namespaces, - struct ctl_table *table) -{ - struct ctl_table_header *head; - struct ctl_table *ref, *test; - int depth, cur_depth; - - depth = sysctl_depth(table); - - for (head = __sysctl_head_next(namespaces, NULL); head; - head = __sysctl_head_next(namespaces, head)) { - cur_depth = depth; - ref = head->ctl_table; -repeat: - test = sysctl_parent(table, cur_depth); - for (; ref->procname; ref++) { - int match = 0; - if (cur_depth && !ref->child) + list_for_each_entry(head, &set->list, ctl_entry) { + if (head->unregistering) continue; - - if (test->procname && ref->procname && - (strcmp(test->procname, ref->procname) == 0)) - match++; - - if (match) { - if (cur_depth != 0) { - cur_depth--; - ref = ref->child; - goto repeat; - } - goto out; - } + if (head->attached_to != dir_table) + continue; + error = sysctl_check_table_dups(path, head->attached_by, + table); } - } - ref = NULL; -out: - sysctl_head_finish(head); - return ref; + root = list_entry(root->root_list.next, + struct ctl_table_root, root_list); + } while (root != &sysctl_table_root); + return error; } -static void set_fail(const char **fail, struct ctl_table *table, const char *str) +static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...) { - if (*fail) { - printk(KERN_ERR "sysctl table check failed: "); - sysctl_print_path(table); - printk(" %s\n", *fail); - dump_stack(); - } - *fail = str; -} + struct va_format vaf; + va_list args; -static void sysctl_check_leaf(struct nsproxy *namespaces, - struct ctl_table *table, const char **fail) -{ - struct ctl_table *ref; + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + + printk(KERN_ERR "sysctl table check failed: %s/%s %pV\n", + path, table->procname, &vaf); - ref = sysctl_check_lookup(namespaces, table); - if (ref && (ref != table)) - set_fail(fail, table, "Sysctl already exists"); + va_end(args); + return -EINVAL; } -static int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table) +static int sysctl_check_table(const char *path, struct ctl_table *table) { - int error = 0; + int err = 0; for (; table->procname; table++) { - const char *fail = NULL; - - if (table->parent) { - if (!table->parent->procname) - set_fail(&fail, table, "Parent without procname"); - } - if (table->child) { - if (table->data) - set_fail(&fail, table, "Directory with data?"); - if (table->maxlen) - set_fail(&fail, table, "Directory with maxlen?"); - if ((table->mode & (S_IRUGO|S_IXUGO)) != table->mode) - set_fail(&fail, table, "Writable sysctl directory"); - if (table->proc_handler) - set_fail(&fail, table, "Directory with proc_handler"); - if (table->extra1) - set_fail(&fail, table, "Directory with extra1"); - if (table->extra2) - set_fail(&fail, table, "Directory with extra2"); - } else { - if ((table->proc_handler == proc_dostring) || - (table->proc_handler == proc_dointvec) || - (table->proc_handler == proc_dointvec_minmax) || - (table->proc_handler == proc_dointvec_jiffies) || - (table->proc_handler == proc_dointvec_userhz_jiffies) || - (table->proc_handler == proc_dointvec_ms_jiffies) || - (table->proc_handler == proc_doulongvec_minmax) || - (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) { - if (!table->data) - set_fail(&fail, table, "No data"); - if (!table->maxlen) - set_fail(&fail, table, "No maxlen"); - } -#ifdef CONFIG_PROC_SYSCTL - if (!table->proc_handler) - set_fail(&fail, table, "No proc_handler"); -#endif - sysctl_check_leaf(namespaces, table, &fail); - } - if (table->mode > 0777) - set_fail(&fail, table, "bogus .mode"); - if (fail) { - set_fail(&fail, table, NULL); - error = -EINVAL; - } if (table->child) - error |= sysctl_check_table(namespaces, table->child); + err = sysctl_err(path, table, "Not a file"); + + if ((table->proc_handler == proc_dostring) || + (table->proc_handler == proc_dointvec) || + (table->proc_handler == proc_dointvec_minmax) || + (table->proc_handler == proc_dointvec_jiffies) || + (table->proc_handler == proc_dointvec_userhz_jiffies) || + (table->proc_handler == proc_dointvec_ms_jiffies) || + (table->proc_handler == proc_doulongvec_minmax) || + (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) { + if (!table->data) + err = sysctl_err(path, table, "No data"); + if (!table->maxlen) + err = sysctl_err(path, table, "No maxlen"); + } + if (!table->proc_handler) + err = sysctl_err(path, table, "No proc_handler"); + + if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode) + err = sysctl_err(path, table, "bogus .mode 0%o", + table->mode); } - return error; + return err; } -#endif /* CONFIG_SYSCTL_SYSCALL_CHECK */ /** * __register_sysctl_table - register a leaf sysctl table @@ -1003,12 +949,8 @@ struct ctl_table_header *__register_sysctl_table( header->root = root; sysctl_set_parent(NULL, header->ctl_table); header->count = 1; -#ifdef CONFIG_SYSCTL_SYSCALL_CHECK - if (sysctl_check_table(namespaces, header->ctl_table)) { - kfree(header); - return NULL; - } -#endif + if (sysctl_check_table(path, table)) + goto fail; spin_lock(&sysctl_lock); header->set = lookup_header_set(root, namespaces); header->attached_by = header->ctl_table; @@ -1029,11 +971,19 @@ struct ctl_table_header *__register_sysctl_table( struct ctl_table_root, root_list); set = lookup_header_set(root, namespaces); } + if (sysctl_check_dups(namespaces, header, path, table)) + goto fail_locked; header->parent->count++; list_add_tail(&header->ctl_entry, &header->set->list); spin_unlock(&sysctl_lock); return header; +fail_locked: + spin_unlock(&sysctl_lock); +fail: + kfree(header); + dump_stack(); + return NULL; } static char *append_path(const char *path, char *pos, const char *name) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 8745ac7..943a618 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1113,14 +1113,6 @@ config LATENCYTOP Enable this option if you want to use the LatencyTOP tool to find out which userspace is blocking on what kernel operations. -config SYSCTL_SYSCALL_CHECK - bool "Sysctl checks" - depends on SYSCTL - ---help--- - sys_sysctl uses binary paths that have been found challenging - to properly maintain and use. This enables checks that help - you to keep things correct. - source mm/Kconfig.debug source kernel/trace/Kconfig -- cgit v0.10.2 From 8d6ecfcc014332fd2fe933f64194160f0e3a6696 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 6 Jan 2012 11:55:30 -0800 Subject: sysctl: Remove the now unused ctl_table parent field. While useful at one time for selinux and the sysctl sanity checks those users no longer use the parent field and we can safely remove it. Inspired-by: Lucian Adrian Grijincu Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index a492ff6..e573f9b 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -218,16 +218,6 @@ static int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int return test_perm(mode, op); } -static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table) -{ - for (; table->procname; table++) { - table->parent = parent; - if (table->child) - sysctl_set_parent(table, table->child); - } -} - - static struct inode *proc_sys_make_inode(struct super_block *sb, struct ctl_table_header *head, struct ctl_table *table) { @@ -947,10 +937,10 @@ struct ctl_table_header *__register_sysctl_table( header->used = 0; header->unregistering = NULL; header->root = root; - sysctl_set_parent(NULL, header->ctl_table); header->count = 1; if (sysctl_check_table(path, table)) goto fail; + spin_lock(&sysctl_lock); header->set = lookup_header_set(root, namespaces); header->attached_by = header->ctl_table; diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 25c7dfe..094bc5c 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1016,7 +1016,6 @@ struct ctl_table int maxlen; umode_t mode; struct ctl_table *child; /* Deprecated */ - struct ctl_table *parent; /* Automatically set */ proc_handler *proc_handler; /* Callback for text formatting */ struct ctl_table_poll *poll; void *extra1; -- cgit v0.10.2 From 3cc3e04636d603778d921854b84ae7bd34a349a2 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 7 Jan 2012 06:57:47 -0800 Subject: sysctl: A more obvious version of grab_header. Instead of relying on sysct_head_next(NULL) to magically return the right header for the root directory instead explicitly transform NULL into the root directories header. Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index e573f9b..1544485 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -267,10 +267,10 @@ static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name) static struct ctl_table_header *grab_header(struct inode *inode) { - if (PROC_I(inode)->sysctl) - return sysctl_head_grab(PROC_I(inode)->sysctl); - else - return sysctl_head_next(NULL); + struct ctl_table_header *head = PROC_I(inode)->sysctl; + if (!head) + head = &root_table_header; + return sysctl_head_grab(head); } static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, -- cgit v0.10.2 From 938aaa4f9249aa1519fd0db07fc72125de2df338 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 9 Jan 2012 17:24:30 -0800 Subject: sysctl: Initial support for auto-unregistering sysctl tables. Add nreg to ctl_table_header. When nreg drops to 0 the ctl_table_header will be unregistered. Factor out drop_sysctl_table from unregister_sysctl_table, and add the logic for decrementing nreg. Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 1544485..13faa48 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -29,8 +29,9 @@ static struct ctl_table root_table[1]; static struct ctl_table_root sysctl_table_root; static struct ctl_table_header root_table_header = { {{.count = 1, - .ctl_table = root_table, - .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, + .nreg = 1, + .ctl_table = root_table, + .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, .root = &sysctl_table_root, .set = &sysctl_table_root.default_set, }; @@ -938,6 +939,7 @@ struct ctl_table_header *__register_sysctl_table( header->unregistering = NULL; header->root = root; header->count = 1; + header->nreg = 1; if (sysctl_check_table(path, table)) goto fail; @@ -1192,6 +1194,20 @@ struct ctl_table_header *register_sysctl_table(struct ctl_table *table) } EXPORT_SYMBOL(register_sysctl_table); +static void drop_sysctl_table(struct ctl_table_header *header) +{ + if (--header->nreg) + return; + + start_unregistering(header); + if (!--header->parent->count) { + WARN_ON(1); + kfree_rcu(header->parent, rcu); + } + if (!--header->count) + kfree_rcu(header, rcu); +} + /** * unregister_sysctl_table - unregister a sysctl table hierarchy * @header: the header returned from register_sysctl_table @@ -1224,13 +1240,7 @@ void unregister_sysctl_table(struct ctl_table_header * header) } spin_lock(&sysctl_lock); - start_unregistering(header); - if (!--header->parent->count) { - WARN_ON(1); - kfree_rcu(header->parent, rcu); - } - if (!--header->count) - kfree_rcu(header, rcu); + drop_sysctl_table(header); spin_unlock(&sysctl_lock); } EXPORT_SYMBOL(unregister_sysctl_table); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 094bc5c..e40b8f6 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1032,6 +1032,7 @@ struct ctl_table_header struct list_head ctl_entry; int used; int count; + int nreg; }; struct rcu_head rcu; }; -- cgit v0.10.2 From e0d045290a8454ecd7f63c78c10d412f35d6ef94 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 9 Jan 2012 22:36:41 -0800 Subject: sysctl: Factor out init_header from __register_sysctl_paths Factor out a routing to initialize the sysctl_table_header. Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 13faa48..4979925 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -42,6 +42,21 @@ static struct ctl_table_root sysctl_table_root = { static DEFINE_SPINLOCK(sysctl_lock); +static void init_header(struct ctl_table_header *head, + struct ctl_table_root *root, struct ctl_table_set *set, + struct ctl_table *table) +{ + head->ctl_table_arg = table; + INIT_LIST_HEAD(&head->ctl_entry); + head->used = 0; + head->count = 1; + head->nreg = 1; + head->unregistering = NULL; + head->root = root; + head->set = set; + head->parent = NULL; +} + /* called under sysctl_lock */ static int use_table(struct ctl_table_header *p) { @@ -932,14 +947,8 @@ struct ctl_table_header *__register_sysctl_table( new_name += namelen + 1; } *prevp = table; - header->ctl_table_arg = table; - - INIT_LIST_HEAD(&header->ctl_entry); - header->used = 0; - header->unregistering = NULL; - header->root = root; - header->count = 1; - header->nreg = 1; + + init_header(header, root, NULL, table); if (sysctl_check_table(path, table)) goto fail; -- cgit v0.10.2 From 8425d6aaf0704b98480131ed339c208ffce12e44 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 9 Jan 2012 17:35:01 -0800 Subject: sysctl: Factor out insert_header and erase_header Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 4979925..7e96a26 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -57,6 +57,17 @@ static void init_header(struct ctl_table_header *head, head->parent = NULL; } +static void erase_header(struct ctl_table_header *head) +{ + list_del_init(&head->ctl_entry); +} + +static void insert_header(struct ctl_table_header *header) +{ + header->parent->count++; + list_add_tail(&header->ctl_entry, &header->set->list); +} + /* called under sysctl_lock */ static int use_table(struct ctl_table_header *p) { @@ -96,7 +107,7 @@ static void start_unregistering(struct ctl_table_header *p) * do not remove from the list until nobody holds it; walking the * list in do_sysctl() relies on that. */ - list_del_init(&p->ctl_entry); + erase_header(p); } static void sysctl_head_get(struct ctl_table_header *head) @@ -974,8 +985,7 @@ struct ctl_table_header *__register_sysctl_table( } if (sysctl_check_dups(namespaces, header, path, table)) goto fail_locked; - header->parent->count++; - list_add_tail(&header->ctl_entry, &header->set->list); + insert_header(header); spin_unlock(&sysctl_lock); return header; -- cgit v0.10.2 From a194558e8698621a9ce7f2c6a720123e644af131 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 21 Jan 2012 17:51:48 -0800 Subject: sysctl: Normalize the root_table data structure. Every other directory has a .child member and we look at the .child for our entries. Do the same for the root_table. Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 7e96a26..88d1b06 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -25,7 +25,14 @@ void proc_sys_poll_notify(struct ctl_table_poll *poll) wake_up_interruptible(&poll->wait); } -static struct ctl_table root_table[1]; +static struct ctl_table root_table[] = { + { + .procname = "", + .mode = S_IRUGO|S_IXUGO, + .child = &root_table[1], + }, + { } +}; static struct ctl_table_root sysctl_table_root; static struct ctl_table_header root_table_header = { {{.count = 1, @@ -319,7 +326,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, goto out; } - table = table ? table->child : head->ctl_table; + table = table ? table->child : &head->ctl_table[1]; p = find_in_table(table, name); if (!p) { @@ -510,7 +517,7 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) goto out; } - table = table ? table->child : head->ctl_table; + table = table ? table->child : &head->ctl_table[1]; ret = 0; /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ @@ -966,7 +973,7 @@ struct ctl_table_header *__register_sysctl_table( spin_lock(&sysctl_lock); header->set = lookup_header_set(root, namespaces); header->attached_by = header->ctl_table; - header->attached_to = root_table; + header->attached_to = &root_table[1]; header->parent = &root_table_header; set = header->set; root = header->root; -- cgit v0.10.2 From 076c3eed2c31773200b082568957fd8852ae93d7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 9 Jan 2012 21:42:02 -0800 Subject: sysctl: Rewrite proc_sys_lookup introducing find_entry and lookup_entry. Replace the helpers that proc_sys_lookup uses with helpers that work in terms of an entire sysctl directory. This is worse for sysctl_lock hold times but it is much better for code clarity and the code cleanups to come. find_in_table is no longer needed so it is removed. find_entry a general helper to find entries in a directory is added. lookup_entry is a simple wrapper around find_entry that takes the sysctl_lock increases the use count if an entry is found and drops the sysctl_lock. Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 88d1b06..3b63f29 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -49,6 +49,55 @@ static struct ctl_table_root sysctl_table_root = { static DEFINE_SPINLOCK(sysctl_lock); +static int namecmp(const char *name1, int len1, const char *name2, int len2) +{ + int minlen; + int cmp; + + minlen = len1; + if (minlen > len2) + minlen = len2; + + cmp = memcmp(name1, name2, minlen); + if (cmp == 0) + cmp = len1 - len2; + return cmp; +} + +static struct ctl_table *find_entry(struct ctl_table_header **phead, + struct ctl_table_set *set, + struct ctl_table_header *dir_head, struct ctl_table *dir, + const char *name, int namelen) +{ + struct ctl_table_header *head; + struct ctl_table *entry; + + if (dir_head->set == set) { + for (entry = dir; entry->procname; entry++) { + const char *procname = entry->procname; + if (namecmp(procname, strlen(procname), name, namelen) == 0) { + *phead = dir_head; + return entry; + } + } + } + + list_for_each_entry(head, &set->list, ctl_entry) { + if (head->unregistering) + continue; + if (head->attached_to != dir) + continue; + for (entry = head->attached_by; entry->procname; entry++) { + const char *procname = entry->procname; + if (namecmp(procname, strlen(procname), name, namelen) == 0) { + *phead = head; + return entry; + } + } + } + return NULL; +} + static void init_header(struct ctl_table_header *head, struct ctl_table_root *root, struct ctl_table_set *set, struct ctl_table *table) @@ -168,6 +217,32 @@ lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces) return &set->list; } +static struct ctl_table *lookup_entry(struct ctl_table_header **phead, + struct ctl_table_header *dir_head, + struct ctl_table *dir, + const char *name, int namelen) +{ + struct ctl_table_header *head; + struct ctl_table *entry; + struct ctl_table_root *root; + struct ctl_table_set *set; + + spin_lock(&sysctl_lock); + root = &sysctl_table_root; + do { + set = lookup_header_set(root, current->nsproxy); + entry = find_entry(&head, set, dir_head, dir, name, namelen); + if (entry && use_table(head)) + *phead = head; + else + entry = NULL; + root = list_entry(root->root_list.next, + struct ctl_table_root, root_list); + } while (!entry && root != &sysctl_table_root); + spin_unlock(&sysctl_lock); + return entry; +} + static struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, struct ctl_table_header *prev) { @@ -284,21 +359,6 @@ out: return inode; } -static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name) -{ - for ( ; p->procname; p++) { - if (strlen(p->procname) != name->len) - continue; - - if (memcmp(p->procname, name->name, name->len) != 0) - continue; - - /* I have a match */ - return p; - } - return NULL; -} - static struct ctl_table_header *grab_header(struct inode *inode) { struct ctl_table_header *head = PROC_I(inode)->sysctl; @@ -328,17 +388,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, table = table ? table->child : &head->ctl_table[1]; - p = find_in_table(table, name); - if (!p) { - for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) { - if (h->attached_to != table) - continue; - p = find_in_table(h->attached_by, name); - if (p) - break; - } - } - + p = lookup_entry(&h, head, table, name->name, name->len); if (!p) goto out; -- cgit v0.10.2 From 6a75ce167c53b41f15088d3c2c7e51c89dc8798a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 18 Jan 2012 03:15:51 -0800 Subject: sysctl: Rewrite proc_sys_readdir in terms of first_entry and next_entry Replace sysctl_head_next with first_entry and next_entry. These new iterators operate at the level of sysctl table entries and filter out any sysctl tables that should not be shown. Utilizing two specialized functions instead of a single function removes conditionals for handling awkward special cases that only come up at the beginning of iteration, making the iterators easier to read and understand. Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 3b63f29..d9c3ae6 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -243,31 +243,25 @@ static struct ctl_table *lookup_entry(struct ctl_table_header **phead, return entry; } -static struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, - struct ctl_table_header *prev) +static struct ctl_table_header *next_usable_entry(struct ctl_table *dir, + struct ctl_table_root *root, struct list_head *tmp) { - struct ctl_table_root *root; + struct nsproxy *namespaces = current->nsproxy; struct list_head *header_list; struct ctl_table_header *head; - struct list_head *tmp; - spin_lock(&sysctl_lock); - if (prev) { - head = prev; - tmp = &prev->ctl_entry; - unuse_table(prev); - goto next; - } - tmp = &root_table_header.ctl_entry; + goto next; for (;;) { head = list_entry(tmp, struct ctl_table_header, ctl_entry); + root = head->root; - if (!use_table(head)) + if (head->attached_to != dir || + !head->attached_by->procname || + !use_table(head)) goto next; - spin_unlock(&sysctl_lock); + return head; next: - root = head->root; tmp = tmp->next; header_list = lookup_header_list(root, namespaces); if (tmp != header_list) @@ -283,13 +277,53 @@ static struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, tmp = header_list->next; } out: - spin_unlock(&sysctl_lock); return NULL; } -static struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev) +static void first_entry( + struct ctl_table_header *dir_head, struct ctl_table *dir, + struct ctl_table_header **phead, struct ctl_table **pentry) { - return __sysctl_head_next(current->nsproxy, prev); + struct ctl_table_header *head = dir_head; + struct ctl_table *entry = dir; + + spin_lock(&sysctl_lock); + if (entry->procname) { + use_table(head); + } else { + head = next_usable_entry(dir, &sysctl_table_root, + &sysctl_table_root.default_set.list); + if (head) + entry = head->attached_by; + } + spin_unlock(&sysctl_lock); + *phead = head; + *pentry = entry; +} + +static void next_entry(struct ctl_table *dir, + struct ctl_table_header **phead, struct ctl_table **pentry) +{ + struct ctl_table_header *head = *phead; + struct ctl_table *entry = *pentry; + + entry++; + if (!entry->procname) { + struct ctl_table_root *root = head->root; + struct list_head *tmp = &head->ctl_entry; + if (head->attached_to != dir) { + root = &sysctl_table_root; + tmp = &sysctl_table_root.default_set.list; + } + spin_lock(&sysctl_lock); + unuse_table(head); + head = next_usable_entry(dir, root, tmp); + spin_unlock(&sysctl_lock); + if (head) + entry = head->attached_by; + } + *phead = head; + *pentry = entry; } void register_sysctl_root(struct ctl_table_root *root) @@ -533,20 +567,17 @@ static int scan(struct ctl_table_header *head, ctl_table *table, unsigned long *pos, struct file *file, void *dirent, filldir_t filldir) { + int res; - for (; table->procname; table++, (*pos)++) { - int res; + if ((*pos)++ < file->f_pos) + return 0; - if (*pos < file->f_pos) - continue; + res = proc_sys_fill_cache(file, dirent, filldir, head, table); - res = proc_sys_fill_cache(file, dirent, filldir, head, table); - if (res) - return res; + if (res == 0) + file->f_pos = *pos; - file->f_pos = *pos + 1; - } - return 0; + return res; } static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) @@ -556,6 +587,7 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) struct ctl_table_header *head = grab_header(inode); struct ctl_table *table = PROC_I(inode)->sysctl_entry; struct ctl_table_header *h = NULL; + struct ctl_table *entry; unsigned long pos; int ret = -EINVAL; @@ -585,14 +617,8 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) } pos = 2; - ret = scan(head, table, &pos, filp, dirent, filldir); - if (ret) - goto out; - - for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) { - if (h->attached_to != table) - continue; - ret = scan(h, h->attached_by, &pos, filp, dirent, filldir); + for (first_entry(head, table, &h, &entry); h; next_entry(table, &h, &entry)) { + ret = scan(h, entry, &pos, filp, dirent, filldir); if (ret) { sysctl_head_finish(h); break; -- cgit v0.10.2 From 9eb47c26f09e27506d343ef52e634b2a50ee21ef Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 22 Jan 2012 21:26:00 -0800 Subject: sysctl: Add a root pointer to ctl_table_set Add a ctl_table_root pointer to ctl_table set so it is easy to go from a ctl_table_set to a ctl_table_root. Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index d9c3ae6..65c13dd 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -45,6 +45,7 @@ static struct ctl_table_header root_table_header = { static struct ctl_table_root sysctl_table_root = { .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list), .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry), + .default_set.root = &sysctl_table_root, }; static DEFINE_SPINLOCK(sysctl_lock); @@ -1348,9 +1349,11 @@ void unregister_sysctl_table(struct ctl_table_header * header) EXPORT_SYMBOL(unregister_sysctl_table); void setup_sysctl_set(struct ctl_table_set *p, + struct ctl_table_root *root, int (*is_seen)(struct ctl_table_set *)) { INIT_LIST_HEAD(&p->list); + p->root = root; p->is_seen = is_seen; } diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index e40b8f6..e73ba33 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1047,6 +1047,7 @@ struct ctl_table_header struct ctl_table_set { struct list_head list; + struct ctl_table_root *root; int (*is_seen)(struct ctl_table_set *); }; @@ -1069,6 +1070,7 @@ struct ctl_path { void proc_sys_poll_notify(struct ctl_table_poll *poll); extern void setup_sysctl_set(struct ctl_table_set *p, + struct ctl_table_root *root, int (*is_seen)(struct ctl_table_set *)); extern void retire_sysctl_set(struct ctl_table_set *set); @@ -1103,6 +1105,7 @@ static inline void unregister_sysctl_table(struct ctl_table_header * table) } static inline void setup_sysctl_set(struct ctl_table_set *p, + struct ctl_table_root *root, int (*is_seen)(struct ctl_table_set *)) { } diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 07c6b87..e998c64 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -74,8 +74,7 @@ static struct ctl_table_root net_sysctl_ro_root = { static int __net_init sysctl_net_init(struct net *net) { - setup_sysctl_set(&net->sysctls, - is_seen); + setup_sysctl_set(&net->sysctls, &net_sysctl_root, is_seen); return 0; } @@ -95,7 +94,7 @@ static __init int net_sysctl_init(void) ret = register_pernet_subsys(&sysctl_pernet_ops); if (ret) goto out; - setup_sysctl_set(&net_sysctl_ro_root.default_set, NULL); + setup_sysctl_set(&net_sysctl_ro_root.default_set, &net_sysctl_ro_root, NULL); register_sysctl_root(&net_sysctl_ro_root); register_sysctl_root(&net_sysctl_root); out: -- cgit v0.10.2 From 7ec66d06362da7684a4948c4c2bf1f8546425df4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 29 Dec 2011 08:24:29 -0800 Subject: sysctl: Stop requiring explicit management of sysctl directories Simplify the code and the sysctl semantics by autogenerating sysctl directories when a sysctl table is registered that needs the directories and autodeleting the directories when there are no more sysctl tables registered that need them. Autogenerating directories keeps sysctl tables from depending on each other, removing all of the arcane register/unregister ordering constraints and makes it impossible to get the order wrong when reigsering and unregistering sysctl tables. Autogenerating directories yields one unique entity that dentries can point to, retaining the current effective use of the dcache. Add struct ctl_dir as the type of these new autogenerated directories. The attached_by and attached_to fields in ctl_table_header are removed as they are no longer needed. The child field in ctl_table is no longer needed by the core of the sysctl code. ctl_table.child can be removed once all of the existing users have been updated. Benchmark before: make-dummies 0 999 -> 0.7s rmmod dummy -> 0.07s make-dummies 0 9999 -> 1m10s rmmod dummy -> 0.4s Benchmark after: make-dummies 0 999 -> 0.44s rmmod dummy -> 0.065s make-dummies 0 9999 -> 1m36s rmmod dummy -> 0.4s Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 65c13dd..3c0767d 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -28,28 +28,31 @@ void proc_sys_poll_notify(struct ctl_table_poll *poll) static struct ctl_table root_table[] = { { .procname = "", - .mode = S_IRUGO|S_IXUGO, - .child = &root_table[1], + .mode = S_IFDIR|S_IRUGO|S_IXUGO, }, { } }; static struct ctl_table_root sysctl_table_root; -static struct ctl_table_header root_table_header = { - {{.count = 1, - .nreg = 1, - .ctl_table = root_table, - .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, - .root = &sysctl_table_root, - .set = &sysctl_table_root.default_set, +static struct ctl_dir sysctl_root_dir = { + .header = { + {{.count = 1, + .nreg = 1, + .ctl_table = root_table, + .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, + .root = &sysctl_table_root, + .set = &sysctl_table_root.default_set, + }, }; static struct ctl_table_root sysctl_table_root = { .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list), - .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry), + .default_set.list = LIST_HEAD_INIT(sysctl_root_dir.header.ctl_entry), .default_set.root = &sysctl_table_root, }; static DEFINE_SPINLOCK(sysctl_lock); +static void drop_sysctl_table(struct ctl_table_header *header); + static int namecmp(const char *name1, int len1, const char *name2, int len2) { int minlen; @@ -66,29 +69,18 @@ static int namecmp(const char *name1, int len1, const char *name2, int len2) } static struct ctl_table *find_entry(struct ctl_table_header **phead, - struct ctl_table_set *set, - struct ctl_table_header *dir_head, struct ctl_table *dir, + struct ctl_table_set *set, struct ctl_dir *dir, const char *name, int namelen) { struct ctl_table_header *head; struct ctl_table *entry; - if (dir_head->set == set) { - for (entry = dir; entry->procname; entry++) { - const char *procname = entry->procname; - if (namecmp(procname, strlen(procname), name, namelen) == 0) { - *phead = dir_head; - return entry; - } - } - } - list_for_each_entry(head, &set->list, ctl_entry) { if (head->unregistering) continue; - if (head->attached_to != dir) + if (head->parent != dir) continue; - for (entry = head->attached_by; entry->procname; entry++) { + for (entry = head->ctl_table; entry->procname; entry++) { const char *procname = entry->procname; if (namecmp(procname, strlen(procname), name, namelen) == 0) { *phead = head; @@ -103,6 +95,7 @@ static void init_header(struct ctl_table_header *head, struct ctl_table_root *root, struct ctl_table_set *set, struct ctl_table *table) { + head->ctl_table = table; head->ctl_table_arg = table; INIT_LIST_HEAD(&head->ctl_entry); head->used = 0; @@ -119,9 +112,10 @@ static void erase_header(struct ctl_table_header *head) list_del_init(&head->ctl_entry); } -static void insert_header(struct ctl_table_header *header) +static void insert_header(struct ctl_dir *dir, struct ctl_table_header *header) { - header->parent->count++; + header->parent = dir; + header->parent->header.nreg++; list_add_tail(&header->ctl_entry, &header->set->list); } @@ -219,8 +213,7 @@ lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces) } static struct ctl_table *lookup_entry(struct ctl_table_header **phead, - struct ctl_table_header *dir_head, - struct ctl_table *dir, + struct ctl_dir *dir, const char *name, int namelen) { struct ctl_table_header *head; @@ -232,7 +225,7 @@ static struct ctl_table *lookup_entry(struct ctl_table_header **phead, root = &sysctl_table_root; do { set = lookup_header_set(root, current->nsproxy); - entry = find_entry(&head, set, dir_head, dir, name, namelen); + entry = find_entry(&head, set, dir, name, namelen); if (entry && use_table(head)) *phead = head; else @@ -244,7 +237,7 @@ static struct ctl_table *lookup_entry(struct ctl_table_header **phead, return entry; } -static struct ctl_table_header *next_usable_entry(struct ctl_table *dir, +static struct ctl_table_header *next_usable_entry(struct ctl_dir *dir, struct ctl_table_root *root, struct list_head *tmp) { struct nsproxy *namespaces = current->nsproxy; @@ -256,8 +249,8 @@ static struct ctl_table_header *next_usable_entry(struct ctl_table *dir, head = list_entry(tmp, struct ctl_table_header, ctl_entry); root = head->root; - if (head->attached_to != dir || - !head->attached_by->procname || + if (head->parent != dir || + !head->ctl_table->procname || !use_table(head)) goto next; @@ -281,47 +274,35 @@ out: return NULL; } -static void first_entry( - struct ctl_table_header *dir_head, struct ctl_table *dir, +static void first_entry(struct ctl_dir *dir, struct ctl_table_header **phead, struct ctl_table **pentry) { - struct ctl_table_header *head = dir_head; - struct ctl_table *entry = dir; + struct ctl_table_header *head; + struct ctl_table *entry = NULL; spin_lock(&sysctl_lock); - if (entry->procname) { - use_table(head); - } else { - head = next_usable_entry(dir, &sysctl_table_root, - &sysctl_table_root.default_set.list); - if (head) - entry = head->attached_by; - } + head = next_usable_entry(dir, &sysctl_table_root, + &sysctl_table_root.default_set.list); spin_unlock(&sysctl_lock); + if (head) + entry = head->ctl_table; *phead = head; *pentry = entry; } -static void next_entry(struct ctl_table *dir, - struct ctl_table_header **phead, struct ctl_table **pentry) +static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentry) { struct ctl_table_header *head = *phead; struct ctl_table *entry = *pentry; entry++; if (!entry->procname) { - struct ctl_table_root *root = head->root; - struct list_head *tmp = &head->ctl_entry; - if (head->attached_to != dir) { - root = &sysctl_table_root; - tmp = &sysctl_table_root.default_set.list; - } spin_lock(&sysctl_lock); unuse_table(head); - head = next_usable_entry(dir, root, tmp); + head = next_usable_entry(head->parent, head->root, &head->ctl_entry); spin_unlock(&sysctl_lock); if (head) - entry = head->attached_by; + entry = head->ctl_table; } *phead = head; *pentry = entry; @@ -381,7 +362,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_mode = table->mode; - if (!table->child) { + if (!S_ISDIR(table->mode)) { inode->i_mode |= S_IFREG; inode->i_op = &proc_sys_inode_operations; inode->i_fop = &proc_sys_file_operations; @@ -398,7 +379,7 @@ static struct ctl_table_header *grab_header(struct inode *inode) { struct ctl_table_header *head = PROC_I(inode)->sysctl; if (!head) - head = &root_table_header; + head = &sysctl_root_dir.header; return sysctl_head_grab(head); } @@ -406,24 +387,19 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct ctl_table_header *head = grab_header(dir); - struct ctl_table *table = PROC_I(dir)->sysctl_entry; struct ctl_table_header *h = NULL; struct qstr *name = &dentry->d_name; struct ctl_table *p; struct inode *inode; struct dentry *err = ERR_PTR(-ENOENT); + struct ctl_dir *ctl_dir; if (IS_ERR(head)) return ERR_CAST(head); - if (table && !table->child) { - WARN_ON(1); - goto out; - } + ctl_dir = container_of(head, struct ctl_dir, header); - table = table ? table->child : &head->ctl_table[1]; - - p = lookup_entry(&h, head, table, name->name, name->len); + p = lookup_entry(&h, ctl_dir, name->name, name->len); if (!p) goto out; @@ -586,21 +562,16 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; struct ctl_table_header *head = grab_header(inode); - struct ctl_table *table = PROC_I(inode)->sysctl_entry; struct ctl_table_header *h = NULL; struct ctl_table *entry; + struct ctl_dir *ctl_dir; unsigned long pos; int ret = -EINVAL; if (IS_ERR(head)) return PTR_ERR(head); - if (table && !table->child) { - WARN_ON(1); - goto out; - } - - table = table ? table->child : &head->ctl_table[1]; + ctl_dir = container_of(head, struct ctl_dir, header); ret = 0; /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ @@ -618,7 +589,7 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) } pos = 2; - for (first_entry(head, table, &h, &entry); h; next_entry(table, &h, &entry)) { + for (first_entry(ctl_dir, &h, &entry); h; next_entry(&h, &entry)) { ret = scan(h, entry, &pos, filp, dirent, filldir); if (ret) { sysctl_head_finish(h); @@ -779,52 +750,86 @@ static const struct dentry_operations proc_sys_dentry_operations = { .d_compare = proc_sys_compare, }; -static struct ctl_table *is_branch_in(struct ctl_table *branch, - struct ctl_table *table) +static struct ctl_dir *find_subdir(struct ctl_table_set *set, struct ctl_dir *dir, + const char *name, int namelen) { - struct ctl_table *p; - const char *s = branch->procname; + struct ctl_table_header *head; + struct ctl_table *entry; - /* branch should have named subdirectory as its first element */ - if (!s || !branch->child) - return NULL; + entry = find_entry(&head, set, dir, name, namelen); + if (!entry) + return ERR_PTR(-ENOENT); + if (S_ISDIR(entry->mode)) + return container_of(head, struct ctl_dir, header); + return ERR_PTR(-ENOTDIR); +} + +static struct ctl_dir *new_dir(struct ctl_table_set *set, + const char *name, int namelen) +{ + struct ctl_table *table; + struct ctl_dir *new; + char *new_name; - /* ... and nothing else */ - if (branch[1].procname) + new = kzalloc(sizeof(*new) + sizeof(struct ctl_table)*2 + + namelen + 1, GFP_KERNEL); + if (!new) return NULL; - /* table should contain subdirectory with the same name */ - for (p = table; p->procname; p++) { - if (!p->child) - continue; - if (p->procname && strcmp(p->procname, s) == 0) - return p; - } - return NULL; + table = (struct ctl_table *)(new + 1); + new_name = (char *)(table + 2); + memcpy(new_name, name, namelen); + new_name[namelen] = '\0'; + table[0].procname = new_name; + table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO; + init_header(&new->header, set->root, set, table); + + return new; } -/* see if attaching q to p would be an improvement */ -static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) +static struct ctl_dir *get_subdir(struct ctl_table_set *set, + struct ctl_dir *dir, const char *name, int namelen) { - struct ctl_table *to = p->ctl_table, *by = q->ctl_table; - struct ctl_table *next; - int is_better = 0; - int not_in_parent = !p->attached_by; - - while ((next = is_branch_in(by, to)) != NULL) { - if (by == q->attached_by) - is_better = 1; - if (to == p->attached_by) - not_in_parent = 1; - by = by->child; - to = next->child; - } + struct ctl_dir *subdir, *new = NULL; - if (is_better && not_in_parent) { - q->attached_by = by; - q->attached_to = to; - q->parent = p; + spin_lock(&sysctl_lock); + subdir = find_subdir(dir->header.set, dir, name, namelen); + if (!IS_ERR(subdir)) + goto found; + if ((PTR_ERR(subdir) == -ENOENT) && set != dir->header.set) + subdir = find_subdir(set, dir, name, namelen); + if (!IS_ERR(subdir)) + goto found; + if (PTR_ERR(subdir) != -ENOENT) + goto failed; + + spin_unlock(&sysctl_lock); + new = new_dir(set, name, namelen); + spin_lock(&sysctl_lock); + subdir = ERR_PTR(-ENOMEM); + if (!new) + goto failed; + + subdir = find_subdir(set, dir, name, namelen); + if (!IS_ERR(subdir)) + goto found; + if (PTR_ERR(subdir) != -ENOENT) + goto failed; + + insert_header(dir, &new->header); + subdir = new; +found: + subdir->header.nreg++; +failed: + if (unlikely(IS_ERR(subdir))) { + printk(KERN_ERR "sysctl could not get directory: %*.*s %ld\n", + namelen, namelen, name, PTR_ERR(subdir)); } + drop_sysctl_table(&dir->header); + if (new) + drop_sysctl_table(&new->header); + spin_unlock(&sysctl_lock); + return subdir; } static int sysctl_check_table_dups(const char *path, struct ctl_table *old, @@ -846,24 +851,14 @@ static int sysctl_check_table_dups(const char *path, struct ctl_table *old, } static int sysctl_check_dups(struct nsproxy *namespaces, - struct ctl_table_header *header, + struct ctl_dir *dir, const char *path, struct ctl_table *table) { struct ctl_table_root *root; struct ctl_table_set *set; - struct ctl_table_header *dir_head, *head; - struct ctl_table *dir_table; + struct ctl_table_header *head; int error = 0; - /* No dups if we are the only member of our directory */ - if (header->attached_by != table) - return 0; - - dir_head = header->parent; - dir_table = header->attached_to; - - error = sysctl_check_table_dups(path, dir_table, table); - root = &sysctl_table_root; do { set = lookup_header_set(root, namespaces); @@ -871,9 +866,9 @@ static int sysctl_check_dups(struct nsproxy *namespaces, list_for_each_entry(head, &set->list, ctl_entry) { if (head->unregistering) continue; - if (head->attached_to != dir_table) + if (head->parent != dir) continue; - error = sysctl_check_table_dups(path, head->attached_by, + error = sysctl_check_table_dups(path, head->ctl_table, table); } root = list_entry(root->root_list.next, @@ -977,47 +972,25 @@ struct ctl_table_header *__register_sysctl_table( const char *path, struct ctl_table *table) { struct ctl_table_header *header; - struct ctl_table *new, **prevp; const char *name, *nextname; - unsigned int npath = 0; struct ctl_table_set *set; - size_t path_bytes = 0; - char *new_name; - - /* Count the path components */ - for (name = path; name; name = nextname) { - int namelen; - nextname = strchr(name, '/'); - if (nextname) { - namelen = nextname - name; - nextname++; - } else { - namelen = strlen(name); - } - if (namelen == 0) - continue; - path_bytes += namelen + 1; - npath++; - } + struct ctl_dir *dir; - /* - * For each path component, allocate a 2-element ctl_table array. - * The first array element will be filled with the sysctl entry - * for this, the second will be the sentinel (procname == 0). - * - * We allocate everything in one go so that we don't have to - * worry about freeing additional memory in unregister_sysctl_table. - */ - header = kzalloc(sizeof(struct ctl_table_header) + path_bytes + - (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL); + header = kzalloc(sizeof(struct ctl_table_header), GFP_KERNEL); if (!header) return NULL; - new = (struct ctl_table *) (header + 1); - new_name = (char *)(new + (2 * npath)); + init_header(header, root, NULL, table); + if (sysctl_check_table(path, table)) + goto fail; + + spin_lock(&sysctl_lock); + header->set = set = lookup_header_set(root, namespaces); + dir = &sysctl_root_dir; + dir->header.nreg++; + spin_unlock(&sysctl_lock); - /* Now connect the dots */ - prevp = &header->ctl_table; + /* Find the directory for the ctl_table */ for (name = path; name; name = nextname) { int namelen; nextname = strchr(name, '/'); @@ -1029,51 +1002,21 @@ struct ctl_table_header *__register_sysctl_table( } if (namelen == 0) continue; - memcpy(new_name, name, namelen); - new_name[namelen] = '\0'; - - new->procname = new_name; - new->mode = 0555; - - *prevp = new; - prevp = &new->child; - new += 2; - new_name += namelen + 1; + dir = get_subdir(set, dir, name, namelen); + if (IS_ERR(dir)) + goto fail; } - *prevp = table; - - init_header(header, root, NULL, table); - if (sysctl_check_table(path, table)) - goto fail; - spin_lock(&sysctl_lock); - header->set = lookup_header_set(root, namespaces); - header->attached_by = header->ctl_table; - header->attached_to = &root_table[1]; - header->parent = &root_table_header; - set = header->set; - root = header->root; - for (;;) { - struct ctl_table_header *p; - list_for_each_entry(p, &set->list, ctl_entry) { - if (p->unregistering) - continue; - try_attach(p, header); - } - if (root == &sysctl_table_root) - break; - root = list_entry(root->root_list.prev, - struct ctl_table_root, root_list); - set = lookup_header_set(root, namespaces); - } - if (sysctl_check_dups(namespaces, header, path, table)) - goto fail_locked; - insert_header(header); + if (sysctl_check_dups(namespaces, dir, path, table)) + goto fail_put_dir_locked; + insert_header(dir, header); + drop_sysctl_table(&dir->header); spin_unlock(&sysctl_lock); return header; -fail_locked: +fail_put_dir_locked: + drop_sysctl_table(&dir->header); spin_unlock(&sysctl_lock); fail: kfree(header); @@ -1299,16 +1242,17 @@ EXPORT_SYMBOL(register_sysctl_table); static void drop_sysctl_table(struct ctl_table_header *header) { + struct ctl_dir *parent = header->parent; + if (--header->nreg) return; start_unregistering(header); - if (!--header->parent->count) { - WARN_ON(1); - kfree_rcu(header->parent, rcu); - } if (!--header->count) kfree_rcu(header, rcu); + + if (parent) + drop_sysctl_table(&parent->header); } /** diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index e73ba33..3084b62 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -938,6 +938,7 @@ struct ctl_table; struct nsproxy; struct ctl_table_root; struct ctl_table_header; +struct ctl_dir; typedef struct ctl_table ctl_table; @@ -1040,9 +1041,12 @@ struct ctl_table_header struct ctl_table *ctl_table_arg; struct ctl_table_root *root; struct ctl_table_set *set; - struct ctl_table *attached_by; - struct ctl_table *attached_to; - struct ctl_table_header *parent; + struct ctl_dir *parent; +}; + +struct ctl_dir { + /* Header must be at the start of ctl_dir */ + struct ctl_table_header header; }; struct ctl_table_set { -- cgit v0.10.2 From 6980128fe1b834c92a85e556ca8198030f0d8d01 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 21 Jan 2012 20:09:45 -0800 Subject: sysctl: Add sysctl_print_dir and use it in get_subdir When there are errors it is very nice to know the full sysctl path. Add a simple function that computes the sysctl path and prints it out. Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 3c0767d..a785565 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -53,6 +53,13 @@ static DEFINE_SPINLOCK(sysctl_lock); static void drop_sysctl_table(struct ctl_table_header *header); +static void sysctl_print_dir(struct ctl_dir *dir) +{ + if (dir->header.parent) + sysctl_print_dir(dir->header.parent); + printk(KERN_CONT "%s/", dir->header.ctl_table[0].procname); +} + static int namecmp(const char *name1, int len1, const char *name2, int len2) { int minlen; @@ -822,7 +829,9 @@ found: subdir->header.nreg++; failed: if (unlikely(IS_ERR(subdir))) { - printk(KERN_ERR "sysctl could not get directory: %*.*s %ld\n", + printk(KERN_ERR "sysctl could not get directory: "); + sysctl_print_dir(dir); + printk(KERN_CONT "/%*.*s %ld\n", namelen, namelen, name, PTR_ERR(subdir)); } drop_sysctl_table(&dir->header); -- cgit v0.10.2 From 0e47c99d7fe25e0f3907d9f3401079169d904891 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 7 Jan 2012 23:24:30 -0800 Subject: sysctl: Replace root_list with links between sysctl_table_sets. Piecing together directories by looking first in one directory tree, than in another directory tree and finally in a third directory tree makes it hard to verify that some directory entries are not multiply defined and makes it hard to create efficient implementations the sysctl filesystem. Replace the sysctl wide list of roots with autogenerated links from the core sysctl directory tree to the other sysctl directory trees. This simplifies sysctl directory reading and lookups as now only entries in a single sysctl directory tree need to be considered. Benchmark before: make-dummies 0 999 -> 0.44s rmmod dummy -> 0.065s make-dummies 0 9999 -> 1m36s rmmod dummy -> 0.4s Benchmark after: make-dummies 0 999 -> 0.63s rmmod dummy -> 0.12s make-dummies 0 9999 -> 2m35s rmmod dummy -> 18s The slowdown is caused by the lookups used in insert_headers and put_links to see if we need to add links or remove links. Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index a785565..ec54a57 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -32,26 +32,26 @@ static struct ctl_table root_table[] = { }, { } }; -static struct ctl_table_root sysctl_table_root; -static struct ctl_dir sysctl_root_dir = { - .header = { +static struct ctl_table_root sysctl_table_root = { + .default_set.list = LIST_HEAD_INIT(sysctl_table_root.default_set.dir.header.ctl_entry), + .default_set.dir.header = { {{.count = 1, .nreg = 1, .ctl_table = root_table, .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, + .ctl_table_arg = root_table, .root = &sysctl_table_root, .set = &sysctl_table_root.default_set, }, }; -static struct ctl_table_root sysctl_table_root = { - .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list), - .default_set.list = LIST_HEAD_INIT(sysctl_root_dir.header.ctl_entry), - .default_set.root = &sysctl_table_root, -}; static DEFINE_SPINLOCK(sysctl_lock); static void drop_sysctl_table(struct ctl_table_header *header); +static int sysctl_follow_link(struct ctl_table_header **phead, + struct ctl_table **pentry, struct nsproxy *namespaces); +static int insert_links(struct ctl_table_header *head); +static void put_links(struct ctl_table_header *header); static void sysctl_print_dir(struct ctl_dir *dir) { @@ -76,9 +76,9 @@ static int namecmp(const char *name1, int len1, const char *name2, int len2) } static struct ctl_table *find_entry(struct ctl_table_header **phead, - struct ctl_table_set *set, struct ctl_dir *dir, - const char *name, int namelen) + struct ctl_dir *dir, const char *name, int namelen) { + struct ctl_table_set *set = dir->header.set; struct ctl_table_header *head; struct ctl_table *entry; @@ -119,11 +119,21 @@ static void erase_header(struct ctl_table_header *head) list_del_init(&head->ctl_entry); } -static void insert_header(struct ctl_dir *dir, struct ctl_table_header *header) +static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) { + int err; + + dir->header.nreg++; header->parent = dir; - header->parent->header.nreg++; + err = insert_links(header); + if (err) + goto fail_links; list_add_tail(&header->ctl_entry, &header->set->list); + return 0; +fail_links: + header->parent = NULL; + drop_sysctl_table(&dir->header); + return err; } /* called under sysctl_lock */ @@ -212,72 +222,39 @@ lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces) return set; } -static struct list_head * -lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces) -{ - struct ctl_table_set *set = lookup_header_set(root, namespaces); - return &set->list; -} - static struct ctl_table *lookup_entry(struct ctl_table_header **phead, struct ctl_dir *dir, const char *name, int namelen) { struct ctl_table_header *head; struct ctl_table *entry; - struct ctl_table_root *root; - struct ctl_table_set *set; spin_lock(&sysctl_lock); - root = &sysctl_table_root; - do { - set = lookup_header_set(root, current->nsproxy); - entry = find_entry(&head, set, dir, name, namelen); - if (entry && use_table(head)) - *phead = head; - else - entry = NULL; - root = list_entry(root->root_list.next, - struct ctl_table_root, root_list); - } while (!entry && root != &sysctl_table_root); + entry = find_entry(&head, dir, name, namelen); + if (entry && use_table(head)) + *phead = head; + else + entry = NULL; spin_unlock(&sysctl_lock); return entry; } static struct ctl_table_header *next_usable_entry(struct ctl_dir *dir, - struct ctl_table_root *root, struct list_head *tmp) + struct list_head *tmp) { - struct nsproxy *namespaces = current->nsproxy; - struct list_head *header_list; + struct ctl_table_set *set = dir->header.set; struct ctl_table_header *head; - goto next; - for (;;) { + for (tmp = tmp->next; tmp != &set->list; tmp = tmp->next) { head = list_entry(tmp, struct ctl_table_header, ctl_entry); - root = head->root; if (head->parent != dir || !head->ctl_table->procname || !use_table(head)) - goto next; - - return head; - next: - tmp = tmp->next; - header_list = lookup_header_list(root, namespaces); - if (tmp != header_list) continue; - do { - root = list_entry(root->root_list.next, - struct ctl_table_root, root_list); - if (root == &sysctl_table_root) - goto out; - header_list = lookup_header_list(root, namespaces); - } while (list_empty(header_list)); - tmp = header_list->next; + return head; } -out: return NULL; } @@ -288,8 +265,7 @@ static void first_entry(struct ctl_dir *dir, struct ctl_table *entry = NULL; spin_lock(&sysctl_lock); - head = next_usable_entry(dir, &sysctl_table_root, - &sysctl_table_root.default_set.list); + head = next_usable_entry(dir, &dir->header.set->list); spin_unlock(&sysctl_lock); if (head) entry = head->ctl_table; @@ -306,7 +282,7 @@ static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentr if (!entry->procname) { spin_lock(&sysctl_lock); unuse_table(head); - head = next_usable_entry(head->parent, head->root, &head->ctl_entry); + head = next_usable_entry(head->parent, &head->ctl_entry); spin_unlock(&sysctl_lock); if (head) entry = head->ctl_table; @@ -317,9 +293,6 @@ static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentr void register_sysctl_root(struct ctl_table_root *root) { - spin_lock(&sysctl_lock); - list_add_tail(&root->root_list, &sysctl_table_root.root_list); - spin_unlock(&sysctl_lock); } /* @@ -386,7 +359,7 @@ static struct ctl_table_header *grab_header(struct inode *inode) { struct ctl_table_header *head = PROC_I(inode)->sysctl; if (!head) - head = &sysctl_root_dir.header; + head = &sysctl_table_root.default_set.dir.header; return sysctl_head_grab(head); } @@ -400,6 +373,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, struct inode *inode; struct dentry *err = ERR_PTR(-ENOENT); struct ctl_dir *ctl_dir; + int ret; if (IS_ERR(head)) return ERR_CAST(head); @@ -410,6 +384,11 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, if (!p) goto out; + ret = sysctl_follow_link(&h, &p, current->nsproxy); + err = ERR_PTR(ret); + if (ret) + goto out; + err = ERR_PTR(-ENOMEM); inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); if (h) @@ -547,6 +526,25 @@ static int proc_sys_fill_cache(struct file *filp, void *dirent, return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); } +static int proc_sys_link_fill_cache(struct file *filp, void *dirent, + filldir_t filldir, + struct ctl_table_header *head, + struct ctl_table *table) +{ + int err, ret = 0; + head = sysctl_head_grab(head); + + /* It is not an error if we can not follow the link ignore it */ + err = sysctl_follow_link(&head, &table, current->nsproxy); + if (err) + goto out; + + ret = proc_sys_fill_cache(filp, dirent, filldir, head, table); +out: + sysctl_head_finish(head); + return ret; +} + static int scan(struct ctl_table_header *head, ctl_table *table, unsigned long *pos, struct file *file, void *dirent, filldir_t filldir) @@ -556,7 +554,10 @@ static int scan(struct ctl_table_header *head, ctl_table *table, if ((*pos)++ < file->f_pos) return 0; - res = proc_sys_fill_cache(file, dirent, filldir, head, table); + if (unlikely(S_ISLNK(table->mode))) + res = proc_sys_link_fill_cache(file, dirent, filldir, head, table); + else + res = proc_sys_fill_cache(file, dirent, filldir, head, table); if (res == 0) file->f_pos = *pos; @@ -757,13 +758,13 @@ static const struct dentry_operations proc_sys_dentry_operations = { .d_compare = proc_sys_compare, }; -static struct ctl_dir *find_subdir(struct ctl_table_set *set, struct ctl_dir *dir, - const char *name, int namelen) +static struct ctl_dir *find_subdir(struct ctl_dir *dir, + const char *name, int namelen) { struct ctl_table_header *head; struct ctl_table *entry; - entry = find_entry(&head, set, dir, name, namelen); + entry = find_entry(&head, dir, name, namelen); if (!entry) return ERR_PTR(-ENOENT); if (S_ISDIR(entry->mode)) @@ -772,7 +773,7 @@ static struct ctl_dir *find_subdir(struct ctl_table_set *set, struct ctl_dir *di } static struct ctl_dir *new_dir(struct ctl_table_set *set, - const char *name, int namelen) + const char *name, int namelen) { struct ctl_table *table; struct ctl_dir *new; @@ -789,22 +790,19 @@ static struct ctl_dir *new_dir(struct ctl_table_set *set, new_name[namelen] = '\0'; table[0].procname = new_name; table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO; - init_header(&new->header, set->root, set, table); + init_header(&new->header, set->dir.header.root, set, table); return new; } -static struct ctl_dir *get_subdir(struct ctl_table_set *set, - struct ctl_dir *dir, const char *name, int namelen) +static struct ctl_dir *get_subdir(struct ctl_dir *dir, + const char *name, int namelen) { + struct ctl_table_set *set = dir->header.set; struct ctl_dir *subdir, *new = NULL; spin_lock(&sysctl_lock); - subdir = find_subdir(dir->header.set, dir, name, namelen); - if (!IS_ERR(subdir)) - goto found; - if ((PTR_ERR(subdir) == -ENOENT) && set != dir->header.set) - subdir = find_subdir(set, dir, name, namelen); + subdir = find_subdir(dir, name, namelen); if (!IS_ERR(subdir)) goto found; if (PTR_ERR(subdir) != -ENOENT) @@ -817,13 +815,14 @@ static struct ctl_dir *get_subdir(struct ctl_table_set *set, if (!new) goto failed; - subdir = find_subdir(set, dir, name, namelen); + subdir = find_subdir(dir, name, namelen); if (!IS_ERR(subdir)) goto found; if (PTR_ERR(subdir) != -ENOENT) goto failed; - insert_header(dir, &new->header); + if (insert_header(dir, &new->header)) + goto failed; subdir = new; found: subdir->header.nreg++; @@ -841,6 +840,57 @@ failed: return subdir; } +static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir) +{ + struct ctl_dir *parent; + const char *procname; + if (!dir->header.parent) + return &set->dir; + parent = xlate_dir(set, dir->header.parent); + if (IS_ERR(parent)) + return parent; + procname = dir->header.ctl_table[0].procname; + return find_subdir(parent, procname, strlen(procname)); +} + +static int sysctl_follow_link(struct ctl_table_header **phead, + struct ctl_table **pentry, struct nsproxy *namespaces) +{ + struct ctl_table_header *head; + struct ctl_table_root *root; + struct ctl_table_set *set; + struct ctl_table *entry; + struct ctl_dir *dir; + int ret; + + /* Get out quickly if not a link */ + if (!S_ISLNK((*pentry)->mode)) + return 0; + + ret = 0; + spin_lock(&sysctl_lock); + root = (*pentry)->data; + set = lookup_header_set(root, namespaces); + dir = xlate_dir(set, (*phead)->parent); + if (IS_ERR(dir)) + ret = PTR_ERR(dir); + else { + const char *procname = (*pentry)->procname; + head = NULL; + entry = find_entry(&head, dir, procname, strlen(procname)); + ret = -ENOENT; + if (entry && use_table(head)) { + unuse_table(*phead); + *phead = head; + *pentry = entry; + ret = 0; + } + } + + spin_unlock(&sysctl_lock); + return ret; +} + static int sysctl_check_table_dups(const char *path, struct ctl_table *old, struct ctl_table *table) { @@ -859,30 +909,21 @@ static int sysctl_check_table_dups(const char *path, struct ctl_table *old, return error; } -static int sysctl_check_dups(struct nsproxy *namespaces, - struct ctl_dir *dir, +static int sysctl_check_dups(struct ctl_dir *dir, const char *path, struct ctl_table *table) { - struct ctl_table_root *root; struct ctl_table_set *set; struct ctl_table_header *head; int error = 0; - root = &sysctl_table_root; - do { - set = lookup_header_set(root, namespaces); - - list_for_each_entry(head, &set->list, ctl_entry) { - if (head->unregistering) - continue; - if (head->parent != dir) - continue; - error = sysctl_check_table_dups(path, head->ctl_table, - table); - } - root = list_entry(root->root_list.next, - struct ctl_table_root, root_list); - } while (root != &sysctl_table_root); + set = dir->header.set; + list_for_each_entry(head, &set->list, ctl_entry) { + if (head->unregistering) + continue; + if (head->parent != dir) + continue; + error = sysctl_check_table_dups(path, head->ctl_table, table); + } return error; } @@ -932,6 +973,115 @@ static int sysctl_check_table(const char *path, struct ctl_table *table) return err; } +static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table *table, + struct ctl_table_root *link_root) +{ + struct ctl_table *link_table, *entry, *link; + struct ctl_table_header *links; + char *link_name; + int nr_entries, name_bytes; + + name_bytes = 0; + nr_entries = 0; + for (entry = table; entry->procname; entry++) { + nr_entries++; + name_bytes += strlen(entry->procname) + 1; + } + + links = kzalloc(sizeof(struct ctl_table_header) + + sizeof(struct ctl_table)*(nr_entries + 1) + + name_bytes, + GFP_KERNEL); + + if (!links) + return NULL; + + link_table = (struct ctl_table *)(links + 1); + link_name = (char *)&link_table[nr_entries + 1]; + + for (link = link_table, entry = table; entry->procname; link++, entry++) { + int len = strlen(entry->procname) + 1; + memcpy(link_name, entry->procname, len); + link->procname = link_name; + link->mode = S_IFLNK|S_IRWXUGO; + link->data = link_root; + link_name += len; + } + init_header(links, dir->header.root, dir->header.set, link_table); + links->nreg = nr_entries; + + return links; +} + +static bool get_links(struct ctl_dir *dir, + struct ctl_table *table, struct ctl_table_root *link_root) +{ + struct ctl_table_header *head; + struct ctl_table *entry, *link; + + /* Are there links available for every entry in table? */ + for (entry = table; entry->procname; entry++) { + const char *procname = entry->procname; + link = find_entry(&head, dir, procname, strlen(procname)); + if (!link) + return false; + if (S_ISDIR(link->mode) && S_ISDIR(entry->mode)) + continue; + if (S_ISLNK(link->mode) && (link->data == link_root)) + continue; + return false; + } + + /* The checks passed. Increase the registration count on the links */ + for (entry = table; entry->procname; entry++) { + const char *procname = entry->procname; + link = find_entry(&head, dir, procname, strlen(procname)); + head->nreg++; + } + return true; +} + +static int insert_links(struct ctl_table_header *head) +{ + struct ctl_table_set *root_set = &sysctl_table_root.default_set; + struct ctl_dir *core_parent = NULL; + struct ctl_table_header *links; + int err; + + if (head->set == root_set) + return 0; + + core_parent = xlate_dir(root_set, head->parent); + if (IS_ERR(core_parent)) + return 0; + + if (get_links(core_parent, head->ctl_table, head->root)) + return 0; + + core_parent->header.nreg++; + spin_unlock(&sysctl_lock); + + links = new_links(core_parent, head->ctl_table, head->root); + + spin_lock(&sysctl_lock); + err = -ENOMEM; + if (!links) + goto out; + + err = 0; + if (get_links(core_parent, head->ctl_table, head->root)) { + kfree(links); + goto out; + } + + err = insert_header(core_parent, links); + if (err) + kfree(links); +out: + drop_sysctl_table(&core_parent->header); + return err; +} + /** * __register_sysctl_table - register a leaf sysctl table * @root: List of sysctl headers to register on @@ -980,6 +1130,7 @@ struct ctl_table_header *__register_sysctl_table( struct nsproxy *namespaces, const char *path, struct ctl_table *table) { + struct ctl_table_header *links = NULL; struct ctl_table_header *header; const char *name, *nextname; struct ctl_table_set *set; @@ -995,7 +1146,7 @@ struct ctl_table_header *__register_sysctl_table( spin_lock(&sysctl_lock); header->set = set = lookup_header_set(root, namespaces); - dir = &sysctl_root_dir; + dir = &set->dir; dir->header.nreg++; spin_unlock(&sysctl_lock); @@ -1012,22 +1163,28 @@ struct ctl_table_header *__register_sysctl_table( if (namelen == 0) continue; - dir = get_subdir(set, dir, name, namelen); + dir = get_subdir(dir, name, namelen); if (IS_ERR(dir)) goto fail; } + spin_lock(&sysctl_lock); - if (sysctl_check_dups(namespaces, dir, path, table)) + if (sysctl_check_dups(dir, path, table)) + goto fail_put_dir_locked; + + if (insert_header(dir, header)) goto fail_put_dir_locked; - insert_header(dir, header); + drop_sysctl_table(&dir->header); spin_unlock(&sysctl_lock); return header; + fail_put_dir_locked: drop_sysctl_table(&dir->header); spin_unlock(&sysctl_lock); fail: + kfree(links); kfree(header); dump_stack(); return NULL; @@ -1249,6 +1406,40 @@ struct ctl_table_header *register_sysctl_table(struct ctl_table *table) } EXPORT_SYMBOL(register_sysctl_table); +static void put_links(struct ctl_table_header *header) +{ + struct ctl_table_set *root_set = &sysctl_table_root.default_set; + struct ctl_table_root *root = header->root; + struct ctl_dir *parent = header->parent; + struct ctl_dir *core_parent; + struct ctl_table *entry; + + if (header->set == root_set) + return; + + core_parent = xlate_dir(root_set, parent); + if (IS_ERR(core_parent)) + return; + + for (entry = header->ctl_table; entry->procname; entry++) { + struct ctl_table_header *link_head; + struct ctl_table *link; + const char *name = entry->procname; + + link = find_entry(&link_head, core_parent, name, strlen(name)); + if (link && + ((S_ISDIR(link->mode) && S_ISDIR(entry->mode)) || + (S_ISLNK(link->mode) && (link->data == root)))) { + drop_sysctl_table(link_head); + } + else { + printk(KERN_ERR "sysctl link missing during unregister: "); + sysctl_print_dir(parent); + printk(KERN_CONT "/%s\n", name); + } + } +} + static void drop_sysctl_table(struct ctl_table_header *header) { struct ctl_dir *parent = header->parent; @@ -1256,6 +1447,7 @@ static void drop_sysctl_table(struct ctl_table_header *header) if (--header->nreg) return; + put_links(header); start_unregistering(header); if (!--header->count) kfree_rcu(header, rcu); @@ -1301,13 +1493,14 @@ void unregister_sysctl_table(struct ctl_table_header * header) } EXPORT_SYMBOL(unregister_sysctl_table); -void setup_sysctl_set(struct ctl_table_set *p, +void setup_sysctl_set(struct ctl_table_set *set, struct ctl_table_root *root, int (*is_seen)(struct ctl_table_set *)) { - INIT_LIST_HEAD(&p->list); - p->root = root; - p->is_seen = is_seen; + memset(set, sizeof(*set), 0); + INIT_LIST_HEAD(&set->list); + set->is_seen = is_seen; + init_header(&set->dir.header, root, set, root_table); } void retire_sysctl_set(struct ctl_table_set *set) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 3084b62..2a1446a 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1051,12 +1051,11 @@ struct ctl_dir { struct ctl_table_set { struct list_head list; - struct ctl_table_root *root; int (*is_seen)(struct ctl_table_set *); + struct ctl_dir dir; }; struct ctl_table_root { - struct list_head root_list; struct ctl_table_set default_set; struct ctl_table_set *(*lookup)(struct ctl_table_root *root, struct nsproxy *namespaces); -- cgit v0.10.2 From 60a47a2e823cbe6b609346bffff61a00c0c76470 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 8 Jan 2012 00:02:37 -0800 Subject: sysctl: Modify __register_sysctl_paths to take a set instead of a root and an nsproxy An nsproxy argument here has always been awkard and now the nsproxy argument is completely unnecessary so remove it, replacing it with the set we want the registered tables to show up in. Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index ec54a57..e0d3e7e 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1084,8 +1084,7 @@ out: /** * __register_sysctl_table - register a leaf sysctl table - * @root: List of sysctl headers to register on - * @namespaces: Data to compute which lists of sysctl entries are visible + * @set: Sysctl tree to register on * @path: The path to the directory the sysctl table is in. * @table: the top-level table structure * @@ -1126,26 +1125,24 @@ out: * to the table header on success. */ struct ctl_table_header *__register_sysctl_table( - struct ctl_table_root *root, - struct nsproxy *namespaces, + struct ctl_table_set *set, const char *path, struct ctl_table *table) { + struct ctl_table_root *root = set->dir.header.root; struct ctl_table_header *links = NULL; struct ctl_table_header *header; const char *name, *nextname; - struct ctl_table_set *set; struct ctl_dir *dir; header = kzalloc(sizeof(struct ctl_table_header), GFP_KERNEL); if (!header) return NULL; - init_header(header, root, NULL, table); + init_header(header, root, set, table); if (sysctl_check_table(path, table)) goto fail; spin_lock(&sysctl_lock); - header->set = set = lookup_header_set(root, namespaces); dir = &set->dir; dir->header.nreg++; spin_unlock(&sysctl_lock); @@ -1223,8 +1220,7 @@ static int count_subheaders(struct ctl_table *table) } static int register_leaf_sysctl_tables(const char *path, char *pos, - struct ctl_table_header ***subheader, - struct ctl_table_root *root, struct nsproxy *namespaces, + struct ctl_table_header ***subheader, struct ctl_table_set *set, struct ctl_table *table) { struct ctl_table *ctl_table_arg = NULL; @@ -1261,7 +1257,7 @@ static int register_leaf_sysctl_tables(const char *path, char *pos, /* Register everything except a directory full of subdirectories */ if (nr_files || !nr_dirs) { struct ctl_table_header *header; - header = __register_sysctl_table(root, namespaces, path, files); + header = __register_sysctl_table(set, path, files); if (!header) { kfree(ctl_table_arg); goto out; @@ -1286,7 +1282,7 @@ static int register_leaf_sysctl_tables(const char *path, char *pos, goto out; err = register_leaf_sysctl_tables(path, child_pos, subheader, - root, namespaces, entry->child); + set, entry->child); pos[0] = '\0'; if (err) goto out; @@ -1299,8 +1295,7 @@ out: /** * __register_sysctl_paths - register a sysctl table hierarchy - * @root: List of sysctl headers to register on - * @namespaces: Data to compute which lists of sysctl entries are visible + * @set: Sysctl tree to register on * @path: The path to the directory the sysctl table is in. * @table: the top-level table structure * @@ -1310,8 +1305,7 @@ out: * See __register_sysctl_table for more details. */ struct ctl_table_header *__register_sysctl_paths( - struct ctl_table_root *root, - struct nsproxy *namespaces, + struct ctl_table_set *set, const struct ctl_path *path, struct ctl_table *table) { struct ctl_table *ctl_table_arg = table; @@ -1337,7 +1331,7 @@ struct ctl_table_header *__register_sysctl_paths( table = table->child; } if (nr_subheaders == 1) { - header = __register_sysctl_table(root, namespaces, new_path, table); + header = __register_sysctl_table(set, new_path, table); if (header) header->ctl_table_arg = ctl_table_arg; } else { @@ -1351,7 +1345,7 @@ struct ctl_table_header *__register_sysctl_paths( header->ctl_table_arg = ctl_table_arg; if (register_leaf_sysctl_tables(new_path, pos, &subheader, - root, namespaces, table)) + set, table)) goto err_register_leaves; } @@ -1384,7 +1378,7 @@ err_register_leaves: struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, struct ctl_table *table) { - return __register_sysctl_paths(&sysctl_table_root, current->nsproxy, + return __register_sysctl_paths(&sysctl_table_root.default_set, path, table); } EXPORT_SYMBOL(register_sysctl_paths); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 2a1446a..cec5941 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1079,10 +1079,10 @@ extern void retire_sysctl_set(struct ctl_table_set *set); void register_sysctl_root(struct ctl_table_root *root); struct ctl_table_header *__register_sysctl_table( - struct ctl_table_root *root, struct nsproxy *namespaces, + struct ctl_table_set *set, const char *path, struct ctl_table *table); struct ctl_table_header *__register_sysctl_paths( - struct ctl_table_root *root, struct nsproxy *namespaces, + struct ctl_table_set *set, const struct ctl_path *path, struct ctl_table *table); struct ctl_table_header *register_sysctl_table(struct ctl_table * table); struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, diff --git a/net/sysctl_net.c b/net/sysctl_net.c index e998c64..c3e65ae 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -105,19 +105,15 @@ subsys_initcall(net_sysctl_init); struct ctl_table_header *register_net_sysctl_table(struct net *net, const struct ctl_path *path, struct ctl_table *table) { - struct nsproxy namespaces; - namespaces = *current->nsproxy; - namespaces.net_ns = net; - return __register_sysctl_paths(&net_sysctl_root, - &namespaces, path, table); + return __register_sysctl_paths(&net->sysctls, path, table); } EXPORT_SYMBOL_GPL(register_net_sysctl_table); struct ctl_table_header *register_net_sysctl_rotable(const struct ctl_path *path, struct ctl_table *table) { - return __register_sysctl_paths(&net_sysctl_ro_root, - &init_nsproxy, path, table); + return __register_sysctl_paths(&net_sysctl_ro_root.default_set, + path, table); } EXPORT_SYMBOL_GPL(register_net_sysctl_rotable); -- cgit v0.10.2 From e54012cede6749528899f66a72312522a179d427 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 18 Jan 2012 22:57:15 -0800 Subject: sysctl: Move sysctl_check_dups into insert_header Simplify the callers of insert_header by removing explicit calls to check for duplicates and instead have insert_header do the work. This makes the code slightly more maintainable by enabling changes to data structures where the insertion of new entries without duplicate suppression is not possible. There is not always a convenient path string where insert_header is called so modify sysctl_check_dups to use sysctl_print_dir when printing the full path when a duplicate is discovered. Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index e0d3e7e..160d578 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -52,6 +52,7 @@ static int sysctl_follow_link(struct ctl_table_header **phead, struct ctl_table **pentry, struct nsproxy *namespaces); static int insert_links(struct ctl_table_header *head); static void put_links(struct ctl_table_header *header); +static int sysctl_check_dups(struct ctl_dir *dir, struct ctl_table *table); static void sysctl_print_dir(struct ctl_dir *dir) { @@ -123,6 +124,10 @@ static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) { int err; + err = sysctl_check_dups(dir, header->ctl_table); + if (err) + return err; + dir->header.nreg++; header->parent = dir; err = insert_links(header); @@ -891,7 +896,7 @@ static int sysctl_follow_link(struct ctl_table_header **phead, return ret; } -static int sysctl_check_table_dups(const char *path, struct ctl_table *old, +static int sysctl_check_table_dups(struct ctl_dir *dir, struct ctl_table *old, struct ctl_table *table) { struct ctl_table *entry, *test; @@ -900,8 +905,9 @@ static int sysctl_check_table_dups(const char *path, struct ctl_table *old, for (entry = old; entry->procname; entry++) { for (test = table; test->procname; test++) { if (strcmp(entry->procname, test->procname) == 0) { - printk(KERN_ERR "sysctl duplicate entry: %s/%s\n", - path, test->procname); + printk(KERN_ERR "sysctl duplicate entry: "); + sysctl_print_dir(dir); + printk(KERN_CONT "/%s\n", test->procname); error = -EEXIST; } } @@ -909,8 +915,7 @@ static int sysctl_check_table_dups(const char *path, struct ctl_table *old, return error; } -static int sysctl_check_dups(struct ctl_dir *dir, - const char *path, struct ctl_table *table) +static int sysctl_check_dups(struct ctl_dir *dir, struct ctl_table *table) { struct ctl_table_set *set; struct ctl_table_header *head; @@ -922,7 +927,7 @@ static int sysctl_check_dups(struct ctl_dir *dir, continue; if (head->parent != dir) continue; - error = sysctl_check_table_dups(path, head->ctl_table, table); + error = sysctl_check_table_dups(dir, head->ctl_table, table); } return error; } @@ -1166,9 +1171,6 @@ struct ctl_table_header *__register_sysctl_table( } spin_lock(&sysctl_lock); - if (sysctl_check_dups(dir, path, table)) - goto fail_put_dir_locked; - if (insert_header(dir, header)) goto fail_put_dir_locked; -- cgit v0.10.2 From 9e3d47df35abd6430fed04fb40a76c7358b1e815 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 7 Jan 2012 23:45:12 -0800 Subject: sysctl: Make the header lists per directory. Slightly enhance efficiency and clarity of the code by making the header list per directory instead of per set. Benchmark before: make-dummies 0 999 -> 0.63s rmmod dummy -> 0.12s make-dummies 0 9999 -> 2m35s rmmod dummy -> 18s Benchmark after: make-dummies 0 999 -> 0.32s rmmod dummy -> 0.12s make-dummies 0 9999 -> 1m17s rmmod dummy -> 17s Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 160d578..e971ccc 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -33,12 +33,12 @@ static struct ctl_table root_table[] = { { } }; static struct ctl_table_root sysctl_table_root = { - .default_set.list = LIST_HEAD_INIT(sysctl_table_root.default_set.dir.header.ctl_entry), + .default_set.dir.list = LIST_HEAD_INIT(sysctl_table_root.default_set.dir.list), .default_set.dir.header = { {{.count = 1, .nreg = 1, .ctl_table = root_table, - .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, + .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.dir.header.ctl_entry),}}, .ctl_table_arg = root_table, .root = &sysctl_table_root, .set = &sysctl_table_root.default_set, @@ -79,15 +79,12 @@ static int namecmp(const char *name1, int len1, const char *name2, int len2) static struct ctl_table *find_entry(struct ctl_table_header **phead, struct ctl_dir *dir, const char *name, int namelen) { - struct ctl_table_set *set = dir->header.set; struct ctl_table_header *head; struct ctl_table *entry; - list_for_each_entry(head, &set->list, ctl_entry) { + list_for_each_entry(head, &dir->list, ctl_entry) { if (head->unregistering) continue; - if (head->parent != dir) - continue; for (entry = head->ctl_table; entry->procname; entry++) { const char *procname = entry->procname; if (namecmp(procname, strlen(procname), name, namelen) == 0) { @@ -133,7 +130,7 @@ static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) err = insert_links(header); if (err) goto fail_links; - list_add_tail(&header->ctl_entry, &header->set->list); + list_add_tail(&header->ctl_entry, &header->parent->list); return 0; fail_links: header->parent = NULL; @@ -247,14 +244,12 @@ static struct ctl_table *lookup_entry(struct ctl_table_header **phead, static struct ctl_table_header *next_usable_entry(struct ctl_dir *dir, struct list_head *tmp) { - struct ctl_table_set *set = dir->header.set; struct ctl_table_header *head; - for (tmp = tmp->next; tmp != &set->list; tmp = tmp->next) { + for (tmp = tmp->next; tmp != &dir->list; tmp = tmp->next) { head = list_entry(tmp, struct ctl_table_header, ctl_entry); - if (head->parent != dir || - !head->ctl_table->procname || + if (!head->ctl_table->procname || !use_table(head)) continue; @@ -270,7 +265,7 @@ static void first_entry(struct ctl_dir *dir, struct ctl_table *entry = NULL; spin_lock(&sysctl_lock); - head = next_usable_entry(dir, &dir->header.set->list); + head = next_usable_entry(dir, &dir->list); spin_unlock(&sysctl_lock); if (head) entry = head->ctl_table; @@ -793,6 +788,7 @@ static struct ctl_dir *new_dir(struct ctl_table_set *set, new_name = (char *)(table + 2); memcpy(new_name, name, namelen); new_name[namelen] = '\0'; + INIT_LIST_HEAD(&new->list); table[0].procname = new_name; table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO; init_header(&new->header, set->dir.header.root, set, table); @@ -917,12 +913,10 @@ static int sysctl_check_table_dups(struct ctl_dir *dir, struct ctl_table *old, static int sysctl_check_dups(struct ctl_dir *dir, struct ctl_table *table) { - struct ctl_table_set *set; struct ctl_table_header *head; int error = 0; - set = dir->header.set; - list_for_each_entry(head, &set->list, ctl_entry) { + list_for_each_entry(head, &dir->list, ctl_entry) { if (head->unregistering) continue; if (head->parent != dir) @@ -1494,14 +1488,14 @@ void setup_sysctl_set(struct ctl_table_set *set, int (*is_seen)(struct ctl_table_set *)) { memset(set, sizeof(*set), 0); - INIT_LIST_HEAD(&set->list); set->is_seen = is_seen; + INIT_LIST_HEAD(&set->dir.list); init_header(&set->dir.header, root, set, root_table); } void retire_sysctl_set(struct ctl_table_set *set) { - WARN_ON(!list_empty(&set->list)); + WARN_ON(!list_empty(&set->dir.list)); } int __init proc_sys_init(void) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index cec5941..36dec75 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1047,10 +1047,10 @@ struct ctl_table_header struct ctl_dir { /* Header must be at the start of ctl_dir */ struct ctl_table_header header; + struct list_head list; }; struct ctl_table_set { - struct list_head list; int (*is_seen)(struct ctl_table_set *); struct ctl_dir dir; }; -- cgit v0.10.2 From ac13ac6f4c6c0504d2c927862216f4e422a2c0b5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 9 Jan 2012 17:24:30 -0800 Subject: sysctl: Index sysctl directories with rbtrees. One of the most important jobs of sysctl is to export network stack tunables. Several of those tunables are per network device. In several instances people are running with 1000+ network devices in there network stacks, which makes the simple per directory linked list in sysctl a scaling bottleneck. Replace O(N^2) sysctl insertion and lookup times with O(NlogN) by using an rbtree to index the sysctl directories. Benchmark before: make-dummies 0 999 -> 0.32s rmmod dummy -> 0.12s make-dummies 0 9999 -> 1m17s rmmod dummy -> 17s Benchmark after: make-dummies 0 999 -> 0.074s rmmod dummy -> 0.070s make-dummies 0 9999 -> 3.4s rmmod dummy -> 0.44s Benchmark after (without dev_snmp6): make-dummies 0 9999 -> 0.75s rmmod dummy -> 0.44s make-dummies 0 99999 -> 11s rmmod dummy -> 4.3s At 10,000 dummy devices the bottleneck becomes the time to add and remove the files under /proc/sys/net/dev_snmp6. I have commented out the code that adds and removes files under /proc/sys/net/dev_snmp6 and taken measurments of creating and destroying 100,000 dummies to verify the sysctl continues to scale. Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index e971ccc..05c393a 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -33,12 +33,10 @@ static struct ctl_table root_table[] = { { } }; static struct ctl_table_root sysctl_table_root = { - .default_set.dir.list = LIST_HEAD_INIT(sysctl_table_root.default_set.dir.list), .default_set.dir.header = { {{.count = 1, .nreg = 1, - .ctl_table = root_table, - .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.dir.header.ctl_entry),}}, + .ctl_table = root_table }}, .ctl_table_arg = root_table, .root = &sysctl_table_root, .set = &sysctl_table_root.default_set, @@ -52,7 +50,6 @@ static int sysctl_follow_link(struct ctl_table_header **phead, struct ctl_table **pentry, struct nsproxy *namespaces); static int insert_links(struct ctl_table_header *head); static void put_links(struct ctl_table_header *header); -static int sysctl_check_dups(struct ctl_dir *dir, struct ctl_table *table); static void sysctl_print_dir(struct ctl_dir *dir) { @@ -81,28 +78,83 @@ static struct ctl_table *find_entry(struct ctl_table_header **phead, { struct ctl_table_header *head; struct ctl_table *entry; + struct rb_node *node = dir->root.rb_node; - list_for_each_entry(head, &dir->list, ctl_entry) { - if (head->unregistering) - continue; - for (entry = head->ctl_table; entry->procname; entry++) { - const char *procname = entry->procname; - if (namecmp(procname, strlen(procname), name, namelen) == 0) { - *phead = head; - return entry; - } + while (node) + { + struct ctl_node *ctl_node; + const char *procname; + int cmp; + + ctl_node = rb_entry(node, struct ctl_node, node); + head = ctl_node->header; + entry = &head->ctl_table[ctl_node - head->node]; + procname = entry->procname; + + cmp = namecmp(name, namelen, procname, strlen(procname)); + if (cmp < 0) + node = node->rb_left; + else if (cmp > 0) + node = node->rb_right; + else { + *phead = head; + return entry; } } return NULL; } +static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry) +{ + struct rb_node *node = &head->node[entry - head->ctl_table].node; + struct rb_node **p = &head->parent->root.rb_node; + struct rb_node *parent = NULL; + const char *name = entry->procname; + int namelen = strlen(name); + + while (*p) { + struct ctl_table_header *parent_head; + struct ctl_table *parent_entry; + struct ctl_node *parent_node; + const char *parent_name; + int cmp; + + parent = *p; + parent_node = rb_entry(parent, struct ctl_node, node); + parent_head = parent_node->header; + parent_entry = &parent_head->ctl_table[parent_node - parent_head->node]; + parent_name = parent_entry->procname; + + cmp = namecmp(name, namelen, parent_name, strlen(parent_name)); + if (cmp < 0) + p = &(*p)->rb_left; + else if (cmp > 0) + p = &(*p)->rb_right; + else { + printk(KERN_ERR "sysctl duplicate entry: "); + sysctl_print_dir(head->parent); + printk(KERN_CONT "/%s\n", entry->procname); + return -EEXIST; + } + } + + rb_link_node(node, parent, p); + return 0; +} + +static void erase_entry(struct ctl_table_header *head, struct ctl_table *entry) +{ + struct rb_node *node = &head->node[entry - head->ctl_table].node; + + rb_erase(node, &head->parent->root); +} + static void init_header(struct ctl_table_header *head, struct ctl_table_root *root, struct ctl_table_set *set, - struct ctl_table *table) + struct ctl_node *node, struct ctl_table *table) { head->ctl_table = table; head->ctl_table_arg = table; - INIT_LIST_HEAD(&head->ctl_entry); head->used = 0; head->count = 1; head->nreg = 1; @@ -110,28 +162,42 @@ static void init_header(struct ctl_table_header *head, head->root = root; head->set = set; head->parent = NULL; + head->node = node; + if (node) { + struct ctl_table *entry; + for (entry = table; entry->procname; entry++, node++) { + rb_init_node(&node->node); + node->header = head; + } + } } static void erase_header(struct ctl_table_header *head) { - list_del_init(&head->ctl_entry); + struct ctl_table *entry; + for (entry = head->ctl_table; entry->procname; entry++) + erase_entry(head, entry); } static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) { + struct ctl_table *entry; int err; - err = sysctl_check_dups(dir, header->ctl_table); - if (err) - return err; - dir->header.nreg++; header->parent = dir; err = insert_links(header); if (err) goto fail_links; - list_add_tail(&header->ctl_entry, &header->parent->list); + for (entry = header->ctl_table; entry->procname; entry++) { + err = insert_entry(header, entry); + if (err) + goto fail; + } return 0; +fail: + erase_header(header); + put_links(header); fail_links: header->parent = NULL; drop_sysctl_table(&dir->header); @@ -241,19 +307,14 @@ static struct ctl_table *lookup_entry(struct ctl_table_header **phead, return entry; } -static struct ctl_table_header *next_usable_entry(struct ctl_dir *dir, - struct list_head *tmp) +static struct ctl_node *first_usable_entry(struct rb_node *node) { - struct ctl_table_header *head; - - for (tmp = tmp->next; tmp != &dir->list; tmp = tmp->next) { - head = list_entry(tmp, struct ctl_table_header, ctl_entry); + struct ctl_node *ctl_node; - if (!head->ctl_table->procname || - !use_table(head)) - continue; - - return head; + for (;node; node = rb_next(node)) { + ctl_node = rb_entry(node, struct ctl_node, node); + if (use_table(ctl_node->header)) + return ctl_node; } return NULL; } @@ -261,14 +322,17 @@ static struct ctl_table_header *next_usable_entry(struct ctl_dir *dir, static void first_entry(struct ctl_dir *dir, struct ctl_table_header **phead, struct ctl_table **pentry) { - struct ctl_table_header *head; + struct ctl_table_header *head = NULL; struct ctl_table *entry = NULL; + struct ctl_node *ctl_node; spin_lock(&sysctl_lock); - head = next_usable_entry(dir, &dir->list); + ctl_node = first_usable_entry(rb_first(&dir->root)); spin_unlock(&sysctl_lock); - if (head) - entry = head->ctl_table; + if (ctl_node) { + head = ctl_node->header; + entry = &head->ctl_table[ctl_node - head->node]; + } *phead = head; *pentry = entry; } @@ -277,15 +341,17 @@ static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentr { struct ctl_table_header *head = *phead; struct ctl_table *entry = *pentry; + struct ctl_node *ctl_node = &head->node[entry - head->ctl_table]; - entry++; - if (!entry->procname) { - spin_lock(&sysctl_lock); - unuse_table(head); - head = next_usable_entry(head->parent, &head->ctl_entry); - spin_unlock(&sysctl_lock); - if (head) - entry = head->ctl_table; + spin_lock(&sysctl_lock); + unuse_table(head); + + ctl_node = first_usable_entry(rb_next(&ctl_node->node)); + spin_unlock(&sysctl_lock); + head = NULL; + if (ctl_node) { + head = ctl_node->header; + entry = &head->ctl_table[ctl_node - head->node]; } *phead = head; *pentry = entry; @@ -777,21 +843,23 @@ static struct ctl_dir *new_dir(struct ctl_table_set *set, { struct ctl_table *table; struct ctl_dir *new; + struct ctl_node *node; char *new_name; - new = kzalloc(sizeof(*new) + sizeof(struct ctl_table)*2 + - namelen + 1, GFP_KERNEL); + new = kzalloc(sizeof(*new) + sizeof(struct ctl_node) + + sizeof(struct ctl_table)*2 + namelen + 1, + GFP_KERNEL); if (!new) return NULL; - table = (struct ctl_table *)(new + 1); + node = (struct ctl_node *)(new + 1); + table = (struct ctl_table *)(node + 1); new_name = (char *)(table + 2); memcpy(new_name, name, namelen); new_name[namelen] = '\0'; - INIT_LIST_HEAD(&new->list); table[0].procname = new_name; table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO; - init_header(&new->header, set->dir.header.root, set, table); + init_header(&new->header, set->dir.header.root, set, node, table); return new; } @@ -892,40 +960,6 @@ static int sysctl_follow_link(struct ctl_table_header **phead, return ret; } -static int sysctl_check_table_dups(struct ctl_dir *dir, struct ctl_table *old, - struct ctl_table *table) -{ - struct ctl_table *entry, *test; - int error = 0; - - for (entry = old; entry->procname; entry++) { - for (test = table; test->procname; test++) { - if (strcmp(entry->procname, test->procname) == 0) { - printk(KERN_ERR "sysctl duplicate entry: "); - sysctl_print_dir(dir); - printk(KERN_CONT "/%s\n", test->procname); - error = -EEXIST; - } - } - } - return error; -} - -static int sysctl_check_dups(struct ctl_dir *dir, struct ctl_table *table) -{ - struct ctl_table_header *head; - int error = 0; - - list_for_each_entry(head, &dir->list, ctl_entry) { - if (head->unregistering) - continue; - if (head->parent != dir) - continue; - error = sysctl_check_table_dups(dir, head->ctl_table, table); - } - return error; -} - static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...) { struct va_format vaf; @@ -977,6 +1011,7 @@ static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table { struct ctl_table *link_table, *entry, *link; struct ctl_table_header *links; + struct ctl_node *node; char *link_name; int nr_entries, name_bytes; @@ -988,6 +1023,7 @@ static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table } links = kzalloc(sizeof(struct ctl_table_header) + + sizeof(struct ctl_node)*nr_entries + sizeof(struct ctl_table)*(nr_entries + 1) + name_bytes, GFP_KERNEL); @@ -995,7 +1031,8 @@ static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table if (!links) return NULL; - link_table = (struct ctl_table *)(links + 1); + node = (struct ctl_node *)(links + 1); + link_table = (struct ctl_table *)(node + nr_entries); link_name = (char *)&link_table[nr_entries + 1]; for (link = link_table, entry = table; entry->procname; link++, entry++) { @@ -1006,7 +1043,7 @@ static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table link->data = link_root; link_name += len; } - init_header(links, dir->header.root, dir->header.set, link_table); + init_header(links, dir->header.root, dir->header.set, node, link_table); links->nreg = nr_entries; return links; @@ -1132,12 +1169,20 @@ struct ctl_table_header *__register_sysctl_table( struct ctl_table_header *header; const char *name, *nextname; struct ctl_dir *dir; + struct ctl_table *entry; + struct ctl_node *node; + int nr_entries = 0; + + for (entry = table; entry->procname; entry++) + nr_entries++; - header = kzalloc(sizeof(struct ctl_table_header), GFP_KERNEL); + header = kzalloc(sizeof(struct ctl_table_header) + + sizeof(struct ctl_node)*nr_entries, GFP_KERNEL); if (!header) return NULL; - init_header(header, root, set, table); + node = (struct ctl_node *)(header + 1); + init_header(header, root, set, node, table); if (sysctl_check_table(path, table)) goto fail; @@ -1489,13 +1534,12 @@ void setup_sysctl_set(struct ctl_table_set *set, { memset(set, sizeof(*set), 0); set->is_seen = is_seen; - INIT_LIST_HEAD(&set->dir.list); - init_header(&set->dir.header, root, set, root_table); + init_header(&set->dir.header, root, set, NULL, root_table); } void retire_sysctl_set(struct ctl_table_set *set) { - WARN_ON(!list_empty(&set->dir.list)); + WARN_ON(!RB_EMPTY_ROOT(&set->dir.root)); } int __init proc_sys_init(void) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 36dec75..35c50ed 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -932,6 +932,7 @@ enum #include #include #include +#include /* For the /proc/sys support */ struct ctl_table; @@ -1023,6 +1024,11 @@ struct ctl_table void *extra2; }; +struct ctl_node { + struct rb_node node; + struct ctl_table_header *header; +}; + /* struct ctl_table_header is used to maintain dynamic lists of struct ctl_table trees. */ struct ctl_table_header @@ -1030,7 +1036,6 @@ struct ctl_table_header union { struct { struct ctl_table *ctl_table; - struct list_head ctl_entry; int used; int count; int nreg; @@ -1042,12 +1047,13 @@ struct ctl_table_header struct ctl_table_root *root; struct ctl_table_set *set; struct ctl_dir *parent; + struct ctl_node *node; }; struct ctl_dir { /* Header must be at the start of ctl_dir */ struct ctl_table_header header; - struct list_head list; + struct rb_root root; }; struct ctl_table_set { -- cgit v0.10.2 From fea478d4101a4285aa25c5bafaaf4cec35026fe0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Jan 2012 21:47:03 -0800 Subject: sysctl: Add register_sysctl for normal sysctl users The plan is to convert all callers of register_sysctl_table and register_sysctl_paths to register_sysctl. The interface to register_sysctl is enough nicer this should make the callers a bit more readable. Additionally after the conversion the 230 lines of backwards compatibility can be removed. Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 05c393a..8dc7f0e 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1228,6 +1228,23 @@ fail: return NULL; } +/** + * register_sysctl - register a sysctl table + * @path: The path to the directory the sysctl table is in. + * @table: the table structure + * + * Register a sysctl table. @table should be a filled in ctl_table + * array. A completely 0 filled entry terminates the table. + * + * See __register_sysctl_table for more details. + */ +struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table) +{ + return __register_sysctl_table(&sysctl_table_root.default_set, + path, table); +} +EXPORT_SYMBOL(register_sysctl); + static char *append_path(const char *path, char *pos, const char *name) { int namelen; diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 35c50ed..c34b4c8 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1090,6 +1090,7 @@ struct ctl_table_header *__register_sysctl_table( struct ctl_table_header *__register_sysctl_paths( struct ctl_table_set *set, const struct ctl_path *path, struct ctl_table *table); +struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table); struct ctl_table_header *register_sysctl_table(struct ctl_table * table); struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, struct ctl_table *table); -- cgit v0.10.2 From 47981787092aecb87dc3cb2d478455dcfb77516a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 30 Jan 2012 16:39:59 +0300 Subject: sysctl: remove an unused variable "links" is never used, so we can remove it. Signed-off-by: Dan Carpenter Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 8dc7f0e..1b1f5b8 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1165,7 +1165,6 @@ struct ctl_table_header *__register_sysctl_table( const char *path, struct ctl_table *table) { struct ctl_table_root *root = set->dir.header.root; - struct ctl_table_header *links = NULL; struct ctl_table_header *header; const char *name, *nextname; struct ctl_dir *dir; @@ -1222,7 +1221,6 @@ fail_put_dir_locked: drop_sysctl_table(&dir->header); spin_unlock(&sysctl_lock); fail: - kfree(links); kfree(header); dump_stack(); return NULL; -- cgit v0.10.2 From 1347440db6f76ec5ae0af8d8558387f571a5e1dd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 30 Jan 2012 16:40:29 +0300 Subject: sysctl: fix memset parameters in setup_sysctl_set() The current code is a nop. Signed-off-by: Dan Carpenter Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 1b1f5b8..27e265b 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1547,7 +1547,7 @@ void setup_sysctl_set(struct ctl_table_set *set, struct ctl_table_root *root, int (*is_seen)(struct ctl_table_set *)) { - memset(set, sizeof(*set), 0); + memset(set, 0, sizeof(*set)); set->is_seen = is_seen; init_header(&set->dir.header, root, set, NULL, root_table); } -- cgit v0.10.2 From 51f72f4a0f92e4abde33a8bca0fac9667575d035 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 30 Jan 2012 20:09:33 -0800 Subject: sysctl: An easier to read version of find_subdir Suggested-by: Lucian Adrian Grijincu Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 27e265b..ebe8b30 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -833,9 +833,9 @@ static struct ctl_dir *find_subdir(struct ctl_dir *dir, entry = find_entry(&head, dir, name, namelen); if (!entry) return ERR_PTR(-ENOENT); - if (S_ISDIR(entry->mode)) - return container_of(head, struct ctl_dir, header); - return ERR_PTR(-ENOTDIR); + if (!S_ISDIR(entry->mode)) + return ERR_PTR(-ENOTDIR); + return container_of(head, struct ctl_dir, header); } static struct ctl_dir *new_dir(struct ctl_table_set *set, -- cgit v0.10.2 From 0eb97f38d2bfaea289b44c5140a7b04e7b369bad Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 30 Jan 2012 20:37:51 -0800 Subject: sysctl: Correct error return from get_subdir When insert_header fails ensure we return the proper error value from get_subdir. In practice nothing cares, but there is no need to be sloppy. Reported-by: Lucian Adrian Grijincu Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index ebe8b30..722ec11 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -869,6 +869,7 @@ static struct ctl_dir *get_subdir(struct ctl_dir *dir, { struct ctl_table_set *set = dir->header.set; struct ctl_dir *subdir, *new = NULL; + int err; spin_lock(&sysctl_lock); subdir = find_subdir(dir, name, namelen); @@ -890,7 +891,9 @@ static struct ctl_dir *get_subdir(struct ctl_dir *dir, if (PTR_ERR(subdir) != -ENOENT) goto failed; - if (insert_header(dir, &new->header)) + err = insert_header(dir, &new->header); + subdir = ERR_PTR(err); + if (err) goto failed; subdir = new; found: -- cgit v0.10.2 From 60f126d93b210ae708e2a5bb4a3be2121831f2a0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 30 Jan 2012 21:23:52 -0800 Subject: sysctl: Comments to make the code clearer. Document get_subdir and that find_subdir alwasy takes a reference. Suggested-by: Lucian Adrian Grijincu Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 722ec11..e5601dc 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -73,6 +73,7 @@ static int namecmp(const char *name1, int len1, const char *name2, int len2) return cmp; } +/* Called under sysctl_lock */ static struct ctl_table *find_entry(struct ctl_table_header **phead, struct ctl_dir *dir, const char *name, int namelen) { @@ -864,6 +865,18 @@ static struct ctl_dir *new_dir(struct ctl_table_set *set, return new; } +/** + * get_subdir - find or create a subdir with the specified name. + * @dir: Directory to create the subdirectory in + * @name: The name of the subdirectory to find or create + * @namelen: The length of name + * + * Takes a directory with an elevated reference count so we know that + * if we drop the lock the directory will not go away. Upon success + * the reference is moved from @dir to the returned subdirectory. + * Upon error an error code is returned and the reference on @dir is + * simply dropped. + */ static struct ctl_dir *get_subdir(struct ctl_dir *dir, const char *name, int namelen) { @@ -885,12 +898,14 @@ static struct ctl_dir *get_subdir(struct ctl_dir *dir, if (!new) goto failed; + /* Was the subdir added while we dropped the lock? */ subdir = find_subdir(dir, name, namelen); if (!IS_ERR(subdir)) goto found; if (PTR_ERR(subdir) != -ENOENT) goto failed; + /* Nope. Use the our freshly made directory entry. */ err = insert_header(dir, &new->header); subdir = ERR_PTR(err); if (err) @@ -1190,6 +1205,7 @@ struct ctl_table_header *__register_sysctl_table( spin_lock(&sysctl_lock); dir = &set->dir; + /* Reference moved down the diretory tree get_subdir */ dir->header.nreg++; spin_unlock(&sysctl_lock); -- cgit v0.10.2 From 4e75732035d7e97e001bdf6e3149d3967c0221de Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 30 Jan 2012 21:24:59 -0800 Subject: sysctl: Don't call sysctl_follow_link unless we are a link. There are no functional changes. Just code motion to make it clear that we don't follow a link between sysctl roots unless the directory entry actually is a link. Suggested-by: Lucian Adrian Grijincu Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index e5601dc..a7708b7 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -451,10 +451,12 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, if (!p) goto out; - ret = sysctl_follow_link(&h, &p, current->nsproxy); - err = ERR_PTR(ret); - if (ret) - goto out; + if (S_ISLNK(p->mode)) { + ret = sysctl_follow_link(&h, &p, current->nsproxy); + err = ERR_PTR(ret); + if (ret) + goto out; + } err = ERR_PTR(-ENOMEM); inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); @@ -601,10 +603,12 @@ static int proc_sys_link_fill_cache(struct file *filp, void *dirent, int err, ret = 0; head = sysctl_head_grab(head); - /* It is not an error if we can not follow the link ignore it */ - err = sysctl_follow_link(&head, &table, current->nsproxy); - if (err) - goto out; + if (S_ISLNK(table->mode)) { + /* It is not an error if we can not follow the link ignore it */ + err = sysctl_follow_link(&head, &table, current->nsproxy); + if (err) + goto out; + } ret = proc_sys_fill_cache(filp, dirent, filldir, head, table); out: @@ -950,10 +954,6 @@ static int sysctl_follow_link(struct ctl_table_header **phead, struct ctl_dir *dir; int ret; - /* Get out quickly if not a link */ - if (!S_ISLNK((*pentry)->mode)) - return 0; - ret = 0; spin_lock(&sysctl_lock); root = (*pentry)->data; -- cgit v0.10.2 From 4e474a00d7ff746ed177ddae14fa8b2d4bad7a00 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 22 Mar 2012 14:42:22 -0700 Subject: sysctl: protect poll() in entries that may go away Protect code accessing ctl_table by grabbing the header with grab_header() and after releasing with sysctl_head_finish(). This is needed if poll() is called in entries created by modules: currently only hostname and domainname support poll(), but this bug may be triggered when/if modules use it and if user called poll() in a file that doesn't support it. Dave Jones reported the following when using a syscall fuzzer while hibernating/resuming: RIP: 0010:[] [] proc_sys_poll+0x4e/0x90 RAX: 0000000000000145 RBX: ffff88020cab6940 RCX: 0000000000000000 RDX: ffffffff81233df0 RSI: 6b6b6b6b6b6b6b6b RDI: ffff88020cab6940 [ ... ] Code: 00 48 89 fb 48 89 f1 48 8b 40 30 4c 8b 60 e8 b8 45 01 00 00 49 83 7c 24 28 00 74 2e 49 8b 74 24 30 48 85 f6 74 24 48 85 c9 75 32 <8b> 16 b8 45 01 00 00 48 63 d2 49 39 d5 74 10 8b 06 48 98 48 89 If an entry goes away while we are polling() it, ctl_table may not exist anymore. Reported-by: Dave Jones Signed-off-by: Lucas De Marchi Cc: Al Viro Cc: Linus Torvalds Cc: Alexey Dobriyan Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton Signed-off-by: Eric W. Biederman diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index a7708b7..47b474b 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -525,20 +525,32 @@ static ssize_t proc_sys_write(struct file *filp, const char __user *buf, static int proc_sys_open(struct inode *inode, struct file *filp) { + struct ctl_table_header *head = grab_header(inode); struct ctl_table *table = PROC_I(inode)->sysctl_entry; + /* sysctl was unregistered */ + if (IS_ERR(head)) + return PTR_ERR(head); + if (table->poll) filp->private_data = proc_sys_poll_event(table->poll); + sysctl_head_finish(head); + return 0; } static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) { struct inode *inode = filp->f_path.dentry->d_inode; + struct ctl_table_header *head = grab_header(inode); struct ctl_table *table = PROC_I(inode)->sysctl_entry; - unsigned long event = (unsigned long)filp->private_data; unsigned int ret = DEFAULT_POLLMASK; + unsigned long event; + + /* sysctl was unregistered */ + if (IS_ERR(head)) + return POLLERR | POLLHUP; if (!table->proc_handler) goto out; @@ -546,6 +558,7 @@ static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) if (!table->poll) goto out; + event = (unsigned long)filp->private_data; poll_wait(filp, &table->poll->wait, wait); if (event != atomic_read(&table->poll->event)) { @@ -554,6 +567,8 @@ static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) } out: + sysctl_head_finish(head); + return ret; } -- cgit v0.10.2