From dacdd0e04768da1fd2b24a6ee274c582b40d0c5b Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 17 Jul 2008 16:54:19 -0700 Subject: [PATCH] configfs: Include linux/err.h in linux/configfs.h We now use PTR_ERR() in the ->make_item() and ->make_group() operations. Folks including configfs.h need err.h. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 179589b..2495f23 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1094,7 +1094,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) kfree(name); if (ret) { /* - * If item == NULL, then link_obj() was never called. + * If ret != 0, then link_obj() was never called. * There are no extra references to clean up. */ goto out_put; diff --git a/include/linux/configfs.h b/include/linux/configfs.h index d62c19f..0a5491b 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -40,6 +40,7 @@ #include #include #include +#include #include -- cgit v0.10.2 From 4768e9b18dc63719209c68920d4ae52dc49b6161 Mon Sep 17 00:00:00 2001 From: Louis Rilling Date: Mon, 23 Jun 2008 14:16:17 +0200 Subject: [PATCH] configfs: Fix symlink() to a removing item The rule for configfs symlinks is that symlinks always point to valid config_items, and prevent the target from being removed. However, configfs_symlink() only checks that it can grab a reference on the target item, without ensuring that it remains alive until the symlink is correctly attached. This patch makes configfs_symlink() fail whenever the target is being removed, using the CONFIGFS_USET_DROPPING flag set by configfs_detach_prep() and protected by configfs_dirent_lock. This patch introduces a similar (weird?) behavior as with mkdir failures making rmdir fail: if symlink() races with rmdir() of the parent directory (or its youngest user-created ancestor if parent is a default group) or rmdir() of the target directory, and then fails in configfs_create(), this can make the racing rmdir() fail despite the concerned directory having no user-created entry (resp. no symlink pointing to it or one of its default groups) in the end. This behavior is fixed in later patches. Signed-off-by: Louis Rilling Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 2495f23..cb5ea44 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -370,6 +370,9 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex struct configfs_dirent *sd; int ret; + /* Mark that we're trying to drop the group */ + parent_sd->s_type |= CONFIGFS_USET_DROPPING; + ret = -EBUSY; if (!list_empty(&parent_sd->s_links)) goto out; @@ -385,8 +388,6 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex *wait_mutex = &sd->s_dentry->d_inode->i_mutex; return -EAGAIN; } - /* Mark that we're trying to drop the group */ - sd->s_type |= CONFIGFS_USET_DROPPING; /* * Yup, recursive. If there's a problem, blame @@ -414,12 +415,11 @@ static void configfs_detach_rollback(struct dentry *dentry) struct configfs_dirent *parent_sd = dentry->d_fsdata; struct configfs_dirent *sd; - list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { - if (sd->s_type & CONFIGFS_USET_DEFAULT) { + parent_sd->s_type &= ~CONFIGFS_USET_DROPPING; + + list_for_each_entry(sd, &parent_sd->s_children, s_sibling) + if (sd->s_type & CONFIGFS_USET_DEFAULT) configfs_detach_rollback(sd->s_dentry); - sd->s_type &= ~CONFIGFS_USET_DROPPING; - } - } } static void detach_attrs(struct config_item * item) diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index 0004d18..c12801a 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c @@ -78,6 +78,12 @@ static int create_link(struct config_item *parent_item, if (sl) { sl->sl_target = config_item_get(item); spin_lock(&configfs_dirent_lock); + if (target_sd->s_type & CONFIGFS_USET_DROPPING) { + spin_unlock(&configfs_dirent_lock); + config_item_put(item); + kfree(sl); + return -ENOENT; + } list_add(&sl->sl_list, &target_sd->s_links); spin_unlock(&configfs_dirent_lock); ret = configfs_create_link(sl, parent_item->ci_dentry, -- cgit v0.10.2 From 9a73d78cda750f12e25eb811878f2d9dbab1bc6e Mon Sep 17 00:00:00 2001 From: Louis Rilling Date: Fri, 20 Jun 2008 14:09:22 +0200 Subject: [PATCH] configfs: Fix failing symlink() making rmdir() fail On a similar pattern as mkdir() vs rmdir(), a failing symlink() may make rmdir() fail for the symlink's parent and the symlink's target as well. failing symlink() making target's rmdir() fail: process 1: process 2: symlink("A/S" -> "B") allow_link() create_link() attach to "B" links list rmdir("B") detach_prep("B") error because of new link configfs_create_link("A", "S") error (eg -ENOMEM) failing symlink() making parent's rmdir() fail: process 1: process 2: symlink("A/D/S" -> "B") allow_link() create_link() attach to "B" links list configfs_create_link("A/D", "S") make_dirent("A/D", "S") rmdir("A") detach_prep("A") detach_prep("A/D") error because of "S" create("S") error (eg -ENOMEM) We cannot use the same solution as for mkdir() vs rmdir(), since rmdir() on the target cannot wait on the i_mutex of the new symlink's parent without risking a deadlock (with other symlink() or sys_rename()). Instead we define a global mutex protecting all configfs symlinks attachment, so that rmdir() can avoid the races above. Signed-off-by: Louis Rilling Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index da015c1..5f61b26 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h @@ -51,6 +51,7 @@ struct configfs_dirent { #define CONFIGFS_USET_IN_MKDIR 0x0200 #define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR) +extern struct mutex configfs_symlink_mutex; extern spinlock_t configfs_dirent_lock; extern struct vfsmount * configfs_mount; diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index cb5ea44..4e228c8 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1207,6 +1207,11 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) return -EINVAL; } + /* + * Ensure that no racing symlink() will make detach_prep() fail while + * the new link is temporarily attached + */ + mutex_lock(&configfs_symlink_mutex); spin_lock(&configfs_dirent_lock); do { struct mutex *wait_mutex; @@ -1215,6 +1220,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) if (ret) { configfs_detach_rollback(dentry); spin_unlock(&configfs_dirent_lock); + mutex_unlock(&configfs_symlink_mutex); if (ret != -EAGAIN) { config_item_put(parent_item); return ret; @@ -1224,10 +1230,12 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) mutex_lock(wait_mutex); mutex_unlock(wait_mutex); + mutex_lock(&configfs_symlink_mutex); spin_lock(&configfs_dirent_lock); } } while (ret == -EAGAIN); spin_unlock(&configfs_dirent_lock); + mutex_unlock(&configfs_symlink_mutex); /* Get a working ref for the duration of this function */ item = configfs_get_config_item(dentry); @@ -1517,11 +1525,13 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys) mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex, I_MUTEX_PARENT); mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); + mutex_lock(&configfs_symlink_mutex); spin_lock(&configfs_dirent_lock); if (configfs_detach_prep(dentry, NULL)) { printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n"); } spin_unlock(&configfs_dirent_lock); + mutex_unlock(&configfs_symlink_mutex); configfs_detach_group(&group->cg_item); dentry->d_inode->i_flags |= S_DEAD; mutex_unlock(&dentry->d_inode->i_mutex); diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index c12801a..61a886d 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c @@ -31,6 +31,9 @@ #include #include "configfs_internal.h" +/* Protects attachments of new symlinks */ +DEFINE_MUTEX(configfs_symlink_mutex); + static int item_depth(struct config_item * item) { struct config_item * p = item; @@ -147,7 +150,9 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna ret = type->ct_item_ops->allow_link(parent_item, target_item); if (!ret) { + mutex_lock(&configfs_symlink_mutex); ret = create_link(parent_item, target_item, dentry); + mutex_unlock(&configfs_symlink_mutex); if (ret && type->ct_item_ops->drop_link) type->ct_item_ops->drop_link(parent_item, target_item); -- cgit v0.10.2 From 2a109f2a4155f168047aa2f5b3a170e279bef89a Mon Sep 17 00:00:00 2001 From: Louis Rilling Date: Fri, 4 Jul 2008 16:56:05 +0200 Subject: [PATCH] configfs: Prevent userspace from creating new entries under attaching directories process 1: process 2: configfs_mkdir("A") attach_group("A") attach_item("A") d_instantiate("A") populate_groups("A") mutex_lock("A") attach_group("A/B") attach_item("A") d_instantiate("A/B") mkdir("A/B/C") do_path_lookup("A/B/C", LOOKUP_PARENT) ok lookup_create("A/B/C") mutex_lock("A/B") ok configfs_mkdir("A/B/C") ok attach_group("A/C") attach_item("A/C") d_instantiate("A/C") populate_groups("A/C") mutex_lock("A/C") attach_group("A/C/D") attach_item("A/C/D") failure mutex_unlock("A/C") detach_groups("A/C") nothing to do mkdir("A/C/E") do_path_lookup("A/C/E", LOOKUP_PARENT) ok lookup_create("A/C/E") mutex_lock("A/C") ok configfs_mkdir("A/C/E") ok detach_item("A/C") d_delete("A/C") mutex_unlock("A") detach_groups("A") mutex_lock("A/B") detach_group("A/B") detach_groups("A/B") nothing since no _default_ group detach_item("A/B") mutex_unlock("A/B") d_delete("A/B") detach_item("A") d_delete("A") Two bugs: 1/ "A/B/C" and "A/C/E" are created, but never removed while their parent are removed in the end. The same could happen with symlink() instead of mkdir(). 2/ "A" and "A/C" inodes are not locked while detach_item() is called on them, which may probably confuse VFS. This commit fixes 1/, tagging new directories with CONFIGFS_USET_CREATING before building the inode and instantiating the dentry, and validating the whole group+default groups hierarchy in a second pass by clearing CONFIGFS_USET_CREATING. mkdir(), symlink(), lookup(), and dir_open() simply return -ENOENT if called in (or linking to) a directory tagged with CONFIGFS_USET_CREATING. This does not prevent userspace from calling stat() successfuly on such directories, but this prevents userspace from adding (children to | symlinking from/to | read/write attributes of | listing the contents of) not validated items. In other words, userspace will not interact with the subsystem on a new item until the new item creation completes correctly. It was first proposed to re-use CONFIGFS_USET_IN_MKDIR instead of a new flag CONFIGFS_USET_CREATING, but this generated conflicts when checking the target of a new symlink: a valid target directory in the middle of attaching a new user-created child item could be wrongly detected as being attached. 2/ is fixed by next commit. Signed-off-by: Louis Rilling Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index 5f61b26..762d287 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h @@ -49,6 +49,7 @@ struct configfs_dirent { #define CONFIGFS_USET_DEFAULT 0x0080 #define CONFIGFS_USET_DROPPING 0x0100 #define CONFIGFS_USET_IN_MKDIR 0x0200 +#define CONFIGFS_USET_CREATING 0x0400 #define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR) extern struct mutex configfs_symlink_mutex; @@ -67,6 +68,7 @@ extern void configfs_inode_exit(void); extern int configfs_create_file(struct config_item *, const struct configfs_attribute *); extern int configfs_make_dirent(struct configfs_dirent *, struct dentry *, void *, umode_t, int); +extern int configfs_dirent_is_ready(struct configfs_dirent *); extern int configfs_add_file(struct dentry *, const struct configfs_attribute *, int); extern void configfs_hash_and_remove(struct dentry * dir, const char * name); diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 4e228c8..647499a 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -185,7 +185,7 @@ static int create_dir(struct config_item * k, struct dentry * p, error = configfs_dirent_exists(p->d_fsdata, d->d_name.name); if (!error) error = configfs_make_dirent(p->d_fsdata, d, k, mode, - CONFIGFS_DIR); + CONFIGFS_DIR | CONFIGFS_USET_CREATING); if (!error) { error = configfs_create(d, mode, init_dir); if (!error) { @@ -209,6 +209,9 @@ static int create_dir(struct config_item * k, struct dentry * p, * configfs_create_dir - create a directory for an config_item. * @item: config_itemwe're creating directory for. * @dentry: config_item's dentry. + * + * Note: user-created entries won't be allowed under this new directory + * until it is validated by configfs_dir_set_ready() */ static int configfs_create_dir(struct config_item * item, struct dentry *dentry) @@ -231,6 +234,44 @@ static int configfs_create_dir(struct config_item * item, struct dentry *dentry) return error; } +/* + * Allow userspace to create new entries under a new directory created with + * configfs_create_dir(), and under all of its chidlren directories recursively. + * @sd configfs_dirent of the new directory to validate + * + * Caller must hold configfs_dirent_lock. + */ +static void configfs_dir_set_ready(struct configfs_dirent *sd) +{ + struct configfs_dirent *child_sd; + + sd->s_type &= ~CONFIGFS_USET_CREATING; + list_for_each_entry(child_sd, &sd->s_children, s_sibling) + if (child_sd->s_type & CONFIGFS_USET_CREATING) + configfs_dir_set_ready(child_sd); +} + +/* + * Check that a directory does not belong to a directory hierarchy being + * attached and not validated yet. + * @sd configfs_dirent of the directory to check + * + * @return non-zero iff the directory was validated + * + * Note: takes configfs_dirent_lock, so the result may change from false to true + * in two consecutive calls, but never from true to false. + */ +int configfs_dirent_is_ready(struct configfs_dirent *sd) +{ + int ret; + + spin_lock(&configfs_dirent_lock); + ret = !(sd->s_type & CONFIGFS_USET_CREATING); + spin_unlock(&configfs_dirent_lock); + + return ret; +} + int configfs_create_link(struct configfs_symlink *sl, struct dentry *parent, struct dentry *dentry) @@ -330,7 +371,19 @@ static struct dentry * configfs_lookup(struct inode *dir, struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata; struct configfs_dirent * sd; int found = 0; - int err = 0; + int err; + + /* + * Fake invisibility if dir belongs to a group/default groups hierarchy + * being attached + * + * This forbids userspace to read/write attributes of items which may + * not complete their initialization, since the dentries of the + * attributes won't be instantiated. + */ + err = -ENOENT; + if (!configfs_dirent_is_ready(parent_sd)) + goto out; list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { if (sd->s_type & CONFIGFS_NOT_PINNED) { @@ -353,6 +406,7 @@ static struct dentry * configfs_lookup(struct inode *dir, return simple_lookup(dir, dentry, nd); } +out: return ERR_PTR(err); } @@ -1044,6 +1098,16 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) } sd = dentry->d_parent->d_fsdata; + + /* + * Fake invisibility if dir belongs to a group/default groups hierarchy + * being attached + */ + if (!configfs_dirent_is_ready(sd)) { + ret = -ENOENT; + goto out; + } + if (!(sd->s_type & CONFIGFS_USET_DIR)) { ret = -EPERM; goto out; @@ -1142,6 +1206,8 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) spin_lock(&configfs_dirent_lock); sd->s_type &= ~CONFIGFS_USET_IN_MKDIR; + if (!ret) + configfs_dir_set_ready(dentry->d_fsdata); spin_unlock(&configfs_dirent_lock); out_unlink: @@ -1322,13 +1388,24 @@ static int configfs_dir_open(struct inode *inode, struct file *file) { struct dentry * dentry = file->f_path.dentry; struct configfs_dirent * parent_sd = dentry->d_fsdata; + int err; mutex_lock(&dentry->d_inode->i_mutex); - file->private_data = configfs_new_dirent(parent_sd, NULL); + /* + * Fake invisibility if dir belongs to a group/default groups hierarchy + * being attached + */ + err = -ENOENT; + if (configfs_dirent_is_ready(parent_sd)) { + file->private_data = configfs_new_dirent(parent_sd, NULL); + if (IS_ERR(file->private_data)) + err = PTR_ERR(file->private_data); + else + err = 0; + } mutex_unlock(&dentry->d_inode->i_mutex); - return IS_ERR(file->private_data) ? PTR_ERR(file->private_data) : 0; - + return err; } static int configfs_dir_close(struct inode *inode, struct file *file) @@ -1499,6 +1576,10 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) if (err) { d_delete(dentry); dput(dentry); + } else { + spin_lock(&configfs_dirent_lock); + configfs_dir_set_ready(dentry->d_fsdata); + spin_unlock(&configfs_dirent_lock); } } diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index 61a886d..bf74973 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c @@ -76,6 +76,9 @@ static int create_link(struct config_item *parent_item, struct configfs_symlink *sl; int ret; + ret = -ENOENT; + if (!configfs_dirent_is_ready(target_sd)) + goto out; ret = -ENOMEM; sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL); if (sl) { @@ -100,6 +103,7 @@ static int create_link(struct config_item *parent_item, } } +out: return ret; } @@ -129,6 +133,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna { int ret; struct nameidata nd; + struct configfs_dirent *sd; struct config_item *parent_item; struct config_item *target_item; struct config_item_type *type; @@ -137,9 +142,19 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna if (dentry->d_parent == configfs_sb->s_root) goto out; + sd = dentry->d_parent->d_fsdata; + /* + * Fake invisibility if dir belongs to a group/default groups hierarchy + * being attached + */ + ret = -ENOENT; + if (!configfs_dirent_is_ready(sd)) + goto out; + parent_item = configfs_get_config_item(dentry->d_parent); type = parent_item->ci_type; + ret = -EPERM; if (!type || !type->ct_item_ops || !type->ct_item_ops->allow_link) goto out_put; -- cgit v0.10.2 From 2e2ce171c3ba6f2753fb1fd2706b63683394da2d Mon Sep 17 00:00:00 2001 From: Louis Rilling Date: Fri, 4 Jul 2008 16:56:06 +0200 Subject: [PATCH] configfs: Lock new directory inodes before removing on cleanup after failure Once a new configfs directory is created by configfs_attach_item() or configfs_attach_group(), a failure in the remaining initialization steps leads to removing a directory which inode the VFS may have already accessed. This commit adds the necessary inode locking to safely remove configfs directories while cleaning up after a failure. As an advantage, the locking rules of populate_groups() and detach_groups() become the same: the caller must have the group's inode mutex locked. Signed-off-by: Louis Rilling Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 647499a..4d11479 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -324,6 +324,8 @@ static void remove_dir(struct dentry * d) * The only thing special about this is that we remove any files in * the directory before we remove the directory, and we've inlined * what used to be configfs_rmdir() below, instead of calling separately. + * + * Caller holds the mutex of the item's inode */ static void configfs_remove_dir(struct config_item * item) @@ -612,36 +614,21 @@ static int create_default_group(struct config_group *parent_group, static int populate_groups(struct config_group *group) { struct config_group *new_group; - struct dentry *dentry = group->cg_item.ci_dentry; int ret = 0; int i; if (group->default_groups) { - /* - * FYI, we're faking mkdir here - * I'm not sure we need this semaphore, as we're called - * from our parent's mkdir. That holds our parent's - * i_mutex, so afaik lookup cannot continue through our - * parent to find us, let alone mess with our tree. - * That said, taking our i_mutex is closer to mkdir - * emulation, and shouldn't hurt. - */ - mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); - for (i = 0; group->default_groups[i]; i++) { new_group = group->default_groups[i]; ret = create_default_group(group, new_group); - if (ret) + if (ret) { + detach_groups(group); break; + } } - - mutex_unlock(&dentry->d_inode->i_mutex); } - if (ret) - detach_groups(group); - return ret; } @@ -756,7 +743,15 @@ static int configfs_attach_item(struct config_item *parent_item, if (!ret) { ret = populate_attrs(item); if (ret) { + /* + * We are going to remove an inode and its dentry but + * the VFS may already have hit and used them. Thus, + * we must lock them as rmdir() would. + */ + mutex_lock(&dentry->d_inode->i_mutex); configfs_remove_dir(item); + dentry->d_inode->i_flags |= S_DEAD; + mutex_unlock(&dentry->d_inode->i_mutex); d_delete(dentry); } } @@ -764,6 +759,7 @@ static int configfs_attach_item(struct config_item *parent_item, return ret; } +/* Caller holds the mutex of the item's inode */ static void configfs_detach_item(struct config_item *item) { detach_attrs(item); @@ -782,16 +778,30 @@ static int configfs_attach_group(struct config_item *parent_item, sd = dentry->d_fsdata; sd->s_type |= CONFIGFS_USET_DIR; + /* + * FYI, we're faking mkdir in populate_groups() + * We must lock the group's inode to avoid races with the VFS + * which can already hit the inode and try to add/remove entries + * under it. + * + * We must also lock the inode to remove it safely in case of + * error, as rmdir() would. + */ + mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); ret = populate_groups(to_config_group(item)); if (ret) { configfs_detach_item(item); - d_delete(dentry); + dentry->d_inode->i_flags |= S_DEAD; } + mutex_unlock(&dentry->d_inode->i_mutex); + if (ret) + d_delete(dentry); } return ret; } +/* Caller holds the mutex of the group's inode */ static void configfs_detach_group(struct config_item *item) { detach_groups(to_config_group(item)); -- cgit v0.10.2 From 99cefda42ac550863b5ae1df9e60322e377decf9 Mon Sep 17 00:00:00 2001 From: Louis Rilling Date: Fri, 27 Jun 2008 13:10:25 +0200 Subject: [PATCH] configfs: Fix open directory making rmdir() fail When checking for user-created elements under an item to be removed by rmdir(), configfs_detach_prep() counts fake configfs_dirents created by dir_open() as user-created and fails when finding one. It is however perfectly valid to remove a directory that is open. Simply make configfs_detach_prep() skip fake configfs_dirent, like it already does for attributes, and like detach_groups() does. Signed-off-by: Louis Rilling Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 4d11479..a89058b 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -435,7 +435,8 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex ret = 0; list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { - if (sd->s_type & CONFIGFS_NOT_PINNED) + if (!sd->s_element || + (sd->s_type & CONFIGFS_NOT_PINNED)) continue; if (sd->s_type & CONFIGFS_USET_DEFAULT) { /* Abort if racing with mkdir() */ -- cgit v0.10.2 From 70526b67443a980d5029d9cf06903bef731a4e96 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Tue, 17 Jun 2008 15:34:32 -0700 Subject: [PATCH] configfs: Pin configfs subsystems separately from new config_items. configfs_mkdir() creates a new item by calling its parent's ->make_item/group() functions. Once that object is created, configfs_mkdir() calls try_module_get() on the new item's module. If it succeeds, the module owning the new item cannot be unloaded, and configfs is safe to reference the item. If the item and the subsystem it belongs to are part of the same module, the subsystem is also pinned. This is the common case. However, if the subsystem is made up of multiple modules, this may not pin the subsystem. Thus, it would be possible to unload the toplevel subsystem module while there is still a child item. Thus, we now try_module_get() the subsystem's module. This only really affects children of the toplevel subsystem group. Deeper children already have their parents pinned. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index a89058b..7a8db78 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1100,7 +1100,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) struct configfs_subsystem *subsys; struct configfs_dirent *sd; struct config_item_type *type; - struct module *owner = NULL; + struct module *subsys_owner = NULL, *new_item_owner = NULL; char *name; if (dentry->d_parent == configfs_sb->s_root) { @@ -1137,10 +1137,25 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) goto out_put; } + /* + * The subsystem may belong to a different module than the item + * being created. We don't want to safely pin the new item but + * fail to pin the subsystem it sits under. + */ + if (!subsys->su_group.cg_item.ci_type) { + ret = -EINVAL; + goto out_put; + } + subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner; + if (!try_module_get(subsys_owner)) { + ret = -EINVAL; + goto out_put; + } + name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL); if (!name) { ret = -ENOMEM; - goto out_put; + goto out_subsys_put; } snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name); @@ -1172,7 +1187,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) * If ret != 0, then link_obj() was never called. * There are no extra references to clean up. */ - goto out_put; + goto out_subsys_put; } /* @@ -1186,8 +1201,8 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) goto out_unlink; } - owner = type->ct_owner; - if (!try_module_get(owner)) { + new_item_owner = type->ct_owner; + if (!try_module_get(new_item_owner)) { ret = -EINVAL; goto out_unlink; } @@ -1236,9 +1251,13 @@ out_unlink: mutex_unlock(&subsys->su_mutex); if (module_got) - module_put(owner); + module_put(new_item_owner); } +out_subsys_put: + if (ret) + module_put(subsys_owner); + out_put: /* * link_obj()/link_group() took a reference from child->parent, @@ -1257,7 +1276,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) struct config_item *item; struct configfs_subsystem *subsys; struct configfs_dirent *sd; - struct module *owner = NULL; + struct module *subsys_owner = NULL, *dead_item_owner = NULL; int ret; if (dentry->d_parent == configfs_sb->s_root) @@ -1284,6 +1303,10 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) return -EINVAL; } + /* configfs_mkdir() shouldn't have allowed this */ + BUG_ON(!subsys->su_group.cg_item.ci_type); + subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner; + /* * Ensure that no racing symlink() will make detach_prep() fail while * the new link is temporarily attached @@ -1321,7 +1344,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) config_item_put(parent_item); if (item->ci_type) - owner = item->ci_type->ct_owner; + dead_item_owner = item->ci_type->ct_owner; if (sd->s_type & CONFIGFS_USET_DIR) { configfs_detach_group(item); @@ -1343,7 +1366,8 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) /* Drop our reference from above */ config_item_put(item); - module_put(owner); + module_put(dead_item_owner); + module_put(subsys_owner); return 0; } -- cgit v0.10.2 From ecb3d28c7edd58b54f16838c434b342ba9195bec Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Wed, 18 Jun 2008 19:29:05 -0700 Subject: [PATCH] configfs: Convenience macros for attribute definition. Sysfs has the _ATTR() and _ATTR_RO() macros to make defining extended form attributes easier. configfs should have something similiar. - _CONFIGFS_ATTR() and _CONFIGFS_ATTR_RO() are the counterparts to the sysfs macros. - CONFIGFS_ATTR_STRUCT() creates the extended form attribute structure. - CONFIGFS_ATTR_OPS() defines the show_attribute()/store_attribute() operations that call the show()/store() operations of the extended form configfs_attributes. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh diff --git a/Documentation/filesystems/configfs/configfs.txt b/Documentation/filesystems/configfs/configfs.txt index 44c97e6..fabcb0e 100644 --- a/Documentation/filesystems/configfs/configfs.txt +++ b/Documentation/filesystems/configfs/configfs.txt @@ -311,9 +311,20 @@ the subsystem must be ready for it. [An Example] The best example of these basic concepts is the simple_children -subsystem/group and the simple_child item in configfs_example.c It -shows a trivial object displaying and storing an attribute, and a simple -group creating and destroying these children. +subsystem/group and the simple_child item in configfs_example_explicit.c +and configfs_example_macros.c. It shows a trivial object displaying and +storing an attribute, and a simple group creating and destroying these +children. + +The only difference between configfs_example_explicit.c and +configfs_example_macros.c is how the attributes of the childless item +are defined. The childless item has extended attributes, each with +their own show()/store() operation. This follows a convention commonly +used in sysfs. configfs_example_explicit.c creates these attributes +by explicitly defining the structures involved. Conversely +configfs_example_macros.c uses some convenience macros from configfs.h +to define the attributes. These macros are similar to their sysfs +counterparts. [Hierarchy Navigation and the Subsystem Mutex] diff --git a/Documentation/filesystems/configfs/configfs_example.c b/Documentation/filesystems/configfs/configfs_example.c deleted file mode 100644 index 03964879..0000000 --- a/Documentation/filesystems/configfs/configfs_example.c +++ /dev/null @@ -1,485 +0,0 @@ -/* - * vim: noexpandtab ts=8 sts=0 sw=8: - * - * configfs_example.c - This file is a demonstration module containing - * a number of configfs subsystems. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * - * Based on sysfs: - * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel - * - * configfs Copyright (C) 2005 Oracle. All rights reserved. - */ - -#include -#include -#include - -#include - - - -/* - * 01-childless - * - * This first example is a childless subsystem. It cannot create - * any config_items. It just has attributes. - * - * Note that we are enclosing the configfs_subsystem inside a container. - * This is not necessary if a subsystem has no attributes directly - * on the subsystem. See the next example, 02-simple-children, for - * such a subsystem. - */ - -struct childless { - struct configfs_subsystem subsys; - int showme; - int storeme; -}; - -struct childless_attribute { - struct configfs_attribute attr; - ssize_t (*show)(struct childless *, char *); - ssize_t (*store)(struct childless *, const char *, size_t); -}; - -static inline struct childless *to_childless(struct config_item *item) -{ - return item ? container_of(to_configfs_subsystem(to_config_group(item)), struct childless, subsys) : NULL; -} - -static ssize_t childless_showme_read(struct childless *childless, - char *page) -{ - ssize_t pos; - - pos = sprintf(page, "%d\n", childless->showme); - childless->showme++; - - return pos; -} - -static ssize_t childless_storeme_read(struct childless *childless, - char *page) -{ - return sprintf(page, "%d\n", childless->storeme); -} - -static ssize_t childless_storeme_write(struct childless *childless, - const char *page, - size_t count) -{ - unsigned long tmp; - char *p = (char *) page; - - tmp = simple_strtoul(p, &p, 10); - if (!p || (*p && (*p != '\n'))) - return -EINVAL; - - if (tmp > INT_MAX) - return -ERANGE; - - childless->storeme = tmp; - - return count; -} - -static ssize_t childless_description_read(struct childless *childless, - char *page) -{ - return sprintf(page, -"[01-childless]\n" -"\n" -"The childless subsystem is the simplest possible subsystem in\n" -"configfs. It does not support the creation of child config_items.\n" -"It only has a few attributes. In fact, it isn't much different\n" -"than a directory in /proc.\n"); -} - -static struct childless_attribute childless_attr_showme = { - .attr = { .ca_owner = THIS_MODULE, .ca_name = "showme", .ca_mode = S_IRUGO }, - .show = childless_showme_read, -}; -static struct childless_attribute childless_attr_storeme = { - .attr = { .ca_owner = THIS_MODULE, .ca_name = "storeme", .ca_mode = S_IRUGO | S_IWUSR }, - .show = childless_storeme_read, - .store = childless_storeme_write, -}; -static struct childless_attribute childless_attr_description = { - .attr = { .ca_owner = THIS_MODULE, .ca_name = "description", .ca_mode = S_IRUGO }, - .show = childless_description_read, -}; - -static struct configfs_attribute *childless_attrs[] = { - &childless_attr_showme.attr, - &childless_attr_storeme.attr, - &childless_attr_description.attr, - NULL, -}; - -static ssize_t childless_attr_show(struct config_item *item, - struct configfs_attribute *attr, - char *page) -{ - struct childless *childless = to_childless(item); - struct childless_attribute *childless_attr = - container_of(attr, struct childless_attribute, attr); - ssize_t ret = 0; - - if (childless_attr->show) - ret = childless_attr->show(childless, page); - return ret; -} - -static ssize_t childless_attr_store(struct config_item *item, - struct configfs_attribute *attr, - const char *page, size_t count) -{ - struct childless *childless = to_childless(item); - struct childless_attribute *childless_attr = - container_of(attr, struct childless_attribute, attr); - ssize_t ret = -EINVAL; - - if (childless_attr->store) - ret = childless_attr->store(childless, page, count); - return ret; -} - -static struct configfs_item_operations childless_item_ops = { - .show_attribute = childless_attr_show, - .store_attribute = childless_attr_store, -}; - -static struct config_item_type childless_type = { - .ct_item_ops = &childless_item_ops, - .ct_attrs = childless_attrs, - .ct_owner = THIS_MODULE, -}; - -static struct childless childless_subsys = { - .subsys = { - .su_group = { - .cg_item = { - .ci_namebuf = "01-childless", - .ci_type = &childless_type, - }, - }, - }, -}; - - -/* ----------------------------------------------------------------- */ - -/* - * 02-simple-children - * - * This example merely has a simple one-attribute child. Note that - * there is no extra attribute structure, as the child's attribute is - * known from the get-go. Also, there is no container for the - * subsystem, as it has no attributes of its own. - */ - -struct simple_child { - struct config_item item; - int storeme; -}; - -static inline struct simple_child *to_simple_child(struct config_item *item) -{ - return item ? container_of(item, struct simple_child, item) : NULL; -} - -static struct configfs_attribute simple_child_attr_storeme = { - .ca_owner = THIS_MODULE, - .ca_name = "storeme", - .ca_mode = S_IRUGO | S_IWUSR, -}; - -static struct configfs_attribute *simple_child_attrs[] = { - &simple_child_attr_storeme, - NULL, -}; - -static ssize_t simple_child_attr_show(struct config_item *item, - struct configfs_attribute *attr, - char *page) -{ - ssize_t count; - struct simple_child *simple_child = to_simple_child(item); - - count = sprintf(page, "%d\n", simple_child->storeme); - - return count; -} - -static ssize_t simple_child_attr_store(struct config_item *item, - struct configfs_attribute *attr, - const char *page, size_t count) -{ - struct simple_child *simple_child = to_simple_child(item); - unsigned long tmp; - char *p = (char *) page; - - tmp = simple_strtoul(p, &p, 10); - if (!p || (*p && (*p != '\n'))) - return -EINVAL; - - if (tmp > INT_MAX) - return -ERANGE; - - simple_child->storeme = tmp; - - return count; -} - -static void simple_child_release(struct config_item *item) -{ - kfree(to_simple_child(item)); -} - -static struct configfs_item_operations simple_child_item_ops = { - .release = simple_child_release, - .show_attribute = simple_child_attr_show, - .store_attribute = simple_child_attr_store, -}; - -static struct config_item_type simple_child_type = { - .ct_item_ops = &simple_child_item_ops, - .ct_attrs = simple_child_attrs, - .ct_owner = THIS_MODULE, -}; - - -struct simple_children { - struct config_group group; -}; - -static inline struct simple_children *to_simple_children(struct config_item *item) -{ - return item ? container_of(to_config_group(item), struct simple_children, group) : NULL; -} - -static struct config_item *simple_children_make_item(struct config_group *group, const char *name) -{ - struct simple_child *simple_child; - - simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL); - if (!simple_child) - return ERR_PTR(-ENOMEM); - - - config_item_init_type_name(&simple_child->item, name, - &simple_child_type); - - simple_child->storeme = 0; - - return &simple_child->item; -} - -static struct configfs_attribute simple_children_attr_description = { - .ca_owner = THIS_MODULE, - .ca_name = "description", - .ca_mode = S_IRUGO, -}; - -static struct configfs_attribute *simple_children_attrs[] = { - &simple_children_attr_description, - NULL, -}; - -static ssize_t simple_children_attr_show(struct config_item *item, - struct configfs_attribute *attr, - char *page) -{ - return sprintf(page, -"[02-simple-children]\n" -"\n" -"This subsystem allows the creation of child config_items. These\n" -"items have only one attribute that is readable and writeable.\n"); -} - -static void simple_children_release(struct config_item *item) -{ - kfree(to_simple_children(item)); -} - -static struct configfs_item_operations simple_children_item_ops = { - .release = simple_children_release, - .show_attribute = simple_children_attr_show, -}; - -/* - * Note that, since no extra work is required on ->drop_item(), - * no ->drop_item() is provided. - */ -static struct configfs_group_operations simple_children_group_ops = { - .make_item = simple_children_make_item, -}; - -static struct config_item_type simple_children_type = { - .ct_item_ops = &simple_children_item_ops, - .ct_group_ops = &simple_children_group_ops, - .ct_attrs = simple_children_attrs, - .ct_owner = THIS_MODULE, -}; - -static struct configfs_subsystem simple_children_subsys = { - .su_group = { - .cg_item = { - .ci_namebuf = "02-simple-children", - .ci_type = &simple_children_type, - }, - }, -}; - - -/* ----------------------------------------------------------------- */ - -/* - * 03-group-children - * - * This example reuses the simple_children group from above. However, - * the simple_children group is not the subsystem itself, it is a - * child of the subsystem. Creation of a group in the subsystem creates - * a new simple_children group. That group can then have simple_child - * children of its own. - */ - -static struct config_group *group_children_make_group(struct config_group *group, const char *name) -{ - struct simple_children *simple_children; - - simple_children = kzalloc(sizeof(struct simple_children), - GFP_KERNEL); - if (!simple_children) - return ERR_PTR(-ENOMEM); - - - config_group_init_type_name(&simple_children->group, name, - &simple_children_type); - - return &simple_children->group; -} - -static struct configfs_attribute group_children_attr_description = { - .ca_owner = THIS_MODULE, - .ca_name = "description", - .ca_mode = S_IRUGO, -}; - -static struct configfs_attribute *group_children_attrs[] = { - &group_children_attr_description, - NULL, -}; - -static ssize_t group_children_attr_show(struct config_item *item, - struct configfs_attribute *attr, - char *page) -{ - return sprintf(page, -"[03-group-children]\n" -"\n" -"This subsystem allows the creation of child config_groups. These\n" -"groups are like the subsystem simple-children.\n"); -} - -static struct configfs_item_operations group_children_item_ops = { - .show_attribute = group_children_attr_show, -}; - -/* - * Note that, since no extra work is required on ->drop_item(), - * no ->drop_item() is provided. - */ -static struct configfs_group_operations group_children_group_ops = { - .make_group = group_children_make_group, -}; - -static struct config_item_type group_children_type = { - .ct_item_ops = &group_children_item_ops, - .ct_group_ops = &group_children_group_ops, - .ct_attrs = group_children_attrs, - .ct_owner = THIS_MODULE, -}; - -static struct configfs_subsystem group_children_subsys = { - .su_group = { - .cg_item = { - .ci_namebuf = "03-group-children", - .ci_type = &group_children_type, - }, - }, -}; - -/* ----------------------------------------------------------------- */ - -/* - * We're now done with our subsystem definitions. - * For convenience in this module, here's a list of them all. It - * allows the init function to easily register them. Most modules - * will only have one subsystem, and will only call register_subsystem - * on it directly. - */ -static struct configfs_subsystem *example_subsys[] = { - &childless_subsys.subsys, - &simple_children_subsys, - &group_children_subsys, - NULL, -}; - -static int __init configfs_example_init(void) -{ - int ret; - int i; - struct configfs_subsystem *subsys; - - for (i = 0; example_subsys[i]; i++) { - subsys = example_subsys[i]; - - config_group_init(&subsys->su_group); - mutex_init(&subsys->su_mutex); - ret = configfs_register_subsystem(subsys); - if (ret) { - printk(KERN_ERR "Error %d while registering subsystem %s\n", - ret, - subsys->su_group.cg_item.ci_namebuf); - goto out_unregister; - } - } - - return 0; - -out_unregister: - for (; i >= 0; i--) { - configfs_unregister_subsystem(example_subsys[i]); - } - - return ret; -} - -static void __exit configfs_example_exit(void) -{ - int i; - - for (i = 0; example_subsys[i]; i++) { - configfs_unregister_subsystem(example_subsys[i]); - } -} - -module_init(configfs_example_init); -module_exit(configfs_example_exit); -MODULE_LICENSE("GPL"); diff --git a/Documentation/filesystems/configfs/configfs_example_explicit.c b/Documentation/filesystems/configfs/configfs_example_explicit.c new file mode 100644 index 0000000..d428cc9 --- /dev/null +++ b/Documentation/filesystems/configfs/configfs_example_explicit.c @@ -0,0 +1,485 @@ +/* + * vim: noexpandtab ts=8 sts=0 sw=8: + * + * configfs_example_explicit.c - This file is a demonstration module + * containing a number of configfs subsystems. It explicitly defines + * each structure without using the helper macros defined in + * configfs.h. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Based on sysfs: + * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel + * + * configfs Copyright (C) 2005 Oracle. All rights reserved. + */ + +#include +#include +#include + +#include + + + +/* + * 01-childless + * + * This first example is a childless subsystem. It cannot create + * any config_items. It just has attributes. + * + * Note that we are enclosing the configfs_subsystem inside a container. + * This is not necessary if a subsystem has no attributes directly + * on the subsystem. See the next example, 02-simple-children, for + * such a subsystem. + */ + +struct childless { + struct configfs_subsystem subsys; + int showme; + int storeme; +}; + +struct childless_attribute { + struct configfs_attribute attr; + ssize_t (*show)(struct childless *, char *); + ssize_t (*store)(struct childless *, const char *, size_t); +}; + +static inline struct childless *to_childless(struct config_item *item) +{ + return item ? container_of(to_configfs_subsystem(to_config_group(item)), struct childless, subsys) : NULL; +} + +static ssize_t childless_showme_read(struct childless *childless, + char *page) +{ + ssize_t pos; + + pos = sprintf(page, "%d\n", childless->showme); + childless->showme++; + + return pos; +} + +static ssize_t childless_storeme_read(struct childless *childless, + char *page) +{ + return sprintf(page, "%d\n", childless->storeme); +} + +static ssize_t childless_storeme_write(struct childless *childless, + const char *page, + size_t count) +{ + unsigned long tmp; + char *p = (char *) page; + + tmp = simple_strtoul(p, &p, 10); + if (!p || (*p && (*p != '\n'))) + return -EINVAL; + + if (tmp > INT_MAX) + return -ERANGE; + + childless->storeme = tmp; + + return count; +} + +static ssize_t childless_description_read(struct childless *childless, + char *page) +{ + return sprintf(page, +"[01-childless]\n" +"\n" +"The childless subsystem is the simplest possible subsystem in\n" +"configfs. It does not support the creation of child config_items.\n" +"It only has a few attributes. In fact, it isn't much different\n" +"than a directory in /proc.\n"); +} + +static struct childless_attribute childless_attr_showme = { + .attr = { .ca_owner = THIS_MODULE, .ca_name = "showme", .ca_mode = S_IRUGO }, + .show = childless_showme_read, +}; +static struct childless_attribute childless_attr_storeme = { + .attr = { .ca_owner = THIS_MODULE, .ca_name = "storeme", .ca_mode = S_IRUGO | S_IWUSR }, + .show = childless_storeme_read, + .store = childless_storeme_write, +}; +static struct childless_attribute childless_attr_description = { + .attr = { .ca_owner = THIS_MODULE, .ca_name = "description", .ca_mode = S_IRUGO }, + .show = childless_description_read, +}; + +static struct configfs_attribute *childless_attrs[] = { + &childless_attr_showme.attr, + &childless_attr_storeme.attr, + &childless_attr_description.attr, + NULL, +}; + +static ssize_t childless_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ + struct childless *childless = to_childless(item); + struct childless_attribute *childless_attr = + container_of(attr, struct childless_attribute, attr); + ssize_t ret = 0; + + if (childless_attr->show) + ret = childless_attr->show(childless, page); + return ret; +} + +static ssize_t childless_attr_store(struct config_item *item, + struct configfs_attribute *attr, + const char *page, size_t count) +{ + struct childless *childless = to_childless(item); + struct childless_attribute *childless_attr = + container_of(attr, struct childless_attribute, attr); + ssize_t ret = -EINVAL; + + if (childless_attr->store) + ret = childless_attr->store(childless, page, count); + return ret; +} + +static struct configfs_item_operations childless_item_ops = { + .show_attribute = childless_attr_show, + .store_attribute = childless_attr_store, +}; + +static struct config_item_type childless_type = { + .ct_item_ops = &childless_item_ops, + .ct_attrs = childless_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct childless childless_subsys = { + .subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "01-childless", + .ci_type = &childless_type, + }, + }, + }, +}; + + +/* ----------------------------------------------------------------- */ + +/* + * 02-simple-children + * + * This example merely has a simple one-attribute child. Note that + * there is no extra attribute structure, as the child's attribute is + * known from the get-go. Also, there is no container for the + * subsystem, as it has no attributes of its own. + */ + +struct simple_child { + struct config_item item; + int storeme; +}; + +static inline struct simple_child *to_simple_child(struct config_item *item) +{ + return item ? container_of(item, struct simple_child, item) : NULL; +} + +static struct configfs_attribute simple_child_attr_storeme = { + .ca_owner = THIS_MODULE, + .ca_name = "storeme", + .ca_mode = S_IRUGO | S_IWUSR, +}; + +static struct configfs_attribute *simple_child_attrs[] = { + &simple_child_attr_storeme, + NULL, +}; + +static ssize_t simple_child_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ + ssize_t count; + struct simple_child *simple_child = to_simple_child(item); + + count = sprintf(page, "%d\n", simple_child->storeme); + + return count; +} + +static ssize_t simple_child_attr_store(struct config_item *item, + struct configfs_attribute *attr, + const char *page, size_t count) +{ + struct simple_child *simple_child = to_simple_child(item); + unsigned long tmp; + char *p = (char *) page; + + tmp = simple_strtoul(p, &p, 10); + if (!p || (*p && (*p != '\n'))) + return -EINVAL; + + if (tmp > INT_MAX) + return -ERANGE; + + simple_child->storeme = tmp; + + return count; +} + +static void simple_child_release(struct config_item *item) +{ + kfree(to_simple_child(item)); +} + +static struct configfs_item_operations simple_child_item_ops = { + .release = simple_child_release, + .show_attribute = simple_child_attr_show, + .store_attribute = simple_child_attr_store, +}; + +static struct config_item_type simple_child_type = { + .ct_item_ops = &simple_child_item_ops, + .ct_attrs = simple_child_attrs, + .ct_owner = THIS_MODULE, +}; + + +struct simple_children { + struct config_group group; +}; + +static inline struct simple_children *to_simple_children(struct config_item *item) +{ + return item ? container_of(to_config_group(item), struct simple_children, group) : NULL; +} + +static struct config_item *simple_children_make_item(struct config_group *group, const char *name) +{ + struct simple_child *simple_child; + + simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL); + if (!simple_child) + return ERR_PTR(-ENOMEM); + + config_item_init_type_name(&simple_child->item, name, + &simple_child_type); + + simple_child->storeme = 0; + + return &simple_child->item; +} + +static struct configfs_attribute simple_children_attr_description = { + .ca_owner = THIS_MODULE, + .ca_name = "description", + .ca_mode = S_IRUGO, +}; + +static struct configfs_attribute *simple_children_attrs[] = { + &simple_children_attr_description, + NULL, +}; + +static ssize_t simple_children_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ + return sprintf(page, +"[02-simple-children]\n" +"\n" +"This subsystem allows the creation of child config_items. These\n" +"items have only one attribute that is readable and writeable.\n"); +} + +static void simple_children_release(struct config_item *item) +{ + kfree(to_simple_children(item)); +} + +static struct configfs_item_operations simple_children_item_ops = { + .release = simple_children_release, + .show_attribute = simple_children_attr_show, +}; + +/* + * Note that, since no extra work is required on ->drop_item(), + * no ->drop_item() is provided. + */ +static struct configfs_group_operations simple_children_group_ops = { + .make_item = simple_children_make_item, +}; + +static struct config_item_type simple_children_type = { + .ct_item_ops = &simple_children_item_ops, + .ct_group_ops = &simple_children_group_ops, + .ct_attrs = simple_children_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct configfs_subsystem simple_children_subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "02-simple-children", + .ci_type = &simple_children_type, + }, + }, +}; + + +/* ----------------------------------------------------------------- */ + +/* + * 03-group-children + * + * This example reuses the simple_children group from above. However, + * the simple_children group is not the subsystem itself, it is a + * child of the subsystem. Creation of a group in the subsystem creates + * a new simple_children group. That group can then have simple_child + * children of its own. + */ + +static struct config_group *group_children_make_group(struct config_group *group, const char *name) +{ + struct simple_children *simple_children; + + simple_children = kzalloc(sizeof(struct simple_children), + GFP_KERNEL); + if (!simple_children) + return ERR_PTR(-ENOMEM); + + config_group_init_type_name(&simple_children->group, name, + &simple_children_type); + + return &simple_children->group; +} + +static struct configfs_attribute group_children_attr_description = { + .ca_owner = THIS_MODULE, + .ca_name = "description", + .ca_mode = S_IRUGO, +}; + +static struct configfs_attribute *group_children_attrs[] = { + &group_children_attr_description, + NULL, +}; + +static ssize_t group_children_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ + return sprintf(page, +"[03-group-children]\n" +"\n" +"This subsystem allows the creation of child config_groups. These\n" +"groups are like the subsystem simple-children.\n"); +} + +static struct configfs_item_operations group_children_item_ops = { + .show_attribute = group_children_attr_show, +}; + +/* + * Note that, since no extra work is required on ->drop_item(), + * no ->drop_item() is provided. + */ +static struct configfs_group_operations group_children_group_ops = { + .make_group = group_children_make_group, +}; + +static struct config_item_type group_children_type = { + .ct_item_ops = &group_children_item_ops, + .ct_group_ops = &group_children_group_ops, + .ct_attrs = group_children_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct configfs_subsystem group_children_subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "03-group-children", + .ci_type = &group_children_type, + }, + }, +}; + +/* ----------------------------------------------------------------- */ + +/* + * We're now done with our subsystem definitions. + * For convenience in this module, here's a list of them all. It + * allows the init function to easily register them. Most modules + * will only have one subsystem, and will only call register_subsystem + * on it directly. + */ +static struct configfs_subsystem *example_subsys[] = { + &childless_subsys.subsys, + &simple_children_subsys, + &group_children_subsys, + NULL, +}; + +static int __init configfs_example_init(void) +{ + int ret; + int i; + struct configfs_subsystem *subsys; + + for (i = 0; example_subsys[i]; i++) { + subsys = example_subsys[i]; + + config_group_init(&subsys->su_group); + mutex_init(&subsys->su_mutex); + ret = configfs_register_subsystem(subsys); + if (ret) { + printk(KERN_ERR "Error %d while registering subsystem %s\n", + ret, + subsys->su_group.cg_item.ci_namebuf); + goto out_unregister; + } + } + + return 0; + +out_unregister: + for (; i >= 0; i--) { + configfs_unregister_subsystem(example_subsys[i]); + } + + return ret; +} + +static void __exit configfs_example_exit(void) +{ + int i; + + for (i = 0; example_subsys[i]; i++) { + configfs_unregister_subsystem(example_subsys[i]); + } +} + +module_init(configfs_example_init); +module_exit(configfs_example_exit); +MODULE_LICENSE("GPL"); diff --git a/Documentation/filesystems/configfs/configfs_example_macros.c b/Documentation/filesystems/configfs/configfs_example_macros.c new file mode 100644 index 0000000..d8e30a0 --- /dev/null +++ b/Documentation/filesystems/configfs/configfs_example_macros.c @@ -0,0 +1,448 @@ +/* + * vim: noexpandtab ts=8 sts=0 sw=8: + * + * configfs_example_macros.c - This file is a demonstration module + * containing a number of configfs subsystems. It uses the helper + * macros defined by configfs.h + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Based on sysfs: + * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel + * + * configfs Copyright (C) 2005 Oracle. All rights reserved. + */ + +#include +#include +#include + +#include + + + +/* + * 01-childless + * + * This first example is a childless subsystem. It cannot create + * any config_items. It just has attributes. + * + * Note that we are enclosing the configfs_subsystem inside a container. + * This is not necessary if a subsystem has no attributes directly + * on the subsystem. See the next example, 02-simple-children, for + * such a subsystem. + */ + +struct childless { + struct configfs_subsystem subsys; + int showme; + int storeme; +}; + +static inline struct childless *to_childless(struct config_item *item) +{ + return item ? container_of(to_configfs_subsystem(to_config_group(item)), struct childless, subsys) : NULL; +} + +CONFIGFS_ATTR_STRUCT(childless); +#define CHILDLESS_ATTR(_name, _mode, _show, _store) \ +struct childless_attribute childless_attr_##_name = __CONFIGFS_ATTR(_name, _mode, _show, _store) +#define CHILDLESS_ATTR_RO(_name, _show) \ +struct childless_attribute childless_attr_##_name = __CONFIGFS_ATTR_RO(_name, _show); + +static ssize_t childless_showme_read(struct childless *childless, + char *page) +{ + ssize_t pos; + + pos = sprintf(page, "%d\n", childless->showme); + childless->showme++; + + return pos; +} + +static ssize_t childless_storeme_read(struct childless *childless, + char *page) +{ + return sprintf(page, "%d\n", childless->storeme); +} + +static ssize_t childless_storeme_write(struct childless *childless, + const char *page, + size_t count) +{ + unsigned long tmp; + char *p = (char *) page; + + tmp = simple_strtoul(p, &p, 10); + if (!p || (*p && (*p != '\n'))) + return -EINVAL; + + if (tmp > INT_MAX) + return -ERANGE; + + childless->storeme = tmp; + + return count; +} + +static ssize_t childless_description_read(struct childless *childless, + char *page) +{ + return sprintf(page, +"[01-childless]\n" +"\n" +"The childless subsystem is the simplest possible subsystem in\n" +"configfs. It does not support the creation of child config_items.\n" +"It only has a few attributes. In fact, it isn't much different\n" +"than a directory in /proc.\n"); +} + +CHILDLESS_ATTR_RO(showme, childless_showme_read); +CHILDLESS_ATTR(storeme, S_IRUGO | S_IWUSR, childless_storeme_read, + childless_storeme_write); +CHILDLESS_ATTR_RO(description, childless_description_read); + +static struct configfs_attribute *childless_attrs[] = { + &childless_attr_showme.attr, + &childless_attr_storeme.attr, + &childless_attr_description.attr, + NULL, +}; + +CONFIGFS_ATTR_OPS(childless); +static struct configfs_item_operations childless_item_ops = { + .show_attribute = childless_attr_show, + .store_attribute = childless_attr_store, +}; + +static struct config_item_type childless_type = { + .ct_item_ops = &childless_item_ops, + .ct_attrs = childless_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct childless childless_subsys = { + .subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "01-childless", + .ci_type = &childless_type, + }, + }, + }, +}; + + +/* ----------------------------------------------------------------- */ + +/* + * 02-simple-children + * + * This example merely has a simple one-attribute child. Note that + * there is no extra attribute structure, as the child's attribute is + * known from the get-go. Also, there is no container for the + * subsystem, as it has no attributes of its own. + */ + +struct simple_child { + struct config_item item; + int storeme; +}; + +static inline struct simple_child *to_simple_child(struct config_item *item) +{ + return item ? container_of(item, struct simple_child, item) : NULL; +} + +static struct configfs_attribute simple_child_attr_storeme = { + .ca_owner = THIS_MODULE, + .ca_name = "storeme", + .ca_mode = S_IRUGO | S_IWUSR, +}; + +static struct configfs_attribute *simple_child_attrs[] = { + &simple_child_attr_storeme, + NULL, +}; + +static ssize_t simple_child_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ + ssize_t count; + struct simple_child *simple_child = to_simple_child(item); + + count = sprintf(page, "%d\n", simple_child->storeme); + + return count; +} + +static ssize_t simple_child_attr_store(struct config_item *item, + struct configfs_attribute *attr, + const char *page, size_t count) +{ + struct simple_child *simple_child = to_simple_child(item); + unsigned long tmp; + char *p = (char *) page; + + tmp = simple_strtoul(p, &p, 10); + if (!p || (*p && (*p != '\n'))) + return -EINVAL; + + if (tmp > INT_MAX) + return -ERANGE; + + simple_child->storeme = tmp; + + return count; +} + +static void simple_child_release(struct config_item *item) +{ + kfree(to_simple_child(item)); +} + +static struct configfs_item_operations simple_child_item_ops = { + .release = simple_child_release, + .show_attribute = simple_child_attr_show, + .store_attribute = simple_child_attr_store, +}; + +static struct config_item_type simple_child_type = { + .ct_item_ops = &simple_child_item_ops, + .ct_attrs = simple_child_attrs, + .ct_owner = THIS_MODULE, +}; + + +struct simple_children { + struct config_group group; +}; + +static inline struct simple_children *to_simple_children(struct config_item *item) +{ + return item ? container_of(to_config_group(item), struct simple_children, group) : NULL; +} + +static struct config_item *simple_children_make_item(struct config_group *group, const char *name) +{ + struct simple_child *simple_child; + + simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL); + if (!simple_child) + return ERR_PTR(-ENOMEM); + + config_item_init_type_name(&simple_child->item, name, + &simple_child_type); + + simple_child->storeme = 0; + + return &simple_child->item; +} + +static struct configfs_attribute simple_children_attr_description = { + .ca_owner = THIS_MODULE, + .ca_name = "description", + .ca_mode = S_IRUGO, +}; + +static struct configfs_attribute *simple_children_attrs[] = { + &simple_children_attr_description, + NULL, +}; + +static ssize_t simple_children_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ + return sprintf(page, +"[02-simple-children]\n" +"\n" +"This subsystem allows the creation of child config_items. These\n" +"items have only one attribute that is readable and writeable.\n"); +} + +static void simple_children_release(struct config_item *item) +{ + kfree(to_simple_children(item)); +} + +static struct configfs_item_operations simple_children_item_ops = { + .release = simple_children_release, + .show_attribute = simple_children_attr_show, +}; + +/* + * Note that, since no extra work is required on ->drop_item(), + * no ->drop_item() is provided. + */ +static struct configfs_group_operations simple_children_group_ops = { + .make_item = simple_children_make_item, +}; + +static struct config_item_type simple_children_type = { + .ct_item_ops = &simple_children_item_ops, + .ct_group_ops = &simple_children_group_ops, + .ct_attrs = simple_children_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct configfs_subsystem simple_children_subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "02-simple-children", + .ci_type = &simple_children_type, + }, + }, +}; + + +/* ----------------------------------------------------------------- */ + +/* + * 03-group-children + * + * This example reuses the simple_children group from above. However, + * the simple_children group is not the subsystem itself, it is a + * child of the subsystem. Creation of a group in the subsystem creates + * a new simple_children group. That group can then have simple_child + * children of its own. + */ + +static struct config_group *group_children_make_group(struct config_group *group, const char *name) +{ + struct simple_children *simple_children; + + simple_children = kzalloc(sizeof(struct simple_children), + GFP_KERNEL); + if (!simple_children) + return ERR_PTR(-ENOMEM); + + config_group_init_type_name(&simple_children->group, name, + &simple_children_type); + + return &simple_children->group; +} + +static struct configfs_attribute group_children_attr_description = { + .ca_owner = THIS_MODULE, + .ca_name = "description", + .ca_mode = S_IRUGO, +}; + +static struct configfs_attribute *group_children_attrs[] = { + &group_children_attr_description, + NULL, +}; + +static ssize_t group_children_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ + return sprintf(page, +"[03-group-children]\n" +"\n" +"This subsystem allows the creation of child config_groups. These\n" +"groups are like the subsystem simple-children.\n"); +} + +static struct configfs_item_operations group_children_item_ops = { + .show_attribute = group_children_attr_show, +}; + +/* + * Note that, since no extra work is required on ->drop_item(), + * no ->drop_item() is provided. + */ +static struct configfs_group_operations group_children_group_ops = { + .make_group = group_children_make_group, +}; + +static struct config_item_type group_children_type = { + .ct_item_ops = &group_children_item_ops, + .ct_group_ops = &group_children_group_ops, + .ct_attrs = group_children_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct configfs_subsystem group_children_subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "03-group-children", + .ci_type = &group_children_type, + }, + }, +}; + +/* ----------------------------------------------------------------- */ + +/* + * We're now done with our subsystem definitions. + * For convenience in this module, here's a list of them all. It + * allows the init function to easily register them. Most modules + * will only have one subsystem, and will only call register_subsystem + * on it directly. + */ +static struct configfs_subsystem *example_subsys[] = { + &childless_subsys.subsys, + &simple_children_subsys, + &group_children_subsys, + NULL, +}; + +static int __init configfs_example_init(void) +{ + int ret; + int i; + struct configfs_subsystem *subsys; + + for (i = 0; example_subsys[i]; i++) { + subsys = example_subsys[i]; + + config_group_init(&subsys->su_group); + mutex_init(&subsys->su_mutex); + ret = configfs_register_subsystem(subsys); + if (ret) { + printk(KERN_ERR "Error %d while registering subsystem %s\n", + ret, + subsys->su_group.cg_item.ci_namebuf); + goto out_unregister; + } + } + + return 0; + +out_unregister: + for (; i >= 0; i--) { + configfs_unregister_subsystem(example_subsys[i]); + } + + return ret; +} + +static void __exit configfs_example_exit(void) +{ + int i; + + for (i = 0; example_subsys[i]; i++) { + configfs_unregister_subsystem(example_subsys[i]); + } +} + +module_init(configfs_example_init); +module_exit(configfs_example_exit); +MODULE_LICENSE("GPL"); diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 0a5491b..7f62777 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -130,8 +130,25 @@ struct configfs_attribute { /* * Users often need to create attribute structures for their configurable * attributes, containing a configfs_attribute member and function pointers - * for the show() and store() operations on that attribute. They can use - * this macro (similar to sysfs' __ATTR) to make defining attributes easier. + * for the show() and store() operations on that attribute. If they don't + * need anything else on the extended attribute structure, they can use + * this macro to define it The argument _item is the name of the + * config_item structure. + */ +#define CONFIGFS_ATTR_STRUCT(_item) \ +struct _item##_attribute { \ + struct configfs_attribute attr; \ + ssize_t (*show)(struct _item *, char *); \ + ssize_t (*store)(struct _item *, const char *, size_t); \ +} + +/* + * With the extended attribute structure, users can use this macro + * (similar to sysfs' __ATTR) to make defining attributes easier. + * An example: + * #define MYITEM_ATTR(_name, _mode, _show, _store) \ + * struct myitem_attribute childless_attr_##_name = \ + * __CONFIGFS_ATTR(_name, _mode, _show, _store) */ #define __CONFIGFS_ATTR(_name, _mode, _show, _store) \ { \ @@ -143,6 +160,52 @@ struct configfs_attribute { .show = _show, \ .store = _store, \ } +/* Here is a readonly version, only requiring a show() operation */ +#define __CONFIGFS_ATTR_RO(_name, _show) \ +{ \ + .attr = { \ + .ca_name = __stringify(_name), \ + .ca_mode = 0444, \ + .ca_owner = THIS_MODULE, \ + }, \ + .show = _show, \ +} + +/* + * With these extended attributes, the simple show_attribute() and + * store_attribute() operations need to call the show() and store() of the + * attributes. This is a common pattern, so we provide a macro to define + * them. The argument _item is the name of the config_item structure. + * This macro expects the attributes to be named "struct _attribute" + * and the function to_() to exist; + */ +#define CONFIGFS_ATTR_OPS(_item) \ +static ssize_t _item##_attr_show(struct config_item *item, \ + struct configfs_attribute *attr, \ + char *page) \ +{ \ + struct _item *_item = to_##_item(item); \ + struct _item##_attribute *_item##_attr = \ + container_of(attr, struct _item##_attribute, attr); \ + ssize_t ret = 0; \ + \ + if (_item##_attr->show) \ + ret = _item##_attr->show(_item, page); \ + return ret; \ +} \ +static ssize_t _item##_attr_store(struct config_item *item, \ + struct configfs_attribute *attr, \ + const char *page, size_t count) \ +{ \ + struct _item *_item = to_##_item(item); \ + struct _item##_attribute *_item##_attr = \ + container_of(attr, struct _item##_attribute, attr); \ + ssize_t ret = -EINVAL; \ + \ + if (_item##_attr->store) \ + ret = _item##_attr->store(_item, page, count); \ + return ret; \ +} /* * If allow_link() exists, the item can symlink(2) out to other -- cgit v0.10.2 From c69991aac71a8beb57c11d651c7fd4b24c32aa8b Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Mon, 14 Jul 2008 17:31:09 -0700 Subject: [PATCH 1/2] ocfs2: Add counter in struct ocfs2_dinode to track journal replays This patch renames the ij_pad to ij_recovery_generation in struct ocfs2_dinode. This will be used to keep count of journal replays after an unclean shutdown. Signed-off-by: Sunil Mushran Signed-off-by: Mark Fasheh diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 3f19451..4f61985 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -660,7 +660,10 @@ struct ocfs2_dinode { struct { /* Info for journal system inodes */ __le32 ij_flags; /* Mounted, version, etc. */ - __le32 ij_pad; + __le32 ij_recovery_generation; /* Incremented when the + journal is recovered + after an unclean + shutdown */ } journal1; } id1; /* Inode type dependant 1 */ /*C0*/ union { -- cgit v0.10.2 From 539d8264093560b917ee3afe4c7f74e5da09d6a5 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Mon, 14 Jul 2008 17:31:10 -0700 Subject: [PATCH 2/2] ocfs2: Fix race between mount and recovery As the fs recovery is asynchronous, there is a small chance that another node can mount (and thus recover) the slot before the recovery thread gets to it. If this happens, the recovery thread will block indefinitely on the journal/slot lock as that lock will be held for the duration of the mount (by design) by the node assigned to that slot. The solution implemented is to keep track of the journal replays using a recovery generation in the journal inode, which will be incremented by the thread replaying that journal. The recovery thread, before attempting the blocking lock on the journal/slot lock, will compare the generation on disk with what it has cached and skip recovery if it does not match. This bug appears to have been inadvertently introduced during the mount/umount vote removal by mainline commit 34d024f84345807bf44163fac84e921513dde323. In the mount voting scheme, the messaging would indirectly indicate that the slot was being recovered. Signed-off-by: Sunil Mushran Signed-off-by: Mark Fasheh diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index a8c19cb..7a37240 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -57,7 +57,7 @@ static int __ocfs2_recovery_thread(void *arg); static int ocfs2_commit_cache(struct ocfs2_super *osb); static int ocfs2_wait_on_mount(struct ocfs2_super *osb); static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, - int dirty); + int dirty, int replayed); static int ocfs2_trylock_journal(struct ocfs2_super *osb, int slot_num); static int ocfs2_recover_orphans(struct ocfs2_super *osb, @@ -562,8 +562,18 @@ done: return status; } +static void ocfs2_bump_recovery_generation(struct ocfs2_dinode *di) +{ + le32_add_cpu(&(di->id1.journal1.ij_recovery_generation), 1); +} + +static u32 ocfs2_get_recovery_generation(struct ocfs2_dinode *di) +{ + return le32_to_cpu(di->id1.journal1.ij_recovery_generation); +} + static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, - int dirty) + int dirty, int replayed) { int status; unsigned int flags; @@ -593,6 +603,9 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, flags &= ~OCFS2_JOURNAL_DIRTY_FL; fe->id1.journal1.ij_flags = cpu_to_le32(flags); + if (replayed) + ocfs2_bump_recovery_generation(fe); + status = ocfs2_write_block(osb, bh, journal->j_inode); if (status < 0) mlog_errno(status); @@ -667,7 +680,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb) * Do not toggle if flush was unsuccessful otherwise * will leave dirty metadata in a "clean" journal */ - status = ocfs2_journal_toggle_dirty(osb, 0); + status = ocfs2_journal_toggle_dirty(osb, 0, 0); if (status < 0) mlog_errno(status); } @@ -710,7 +723,7 @@ static void ocfs2_clear_journal_error(struct super_block *sb, } } -int ocfs2_journal_load(struct ocfs2_journal *journal, int local) +int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed) { int status = 0; struct ocfs2_super *osb; @@ -729,7 +742,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local) ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num); - status = ocfs2_journal_toggle_dirty(osb, 1); + status = ocfs2_journal_toggle_dirty(osb, 1, replayed); if (status < 0) { mlog_errno(status); goto done; @@ -771,7 +784,7 @@ int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full) goto bail; } - status = ocfs2_journal_toggle_dirty(journal->j_osb, 0); + status = ocfs2_journal_toggle_dirty(journal->j_osb, 0, 0); if (status < 0) mlog_errno(status); @@ -1034,6 +1047,12 @@ restart: spin_unlock(&osb->osb_lock); mlog(0, "All nodes recovered\n"); + /* Refresh all journal recovery generations from disk */ + status = ocfs2_check_journals_nolocks(osb); + status = (status == -EROFS) ? 0 : status; + if (status < 0) + mlog_errno(status); + ocfs2_super_unlock(osb, 1); /* We always run recovery on our own orphan dir - the dead @@ -1096,6 +1115,42 @@ out: mlog_exit_void(); } +static int ocfs2_read_journal_inode(struct ocfs2_super *osb, + int slot_num, + struct buffer_head **bh, + struct inode **ret_inode) +{ + int status = -EACCES; + struct inode *inode = NULL; + + BUG_ON(slot_num >= osb->max_slots); + + inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, + slot_num); + if (!inode || is_bad_inode(inode)) { + mlog_errno(status); + goto bail; + } + SET_INODE_JOURNAL(inode); + + status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, bh, 0, inode); + if (status < 0) { + mlog_errno(status); + goto bail; + } + + status = 0; + +bail: + if (inode) { + if (status || !ret_inode) + iput(inode); + else + *ret_inode = inode; + } + return status; +} + /* Does the actual journal replay and marks the journal inode as * clean. Will only replay if the journal inode is marked dirty. */ static int ocfs2_replay_journal(struct ocfs2_super *osb, @@ -1109,22 +1164,36 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, struct ocfs2_dinode *fe; journal_t *journal = NULL; struct buffer_head *bh = NULL; + u32 slot_reco_gen; - inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, - slot_num); - if (inode == NULL) { - status = -EACCES; + status = ocfs2_read_journal_inode(osb, slot_num, &bh, &inode); + if (status) { mlog_errno(status); goto done; } - if (is_bad_inode(inode)) { - status = -EACCES; - iput(inode); - inode = NULL; - mlog_errno(status); + + fe = (struct ocfs2_dinode *)bh->b_data; + slot_reco_gen = ocfs2_get_recovery_generation(fe); + brelse(bh); + bh = NULL; + + /* + * As the fs recovery is asynchronous, there is a small chance that + * another node mounted (and recovered) the slot before the recovery + * thread could get the lock. To handle that, we dirty read the journal + * inode for that slot to get the recovery generation. If it is + * different than what we expected, the slot has been recovered. + * If not, it needs recovery. + */ + if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) { + mlog(0, "Slot %u already recovered (old/new=%u/%u)\n", slot_num, + osb->slot_recovery_generations[slot_num], slot_reco_gen); + osb->slot_recovery_generations[slot_num] = slot_reco_gen; + status = -EBUSY; goto done; } - SET_INODE_JOURNAL(inode); + + /* Continue with recovery as the journal has not yet been recovered */ status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); if (status < 0) { @@ -1138,9 +1207,12 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, fe = (struct ocfs2_dinode *) bh->b_data; flags = le32_to_cpu(fe->id1.journal1.ij_flags); + slot_reco_gen = ocfs2_get_recovery_generation(fe); if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) { mlog(0, "No recovery required for node %d\n", node_num); + /* Refresh recovery generation for the slot */ + osb->slot_recovery_generations[slot_num] = slot_reco_gen; goto done; } @@ -1188,6 +1260,11 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, flags &= ~OCFS2_JOURNAL_DIRTY_FL; fe->id1.journal1.ij_flags = cpu_to_le32(flags); + /* Increment recovery generation to indicate successful recovery */ + ocfs2_bump_recovery_generation(fe); + osb->slot_recovery_generations[slot_num] = + ocfs2_get_recovery_generation(fe); + status = ocfs2_write_block(osb, bh, inode); if (status < 0) mlog_errno(status); @@ -1252,6 +1329,13 @@ static int ocfs2_recover_node(struct ocfs2_super *osb, status = ocfs2_replay_journal(osb, node_num, slot_num); if (status < 0) { + if (status == -EBUSY) { + mlog(0, "Skipping recovery for slot %u (node %u) " + "as another node has recovered it\n", slot_num, + node_num); + status = 0; + goto done; + } mlog_errno(status); goto done; } @@ -1334,12 +1418,29 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) { unsigned int node_num; int status, i; + struct buffer_head *bh = NULL; + struct ocfs2_dinode *di; /* This is called with the super block cluster lock, so we * know that the slot map can't change underneath us. */ spin_lock(&osb->osb_lock); for (i = 0; i < osb->max_slots; i++) { + /* Read journal inode to get the recovery generation */ + status = ocfs2_read_journal_inode(osb, i, &bh, NULL); + if (status) { + mlog_errno(status); + goto bail; + } + di = (struct ocfs2_dinode *)bh->b_data; + osb->slot_recovery_generations[i] = + ocfs2_get_recovery_generation(di); + brelse(bh); + bh = NULL; + + mlog(0, "Slot %u recovery generation is %u\n", i, + osb->slot_recovery_generations[i]); + if (i == osb->slot_num) continue; @@ -1603,49 +1704,41 @@ static int ocfs2_commit_thread(void *arg) return 0; } -/* Look for a dirty journal without taking any cluster locks. Used for - * hard readonly access to determine whether the file system journals - * require recovery. */ +/* Reads all the journal inodes without taking any cluster locks. Used + * for hard readonly access to determine whether any journal requires + * recovery. Also used to refresh the recovery generation numbers after + * a journal has been recovered by another node. + */ int ocfs2_check_journals_nolocks(struct ocfs2_super *osb) { int ret = 0; unsigned int slot; - struct buffer_head *di_bh; + struct buffer_head *di_bh = NULL; struct ocfs2_dinode *di; - struct inode *journal = NULL; + int journal_dirty = 0; for(slot = 0; slot < osb->max_slots; slot++) { - journal = ocfs2_get_system_file_inode(osb, - JOURNAL_SYSTEM_INODE, - slot); - if (!journal || is_bad_inode(journal)) { - ret = -EACCES; - mlog_errno(ret); - goto out; - } - - di_bh = NULL; - ret = ocfs2_read_block(osb, OCFS2_I(journal)->ip_blkno, &di_bh, - 0, journal); - if (ret < 0) { + ret = ocfs2_read_journal_inode(osb, slot, &di_bh, NULL); + if (ret) { mlog_errno(ret); goto out; } di = (struct ocfs2_dinode *) di_bh->b_data; + osb->slot_recovery_generations[slot] = + ocfs2_get_recovery_generation(di); + if (le32_to_cpu(di->id1.journal1.ij_flags) & OCFS2_JOURNAL_DIRTY_FL) - ret = -EROFS; + journal_dirty = 1; brelse(di_bh); - if (ret) - break; + di_bh = NULL; } out: - if (journal) - iput(journal); - + if (journal_dirty) + ret = -EROFS; return ret; } diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index db82be2..2178ebf 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -161,7 +161,8 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, void ocfs2_journal_shutdown(struct ocfs2_super *osb); int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full); -int ocfs2_journal_load(struct ocfs2_journal *journal, int local); +int ocfs2_journal_load(struct ocfs2_journal *journal, int local, + int replayed); int ocfs2_check_journals_nolocks(struct ocfs2_super *osb); void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num); diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 1cb814b..7f625f2 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -204,6 +204,8 @@ struct ocfs2_super struct ocfs2_slot_info *slot_info; + u32 *slot_recovery_generations; + spinlock_t node_map_lock; u64 root_blkno; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 2560b33..88255d3f 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1442,6 +1442,15 @@ static int ocfs2_initialize_super(struct super_block *sb, } mlog(0, "max_slots for this device: %u\n", osb->max_slots); + osb->slot_recovery_generations = + kcalloc(osb->max_slots, sizeof(*osb->slot_recovery_generations), + GFP_KERNEL); + if (!osb->slot_recovery_generations) { + status = -ENOMEM; + mlog_errno(status); + goto bail; + } + init_waitqueue_head(&osb->osb_wipe_event); osb->osb_orphan_wipes = kcalloc(osb->max_slots, sizeof(*osb->osb_orphan_wipes), @@ -1703,7 +1712,7 @@ static int ocfs2_check_volume(struct ocfs2_super *osb) local = ocfs2_mount_local(osb); /* will play back anything left in the journal. */ - status = ocfs2_journal_load(osb->journal, local); + status = ocfs2_journal_load(osb->journal, local, dirty); if (status < 0) { mlog(ML_ERROR, "ocfs2 journal load failed! %d\n", status); goto finally; @@ -1768,6 +1777,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb) ocfs2_free_slot_info(osb); kfree(osb->osb_orphan_wipes); + kfree(osb->slot_recovery_generations); /* FIXME * This belongs in journal shutdown, but because we have to * allocate osb->journal at the start of ocfs2_initalize_osb(), -- cgit v0.10.2 From 961cecbee6786f4b1f1b8f695e87045b583f9f49 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Wed, 16 Jul 2008 17:22:22 -0700 Subject: [PATCH] ocfs2: Fix oops when racing files truncates with writes into an mmap region This patch fixes an oops that is reproduced when one races writes to a mmap-ed region with another process truncating the file. Signed-off-by: Sunil Mushran Signed-off-by: Mark Fasheh diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 1db0801..506c24f 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1073,12 +1073,15 @@ static void ocfs2_write_failure(struct inode *inode, for(i = 0; i < wc->w_num_pages; i++) { tmppage = wc->w_pages[i]; - if (ocfs2_should_order_data(inode)) - walk_page_buffers(wc->w_handle, page_buffers(tmppage), - from, to, NULL, - ocfs2_journal_dirty_data); - - block_commit_write(tmppage, from, to); + if (page_has_buffers(tmppage)) { + if (ocfs2_should_order_data(inode)) + walk_page_buffers(wc->w_handle, + page_buffers(tmppage), + from, to, NULL, + ocfs2_journal_dirty_data); + + block_commit_write(tmppage, from, to); + } } } @@ -1901,12 +1904,14 @@ int ocfs2_write_end_nolock(struct address_space *mapping, to = PAGE_CACHE_SIZE; } - if (ocfs2_should_order_data(inode)) - walk_page_buffers(wc->w_handle, page_buffers(tmppage), - from, to, NULL, - ocfs2_journal_dirty_data); - - block_commit_write(tmppage, from, to); + if (page_has_buffers(tmppage)) { + if (ocfs2_should_order_data(inode)) + walk_page_buffers(wc->w_handle, + page_buffers(tmppage), + from, to, NULL, + ocfs2_journal_dirty_data); + block_commit_write(tmppage, from, to); + } } out_write_size: -- cgit v0.10.2 From c259ae52e204d42f8b2d484c85517a4c367030e1 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 21 Jul 2008 09:59:15 +0200 Subject: [PATCH] ocfs2: Release mutex in error handling code The mutex is released on a successful return, so it would seem that it should be released on an error return as well. The semantic patch finds this problem is as follows: (http://www.emn.fr/x-info/coccinelle/) // @@ expression l; @@ mutex_lock(l); ... when != mutex_unlock(l) when any when strict ( if (...) { ... when != mutex_unlock(l) + mutex_unlock(l); return ...; } | mutex_unlock(l); ) // Signed-off-by: Julia Lawall Signed-off-by: Mark Fasheh diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index be2dd95..ec2ed15 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1766,8 +1766,8 @@ out_inode_unlock: out_rw_unlock: ocfs2_rw_unlock(inode, 1); - mutex_unlock(&inode->i_mutex); out: + mutex_unlock(&inode->i_mutex); return ret; } -- cgit v0.10.2