From ecf6f5e7d68471b08603f7c20143ac236602364f Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Mon, 8 Nov 2010 18:08:14 -0500 Subject: fanotify: deny permissions when no event was sent If no event was sent to userspace we cannot expect userspace to respond to permissions requests. Today such requests just hang forever. This patch will deny any permissions event which was unable to be sent to userspace. Reported-by: Tvrtko Ursulin Signed-off-by: Eric Paris diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 0632248..045c079 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -106,7 +106,7 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) return client_fd; } -static ssize_t fill_event_metadata(struct fsnotify_group *group, +static int fill_event_metadata(struct fsnotify_group *group, struct fanotify_event_metadata *metadata, struct fsnotify_event *event) { @@ -257,10 +257,11 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, pr_debug("%s: group=%p event=%p\n", __func__, group, event); - fd = fill_event_metadata(group, &fanotify_event_metadata, event); - if (fd < 0) - return fd; + ret = fill_event_metadata(group, &fanotify_event_metadata, event); + if (ret < 0) + goto out; + fd = ret; ret = prepare_for_access_response(group, event, fd); if (ret) goto out_close_fd; @@ -275,6 +276,13 @@ out_kill_access_response: remove_access_response(group, event, fd); out_close_fd: sys_close(fd); +out: +#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS + if (event->mask & FAN_ALL_PERM_EVENTS) { + event->response = FAN_DENY; + wake_up(&group->fanotify_data.access_waitq); + } +#endif return ret; } -- cgit v0.10.2 From 88d60c32765716289abeb362c44adf6c35c6824c Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Mon, 8 Nov 2010 18:19:22 -0500 Subject: fanotify: remove packed from access response message Since fanotify has decided to be careful about alignment and packing rather than rely on __attribute__((packed)) for multiarch support. Since this attribute isn't doing anything on fanotify_response we just drop it. This does not break API/ABI. Suggested-by: Tvrtko Ursulin Signed-off-by: Eric Paris diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 0f01214..bdbf9bb 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -96,7 +96,7 @@ struct fanotify_event_metadata { struct fanotify_response { __s32 fd; __u32 response; -} __attribute__ ((packed)); +}; /* Legit userspace responses to a _PERM event */ #define FAN_ALLOW 0x01 -- cgit v0.10.2 From b1085ba80cd2784400a7beec3fda5099198ed01c Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Fri, 5 Nov 2010 17:05:27 +0100 Subject: fanotify: if set by user unset FMODE_NONOTIFY before fsnotify_perm() is called Unsetting FMODE_NONOTIFY in fsnotify_open() is too late, since fsnotify_perm() is called before. If FMODE_NONOTIFY is set fsnotify_perm() will skip permission checks, so a user can still disable permission checks by setting this flag in an open() call. This patch corrects this by unsetting the flag before fsnotify_perm is called. Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris diff --git a/fs/namei.c b/fs/namei.c index 5362af9..4ff7ca5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1748,6 +1748,9 @@ struct file *do_filp_open(int dfd, const char *pathname, if (!(open_flag & O_CREAT)) mode = 0; + /* Must never be set by userspace */ + open_flag &= ~FMODE_NONOTIFY; + /* * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only * check for O_DSYNC if the need any syncing at all we enforce it's diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 5c185fa..b10bcde 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -235,9 +235,6 @@ static inline void fsnotify_open(struct file *file) if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; - /* FMODE_NONOTIFY must never be set from user */ - file->f_mode &= ~FMODE_NONOTIFY; - fsnotify_parent(path, NULL, mask); fsnotify(inode, mask, path, FSNOTIFY_EVENT_PATH, NULL, 0); } -- cgit v0.10.2 From fa218ab98c31eeacd12b89501e6b99d146ea56cc Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Tue, 9 Nov 2010 18:18:16 +0100 Subject: fanotify: correct broken ref counting in case adding a mark failed If adding a mount or inode mark failed fanotify_free_mark() is called explicitly. But at this time the mark has already been put into the destroy list of the fsnotify_mark kernel thread. If the thread is too slow it will try to decrease the reference of a mark, that has already been freed by fanotify_free_mark(). (If its fast enough it will only decrease the marks ref counter from 2 to 1 - note that the counter has been increased to 2 in add_mark() - which has practically no effect.) This patch fixes the ref counting by not calling free_mark() explicitly, but decreasing the ref counter and rely on the fsnotify_mark thread to cleanup in case adding the mark has failed. Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 045c079..c0ca1fa 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -594,11 +594,10 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, { struct fsnotify_mark *fsn_mark; __u32 added; + int ret = 0; fsn_mark = fsnotify_find_vfsmount_mark(group, mnt); if (!fsn_mark) { - int ret; - if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) return -ENOSPC; @@ -608,17 +607,16 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, fsnotify_init_mark(fsn_mark, fanotify_free_mark); ret = fsnotify_add_mark(fsn_mark, group, NULL, mnt, 0); - if (ret) { - fanotify_free_mark(fsn_mark); - return ret; - } + if (ret) + goto err; } added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); - fsnotify_put_mark(fsn_mark); + if (added & ~mnt->mnt_fsnotify_mask) fsnotify_recalc_vfsmount_mask(mnt); - - return 0; +err: + fsnotify_put_mark(fsn_mark); + return ret; } static int fanotify_add_inode_mark(struct fsnotify_group *group, @@ -627,6 +625,7 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, { struct fsnotify_mark *fsn_mark; __u32 added; + int ret = 0; pr_debug("%s: group=%p inode=%p\n", __func__, group, inode); @@ -642,8 +641,6 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, fsn_mark = fsnotify_find_inode_mark(group, inode); if (!fsn_mark) { - int ret; - if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) return -ENOSPC; @@ -653,16 +650,16 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, fsnotify_init_mark(fsn_mark, fanotify_free_mark); ret = fsnotify_add_mark(fsn_mark, group, inode, NULL, 0); - if (ret) { - fanotify_free_mark(fsn_mark); - return ret; - } + if (ret) + goto err; } added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); - fsnotify_put_mark(fsn_mark); + if (added & ~inode->i_fsnotify_mask) fsnotify_recalc_inode_mask(inode); - return 0; +err: + fsnotify_put_mark(fsn_mark); + return ret; } /* fanotify syscalls */ -- cgit v0.10.2 From 1734dee4e3a296cb72b4819fc2e7ef2440737dff Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Mon, 22 Nov 2010 18:46:33 +0100 Subject: fanotify: Dont allow a mask of 0 if setting or removing a mark In mark_remove_from_mask() we destroy marks that have their event mask cleared. Thus we should not allow the creation of those marks in the first place. With this patch we check if the mask given from user is 0 in case of FAN_MARK_ADD. If so we return an error. Same for FAN_MARK_REMOVE since this does not have any effect. Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index c0ca1fa..480434c 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -769,8 +769,10 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, if (flags & ~FAN_ALL_MARK_FLAGS) return -EINVAL; switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) { - case FAN_MARK_ADD: + case FAN_MARK_ADD: /* fallthrough */ case FAN_MARK_REMOVE: + if (!mask) + return -EINVAL; case FAN_MARK_FLUSH: break; default: -- cgit v0.10.2 From 09e5f14e57c70f9d357862bb56e57026c51092a1 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Fri, 19 Nov 2010 10:58:07 +0100 Subject: fanotify: on group destroy allow all waiters to bypass permission check When fanotify_release() is called, there may still be processes waiting for access permission. Currently only processes for which an event has already been queued into the groups access list will be woken up. Processes for which no event has been queued will continue to sleep and thus cause a deadlock when fsnotify_put_group() is called. Furthermore there is a race allowing further processes to be waiting on the access wait queue after wake_up (if they arrive before clear_marks_by_group() is called). This patch corrects this by setting a flag to inform processes that the group is about to be destroyed and thus not to wait for access permission. [additional changelog from eparis] Lets think about the 4 relevant code paths from the PoV of the 'operator' 'listener' 'responder' and 'closer'. Where operator is the process doing an action (like open/read) which could require permission. Listener is the task (or in this case thread) slated with reading from the fanotify file descriptor. The 'responder' is the thread responsible for responding to access requests. 'Closer' is the thread attempting to close the fanotify file descriptor. The 'operator' is going to end up in: fanotify_handle_event() get_response_from_access() (THIS BLOCKS WAITING ON USERSPACE) The 'listener' interesting code path fanotify_read() copy_event_to_user() prepare_for_access_response() (THIS CREATES AN fanotify_response_event) The 'responder' code path: fanotify_write() process_access_response() (REMOVE A fanotify_response_event, SET RESPONSE, WAKE UP 'operator') The 'closer': fanotify_release() (SUPPOSED TO CLEAN UP THE REST OF THIS MESS) What we have today is that in the closer we remove all of the fanotify_response_events and set a bit so no more response events are ever created in prepare_for_access_response(). The bug is that we never wake all of the operators up and tell them to move along. You fix that in fanotify_get_response_from_access(). You also fix other operators which haven't gotten there yet. So I agree that's a good fix. [/additional changelog from eparis] [remove additional changes to minimize patch size] [move initialization so it was inside CONFIG_FANOTIFY_PERMISSION] Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index b04f88e..f35794b 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -92,7 +92,11 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group, pr_debug("%s: group=%p event=%p\n", __func__, group, event); - wait_event(group->fanotify_data.access_waitq, event->response); + wait_event(group->fanotify_data.access_waitq, event->response || + atomic_read(&group->fanotify_data.bypass_perm)); + + if (!event->response) /* bypass_perm set */ + return 0; /* userspace responded, convert to something usable */ spin_lock(&event->lock); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 480434c..01fffe6 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -200,7 +200,7 @@ static int prepare_for_access_response(struct fsnotify_group *group, mutex_lock(&group->fanotify_data.access_mutex); - if (group->fanotify_data.bypass_perm) { + if (atomic_read(&group->fanotify_data.bypass_perm)) { mutex_unlock(&group->fanotify_data.access_mutex); kmem_cache_free(fanotify_response_event_cache, re); event->response = FAN_ALLOW; @@ -390,7 +390,7 @@ static int fanotify_release(struct inode *ignored, struct file *file) mutex_lock(&group->fanotify_data.access_mutex); - group->fanotify_data.bypass_perm = true; + atomic_inc(&group->fanotify_data.bypass_perm); list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) { pr_debug("%s: found group=%p re=%p event=%p\n", __func__, group, @@ -703,6 +703,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) mutex_init(&group->fanotify_data.access_mutex); init_waitqueue_head(&group->fanotify_data.access_waitq); INIT_LIST_HEAD(&group->fanotify_data.access_list); + atomic_set(&group->fanotify_data.bypass_perm, 0); #endif switch (flags & FAN_ALL_CLASS_BITS) { case FAN_CLASS_NOTIF: diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 0a68f92..7380763 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -166,7 +166,7 @@ struct fsnotify_group { struct mutex access_mutex; struct list_head access_list; wait_queue_head_t access_waitq; - bool bypass_perm; /* protected by access_mutex */ + atomic_t bypass_perm; #endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */ int f_flags; unsigned int max_marks; -- cgit v0.10.2 From a2ae4cc9a16e211c8a128ba10d22a85431f093ab Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 23 Nov 2010 18:18:37 -0500 Subject: inotify: stop kernel memory leak on file creation failure If inotify_init is unable to allocate a new file for the new inotify group we leak the new group. This patch drops the reference on the group on file allocation failure. Reported-by: Vegard Nossum cc: stable@kernel.org Signed-off-by: Eric Paris diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 444c305..4cd5d5d 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -752,6 +752,7 @@ SYSCALL_DEFINE1(inotify_init1, int, flags) if (ret >= 0) return ret; + fsnotify_put_group(group); atomic_dec(&user->inotify_devs); out_free_uid: free_uid(user); -- cgit v0.10.2 From 26379198937fcc9bbe7be76be695d06df8334eaa Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 23 Nov 2010 23:48:26 -0500 Subject: fanotify: do not leak user reference on allocation failure If fanotify_init is unable to allocate a new fsnotify group it will return but will not drop its reference on the associated user struct. Drop that reference on error. Reported-by: Vegard Nossum Signed-off-by: Eric Paris diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 01fffe6..ca54957 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -692,8 +692,10 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */ group = fsnotify_alloc_group(&fanotify_fsnotify_ops); - if (IS_ERR(group)) + if (IS_ERR(group)) { + free_uid(user); return PTR_ERR(group); + } group->fanotify_data.user = user; atomic_inc(&user->fanotify_listeners); -- cgit v0.10.2 From e9a3854fd4ff3907e6c200a3980e19365ee695e9 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Wed, 24 Nov 2010 18:22:09 +0100 Subject: fanotify: Introduce FAN_NOFD FAN_NOFD is used in fanotify events that do not provide an open file descriptor (like the overflow_event). Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index bdbf9bb..c732243 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -101,6 +101,8 @@ struct fanotify_response { /* Legit userspace responses to a _PERM event */ #define FAN_ALLOW 0x01 #define FAN_DENY 0x02 +/* No fd set in event */ +#define FAN_NOFD -1 /* Helper functions to deal with fanotify_event_metadata buffers */ #define FAN_EVENT_METADATA_LEN (sizeof(struct fanotify_event_metadata)) -- cgit v0.10.2 From fdbf3ceeb659f0b3c0e8dd79b331b7ac05910f1e Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Wed, 24 Nov 2010 18:26:04 +0100 Subject: fanotify: Dont try to open a file descriptor for the overflow event We should not try to open a file descriptor for the overflow event since this will always fail. Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index ca54957..dccd798 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -110,6 +110,8 @@ static int fill_event_metadata(struct fsnotify_group *group, struct fanotify_event_metadata *metadata, struct fsnotify_event *event) { + int ret = 0; + pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, group, metadata, event); @@ -117,9 +119,15 @@ static int fill_event_metadata(struct fsnotify_group *group, metadata->vers = FANOTIFY_METADATA_VERSION; metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS; metadata->pid = pid_vnr(event->tgid); - metadata->fd = create_fd(group, event); + if (unlikely(event->mask & FAN_Q_OVERFLOW)) + metadata->fd = FAN_NOFD; + else { + metadata->fd = create_fd(group, event); + if (metadata->fd < 0) + ret = metadata->fd; + } - return metadata->fd; + return ret; } #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS @@ -261,7 +269,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, if (ret < 0) goto out; - fd = ret; + fd = fanotify_event_metadata.fd; ret = prepare_for_access_response(group, event, fd); if (ret) goto out_close_fd; @@ -275,7 +283,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, out_kill_access_response: remove_access_response(group, event, fd); out_close_fd: - sys_close(fd); + if (fd != FAN_NOFD) + sys_close(fd); out: #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS if (event->mask & FAN_ALL_PERM_EVENTS) { -- cgit v0.10.2 From 62731fa0c893515dc6cbc3e0a2879a92793c735f Mon Sep 17 00:00:00 2001 From: Alexey Zaytsev Date: Mon, 22 Nov 2010 00:33:03 +0000 Subject: fanotify: split version into version and metadata_len To implement per event type optional headers we are interested in knowing how long the metadata structure is. This patch slits the __u32 version field into a __u8 version and a __u16 metadata_len field (with __u8 left over). This should allow for backwards compat ABI. Signed-off-by: Alexey Zaytsev [rewrote descrtion and changed object sizes and ordering - eparis] Signed-off-by: Eric Paris diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index c732243..6c6133f 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -83,11 +83,13 @@ FAN_ALL_PERM_EVENTS |\ FAN_Q_OVERFLOW) -#define FANOTIFY_METADATA_VERSION 2 +#define FANOTIFY_METADATA_VERSION 3 struct fanotify_event_metadata { __u32 event_len; - __u32 vers; + __u8 vers; + __u8 reserved; + __u16 metadata_len; __aligned_u64 mask; __s32 fd; __s32 pid; -- cgit v0.10.2 From 7d13162332f2b67a941d18cee20f1c0413e020de Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 7 Dec 2010 15:27:57 -0500 Subject: fanotify: fill in the metadata_len field on struct fanotify_event_metadata The fanotify_event_metadata now has a field which is supposed to indicate the length of the metadata portion of the event. Fill in that field as well. Based-in-part-on-patch-by: Alexey Zaytsev Signed-off-by: Eric Paris diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index dccd798..8b61220 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -116,6 +116,7 @@ static int fill_event_metadata(struct fsnotify_group *group, group, metadata, event); metadata->event_len = FAN_EVENT_METADATA_LEN; + metadata->metadata_len = FAN_EVENT_METADATA_LEN; metadata->vers = FANOTIFY_METADATA_VERSION; metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS; metadata->pid = pid_vnr(event->tgid); @@ -275,10 +276,11 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, goto out_close_fd; ret = -EFAULT; - if (copy_to_user(buf, &fanotify_event_metadata, FAN_EVENT_METADATA_LEN)) + if (copy_to_user(buf, &fanotify_event_metadata, + fanotify_event_metadata.event_len)) goto out_kill_access_response; - return FAN_EVENT_METADATA_LEN; + return fanotify_event_metadata.event_len; out_kill_access_response: remove_access_response(group, event, fd); -- cgit v0.10.2