diff options
Diffstat (limited to 'ipc/sem.c')
-rw-r--r-- | ipc/sem.c | 280 |
1 files changed, 182 insertions, 98 deletions
@@ -243,71 +243,122 @@ static void merge_queues(struct sem_array *sma) } } +static void sem_rcu_free(struct rcu_head *head) +{ + struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); + struct sem_array *sma = ipc_rcu_to_struct(p); + + security_sem_free(sma); + ipc_rcu_free(head); +} + +/* + * Wait until all currently ongoing simple ops have completed. + * Caller must own sem_perm.lock. + * New simple ops cannot start, because simple ops first check + * that sem_perm.lock is free. + * that a) sem_perm.lock is free and b) complex_count is 0. + */ +static void sem_wait_array(struct sem_array *sma) +{ + int i; + struct sem *sem; + + if (sma->complex_count) { + /* The thread that increased sma->complex_count waited on + * all sem->lock locks. Thus we don't need to wait again. + */ + return; + } + + for (i = 0; i < sma->sem_nsems; i++) { + sem = sma->sem_base + i; + spin_unlock_wait(&sem->lock); + } +} + /* * If the request contains only one semaphore operation, and there are * no complex transactions pending, lock only the semaphore involved. * Otherwise, lock the entire semaphore array, since we either have * multiple semaphores in our own semops, or we need to look at * semaphores from other pending complex operations. - * - * Carefully guard against sma->complex_count changing between zero - * and non-zero while we are spinning for the lock. The value of - * sma->complex_count cannot change while we are holding the lock, - * so sem_unlock should be fine. - * - * The global lock path checks that all the local locks have been released, - * checking each local lock once. This means that the local lock paths - * cannot start their critical sections while the global lock is held. */ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, int nsops) { - int locknum; - again: - if (nsops == 1 && !sma->complex_count) { - struct sem *sem = sma->sem_base + sops->sem_num; + struct sem *sem; - /* Lock just the semaphore we are interested in. */ - spin_lock(&sem->lock); + if (nsops != 1) { + /* Complex operation - acquire a full lock */ + ipc_lock_object(&sma->sem_perm); - /* - * If sma->complex_count was set while we were spinning, - * we may need to look at things we did not lock here. + /* And wait until all simple ops that are processed + * right now have dropped their locks. */ - if (unlikely(sma->complex_count)) { - spin_unlock(&sem->lock); - goto lock_array; - } + sem_wait_array(sma); + return -1; + } + + /* + * Only one semaphore affected - try to optimize locking. + * The rules are: + * - optimized locking is possible if no complex operation + * is either enqueued or processed right now. + * - The test for enqueued complex ops is simple: + * sma->complex_count != 0 + * - Testing for complex ops that are processed right now is + * a bit more difficult. Complex ops acquire the full lock + * and first wait that the running simple ops have completed. + * (see above) + * Thus: If we own a simple lock and the global lock is free + * and complex_count is now 0, then it will stay 0 and + * thus just locking sem->lock is sufficient. + */ + sem = sma->sem_base + sops->sem_num; + if (sma->complex_count == 0) { /* - * Another process is holding the global lock on the - * sem_array; we cannot enter our critical section, - * but have to wait for the global lock to be released. + * It appears that no complex operation is around. + * Acquire the per-semaphore lock. */ - if (unlikely(spin_is_locked(&sma->sem_perm.lock))) { - spin_unlock(&sem->lock); - spin_unlock_wait(&sma->sem_perm.lock); - goto again; + spin_lock(&sem->lock); + + /* Then check that the global lock is free */ + if (!spin_is_locked(&sma->sem_perm.lock)) { + /* spin_is_locked() is not a memory barrier */ + smp_mb(); + + /* Now repeat the test of complex_count: + * It can't change anymore until we drop sem->lock. + * Thus: if is now 0, then it will stay 0. + */ + if (sma->complex_count == 0) { + /* fast path successful! */ + return sops->sem_num; + } } + spin_unlock(&sem->lock); + } - locknum = sops->sem_num; + /* slow path: acquire the full lock */ + ipc_lock_object(&sma->sem_perm); + + if (sma->complex_count == 0) { + /* False alarm: + * There is no complex operation, thus we can switch + * back to the fast path. + */ + spin_lock(&sem->lock); + ipc_unlock_object(&sma->sem_perm); + return sops->sem_num; } else { - int i; - /* - * Lock the semaphore array, and wait for all of the - * individual semaphore locks to go away. The code - * above ensures no new single-lock holders will enter - * their critical section while the array lock is held. + /* Not a false alarm, thus complete the sequence for a + * full lock. */ - lock_array: - ipc_lock_object(&sma->sem_perm); - for (i = 0; i < sma->sem_nsems; i++) { - struct sem *sem = sma->sem_base + i; - spin_unlock_wait(&sem->lock); - } - locknum = -1; + sem_wait_array(sma); + return -1; } - return locknum; } static inline void sem_unlock(struct sem_array *sma, int locknum) @@ -322,7 +373,7 @@ static inline void sem_unlock(struct sem_array *sma, int locknum) } /* - * sem_lock_(check_) routines are called in the paths where the rw_mutex + * sem_lock_(check_) routines are called in the paths where the rwsem * is not held. * * The caller holds the RCU read lock. @@ -374,12 +425,7 @@ static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns static inline void sem_lock_and_putref(struct sem_array *sma) { sem_lock(sma, NULL, -1); - ipc_rcu_putref(sma); -} - -static inline void sem_putref(struct sem_array *sma) -{ - ipc_rcu_putref(sma); + ipc_rcu_putref(sma, ipc_rcu_free); } static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) @@ -426,7 +472,7 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) * @ns: namespace * @params: ptr to the structure that contains key, semflg and nsems * - * Called with sem_ids.rw_mutex held (as a writer) + * Called with sem_ids.rwsem held (as a writer) */ static int newary(struct ipc_namespace *ns, struct ipc_params *params) @@ -458,14 +504,13 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) sma->sem_perm.security = NULL; retval = security_sem_alloc(sma); if (retval) { - ipc_rcu_putref(sma); + ipc_rcu_putref(sma, ipc_rcu_free); return retval; } id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni); if (id < 0) { - security_sem_free(sma); - ipc_rcu_putref(sma); + ipc_rcu_putref(sma, sem_rcu_free); return id; } ns->used_sems += nsems; @@ -492,7 +537,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) /* - * Called with sem_ids.rw_mutex and ipcp locked. + * Called with sem_ids.rwsem and ipcp locked. */ static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg) { @@ -503,7 +548,7 @@ static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg) } /* - * Called with sem_ids.rw_mutex and ipcp locked. + * Called with sem_ids.rwsem and ipcp locked. */ static inline int sem_more_checks(struct kern_ipc_perm *ipcp, struct ipc_params *params) @@ -873,6 +918,24 @@ again: } /** + * set_semotime(sma, sops) - set sem_otime + * @sma: semaphore array + * @sops: operations that modified the array, may be NULL + * + * sem_otime is replicated to avoid cache line trashing. + * This function sets one instance to the current time. + */ +static void set_semotime(struct sem_array *sma, struct sembuf *sops) +{ + if (sops == NULL) { + sma->sem_base[0].sem_otime = get_seconds(); + } else { + sma->sem_base[sops[0].sem_num].sem_otime = + get_seconds(); + } +} + +/** * do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue * @sma: semaphore array * @sops: operations that were performed @@ -922,17 +985,10 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop } } } - if (otime) { - if (sops == NULL) { - sma->sem_base[0].sem_otime = get_seconds(); - } else { - sma->sem_base[sops[0].sem_num].sem_otime = - get_seconds(); - } - } + if (otime) + set_semotime(sma, sops); } - /* The following counts are associated to each semaphore: * semncnt number of tasks waiting on semval being nonzero * semzcnt number of tasks waiting on semval being zero @@ -994,8 +1050,8 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum) return semzcnt; } -/* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked - * as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex +/* Free a semaphore set. freeary() is called with sem_ids.rwsem locked + * as a writer and the spinlock for this semaphore set hold. sem_ids.rwsem * remains locked on exit. */ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) @@ -1047,8 +1103,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) wake_up_sem_queue_do(&tasks); ns->used_sems -= sma->sem_nsems; - security_sem_free(sma); - ipc_rcu_putref(sma); + ipc_rcu_putref(sma, sem_rcu_free); } static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version) @@ -1116,7 +1171,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, seminfo.semmnu = SEMMNU; seminfo.semmap = SEMMAP; seminfo.semume = SEMUME; - down_read(&sem_ids(ns).rw_mutex); + down_read(&sem_ids(ns).rwsem); if (cmd == SEM_INFO) { seminfo.semusz = sem_ids(ns).in_use; seminfo.semaem = ns->used_sems; @@ -1125,7 +1180,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, seminfo.semaem = SEMAEM; } max_id = ipc_get_maxid(&sem_ids(ns)); - up_read(&sem_ids(ns).rw_mutex); + up_read(&sem_ids(ns).rwsem); if (copy_to_user(p, &seminfo, sizeof(struct seminfo))) return -EFAULT; return (max_id < 0) ? 0: max_id; @@ -1227,6 +1282,12 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, sem_lock(sma, NULL, -1); + if (sma->sem_perm.deleted) { + sem_unlock(sma, -1); + rcu_read_unlock(); + return -EIDRM; + } + curr = &sma->sem_base[semnum]; ipc_assert_locked_object(&sma->sem_perm); @@ -1281,28 +1342,28 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, int i; sem_lock(sma, NULL, -1); + if (sma->sem_perm.deleted) { + err = -EIDRM; + goto out_unlock; + } if(nsems > SEMMSL_FAST) { if (!ipc_rcu_getref(sma)) { - sem_unlock(sma, -1); - rcu_read_unlock(); err = -EIDRM; - goto out_free; + goto out_unlock; } sem_unlock(sma, -1); rcu_read_unlock(); sem_io = ipc_alloc(sizeof(ushort)*nsems); if(sem_io == NULL) { - sem_putref(sma); + ipc_rcu_putref(sma, ipc_rcu_free); return -ENOMEM; } rcu_read_lock(); sem_lock_and_putref(sma); if (sma->sem_perm.deleted) { - sem_unlock(sma, -1); - rcu_read_unlock(); err = -EIDRM; - goto out_free; + goto out_unlock; } } for (i = 0; i < sma->sem_nsems; i++) @@ -1320,28 +1381,28 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, struct sem_undo *un; if (!ipc_rcu_getref(sma)) { - rcu_read_unlock(); - return -EIDRM; + err = -EIDRM; + goto out_rcu_wakeup; } rcu_read_unlock(); if(nsems > SEMMSL_FAST) { sem_io = ipc_alloc(sizeof(ushort)*nsems); if(sem_io == NULL) { - sem_putref(sma); + ipc_rcu_putref(sma, ipc_rcu_free); return -ENOMEM; } } if (copy_from_user (sem_io, p, nsems*sizeof(ushort))) { - sem_putref(sma); + ipc_rcu_putref(sma, ipc_rcu_free); err = -EFAULT; goto out_free; } for (i = 0; i < nsems; i++) { if (sem_io[i] > SEMVMX) { - sem_putref(sma); + ipc_rcu_putref(sma, ipc_rcu_free); err = -ERANGE; goto out_free; } @@ -1349,10 +1410,8 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, rcu_read_lock(); sem_lock_and_putref(sma); if (sma->sem_perm.deleted) { - sem_unlock(sma, -1); - rcu_read_unlock(); err = -EIDRM; - goto out_free; + goto out_unlock; } for (i = 0; i < nsems; i++) @@ -1376,6 +1435,10 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, goto out_rcu_wakeup; sem_lock(sma, NULL, -1); + if (sma->sem_perm.deleted) { + err = -EIDRM; + goto out_unlock; + } curr = &sma->sem_base[semnum]; switch (cmd) { @@ -1431,9 +1494,9 @@ copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version) } /* - * This function handles some semctl commands which require the rw_mutex + * This function handles some semctl commands which require the rwsem * to be held in write mode. - * NOTE: no locks must be held, the rw_mutex is taken inside this function. + * NOTE: no locks must be held, the rwsem is taken inside this function. */ static int semctl_down(struct ipc_namespace *ns, int semid, int cmd, int version, void __user *p) @@ -1448,7 +1511,7 @@ static int semctl_down(struct ipc_namespace *ns, int semid, return -EFAULT; } - down_write(&sem_ids(ns).rw_mutex); + down_write(&sem_ids(ns).rwsem); rcu_read_lock(); ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd, @@ -1487,7 +1550,7 @@ out_unlock0: out_unlock1: rcu_read_unlock(); out_up: - up_write(&sem_ids(ns).rw_mutex); + up_write(&sem_ids(ns).rwsem); return err; } @@ -1629,7 +1692,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) /* step 2: allocate new undo structure */ new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); if (!new) { - sem_putref(sma); + ipc_rcu_putref(sma, ipc_rcu_free); return ERR_PTR(-ENOMEM); } @@ -1781,6 +1844,10 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, if (error) goto out_rcu_wakeup; + error = -EIDRM; + locknum = sem_lock(sma, sops, nsops); + if (sma->sem_perm.deleted) + goto out_unlock_free; /* * semid identifiers are not unique - find_alloc_undo may have * allocated an undo structure, it was invalidated by an RMID @@ -1788,19 +1855,22 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, * This case can be detected checking un->semid. The existence of * "un" itself is guaranteed by rcu. */ - error = -EIDRM; - locknum = sem_lock(sma, sops, nsops); if (un && un->semid == -1) goto out_unlock_free; error = perform_atomic_semop(sma, sops, nsops, un, task_tgid_vnr(current)); - if (error <= 0) { - if (alter && error == 0) + if (error == 0) { + /* If the operation was successful, then do + * the required updates. + */ + if (alter) do_smart_update(sma, sops, nsops, 1, &tasks); - - goto out_unlock_free; + else + set_semotime(sma, sops); } + if (error <= 0) + goto out_unlock_free; /* We need to sleep on this operation, so we put the current * task into the pending queue and go to sleep. @@ -1997,6 +2067,12 @@ void exit_sem(struct task_struct *tsk) } sem_lock(sma, NULL, -1); + /* exit_sem raced with IPC_RMID, nothing to do */ + if (sma->sem_perm.deleted) { + sem_unlock(sma, -1); + rcu_read_unlock(); + continue; + } un = __lookup_undo(ulp, semid); if (un == NULL) { /* exit_sem raced with IPC_RMID+semget() that created @@ -2059,6 +2135,14 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it) struct sem_array *sma = it; time_t sem_otime; + /* + * The proc interface isn't aware of sem_lock(), it calls + * ipc_lock_object() directly (in sysvipc_find_ipc). + * In order to stay compatible with sem_lock(), we must wait until + * all simple semop() calls have left their critical regions. + */ + sem_wait_array(sma); + sem_otime = get_semotime(sma); return seq_printf(s, |