summaryrefslogtreecommitdiff
path: root/ipc/sem.c
diff options
context:
space:
mode:
Diffstat (limited to 'ipc/sem.c')
-rw-r--r--ipc/sem.c256
1 files changed, 170 insertions, 86 deletions
diff --git a/ipc/sem.c b/ipc/sem.c
index 69b6a21..db9d241 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -243,71 +243,122 @@ static void merge_queues(struct sem_array *sma)
}
}
+static void sem_rcu_free(struct rcu_head *head)
+{
+ struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+ struct sem_array *sma = ipc_rcu_to_struct(p);
+
+ security_sem_free(sma);
+ ipc_rcu_free(head);
+}
+
+/*
+ * Wait until all currently ongoing simple ops have completed.
+ * Caller must own sem_perm.lock.
+ * New simple ops cannot start, because simple ops first check
+ * that sem_perm.lock is free.
+ * that a) sem_perm.lock is free and b) complex_count is 0.
+ */
+static void sem_wait_array(struct sem_array *sma)
+{
+ int i;
+ struct sem *sem;
+
+ if (sma->complex_count) {
+ /* The thread that increased sma->complex_count waited on
+ * all sem->lock locks. Thus we don't need to wait again.
+ */
+ return;
+ }
+
+ for (i = 0; i < sma->sem_nsems; i++) {
+ sem = sma->sem_base + i;
+ spin_unlock_wait(&sem->lock);
+ }
+}
+
/*
* If the request contains only one semaphore operation, and there are
* no complex transactions pending, lock only the semaphore involved.
* Otherwise, lock the entire semaphore array, since we either have
* multiple semaphores in our own semops, or we need to look at
* semaphores from other pending complex operations.
- *
- * Carefully guard against sma->complex_count changing between zero
- * and non-zero while we are spinning for the lock. The value of
- * sma->complex_count cannot change while we are holding the lock,
- * so sem_unlock should be fine.
- *
- * The global lock path checks that all the local locks have been released,
- * checking each local lock once. This means that the local lock paths
- * cannot start their critical sections while the global lock is held.
*/
static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
int nsops)
{
- int locknum;
- again:
- if (nsops == 1 && !sma->complex_count) {
- struct sem *sem = sma->sem_base + sops->sem_num;
+ struct sem *sem;
- /* Lock just the semaphore we are interested in. */
- spin_lock(&sem->lock);
+ if (nsops != 1) {
+ /* Complex operation - acquire a full lock */
+ ipc_lock_object(&sma->sem_perm);
- /*
- * If sma->complex_count was set while we were spinning,
- * we may need to look at things we did not lock here.
+ /* And wait until all simple ops that are processed
+ * right now have dropped their locks.
*/
- if (unlikely(sma->complex_count)) {
- spin_unlock(&sem->lock);
- goto lock_array;
- }
+ sem_wait_array(sma);
+ return -1;
+ }
+
+ /*
+ * Only one semaphore affected - try to optimize locking.
+ * The rules are:
+ * - optimized locking is possible if no complex operation
+ * is either enqueued or processed right now.
+ * - The test for enqueued complex ops is simple:
+ * sma->complex_count != 0
+ * - Testing for complex ops that are processed right now is
+ * a bit more difficult. Complex ops acquire the full lock
+ * and first wait that the running simple ops have completed.
+ * (see above)
+ * Thus: If we own a simple lock and the global lock is free
+ * and complex_count is now 0, then it will stay 0 and
+ * thus just locking sem->lock is sufficient.
+ */
+ sem = sma->sem_base + sops->sem_num;
+ if (sma->complex_count == 0) {
/*
- * Another process is holding the global lock on the
- * sem_array; we cannot enter our critical section,
- * but have to wait for the global lock to be released.
+ * It appears that no complex operation is around.
+ * Acquire the per-semaphore lock.
*/
- if (unlikely(spin_is_locked(&sma->sem_perm.lock))) {
- spin_unlock(&sem->lock);
- spin_unlock_wait(&sma->sem_perm.lock);
- goto again;
+ spin_lock(&sem->lock);
+
+ /* Then check that the global lock is free */
+ if (!spin_is_locked(&sma->sem_perm.lock)) {
+ /* spin_is_locked() is not a memory barrier */
+ smp_mb();
+
+ /* Now repeat the test of complex_count:
+ * It can't change anymore until we drop sem->lock.
+ * Thus: if is now 0, then it will stay 0.
+ */
+ if (sma->complex_count == 0) {
+ /* fast path successful! */
+ return sops->sem_num;
+ }
}
+ spin_unlock(&sem->lock);
+ }
- locknum = sops->sem_num;
+ /* slow path: acquire the full lock */
+ ipc_lock_object(&sma->sem_perm);
+
+ if (sma->complex_count == 0) {
+ /* False alarm:
+ * There is no complex operation, thus we can switch
+ * back to the fast path.
+ */
+ spin_lock(&sem->lock);
+ ipc_unlock_object(&sma->sem_perm);
+ return sops->sem_num;
} else {
- int i;
- /*
- * Lock the semaphore array, and wait for all of the
- * individual semaphore locks to go away. The code
- * above ensures no new single-lock holders will enter
- * their critical section while the array lock is held.
+ /* Not a false alarm, thus complete the sequence for a
+ * full lock.
*/
- lock_array:
- ipc_lock_object(&sma->sem_perm);
- for (i = 0; i < sma->sem_nsems; i++) {
- struct sem *sem = sma->sem_base + i;
- spin_unlock_wait(&sem->lock);
- }
- locknum = -1;
+ sem_wait_array(sma);
+ return -1;
}
- return locknum;
}
static inline void sem_unlock(struct sem_array *sma, int locknum)
@@ -374,12 +425,7 @@ static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns
static inline void sem_lock_and_putref(struct sem_array *sma)
{
sem_lock(sma, NULL, -1);
- ipc_rcu_putref(sma);
-}
-
-static inline void sem_putref(struct sem_array *sma)
-{
- ipc_rcu_putref(sma);
+ ipc_rcu_putref(sma, ipc_rcu_free);
}
static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
@@ -458,14 +504,13 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
sma->sem_perm.security = NULL;
retval = security_sem_alloc(sma);
if (retval) {
- ipc_rcu_putref(sma);
+ ipc_rcu_putref(sma, ipc_rcu_free);
return retval;
}
id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
if (id < 0) {
- security_sem_free(sma);
- ipc_rcu_putref(sma);
+ ipc_rcu_putref(sma, sem_rcu_free);
return id;
}
ns->used_sems += nsems;
@@ -873,6 +918,24 @@ again:
}
/**
+ * set_semotime(sma, sops) - set sem_otime
+ * @sma: semaphore array
+ * @sops: operations that modified the array, may be NULL
+ *
+ * sem_otime is replicated to avoid cache line trashing.
+ * This function sets one instance to the current time.
+ */
+static void set_semotime(struct sem_array *sma, struct sembuf *sops)
+{
+ if (sops == NULL) {
+ sma->sem_base[0].sem_otime = get_seconds();
+ } else {
+ sma->sem_base[sops[0].sem_num].sem_otime =
+ get_seconds();
+ }
+}
+
+/**
* do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue
* @sma: semaphore array
* @sops: operations that were performed
@@ -922,17 +985,10 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop
}
}
}
- if (otime) {
- if (sops == NULL) {
- sma->sem_base[0].sem_otime = get_seconds();
- } else {
- sma->sem_base[sops[0].sem_num].sem_otime =
- get_seconds();
- }
- }
+ if (otime)
+ set_semotime(sma, sops);
}
-
/* The following counts are associated to each semaphore:
* semncnt number of tasks waiting on semval being nonzero
* semzcnt number of tasks waiting on semval being zero
@@ -1047,8 +1103,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
wake_up_sem_queue_do(&tasks);
ns->used_sems -= sma->sem_nsems;
- security_sem_free(sma);
- ipc_rcu_putref(sma);
+ ipc_rcu_putref(sma, sem_rcu_free);
}
static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
@@ -1227,6 +1282,12 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
sem_lock(sma, NULL, -1);
+ if (sma->sem_perm.deleted) {
+ sem_unlock(sma, -1);
+ rcu_read_unlock();
+ return -EIDRM;
+ }
+
curr = &sma->sem_base[semnum];
ipc_assert_locked_object(&sma->sem_perm);
@@ -1281,28 +1342,28 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
int i;
sem_lock(sma, NULL, -1);
+ if (sma->sem_perm.deleted) {
+ err = -EIDRM;
+ goto out_unlock;
+ }
if(nsems > SEMMSL_FAST) {
if (!ipc_rcu_getref(sma)) {
- sem_unlock(sma, -1);
- rcu_read_unlock();
err = -EIDRM;
- goto out_free;
+ goto out_unlock;
}
sem_unlock(sma, -1);
rcu_read_unlock();
sem_io = ipc_alloc(sizeof(ushort)*nsems);
if(sem_io == NULL) {
- sem_putref(sma);
+ ipc_rcu_putref(sma, ipc_rcu_free);
return -ENOMEM;
}
rcu_read_lock();
sem_lock_and_putref(sma);
if (sma->sem_perm.deleted) {
- sem_unlock(sma, -1);
- rcu_read_unlock();
err = -EIDRM;
- goto out_free;
+ goto out_unlock;
}
}
for (i = 0; i < sma->sem_nsems; i++)
@@ -1320,28 +1381,28 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
struct sem_undo *un;
if (!ipc_rcu_getref(sma)) {
- rcu_read_unlock();
- return -EIDRM;
+ err = -EIDRM;
+ goto out_rcu_wakeup;
}
rcu_read_unlock();
if(nsems > SEMMSL_FAST) {
sem_io = ipc_alloc(sizeof(ushort)*nsems);
if(sem_io == NULL) {
- sem_putref(sma);
+ ipc_rcu_putref(sma, ipc_rcu_free);
return -ENOMEM;
}
}
if (copy_from_user (sem_io, p, nsems*sizeof(ushort))) {
- sem_putref(sma);
+ ipc_rcu_putref(sma, ipc_rcu_free);
err = -EFAULT;
goto out_free;
}
for (i = 0; i < nsems; i++) {
if (sem_io[i] > SEMVMX) {
- sem_putref(sma);
+ ipc_rcu_putref(sma, ipc_rcu_free);
err = -ERANGE;
goto out_free;
}
@@ -1349,10 +1410,8 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
rcu_read_lock();
sem_lock_and_putref(sma);
if (sma->sem_perm.deleted) {
- sem_unlock(sma, -1);
- rcu_read_unlock();
err = -EIDRM;
- goto out_free;
+ goto out_unlock;
}
for (i = 0; i < nsems; i++)
@@ -1376,6 +1435,10 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
goto out_rcu_wakeup;
sem_lock(sma, NULL, -1);
+ if (sma->sem_perm.deleted) {
+ err = -EIDRM;
+ goto out_unlock;
+ }
curr = &sma->sem_base[semnum];
switch (cmd) {
@@ -1629,7 +1692,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
/* step 2: allocate new undo structure */
new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
if (!new) {
- sem_putref(sma);
+ ipc_rcu_putref(sma, ipc_rcu_free);
return ERR_PTR(-ENOMEM);
}
@@ -1781,6 +1844,10 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
if (error)
goto out_rcu_wakeup;
+ error = -EIDRM;
+ locknum = sem_lock(sma, sops, nsops);
+ if (sma->sem_perm.deleted)
+ goto out_unlock_free;
/*
* semid identifiers are not unique - find_alloc_undo may have
* allocated an undo structure, it was invalidated by an RMID
@@ -1788,19 +1855,22 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
* This case can be detected checking un->semid. The existence of
* "un" itself is guaranteed by rcu.
*/
- error = -EIDRM;
- locknum = sem_lock(sma, sops, nsops);
if (un && un->semid == -1)
goto out_unlock_free;
error = perform_atomic_semop(sma, sops, nsops, un,
task_tgid_vnr(current));
- if (error <= 0) {
- if (alter && error == 0)
+ if (error == 0) {
+ /* If the operation was successful, then do
+ * the required updates.
+ */
+ if (alter)
do_smart_update(sma, sops, nsops, 1, &tasks);
-
- goto out_unlock_free;
+ else
+ set_semotime(sma, sops);
}
+ if (error <= 0)
+ goto out_unlock_free;
/* We need to sleep on this operation, so we put the current
* task into the pending queue and go to sleep.
@@ -1997,6 +2067,12 @@ void exit_sem(struct task_struct *tsk)
}
sem_lock(sma, NULL, -1);
+ /* exit_sem raced with IPC_RMID, nothing to do */
+ if (sma->sem_perm.deleted) {
+ sem_unlock(sma, -1);
+ rcu_read_unlock();
+ continue;
+ }
un = __lookup_undo(ulp, semid);
if (un == NULL) {
/* exit_sem raced with IPC_RMID+semget() that created
@@ -2059,6 +2135,14 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
struct sem_array *sma = it;
time_t sem_otime;
+ /*
+ * The proc interface isn't aware of sem_lock(), it calls
+ * ipc_lock_object() directly (in sysvipc_find_ipc).
+ * In order to stay compatible with sem_lock(), we must wait until
+ * all simple semop() calls have left their critical regions.
+ */
+ sem_wait_array(sma);
+
sem_otime = get_semotime(sma);
return seq_printf(s,