-rw-r--r--  arch/x86/Kconfig                    1
-rw-r--r--  arch/x86/include/asm/spinlock.h     5
-rw-r--r--  fs/dcache.c                        17
-rw-r--r--  fs/namei.c                         90
-rw-r--r--  include/linux/dcache.h             22
-rw-r--r--  include/linux/lockref.h            61
-rw-r--r--  lib/Kconfig                        10
-rw-r--r--  lib/Makefile                        1
-rw-r--r--  lib/lockref.c                     127
9 files changed, 237 insertions(+), 97 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b32ebf9..67e0074 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -16,6 +16,7 @@ config X86_64
def_bool y
depends on 64BIT
select X86_DEV_DMA_OPS
+ select ARCH_USE_CMPXCHG_LOCKREF
### Arch settings
config X86
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index e3ddd7d..e0e6684 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -34,6 +34,11 @@
# define UNLOCK_LOCK_PREFIX
#endif
+static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+ return lock.tickets.head == lock.tickets.tail;
+}
+
/*
* Ticket locks are conceptually two parts, one indicating the current head of
* the queue, and the other indicating the current tail. The lock is acquired
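
The new arch_spin_value_unlocked() helper lets callers test a *snapshotted*
lock value without dereferencing the live spinlock.  A minimal sketch of the
intended use (this is how lib/lockref.c below consumes it; the variable names
are illustrative):

	struct lockref old;

	/* one 64-bit load snapshots the embedded spinlock and the count together */
	old.lock_count = ACCESS_ONCE(lockref->lock_count);
	if (arch_spin_value_unlocked(old.lock.rlock.raw_lock)) {
		/* the lock was free in the snapshot, so the lockless
		 * cmpxchg fast path may be attempted */
	}
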
diff --git a/fs/dcache.c b/fs/dcache.c
index b949af8..96655f4 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -611,8 +611,23 @@ static inline void __dget(struct dentry *dentry)
struct dentry *dget_parent(struct dentry *dentry)
{
+ int gotref;
struct dentry *ret;
+ /*
+ * Do optimistic parent lookup without any
+ * locking.
+ */
+ rcu_read_lock();
+ ret = ACCESS_ONCE(dentry->d_parent);
+ gotref = lockref_get_not_zero(&ret->d_lockref);
+ rcu_read_unlock();
+ if (likely(gotref)) {
+ if (likely(ret == ACCESS_ONCE(dentry->d_parent)))
+ return ret;
+ dput(ret);
+ }
+
repeat:
/*
* Don't need rcu_dereference because we re-check it was correct under
@@ -1771,7 +1786,7 @@ static noinline enum slow_d_compare slow_dentry_cmp(
* without taking d_lock and checking d_seq sequence count against @seq
* returned here.
*
- * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
+ * A refcount may be taken on the found dentry with the d_rcu_to_refcount
* function.
*
* Alternatively, __d_lookup_rcu may be called again to look up the child of
diff --git a/fs/namei.c b/fs/namei.c
index 7720fbd..2c30c84 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -494,6 +494,50 @@ static inline void unlock_rcu_walk(void)
br_read_unlock(&vfsmount_lock);
}
+/*
+ * When we move over from the RCU domain to properly refcounted
+ * long-lived dentries, we need to check the sequence numbers
+ * we got before lookup very carefully.
+ *
+ * We cannot blindly increment a dentry refcount - even if it
+ * is not locked - if it is zero, because it may have gone
+ * through the final d_kill() logic already.
+ *
+ * So for a zero refcount, we need to get the spinlock (which is
+ * safe even for a dead dentry because the de-allocation is
+ * RCU-delayed), and check the sequence count under the lock.
+ *
+ * Once we have checked the sequence count, we know it is live,
+ * and since we hold the spinlock it cannot die from under us.
+ *
+ * In contrast, if the reference count wasn't zero, we can just
+ * increment the lockref without having to take the spinlock.
+ * Even if the sequence number ends up being stale, we haven't
+ * gone through the final dput() and killed the dentry yet.
+ */
+static inline int d_rcu_to_refcount(struct dentry *dentry, seqcount_t *validate, unsigned seq)
+{
+ int gotref;
+
+ gotref = lockref_get_or_lock(&dentry->d_lockref);
+
+ /* Does the sequence number still match? */
+ if (read_seqcount_retry(validate, seq)) {
+ if (gotref)
+ dput(dentry);
+ else
+ spin_unlock(&dentry->d_lock);
+ return -ECHILD;
+ }
+
+ /* Get the ref now, if we couldn't get it originally */
+ if (!gotref) {
+ dentry->d_lockref.count++;
+ spin_unlock(&dentry->d_lock);
+ }
+ return 0;
+}
+
/**
* unlazy_walk - try to switch to ref-walk mode.
* @nd: nameidata pathwalk data
@@ -518,29 +562,28 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
nd->root.dentry != fs->root.dentry)
goto err_root;
}
- spin_lock(&parent->d_lock);
+
+ /*
+ * For a negative lookup, the lookup sequence point is the parent's
+ * sequence point, and it only needs to revalidate the parent dentry.
+ *
+ * For a positive lookup, we need to move both the parent and the
+ * dentry from the RCU domain to be properly refcounted. And the
+ * sequence number in the dentry validates *both* dentry counters,
+ * since we checked the sequence number of the parent after we got
+ * the child sequence number. So we know the parent must still
+ * be valid if the child sequence number is still valid.
+ */
if (!dentry) {
- if (!__d_rcu_to_refcount(parent, nd->seq))
- goto err_parent;
+ if (d_rcu_to_refcount(parent, &parent->d_seq, nd->seq) < 0)
+ goto err_root;
BUG_ON(nd->inode != parent->d_inode);
} else {
- if (dentry->d_parent != parent)
+ if (d_rcu_to_refcount(dentry, &dentry->d_seq, nd->seq) < 0)
+ goto err_root;
+ if (d_rcu_to_refcount(parent, &dentry->d_seq, nd->seq) < 0)
goto err_parent;
- spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
- if (!__d_rcu_to_refcount(dentry, nd->seq))
- goto err_child;
- /*
- * If the sequence check on the child dentry passed, then
- * the child has not been removed from its parent. This
- * means the parent dentry must be valid and able to take
- * a reference at this point.
- */
- BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent);
- BUG_ON(!parent->d_lockref.count);
- parent->d_lockref.count++;
- spin_unlock(&dentry->d_lock);
}
- spin_unlock(&parent->d_lock);
if (want_root) {
path_get(&nd->root);
spin_unlock(&fs->lock);
@@ -551,10 +594,8 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
nd->flags &= ~LOOKUP_RCU;
return 0;
-err_child:
- spin_unlock(&dentry->d_lock);
err_parent:
- spin_unlock(&parent->d_lock);
+ dput(dentry);
err_root:
if (want_root)
spin_unlock(&fs->lock);
@@ -585,14 +626,11 @@ static int complete_walk(struct nameidata *nd)
nd->flags &= ~LOOKUP_RCU;
if (!(nd->flags & LOOKUP_ROOT))
nd->root.mnt = NULL;
- spin_lock(&dentry->d_lock);
- if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) {
- spin_unlock(&dentry->d_lock);
+
+ if (d_rcu_to_refcount(dentry, &dentry->d_seq, nd->seq) < 0) {
unlock_rcu_walk();
return -ECHILD;
}
- BUG_ON(nd->inode != dentry->d_inode);
- spin_unlock(&dentry->d_lock);
mntget(nd->path.mnt);
unlock_rcu_walk();
}
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index efdc944..9169b91 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -304,28 +304,6 @@ extern struct dentry *__d_lookup(const struct dentry *, const struct qstr *);
extern struct dentry *__d_lookup_rcu(const struct dentry *parent,
const struct qstr *name, unsigned *seq);
-/**
- * __d_rcu_to_refcount - take a refcount on dentry if sequence check is ok
- * @dentry: dentry to take a ref on
- * @seq: seqcount to verify against
- * Returns: 0 on failure, else 1.
- *
- * __d_rcu_to_refcount operates on a dentry,seq pair that was returned
- * by __d_lookup_rcu, to get a reference on an rcu-walk dentry.
- */
-static inline int __d_rcu_to_refcount(struct dentry *dentry, unsigned seq)
-{
- int ret = 0;
-
- assert_spin_locked(&dentry->d_lock);
- if (!read_seqcount_retry(&dentry->d_seq, seq)) {
- ret = 1;
- dentry->d_lockref.count++;
- }
-
- return ret;
-}
-
static inline unsigned d_count(const struct dentry *dentry)
{
return dentry->d_lockref.count;
diff --git a/include/linux/lockref.h b/include/linux/lockref.h
index 01233e0..ca07b50 100644
--- a/include/linux/lockref.h
+++ b/include/linux/lockref.h
@@ -17,55 +17,20 @@
#include <linux/spinlock.h>
struct lockref {
- spinlock_t lock;
- unsigned int count;
+ union {
+#ifdef CONFIG_CMPXCHG_LOCKREF
+ aligned_u64 lock_count;
+#endif
+ struct {
+ spinlock_t lock;
+ unsigned int count;
+ };
+ };
};
-/**
- * lockref_get - Increments reference count unconditionally
- * @lockcnt: pointer to lockref structure
- *
- * This operation is only valid if you already hold a reference
- * to the object, so you know the count cannot be zero.
- */
-static inline void lockref_get(struct lockref *lockref)
-{
- spin_lock(&lockref->lock);
- lockref->count++;
- spin_unlock(&lockref->lock);
-}
-
-/**
- * lockref_get_not_zero - Increments count unless the count is 0
- * @lockcnt: pointer to lockref structure
- * Return: 1 if count updated successfully or 0 if count is 0
- */
-static inline int lockref_get_not_zero(struct lockref *lockref)
-{
- int retval = 0;
-
- spin_lock(&lockref->lock);
- if (lockref->count) {
- lockref->count++;
- retval = 1;
- }
- spin_unlock(&lockref->lock);
- return retval;
-}
-
-/**
- * lockref_put_or_lock - decrements count unless count <= 1 before decrement
- * @lockcnt: pointer to lockref structure
- * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken
- */
-static inline int lockref_put_or_lock(struct lockref *lockref)
-{
- spin_lock(&lockref->lock);
- if (lockref->count <= 1)
- return 0;
- lockref->count--;
- spin_unlock(&lockref->lock);
- return 1;
-}
+extern void lockref_get(struct lockref *);
+extern int lockref_get_not_zero(struct lockref *);
+extern int lockref_get_or_lock(struct lockref *);
+extern int lockref_put_or_lock(struct lockref *);
#endif /* __LINUX_LOCKREF_H */
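
The non-obvious calling convention here is lockref_get_or_lock(): it returns
1 with the count already incremented, or 0 with the spinlock held so the
caller can revalidate the object and decide under the lock.  A hypothetical
caller (still_valid() is made up; this mirrors what d_rcu_to_refcount() in
fs/namei.c does with the sequence count):

	static int take_ref(struct lockref *ref)
	{
		if (lockref_get_or_lock(ref))
			return 1;	/* count was nonzero, reference taken */

		/* slow path: ref->lock is now held and count was zero */
		if (still_valid(ref)) {		/* hypothetical revalidation */
			ref->count++;
			spin_unlock(&ref->lock);
			return 1;
		}
		spin_unlock(&ref->lock);
		return 0;
	}
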
diff --git a/lib/Kconfig b/lib/Kconfig
index 71d9f81..6556171 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -48,6 +48,16 @@ config STMP_DEVICE
config PERCPU_RWSEM
boolean
+config ARCH_USE_CMPXCHG_LOCKREF
+ bool
+
+config CMPXCHG_LOCKREF
+ def_bool y if ARCH_USE_CMPXCHG_LOCKREF
+ depends on SMP
+ depends on !GENERIC_LOCKBREAK
+ depends on !DEBUG_SPINLOCK
+ depends on !DEBUG_LOCK_ALLOC
+
config CRC_CCITT
tristate "CRC-CCITT functions"
help
diff --git a/lib/Makefile b/lib/Makefile
index 7baccfd..f2cb308 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -20,6 +20,7 @@ lib-$(CONFIG_MMU) += ioremap.o
lib-$(CONFIG_SMP) += cpumask.o
lib-y += kobject.o klist.o
+obj-y += lockref.o
obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
diff --git a/lib/lockref.c b/lib/lockref.c
new file mode 100644
index 0000000..7819c2d
--- /dev/null
+++ b/lib/lockref.c
@@ -0,0 +1,127 @@
+#include <linux/export.h>
+#include <linux/lockref.h>
+
+#ifdef CONFIG_CMPXCHG_LOCKREF
+
+/*
+ * Note that the "cmpxchg()" reloads the "old" value for the
+ * failure case.
+ */
+#define CMPXCHG_LOOP(CODE, SUCCESS) do { \
+ struct lockref old; \
+ BUILD_BUG_ON(sizeof(old) != 8); \
+ old.lock_count = ACCESS_ONCE(lockref->lock_count); \
+ while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) { \
+ struct lockref new = old, prev = old; \
+ CODE \
+ old.lock_count = cmpxchg(&lockref->lock_count, \
+ old.lock_count, new.lock_count); \
+ if (likely(old.lock_count == prev.lock_count)) { \
+ SUCCESS; \
+ } \
+ } \
+} while (0)
+
+#else
+
+#define CMPXCHG_LOOP(CODE, SUCCESS) do { } while (0)
+
+#endif
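
With CONFIG_CMPXCHG_LOCKREF the macro amounts to: snapshot the combined
64-bit lock+count word, drop to the spinlock slow path as soon as the
snapshotted lock looks held, and otherwise retry the cmpxchg (which hands
back the current value on failure).  Roughly, hand-expanded for
lockref_get() as an illustration rather than the literal expansion:

	struct lockref old, new;

	old.lock_count = ACCESS_ONCE(lockref->lock_count);
	while (arch_spin_value_unlocked(old.lock.rlock.raw_lock)) {
		u64 prev = old.lock_count;

		new.lock_count = old.lock_count;
		new.count++;				/* the CODE argument */
		old.lock_count = cmpxchg(&lockref->lock_count,
					 prev, new.lock_count);
		if (old.lock_count == prev)
			return;				/* the SUCCESS argument */
		/* cmpxchg failed: "old" now holds the reloaded value */
	}
	/* fall through to the spin_lock() slow path below */
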
+
+/**
+ * lockref_get - Increments reference count unconditionally
+ * @lockref: pointer to lockref structure
+ *
+ * This operation is only valid if you already hold a reference
+ * to the object, so you know the count cannot be zero.
+ */
+void lockref_get(struct lockref *lockref)
+{
+ CMPXCHG_LOOP(
+ new.count++;
+ ,
+ return;
+ );
+
+ spin_lock(&lockref->lock);
+ lockref->count++;
+ spin_unlock(&lockref->lock);
+}
+EXPORT_SYMBOL(lockref_get);
+
+/**
+ * lockref_get_not_zero - Increments count unless the count is 0
+ * @lockref: pointer to lockref structure
+ * Return: 1 if count updated successfully or 0 if count was zero
+ */
+int lockref_get_not_zero(struct lockref *lockref)
+{
+ int retval;
+
+ CMPXCHG_LOOP(
+ new.count++;
+ if (!old.count)
+ return 0;
+ ,
+ return 1;
+ );
+
+ spin_lock(&lockref->lock);
+ retval = 0;
+ if (lockref->count) {
+ lockref->count++;
+ retval = 1;
+ }
+ spin_unlock(&lockref->lock);
+ return retval;
+}
+EXPORT_SYMBOL(lockref_get_not_zero);
+
+/**
+ * lockref_get_or_lock - Increments count unless the count is 0
+ * @lockref: pointer to lockref structure
+ * Return: 1 if count updated successfully or 0 if count was zero
+ * and we got the lock instead.
+ */
+int lockref_get_or_lock(struct lockref *lockref)
+{
+ CMPXCHG_LOOP(
+ new.count++;
+ if (!old.count)
+ break;
+ ,
+ return 1;
+ );
+
+ spin_lock(&lockref->lock);
+ if (!lockref->count)
+ return 0;
+ lockref->count++;
+ spin_unlock(&lockref->lock);
+ return 1;
+}
+EXPORT_SYMBOL(lockref_get_or_lock);
+
+/**
+ * lockref_put_or_lock - decrements count unless count <= 1 before decrement
+ * @lockref: pointer to lockref structure
+ * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken
+ */
+int lockref_put_or_lock(struct lockref *lockref)
+{
+ CMPXCHG_LOOP(
+ new.count--;
+ if (old.count <= 1)
+ break;
+ ,
+ return 1;
+ );
+
+ spin_lock(&lockref->lock);
+ if (lockref->count <= 1)
+ return 0;
+ lockref->count--;
+ spin_unlock(&lockref->lock);
+ return 1;
+}
+EXPORT_SYMBOL(lockref_put_or_lock);
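
The put side mirrors the get side: lockref_put_or_lock() does the common
"count stays positive" decrement without taking the lock, and only hands the
caller a locked lockref when the count is about to drop to zero.  A
hypothetical drop path built on it (the final-put teardown is object-specific
and is responsible for releasing the lock, as dput()/dentry_kill() do for
dentries):

	static void drop_ref(struct lockref *ref)
	{
		if (lockref_put_or_lock(ref))
			return;		/* count was > 1, decremented locklessly */

		/* slow path: ref->lock is held and count is <= 1 */
		if (--ref->count) {
			spin_unlock(&ref->lock);
			return;
		}
		/* last reference: object-specific teardown goes here,
		 * and it must drop ref->lock when it is done */
	}
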