summaryrefslogtreecommitdiff
path: root/drivers/misc/cxl/pci.c
diff options
context:
space:
mode:
author: Andrew Donnellan <andrew.donnellan@au1.ibm.com> 2017-02-06 01:07:17 (GMT)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2017-03-15 02:02:42 (GMT)
commit: 53d43706f2ba5cd805093d21d69fd700584a71ab (patch)
tree: 97688aad50611bf48e30e5be11602ab6a7f428a4 /drivers/misc/cxl/pci.c
parent: 411d0b0ced692dd2c0d7c10514ca8b923d8fa0f8 (diff)
download: linux-53d43706f2ba5cd805093d21d69fd700584a71ab.tar.xz
cxl: fix nested locking hang during EEH hotplug
commit 171ed0fcd8966d82c45376f1434678e7b9d4d9b1 upstream.

Commit 14a3ae34bfd0 ("cxl: Prevent read/write to AFU config space while AFU not configured") introduced a rwsem to fix an invalid memory access that occurred when someone attempts to access the config space of an AFU on a vPHB whilst the AFU is deconfigured, such as during EEH recovery.

It turns out that it's possible to run into a nested locking issue when EEH recovery fails and a full device hotplug is required. cxl_pci_error_detected() deconfigures the AFU, taking a writer lock on configured_rwsem. When EEH recovery fails, the EEH code calls pci_hp_remove_devices() to remove the device, which in turn calls cxl_remove() -> cxl_pci_remove_afu() -> pci_deconfigure_afu(), which tries to grab the writer lock that's already held.

Standard rwsem semantics don't express what we really want to do here and don't allow for nested locking. Fix this by replacing the rwsem with an atomic_t which we can control more finely. Allow the AFU to be locked multiple times so long as there are no readers.

Fixes: 14a3ae34bfd0 ("cxl: Prevent read/write to AFU config space while AFU not configured")
Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Acked-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'drivers/misc/cxl/pci.c')
-rw-r--r-- drivers/misc/cxl/pci.c 11
1 file changed, 9 insertions, 2 deletions
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index b2ff108..dd99b06 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -1129,7 +1129,7 @@ static int pci_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pc
if ((rc = cxl_native_register_psl_irq(afu)))
goto err2;
- up_write(&afu->configured_rwsem);
+ atomic_set(&afu->configured_state, 0);
return 0;
err2:
@@ -1142,7 +1142,14 @@ err1:
static void pci_deconfigure_afu(struct cxl_afu *afu)
{
- down_write(&afu->configured_rwsem);
+ /*
+ * It's okay to deconfigure when AFU is already locked, otherwise wait
+ * until there are no readers
+ */
+ if (atomic_read(&afu->configured_state) != -1) {
+ while (atomic_cmpxchg(&afu->configured_state, 0, -1) != -1)
+ schedule();
+ }
cxl_native_release_psl_irq(afu);
if (afu->adapter->native->sl_ops->release_serr_irq)
afu->adapter->native->sl_ops->release_serr_irq(afu);