summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r--drivers/acpi/apei/erst.c6
-rw-r--r--drivers/amba/Kconfig2
-rw-r--r--drivers/base/devtmpfs.c12
-rw-r--r--drivers/block/aoe/aoecmd.c4
-rw-r--r--drivers/block/drbd/drbd_actlog.c323
-rw-r--r--drivers/block/drbd/drbd_bitmap.c48
-rw-r--r--drivers/block/drbd/drbd_debugfs.c14
-rw-r--r--drivers/block/drbd/drbd_int.h114
-rw-r--r--drivers/block/drbd/drbd_main.c74
-rw-r--r--drivers/block/drbd/drbd_nl.c1361
-rw-r--r--drivers/block/drbd/drbd_proc.c6
-rw-r--r--drivers/block/drbd/drbd_protocol.h2
-rw-r--r--drivers/block/drbd/drbd_receiver.c254
-rw-r--r--drivers/block/drbd/drbd_req.c147
-rw-r--r--drivers/block/drbd/drbd_req.h17
-rw-r--r--drivers/block/drbd/drbd_state.c428
-rw-r--r--drivers/block/drbd/drbd_state.h6
-rw-r--r--drivers/block/drbd/drbd_state_change.h63
-rw-r--r--drivers/block/drbd/drbd_worker.c105
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c6
-rw-r--r--drivers/block/null_blk.c11
-rw-r--r--drivers/block/rbd.c3
-rw-r--r--drivers/block/sx8.c7
-rw-r--r--drivers/block/xen-blkback/blkback.c391
-rw-r--r--drivers/block/xen-blkback/common.h86
-rw-r--r--drivers/block/xen-blkback/xenbus.c416
-rw-r--r--drivers/block/xen-blkfront.c1061
-rw-r--r--drivers/char/mem.c4
-rw-r--r--drivers/char/mspec.c15
-rw-r--r--drivers/char/ps3flash.c4
-rw-r--r--drivers/crypto/Kconfig1
-rw-r--r--drivers/crypto/atmel-aes.c1
-rw-r--r--drivers/crypto/qat/qat_common/qat_hal.c23
-rw-r--r--drivers/gpu/drm/drm_hashtab.c5
-rw-r--r--drivers/infiniband/Kconfig10
-rw-r--r--drivers/infiniband/core/Makefile4
-rw-r--r--drivers/infiniband/core/addr.c194
-rw-r--r--drivers/infiniband/core/cache.c345
-rw-r--r--drivers/infiniband/core/cm.c49
-rw-r--r--drivers/infiniband/core/cma.c265
-rw-r--r--drivers/infiniband/core/cma_configfs.c321
-rw-r--r--drivers/infiniband/core/core_priv.h45
-rw-r--r--drivers/infiniband/core/cq.c209
-rw-r--r--drivers/infiniband/core/device.c51
-rw-r--r--drivers/infiniband/core/fmr_pool.c20
-rw-r--r--drivers/infiniband/core/mad.c162
-rw-r--r--drivers/infiniband/core/mad_priv.h2
-rw-r--r--drivers/infiniband/core/multicast.c17
-rw-r--r--drivers/infiniband/core/roce_gid_mgmt.c81
-rw-r--r--drivers/infiniband/core/sa_query.c91
-rw-r--r--drivers/infiniband/core/sysfs.c377
-rw-r--r--drivers/infiniband/core/ud_header.c155
-rw-r--r--drivers/infiniband/core/umem_odp.c2
-rw-r--r--drivers/infiniband/core/user_mad.c1
-rw-r--r--drivers/infiniband/core/uverbs.h2
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c38
-rw-r--r--drivers/infiniband/core/uverbs_main.c13
-rw-r--r--drivers/infiniband/core/uverbs_marshall.c1
-rw-r--r--drivers/infiniband/core/verbs.c238
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.c4
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cq.c4
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_mem.c102
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c146
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.h15
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_qp.c82
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c14
-rw-r--r--drivers/infiniband/hw/cxgb4/cq.c3
-rw-r--r--drivers/infiniband/hw/cxgb4/device.c57
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h13
-rw-r--r--drivers/infiniband/hw/cxgb4/mem.c251
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c3
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c5
-rw-r--r--drivers/infiniband/hw/cxgb4/t4.h7
-rw-r--r--drivers/infiniband/hw/cxgb4/user.h2
-rw-r--r--drivers/infiniband/hw/mlx4/ah.c3
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c3
-rw-r--r--drivers/infiniband/hw/mlx4/main.c102
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h10
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c22
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c318
-rw-r--r--drivers/infiniband/hw/mlx4/srq.c3
-rw-r--r--drivers/infiniband/hw/mlx5/ah.c32
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c31
-rw-r--r--drivers/infiniband/hw/mlx5/main.c447
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h121
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c29
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c1014
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c41
-rw-r--r--drivers/infiniband/hw/mlx5/user.h63
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cq.c3
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c84
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c2
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.c17
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.h2
-rw-r--r--drivers/infiniband/hw/nes/nes_utils.c2
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c216
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.h4
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.c7
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c1
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c163
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.h3
-rw-r--r--drivers/infiniband/hw/qib/qib_fs.c12
-rw-r--r--drivers/infiniband/hw/qib/qib_mr.c51
-rw-r--r--drivers/infiniband/hw/qib/qib_qp.c46
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c12
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h4
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs_mcast.c35
-rw-r--r--drivers/infiniband/hw/usnic/usnic_debugfs.c5
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c4
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.c24
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.h2
-rw-r--r--drivers/infiniband/hw/usnic/usnic_vnic.c54
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h6
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c21
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ethtool.c14
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c40
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c45
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c13
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.h156
-rw-r--r--drivers/infiniband/ulp/iser/iser_initiator.c323
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c179
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c337
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c118
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.h41
-rw-r--r--drivers/infiniband/ulp/isert/isert_proto.h47
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c205
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.h7
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c443
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.h53
-rw-r--r--drivers/input/joystick/xpad.c591
-rw-r--r--drivers/input/keyboard/gpio_keys.c6
-rw-r--r--drivers/input/touchscreen/atmel_mxt_ts.c68
-rw-r--r--drivers/lightnvm/Makefile2
-rw-r--r--drivers/lightnvm/core.c340
-rw-r--r--drivers/lightnvm/gennvm.c198
-rw-r--r--drivers/lightnvm/rrpc.c104
-rw-r--r--drivers/lightnvm/rrpc.h13
-rw-r--r--drivers/lightnvm/sysblk.c741
-rw-r--r--drivers/md/bcache/btree.c5
-rw-r--r--drivers/md/bcache/super.c16
-rw-r--r--drivers/md/bcache/writeback.c37
-rw-r--r--drivers/md/bcache/writeback.h3
-rw-r--r--drivers/mmc/core/debugfs.c2
-rw-r--r--drivers/mmc/core/pwrseq_simple.c22
-rw-r--r--drivers/mmc/core/sd.c28
-rw-r--r--drivers/mmc/core/sdio.c4
-rw-r--r--drivers/mmc/core/sdio_cis.c1
-rw-r--r--drivers/mmc/host/mmci.c3
-rw-r--r--drivers/mmc/host/tmio_mmc_dma.c15
-rw-r--r--drivers/mtd/ubi/cdev.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/fw.c39
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/port.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/qp.c26
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c61
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/qp.c233
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/srq.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/transobj.c50
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/transobj.h72
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vport.c116
-rw-r--r--drivers/ntb/hw/Kconfig1
-rw-r--r--drivers/ntb/hw/Makefile1
-rw-r--r--drivers/ntb/hw/amd/Kconfig7
-rw-r--r--drivers/ntb/hw/amd/Makefile1
-rw-r--r--drivers/ntb/hw/amd/ntb_hw_amd.c1143
-rw-r--r--drivers/ntb/hw/amd/ntb_hw_amd.h217
-rw-r--r--drivers/ntb/hw/intel/ntb_hw_intel.c4
-rw-r--r--drivers/ntb/hw/intel/ntb_hw_intel.h8
-rw-r--r--drivers/ntb/ntb_transport.c50
-rw-r--r--drivers/ntb/test/Kconfig8
-rw-r--r--drivers/ntb/test/Makefile1
-rw-r--r--drivers/ntb/test/ntb_perf.c748
-rw-r--r--drivers/nvme/host/Kconfig11
-rw-r--r--drivers/nvme/host/Makefile5
-rw-r--r--drivers/nvme/host/core.c1472
-rw-r--r--drivers/nvme/host/lightnvm.c67
-rw-r--r--drivers/nvme/host/nvme.h242
-rw-r--r--drivers/nvme/host/pci.c2700
-rw-r--r--drivers/nvme/host/scsi.c212
-rw-r--r--drivers/oprofile/oprofilefs.c16
-rw-r--r--drivers/scsi/3w-xxxx.c3
-rw-r--r--drivers/scsi/Kconfig18
-rw-r--r--drivers/scsi/NCR5380.c2868
-rw-r--r--drivers/scsi/NCR5380.h87
-rw-r--r--drivers/scsi/arm/cumana_1.c31
-rw-r--r--drivers/scsi/arm/oak.c27
-rw-r--r--drivers/scsi/atari_NCR5380.c2266
-rw-r--r--drivers/scsi/atari_scsi.c102
-rw-r--r--drivers/scsi/be2iscsi/Kconfig1
-rw-r--r--drivers/scsi/be2iscsi/be.h4
-rw-r--r--drivers/scsi/be2iscsi/be_iscsi.c4
-rw-r--r--drivers/scsi/be2iscsi/be_main.c20
-rw-r--r--drivers/scsi/cxgbi/cxgb3i/cxgb3i.c2
-rw-r--r--drivers/scsi/dmx3191d.c33
-rw-r--r--drivers/scsi/dtc.c115
-rw-r--r--drivers/scsi/dtc.h45
-rw-r--r--drivers/scsi/g_NCR5380.c408
-rw-r--r--drivers/scsi/g_NCR5380.h66
-rw-r--r--drivers/scsi/hisi_sas/hisi_sas_v1_hw.c40
-rw-r--r--drivers/scsi/imm.c50
-rw-r--r--drivers/scsi/ipr.c30
-rw-r--r--drivers/scsi/ipr.h4
-rw-r--r--drivers/scsi/mac_scsi.c117
-rw-r--r--drivers/scsi/megaraid/megaraid_mm.c4
-rw-r--r--drivers/scsi/pas16.c116
-rw-r--r--drivers/scsi/pas16.h40
-rw-r--r--drivers/scsi/scsi_devinfo.c1
-rw-r--r--drivers/scsi/storvsc_drv.c2
-rw-r--r--drivers/scsi/sun3_scsi.c141
-rw-r--r--drivers/scsi/t128.c102
-rw-r--r--drivers/scsi/t128.h39
-rw-r--r--drivers/soc/Kconfig1
-rw-r--r--drivers/soc/qcom/spm.c2
-rw-r--r--drivers/soc/tegra/Kconfig83
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/libcfs_private.h8
-rw-r--r--drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c21
-rw-r--r--drivers/staging/lustre/lustre/llite/dir.c4
-rw-r--r--drivers/staging/lustre/lustre/llite/file.c16
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_internal.h2
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_lib.c4
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_nfs.c4
-rw-r--r--drivers/staging/lustre/lustre/llite/lloop.c4
-rw-r--r--drivers/staging/lustre/lustre/llite/rw.c4
-rw-r--r--drivers/staging/lustre/lustre/llite/rw26.c4
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_io.c4
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_page.c10
-rw-r--r--drivers/staging/rdma/amso1100/c2_cq.c3
-rw-r--r--drivers/staging/rdma/amso1100/c2_provider.c72
-rw-r--r--drivers/staging/rdma/ehca/ehca_classes.h5
-rw-r--r--drivers/staging/rdma/ehca/ehca_iverbs.h16
-rw-r--r--drivers/staging/rdma/ehca/ehca_main.c4
-rw-r--r--drivers/staging/rdma/ehca/ehca_mrmw.c477
-rw-r--r--drivers/staging/rdma/ehca/ehca_mrmw.h5
-rw-r--r--drivers/staging/rdma/ehca/ehca_reqs.c1
-rw-r--r--drivers/staging/rdma/hfi1/mr.c51
-rw-r--r--drivers/staging/rdma/hfi1/verbs.c1
-rw-r--r--drivers/staging/rdma/hfi1/verbs.h4
-rw-r--r--drivers/staging/rdma/ipath/ipath_fs.c8
-rw-r--r--drivers/staging/rdma/ipath/ipath_mr.c55
-rw-r--r--drivers/staging/rdma/ipath/ipath_verbs.c1
-rw-r--r--drivers/staging/rdma/ipath/ipath_verbs.h4
-rw-r--r--drivers/target/target_core_iblock.c4
-rw-r--r--drivers/thermal/int340x_thermal/processor_thermal_device.c10
-rw-r--r--drivers/thermal/intel_pch_thermal.c2
-rw-r--r--drivers/thermal/rcar_thermal.c53
-rw-r--r--drivers/thermal/rockchip_thermal.c172
-rw-r--r--drivers/thermal/step_wise.c17
-rw-r--r--drivers/thermal/thermal_core.c81
-rw-r--r--drivers/thermal/thermal_core.h1
-rw-r--r--drivers/usb/gadget/function/f_printer.c4
-rw-r--r--drivers/usb/gadget/legacy/inode.c4
-rw-r--r--drivers/usb/gadget/udc/atmel_usba_udc.c12
-rw-r--r--drivers/usb/host/Kconfig2
-rw-r--r--drivers/video/fbdev/core/fb_defio.c4
257 files changed, 19317 insertions, 11913 deletions
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 6682c5d..6e6bc10 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -32,6 +32,7 @@
#include <linux/hardirq.h>
#include <linux/pstore.h>
#include <linux/vmalloc.h>
+#include <linux/mm.h> /* kvfree() */
#include <acpi/apei.h>
#include "apei-internal.h"
@@ -532,10 +533,7 @@ retry:
return -ENOMEM;
memcpy(new_entries, entries,
erst_record_id_cache.len * sizeof(entries[0]));
- if (erst_record_id_cache.size < PAGE_SIZE)
- kfree(entries);
- else
- vfree(entries);
+ kvfree(entries);
erst_record_id_cache.entries = entries = new_entries;
erst_record_id_cache.size = new_size;
}
diff --git a/drivers/amba/Kconfig b/drivers/amba/Kconfig
index 4a5c9d2..294ba6f 100644
--- a/drivers/amba/Kconfig
+++ b/drivers/amba/Kconfig
@@ -4,7 +4,7 @@ config ARM_AMBA
if ARM_AMBA
config TEGRA_AHB
- bool "Enable AHB driver for NVIDIA Tegra SoCs"
+ bool
default y if ARCH_TEGRA
help
Adds AHB configuration functionality for NVIDIA Tegra SoCs,
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index 68f0314..44a74cf 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -215,9 +215,9 @@ static int handle_create(const char *nodename, umode_t mode, kuid_t uid,
newattrs.ia_uid = uid;
newattrs.ia_gid = gid;
newattrs.ia_valid = ATTR_MODE|ATTR_UID|ATTR_GID;
- mutex_lock(&d_inode(dentry)->i_mutex);
+ inode_lock(d_inode(dentry));
notify_change(dentry, &newattrs, NULL);
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
/* mark as kernel-created inode */
d_inode(dentry)->i_private = &thread;
@@ -244,7 +244,7 @@ static int dev_rmdir(const char *name)
err = -ENOENT;
}
dput(dentry);
- mutex_unlock(&d_inode(parent.dentry)->i_mutex);
+ inode_unlock(d_inode(parent.dentry));
path_put(&parent);
return err;
}
@@ -321,9 +321,9 @@ static int handle_remove(const char *nodename, struct device *dev)
newattrs.ia_mode = stat.mode & ~0777;
newattrs.ia_valid =
ATTR_UID|ATTR_GID|ATTR_MODE;
- mutex_lock(&d_inode(dentry)->i_mutex);
+ inode_lock(d_inode(dentry));
notify_change(dentry, &newattrs, NULL);
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
err = vfs_unlink(d_inode(parent.dentry), dentry, NULL);
if (!err || err == -ENOENT)
deleted = 1;
@@ -332,7 +332,7 @@ static int handle_remove(const char *nodename, struct device *dev)
err = -ENOENT;
}
dput(dentry);
- mutex_unlock(&d_inode(parent.dentry)->i_mutex);
+ inode_unlock(d_inode(parent.dentry));
path_put(&parent);
if (deleted && strchr(nodename, '/'))
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index ad80c85..d048d20 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -964,9 +964,9 @@ aoecmd_sleepwork(struct work_struct *work)
ssize = get_capacity(d->gd);
bd = bdget_disk(d->gd, 0);
if (bd) {
- mutex_lock(&bd->bd_inode->i_mutex);
+ inode_lock(bd->bd_inode);
i_size_write(bd->bd_inode, (loff_t)ssize<<9);
- mutex_unlock(&bd->bd_inode->i_mutex);
+ inode_unlock(bd->bd_inode);
bdput(bd);
}
spin_lock_irq(&d->lock);
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index b3868e7..10459a1 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -288,7 +288,162 @@ bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *
return need_transaction;
}
-static int al_write_transaction(struct drbd_device *device);
+#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)
+/* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
+ * are still coupled, or assume too much about their relation.
+ * Code below will not work if this is violated.
+ * Will be cleaned up with some followup patch.
+ */
+# error FIXME
+#endif
+
+static unsigned int al_extent_to_bm_page(unsigned int al_enr)
+{
+ return al_enr >>
+ /* bit to page */
+ ((PAGE_SHIFT + 3) -
+ /* al extent number to bit */
+ (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
+}
+
+static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device)
+{
+ const unsigned int stripes = device->ldev->md.al_stripes;
+ const unsigned int stripe_size_4kB = device->ldev->md.al_stripe_size_4k;
+
+ /* transaction number, modulo on-disk ring buffer wrap around */
+ unsigned int t = device->al_tr_number % (device->ldev->md.al_size_4k);
+
+ /* ... to aligned 4k on disk block */
+ t = ((t % stripes) * stripe_size_4kB) + t/stripes;
+
+ /* ... to 512 byte sector in activity log */
+ t *= 8;
+
+ /* ... plus offset to the on disk position */
+ return device->ldev->md.md_offset + device->ldev->md.al_offset + t;
+}
+
+static int __al_write_transaction(struct drbd_device *device, struct al_transaction_on_disk *buffer)
+{
+ struct lc_element *e;
+ sector_t sector;
+ int i, mx;
+ unsigned extent_nr;
+ unsigned crc = 0;
+ int err = 0;
+
+ memset(buffer, 0, sizeof(*buffer));
+ buffer->magic = cpu_to_be32(DRBD_AL_MAGIC);
+ buffer->tr_number = cpu_to_be32(device->al_tr_number);
+
+ i = 0;
+
+ /* Even though no one can start to change this list
+ * once we set the LC_LOCKED -- from drbd_al_begin_io(),
+ * lc_try_lock_for_transaction() --, someone may still
+ * be in the process of changing it. */
+ spin_lock_irq(&device->al_lock);
+ list_for_each_entry(e, &device->act_log->to_be_changed, list) {
+ if (i == AL_UPDATES_PER_TRANSACTION) {
+ i++;
+ break;
+ }
+ buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index);
+ buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number);
+ if (e->lc_number != LC_FREE)
+ drbd_bm_mark_for_writeout(device,
+ al_extent_to_bm_page(e->lc_number));
+ i++;
+ }
+ spin_unlock_irq(&device->al_lock);
+ BUG_ON(i > AL_UPDATES_PER_TRANSACTION);
+
+ buffer->n_updates = cpu_to_be16(i);
+ for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) {
+ buffer->update_slot_nr[i] = cpu_to_be16(-1);
+ buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE);
+ }
+
+ buffer->context_size = cpu_to_be16(device->act_log->nr_elements);
+ buffer->context_start_slot_nr = cpu_to_be16(device->al_tr_cycle);
+
+ mx = min_t(int, AL_CONTEXT_PER_TRANSACTION,
+ device->act_log->nr_elements - device->al_tr_cycle);
+ for (i = 0; i < mx; i++) {
+ unsigned idx = device->al_tr_cycle + i;
+ extent_nr = lc_element_by_index(device->act_log, idx)->lc_number;
+ buffer->context[i] = cpu_to_be32(extent_nr);
+ }
+ for (; i < AL_CONTEXT_PER_TRANSACTION; i++)
+ buffer->context[i] = cpu_to_be32(LC_FREE);
+
+ device->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION;
+ if (device->al_tr_cycle >= device->act_log->nr_elements)
+ device->al_tr_cycle = 0;
+
+ sector = al_tr_number_to_on_disk_sector(device);
+
+ crc = crc32c(0, buffer, 4096);
+ buffer->crc32c = cpu_to_be32(crc);
+
+ if (drbd_bm_write_hinted(device))
+ err = -EIO;
+ else {
+ bool write_al_updates;
+ rcu_read_lock();
+ write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates;
+ rcu_read_unlock();
+ if (write_al_updates) {
+ if (drbd_md_sync_page_io(device, device->ldev, sector, WRITE)) {
+ err = -EIO;
+ drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
+ } else {
+ device->al_tr_number++;
+ device->al_writ_cnt++;
+ }
+ }
+ }
+
+ return err;
+}
+
+static int al_write_transaction(struct drbd_device *device)
+{
+ struct al_transaction_on_disk *buffer;
+ int err;
+
+ if (!get_ldev(device)) {
+ drbd_err(device, "disk is %s, cannot start al transaction\n",
+ drbd_disk_str(device->state.disk));
+ return -EIO;
+ }
+
+ /* The bitmap write may have failed, causing a state change. */
+ if (device->state.disk < D_INCONSISTENT) {
+ drbd_err(device,
+ "disk is %s, cannot write al transaction\n",
+ drbd_disk_str(device->state.disk));
+ put_ldev(device);
+ return -EIO;
+ }
+
+ /* protects md_io_buffer, al_tr_cycle, ... */
+ buffer = drbd_md_get_buffer(device, __func__);
+ if (!buffer) {
+ drbd_err(device, "disk failed while waiting for md_io buffer\n");
+ put_ldev(device);
+ return -ENODEV;
+ }
+
+ err = __al_write_transaction(device, buffer);
+
+ drbd_md_put_buffer(device);
+ put_ldev(device);
+
+ return err;
+}
+
void drbd_al_begin_io_commit(struct drbd_device *device)
{
@@ -420,153 +575,6 @@ void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i)
wake_up(&device->al_wait);
}
-#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)
-/* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
- * are still coupled, or assume too much about their relation.
- * Code below will not work if this is violated.
- * Will be cleaned up with some followup patch.
- */
-# error FIXME
-#endif
-
-static unsigned int al_extent_to_bm_page(unsigned int al_enr)
-{
- return al_enr >>
- /* bit to page */
- ((PAGE_SHIFT + 3) -
- /* al extent number to bit */
- (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
-}
-
-static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device)
-{
- const unsigned int stripes = device->ldev->md.al_stripes;
- const unsigned int stripe_size_4kB = device->ldev->md.al_stripe_size_4k;
-
- /* transaction number, modulo on-disk ring buffer wrap around */
- unsigned int t = device->al_tr_number % (device->ldev->md.al_size_4k);
-
- /* ... to aligned 4k on disk block */
- t = ((t % stripes) * stripe_size_4kB) + t/stripes;
-
- /* ... to 512 byte sector in activity log */
- t *= 8;
-
- /* ... plus offset to the on disk position */
- return device->ldev->md.md_offset + device->ldev->md.al_offset + t;
-}
-
-int al_write_transaction(struct drbd_device *device)
-{
- struct al_transaction_on_disk *buffer;
- struct lc_element *e;
- sector_t sector;
- int i, mx;
- unsigned extent_nr;
- unsigned crc = 0;
- int err = 0;
-
- if (!get_ldev(device)) {
- drbd_err(device, "disk is %s, cannot start al transaction\n",
- drbd_disk_str(device->state.disk));
- return -EIO;
- }
-
- /* The bitmap write may have failed, causing a state change. */
- if (device->state.disk < D_INCONSISTENT) {
- drbd_err(device,
- "disk is %s, cannot write al transaction\n",
- drbd_disk_str(device->state.disk));
- put_ldev(device);
- return -EIO;
- }
-
- /* protects md_io_buffer, al_tr_cycle, ... */
- buffer = drbd_md_get_buffer(device, __func__);
- if (!buffer) {
- drbd_err(device, "disk failed while waiting for md_io buffer\n");
- put_ldev(device);
- return -ENODEV;
- }
-
- memset(buffer, 0, sizeof(*buffer));
- buffer->magic = cpu_to_be32(DRBD_AL_MAGIC);
- buffer->tr_number = cpu_to_be32(device->al_tr_number);
-
- i = 0;
-
- /* Even though no one can start to change this list
- * once we set the LC_LOCKED -- from drbd_al_begin_io(),
- * lc_try_lock_for_transaction() --, someone may still
- * be in the process of changing it. */
- spin_lock_irq(&device->al_lock);
- list_for_each_entry(e, &device->act_log->to_be_changed, list) {
- if (i == AL_UPDATES_PER_TRANSACTION) {
- i++;
- break;
- }
- buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index);
- buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number);
- if (e->lc_number != LC_FREE)
- drbd_bm_mark_for_writeout(device,
- al_extent_to_bm_page(e->lc_number));
- i++;
- }
- spin_unlock_irq(&device->al_lock);
- BUG_ON(i > AL_UPDATES_PER_TRANSACTION);
-
- buffer->n_updates = cpu_to_be16(i);
- for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) {
- buffer->update_slot_nr[i] = cpu_to_be16(-1);
- buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE);
- }
-
- buffer->context_size = cpu_to_be16(device->act_log->nr_elements);
- buffer->context_start_slot_nr = cpu_to_be16(device->al_tr_cycle);
-
- mx = min_t(int, AL_CONTEXT_PER_TRANSACTION,
- device->act_log->nr_elements - device->al_tr_cycle);
- for (i = 0; i < mx; i++) {
- unsigned idx = device->al_tr_cycle + i;
- extent_nr = lc_element_by_index(device->act_log, idx)->lc_number;
- buffer->context[i] = cpu_to_be32(extent_nr);
- }
- for (; i < AL_CONTEXT_PER_TRANSACTION; i++)
- buffer->context[i] = cpu_to_be32(LC_FREE);
-
- device->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION;
- if (device->al_tr_cycle >= device->act_log->nr_elements)
- device->al_tr_cycle = 0;
-
- sector = al_tr_number_to_on_disk_sector(device);
-
- crc = crc32c(0, buffer, 4096);
- buffer->crc32c = cpu_to_be32(crc);
-
- if (drbd_bm_write_hinted(device))
- err = -EIO;
- else {
- bool write_al_updates;
- rcu_read_lock();
- write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates;
- rcu_read_unlock();
- if (write_al_updates) {
- if (drbd_md_sync_page_io(device, device->ldev, sector, WRITE)) {
- err = -EIO;
- drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
- } else {
- device->al_tr_number++;
- device->al_writ_cnt++;
- }
- }
- }
-
- drbd_md_put_buffer(device);
- put_ldev(device);
-
- return err;
-}
-
static int _try_lc_del(struct drbd_device *device, struct lc_element *al_ext)
{
int rv;
@@ -606,21 +614,24 @@ void drbd_al_shrink(struct drbd_device *device)
wake_up(&device->al_wait);
}
-int drbd_initialize_al(struct drbd_device *device, void *buffer)
+int drbd_al_initialize(struct drbd_device *device, void *buffer)
{
struct al_transaction_on_disk *al = buffer;
struct drbd_md *md = &device->ldev->md;
- sector_t al_base = md->md_offset + md->al_offset;
int al_size_4k = md->al_stripes * md->al_stripe_size_4k;
int i;
- memset(al, 0, 4096);
- al->magic = cpu_to_be32(DRBD_AL_MAGIC);
- al->transaction_type = cpu_to_be16(AL_TR_INITIALIZED);
- al->crc32c = cpu_to_be32(crc32c(0, al, 4096));
+ __al_write_transaction(device, al);
+ /* There may or may not have been a pending transaction. */
+ spin_lock_irq(&device->al_lock);
+ lc_committed(device->act_log);
+ spin_unlock_irq(&device->al_lock);
- for (i = 0; i < al_size_4k; i++) {
- int err = drbd_md_sync_page_io(device, device->ldev, al_base + i * 8, WRITE);
+ /* The rest of the transactions will have an empty "updates" list, and
+ * are written out only to provide the context, and to initialize the
+ * on-disk ring buffer. */
+ for (i = 1; i < al_size_4k; i++) {
+ int err = __al_write_transaction(device, al);
if (err)
return err;
}
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 9462d27..92d6fc0 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -24,7 +24,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/bitops.h>
+#include <linux/bitmap.h>
#include <linux/vmalloc.h>
#include <linux/string.h>
#include <linux/drbd.h>
@@ -364,12 +364,9 @@ static void bm_free_pages(struct page **pages, unsigned long number)
}
}
-static void bm_vk_free(void *ptr, int v)
+static inline void bm_vk_free(void *ptr)
{
- if (v)
- vfree(ptr);
- else
- kfree(ptr);
+ kvfree(ptr);
}
/*
@@ -379,7 +376,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
{
struct page **old_pages = b->bm_pages;
struct page **new_pages, *page;
- unsigned int i, bytes, vmalloced = 0;
+ unsigned int i, bytes;
unsigned long have = b->bm_number_of_pages;
BUG_ON(have == 0 && old_pages != NULL);
@@ -401,7 +398,6 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
PAGE_KERNEL);
if (!new_pages)
return NULL;
- vmalloced = 1;
}
if (want >= have) {
@@ -411,7 +407,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
if (!page) {
bm_free_pages(new_pages + have, i - have);
- bm_vk_free(new_pages, vmalloced);
+ bm_vk_free(new_pages);
return NULL;
}
/* we want to know which page it is
@@ -427,11 +423,6 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
*/
}
- if (vmalloced)
- b->bm_flags |= BM_P_VMALLOCED;
- else
- b->bm_flags &= ~BM_P_VMALLOCED;
-
return new_pages;
}
@@ -469,7 +460,7 @@ void drbd_bm_cleanup(struct drbd_device *device)
if (!expect(device->bitmap))
return;
bm_free_pages(device->bitmap->bm_pages, device->bitmap->bm_number_of_pages);
- bm_vk_free(device->bitmap->bm_pages, (BM_P_VMALLOCED & device->bitmap->bm_flags));
+ bm_vk_free(device->bitmap->bm_pages);
kfree(device->bitmap);
device->bitmap = NULL;
}
@@ -479,8 +470,14 @@ void drbd_bm_cleanup(struct drbd_device *device)
* this masks out the remaining bits.
* Returns the number of bits cleared.
*/
+#ifndef BITS_PER_PAGE
#define BITS_PER_PAGE (1UL << (PAGE_SHIFT + 3))
#define BITS_PER_PAGE_MASK (BITS_PER_PAGE - 1)
+#else
+# if BITS_PER_PAGE != (1UL << (PAGE_SHIFT + 3))
+# error "ambiguous BITS_PER_PAGE"
+# endif
+#endif
#define BITS_PER_LONG_MASK (BITS_PER_LONG - 1)
static int bm_clear_surplus(struct drbd_bitmap *b)
{
@@ -559,21 +556,19 @@ static unsigned long bm_count_bits(struct drbd_bitmap *b)
unsigned long *p_addr;
unsigned long bits = 0;
unsigned long mask = (1UL << (b->bm_bits & BITS_PER_LONG_MASK)) -1;
- int idx, i, last_word;
+ int idx, last_word;
/* all but last page */
for (idx = 0; idx < b->bm_number_of_pages - 1; idx++) {
p_addr = __bm_map_pidx(b, idx);
- for (i = 0; i < LWPP; i++)
- bits += hweight_long(p_addr[i]);
+ bits += bitmap_weight(p_addr, BITS_PER_PAGE);
__bm_unmap(p_addr);
cond_resched();
}
/* last (or only) page */
last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL;
p_addr = __bm_map_pidx(b, idx);
- for (i = 0; i < last_word; i++)
- bits += hweight_long(p_addr[i]);
+ bits += bitmap_weight(p_addr, last_word * BITS_PER_LONG);
p_addr[last_word] &= cpu_to_lel(mask);
bits += hweight_long(p_addr[last_word]);
/* 32bit arch, may have an unused padding long */
@@ -639,7 +634,6 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
unsigned long want, have, onpages; /* number of pages */
struct page **npages, **opages = NULL;
int err = 0, growing;
- int opages_vmalloced;
if (!expect(b))
return -ENOMEM;
@@ -652,8 +646,6 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
if (capacity == b->bm_dev_capacity)
goto out;
- opages_vmalloced = (BM_P_VMALLOCED & b->bm_flags);
-
if (capacity == 0) {
spin_lock_irq(&b->bm_lock);
opages = b->bm_pages;
@@ -667,7 +659,7 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
b->bm_dev_capacity = 0;
spin_unlock_irq(&b->bm_lock);
bm_free_pages(opages, onpages);
- bm_vk_free(opages, opages_vmalloced);
+ bm_vk_free(opages);
goto out;
}
bits = BM_SECT_TO_BIT(ALIGN(capacity, BM_SECT_PER_BIT));
@@ -740,7 +732,7 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
spin_unlock_irq(&b->bm_lock);
if (opages != npages)
- bm_vk_free(opages, opages_vmalloced);
+ bm_vk_free(opages);
if (!growing)
b->bm_set = bm_count_bits(b);
drbd_info(device, "resync bitmap: bits=%lu words=%lu pages=%lu\n", bits, words, want);
@@ -1419,6 +1411,9 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
int bits;
int changed = 0;
unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr]);
+
+ /* I think it is more cache line friendly to hweight_long then set to ~0UL,
+ * than to first bitmap_weight() all words, then bitmap_fill() all words */
for (i = first_word; i < last_word; i++) {
bits = hweight_long(paddr[i]);
paddr[i] = ~0UL;
@@ -1628,8 +1623,7 @@ int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr)
int n = e-s;
p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s));
bm = p_addr + MLPP(s);
- while (n--)
- count += hweight_long(*bm++);
+ count += bitmap_weight(bm, n * BITS_PER_LONG);
bm_unmap(p_addr);
} else {
drbd_err(device, "start offset (%d) too large in drbd_bm_e_weight\n", s);
diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c
index 6b88a35..4de95bb 100644
--- a/drivers/block/drbd/drbd_debugfs.c
+++ b/drivers/block/drbd/drbd_debugfs.c
@@ -434,12 +434,12 @@ static int drbd_single_open(struct file *file, int (*show)(struct seq_file *, vo
if (!parent || d_really_is_negative(parent))
goto out;
/* serialize with d_delete() */
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
/* Make sure the object is still alive */
if (simple_positive(file->f_path.dentry)
&& kref_get_unless_zero(kref))
ret = 0;
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
if (!ret) {
ret = single_open(file, show, data);
if (ret)
@@ -771,6 +771,13 @@ static int device_data_gen_id_show(struct seq_file *m, void *ignored)
return 0;
}
+static int device_ed_gen_id_show(struct seq_file *m, void *ignored)
+{
+ struct drbd_device *device = m->private;
+ seq_printf(m, "0x%016llX\n", (unsigned long long)device->ed_uuid);
+ return 0;
+}
+
#define drbd_debugfs_device_attr(name) \
static int device_ ## name ## _open(struct inode *inode, struct file *file) \
{ \
@@ -796,6 +803,7 @@ drbd_debugfs_device_attr(oldest_requests)
drbd_debugfs_device_attr(act_log_extents)
drbd_debugfs_device_attr(resync_extents)
drbd_debugfs_device_attr(data_gen_id)
+drbd_debugfs_device_attr(ed_gen_id)
void drbd_debugfs_device_add(struct drbd_device *device)
{
@@ -839,6 +847,7 @@ void drbd_debugfs_device_add(struct drbd_device *device)
DCF(act_log_extents);
DCF(resync_extents);
DCF(data_gen_id);
+ DCF(ed_gen_id);
#undef DCF
return;
@@ -854,6 +863,7 @@ void drbd_debugfs_device_cleanup(struct drbd_device *device)
drbd_debugfs_remove(&device->debugfs_vol_act_log_extents);
drbd_debugfs_remove(&device->debugfs_vol_resync_extents);
drbd_debugfs_remove(&device->debugfs_vol_data_gen_id);
+ drbd_debugfs_remove(&device->debugfs_vol_ed_gen_id);
drbd_debugfs_remove(&device->debugfs_vol);
}
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index e66d453..34bc84e 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -77,13 +77,6 @@ extern int fault_devs;
extern char usermode_helper[];
-/* I don't remember why XCPU ...
- * This is used to wake the asender,
- * and to interrupt sending the sending task
- * on disconnect.
- */
-#define DRBD_SIG SIGXCPU
-
/* This is used to stop/restart our threads.
* Cannot use SIGTERM nor SIGKILL, since these
* are sent out by init on runlevel changes
@@ -292,6 +285,9 @@ struct drbd_device_work {
extern int drbd_wait_misc(struct drbd_device *, struct drbd_interval *);
+extern void lock_all_resources(void);
+extern void unlock_all_resources(void);
+
struct drbd_request {
struct drbd_work w;
struct drbd_device *device;
@@ -504,7 +500,6 @@ enum {
MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */
- SUSPEND_IO, /* suspend application io */
BITMAP_IO, /* suspend application io;
once no more io in flight, start bitmap io */
BITMAP_IO_QUEUED, /* Started bitmap IO */
@@ -541,9 +536,6 @@ struct drbd_bitmap; /* opaque for drbd_device */
/* definition of bits in bm_flags to be used in drbd_bm_lock
* and drbd_bitmap_io and friends. */
enum bm_flag {
- /* do we need to kfree, or vfree bm_pages? */
- BM_P_VMALLOCED = 0x10000, /* internal use only, will be masked out */
-
/* currently locked for bulk operation */
BM_LOCKED_MASK = 0xf,
@@ -632,12 +624,6 @@ struct bm_io_work {
void (*done)(struct drbd_device *device, int rv);
};
-enum write_ordering_e {
- WO_none,
- WO_drain_io,
- WO_bdev_flush,
-};
-
struct fifo_buffer {
unsigned int head_index;
unsigned int size;
@@ -650,8 +636,7 @@ extern struct fifo_buffer *fifo_alloc(int fifo_size);
enum {
NET_CONGESTED, /* The data socket is congested */
RESOLVE_CONFLICTS, /* Set on one node, cleared on the peer! */
- SEND_PING, /* whether asender should send a ping asap */
- SIGNAL_ASENDER, /* whether asender wants to be interrupted */
+ SEND_PING,
GOT_PING_ACK, /* set when we receive a ping_ack packet, ping_wait gets woken */
CONN_WD_ST_CHG_REQ, /* A cluster wide state change on the connection is active */
CONN_WD_ST_CHG_OKAY,
@@ -670,6 +655,8 @@ enum {
DEVICE_WORK_PENDING, /* tell worker that some device has pending work */
};
+enum which_state { NOW, OLD = NOW, NEW };
+
struct drbd_resource {
char *name;
#ifdef CONFIG_DEBUG_FS
@@ -755,7 +742,8 @@ struct drbd_connection {
unsigned long last_reconnect_jif;
struct drbd_thread receiver;
struct drbd_thread worker;
- struct drbd_thread asender;
+ struct drbd_thread ack_receiver;
+ struct workqueue_struct *ack_sender;
/* cached pointers,
* so we can look up the oldest pending requests more quickly.
@@ -774,6 +762,8 @@ struct drbd_connection {
struct drbd_thread_timing_details r_timing_details[DRBD_THREAD_DETAILS_HIST];
struct {
+ unsigned long last_sent_barrier_jif;
+
/* whether this sender thread
* has processed a single write yet. */
bool seen_any_write_yet;
@@ -788,6 +778,17 @@ struct drbd_connection {
} send;
};
+static inline bool has_net_conf(struct drbd_connection *connection)
+{
+ bool has_net_conf;
+
+ rcu_read_lock();
+ has_net_conf = rcu_dereference(connection->net_conf);
+ rcu_read_unlock();
+
+ return has_net_conf;
+}
+
void __update_timing_details(
struct drbd_thread_timing_details *tdp,
unsigned int *cb_nr,
@@ -811,6 +812,7 @@ struct drbd_peer_device {
struct list_head peer_devices;
struct drbd_device *device;
struct drbd_connection *connection;
+ struct work_struct send_acks_work;
#ifdef CONFIG_DEBUG_FS
struct dentry *debugfs_peer_dev;
#endif
@@ -829,6 +831,7 @@ struct drbd_device {
struct dentry *debugfs_vol_act_log_extents;
struct dentry *debugfs_vol_resync_extents;
struct dentry *debugfs_vol_data_gen_id;
+ struct dentry *debugfs_vol_ed_gen_id;
#endif
unsigned int vnr; /* volume number within the connection */
@@ -873,6 +876,7 @@ struct drbd_device {
atomic_t rs_pending_cnt; /* RS request/data packets on the wire */
atomic_t unacked_cnt; /* Need to send replies for */
atomic_t local_cnt; /* Waiting for local completion */
+ atomic_t suspend_cnt;
/* Interval tree of pending local requests */
struct rb_root read_requests;
@@ -1020,6 +1024,12 @@ static inline struct drbd_peer_device *first_peer_device(struct drbd_device *dev
return list_first_entry_or_null(&device->peer_devices, struct drbd_peer_device, peer_devices);
}
+static inline struct drbd_peer_device *
+conn_peer_device(struct drbd_connection *connection, int volume_number)
+{
+ return idr_find(&connection->peer_devices, volume_number);
+}
+
#define for_each_resource(resource, _resources) \
list_for_each_entry(resource, _resources, resources)
@@ -1113,7 +1123,7 @@ extern int drbd_send_ov_request(struct drbd_peer_device *, sector_t sector, int
extern int drbd_send_bitmap(struct drbd_device *device);
extern void drbd_send_sr_reply(struct drbd_peer_device *, enum drbd_state_rv retcode);
extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode);
-extern void drbd_free_ldev(struct drbd_backing_dev *ldev);
+extern void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev);
extern void drbd_device_cleanup(struct drbd_device *device);
void drbd_print_uuids(struct drbd_device *device, const char *text);
@@ -1424,7 +1434,7 @@ extern struct bio_set *drbd_md_io_bio_set;
/* to allocate from that set */
extern struct bio *bio_alloc_drbd(gfp_t gfp_mask);
-extern rwlock_t global_state_lock;
+extern struct mutex resources_mutex;
extern int conn_lowest_minor(struct drbd_connection *connection);
extern enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor);
@@ -1454,6 +1464,9 @@ extern int is_valid_ar_handle(struct drbd_request *, sector_t);
/* drbd_nl.c */
+
+extern struct mutex notification_mutex;
+
extern void drbd_suspend_io(struct drbd_device *device);
extern void drbd_resume_io(struct drbd_device *device);
extern char *ppsize(char *buf, unsigned long long size);
@@ -1536,7 +1549,9 @@ extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req);
/* drbd_receiver.c */
extern int drbd_receiver(struct drbd_thread *thi);
-extern int drbd_asender(struct drbd_thread *thi);
+extern int drbd_ack_receiver(struct drbd_thread *thi);
+extern void drbd_send_ping_wf(struct work_struct *ws);
+extern void drbd_send_acks_wf(struct work_struct *ws);
extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device);
extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
bool throttle_if_app_is_waiting);
@@ -1649,7 +1664,7 @@ extern int __drbd_change_sync(struct drbd_device *device, sector_t sector, int s
#define drbd_rs_failed_io(device, sector, size) \
__drbd_change_sync(device, sector, size, RECORD_RS_FAILED)
extern void drbd_al_shrink(struct drbd_device *device);
-extern int drbd_initialize_al(struct drbd_device *, void *);
+extern int drbd_al_initialize(struct drbd_device *, void *);
/* drbd_nl.c */
/* state info broadcast */
@@ -1668,6 +1683,29 @@ struct sib_info {
};
void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib);
+extern void notify_resource_state(struct sk_buff *,
+ unsigned int,
+ struct drbd_resource *,
+ struct resource_info *,
+ enum drbd_notification_type);
+extern void notify_device_state(struct sk_buff *,
+ unsigned int,
+ struct drbd_device *,
+ struct device_info *,
+ enum drbd_notification_type);
+extern void notify_connection_state(struct sk_buff *,
+ unsigned int,
+ struct drbd_connection *,
+ struct connection_info *,
+ enum drbd_notification_type);
+extern void notify_peer_device_state(struct sk_buff *,
+ unsigned int,
+ struct drbd_peer_device *,
+ struct peer_device_info *,
+ enum drbd_notification_type);
+extern void notify_helper(enum drbd_notification_type, struct drbd_device *,
+ struct drbd_connection *, const char *, int);
+
/*
* inline helper functions
*************************/
@@ -1694,19 +1732,6 @@ static inline int drbd_peer_req_has_active_page(struct drbd_peer_request *peer_r
return 0;
}
-static inline enum drbd_state_rv
-_drbd_set_state(struct drbd_device *device, union drbd_state ns,
- enum chg_state_flags flags, struct completion *done)
-{
- enum drbd_state_rv rv;
-
- read_lock(&global_state_lock);
- rv = __drbd_set_state(device, ns, flags, done);
- read_unlock(&global_state_lock);
-
- return rv;
-}
-
static inline union drbd_state drbd_read_state(struct drbd_device *device)
{
struct drbd_resource *resource = device->resource;
@@ -1937,16 +1962,21 @@ drbd_device_post_work(struct drbd_device *device, int work_bit)
extern void drbd_flush_workqueue(struct drbd_work_queue *work_queue);
-static inline void wake_asender(struct drbd_connection *connection)
+/* To get the ack_receiver out of the blocking network stack,
+ * so it can change its sk_rcvtimeo from idle- to ping-timeout,
+ * and send a ping, we need to send a signal.
+ * Which signal we send is irrelevant. */
+static inline void wake_ack_receiver(struct drbd_connection *connection)
{
- if (test_bit(SIGNAL_ASENDER, &connection->flags))
- force_sig(DRBD_SIG, connection->asender.task);
+ struct task_struct *task = connection->ack_receiver.task;
+ if (task && get_t_state(&connection->ack_receiver) == RUNNING)
+ force_sig(SIGXCPU, task);
}
static inline void request_ping(struct drbd_connection *connection)
{
set_bit(SEND_PING, &connection->flags);
- wake_asender(connection);
+ wake_ack_receiver(connection);
}
extern void *conn_prepare_command(struct drbd_connection *, struct drbd_socket *);
@@ -2230,7 +2260,7 @@ static inline bool may_inc_ap_bio(struct drbd_device *device)
if (drbd_suspended(device))
return false;
- if (test_bit(SUSPEND_IO, &device->flags))
+ if (atomic_read(&device->suspend_cnt))
return false;
/* to avoid potential deadlock or bitmap corruption,
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 74d97f4..5b43dfb 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -117,6 +117,7 @@ module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0
*/
struct idr drbd_devices;
struct list_head drbd_resources;
+struct mutex resources_mutex;
struct kmem_cache *drbd_request_cache;
struct kmem_cache *drbd_ee_cache; /* peer requests */
@@ -1435,8 +1436,8 @@ static int we_should_drop_the_connection(struct drbd_connection *connection, str
/* long elapsed = (long)(jiffies - device->last_received); */
drop_it = connection->meta.socket == sock
- || !connection->asender.task
- || get_t_state(&connection->asender) != RUNNING
+ || !connection->ack_receiver.task
+ || get_t_state(&connection->ack_receiver) != RUNNING
|| connection->cstate < C_WF_REPORT_PARAMS;
if (drop_it)
@@ -1793,15 +1794,6 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock,
drbd_update_congested(connection);
}
do {
- /* STRANGE
- * tcp_sendmsg does _not_ use its size parameter at all ?
- *
- * -EAGAIN on timeout, -EINTR on signal.
- */
-/* THINK
- * do we need to block DRBD_SIG if sock == &meta.socket ??
- * otherwise wake_asender() might interrupt some send_*Ack !
- */
rv = kernel_sendmsg(sock, &msg, &iov, 1, size);
if (rv == -EAGAIN) {
if (we_should_drop_the_connection(connection, sock))
@@ -2000,7 +1992,7 @@ void drbd_device_cleanup(struct drbd_device *device)
drbd_bm_cleanup(device);
}
- drbd_free_ldev(device->ldev);
+ drbd_backing_dev_free(device, device->ldev);
device->ldev = NULL;
clear_bit(AL_SUSPENDED, &device->flags);
@@ -2179,7 +2171,7 @@ void drbd_destroy_device(struct kref *kref)
if (device->this_bdev)
bdput(device->this_bdev);
- drbd_free_ldev(device->ldev);
+ drbd_backing_dev_free(device, device->ldev);
device->ldev = NULL;
drbd_release_all_peer_reqs(device);
@@ -2563,7 +2555,7 @@ int set_resource_options(struct drbd_resource *resource, struct res_opts *res_op
cpumask_copy(resource->cpu_mask, new_cpu_mask);
for_each_connection_rcu(connection, resource) {
connection->receiver.reset_cpu_mask = 1;
- connection->asender.reset_cpu_mask = 1;
+ connection->ack_receiver.reset_cpu_mask = 1;
connection->worker.reset_cpu_mask = 1;
}
}
@@ -2590,7 +2582,7 @@ struct drbd_resource *drbd_create_resource(const char *name)
kref_init(&resource->kref);
idr_init(&resource->devices);
INIT_LIST_HEAD(&resource->connections);
- resource->write_ordering = WO_bdev_flush;
+ resource->write_ordering = WO_BDEV_FLUSH;
list_add_tail_rcu(&resource->resources, &drbd_resources);
mutex_init(&resource->conf_update);
mutex_init(&resource->adm_mutex);
@@ -2652,8 +2644,8 @@ struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts)
connection->receiver.connection = connection;
drbd_thread_init(resource, &connection->worker, drbd_worker, "worker");
connection->worker.connection = connection;
- drbd_thread_init(resource, &connection->asender, drbd_asender, "asender");
- connection->asender.connection = connection;
+ drbd_thread_init(resource, &connection->ack_receiver, drbd_ack_receiver, "ack_recv");
+ connection->ack_receiver.connection = connection;
kref_init(&connection->kref);
@@ -2702,8 +2694,8 @@ static int init_submitter(struct drbd_device *device)
{
/* opencoded create_singlethread_workqueue(),
* to be able to say "drbd%d", ..., minor */
- device->submit.wq = alloc_workqueue("drbd%u_submit",
- WQ_UNBOUND | WQ_MEM_RECLAIM, 1, device->minor);
+ device->submit.wq =
+ alloc_ordered_workqueue("drbd%u_submit", WQ_MEM_RECLAIM, device->minor);
if (!device->submit.wq)
return -ENOMEM;
@@ -2820,6 +2812,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
goto out_idr_remove_from_resource;
}
kref_get(&connection->kref);
+ INIT_WORK(&peer_device->send_acks_work, drbd_send_acks_wf);
}
if (init_submitter(device)) {
@@ -2923,7 +2916,7 @@ static int __init drbd_init(void)
drbd_proc = NULL; /* play safe for drbd_cleanup */
idr_init(&drbd_devices);
- rwlock_init(&global_state_lock);
+ mutex_init(&resources_mutex);
INIT_LIST_HEAD(&drbd_resources);
err = drbd_genl_register();
@@ -2971,18 +2964,6 @@ fail:
return err;
}
-void drbd_free_ldev(struct drbd_backing_dev *ldev)
-{
- if (ldev == NULL)
- return;
-
- blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
- blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
-
- kfree(ldev->disk_conf);
- kfree(ldev);
-}
-
static void drbd_free_one_sock(struct drbd_socket *ds)
{
struct socket *s;
@@ -3277,6 +3258,10 @@ int drbd_md_read(struct drbd_device *device, struct drbd_backing_dev *bdev)
* and read it. */
bdev->md.meta_dev_idx = bdev->disk_conf->meta_dev_idx;
bdev->md.md_offset = drbd_md_ss(bdev);
+ /* Even for (flexible or indexed) external meta data,
+ * initially restrict us to the 4k superblock for now.
+ * Affects the paranoia out-of-range access check in drbd_md_sync_page_io(). */
+ bdev->md.md_size_sect = 8;
if (drbd_md_sync_page_io(device, bdev, bdev->md.md_offset, READ)) {
/* NOTE: can't do normal error processing here as this is
@@ -3578,7 +3563,9 @@ void drbd_queue_bitmap_io(struct drbd_device *device,
spin_lock_irq(&device->resource->req_lock);
set_bit(BITMAP_IO, &device->flags);
- if (atomic_read(&device->ap_bio_cnt) == 0) {
+ /* don't wait for pending application IO if the caller indicates that
+ * application IO does not conflict anyways. */
+ if (flags == BM_LOCKED_CHANGE_ALLOWED || atomic_read(&device->ap_bio_cnt) == 0) {
if (!test_and_set_bit(BITMAP_IO_QUEUED, &device->flags))
drbd_queue_work(&first_peer_device(device)->connection->sender_work,
&device->bm_io_work.w);
@@ -3746,6 +3733,27 @@ int drbd_wait_misc(struct drbd_device *device, struct drbd_interval *i)
return 0;
}
+void lock_all_resources(void)
+{
+ struct drbd_resource *resource;
+ int __maybe_unused i = 0;
+
+ mutex_lock(&resources_mutex);
+ local_irq_disable();
+ for_each_resource(resource, &drbd_resources)
+ spin_lock_nested(&resource->req_lock, i++);
+}
+
+void unlock_all_resources(void)
+{
+ struct drbd_resource *resource;
+
+ for_each_resource(resource, &drbd_resources)
+ spin_unlock(&resource->req_lock);
+ local_irq_enable();
+ mutex_unlock(&resources_mutex);
+}
+
#ifdef CONFIG_DRBD_FAULT_INJECTION
/* Fault insertion support including random number generator shamelessly
* stolen from kernel/rcutorture.c */
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index e80cbef..c055c5e 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -36,6 +36,7 @@
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"
+#include "drbd_state_change.h"
#include <asm/unaligned.h>
#include <linux/drbd_limits.h>
#include <linux/kthread.h>
@@ -75,11 +76,24 @@ int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);
/* .dumpit */
int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);
+int drbd_adm_dump_resources(struct sk_buff *skb, struct netlink_callback *cb);
+int drbd_adm_dump_devices(struct sk_buff *skb, struct netlink_callback *cb);
+int drbd_adm_dump_devices_done(struct netlink_callback *cb);
+int drbd_adm_dump_connections(struct sk_buff *skb, struct netlink_callback *cb);
+int drbd_adm_dump_connections_done(struct netlink_callback *cb);
+int drbd_adm_dump_peer_devices(struct sk_buff *skb, struct netlink_callback *cb);
+int drbd_adm_dump_peer_devices_done(struct netlink_callback *cb);
+int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb);
#include <linux/drbd_genl_api.h>
#include "drbd_nla.h"
#include <linux/genl_magic_func.h>
+static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
+static atomic_t notify_genl_seq = ATOMIC_INIT(2); /* two. */
+
+DEFINE_MUTEX(notification_mutex);
+
/* used blkdev_get_by_path, to claim our meta data device(s) */
static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
@@ -349,6 +363,7 @@ int drbd_khelper(struct drbd_device *device, char *cmd)
sib.sib_reason = SIB_HELPER_PRE;
sib.helper_name = cmd;
drbd_bcast_event(device, &sib);
+ notify_helper(NOTIFY_CALL, device, connection, cmd, 0);
ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
if (ret)
drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n",
@@ -361,6 +376,7 @@ int drbd_khelper(struct drbd_device *device, char *cmd)
sib.sib_reason = SIB_HELPER_POST;
sib.helper_exit_code = ret;
drbd_bcast_event(device, &sib);
+ notify_helper(NOTIFY_RESPONSE, device, connection, cmd, ret);
if (current == connection->worker.task)
clear_bit(CALLBACK_PENDING, &connection->flags);
@@ -388,6 +404,7 @@ static int conn_khelper(struct drbd_connection *connection, char *cmd)
drbd_info(connection, "helper command: %s %s %s\n", usermode_helper, cmd, resource_name);
/* TODO: conn_bcast_event() ?? */
+ notify_helper(NOTIFY_CALL, NULL, connection, cmd, 0);
ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
if (ret)
@@ -399,6 +416,7 @@ static int conn_khelper(struct drbd_connection *connection, char *cmd)
usermode_helper, cmd, resource_name,
(ret >> 8) & 0xff, ret);
/* TODO: conn_bcast_event() ?? */
+ notify_helper(NOTIFY_RESPONSE, NULL, connection, cmd, ret);
if (ret < 0) /* Ignore any ERRNOs we got. */
ret = 0;
@@ -847,9 +865,11 @@ char *ppsize(char *buf, unsigned long long size)
* and can be long lived.
* This changes an device->flag, is triggered by drbd internals,
* and should be short-lived. */
+/* It needs to be a counter, since multiple threads might
+ independently suspend and resume IO. */
void drbd_suspend_io(struct drbd_device *device)
{
- set_bit(SUSPEND_IO, &device->flags);
+ atomic_inc(&device->suspend_cnt);
if (drbd_suspended(device))
return;
wait_event(device->misc_wait, !atomic_read(&device->ap_bio_cnt));
@@ -857,8 +877,8 @@ void drbd_suspend_io(struct drbd_device *device)
void drbd_resume_io(struct drbd_device *device)
{
- clear_bit(SUSPEND_IO, &device->flags);
- wake_up(&device->misc_wait);
+ if (atomic_dec_and_test(&device->suspend_cnt))
+ wake_up(&device->misc_wait);
}
/**
@@ -871,27 +891,32 @@ void drbd_resume_io(struct drbd_device *device)
enum determine_dev_size
drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
{
- sector_t prev_first_sect, prev_size; /* previous meta location */
- sector_t la_size_sect, u_size;
+ struct md_offsets_and_sizes {
+ u64 last_agreed_sect;
+ u64 md_offset;
+ s32 al_offset;
+ s32 bm_offset;
+ u32 md_size_sect;
+
+ u32 al_stripes;
+ u32 al_stripe_size_4k;
+ } prev;
+ sector_t u_size, size;
struct drbd_md *md = &device->ldev->md;
- u32 prev_al_stripe_size_4k;
- u32 prev_al_stripes;
- sector_t size;
char ppb[10];
void *buffer;
int md_moved, la_size_changed;
enum determine_dev_size rv = DS_UNCHANGED;
- /* race:
- * application request passes inc_ap_bio,
- * but then cannot get an AL-reference.
- * this function later may wait on ap_bio_cnt == 0. -> deadlock.
+ /* We may change the on-disk offsets of our meta data below. Lock out
+ * anything that may cause meta data IO, to avoid acting on incomplete
+ * layout changes or scribbling over meta data that is in the process
+ * of being moved.
*
- * to avoid that:
- * Suspend IO right here.
- * still lock the act_log to not trigger ASSERTs there.
- */
+ * Move is not exactly correct, btw, currently we have all our meta
+ * data in core memory, to "move" it we just write it all out, there
+ * are no reads. */
drbd_suspend_io(device);
buffer = drbd_md_get_buffer(device, __func__); /* Lock meta-data IO */
if (!buffer) {
@@ -899,19 +924,17 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
return DS_ERROR;
}
- /* no wait necessary anymore, actually we could assert that */
- wait_event(device->al_wait, lc_try_lock(device->act_log));
-
- prev_first_sect = drbd_md_first_sector(device->ldev);
- prev_size = device->ldev->md.md_size_sect;
- la_size_sect = device->ldev->md.la_size_sect;
+ /* remember current offset and sizes */
+ prev.last_agreed_sect = md->la_size_sect;
+ prev.md_offset = md->md_offset;
+ prev.al_offset = md->al_offset;
+ prev.bm_offset = md->bm_offset;
+ prev.md_size_sect = md->md_size_sect;
+ prev.al_stripes = md->al_stripes;
+ prev.al_stripe_size_4k = md->al_stripe_size_4k;
if (rs) {
/* rs is non NULL if we should change the AL layout only */
-
- prev_al_stripes = md->al_stripes;
- prev_al_stripe_size_4k = md->al_stripe_size_4k;
-
md->al_stripes = rs->al_stripes;
md->al_stripe_size_4k = rs->al_stripe_size / 4;
md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;
@@ -924,7 +947,7 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
rcu_read_unlock();
size = drbd_new_dev_size(device, device->ldev, u_size, flags & DDSF_FORCED);
- if (size < la_size_sect) {
+ if (size < prev.last_agreed_sect) {
if (rs && u_size == 0) {
/* Remove "rs &&" later. This check should always be active, but
right now the receiver expects the permissive behavior */
@@ -945,30 +968,29 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
err = drbd_bm_resize(device, size, !(flags & DDSF_NO_RESYNC));
if (unlikely(err)) {
/* currently there is only one error: ENOMEM! */
- size = drbd_bm_capacity(device)>>1;
+ size = drbd_bm_capacity(device);
if (size == 0) {
drbd_err(device, "OUT OF MEMORY! "
"Could not allocate bitmap!\n");
} else {
drbd_err(device, "BM resizing failed. "
- "Leaving size unchanged at size = %lu KB\n",
- (unsigned long)size);
+ "Leaving size unchanged\n");
}
rv = DS_ERROR;
}
/* racy, see comments above. */
drbd_set_my_capacity(device, size);
- device->ldev->md.la_size_sect = size;
+ md->la_size_sect = size;
drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
(unsigned long long)size>>1);
}
if (rv <= DS_ERROR)
goto err_out;
- la_size_changed = (la_size_sect != device->ldev->md.la_size_sect);
+ la_size_changed = (prev.last_agreed_sect != md->la_size_sect);
- md_moved = prev_first_sect != drbd_md_first_sector(device->ldev)
- || prev_size != device->ldev->md.md_size_sect;
+ md_moved = prev.md_offset != md->md_offset
+ || prev.md_size_sect != md->md_size_sect;
if (la_size_changed || md_moved || rs) {
u32 prev_flags;
@@ -977,20 +999,29 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
* Clear the timer, to avoid scary "timer expired!" messages,
* "Superblock" is written out at least twice below, anyways. */
del_timer(&device->md_sync_timer);
- drbd_al_shrink(device); /* All extents inactive. */
+ /* We won't change the "al-extents" setting, we just may need
+ * to move the on-disk location of the activity log ringbuffer.
+ * Lock for transaction is good enough, it may well be "dirty"
+ * or even "starving". */
+ wait_event(device->al_wait, lc_try_lock_for_transaction(device->act_log));
+
+ /* mark current on-disk bitmap and activity log as unreliable */
prev_flags = md->flags;
- md->flags &= ~MDF_PRIMARY_IND;
+ md->flags |= MDF_FULL_SYNC | MDF_AL_DISABLED;
drbd_md_write(device, buffer);
+ drbd_al_initialize(device, buffer);
+
drbd_info(device, "Writing the whole bitmap, %s\n",
la_size_changed && md_moved ? "size changed and md moved" :
la_size_changed ? "size changed" : "md moved");
/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
"size changed", BM_LOCKED_MASK);
- drbd_initialize_al(device, buffer);
+ /* on-disk bitmap and activity log is authoritative again
+ * (unless there was an IO error meanwhile...) */
md->flags = prev_flags;
drbd_md_write(device, buffer);
@@ -999,20 +1030,22 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
md->al_stripes, md->al_stripe_size_4k * 4);
}
- if (size > la_size_sect)
- rv = la_size_sect ? DS_GREW : DS_GREW_FROM_ZERO;
- if (size < la_size_sect)
+ if (size > prev.last_agreed_sect)
+ rv = prev.last_agreed_sect ? DS_GREW : DS_GREW_FROM_ZERO;
+ if (size < prev.last_agreed_sect)
rv = DS_SHRUNK;
if (0) {
err_out:
- if (rs) {
- md->al_stripes = prev_al_stripes;
- md->al_stripe_size_4k = prev_al_stripe_size_4k;
- md->al_size_4k = (u64)prev_al_stripes * prev_al_stripe_size_4k;
-
- drbd_md_set_sector_offsets(device, device->ldev);
- }
+ /* restore previous offset and sizes */
+ md->la_size_sect = prev.last_agreed_sect;
+ md->md_offset = prev.md_offset;
+ md->al_offset = prev.al_offset;
+ md->bm_offset = prev.bm_offset;
+ md->md_size_sect = prev.md_size_sect;
+ md->al_stripes = prev.al_stripes;
+ md->al_stripe_size_4k = prev.al_stripe_size_4k;
+ md->al_size_4k = (u64)prev.al_stripes * prev.al_stripe_size_4k;
}
lc_unlock(device->act_log);
wake_up(&device->al_wait);
@@ -1115,8 +1148,7 @@ static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc)
lc_destroy(n);
return -EBUSY;
} else {
- if (t)
- lc_destroy(t);
+ lc_destroy(t);
}
drbd_md_mark_dirty(device); /* we changed device->act_log->nr_elemens */
return 0;
@@ -1151,21 +1183,20 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
if (b) {
struct drbd_connection *connection = first_peer_device(device)->connection;
+ blk_queue_max_discard_sectors(q, DRBD_MAX_DISCARD_SECTORS);
+
if (blk_queue_discard(b) &&
(connection->cstate < C_CONNECTED || connection->agreed_features & FF_TRIM)) {
- /* For now, don't allow more than one activity log extent worth of data
- * to be discarded in one go. We may need to rework drbd_al_begin_io()
- * to allow for even larger discard ranges */
- blk_queue_max_discard_sectors(q, DRBD_MAX_DISCARD_SECTORS);
-
+ /* We don't care, stacking below should fix it for the local device.
+ * Whether or not it is a suitable granularity on the remote device
+ * is not our problem, really. If you care, you need to
+ * use devices with similar topology on all peers. */
+ q->limits.discard_granularity = 512;
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
- /* REALLY? Is stacking secdiscard "legal"? */
- if (blk_queue_secdiscard(b))
- queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, q);
} else {
blk_queue_max_discard_sectors(q, 0);
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
- queue_flag_clear_unlocked(QUEUE_FLAG_SECDISCARD, q);
+ q->limits.discard_granularity = 0;
}
blk_queue_stack_limits(q, b);
@@ -1177,6 +1208,12 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
}
}
+ /* To avoid confusion, if this queue does not support discard, clear
+ * max_discard_sectors, which is what lsblk -D reports to the user. */
+ if (!blk_queue_discard(q)) {
+ blk_queue_max_discard_sectors(q, 0);
+ q->limits.discard_granularity = 0;
+ }
}
void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev)
@@ -1241,8 +1278,8 @@ static void conn_reconfig_done(struct drbd_connection *connection)
connection->cstate == C_STANDALONE;
spin_unlock_irq(&connection->resource->req_lock);
if (stop_threads) {
- /* asender is implicitly stopped by receiver
- * in conn_disconnect() */
+ /* ack_receiver thread and ack_sender workqueue are implicitly
+ * stopped by receiver in conn_disconnect() */
drbd_thread_stop(&connection->receiver);
drbd_thread_stop(&connection->worker);
}
@@ -1389,13 +1426,13 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
goto fail_unlock;
}
- write_lock_irq(&global_state_lock);
+ lock_all_resources();
retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
if (retcode == NO_ERROR) {
rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
drbd_resync_after_changed(device);
}
- write_unlock_irq(&global_state_lock);
+ unlock_all_resources();
if (retcode != NO_ERROR)
goto fail_unlock;
@@ -1418,7 +1455,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
set_bit(MD_NO_FUA, &device->flags);
if (write_ordering_changed(old_disk_conf, new_disk_conf))
- drbd_bump_write_ordering(device->resource, NULL, WO_bdev_flush);
+ drbd_bump_write_ordering(device->resource, NULL, WO_BDEV_FLUSH);
drbd_md_sync(device);
@@ -1449,6 +1486,88 @@ success:
return 0;
}
+static struct block_device *open_backing_dev(struct drbd_device *device,
+ const char *bdev_path, void *claim_ptr, bool do_bd_link)
+{
+ struct block_device *bdev;
+ int err = 0;
+
+ bdev = blkdev_get_by_path(bdev_path,
+ FMODE_READ | FMODE_WRITE | FMODE_EXCL, claim_ptr);
+ if (IS_ERR(bdev)) {
+ drbd_err(device, "open(\"%s\") failed with %ld\n",
+ bdev_path, PTR_ERR(bdev));
+ return bdev;
+ }
+
+ if (!do_bd_link)
+ return bdev;
+
+ err = bd_link_disk_holder(bdev, device->vdisk);
+ if (err) {
+ blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+ drbd_err(device, "bd_link_disk_holder(\"%s\", ...) failed with %d\n",
+ bdev_path, err);
+ bdev = ERR_PTR(err);
+ }
+ return bdev;
+}
+
+static int open_backing_devices(struct drbd_device *device,
+ struct disk_conf *new_disk_conf,
+ struct drbd_backing_dev *nbc)
+{
+ struct block_device *bdev;
+
+ bdev = open_backing_dev(device, new_disk_conf->backing_dev, device, true);
+ if (IS_ERR(bdev))
+ return ERR_OPEN_DISK;
+ nbc->backing_bdev = bdev;
+
+ /*
+ * meta_dev_idx >= 0: external fixed size, possibly multiple
+ * drbd sharing one meta device. TODO in that case, paranoia
+ * check that [md_bdev, meta_dev_idx] is not yet used by some
+ * other drbd minor! (if you use drbd.conf + drbdadm, that
+ * should check it for you already; but if you don't, or
+ * someone fooled it, we need to double check here)
+ */
+ bdev = open_backing_dev(device, new_disk_conf->meta_dev,
+ /* claim ptr: device, if claimed exclusively; shared drbd_m_holder,
+ * if potentially shared with other drbd minors */
+ (new_disk_conf->meta_dev_idx < 0) ? (void*)device : (void*)drbd_m_holder,
+ /* avoid double bd_claim_by_disk() for the same (source,target) tuple,
+ * as would happen with internal metadata. */
+ (new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_FLEX_INT &&
+ new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_INTERNAL));
+ if (IS_ERR(bdev))
+ return ERR_OPEN_MD_DISK;
+ nbc->md_bdev = bdev;
+ return NO_ERROR;
+}
+
+static void close_backing_dev(struct drbd_device *device, struct block_device *bdev,
+ bool do_bd_unlink)
+{
+ if (!bdev)
+ return;
+ if (do_bd_unlink)
+ bd_unlink_disk_holder(bdev, device->vdisk);
+ blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+}
+
+void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev)
+{
+ if (ldev == NULL)
+ return;
+
+ close_backing_dev(device, ldev->md_bdev, ldev->md_bdev != ldev->backing_bdev);
+ close_backing_dev(device, ldev->backing_bdev, true);
+
+ kfree(ldev->disk_conf);
+ kfree(ldev);
+}
+
int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
{
struct drbd_config_context adm_ctx;
@@ -1462,7 +1581,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
sector_t min_md_device_sectors;
struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
struct disk_conf *new_disk_conf = NULL;
- struct block_device *bdev;
struct lru_cache *resync_lru = NULL;
struct fifo_buffer *new_plan = NULL;
union drbd_state ns, os;
@@ -1478,7 +1596,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
device = adm_ctx.device;
mutex_lock(&adm_ctx.resource->adm_mutex);
peer_device = first_peer_device(device);
- connection = peer_device ? peer_device->connection : NULL;
+ connection = peer_device->connection;
conn_reconfig_start(connection);
/* if you want to reconfigure, please tear down first */
@@ -1539,12 +1657,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
goto fail;
}
- write_lock_irq(&global_state_lock);
- retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
- write_unlock_irq(&global_state_lock);
- if (retcode != NO_ERROR)
- goto fail;
-
rcu_read_lock();
nc = rcu_dereference(connection->net_conf);
if (nc) {
@@ -1556,35 +1668,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
}
rcu_read_unlock();
- bdev = blkdev_get_by_path(new_disk_conf->backing_dev,
- FMODE_READ | FMODE_WRITE | FMODE_EXCL, device);
- if (IS_ERR(bdev)) {
- drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->backing_dev,
- PTR_ERR(bdev));
- retcode = ERR_OPEN_DISK;
- goto fail;
- }
- nbc->backing_bdev = bdev;
-
- /*
- * meta_dev_idx >= 0: external fixed size, possibly multiple
- * drbd sharing one meta device. TODO in that case, paranoia
- * check that [md_bdev, meta_dev_idx] is not yet used by some
- * other drbd minor! (if you use drbd.conf + drbdadm, that
- * should check it for you already; but if you don't, or
- * someone fooled it, we need to double check here)
- */
- bdev = blkdev_get_by_path(new_disk_conf->meta_dev,
- FMODE_READ | FMODE_WRITE | FMODE_EXCL,
- (new_disk_conf->meta_dev_idx < 0) ?
- (void *)device : (void *)drbd_m_holder);
- if (IS_ERR(bdev)) {
- drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->meta_dev,
- PTR_ERR(bdev));
- retcode = ERR_OPEN_MD_DISK;
+ retcode = open_backing_devices(device, new_disk_conf, nbc);
+ if (retcode != NO_ERROR)
goto fail;
- }
- nbc->md_bdev = bdev;
if ((nbc->backing_bdev == nbc->md_bdev) !=
(new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
@@ -1707,6 +1793,13 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
goto force_diskless_dec;
}
+ lock_all_resources();
+ retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
+ if (retcode != NO_ERROR) {
+ unlock_all_resources();
+ goto force_diskless_dec;
+ }
+
/* Reset the "barriers don't work" bits here, then force meta data to
* be written, to ensure we determine if barriers are supported. */
if (new_disk_conf->md_flushes)
@@ -1727,7 +1820,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
new_disk_conf = NULL;
new_plan = NULL;
- drbd_bump_write_ordering(device->resource, device->ldev, WO_bdev_flush);
+ drbd_resync_after_changed(device);
+ drbd_bump_write_ordering(device->resource, device->ldev, WO_BDEV_FLUSH);
+ unlock_all_resources();
if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY))
set_bit(CRASHED_PRIMARY, &device->flags);
@@ -1875,12 +1970,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
fail:
conn_reconfig_done(connection);
if (nbc) {
- if (nbc->backing_bdev)
- blkdev_put(nbc->backing_bdev,
- FMODE_READ | FMODE_WRITE | FMODE_EXCL);
- if (nbc->md_bdev)
- blkdev_put(nbc->md_bdev,
- FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+ close_backing_dev(device, nbc->md_bdev, nbc->md_bdev != nbc->backing_bdev);
+ close_backing_dev(device, nbc->backing_bdev, true);
kfree(nbc);
}
kfree(new_disk_conf);
@@ -1895,6 +1986,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
static int adm_detach(struct drbd_device *device, int force)
{
enum drbd_state_rv retcode;
+ void *buffer;
int ret;
if (force) {
@@ -1905,13 +1997,16 @@ static int adm_detach(struct drbd_device *device, int force)
}
drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
- drbd_md_get_buffer(device, __func__); /* make sure there is no in-flight meta-data IO */
- retcode = drbd_request_state(device, NS(disk, D_FAILED));
- drbd_md_put_buffer(device);
+ buffer = drbd_md_get_buffer(device, __func__); /* make sure there is no in-flight meta-data IO */
+ if (buffer) {
+ retcode = drbd_request_state(device, NS(disk, D_FAILED));
+ drbd_md_put_buffer(device);
+ } else /* already <= D_FAILED */
+ retcode = SS_NOTHING_TO_DO;
/* D_FAILED will transition to DISKLESS. */
+ drbd_resume_io(device);
ret = wait_event_interruptible(device->misc_wait,
device->state.disk != D_FAILED);
- drbd_resume_io(device);
if ((int)retcode == (int)SS_IS_DISKLESS)
retcode = SS_NOTHING_TO_DO;
if (ret)
@@ -2245,8 +2340,31 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
return 0;
}
+static void connection_to_info(struct connection_info *info,
+ struct drbd_connection *connection)
+{
+ info->conn_connection_state = connection->cstate;
+ info->conn_role = conn_highest_peer(connection);
+}
+
+static void peer_device_to_info(struct peer_device_info *info,
+ struct drbd_peer_device *peer_device)
+{
+ struct drbd_device *device = peer_device->device;
+
+ info->peer_repl_state =
+ max_t(enum drbd_conns, C_WF_REPORT_PARAMS, device->state.conn);
+ info->peer_disk_state = device->state.pdsk;
+ info->peer_resync_susp_user = device->state.user_isp;
+ info->peer_resync_susp_peer = device->state.peer_isp;
+ info->peer_resync_susp_dependency = device->state.aftr_isp;
+}
+
int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
{
+ struct connection_info connection_info;
+ enum drbd_notification_type flags;
+ unsigned int peer_devices = 0;
struct drbd_config_context adm_ctx;
struct drbd_peer_device *peer_device;
struct net_conf *old_net_conf, *new_net_conf = NULL;
@@ -2347,6 +2465,22 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
connection->peer_addr_len = nla_len(adm_ctx.peer_addr);
memcpy(&connection->peer_addr, nla_data(adm_ctx.peer_addr), connection->peer_addr_len);
+ idr_for_each_entry(&connection->peer_devices, peer_device, i) {
+ peer_devices++;
+ }
+
+ connection_to_info(&connection_info, connection);
+ flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
+ mutex_lock(&notification_mutex);
+ notify_connection_state(NULL, 0, connection, &connection_info, NOTIFY_CREATE | flags);
+ idr_for_each_entry(&connection->peer_devices, peer_device, i) {
+ struct peer_device_info peer_device_info;
+
+ peer_device_to_info(&peer_device_info, peer_device);
+ flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
+ notify_peer_device_state(NULL, 0, peer_device, &peer_device_info, NOTIFY_CREATE | flags);
+ }
+ mutex_unlock(&notification_mutex);
mutex_unlock(&adm_ctx.resource->conf_update);
rcu_read_lock();
@@ -2428,6 +2562,8 @@ static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection
drbd_err(connection,
"unexpected rv2=%d in conn_try_disconnect()\n",
rv2);
+ /* Unlike in DRBD 9, the state engine has generated
+ * NOTIFY_DESTROY events before clearing connection->net_conf. */
}
return rv;
}
@@ -2585,6 +2721,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
mutex_unlock(&device->resource->conf_update);
synchronize_rcu();
kfree(old_disk_conf);
+ new_disk_conf = NULL;
}
ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
@@ -2618,6 +2755,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
fail_ldev:
put_ldev(device);
+ kfree(new_disk_conf);
goto fail;
}
@@ -2855,7 +2993,30 @@ int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
mutex_lock(&adm_ctx.resource->adm_mutex);
device = adm_ctx.device;
if (test_bit(NEW_CUR_UUID, &device->flags)) {
- drbd_uuid_new_current(device);
+ if (get_ldev_if_state(device, D_ATTACHING)) {
+ drbd_uuid_new_current(device);
+ put_ldev(device);
+ } else {
+ /* This is effectively a multi-stage "forced down".
+ * The NEW_CUR_UUID bit is supposedly only set, if we
+ * lost the replication connection, and are configured
+ * to freeze IO and wait for some fence-peer handler.
+ * So we still don't have a replication connection.
+ * And now we don't have a local disk either. After
+ * resume, we will fail all pending and new IO, because
+ * we don't have any data anymore. Which means we will
+ * eventually be able to terminate all users of this
+ * device, and then take it down. By bumping the
+ * "effective" data uuid, we make sure that you really
+ * need to tear down before you reconfigure, we will
+ * the refuse to re-connect or re-attach (because no
+ * matching real data uuid exists).
+ */
+ u64 val;
+ get_random_bytes(&val, sizeof(u64));
+ drbd_set_ed_uuid(device, val);
+ drbd_warn(device, "Resumed without access to data; please tear down before attempting to re-configure.\n");
+ }
clear_bit(NEW_CUR_UUID, &device->flags);
}
drbd_suspend_io(device);
@@ -2910,6 +3071,486 @@ nla_put_failure:
}
/*
+ * The generic netlink dump callbacks are called outside the genl_lock(), so
+ * they cannot use the simple attribute parsing code which uses global
+ * attribute tables.
+ */
+static struct nlattr *find_cfg_context_attr(const struct nlmsghdr *nlh, int attr)
+{
+ const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
+ const int maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
+ struct nlattr *nla;
+
+ nla = nla_find(nlmsg_attrdata(nlh, hdrlen), nlmsg_attrlen(nlh, hdrlen),
+ DRBD_NLA_CFG_CONTEXT);
+ if (!nla)
+ return NULL;
+ return drbd_nla_find_nested(maxtype, nla, __nla_type(attr));
+}
+
+static void resource_to_info(struct resource_info *, struct drbd_resource *);
+
+int drbd_adm_dump_resources(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct drbd_genlmsghdr *dh;
+ struct drbd_resource *resource;
+ struct resource_info resource_info;
+ struct resource_statistics resource_statistics;
+ int err;
+
+ rcu_read_lock();
+ if (cb->args[0]) {
+ for_each_resource_rcu(resource, &drbd_resources)
+ if (resource == (struct drbd_resource *)cb->args[0])
+ goto found_resource;
+ err = 0; /* resource was probably deleted */
+ goto out;
+ }
+ resource = list_entry(&drbd_resources,
+ struct drbd_resource, resources);
+
+found_resource:
+ list_for_each_entry_continue_rcu(resource, &drbd_resources, resources) {
+ goto put_result;
+ }
+ err = 0;
+ goto out;
+
+put_result:
+ dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, &drbd_genl_family,
+ NLM_F_MULTI, DRBD_ADM_GET_RESOURCES);
+ err = -ENOMEM;
+ if (!dh)
+ goto out;
+ dh->minor = -1U;
+ dh->ret_code = NO_ERROR;
+ err = nla_put_drbd_cfg_context(skb, resource, NULL, NULL);
+ if (err)
+ goto out;
+ err = res_opts_to_skb(skb, &resource->res_opts, !capable(CAP_SYS_ADMIN));
+ if (err)
+ goto out;
+ resource_to_info(&resource_info, resource);
+ err = resource_info_to_skb(skb, &resource_info, !capable(CAP_SYS_ADMIN));
+ if (err)
+ goto out;
+ resource_statistics.res_stat_write_ordering = resource->write_ordering;
+ err = resource_statistics_to_skb(skb, &resource_statistics, !capable(CAP_SYS_ADMIN));
+ if (err)
+ goto out;
+ cb->args[0] = (long)resource;
+ genlmsg_end(skb, dh);
+ err = 0;
+
+out:
+ rcu_read_unlock();
+ if (err)
+ return err;
+ return skb->len;
+}
+
+static void device_to_statistics(struct device_statistics *s,
+ struct drbd_device *device)
+{
+ memset(s, 0, sizeof(*s));
+ s->dev_upper_blocked = !may_inc_ap_bio(device);
+ if (get_ldev(device)) {
+ struct drbd_md *md = &device->ldev->md;
+ u64 *history_uuids = (u64 *)s->history_uuids;
+ struct request_queue *q;
+ int n;
+
+ spin_lock_irq(&md->uuid_lock);
+ s->dev_current_uuid = md->uuid[UI_CURRENT];
+ BUILD_BUG_ON(sizeof(s->history_uuids) < UI_HISTORY_END - UI_HISTORY_START + 1);
+ for (n = 0; n < UI_HISTORY_END - UI_HISTORY_START + 1; n++)
+ history_uuids[n] = md->uuid[UI_HISTORY_START + n];
+ for (; n < HISTORY_UUIDS; n++)
+ history_uuids[n] = 0;
+ s->history_uuids_len = HISTORY_UUIDS;
+ spin_unlock_irq(&md->uuid_lock);
+
+ s->dev_disk_flags = md->flags;
+ q = bdev_get_queue(device->ldev->backing_bdev);
+ s->dev_lower_blocked =
+ bdi_congested(&q->backing_dev_info,
+ (1 << WB_async_congested) |
+ (1 << WB_sync_congested));
+ put_ldev(device);
+ }
+ s->dev_size = drbd_get_capacity(device->this_bdev);
+ s->dev_read = device->read_cnt;
+ s->dev_write = device->writ_cnt;
+ s->dev_al_writes = device->al_writ_cnt;
+ s->dev_bm_writes = device->bm_writ_cnt;
+ s->dev_upper_pending = atomic_read(&device->ap_bio_cnt);
+ s->dev_lower_pending = atomic_read(&device->local_cnt);
+ s->dev_al_suspended = test_bit(AL_SUSPENDED, &device->flags);
+ s->dev_exposed_data_uuid = device->ed_uuid;
+}
+
+static int put_resource_in_arg0(struct netlink_callback *cb, int holder_nr)
+{
+ if (cb->args[0]) {
+ struct drbd_resource *resource =
+ (struct drbd_resource *)cb->args[0];
+ kref_put(&resource->kref, drbd_destroy_resource);
+ }
+
+ return 0;
+}
+
+int drbd_adm_dump_devices_done(struct netlink_callback *cb) {
+ return put_resource_in_arg0(cb, 7);
+}
+
+static void device_to_info(struct device_info *, struct drbd_device *);
+
+int drbd_adm_dump_devices(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct nlattr *resource_filter;
+ struct drbd_resource *resource;
+ struct drbd_device *uninitialized_var(device);
+ int minor, err, retcode;
+ struct drbd_genlmsghdr *dh;
+ struct device_info device_info;
+ struct device_statistics device_statistics;
+ struct idr *idr_to_search;
+
+ resource = (struct drbd_resource *)cb->args[0];
+ if (!cb->args[0] && !cb->args[1]) {
+ resource_filter = find_cfg_context_attr(cb->nlh, T_ctx_resource_name);
+ if (resource_filter) {
+ retcode = ERR_RES_NOT_KNOWN;
+ resource = drbd_find_resource(nla_data(resource_filter));
+ if (!resource)
+ goto put_result;
+ cb->args[0] = (long)resource;
+ }
+ }
+
+ rcu_read_lock();
+ minor = cb->args[1];
+ idr_to_search = resource ? &resource->devices : &drbd_devices;
+ device = idr_get_next(idr_to_search, &minor);
+ if (!device) {
+ err = 0;
+ goto out;
+ }
+ idr_for_each_entry_continue(idr_to_search, device, minor) {
+ retcode = NO_ERROR;
+ goto put_result; /* only one iteration */
+ }
+ err = 0;
+ goto out; /* no more devices */
+
+put_result:
+ dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, &drbd_genl_family,
+ NLM_F_MULTI, DRBD_ADM_GET_DEVICES);
+ err = -ENOMEM;
+ if (!dh)
+ goto out;
+ dh->ret_code = retcode;
+ dh->minor = -1U;
+ if (retcode == NO_ERROR) {
+ dh->minor = device->minor;
+ err = nla_put_drbd_cfg_context(skb, device->resource, NULL, device);
+ if (err)
+ goto out;
+ if (get_ldev(device)) {
+ struct disk_conf *disk_conf =
+ rcu_dereference(device->ldev->disk_conf);
+
+ err = disk_conf_to_skb(skb, disk_conf, !capable(CAP_SYS_ADMIN));
+ put_ldev(device);
+ if (err)
+ goto out;
+ }
+ device_to_info(&device_info, device);
+ err = device_info_to_skb(skb, &device_info, !capable(CAP_SYS_ADMIN));
+ if (err)
+ goto out;
+
+ device_to_statistics(&device_statistics, device);
+ err = device_statistics_to_skb(skb, &device_statistics, !capable(CAP_SYS_ADMIN));
+ if (err)
+ goto out;
+ cb->args[1] = minor + 1;
+ }
+ genlmsg_end(skb, dh);
+ err = 0;
+
+out:
+ rcu_read_unlock();
+ if (err)
+ return err;
+ return skb->len;
+}
+
+int drbd_adm_dump_connections_done(struct netlink_callback *cb)
+{
+ return put_resource_in_arg0(cb, 6);
+}
+
+enum { SINGLE_RESOURCE, ITERATE_RESOURCES };
+
+int drbd_adm_dump_connections(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct nlattr *resource_filter;
+ struct drbd_resource *resource = NULL, *next_resource;
+ struct drbd_connection *uninitialized_var(connection);
+ int err = 0, retcode;
+ struct drbd_genlmsghdr *dh;
+ struct connection_info connection_info;
+ struct connection_statistics connection_statistics;
+
+ rcu_read_lock();
+ resource = (struct drbd_resource *)cb->args[0];
+ if (!cb->args[0]) {
+ resource_filter = find_cfg_context_attr(cb->nlh, T_ctx_resource_name);
+ if (resource_filter) {
+ retcode = ERR_RES_NOT_KNOWN;
+ resource = drbd_find_resource(nla_data(resource_filter));
+ if (!resource)
+ goto put_result;
+ cb->args[0] = (long)resource;
+ cb->args[1] = SINGLE_RESOURCE;
+ }
+ }
+ if (!resource) {
+ if (list_empty(&drbd_resources))
+ goto out;
+ resource = list_first_entry(&drbd_resources, struct drbd_resource, resources);
+ kref_get(&resource->kref);
+ cb->args[0] = (long)resource;
+ cb->args[1] = ITERATE_RESOURCES;
+ }
+
+ next_resource:
+ rcu_read_unlock();
+ mutex_lock(&resource->conf_update);
+ rcu_read_lock();
+ if (cb->args[2]) {
+ for_each_connection_rcu(connection, resource)
+ if (connection == (struct drbd_connection *)cb->args[2])
+ goto found_connection;
+ /* connection was probably deleted */
+ goto no_more_connections;
+ }
+ connection = list_entry(&resource->connections, struct drbd_connection, connections);
+
+found_connection:
+ list_for_each_entry_continue_rcu(connection, &resource->connections, connections) {
+ if (!has_net_conf(connection))
+ continue;
+ retcode = NO_ERROR;
+ goto put_result; /* only one iteration */
+ }
+
+no_more_connections:
+ if (cb->args[1] == ITERATE_RESOURCES) {
+ for_each_resource_rcu(next_resource, &drbd_resources) {
+ if (next_resource == resource)
+ goto found_resource;
+ }
+ /* resource was probably deleted */
+ }
+ goto out;
+
+found_resource:
+ list_for_each_entry_continue_rcu(next_resource, &drbd_resources, resources) {
+ mutex_unlock(&resource->conf_update);
+ kref_put(&resource->kref, drbd_destroy_resource);
+ resource = next_resource;
+ kref_get(&resource->kref);
+ cb->args[0] = (long)resource;
+ cb->args[2] = 0;
+ goto next_resource;
+ }
+ goto out; /* no more resources */
+
+put_result:
+ dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, &drbd_genl_family,
+ NLM_F_MULTI, DRBD_ADM_GET_CONNECTIONS);
+ err = -ENOMEM;
+ if (!dh)
+ goto out;
+ dh->ret_code = retcode;
+ dh->minor = -1U;
+ if (retcode == NO_ERROR) {
+ struct net_conf *net_conf;
+
+ err = nla_put_drbd_cfg_context(skb, resource, connection, NULL);
+ if (err)
+ goto out;
+ net_conf = rcu_dereference(connection->net_conf);
+ if (net_conf) {
+ err = net_conf_to_skb(skb, net_conf, !capable(CAP_SYS_ADMIN));
+ if (err)
+ goto out;
+ }
+ connection_to_info(&connection_info, connection);
+ err = connection_info_to_skb(skb, &connection_info, !capable(CAP_SYS_ADMIN));
+ if (err)
+ goto out;
+ connection_statistics.conn_congested = test_bit(NET_CONGESTED, &connection->flags);
+ err = connection_statistics_to_skb(skb, &connection_statistics, !capable(CAP_SYS_ADMIN));
+ if (err)
+ goto out;
+ cb->args[2] = (long)connection;
+ }
+ genlmsg_end(skb, dh);
+ err = 0;
+
+out:
+ rcu_read_unlock();
+ if (resource)
+ mutex_unlock(&resource->conf_update);
+ if (err)
+ return err;
+ return skb->len;
+}
+
+enum mdf_peer_flag {
+ MDF_PEER_CONNECTED = 1 << 0,
+ MDF_PEER_OUTDATED = 1 << 1,
+ MDF_PEER_FENCING = 1 << 2,
+ MDF_PEER_FULL_SYNC = 1 << 3,
+};
+
+static void peer_device_to_statistics(struct peer_device_statistics *s,
+ struct drbd_peer_device *peer_device)
+{
+ struct drbd_device *device = peer_device->device;
+
+ memset(s, 0, sizeof(*s));
+ s->peer_dev_received = device->recv_cnt;
+ s->peer_dev_sent = device->send_cnt;
+ s->peer_dev_pending = atomic_read(&device->ap_pending_cnt) +
+ atomic_read(&device->rs_pending_cnt);
+ s->peer_dev_unacked = atomic_read(&device->unacked_cnt);
+ s->peer_dev_out_of_sync = drbd_bm_total_weight(device) << (BM_BLOCK_SHIFT - 9);
+ s->peer_dev_resync_failed = device->rs_failed << (BM_BLOCK_SHIFT - 9);
+ if (get_ldev(device)) {
+ struct drbd_md *md = &device->ldev->md;
+
+ spin_lock_irq(&md->uuid_lock);
+ s->peer_dev_bitmap_uuid = md->uuid[UI_BITMAP];
+ spin_unlock_irq(&md->uuid_lock);
+ s->peer_dev_flags =
+ (drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND) ?
+ MDF_PEER_CONNECTED : 0) +
+ (drbd_md_test_flag(device->ldev, MDF_CONSISTENT) &&
+ !drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE) ?
+ MDF_PEER_OUTDATED : 0) +
+ /* FIXME: MDF_PEER_FENCING? */
+ (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) ?
+ MDF_PEER_FULL_SYNC : 0);
+ put_ldev(device);
+ }
+}
+
+int drbd_adm_dump_peer_devices_done(struct netlink_callback *cb)
+{
+ return put_resource_in_arg0(cb, 9);
+}
+
+int drbd_adm_dump_peer_devices(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct nlattr *resource_filter;
+ struct drbd_resource *resource;
+ struct drbd_device *uninitialized_var(device);
+ struct drbd_peer_device *peer_device = NULL;
+ int minor, err, retcode;
+ struct drbd_genlmsghdr *dh;
+ struct idr *idr_to_search;
+
+ resource = (struct drbd_resource *)cb->args[0];
+ if (!cb->args[0] && !cb->args[1]) {
+ resource_filter = find_cfg_context_attr(cb->nlh, T_ctx_resource_name);
+ if (resource_filter) {
+ retcode = ERR_RES_NOT_KNOWN;
+ resource = drbd_find_resource(nla_data(resource_filter));
+ if (!resource)
+ goto put_result;
+ }
+ cb->args[0] = (long)resource;
+ }
+
+ rcu_read_lock();
+ minor = cb->args[1];
+ idr_to_search = resource ? &resource->devices : &drbd_devices;
+ device = idr_find(idr_to_search, minor);
+ if (!device) {
+next_device:
+ minor++;
+ cb->args[2] = 0;
+ device = idr_get_next(idr_to_search, &minor);
+ if (!device) {
+ err = 0;
+ goto out;
+ }
+ }
+ if (cb->args[2]) {
+ for_each_peer_device(peer_device, device)
+ if (peer_device == (struct drbd_peer_device *)cb->args[2])
+ goto found_peer_device;
+ /* peer device was probably deleted */
+ goto next_device;
+ }
+ /* Make peer_device point to the list head (not the first entry). */
+ peer_device = list_entry(&device->peer_devices, struct drbd_peer_device, peer_devices);
+
+found_peer_device:
+ list_for_each_entry_continue_rcu(peer_device, &device->peer_devices, peer_devices) {
+ if (!has_net_conf(peer_device->connection))
+ continue;
+ retcode = NO_ERROR;
+ goto put_result; /* only one iteration */
+ }
+ goto next_device;
+
+put_result:
+ dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, &drbd_genl_family,
+ NLM_F_MULTI, DRBD_ADM_GET_PEER_DEVICES);
+ err = -ENOMEM;
+ if (!dh)
+ goto out;
+ dh->ret_code = retcode;
+ dh->minor = -1U;
+ if (retcode == NO_ERROR) {
+ struct peer_device_info peer_device_info;
+ struct peer_device_statistics peer_device_statistics;
+
+ dh->minor = minor;
+ err = nla_put_drbd_cfg_context(skb, device->resource, peer_device->connection, device);
+ if (err)
+ goto out;
+ peer_device_to_info(&peer_device_info, peer_device);
+ err = peer_device_info_to_skb(skb, &peer_device_info, !capable(CAP_SYS_ADMIN));
+ if (err)
+ goto out;
+ peer_device_to_statistics(&peer_device_statistics, peer_device);
+ err = peer_device_statistics_to_skb(skb, &peer_device_statistics, !capable(CAP_SYS_ADMIN));
+ if (err)
+ goto out;
+ cb->args[1] = minor;
+ cb->args[2] = (long)peer_device;
+ }
+ genlmsg_end(skb, dh);
+ err = 0;
+
+out:
+ rcu_read_unlock();
+ if (err)
+ return err;
+ return skb->len;
+}
+/*
* Return the connection of @resource if @resource has exactly one connection.
*/
static struct drbd_connection *the_only_connection(struct drbd_resource *resource)
@@ -3414,8 +4055,18 @@ drbd_check_resource_name(struct drbd_config_context *adm_ctx)
return NO_ERROR;
}
+static void resource_to_info(struct resource_info *info,
+ struct drbd_resource *resource)
+{
+ info->res_role = conn_highest_role(first_connection(resource));
+ info->res_susp = resource->susp;
+ info->res_susp_nod = resource->susp_nod;
+ info->res_susp_fen = resource->susp_fen;
+}
+
int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
{
+ struct drbd_connection *connection;
struct drbd_config_context adm_ctx;
enum drbd_ret_code retcode;
struct res_opts res_opts;
@@ -3449,13 +4100,33 @@ int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
}
/* not yet safe for genl_family.parallel_ops */
- if (!conn_create(adm_ctx.resource_name, &res_opts))
+ mutex_lock(&resources_mutex);
+ connection = conn_create(adm_ctx.resource_name, &res_opts);
+ mutex_unlock(&resources_mutex);
+
+ if (connection) {
+ struct resource_info resource_info;
+
+ mutex_lock(&notification_mutex);
+ resource_to_info(&resource_info, connection->resource);
+ notify_resource_state(NULL, 0, connection->resource,
+ &resource_info, NOTIFY_CREATE);
+ mutex_unlock(&notification_mutex);
+ } else
retcode = ERR_NOMEM;
+
out:
drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
}
+static void device_to_info(struct device_info *info,
+ struct drbd_device *device)
+{
+ info->dev_disk_state = device->state.disk;
+}
+
+
int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info)
{
struct drbd_config_context adm_ctx;
@@ -3490,6 +4161,36 @@ int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info)
mutex_lock(&adm_ctx.resource->adm_mutex);
retcode = drbd_create_device(&adm_ctx, dh->minor);
+ if (retcode == NO_ERROR) {
+ struct drbd_device *device;
+ struct drbd_peer_device *peer_device;
+ struct device_info info;
+ unsigned int peer_devices = 0;
+ enum drbd_notification_type flags;
+
+ device = minor_to_device(dh->minor);
+ for_each_peer_device(peer_device, device) {
+ if (!has_net_conf(peer_device->connection))
+ continue;
+ peer_devices++;
+ }
+
+ device_to_info(&info, device);
+ mutex_lock(&notification_mutex);
+ flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
+ notify_device_state(NULL, 0, device, &info, NOTIFY_CREATE | flags);
+ for_each_peer_device(peer_device, device) {
+ struct peer_device_info peer_device_info;
+
+ if (!has_net_conf(peer_device->connection))
+ continue;
+ peer_device_to_info(&peer_device_info, peer_device);
+ flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
+ notify_peer_device_state(NULL, 0, peer_device, &peer_device_info,
+ NOTIFY_CREATE | flags);
+ }
+ mutex_unlock(&notification_mutex);
+ }
mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
drbd_adm_finish(&adm_ctx, info, retcode);
@@ -3498,13 +4199,35 @@ out:
static enum drbd_ret_code adm_del_minor(struct drbd_device *device)
{
+ struct drbd_peer_device *peer_device;
+
if (device->state.disk == D_DISKLESS &&
/* no need to be device->state.conn == C_STANDALONE &&
* we may want to delete a minor from a live replication group.
*/
device->state.role == R_SECONDARY) {
+ struct drbd_connection *connection =
+ first_connection(device->resource);
+
_drbd_request_state(device, NS(conn, C_WF_REPORT_PARAMS),
CS_VERBOSE + CS_WAIT_COMPLETE);
+
+ /* If the state engine hasn't stopped the sender thread yet, we
+ * need to flush the sender work queue before generating the
+ * DESTROY events here. */
+ if (get_t_state(&connection->worker) == RUNNING)
+ drbd_flush_workqueue(&connection->sender_work);
+
+ mutex_lock(&notification_mutex);
+ for_each_peer_device(peer_device, device) {
+ if (!has_net_conf(peer_device->connection))
+ continue;
+ notify_peer_device_state(NULL, 0, peer_device, NULL,
+ NOTIFY_DESTROY | NOTIFY_CONTINUES);
+ }
+ notify_device_state(NULL, 0, device, NULL, NOTIFY_DESTROY);
+ mutex_unlock(&notification_mutex);
+
drbd_delete_device(device);
return NO_ERROR;
} else
@@ -3541,7 +4264,16 @@ static int adm_del_resource(struct drbd_resource *resource)
if (!idr_is_empty(&resource->devices))
return ERR_RES_IN_USE;
+ /* The state engine has stopped the sender thread, so we don't
+ * need to flush the sender work queue before generating the
+ * DESTROY event here. */
+ mutex_lock(&notification_mutex);
+ notify_resource_state(NULL, 0, resource, NULL, NOTIFY_DESTROY);
+ mutex_unlock(&notification_mutex);
+
+ mutex_lock(&resources_mutex);
list_del_rcu(&resource->resources);
+ mutex_unlock(&resources_mutex);
/* Make sure all threads have actually stopped: state handling only
* does drbd_thread_stop_nowait(). */
list_for_each_entry(connection, &resource->connections, connections)
@@ -3637,7 +4369,6 @@ finish:
void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib)
{
- static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
struct sk_buff *msg;
struct drbd_genlmsghdr *d_out;
unsigned seq;
@@ -3658,7 +4389,7 @@ void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib)
if (nla_put_status_info(msg, device, sib))
goto nla_put_failure;
genlmsg_end(msg, d_out);
- err = drbd_genl_multicast_events(msg, 0);
+ err = drbd_genl_multicast_events(msg, GFP_NOWAIT);
/* msg has been consumed or freed in netlink_broadcast() */
if (err && err != -ESRCH)
goto failed;
@@ -3672,3 +4403,405 @@ failed:
"Event seq:%u sib_reason:%u\n",
err, seq, sib->sib_reason);
}
+
+static int nla_put_notification_header(struct sk_buff *msg,
+ enum drbd_notification_type type)
+{
+ struct drbd_notification_header nh = {
+ .nh_type = type,
+ };
+
+ return drbd_notification_header_to_skb(msg, &nh, true);
+}
+
+void notify_resource_state(struct sk_buff *skb,
+ unsigned int seq,
+ struct drbd_resource *resource,
+ struct resource_info *resource_info,
+ enum drbd_notification_type type)
+{
+ struct resource_statistics resource_statistics;
+ struct drbd_genlmsghdr *dh;
+ bool multicast = false;
+ int err;
+
+ if (!skb) {
+ seq = atomic_inc_return(&notify_genl_seq);
+ skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
+ err = -ENOMEM;
+ if (!skb)
+ goto failed;
+ multicast = true;
+ }
+
+ err = -EMSGSIZE;
+ dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_RESOURCE_STATE);
+ if (!dh)
+ goto nla_put_failure;
+ dh->minor = -1U;
+ dh->ret_code = NO_ERROR;
+ if (nla_put_drbd_cfg_context(skb, resource, NULL, NULL) ||
+ nla_put_notification_header(skb, type) ||
+ ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
+ resource_info_to_skb(skb, resource_info, true)))
+ goto nla_put_failure;
+ resource_statistics.res_stat_write_ordering = resource->write_ordering;
+ err = resource_statistics_to_skb(skb, &resource_statistics, !capable(CAP_SYS_ADMIN));
+ if (err)
+ goto nla_put_failure;
+ genlmsg_end(skb, dh);
+ if (multicast) {
+ err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
+ /* skb has been consumed or freed in netlink_broadcast() */
+ if (err && err != -ESRCH)
+ goto failed;
+ }
+ return;
+
+nla_put_failure:
+ nlmsg_free(skb);
+failed:
+ drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n",
+ err, seq);
+}
+
+void notify_device_state(struct sk_buff *skb,
+ unsigned int seq,
+ struct drbd_device *device,
+ struct device_info *device_info,
+ enum drbd_notification_type type)
+{
+ struct device_statistics device_statistics;
+ struct drbd_genlmsghdr *dh;
+ bool multicast = false;
+ int err;
+
+ if (!skb) {
+ seq = atomic_inc_return(&notify_genl_seq);
+ skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
+ err = -ENOMEM;
+ if (!skb)
+ goto failed;
+ multicast = true;
+ }
+
+ err = -EMSGSIZE;
+ dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_DEVICE_STATE);
+ if (!dh)
+ goto nla_put_failure;
+ dh->minor = device->minor;
+ dh->ret_code = NO_ERROR;
+ if (nla_put_drbd_cfg_context(skb, device->resource, NULL, device) ||
+ nla_put_notification_header(skb, type) ||
+ ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
+ device_info_to_skb(skb, device_info, true)))
+ goto nla_put_failure;
+ device_to_statistics(&device_statistics, device);
+ device_statistics_to_skb(skb, &device_statistics, !capable(CAP_SYS_ADMIN));
+ genlmsg_end(skb, dh);
+ if (multicast) {
+ err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
+ /* skb has been consumed or freed in netlink_broadcast() */
+ if (err && err != -ESRCH)
+ goto failed;
+ }
+ return;
+
+nla_put_failure:
+ nlmsg_free(skb);
+failed:
+ drbd_err(device, "Error %d while broadcasting event. Event seq:%u\n",
+ err, seq);
+}
+
+void notify_connection_state(struct sk_buff *skb,
+ unsigned int seq,
+ struct drbd_connection *connection,
+ struct connection_info *connection_info,
+ enum drbd_notification_type type)
+{
+ struct connection_statistics connection_statistics;
+ struct drbd_genlmsghdr *dh;
+ bool multicast = false;
+ int err;
+
+ if (!skb) {
+ seq = atomic_inc_return(&notify_genl_seq);
+ skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
+ err = -ENOMEM;
+ if (!skb)
+ goto failed;
+ multicast = true;
+ }
+
+ err = -EMSGSIZE;
+ dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_CONNECTION_STATE);
+ if (!dh)
+ goto nla_put_failure;
+ dh->minor = -1U;
+ dh->ret_code = NO_ERROR;
+ if (nla_put_drbd_cfg_context(skb, connection->resource, connection, NULL) ||
+ nla_put_notification_header(skb, type) ||
+ ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
+ connection_info_to_skb(skb, connection_info, true)))
+ goto nla_put_failure;
+ connection_statistics.conn_congested = test_bit(NET_CONGESTED, &connection->flags);
+ connection_statistics_to_skb(skb, &connection_statistics, !capable(CAP_SYS_ADMIN));
+ genlmsg_end(skb, dh);
+ if (multicast) {
+ err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
+ /* skb has been consumed or freed in netlink_broadcast() */
+ if (err && err != -ESRCH)
+ goto failed;
+ }
+ return;
+
+nla_put_failure:
+ nlmsg_free(skb);
+failed:
+ drbd_err(connection, "Error %d while broadcasting event. Event seq:%u\n",
+ err, seq);
+}
+
+void notify_peer_device_state(struct sk_buff *skb,
+ unsigned int seq,
+ struct drbd_peer_device *peer_device,
+ struct peer_device_info *peer_device_info,
+ enum drbd_notification_type type)
+{
+ struct peer_device_statistics peer_device_statistics;
+ struct drbd_resource *resource = peer_device->device->resource;
+ struct drbd_genlmsghdr *dh;
+ bool multicast = false;
+ int err;
+
+ if (!skb) {
+ seq = atomic_inc_return(&notify_genl_seq);
+ skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
+ err = -ENOMEM;
+ if (!skb)
+ goto failed;
+ multicast = true;
+ }
+
+ err = -EMSGSIZE;
+ dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_PEER_DEVICE_STATE);
+ if (!dh)
+ goto nla_put_failure;
+ dh->minor = -1U;
+ dh->ret_code = NO_ERROR;
+ if (nla_put_drbd_cfg_context(skb, resource, peer_device->connection, peer_device->device) ||
+ nla_put_notification_header(skb, type) ||
+ ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
+ peer_device_info_to_skb(skb, peer_device_info, true)))
+ goto nla_put_failure;
+ peer_device_to_statistics(&peer_device_statistics, peer_device);
+ peer_device_statistics_to_skb(skb, &peer_device_statistics, !capable(CAP_SYS_ADMIN));
+ genlmsg_end(skb, dh);
+ if (multicast) {
+ err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
+ /* skb has been consumed or freed in netlink_broadcast() */
+ if (err && err != -ESRCH)
+ goto failed;
+ }
+ return;
+
+nla_put_failure:
+ nlmsg_free(skb);
+failed:
+ drbd_err(peer_device, "Error %d while broadcasting event. Event seq:%u\n",
+ err, seq);
+}
+
+void notify_helper(enum drbd_notification_type type,
+ struct drbd_device *device, struct drbd_connection *connection,
+ const char *name, int status)
+{
+ struct drbd_resource *resource = device ? device->resource : connection->resource;
+ struct drbd_helper_info helper_info;
+ unsigned int seq = atomic_inc_return(&notify_genl_seq);
+ struct sk_buff *skb = NULL;
+ struct drbd_genlmsghdr *dh;
+ int err;
+
+ strlcpy(helper_info.helper_name, name, sizeof(helper_info.helper_name));
+ helper_info.helper_name_len = min(strlen(name), sizeof(helper_info.helper_name));
+ helper_info.helper_status = status;
+
+ skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
+ err = -ENOMEM;
+ if (!skb)
+ goto fail;
+
+ err = -EMSGSIZE;
+ dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_HELPER);
+ if (!dh)
+ goto fail;
+ dh->minor = device ? device->minor : -1;
+ dh->ret_code = NO_ERROR;
+ mutex_lock(&notification_mutex);
+ if (nla_put_drbd_cfg_context(skb, resource, connection, device) ||
+ nla_put_notification_header(skb, type) ||
+ drbd_helper_info_to_skb(skb, &helper_info, true))
+ goto unlock_fail;
+ genlmsg_end(skb, dh);
+ err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
+ skb = NULL;
+ /* skb has been consumed or freed in netlink_broadcast() */
+ if (err && err != -ESRCH)
+ goto unlock_fail;
+ mutex_unlock(&notification_mutex);
+ return;
+
+unlock_fail:
+ mutex_unlock(&notification_mutex);
+fail:
+ nlmsg_free(skb);
+ drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n",
+ err, seq);
+}
+
+static void notify_initial_state_done(struct sk_buff *skb, unsigned int seq)
+{
+ struct drbd_genlmsghdr *dh;
+ int err;
+
+ err = -EMSGSIZE;
+ dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_INITIAL_STATE_DONE);
+ if (!dh)
+ goto nla_put_failure;
+ dh->minor = -1U;
+ dh->ret_code = NO_ERROR;
+ if (nla_put_notification_header(skb, NOTIFY_EXISTS))
+ goto nla_put_failure;
+ genlmsg_end(skb, dh);
+ return;
+
+nla_put_failure:
+ nlmsg_free(skb);
+ pr_err("Error %d sending event. Event seq:%u\n", err, seq);
+}
+
+static void free_state_changes(struct list_head *list)
+{
+ while (!list_empty(list)) {
+ struct drbd_state_change *state_change =
+ list_first_entry(list, struct drbd_state_change, list);
+ list_del(&state_change->list);
+ forget_state_change(state_change);
+ }
+}
+
+static unsigned int notifications_for_state_change(struct drbd_state_change *state_change)
+{
+ return 1 +
+ state_change->n_connections +
+ state_change->n_devices +
+ state_change->n_devices * state_change->n_connections;
+}
+
+static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct drbd_state_change *state_change = (struct drbd_state_change *)cb->args[0];
+ unsigned int seq = cb->args[2];
+ unsigned int n;
+ enum drbd_notification_type flags = 0;
+
+ /* There is no need for taking notification_mutex here: it doesn't
+ matter if the initial state events mix with later state chage
+ events; we can always tell the events apart by the NOTIFY_EXISTS
+ flag. */
+
+ cb->args[5]--;
+ if (cb->args[5] == 1) {
+ notify_initial_state_done(skb, seq);
+ goto out;
+ }
+ n = cb->args[4]++;
+ if (cb->args[4] < cb->args[3])
+ flags |= NOTIFY_CONTINUES;
+ if (n < 1) {
+ notify_resource_state_change(skb, seq, state_change->resource,
+ NOTIFY_EXISTS | flags);
+ goto next;
+ }
+ n--;
+ if (n < state_change->n_connections) {
+ notify_connection_state_change(skb, seq, &state_change->connections[n],
+ NOTIFY_EXISTS | flags);
+ goto next;
+ }
+ n -= state_change->n_connections;
+ if (n < state_change->n_devices) {
+ notify_device_state_change(skb, seq, &state_change->devices[n],
+ NOTIFY_EXISTS | flags);
+ goto next;
+ }
+ n -= state_change->n_devices;
+ if (n < state_change->n_devices * state_change->n_connections) {
+ notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n],
+ NOTIFY_EXISTS | flags);
+ goto next;
+ }
+
+next:
+ if (cb->args[4] == cb->args[3]) {
+ struct drbd_state_change *next_state_change =
+ list_entry(state_change->list.next,
+ struct drbd_state_change, list);
+ cb->args[0] = (long)next_state_change;
+ cb->args[3] = notifications_for_state_change(next_state_change);
+ cb->args[4] = 0;
+ }
+out:
+ return skb->len;
+}
+
+int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct drbd_resource *resource;
+ LIST_HEAD(head);
+
+ if (cb->args[5] >= 1) {
+ if (cb->args[5] > 1)
+ return get_initial_state(skb, cb);
+ if (cb->args[0]) {
+ struct drbd_state_change *state_change =
+ (struct drbd_state_change *)cb->args[0];
+
+ /* connect list to head */
+ list_add(&head, &state_change->list);
+ free_state_changes(&head);
+ }
+ return 0;
+ }
+
+ cb->args[5] = 2; /* number of iterations */
+ mutex_lock(&resources_mutex);
+ for_each_resource(resource, &drbd_resources) {
+ struct drbd_state_change *state_change;
+
+ state_change = remember_old_state(resource, GFP_KERNEL);
+ if (!state_change) {
+ if (!list_empty(&head))
+ free_state_changes(&head);
+ mutex_unlock(&resources_mutex);
+ return -ENOMEM;
+ }
+ copy_old_to_new_state_change(state_change);
+ list_add_tail(&state_change->list, &head);
+ cb->args[5] += notifications_for_state_change(state_change);
+ }
+ mutex_unlock(&resources_mutex);
+
+ if (!list_empty(&head)) {
+ struct drbd_state_change *state_change =
+ list_entry(head.next, struct drbd_state_change, list);
+ cb->args[0] = (long)state_change;
+ cb->args[3] = notifications_for_state_change(state_change);
+ list_del(&head); /* detach list from head */
+ }
+
+ cb->args[2] = cb->nlh->nlmsg_seq;
+ return get_initial_state(skb, cb);
+}
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index 3b10fa6..6537b25 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -245,9 +245,9 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
char wp;
static char write_ordering_chars[] = {
- [WO_none] = 'n',
- [WO_drain_io] = 'd',
- [WO_bdev_flush] = 'f',
+ [WO_NONE] = 'n',
+ [WO_DRAIN_IO] = 'd',
+ [WO_BDEV_FLUSH] = 'f',
};
seq_printf(seq, "version: " REL_VERSION " (api:%d/proto:%d-%d)\n%s\n",
diff --git a/drivers/block/drbd/drbd_protocol.h b/drivers/block/drbd/drbd_protocol.h
index 2da9104a..ef92453 100644
--- a/drivers/block/drbd/drbd_protocol.h
+++ b/drivers/block/drbd/drbd_protocol.h
@@ -23,7 +23,7 @@ enum drbd_packet {
P_AUTH_RESPONSE = 0x11,
P_STATE_CHG_REQ = 0x12,
- /* asender (meta socket */
+ /* (meta socket) */
P_PING = 0x13,
P_PING_ACK = 0x14,
P_RECV_ACK = 0x15, /* Used in protocol B */
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index b4b5680..1957fe8 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -215,7 +215,7 @@ static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
}
}
-static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
+static void drbd_reclaim_net_peer_reqs(struct drbd_device *device)
{
LIST_HEAD(reclaimed);
struct drbd_peer_request *peer_req, *t;
@@ -223,11 +223,30 @@ static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
spin_lock_irq(&device->resource->req_lock);
reclaim_finished_net_peer_reqs(device, &reclaimed);
spin_unlock_irq(&device->resource->req_lock);
-
list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
drbd_free_net_peer_req(device, peer_req);
}
+static void conn_reclaim_net_peer_reqs(struct drbd_connection *connection)
+{
+ struct drbd_peer_device *peer_device;
+ int vnr;
+
+ rcu_read_lock();
+ idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
+ struct drbd_device *device = peer_device->device;
+ if (!atomic_read(&device->pp_in_use_by_net))
+ continue;
+
+ kref_get(&device->kref);
+ rcu_read_unlock();
+ drbd_reclaim_net_peer_reqs(device);
+ kref_put(&device->kref, drbd_destroy_device);
+ rcu_read_lock();
+ }
+ rcu_read_unlock();
+}
+
/**
* drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
* @device: DRBD device.
@@ -265,10 +284,15 @@ struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int
if (atomic_read(&device->pp_in_use) < mxb)
page = __drbd_alloc_pages(device, number);
+ /* Try to keep the fast path fast, but occasionally we need
+ * to reclaim the pages we lended to the network stack. */
+ if (page && atomic_read(&device->pp_in_use_by_net) > 512)
+ drbd_reclaim_net_peer_reqs(device);
+
while (page == NULL) {
prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
- drbd_kick_lo_and_reclaim_net(device);
+ drbd_reclaim_net_peer_reqs(device);
if (atomic_read(&device->pp_in_use) < mxb) {
page = __drbd_alloc_pages(device, number);
@@ -1099,7 +1123,15 @@ randomize:
return 0;
}
- drbd_thread_start(&connection->asender);
+ drbd_thread_start(&connection->ack_receiver);
+ /* opencoded create_singlethread_workqueue(),
+ * to be able to use format string arguments */
+ connection->ack_sender =
+ alloc_ordered_workqueue("drbd_as_%s", WQ_MEM_RECLAIM, connection->resource->name);
+ if (!connection->ack_sender) {
+ drbd_err(connection, "Failed to create workqueue ack_sender\n");
+ return 0;
+ }
mutex_lock(&connection->resource->conf_update);
/* The discard_my_data flag is a single-shot modifier to the next
@@ -1178,7 +1210,7 @@ static void drbd_flush(struct drbd_connection *connection)
struct drbd_peer_device *peer_device;
int vnr;
- if (connection->resource->write_ordering >= WO_bdev_flush) {
+ if (connection->resource->write_ordering >= WO_BDEV_FLUSH) {
rcu_read_lock();
idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
struct drbd_device *device = peer_device->device;
@@ -1203,7 +1235,7 @@ static void drbd_flush(struct drbd_connection *connection)
/* would rather check on EOPNOTSUPP, but that is not reliable.
* don't try again for ANY return value != 0
* if (rv == -EOPNOTSUPP) */
- drbd_bump_write_ordering(connection->resource, NULL, WO_drain_io);
+ drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO);
}
put_ldev(device);
kref_put(&device->kref, drbd_destroy_device);
@@ -1299,10 +1331,10 @@ max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
dc = rcu_dereference(bdev->disk_conf);
- if (wo == WO_bdev_flush && !dc->disk_flushes)
- wo = WO_drain_io;
- if (wo == WO_drain_io && !dc->disk_drain)
- wo = WO_none;
+ if (wo == WO_BDEV_FLUSH && !dc->disk_flushes)
+ wo = WO_DRAIN_IO;
+ if (wo == WO_DRAIN_IO && !dc->disk_drain)
+ wo = WO_NONE;
return wo;
}
@@ -1319,13 +1351,13 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin
enum write_ordering_e pwo;
int vnr;
static char *write_ordering_str[] = {
- [WO_none] = "none",
- [WO_drain_io] = "drain",
- [WO_bdev_flush] = "flush",
+ [WO_NONE] = "none",
+ [WO_DRAIN_IO] = "drain",
+ [WO_BDEV_FLUSH] = "flush",
};
pwo = resource->write_ordering;
- if (wo != WO_bdev_flush)
+ if (wo != WO_BDEV_FLUSH)
wo = min(pwo, wo);
rcu_read_lock();
idr_for_each_entry(&resource->devices, device, vnr) {
@@ -1343,7 +1375,7 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin
rcu_read_unlock();
resource->write_ordering = wo;
- if (pwo != resource->write_ordering || wo == WO_bdev_flush)
+ if (pwo != resource->write_ordering || wo == WO_BDEV_FLUSH)
drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
}
@@ -1380,7 +1412,7 @@ int drbd_submit_peer_request(struct drbd_device *device,
if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) {
/* wait for all pending IO completions, before we start
* zeroing things out. */
- conn_wait_active_ee_empty(first_peer_device(device)->connection);
+ conn_wait_active_ee_empty(peer_req->peer_device->connection);
/* add it to the active list now,
* so we can find it to present it in debugfs */
peer_req->submit_jif = jiffies;
@@ -1508,12 +1540,6 @@ static void conn_wait_active_ee_empty(struct drbd_connection *connection)
rcu_read_unlock();
}
-static struct drbd_peer_device *
-conn_peer_device(struct drbd_connection *connection, int volume_number)
-{
- return idr_find(&connection->peer_devices, volume_number);
-}
-
static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
{
int rv;
@@ -1533,7 +1559,7 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf
* Therefore we must send the barrier_ack after the barrier request was
* completed. */
switch (connection->resource->write_ordering) {
- case WO_none:
+ case WO_NONE:
if (rv == FE_RECYCLED)
return 0;
@@ -1546,8 +1572,8 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf
drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
/* Fall through */
- case WO_bdev_flush:
- case WO_drain_io:
+ case WO_BDEV_FLUSH:
+ case WO_DRAIN_IO:
conn_wait_active_ee_empty(connection);
drbd_flush(connection);
@@ -1752,7 +1778,7 @@ static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_req
}
/*
- * e_end_resync_block() is called in asender context via
+ * e_end_resync_block() is called in ack_sender context via
* drbd_finish_peer_reqs().
*/
static int e_end_resync_block(struct drbd_work *w, int unused)
@@ -1926,7 +1952,7 @@ static void restart_conflicting_writes(struct drbd_device *device,
}
/*
- * e_end_block() is called in asender context via drbd_finish_peer_reqs().
+ * e_end_block() is called in ack_sender context via drbd_finish_peer_reqs().
*/
static int e_end_block(struct drbd_work *w, int cancel)
{
@@ -1966,7 +1992,7 @@ static int e_end_block(struct drbd_work *w, int cancel)
} else
D_ASSERT(device, drbd_interval_empty(&peer_req->i));
- drbd_may_finish_epoch(first_peer_device(device)->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
+ drbd_may_finish_epoch(peer_device->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
return err;
}
@@ -2098,7 +2124,7 @@ static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, co
}
rcu_read_lock();
- tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries;
+ tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
rcu_read_unlock();
if (!tp)
@@ -2217,7 +2243,7 @@ static int handle_write_conflicts(struct drbd_device *device,
peer_req->w.cb = superseded ? e_send_superseded :
e_send_retry_write;
list_add_tail(&peer_req->w.list, &device->done_ee);
- wake_asender(connection);
+ queue_work(connection->ack_sender, &peer_req->peer_device->send_acks_work);
err = -ENOENT;
goto out;
@@ -2364,7 +2390,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
if (dp_flags & DP_SEND_RECEIVE_ACK) {
/* I really don't like it that the receiver thread
* sends on the msock, but anyways */
- drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
+ drbd_send_ack(peer_device, P_RECV_ACK, peer_req);
}
if (tp) {
@@ -4056,7 +4082,7 @@ static int receive_state(struct drbd_connection *connection, struct packet_info
os = ns = drbd_read_state(device);
spin_unlock_irq(&device->resource->req_lock);
- /* If some other part of the code (asender thread, timeout)
+ /* If some other part of the code (ack_receiver thread, timeout)
* already decided to close the connection again,
* we must not "re-establish" it here. */
if (os.conn <= C_TEAR_DOWN)
@@ -4661,8 +4687,12 @@ static void conn_disconnect(struct drbd_connection *connection)
*/
conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
- /* asender does not clean up anything. it must not interfere, either */
- drbd_thread_stop(&connection->asender);
+ /* ack_receiver does not clean up anything. it must not interfere, either */
+ drbd_thread_stop(&connection->ack_receiver);
+ if (connection->ack_sender) {
+ destroy_workqueue(connection->ack_sender);
+ connection->ack_sender = NULL;
+ }
drbd_free_sock(connection);
rcu_read_lock();
@@ -5431,49 +5461,39 @@ static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
return 0;
}
-static int connection_finish_peer_reqs(struct drbd_connection *connection)
+struct meta_sock_cmd {
+ size_t pkt_size;
+ int (*fn)(struct drbd_connection *connection, struct packet_info *);
+};
+
+static void set_rcvtimeo(struct drbd_connection *connection, bool ping_timeout)
{
- struct drbd_peer_device *peer_device;
- int vnr, not_empty = 0;
+ long t;
+ struct net_conf *nc;
- do {
- clear_bit(SIGNAL_ASENDER, &connection->flags);
- flush_signals(current);
+ rcu_read_lock();
+ nc = rcu_dereference(connection->net_conf);
+ t = ping_timeout ? nc->ping_timeo : nc->ping_int;
+ rcu_read_unlock();
- rcu_read_lock();
- idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
- struct drbd_device *device = peer_device->device;
- kref_get(&device->kref);
- rcu_read_unlock();
- if (drbd_finish_peer_reqs(device)) {
- kref_put(&device->kref, drbd_destroy_device);
- return 1;
- }
- kref_put(&device->kref, drbd_destroy_device);
- rcu_read_lock();
- }
- set_bit(SIGNAL_ASENDER, &connection->flags);
+ t *= HZ;
+ if (ping_timeout)
+ t /= 10;
- spin_lock_irq(&connection->resource->req_lock);
- idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
- struct drbd_device *device = peer_device->device;
- not_empty = !list_empty(&device->done_ee);
- if (not_empty)
- break;
- }
- spin_unlock_irq(&connection->resource->req_lock);
- rcu_read_unlock();
- } while (not_empty);
+ connection->meta.socket->sk->sk_rcvtimeo = t;
+}
- return 0;
+static void set_ping_timeout(struct drbd_connection *connection)
+{
+ set_rcvtimeo(connection, 1);
}
-struct asender_cmd {
- size_t pkt_size;
- int (*fn)(struct drbd_connection *connection, struct packet_info *);
-};
+static void set_idle_timeout(struct drbd_connection *connection)
+{
+ set_rcvtimeo(connection, 0);
+}
-static struct asender_cmd asender_tbl[] = {
+static struct meta_sock_cmd ack_receiver_tbl[] = {
[P_PING] = { 0, got_Ping },
[P_PING_ACK] = { 0, got_PingAck },
[P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
@@ -5493,64 +5513,40 @@ static struct asender_cmd asender_tbl[] = {
[P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
};
-int drbd_asender(struct drbd_thread *thi)
+int drbd_ack_receiver(struct drbd_thread *thi)
{
struct drbd_connection *connection = thi->connection;
- struct asender_cmd *cmd = NULL;
+ struct meta_sock_cmd *cmd = NULL;
struct packet_info pi;
+ unsigned long pre_recv_jif;
int rv;
void *buf = connection->meta.rbuf;
int received = 0;
unsigned int header_size = drbd_header_size(connection);
int expect = header_size;
bool ping_timeout_active = false;
- struct net_conf *nc;
- int ping_timeo, tcp_cork, ping_int;
struct sched_param param = { .sched_priority = 2 };
rv = sched_setscheduler(current, SCHED_RR, &param);
if (rv < 0)
- drbd_err(connection, "drbd_asender: ERROR set priority, ret=%d\n", rv);
+ drbd_err(connection, "drbd_ack_receiver: ERROR set priority, ret=%d\n", rv);
while (get_t_state(thi) == RUNNING) {
drbd_thread_current_set_cpu(thi);
- rcu_read_lock();
- nc = rcu_dereference(connection->net_conf);
- ping_timeo = nc->ping_timeo;
- tcp_cork = nc->tcp_cork;
- ping_int = nc->ping_int;
- rcu_read_unlock();
+ conn_reclaim_net_peer_reqs(connection);
if (test_and_clear_bit(SEND_PING, &connection->flags)) {
if (drbd_send_ping(connection)) {
drbd_err(connection, "drbd_send_ping has failed\n");
goto reconnect;
}
- connection->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
+ set_ping_timeout(connection);
ping_timeout_active = true;
}
- /* TODO: conditionally cork; it may hurt latency if we cork without
- much to send */
- if (tcp_cork)
- drbd_tcp_cork(connection->meta.socket);
- if (connection_finish_peer_reqs(connection)) {
- drbd_err(connection, "connection_finish_peer_reqs() failed\n");
- goto reconnect;
- }
- /* but unconditionally uncork unless disabled */
- if (tcp_cork)
- drbd_tcp_uncork(connection->meta.socket);
-
- /* short circuit, recv_msg would return EINTR anyways. */
- if (signal_pending(current))
- continue;
-
+ pre_recv_jif = jiffies;
rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
- clear_bit(SIGNAL_ASENDER, &connection->flags);
-
- flush_signals(current);
/* Note:
* -EINTR (on meta) we got a signal
@@ -5562,7 +5558,6 @@ int drbd_asender(struct drbd_thread *thi)
* rv < expected: "woken" by signal during receive
* rv == 0 : "connection shut down by peer"
*/
-received_more:
if (likely(rv > 0)) {
received += rv;
buf += rv;
@@ -5584,8 +5579,7 @@ received_more:
} else if (rv == -EAGAIN) {
/* If the data socket received something meanwhile,
* that is good enough: peer is still alive. */
- if (time_after(connection->last_received,
- jiffies - connection->meta.socket->sk->sk_rcvtimeo))
+ if (time_after(connection->last_received, pre_recv_jif))
continue;
if (ping_timeout_active) {
drbd_err(connection, "PingAck did not arrive in time.\n");
@@ -5594,6 +5588,10 @@ received_more:
set_bit(SEND_PING, &connection->flags);
continue;
} else if (rv == -EINTR) {
+ /* maybe drbd_thread_stop(): the while condition will notice.
+ * maybe woken for send_ping: we'll send a ping above,
+ * and change the rcvtimeo */
+ flush_signals(current);
continue;
} else {
drbd_err(connection, "sock_recvmsg returned %d\n", rv);
@@ -5603,8 +5601,8 @@ received_more:
if (received == expect && cmd == NULL) {
if (decode_header(connection, connection->meta.rbuf, &pi))
goto reconnect;
- cmd = &asender_tbl[pi.cmd];
- if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) {
+ cmd = &ack_receiver_tbl[pi.cmd];
+ if (pi.cmd >= ARRAY_SIZE(ack_receiver_tbl) || !cmd->fn) {
drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
cmdname(pi.cmd), pi.cmd);
goto disconnect;
@@ -5627,9 +5625,8 @@ received_more:
connection->last_received = jiffies;
- if (cmd == &asender_tbl[P_PING_ACK]) {
- /* restore idle timeout */
- connection->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
+ if (cmd == &ack_receiver_tbl[P_PING_ACK]) {
+ set_idle_timeout(connection);
ping_timeout_active = false;
}
@@ -5638,11 +5635,6 @@ received_more:
expect = header_size;
cmd = NULL;
}
- if (test_bit(SEND_PING, &connection->flags))
- continue;
- rv = drbd_recv_short(connection->meta.socket, buf, expect-received, MSG_DONTWAIT);
- if (rv > 0)
- goto received_more;
}
if (0) {
@@ -5654,9 +5646,41 @@ reconnect:
disconnect:
conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
}
- clear_bit(SIGNAL_ASENDER, &connection->flags);
- drbd_info(connection, "asender terminated\n");
+ drbd_info(connection, "ack_receiver terminated\n");
return 0;
}
+
+void drbd_send_acks_wf(struct work_struct *ws)
+{
+ struct drbd_peer_device *peer_device =
+ container_of(ws, struct drbd_peer_device, send_acks_work);
+ struct drbd_connection *connection = peer_device->connection;
+ struct drbd_device *device = peer_device->device;
+ struct net_conf *nc;
+ int tcp_cork, err;
+
+ rcu_read_lock();
+ nc = rcu_dereference(connection->net_conf);
+ tcp_cork = nc->tcp_cork;
+ rcu_read_unlock();
+
+ if (tcp_cork)
+ drbd_tcp_cork(connection->meta.socket);
+
+ err = drbd_finish_peer_reqs(device);
+ kref_put(&device->kref, drbd_destroy_device);
+ /* get is in drbd_endio_write_sec_final(). That is necessary to keep the
+ struct work_struct send_acks_work alive, which is in the peer_device object */
+
+ if (err) {
+ conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
+ return;
+ }
+
+ if (tcp_cork)
+ drbd_tcp_uncork(connection->meta.socket);
+
+ return;
+}
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 3ae2c00..2255dcf 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -453,12 +453,12 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
kref_get(&req->kref); /* wait for the DONE */
if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) {
- /* potentially already completed in the asender thread */
+ /* potentially already completed in the ack_receiver thread */
if (!(s & RQ_NET_DONE)) {
atomic_add(req->i.size >> 9, &device->ap_in_flight);
set_if_null_req_not_net_done(peer_device, req);
}
- if (s & RQ_NET_PENDING)
+ if (req->rq_state & RQ_NET_PENDING)
set_if_null_req_ack_pending(peer_device, req);
}
@@ -1095,6 +1095,24 @@ static bool do_remote_read(struct drbd_request *req)
return false;
}
+bool drbd_should_do_remote(union drbd_dev_state s)
+{
+ return s.pdsk == D_UP_TO_DATE ||
+ (s.pdsk >= D_INCONSISTENT &&
+ s.conn >= C_WF_BITMAP_T &&
+ s.conn < C_AHEAD);
+ /* Before proto 96 that was >= CONNECTED instead of >= C_WF_BITMAP_T.
+ That is equivalent since before 96 IO was frozen in the C_WF_BITMAP*
+ states. */
+}
+
+static bool drbd_should_send_out_of_sync(union drbd_dev_state s)
+{
+ return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S;
+ /* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary
+ since we enter state C_AHEAD only if proto >= 96 */
+}
+
/* returns number of connections (== 1, for drbd 8.4)
* expected to actually write this data,
* which does NOT include those that we are L_AHEAD for. */
@@ -1149,7 +1167,6 @@ drbd_submit_req_private_bio(struct drbd_request *req)
* stable storage, and this is a WRITE, we may not even submit
* this bio. */
if (get_ldev(device)) {
- req->pre_submit_jif = jiffies;
if (drbd_insert_fault(device,
rw == WRITE ? DRBD_FAULT_DT_WR
: rw == READ ? DRBD_FAULT_DT_RD
@@ -1293,6 +1310,7 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request
&device->pending_master_completion[rw == WRITE]);
if (req->private_bio) {
/* needs to be marked within the same spinlock */
+ req->pre_submit_jif = jiffies;
list_add_tail(&req->req_pending_local,
&device->pending_completion[rw == WRITE]);
_req_mod(req, TO_BE_SUBMITTED);
@@ -1513,6 +1531,78 @@ blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio)
return BLK_QC_T_NONE;
}
+static bool net_timeout_reached(struct drbd_request *net_req,
+ struct drbd_connection *connection,
+ unsigned long now, unsigned long ent,
+ unsigned int ko_count, unsigned int timeout)
+{
+ struct drbd_device *device = net_req->device;
+
+ if (!time_after(now, net_req->pre_send_jif + ent))
+ return false;
+
+ if (time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent))
+ return false;
+
+ if (net_req->rq_state & RQ_NET_PENDING) {
+ drbd_warn(device, "Remote failed to finish a request within %ums > ko-count (%u) * timeout (%u * 0.1s)\n",
+ jiffies_to_msecs(now - net_req->pre_send_jif), ko_count, timeout);
+ return true;
+ }
+
+ /* We received an ACK already (or are using protocol A),
+ * but are waiting for the epoch closing barrier ack.
+ * Check if we sent the barrier already. We should not blame the peer
+ * for being unresponsive, if we did not even ask it yet. */
+ if (net_req->epoch == connection->send.current_epoch_nr) {
+ drbd_warn(device,
+ "We did not send a P_BARRIER for %ums > ko-count (%u) * timeout (%u * 0.1s); drbd kernel thread blocked?\n",
+ jiffies_to_msecs(now - net_req->pre_send_jif), ko_count, timeout);
+ return false;
+ }
+
+ /* Worst case: we may have been blocked for whatever reason, then
+ * suddenly are able to send a lot of requests (and epoch separating
+ * barriers) in quick succession.
+ * The timestamp of the net_req may be much too old and not correspond
+ * to the sending time of the relevant unack'ed barrier packet, so
+ * would trigger a spurious timeout. The latest barrier packet may
+ * have a too recent timestamp to trigger the timeout, potentially miss
+ * a timeout. Right now we don't have a place to conveniently store
+ * these timestamps.
+ * But in this particular situation, the application requests are still
+ * completed to upper layers, DRBD should still "feel" responsive.
+ * No need yet to kill this connection, it may still recover.
+ * If not, eventually we will have queued enough into the network for
+ * us to block. From that point of view, the timestamp of the last sent
+ * barrier packet is relevant enough.
+ */
+ if (time_after(now, connection->send.last_sent_barrier_jif + ent)) {
+ drbd_warn(device, "Remote failed to answer a P_BARRIER (sent at %lu jif; now=%lu jif) within %ums > ko-count (%u) * timeout (%u * 0.1s)\n",
+ connection->send.last_sent_barrier_jif, now,
+ jiffies_to_msecs(now - connection->send.last_sent_barrier_jif), ko_count, timeout);
+ return true;
+ }
+ return false;
+}
+
+/* A request is considered timed out, if
+ * - we have some effective timeout from the configuration,
+ * with some state restrictions applied,
+ * - the oldest request is waiting for a response from the network
+ * resp. the local disk,
+ * - the oldest request is in fact older than the effective timeout,
+ * - the connection was established (resp. disk was attached)
+ * for longer than the timeout already.
+ * Note that for 32bit jiffies and very stable connections/disks,
+ * we may have a wrap around, which is catched by
+ * !time_in_range(now, last_..._jif, last_..._jif + timeout).
+ *
+ * Side effect: once per 32bit wrap-around interval, which means every
+ * ~198 days with 250 HZ, we have a window where the timeout would need
+ * to expire twice (worst case) to become effective. Good enough.
+ */
+
void request_timer_fn(unsigned long data)
{
struct drbd_device *device = (struct drbd_device *) data;
@@ -1522,11 +1612,14 @@ void request_timer_fn(unsigned long data)
unsigned long oldest_submit_jif;
unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
unsigned long now;
+ unsigned int ko_count = 0, timeout = 0;
rcu_read_lock();
nc = rcu_dereference(connection->net_conf);
- if (nc && device->state.conn >= C_WF_REPORT_PARAMS)
- ent = nc->timeout * HZ/10 * nc->ko_count;
+ if (nc && device->state.conn >= C_WF_REPORT_PARAMS) {
+ ko_count = nc->ko_count;
+ timeout = nc->timeout;
+ }
if (get_ldev(device)) { /* implicit state.disk >= D_INCONSISTENT */
dt = rcu_dereference(device->ldev->disk_conf)->disk_timeout * HZ / 10;
@@ -1534,6 +1627,8 @@ void request_timer_fn(unsigned long data)
}
rcu_read_unlock();
+
+ ent = timeout * HZ/10 * ko_count;
et = min_not_zero(dt, ent);
if (!et)
@@ -1545,11 +1640,22 @@ void request_timer_fn(unsigned long data)
spin_lock_irq(&device->resource->req_lock);
req_read = list_first_entry_or_null(&device->pending_completion[0], struct drbd_request, req_pending_local);
req_write = list_first_entry_or_null(&device->pending_completion[1], struct drbd_request, req_pending_local);
- req_peer = connection->req_not_net_done;
+
/* maybe the oldest request waiting for the peer is in fact still
- * blocking in tcp sendmsg */
- if (!req_peer && connection->req_next && connection->req_next->pre_send_jif)
- req_peer = connection->req_next;
+ * blocking in tcp sendmsg. That's ok, though, that's handled via the
+ * socket send timeout, requesting a ping, and bumping ko-count in
+ * we_should_drop_the_connection().
+ */
+
+ /* check the oldest request we did successfully sent,
+ * but which is still waiting for an ACK. */
+ req_peer = connection->req_ack_pending;
+
+ /* if we don't have such request (e.g. protocoll A)
+ * check the oldest requests which is still waiting on its epoch
+ * closing barrier ack. */
+ if (!req_peer)
+ req_peer = connection->req_not_net_done;
/* evaluate the oldest peer request only in one timer! */
if (req_peer && req_peer->device != device)
@@ -1566,28 +1672,9 @@ void request_timer_fn(unsigned long data)
: req_write ? req_write->pre_submit_jif
: req_read ? req_read->pre_submit_jif : now;
- /* The request is considered timed out, if
- * - we have some effective timeout from the configuration,
- * with above state restrictions applied,
- * - the oldest request is waiting for a response from the network
- * resp. the local disk,
- * - the oldest request is in fact older than the effective timeout,
- * - the connection was established (resp. disk was attached)
- * for longer than the timeout already.
- * Note that for 32bit jiffies and very stable connections/disks,
- * we may have a wrap around, which is catched by
- * !time_in_range(now, last_..._jif, last_..._jif + timeout).
- *
- * Side effect: once per 32bit wrap-around interval, which means every
- * ~198 days with 250 HZ, we have a window where the timeout would need
- * to expire twice (worst case) to become effective. Good enough.
- */
- if (ent && req_peer &&
- time_after(now, req_peer->pre_send_jif + ent) &&
- !time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent)) {
- drbd_warn(device, "Remote failed to finish a request within ko-count * timeout\n");
+ if (ent && req_peer && net_timeout_reached(req_peer, connection, now, ent, ko_count, timeout))
_conn_request_state(connection, NS(conn, C_TIMEOUT), CS_VERBOSE | CS_HARD);
- }
+
if (dt && oldest_submit_jif != now &&
time_after(now, oldest_submit_jif + dt) &&
!time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) {
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 9f6a040..bb2ef78 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -331,21 +331,6 @@ static inline int req_mod(struct drbd_request *req,
return rv;
}
-static inline bool drbd_should_do_remote(union drbd_dev_state s)
-{
- return s.pdsk == D_UP_TO_DATE ||
- (s.pdsk >= D_INCONSISTENT &&
- s.conn >= C_WF_BITMAP_T &&
- s.conn < C_AHEAD);
- /* Before proto 96 that was >= CONNECTED instead of >= C_WF_BITMAP_T.
- That is equivalent since before 96 IO was frozen in the C_WF_BITMAP*
- states. */
-}
-static inline bool drbd_should_send_out_of_sync(union drbd_dev_state s)
-{
- return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S;
- /* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary
- since we enter state C_AHEAD only if proto >= 96 */
-}
+extern bool drbd_should_do_remote(union drbd_dev_state);
#endif
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 2d7dd26..5a7ef78 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -29,6 +29,7 @@
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"
+#include "drbd_state_change.h"
struct after_state_chg_work {
struct drbd_work w;
@@ -37,6 +38,7 @@ struct after_state_chg_work {
union drbd_state ns;
enum chg_state_flags flags;
struct completion *done;
+ struct drbd_state_change *state_change;
};
enum sanitize_state_warnings {
@@ -48,9 +50,248 @@ enum sanitize_state_warnings {
IMPLICITLY_UPGRADED_PDSK,
};
+static void count_objects(struct drbd_resource *resource,
+ unsigned int *n_devices,
+ unsigned int *n_connections)
+{
+ struct drbd_device *device;
+ struct drbd_connection *connection;
+ int vnr;
+
+ *n_devices = 0;
+ *n_connections = 0;
+
+ idr_for_each_entry(&resource->devices, device, vnr)
+ (*n_devices)++;
+ for_each_connection(connection, resource)
+ (*n_connections)++;
+}
+
+static struct drbd_state_change *alloc_state_change(unsigned int n_devices, unsigned int n_connections, gfp_t gfp)
+{
+ struct drbd_state_change *state_change;
+ unsigned int size, n;
+
+ size = sizeof(struct drbd_state_change) +
+ n_devices * sizeof(struct drbd_device_state_change) +
+ n_connections * sizeof(struct drbd_connection_state_change) +
+ n_devices * n_connections * sizeof(struct drbd_peer_device_state_change);
+ state_change = kmalloc(size, gfp);
+ if (!state_change)
+ return NULL;
+ state_change->n_devices = n_devices;
+ state_change->n_connections = n_connections;
+ state_change->devices = (void *)(state_change + 1);
+ state_change->connections = (void *)&state_change->devices[n_devices];
+ state_change->peer_devices = (void *)&state_change->connections[n_connections];
+ state_change->resource->resource = NULL;
+ for (n = 0; n < n_devices; n++)
+ state_change->devices[n].device = NULL;
+ for (n = 0; n < n_connections; n++)
+ state_change->connections[n].connection = NULL;
+ return state_change;
+}
+
+struct drbd_state_change *remember_old_state(struct drbd_resource *resource, gfp_t gfp)
+{
+ struct drbd_state_change *state_change;
+ struct drbd_device *device;
+ unsigned int n_devices;
+ struct drbd_connection *connection;
+ unsigned int n_connections;
+ int vnr;
+
+ struct drbd_device_state_change *device_state_change;
+ struct drbd_peer_device_state_change *peer_device_state_change;
+ struct drbd_connection_state_change *connection_state_change;
+
+ /* Caller holds req_lock spinlock.
+ * No state, no device IDR, no connections lists can change. */
+ count_objects(resource, &n_devices, &n_connections);
+ state_change = alloc_state_change(n_devices, n_connections, gfp);
+ if (!state_change)
+ return NULL;
+
+ kref_get(&resource->kref);
+ state_change->resource->resource = resource;
+ state_change->resource->role[OLD] =
+ conn_highest_role(first_connection(resource));
+ state_change->resource->susp[OLD] = resource->susp;
+ state_change->resource->susp_nod[OLD] = resource->susp_nod;
+ state_change->resource->susp_fen[OLD] = resource->susp_fen;
+
+ connection_state_change = state_change->connections;
+ for_each_connection(connection, resource) {
+ kref_get(&connection->kref);
+ connection_state_change->connection = connection;
+ connection_state_change->cstate[OLD] =
+ connection->cstate;
+ connection_state_change->peer_role[OLD] =
+ conn_highest_peer(connection);
+ connection_state_change++;
+ }
+
+ device_state_change = state_change->devices;
+ peer_device_state_change = state_change->peer_devices;
+ idr_for_each_entry(&resource->devices, device, vnr) {
+ kref_get(&device->kref);
+ device_state_change->device = device;
+ device_state_change->disk_state[OLD] = device->state.disk;
+
+ /* The peer_devices for each device have to be enumerated in
+ the order of the connections. We may not use for_each_peer_device() here. */
+ for_each_connection(connection, resource) {
+ struct drbd_peer_device *peer_device;
+
+ peer_device = conn_peer_device(connection, device->vnr);
+ peer_device_state_change->peer_device = peer_device;
+ peer_device_state_change->disk_state[OLD] =
+ device->state.pdsk;
+ peer_device_state_change->repl_state[OLD] =
+ max_t(enum drbd_conns,
+ C_WF_REPORT_PARAMS, device->state.conn);
+ peer_device_state_change->resync_susp_user[OLD] =
+ device->state.user_isp;
+ peer_device_state_change->resync_susp_peer[OLD] =
+ device->state.peer_isp;
+ peer_device_state_change->resync_susp_dependency[OLD] =
+ device->state.aftr_isp;
+ peer_device_state_change++;
+ }
+ device_state_change++;
+ }
+
+ return state_change;
+}
+
+static void remember_new_state(struct drbd_state_change *state_change)
+{
+ struct drbd_resource_state_change *resource_state_change;
+ struct drbd_resource *resource;
+ unsigned int n;
+
+ if (!state_change)
+ return;
+
+ resource_state_change = &state_change->resource[0];
+ resource = resource_state_change->resource;
+
+ resource_state_change->role[NEW] =
+ conn_highest_role(first_connection(resource));
+ resource_state_change->susp[NEW] = resource->susp;
+ resource_state_change->susp_nod[NEW] = resource->susp_nod;
+ resource_state_change->susp_fen[NEW] = resource->susp_fen;
+
+ for (n = 0; n < state_change->n_devices; n++) {
+ struct drbd_device_state_change *device_state_change =
+ &state_change->devices[n];
+ struct drbd_device *device = device_state_change->device;
+
+ device_state_change->disk_state[NEW] = device->state.disk;
+ }
+
+ for (n = 0; n < state_change->n_connections; n++) {
+ struct drbd_connection_state_change *connection_state_change =
+ &state_change->connections[n];
+ struct drbd_connection *connection =
+ connection_state_change->connection;
+
+ connection_state_change->cstate[NEW] = connection->cstate;
+ connection_state_change->peer_role[NEW] =
+ conn_highest_peer(connection);
+ }
+
+ for (n = 0; n < state_change->n_devices * state_change->n_connections; n++) {
+ struct drbd_peer_device_state_change *peer_device_state_change =
+ &state_change->peer_devices[n];
+ struct drbd_device *device =
+ peer_device_state_change->peer_device->device;
+ union drbd_dev_state state = device->state;
+
+ peer_device_state_change->disk_state[NEW] = state.pdsk;
+ peer_device_state_change->repl_state[NEW] =
+ max_t(enum drbd_conns, C_WF_REPORT_PARAMS, state.conn);
+ peer_device_state_change->resync_susp_user[NEW] =
+ state.user_isp;
+ peer_device_state_change->resync_susp_peer[NEW] =
+ state.peer_isp;
+ peer_device_state_change->resync_susp_dependency[NEW] =
+ state.aftr_isp;
+ }
+}
+
+void copy_old_to_new_state_change(struct drbd_state_change *state_change)
+{
+ struct drbd_resource_state_change *resource_state_change = &state_change->resource[0];
+ unsigned int n_device, n_connection, n_peer_device, n_peer_devices;
+
+#define OLD_TO_NEW(x) \
+ (x[NEW] = x[OLD])
+
+ OLD_TO_NEW(resource_state_change->role);
+ OLD_TO_NEW(resource_state_change->susp);
+ OLD_TO_NEW(resource_state_change->susp_nod);
+ OLD_TO_NEW(resource_state_change->susp_fen);
+
+ for (n_connection = 0; n_connection < state_change->n_connections; n_connection++) {
+ struct drbd_connection_state_change *connection_state_change =
+ &state_change->connections[n_connection];
+
+ OLD_TO_NEW(connection_state_change->peer_role);
+ OLD_TO_NEW(connection_state_change->cstate);
+ }
+
+ for (n_device = 0; n_device < state_change->n_devices; n_device++) {
+ struct drbd_device_state_change *device_state_change =
+ &state_change->devices[n_device];
+
+ OLD_TO_NEW(device_state_change->disk_state);
+ }
+
+ n_peer_devices = state_change->n_devices * state_change->n_connections;
+ for (n_peer_device = 0; n_peer_device < n_peer_devices; n_peer_device++) {
+ struct drbd_peer_device_state_change *p =
+ &state_change->peer_devices[n_peer_device];
+
+ OLD_TO_NEW(p->disk_state);
+ OLD_TO_NEW(p->repl_state);
+ OLD_TO_NEW(p->resync_susp_user);
+ OLD_TO_NEW(p->resync_susp_peer);
+ OLD_TO_NEW(p->resync_susp_dependency);
+ }
+
+#undef OLD_TO_NEW
+}
+
+void forget_state_change(struct drbd_state_change *state_change)
+{
+ unsigned int n;
+
+ if (!state_change)
+ return;
+
+ if (state_change->resource->resource)
+ kref_put(&state_change->resource->resource->kref, drbd_destroy_resource);
+ for (n = 0; n < state_change->n_devices; n++) {
+ struct drbd_device *device = state_change->devices[n].device;
+
+ if (device)
+ kref_put(&device->kref, drbd_destroy_device);
+ }
+ for (n = 0; n < state_change->n_connections; n++) {
+ struct drbd_connection *connection =
+ state_change->connections[n].connection;
+
+ if (connection)
+ kref_put(&connection->kref, drbd_destroy_connection);
+ }
+ kfree(state_change);
+}
+
static int w_after_state_ch(struct drbd_work *w, int unused);
static void after_state_ch(struct drbd_device *device, union drbd_state os,
- union drbd_state ns, enum chg_state_flags flags);
+ union drbd_state ns, enum chg_state_flags flags,
+ struct drbd_state_change *);
static enum drbd_state_rv is_valid_state(struct drbd_device *, union drbd_state);
static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state, struct drbd_connection *);
static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns);
@@ -93,6 +334,7 @@ static enum drbd_role max_role(enum drbd_role role1, enum drbd_role role2)
return R_SECONDARY;
return R_UNKNOWN;
}
+
static enum drbd_role min_role(enum drbd_role role1, enum drbd_role role2)
{
if (role1 == R_UNKNOWN || role2 == R_UNKNOWN)
@@ -937,7 +1179,7 @@ void drbd_resume_al(struct drbd_device *device)
drbd_info(device, "Resumed AL updates\n");
}
-/* helper for __drbd_set_state */
+/* helper for _drbd_set_state */
static void set_ov_position(struct drbd_device *device, enum drbd_conns cs)
{
if (first_peer_device(device)->connection->agreed_pro_version < 90)
@@ -965,17 +1207,17 @@ static void set_ov_position(struct drbd_device *device, enum drbd_conns cs)
}
/**
- * __drbd_set_state() - Set a new DRBD state
+ * _drbd_set_state() - Set a new DRBD state
* @device: DRBD device.
* @ns: new state.
* @flags: Flags
* @done: Optional completion, that will get completed after the after_state_ch() finished
*
- * Caller needs to hold req_lock, and global_state_lock. Do not call directly.
+ * Caller needs to hold req_lock. Do not call directly.
*/
enum drbd_state_rv
-__drbd_set_state(struct drbd_device *device, union drbd_state ns,
- enum chg_state_flags flags, struct completion *done)
+_drbd_set_state(struct drbd_device *device, union drbd_state ns,
+ enum chg_state_flags flags, struct completion *done)
{
struct drbd_peer_device *peer_device = first_peer_device(device);
struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
@@ -983,6 +1225,7 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
enum drbd_state_rv rv = SS_SUCCESS;
enum sanitize_state_warnings ssw;
struct after_state_chg_work *ascw;
+ struct drbd_state_change *state_change;
os = drbd_read_state(device);
@@ -1037,6 +1280,9 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
if (!is_sync_state(os.conn) && is_sync_state(ns.conn))
clear_bit(RS_DONE, &device->flags);
+ /* FIXME: Have any flags been set earlier in this function already? */
+ state_change = remember_old_state(device->resource, GFP_ATOMIC);
+
/* changes to local_cnt and device flags should be visible before
* changes to state, which again should be visible before anything else
* depending on that change happens. */
@@ -1047,6 +1293,8 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
device->resource->susp_fen = ns.susp_fen;
smp_wmb();
+ remember_new_state(state_change);
+
/* put replicated vs not-replicated requests in seperate epochs */
if (drbd_should_do_remote((union drbd_dev_state)os.i) !=
drbd_should_do_remote((union drbd_dev_state)ns.i))
@@ -1184,6 +1432,7 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
ascw->w.cb = w_after_state_ch;
ascw->device = device;
ascw->done = done;
+ ascw->state_change = state_change;
drbd_queue_work(&connection->sender_work,
&ascw->w);
} else {
@@ -1199,7 +1448,8 @@ static int w_after_state_ch(struct drbd_work *w, int unused)
container_of(w, struct after_state_chg_work, w);
struct drbd_device *device = ascw->device;
- after_state_ch(device, ascw->os, ascw->ns, ascw->flags);
+ after_state_ch(device, ascw->os, ascw->ns, ascw->flags, ascw->state_change);
+ forget_state_change(ascw->state_change);
if (ascw->flags & CS_WAIT_COMPLETE)
complete(ascw->done);
kfree(ascw);
@@ -1234,7 +1484,7 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device,
D_ASSERT(device, current == first_peer_device(device)->connection->worker.task);
/* open coded non-blocking drbd_suspend_io(device); */
- set_bit(SUSPEND_IO, &device->flags);
+ atomic_inc(&device->suspend_cnt);
drbd_bm_lock(device, why, flags);
rv = io_fn(device);
@@ -1245,6 +1495,139 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device,
return rv;
}
+void notify_resource_state_change(struct sk_buff *skb,
+ unsigned int seq,
+ struct drbd_resource_state_change *resource_state_change,
+ enum drbd_notification_type type)
+{
+ struct drbd_resource *resource = resource_state_change->resource;
+ struct resource_info resource_info = {
+ .res_role = resource_state_change->role[NEW],
+ .res_susp = resource_state_change->susp[NEW],
+ .res_susp_nod = resource_state_change->susp_nod[NEW],
+ .res_susp_fen = resource_state_change->susp_fen[NEW],
+ };
+
+ notify_resource_state(skb, seq, resource, &resource_info, type);
+}
+
+void notify_connection_state_change(struct sk_buff *skb,
+ unsigned int seq,
+ struct drbd_connection_state_change *connection_state_change,
+ enum drbd_notification_type type)
+{
+ struct drbd_connection *connection = connection_state_change->connection;
+ struct connection_info connection_info = {
+ .conn_connection_state = connection_state_change->cstate[NEW],
+ .conn_role = connection_state_change->peer_role[NEW],
+ };
+
+ notify_connection_state(skb, seq, connection, &connection_info, type);
+}
+
+void notify_device_state_change(struct sk_buff *skb,
+ unsigned int seq,
+ struct drbd_device_state_change *device_state_change,
+ enum drbd_notification_type type)
+{
+ struct drbd_device *device = device_state_change->device;
+ struct device_info device_info = {
+ .dev_disk_state = device_state_change->disk_state[NEW],
+ };
+
+ notify_device_state(skb, seq, device, &device_info, type);
+}
+
+void notify_peer_device_state_change(struct sk_buff *skb,
+ unsigned int seq,
+ struct drbd_peer_device_state_change *p,
+ enum drbd_notification_type type)
+{
+ struct drbd_peer_device *peer_device = p->peer_device;
+ struct peer_device_info peer_device_info = {
+ .peer_repl_state = p->repl_state[NEW],
+ .peer_disk_state = p->disk_state[NEW],
+ .peer_resync_susp_user = p->resync_susp_user[NEW],
+ .peer_resync_susp_peer = p->resync_susp_peer[NEW],
+ .peer_resync_susp_dependency = p->resync_susp_dependency[NEW],
+ };
+
+ notify_peer_device_state(skb, seq, peer_device, &peer_device_info, type);
+}
+
+static void broadcast_state_change(struct drbd_state_change *state_change)
+{
+ struct drbd_resource_state_change *resource_state_change = &state_change->resource[0];
+ bool resource_state_has_changed;
+ unsigned int n_device, n_connection, n_peer_device, n_peer_devices;
+ void (*last_func)(struct sk_buff *, unsigned int, void *,
+ enum drbd_notification_type) = NULL;
+ void *uninitialized_var(last_arg);
+
+#define HAS_CHANGED(state) ((state)[OLD] != (state)[NEW])
+#define FINAL_STATE_CHANGE(type) \
+ ({ if (last_func) \
+ last_func(NULL, 0, last_arg, type); \
+ })
+#define REMEMBER_STATE_CHANGE(func, arg, type) \
+ ({ FINAL_STATE_CHANGE(type | NOTIFY_CONTINUES); \
+ last_func = (typeof(last_func))func; \
+ last_arg = arg; \
+ })
+
+ mutex_lock(&notification_mutex);
+
+ resource_state_has_changed =
+ HAS_CHANGED(resource_state_change->role) ||
+ HAS_CHANGED(resource_state_change->susp) ||
+ HAS_CHANGED(resource_state_change->susp_nod) ||
+ HAS_CHANGED(resource_state_change->susp_fen);
+
+ if (resource_state_has_changed)
+ REMEMBER_STATE_CHANGE(notify_resource_state_change,
+ resource_state_change, NOTIFY_CHANGE);
+
+ for (n_connection = 0; n_connection < state_change->n_connections; n_connection++) {
+ struct drbd_connection_state_change *connection_state_change =
+ &state_change->connections[n_connection];
+
+ if (HAS_CHANGED(connection_state_change->peer_role) ||
+ HAS_CHANGED(connection_state_change->cstate))
+ REMEMBER_STATE_CHANGE(notify_connection_state_change,
+ connection_state_change, NOTIFY_CHANGE);
+ }
+
+ for (n_device = 0; n_device < state_change->n_devices; n_device++) {
+ struct drbd_device_state_change *device_state_change =
+ &state_change->devices[n_device];
+
+ if (HAS_CHANGED(device_state_change->disk_state))
+ REMEMBER_STATE_CHANGE(notify_device_state_change,
+ device_state_change, NOTIFY_CHANGE);
+ }
+
+ n_peer_devices = state_change->n_devices * state_change->n_connections;
+ for (n_peer_device = 0; n_peer_device < n_peer_devices; n_peer_device++) {
+ struct drbd_peer_device_state_change *p =
+ &state_change->peer_devices[n_peer_device];
+
+ if (HAS_CHANGED(p->disk_state) ||
+ HAS_CHANGED(p->repl_state) ||
+ HAS_CHANGED(p->resync_susp_user) ||
+ HAS_CHANGED(p->resync_susp_peer) ||
+ HAS_CHANGED(p->resync_susp_dependency))
+ REMEMBER_STATE_CHANGE(notify_peer_device_state_change,
+ p, NOTIFY_CHANGE);
+ }
+
+ FINAL_STATE_CHANGE(NOTIFY_CHANGE);
+ mutex_unlock(&notification_mutex);
+
+#undef HAS_CHANGED
+#undef FINAL_STATE_CHANGE
+#undef REMEMBER_STATE_CHANGE
+}
+
/**
* after_state_ch() - Perform after state change actions that may sleep
* @device: DRBD device.
@@ -1253,13 +1636,16 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device,
* @flags: Flags
*/
static void after_state_ch(struct drbd_device *device, union drbd_state os,
- union drbd_state ns, enum chg_state_flags flags)
+ union drbd_state ns, enum chg_state_flags flags,
+ struct drbd_state_change *state_change)
{
struct drbd_resource *resource = device->resource;
struct drbd_peer_device *peer_device = first_peer_device(device);
struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
struct sib_info sib;
+ broadcast_state_change(state_change);
+
sib.sib_reason = SIB_STATE_CHANGE;
sib.os = os;
sib.ns = ns;
@@ -1377,7 +1763,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
}
if (ns.pdsk < D_INCONSISTENT && get_ldev(device)) {
- if (os.peer == R_SECONDARY && ns.peer == R_PRIMARY &&
+ if (os.peer != R_PRIMARY && ns.peer == R_PRIMARY &&
device->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
drbd_uuid_new_current(device);
drbd_send_uuids(peer_device);
@@ -1444,7 +1830,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
if (os.disk != D_FAILED && ns.disk == D_FAILED) {
enum drbd_io_error_p eh = EP_PASS_ON;
int was_io_error = 0;
- /* corresponding get_ldev was in __drbd_set_state, to serialize
+ /* corresponding get_ldev was in _drbd_set_state, to serialize
* our cleanup here with the transition to D_DISKLESS.
* But is is still not save to dreference ldev here, since
* we might come from an failed Attach before ldev was set. */
@@ -1455,6 +1841,10 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
was_io_error = test_and_clear_bit(WAS_IO_ERROR, &device->flags);
+ /* Intentionally call this handler first, before drbd_send_state().
+ * See: 2932204 drbd: call local-io-error handler early
+ * People may chose to hard-reset the box from this handler.
+ * It is useful if this looks like a "regular node crash". */
if (was_io_error && eh == EP_CALL_HELPER)
drbd_khelper(device, "local-io-error");
@@ -1572,6 +1962,7 @@ struct after_conn_state_chg_work {
union drbd_state ns_max; /* new, max state, over all devices */
enum chg_state_flags flags;
struct drbd_connection *connection;
+ struct drbd_state_change *state_change;
};
static int w_after_conn_state_ch(struct drbd_work *w, int unused)
@@ -1584,6 +1975,8 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused)
struct drbd_peer_device *peer_device;
int vnr;
+ broadcast_state_change(acscw->state_change);
+ forget_state_change(acscw->state_change);
kfree(acscw);
/* Upon network configuration, we need to start the receiver */
@@ -1593,6 +1986,13 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused)
if (oc == C_DISCONNECTING && ns_max.conn == C_STANDALONE) {
struct net_conf *old_conf;
+ mutex_lock(&notification_mutex);
+ idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
+ notify_peer_device_state(NULL, 0, peer_device, NULL,
+ NOTIFY_DESTROY | NOTIFY_CONTINUES);
+ notify_connection_state(NULL, 0, connection, NULL, NOTIFY_DESTROY);
+ mutex_unlock(&notification_mutex);
+
mutex_lock(&connection->resource->conf_update);
old_conf = connection->net_conf;
connection->my_addr_len = 0;
@@ -1759,7 +2159,7 @@ conn_set_state(struct drbd_connection *connection, union drbd_state mask, union
if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED)
ns.disk = os.disk;
- rv = __drbd_set_state(device, ns, flags, NULL);
+ rv = _drbd_set_state(device, ns, flags, NULL);
if (rv < SS_SUCCESS)
BUG();
@@ -1823,6 +2223,7 @@ _conn_request_state(struct drbd_connection *connection, union drbd_state mask, u
enum drbd_conns oc = connection->cstate;
union drbd_state ns_max, ns_min, os;
bool have_mutex = false;
+ struct drbd_state_change *state_change;
if (mask.conn) {
rv = is_valid_conn_transition(oc, val.conn);
@@ -1868,10 +2269,12 @@ _conn_request_state(struct drbd_connection *connection, union drbd_state mask, u
goto abort;
}
+ state_change = remember_old_state(connection->resource, GFP_ATOMIC);
conn_old_common_state(connection, &os, &flags);
flags |= CS_DC_SUSP;
conn_set_state(connection, mask, val, &ns_min, &ns_max, flags);
conn_pr_state_change(connection, os, ns_max, flags);
+ remember_new_state(state_change);
acscw = kmalloc(sizeof(*acscw), GFP_ATOMIC);
if (acscw) {
@@ -1882,6 +2285,7 @@ _conn_request_state(struct drbd_connection *connection, union drbd_state mask, u
acscw->w.cb = w_after_conn_state_ch;
kref_get(&connection->kref);
acscw->connection = connection;
+ acscw->state_change = state_change;
drbd_queue_work(&connection->sender_work, &acscw->w);
} else {
drbd_err(connection, "Could not kmalloc an acscw\n");
diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h
index 7f53c40..bd98953 100644
--- a/drivers/block/drbd/drbd_state.h
+++ b/drivers/block/drbd/drbd_state.h
@@ -122,9 +122,9 @@ extern enum drbd_state_rv
_drbd_request_state_holding_state_mutex(struct drbd_device *, union drbd_state,
union drbd_state, enum chg_state_flags);
-extern enum drbd_state_rv __drbd_set_state(struct drbd_device *, union drbd_state,
- enum chg_state_flags,
- struct completion *done);
+extern enum drbd_state_rv _drbd_set_state(struct drbd_device *, union drbd_state,
+ enum chg_state_flags,
+ struct completion *done);
extern void print_st_err(struct drbd_device *, union drbd_state,
union drbd_state, int);
diff --git a/drivers/block/drbd/drbd_state_change.h b/drivers/block/drbd/drbd_state_change.h
new file mode 100644
index 0000000..9e503a1
--- /dev/null
+++ b/drivers/block/drbd/drbd_state_change.h
@@ -0,0 +1,63 @@
+#ifndef DRBD_STATE_CHANGE_H
+#define DRBD_STATE_CHANGE_H
+
+struct drbd_resource_state_change {
+ struct drbd_resource *resource;
+ enum drbd_role role[2];
+ bool susp[2];
+ bool susp_nod[2];
+ bool susp_fen[2];
+};
+
+struct drbd_device_state_change {
+ struct drbd_device *device;
+ enum drbd_disk_state disk_state[2];
+};
+
+struct drbd_connection_state_change {
+ struct drbd_connection *connection;
+ enum drbd_conns cstate[2]; /* drbd9: enum drbd_conn_state */
+ enum drbd_role peer_role[2];
+};
+
+struct drbd_peer_device_state_change {
+ struct drbd_peer_device *peer_device;
+ enum drbd_disk_state disk_state[2];
+ enum drbd_conns repl_state[2]; /* drbd9: enum drbd_repl_state */
+ bool resync_susp_user[2];
+ bool resync_susp_peer[2];
+ bool resync_susp_dependency[2];
+};
+
+struct drbd_state_change {
+ struct list_head list;
+ unsigned int n_devices;
+ unsigned int n_connections;
+ struct drbd_resource_state_change resource[1];
+ struct drbd_device_state_change *devices;
+ struct drbd_connection_state_change *connections;
+ struct drbd_peer_device_state_change *peer_devices;
+};
+
+extern struct drbd_state_change *remember_old_state(struct drbd_resource *, gfp_t);
+extern void copy_old_to_new_state_change(struct drbd_state_change *);
+extern void forget_state_change(struct drbd_state_change *);
+
+extern void notify_resource_state_change(struct sk_buff *,
+ unsigned int,
+ struct drbd_resource_state_change *,
+ enum drbd_notification_type type);
+extern void notify_connection_state_change(struct sk_buff *,
+ unsigned int,
+ struct drbd_connection_state_change *,
+ enum drbd_notification_type type);
+extern void notify_device_state_change(struct sk_buff *,
+ unsigned int,
+ struct drbd_device_state_change *,
+ enum drbd_notification_type type);
+extern void notify_peer_device_state_change(struct sk_buff *,
+ unsigned int,
+ struct drbd_peer_device_state_change *,
+ enum drbd_notification_type type);
+
+#endif /* DRBD_STATE_CHANGE_H */
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 5578c14..eff716c 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -55,13 +55,6 @@ static int make_resync_request(struct drbd_device *, int);
*
*/
-
-/* About the global_state_lock
- Each state transition on an device holds a read lock. In case we have
- to evaluate the resync after dependencies, we grab a write lock, because
- we need stable states on all devices for that. */
-rwlock_t global_state_lock;
-
/* used for synchronous meta data and bitmap IO
* submitted by drbd_md_sync_page_io()
*/
@@ -120,6 +113,7 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
unsigned long flags = 0;
struct drbd_peer_device *peer_device = peer_req->peer_device;
struct drbd_device *device = peer_device->device;
+ struct drbd_connection *connection = peer_device->connection;
struct drbd_interval i;
int do_wake;
u64 block_id;
@@ -152,6 +146,12 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
* ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */
if (peer_req->flags & EE_WAS_ERROR)
__drbd_chk_io_error(device, DRBD_WRITE_ERROR);
+
+ if (connection->cstate >= C_WF_REPORT_PARAMS) {
+ kref_get(&device->kref); /* put is in drbd_send_acks_wf() */
+ if (!queue_work(connection->ack_sender, &peer_device->send_acks_work))
+ kref_put(&device->kref, drbd_destroy_device);
+ }
spin_unlock_irqrestore(&device->resource->req_lock, flags);
if (block_id == ID_SYNCER)
@@ -163,7 +163,6 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
if (do_al_complete_io)
drbd_al_complete_io(device, &i);
- wake_asender(peer_device->connection);
put_ldev(device);
}
@@ -195,6 +194,12 @@ void drbd_peer_request_endio(struct bio *bio)
}
}
+void drbd_panic_after_delayed_completion_of_aborted_request(struct drbd_device *device)
+{
+ panic("drbd%u %s/%u potential random memory corruption caused by delayed completion of aborted local request\n",
+ device->minor, device->resource->name, device->vnr);
+}
+
/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
*/
void drbd_request_endio(struct bio *bio)
@@ -238,7 +243,7 @@ void drbd_request_endio(struct bio *bio)
drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");
if (!bio->bi_error)
- panic("possible random memory corruption caused by delayed completion of aborted local request\n");
+ drbd_panic_after_delayed_completion_of_aborted_request(device);
}
/* to avoid recursion in __req_mod */
@@ -1291,6 +1296,7 @@ static int drbd_send_barrier(struct drbd_connection *connection)
p->barrier = connection->send.current_epoch_nr;
p->pad = 0;
connection->send.current_epoch_writes = 0;
+ connection->send.last_sent_barrier_jif = jiffies;
return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0);
}
@@ -1315,6 +1321,7 @@ static void re_init_if_first_write(struct drbd_connection *connection, unsigned
connection->send.seen_any_write_yet = true;
connection->send.current_epoch_nr = epoch;
connection->send.current_epoch_writes = 0;
+ connection->send.last_sent_barrier_jif = jiffies;
}
}
@@ -1456,70 +1463,73 @@ static int _drbd_may_sync_now(struct drbd_device *device)
}
/**
- * _drbd_pause_after() - Pause resync on all devices that may not resync now
+ * drbd_pause_after() - Pause resync on all devices that may not resync now
* @device: DRBD device.
*
* Called from process context only (admin command and after_state_ch).
*/
-static int _drbd_pause_after(struct drbd_device *device)
+static bool drbd_pause_after(struct drbd_device *device)
{
+ bool changed = false;
struct drbd_device *odev;
- int i, rv = 0;
+ int i;
rcu_read_lock();
idr_for_each_entry(&drbd_devices, odev, i) {
if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
continue;
- if (!_drbd_may_sync_now(odev))
- rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
- != SS_NOTHING_TO_DO);
+ if (!_drbd_may_sync_now(odev) &&
+ _drbd_set_state(_NS(odev, aftr_isp, 1),
+ CS_HARD, NULL) != SS_NOTHING_TO_DO)
+ changed = true;
}
rcu_read_unlock();
- return rv;
+ return changed;
}
/**
- * _drbd_resume_next() - Resume resync on all devices that may resync now
+ * drbd_resume_next() - Resume resync on all devices that may resync now
* @device: DRBD device.
*
* Called from process context only (admin command and worker).
*/
-static int _drbd_resume_next(struct drbd_device *device)
+static bool drbd_resume_next(struct drbd_device *device)
{
+ bool changed = false;
struct drbd_device *odev;
- int i, rv = 0;
+ int i;
rcu_read_lock();
idr_for_each_entry(&drbd_devices, odev, i) {
if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
continue;
if (odev->state.aftr_isp) {
- if (_drbd_may_sync_now(odev))
- rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
- CS_HARD, NULL)
- != SS_NOTHING_TO_DO) ;
+ if (_drbd_may_sync_now(odev) &&
+ _drbd_set_state(_NS(odev, aftr_isp, 0),
+ CS_HARD, NULL) != SS_NOTHING_TO_DO)
+ changed = true;
}
}
rcu_read_unlock();
- return rv;
+ return changed;
}
void resume_next_sg(struct drbd_device *device)
{
- write_lock_irq(&global_state_lock);
- _drbd_resume_next(device);
- write_unlock_irq(&global_state_lock);
+ lock_all_resources();
+ drbd_resume_next(device);
+ unlock_all_resources();
}
void suspend_other_sg(struct drbd_device *device)
{
- write_lock_irq(&global_state_lock);
- _drbd_pause_after(device);
- write_unlock_irq(&global_state_lock);
+ lock_all_resources();
+ drbd_pause_after(device);
+ unlock_all_resources();
}
-/* caller must hold global_state_lock */
+/* caller must lock_all_resources() */
enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor)
{
struct drbd_device *odev;
@@ -1557,15 +1567,15 @@ enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_min
}
}
-/* caller must hold global_state_lock */
+/* caller must lock_all_resources() */
void drbd_resync_after_changed(struct drbd_device *device)
{
- int changes;
+ int changed;
do {
- changes = _drbd_pause_after(device);
- changes |= _drbd_resume_next(device);
- } while (changes);
+ changed = drbd_pause_after(device);
+ changed |= drbd_resume_next(device);
+ } while (changed);
}
void drbd_rs_controller_reset(struct drbd_device *device)
@@ -1685,19 +1695,14 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
} else {
mutex_lock(device->state_mutex);
}
- clear_bit(B_RS_H_DONE, &device->flags);
- /* req_lock: serialize with drbd_send_and_submit() and others
- * global_state_lock: for stable sync-after dependencies */
- spin_lock_irq(&device->resource->req_lock);
- write_lock(&global_state_lock);
+ lock_all_resources();
+ clear_bit(B_RS_H_DONE, &device->flags);
/* Did some connection breakage or IO error race with us? */
if (device->state.conn < C_CONNECTED
|| !get_ldev_if_state(device, D_NEGOTIATING)) {
- write_unlock(&global_state_lock);
- spin_unlock_irq(&device->resource->req_lock);
- mutex_unlock(device->state_mutex);
- return;
+ unlock_all_resources();
+ goto out;
}
ns = drbd_read_state(device);
@@ -1711,7 +1716,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
else /* side == C_SYNC_SOURCE */
ns.pdsk = D_INCONSISTENT;
- r = __drbd_set_state(device, ns, CS_VERBOSE, NULL);
+ r = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
ns = drbd_read_state(device);
if (ns.conn < C_CONNECTED)
@@ -1732,7 +1737,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
device->rs_mark_left[i] = tw;
device->rs_mark_time[i] = now;
}
- _drbd_pause_after(device);
+ drbd_pause_after(device);
/* Forget potentially stale cached per resync extent bit-counts.
* Open coded drbd_rs_cancel_all(device), we already have IRQs
* disabled, and know the disk state is ok. */
@@ -1742,8 +1747,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
device->resync_wenr = LC_FREE;
spin_unlock(&device->al_lock);
}
- write_unlock(&global_state_lock);
- spin_unlock_irq(&device->resource->req_lock);
+ unlock_all_resources();
if (r == SS_SUCCESS) {
wake_up(&device->al_wait); /* for lc_reset() above */
@@ -1807,6 +1811,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
drbd_md_sync(device);
}
put_ldev(device);
+out:
mutex_unlock(device->state_mutex);
}
@@ -1836,7 +1841,7 @@ static void drbd_ldev_destroy(struct drbd_device *device)
device->act_log = NULL;
__acquire(local);
- drbd_free_ldev(device->ldev);
+ drbd_backing_dev_free(device, device->ldev);
device->ldev = NULL;
__release(local);
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 15bec40..9b180db 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -104,9 +104,9 @@
/* Device instance number, incremented each time a device is probed. */
static int instance;
-struct list_head online_list;
-struct list_head removing_list;
-spinlock_t dev_lock;
+static struct list_head online_list;
+static struct list_head removing_list;
+static spinlock_t dev_lock;
/*
* Global variable used to hold the major block device number
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 95dff91..8ba1e97 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -436,9 +436,8 @@ static void null_del_dev(struct nullb *nullb)
static void null_lnvm_end_io(struct request *rq, int error)
{
struct nvm_rq *rqd = rq->end_io_data;
- struct nvm_dev *dev = rqd->dev;
- dev->mt->end_io(rqd, error);
+ nvm_end_io(rqd, error);
blk_put_request(rq);
}
@@ -495,17 +494,17 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id)
id->ppaf.ch_offset = 56;
id->ppaf.ch_len = 8;
- do_div(size, bs); /* convert size to pages */
- do_div(size, 256); /* concert size to pgs pr blk */
+ sector_div(size, bs); /* convert size to pages */
+ size >>= 8; /* concert size to pgs pr blk */
grp = &id->groups[0];
grp->mtype = 0;
grp->fmtype = 0;
grp->num_ch = 1;
grp->num_pg = 256;
blksize = size;
- do_div(size, (1 << 16));
+ size >>= 16;
grp->num_lun = size + 1;
- do_div(blksize, grp->num_lun);
+ sector_div(blksize, grp->num_lun);
grp->num_blk = blksize;
grp->num_pln = 1;
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 81ea69f..4a87678 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -5185,8 +5185,7 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth)
out_err:
rbd_dev_unparent(rbd_dev);
- if (parent)
- rbd_dev_destroy(parent);
+ rbd_dev_destroy(parent);
return ret;
}
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index 59c91d4..ba4bfe9 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -23,7 +23,7 @@
#include <linux/workqueue.h>
#include <linux/bitops.h>
#include <linux/delay.h>
-#include <linux/time.h>
+#include <linux/ktime.h>
#include <linux/hdreg.h>
#include <linux/dma-mapping.h>
#include <linux/completion.h>
@@ -671,16 +671,15 @@ static int carm_send_special (struct carm_host *host, carm_sspc_t func)
static unsigned int carm_fill_sync_time(struct carm_host *host,
unsigned int idx, void *mem)
{
- struct timeval tv;
struct carm_msg_sync_time *st = mem;
- do_gettimeofday(&tv);
+ time64_t tv = ktime_get_real_seconds();
memset(st, 0, sizeof(*st));
st->type = CARM_MSG_MISC;
st->subtype = MISC_SET_TIME;
st->handle = cpu_to_le32(TAG_ENCODE(idx));
- st->timestamp = cpu_to_le32(tv.tv_sec);
+ st->timestamp = cpu_to_le32(tv);
return sizeof(struct carm_msg_sync_time);
}
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 41fb1a9..4809c15 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -84,6 +84,16 @@ MODULE_PARM_DESC(max_persistent_grants,
"Maximum number of grants to map persistently");
/*
+ * Maximum number of rings/queues blkback supports, allow as many queues as there
+ * are CPUs if user has not specified a value.
+ */
+unsigned int xenblk_max_queues;
+module_param_named(max_queues, xenblk_max_queues, uint, 0644);
+MODULE_PARM_DESC(max_queues,
+ "Maximum number of hardware queues per virtual disk." \
+ "By default it is the number of online CPUs.");
+
+/*
* Maximum order of pages to be used for the shared ring between front and
* backend, 4KB page granularity is used.
*/
@@ -113,71 +123,71 @@ module_param(log_stats, int, 0644);
/* Number of free pages to remove on each call to gnttab_free_pages */
#define NUM_BATCH_FREE_PAGES 10
-static inline int get_free_page(struct xen_blkif *blkif, struct page **page)
+static inline int get_free_page(struct xen_blkif_ring *ring, struct page **page)
{
unsigned long flags;
- spin_lock_irqsave(&blkif->free_pages_lock, flags);
- if (list_empty(&blkif->free_pages)) {
- BUG_ON(blkif->free_pages_num != 0);
- spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+ spin_lock_irqsave(&ring->free_pages_lock, flags);
+ if (list_empty(&ring->free_pages)) {
+ BUG_ON(ring->free_pages_num != 0);
+ spin_unlock_irqrestore(&ring->free_pages_lock, flags);
return gnttab_alloc_pages(1, page);
}
- BUG_ON(blkif->free_pages_num == 0);
- page[0] = list_first_entry(&blkif->free_pages, struct page, lru);
+ BUG_ON(ring->free_pages_num == 0);
+ page[0] = list_first_entry(&ring->free_pages, struct page, lru);
list_del(&page[0]->lru);
- blkif->free_pages_num--;
- spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+ ring->free_pages_num--;
+ spin_unlock_irqrestore(&ring->free_pages_lock, flags);
return 0;
}
-static inline void put_free_pages(struct xen_blkif *blkif, struct page **page,
+static inline void put_free_pages(struct xen_blkif_ring *ring, struct page **page,
int num)
{
unsigned long flags;
int i;
- spin_lock_irqsave(&blkif->free_pages_lock, flags);
+ spin_lock_irqsave(&ring->free_pages_lock, flags);
for (i = 0; i < num; i++)
- list_add(&page[i]->lru, &blkif->free_pages);
- blkif->free_pages_num += num;
- spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+ list_add(&page[i]->lru, &ring->free_pages);
+ ring->free_pages_num += num;
+ spin_unlock_irqrestore(&ring->free_pages_lock, flags);
}
-static inline void shrink_free_pagepool(struct xen_blkif *blkif, int num)
+static inline void shrink_free_pagepool(struct xen_blkif_ring *ring, int num)
{
/* Remove requested pages in batches of NUM_BATCH_FREE_PAGES */
struct page *page[NUM_BATCH_FREE_PAGES];
unsigned int num_pages = 0;
unsigned long flags;
- spin_lock_irqsave(&blkif->free_pages_lock, flags);
- while (blkif->free_pages_num > num) {
- BUG_ON(list_empty(&blkif->free_pages));
- page[num_pages] = list_first_entry(&blkif->free_pages,
+ spin_lock_irqsave(&ring->free_pages_lock, flags);
+ while (ring->free_pages_num > num) {
+ BUG_ON(list_empty(&ring->free_pages));
+ page[num_pages] = list_first_entry(&ring->free_pages,
struct page, lru);
list_del(&page[num_pages]->lru);
- blkif->free_pages_num--;
+ ring->free_pages_num--;
if (++num_pages == NUM_BATCH_FREE_PAGES) {
- spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+ spin_unlock_irqrestore(&ring->free_pages_lock, flags);
gnttab_free_pages(num_pages, page);
- spin_lock_irqsave(&blkif->free_pages_lock, flags);
+ spin_lock_irqsave(&ring->free_pages_lock, flags);
num_pages = 0;
}
}
- spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+ spin_unlock_irqrestore(&ring->free_pages_lock, flags);
if (num_pages != 0)
gnttab_free_pages(num_pages, page);
}
#define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page)))
-static int do_block_io_op(struct xen_blkif *blkif);
-static int dispatch_rw_block_io(struct xen_blkif *blkif,
+static int do_block_io_op(struct xen_blkif_ring *ring);
+static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
struct blkif_request *req,
struct pending_req *pending_req);
-static void make_response(struct xen_blkif *blkif, u64 id,
+static void make_response(struct xen_blkif_ring *ring, u64 id,
unsigned short op, int st);
#define foreach_grant_safe(pos, n, rbtree, node) \
@@ -190,7 +200,7 @@ static void make_response(struct xen_blkif *blkif, u64 id,
/*
* We don't need locking around the persistent grant helpers
- * because blkback uses a single-thread for each backed, so we
+ * because blkback uses a single-thread for each backend, so we
* can be sure that this functions will never be called recursively.
*
* The only exception to that is put_persistent_grant, that can be called
@@ -198,19 +208,20 @@ static void make_response(struct xen_blkif *blkif, u64 id,
* bit operations to modify the flags of a persistent grant and to count
* the number of used grants.
*/
-static int add_persistent_gnt(struct xen_blkif *blkif,
+static int add_persistent_gnt(struct xen_blkif_ring *ring,
struct persistent_gnt *persistent_gnt)
{
struct rb_node **new = NULL, *parent = NULL;
struct persistent_gnt *this;
+ struct xen_blkif *blkif = ring->blkif;
- if (blkif->persistent_gnt_c >= xen_blkif_max_pgrants) {
+ if (ring->persistent_gnt_c >= xen_blkif_max_pgrants) {
if (!blkif->vbd.overflow_max_grants)
blkif->vbd.overflow_max_grants = 1;
return -EBUSY;
}
/* Figure out where to put new node */
- new = &blkif->persistent_gnts.rb_node;
+ new = &ring->persistent_gnts.rb_node;
while (*new) {
this = container_of(*new, struct persistent_gnt, node);
@@ -229,19 +240,19 @@ static int add_persistent_gnt(struct xen_blkif *blkif,
set_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
/* Add new node and rebalance tree. */
rb_link_node(&(persistent_gnt->node), parent, new);
- rb_insert_color(&(persistent_gnt->node), &blkif->persistent_gnts);
- blkif->persistent_gnt_c++;
- atomic_inc(&blkif->persistent_gnt_in_use);
+ rb_insert_color(&(persistent_gnt->node), &ring->persistent_gnts);
+ ring->persistent_gnt_c++;
+ atomic_inc(&ring->persistent_gnt_in_use);
return 0;
}
-static struct persistent_gnt *get_persistent_gnt(struct xen_blkif *blkif,
+static struct persistent_gnt *get_persistent_gnt(struct xen_blkif_ring *ring,
grant_ref_t gref)
{
struct persistent_gnt *data;
struct rb_node *node = NULL;
- node = blkif->persistent_gnts.rb_node;
+ node = ring->persistent_gnts.rb_node;
while (node) {
data = container_of(node, struct persistent_gnt, node);
@@ -255,24 +266,24 @@ static struct persistent_gnt *get_persistent_gnt(struct xen_blkif *blkif,
return NULL;
}
set_bit(PERSISTENT_GNT_ACTIVE, data->flags);
- atomic_inc(&blkif->persistent_gnt_in_use);
+ atomic_inc(&ring->persistent_gnt_in_use);
return data;
}
}
return NULL;
}
-static void put_persistent_gnt(struct xen_blkif *blkif,
+static void put_persistent_gnt(struct xen_blkif_ring *ring,
struct persistent_gnt *persistent_gnt)
{
if(!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
pr_alert_ratelimited("freeing a grant already unused\n");
set_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
- atomic_dec(&blkif->persistent_gnt_in_use);
+ atomic_dec(&ring->persistent_gnt_in_use);
}
-static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
+static void free_persistent_gnts(struct xen_blkif_ring *ring, struct rb_root *root,
unsigned int num)
{
struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
@@ -303,7 +314,7 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
unmap_data.count = segs_to_unmap;
BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
- put_free_pages(blkif, pages, segs_to_unmap);
+ put_free_pages(ring, pages, segs_to_unmap);
segs_to_unmap = 0;
}
@@ -320,15 +331,15 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
struct persistent_gnt *persistent_gnt;
int segs_to_unmap = 0;
- struct xen_blkif *blkif = container_of(work, typeof(*blkif), persistent_purge_work);
+ struct xen_blkif_ring *ring = container_of(work, typeof(*ring), persistent_purge_work);
struct gntab_unmap_queue_data unmap_data;
unmap_data.pages = pages;
unmap_data.unmap_ops = unmap;
unmap_data.kunmap_ops = NULL;
- while(!list_empty(&blkif->persistent_purge_list)) {
- persistent_gnt = list_first_entry(&blkif->persistent_purge_list,
+ while(!list_empty(&ring->persistent_purge_list)) {
+ persistent_gnt = list_first_entry(&ring->persistent_purge_list,
struct persistent_gnt,
remove_node);
list_del(&persistent_gnt->remove_node);
@@ -343,7 +354,7 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
unmap_data.count = segs_to_unmap;
BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
- put_free_pages(blkif, pages, segs_to_unmap);
+ put_free_pages(ring, pages, segs_to_unmap);
segs_to_unmap = 0;
}
kfree(persistent_gnt);
@@ -351,11 +362,11 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
if (segs_to_unmap > 0) {
unmap_data.count = segs_to_unmap;
BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
- put_free_pages(blkif, pages, segs_to_unmap);
+ put_free_pages(ring, pages, segs_to_unmap);
}
}
-static void purge_persistent_gnt(struct xen_blkif *blkif)
+static void purge_persistent_gnt(struct xen_blkif_ring *ring)
{
struct persistent_gnt *persistent_gnt;
struct rb_node *n;
@@ -363,23 +374,23 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
bool scan_used = false, clean_used = false;
struct rb_root *root;
- if (blkif->persistent_gnt_c < xen_blkif_max_pgrants ||
- (blkif->persistent_gnt_c == xen_blkif_max_pgrants &&
- !blkif->vbd.overflow_max_grants)) {
- return;
+ if (ring->persistent_gnt_c < xen_blkif_max_pgrants ||
+ (ring->persistent_gnt_c == xen_blkif_max_pgrants &&
+ !ring->blkif->vbd.overflow_max_grants)) {
+ goto out;
}
- if (work_busy(&blkif->persistent_purge_work)) {
+ if (work_busy(&ring->persistent_purge_work)) {
pr_alert_ratelimited("Scheduled work from previous purge is still busy, cannot purge list\n");
- return;
+ goto out;
}
num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN;
- num_clean = blkif->persistent_gnt_c - xen_blkif_max_pgrants + num_clean;
- num_clean = min(blkif->persistent_gnt_c, num_clean);
+ num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants + num_clean;
+ num_clean = min(ring->persistent_gnt_c, num_clean);
if ((num_clean == 0) ||
- (num_clean > (blkif->persistent_gnt_c - atomic_read(&blkif->persistent_gnt_in_use))))
- return;
+ (num_clean > (ring->persistent_gnt_c - atomic_read(&ring->persistent_gnt_in_use))))
+ goto out;
/*
* At this point, we can assure that there will be no calls
@@ -394,8 +405,8 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
pr_debug("Going to purge %u persistent grants\n", num_clean);
- BUG_ON(!list_empty(&blkif->persistent_purge_list));
- root = &blkif->persistent_gnts;
+ BUG_ON(!list_empty(&ring->persistent_purge_list));
+ root = &ring->persistent_gnts;
purge_list:
foreach_grant_safe(persistent_gnt, n, root, node) {
BUG_ON(persistent_gnt->handle ==
@@ -414,7 +425,7 @@ purge_list:
rb_erase(&persistent_gnt->node, root);
list_add(&persistent_gnt->remove_node,
- &blkif->persistent_purge_list);
+ &ring->persistent_purge_list);
if (--num_clean == 0)
goto finished;
}
@@ -435,30 +446,32 @@ finished:
goto purge_list;
}
- blkif->persistent_gnt_c -= (total - num_clean);
- blkif->vbd.overflow_max_grants = 0;
+ ring->persistent_gnt_c -= (total - num_clean);
+ ring->blkif->vbd.overflow_max_grants = 0;
/* We can defer this work */
- schedule_work(&blkif->persistent_purge_work);
+ schedule_work(&ring->persistent_purge_work);
pr_debug("Purged %u/%u\n", (total - num_clean), total);
+
+out:
return;
}
/*
* Retrieve from the 'pending_reqs' a free pending_req structure to be used.
*/
-static struct pending_req *alloc_req(struct xen_blkif *blkif)
+static struct pending_req *alloc_req(struct xen_blkif_ring *ring)
{
struct pending_req *req = NULL;
unsigned long flags;
- spin_lock_irqsave(&blkif->pending_free_lock, flags);
- if (!list_empty(&blkif->pending_free)) {
- req = list_entry(blkif->pending_free.next, struct pending_req,
+ spin_lock_irqsave(&ring->pending_free_lock, flags);
+ if (!list_empty(&ring->pending_free)) {
+ req = list_entry(ring->pending_free.next, struct pending_req,
free_list);
list_del(&req->free_list);
}
- spin_unlock_irqrestore(&blkif->pending_free_lock, flags);
+ spin_unlock_irqrestore(&ring->pending_free_lock, flags);
return req;
}
@@ -466,17 +479,17 @@ static struct pending_req *alloc_req(struct xen_blkif *blkif)
* Return the 'pending_req' structure back to the freepool. We also
* wake up the thread if it was waiting for a free page.
*/
-static void free_req(struct xen_blkif *blkif, struct pending_req *req)
+static void free_req(struct xen_blkif_ring *ring, struct pending_req *req)
{
unsigned long flags;
int was_empty;
- spin_lock_irqsave(&blkif->pending_free_lock, flags);
- was_empty = list_empty(&blkif->pending_free);
- list_add(&req->free_list, &blkif->pending_free);
- spin_unlock_irqrestore(&blkif->pending_free_lock, flags);
+ spin_lock_irqsave(&ring->pending_free_lock, flags);
+ was_empty = list_empty(&ring->pending_free);
+ list_add(&req->free_list, &ring->pending_free);
+ spin_unlock_irqrestore(&ring->pending_free_lock, flags);
if (was_empty)
- wake_up(&blkif->pending_free_wq);
+ wake_up(&ring->pending_free_wq);
}
/*
@@ -556,10 +569,10 @@ abort:
/*
* Notification from the guest OS.
*/
-static void blkif_notify_work(struct xen_blkif *blkif)
+static void blkif_notify_work(struct xen_blkif_ring *ring)
{
- blkif->waiting_reqs = 1;
- wake_up(&blkif->wq);
+ ring->waiting_reqs = 1;
+ wake_up(&ring->wq);
}
irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
@@ -572,31 +585,33 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
* SCHEDULER FUNCTIONS
*/
-static void print_stats(struct xen_blkif *blkif)
+static void print_stats(struct xen_blkif_ring *ring)
{
pr_info("(%s): oo %3llu | rd %4llu | wr %4llu | f %4llu"
" | ds %4llu | pg: %4u/%4d\n",
- current->comm, blkif->st_oo_req,
- blkif->st_rd_req, blkif->st_wr_req,
- blkif->st_f_req, blkif->st_ds_req,
- blkif->persistent_gnt_c,
+ current->comm, ring->st_oo_req,
+ ring->st_rd_req, ring->st_wr_req,
+ ring->st_f_req, ring->st_ds_req,
+ ring->persistent_gnt_c,
xen_blkif_max_pgrants);
- blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
- blkif->st_rd_req = 0;
- blkif->st_wr_req = 0;
- blkif->st_oo_req = 0;
- blkif->st_ds_req = 0;
+ ring->st_print = jiffies + msecs_to_jiffies(10 * 1000);
+ ring->st_rd_req = 0;
+ ring->st_wr_req = 0;
+ ring->st_oo_req = 0;
+ ring->st_ds_req = 0;
}
int xen_blkif_schedule(void *arg)
{
- struct xen_blkif *blkif = arg;
+ struct xen_blkif_ring *ring = arg;
+ struct xen_blkif *blkif = ring->blkif;
struct xen_vbd *vbd = &blkif->vbd;
unsigned long timeout;
int ret;
xen_blkif_get(blkif);
+ set_freezable();
while (!kthread_should_stop()) {
if (try_to_freeze())
continue;
@@ -606,50 +621,50 @@ int xen_blkif_schedule(void *arg)
timeout = msecs_to_jiffies(LRU_INTERVAL);
timeout = wait_event_interruptible_timeout(
- blkif->wq,
- blkif->waiting_reqs || kthread_should_stop(),
+ ring->wq,
+ ring->waiting_reqs || kthread_should_stop(),
timeout);
if (timeout == 0)
goto purge_gnt_list;
timeout = wait_event_interruptible_timeout(
- blkif->pending_free_wq,
- !list_empty(&blkif->pending_free) ||
+ ring->pending_free_wq,
+ !list_empty(&ring->pending_free) ||
kthread_should_stop(),
timeout);
if (timeout == 0)
goto purge_gnt_list;
- blkif->waiting_reqs = 0;
+ ring->waiting_reqs = 0;
smp_mb(); /* clear flag *before* checking for work */
- ret = do_block_io_op(blkif);
+ ret = do_block_io_op(ring);
if (ret > 0)
- blkif->waiting_reqs = 1;
+ ring->waiting_reqs = 1;
if (ret == -EACCES)
- wait_event_interruptible(blkif->shutdown_wq,
+ wait_event_interruptible(ring->shutdown_wq,
kthread_should_stop());
purge_gnt_list:
if (blkif->vbd.feature_gnt_persistent &&
- time_after(jiffies, blkif->next_lru)) {
- purge_persistent_gnt(blkif);
- blkif->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL);
+ time_after(jiffies, ring->next_lru)) {
+ purge_persistent_gnt(ring);
+ ring->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL);
}
/* Shrink if we have more than xen_blkif_max_buffer_pages */
- shrink_free_pagepool(blkif, xen_blkif_max_buffer_pages);
+ shrink_free_pagepool(ring, xen_blkif_max_buffer_pages);
- if (log_stats && time_after(jiffies, blkif->st_print))
- print_stats(blkif);
+ if (log_stats && time_after(jiffies, ring->st_print))
+ print_stats(ring);
}
/* Drain pending purge work */
- flush_work(&blkif->persistent_purge_work);
+ flush_work(&ring->persistent_purge_work);
if (log_stats)
- print_stats(blkif);
+ print_stats(ring);
- blkif->xenblkd = NULL;
+ ring->xenblkd = NULL;
xen_blkif_put(blkif);
return 0;
@@ -658,22 +673,22 @@ purge_gnt_list:
/*
* Remove persistent grants and empty the pool of free pages
*/
-void xen_blkbk_free_caches(struct xen_blkif *blkif)
+void xen_blkbk_free_caches(struct xen_blkif_ring *ring)
{
/* Free all persistent grant pages */
- if (!RB_EMPTY_ROOT(&blkif->persistent_gnts))
- free_persistent_gnts(blkif, &blkif->persistent_gnts,
- blkif->persistent_gnt_c);
+ if (!RB_EMPTY_ROOT(&ring->persistent_gnts))
+ free_persistent_gnts(ring, &ring->persistent_gnts,
+ ring->persistent_gnt_c);
- BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
- blkif->persistent_gnt_c = 0;
+ BUG_ON(!RB_EMPTY_ROOT(&ring->persistent_gnts));
+ ring->persistent_gnt_c = 0;
/* Since we are shutting down remove all pages from the buffer */
- shrink_free_pagepool(blkif, 0 /* All */);
+ shrink_free_pagepool(ring, 0 /* All */);
}
static unsigned int xen_blkbk_unmap_prepare(
- struct xen_blkif *blkif,
+ struct xen_blkif_ring *ring,
struct grant_page **pages,
unsigned int num,
struct gnttab_unmap_grant_ref *unmap_ops,
@@ -683,7 +698,7 @@ static unsigned int xen_blkbk_unmap_prepare(
for (i = 0; i < num; i++) {
if (pages[i]->persistent_gnt != NULL) {
- put_persistent_gnt(blkif, pages[i]->persistent_gnt);
+ put_persistent_gnt(ring, pages[i]->persistent_gnt);
continue;
}
if (pages[i]->handle == BLKBACK_INVALID_HANDLE)
@@ -700,17 +715,18 @@ static unsigned int xen_blkbk_unmap_prepare(
static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_queue_data *data)
{
- struct pending_req* pending_req = (struct pending_req*) (data->data);
- struct xen_blkif *blkif = pending_req->blkif;
+ struct pending_req *pending_req = (struct pending_req *)(data->data);
+ struct xen_blkif_ring *ring = pending_req->ring;
+ struct xen_blkif *blkif = ring->blkif;
/* BUG_ON used to reproduce existing behaviour,
but is this the best way to deal with this? */
BUG_ON(result);
- put_free_pages(blkif, data->pages, data->count);
- make_response(blkif, pending_req->id,
+ put_free_pages(ring, data->pages, data->count);
+ make_response(ring, pending_req->id,
pending_req->operation, pending_req->status);
- free_req(blkif, pending_req);
+ free_req(ring, pending_req);
/*
* Make sure the request is freed before releasing blkif,
* or there could be a race between free_req and the
@@ -723,7 +739,7 @@ static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_
* pending_free_wq if there's a drain going on, but it has
* to be taken into account if the current model is changed.
*/
- if (atomic_dec_and_test(&blkif->inflight) && atomic_read(&blkif->drain)) {
+ if (atomic_dec_and_test(&ring->inflight) && atomic_read(&blkif->drain)) {
complete(&blkif->drain_complete);
}
xen_blkif_put(blkif);
@@ -732,11 +748,11 @@ static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_
static void xen_blkbk_unmap_and_respond(struct pending_req *req)
{
struct gntab_unmap_queue_data* work = &req->gnttab_unmap_data;
- struct xen_blkif *blkif = req->blkif;
+ struct xen_blkif_ring *ring = req->ring;
struct grant_page **pages = req->segments;
unsigned int invcount;
- invcount = xen_blkbk_unmap_prepare(blkif, pages, req->nr_segs,
+ invcount = xen_blkbk_unmap_prepare(ring, pages, req->nr_segs,
req->unmap, req->unmap_pages);
work->data = req;
@@ -757,7 +773,7 @@ static void xen_blkbk_unmap_and_respond(struct pending_req *req)
* of hypercalls, but since this is only used in error paths there's
* no real need.
*/
-static void xen_blkbk_unmap(struct xen_blkif *blkif,
+static void xen_blkbk_unmap(struct xen_blkif_ring *ring,
struct grant_page *pages[],
int num)
{
@@ -768,20 +784,20 @@ static void xen_blkbk_unmap(struct xen_blkif *blkif,
while (num) {
unsigned int batch = min(num, BLKIF_MAX_SEGMENTS_PER_REQUEST);
-
- invcount = xen_blkbk_unmap_prepare(blkif, pages, batch,
+
+ invcount = xen_blkbk_unmap_prepare(ring, pages, batch,
unmap, unmap_pages);
if (invcount) {
ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount);
BUG_ON(ret);
- put_free_pages(blkif, unmap_pages, invcount);
+ put_free_pages(ring, unmap_pages, invcount);
}
pages += batch;
num -= batch;
}
}
-static int xen_blkbk_map(struct xen_blkif *blkif,
+static int xen_blkbk_map(struct xen_blkif_ring *ring,
struct grant_page *pages[],
int num, bool ro)
{
@@ -794,6 +810,7 @@ static int xen_blkbk_map(struct xen_blkif *blkif,
int ret = 0;
int last_map = 0, map_until = 0;
int use_persistent_gnts;
+ struct xen_blkif *blkif = ring->blkif;
use_persistent_gnts = (blkif->vbd.feature_gnt_persistent);
@@ -806,10 +823,11 @@ again:
for (i = map_until; i < num; i++) {
uint32_t flags;
- if (use_persistent_gnts)
+ if (use_persistent_gnts) {
persistent_gnt = get_persistent_gnt(
- blkif,
+ ring,
pages[i]->gref);
+ }
if (persistent_gnt) {
/*
@@ -819,7 +837,7 @@ again:
pages[i]->page = persistent_gnt->page;
pages[i]->persistent_gnt = persistent_gnt;
} else {
- if (get_free_page(blkif, &pages[i]->page))
+ if (get_free_page(ring, &pages[i]->page))
goto out_of_memory;
addr = vaddr(pages[i]->page);
pages_to_gnt[segs_to_map] = pages[i]->page;
@@ -852,7 +870,7 @@ again:
BUG_ON(new_map_idx >= segs_to_map);
if (unlikely(map[new_map_idx].status != 0)) {
pr_debug("invalid buffer -- could not remap it\n");
- put_free_pages(blkif, &pages[seg_idx]->page, 1);
+ put_free_pages(ring, &pages[seg_idx]->page, 1);
pages[seg_idx]->handle = BLKBACK_INVALID_HANDLE;
ret |= 1;
goto next;
@@ -862,7 +880,7 @@ again:
continue;
}
if (use_persistent_gnts &&
- blkif->persistent_gnt_c < xen_blkif_max_pgrants) {
+ ring->persistent_gnt_c < xen_blkif_max_pgrants) {
/*
* We are using persistent grants, the grant is
* not mapped but we might have room for it.
@@ -880,7 +898,7 @@ again:
persistent_gnt->gnt = map[new_map_idx].ref;
persistent_gnt->handle = map[new_map_idx].handle;
persistent_gnt->page = pages[seg_idx]->page;
- if (add_persistent_gnt(blkif,
+ if (add_persistent_gnt(ring,
persistent_gnt)) {
kfree(persistent_gnt);
persistent_gnt = NULL;
@@ -888,7 +906,7 @@ again:
}
pages[seg_idx]->persistent_gnt = persistent_gnt;
pr_debug("grant %u added to the tree of persistent grants, using %u/%u\n",
- persistent_gnt->gnt, blkif->persistent_gnt_c,
+ persistent_gnt->gnt, ring->persistent_gnt_c,
xen_blkif_max_pgrants);
goto next;
}
@@ -913,7 +931,7 @@ next:
out_of_memory:
pr_alert("%s: out of memory\n", __func__);
- put_free_pages(blkif, pages_to_gnt, segs_to_map);
+ put_free_pages(ring, pages_to_gnt, segs_to_map);
return -ENOMEM;
}
@@ -921,7 +939,7 @@ static int xen_blkbk_map_seg(struct pending_req *pending_req)
{
int rc;
- rc = xen_blkbk_map(pending_req->blkif, pending_req->segments,
+ rc = xen_blkbk_map(pending_req->ring, pending_req->segments,
pending_req->nr_segs,
(pending_req->operation != BLKIF_OP_READ));
@@ -934,7 +952,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
struct phys_req *preq)
{
struct grant_page **pages = pending_req->indirect_pages;
- struct xen_blkif *blkif = pending_req->blkif;
+ struct xen_blkif_ring *ring = pending_req->ring;
int indirect_grefs, rc, n, nseg, i;
struct blkif_request_segment *segments = NULL;
@@ -945,7 +963,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
for (i = 0; i < indirect_grefs; i++)
pages[i]->gref = req->u.indirect.indirect_grefs[i];
- rc = xen_blkbk_map(blkif, pages, indirect_grefs, true);
+ rc = xen_blkbk_map(ring, pages, indirect_grefs, true);
if (rc)
goto unmap;
@@ -977,15 +995,16 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
unmap:
if (segments)
kunmap_atomic(segments);
- xen_blkbk_unmap(blkif, pages, indirect_grefs);
+ xen_blkbk_unmap(ring, pages, indirect_grefs);
return rc;
}
-static int dispatch_discard_io(struct xen_blkif *blkif,
+static int dispatch_discard_io(struct xen_blkif_ring *ring,
struct blkif_request *req)
{
int err = 0;
int status = BLKIF_RSP_OKAY;
+ struct xen_blkif *blkif = ring->blkif;
struct block_device *bdev = blkif->vbd.bdev;
unsigned long secure;
struct phys_req preq;
@@ -1002,7 +1021,7 @@ static int dispatch_discard_io(struct xen_blkif *blkif,
preq.sector_number + preq.nr_sects, blkif->vbd.pdevice);
goto fail_response;
}
- blkif->st_ds_req++;
+ ring->st_ds_req++;
secure = (blkif->vbd.discard_secure &&
(req->u.discard.flag & BLKIF_DISCARD_SECURE)) ?
@@ -1018,26 +1037,28 @@ fail_response:
} else if (err)
status = BLKIF_RSP_ERROR;
- make_response(blkif, req->u.discard.id, req->operation, status);
+ make_response(ring, req->u.discard.id, req->operation, status);
xen_blkif_put(blkif);
return err;
}
-static int dispatch_other_io(struct xen_blkif *blkif,
+static int dispatch_other_io(struct xen_blkif_ring *ring,
struct blkif_request *req,
struct pending_req *pending_req)
{
- free_req(blkif, pending_req);
- make_response(blkif, req->u.other.id, req->operation,
+ free_req(ring, pending_req);
+ make_response(ring, req->u.other.id, req->operation,
BLKIF_RSP_EOPNOTSUPP);
return -EIO;
}
-static void xen_blk_drain_io(struct xen_blkif *blkif)
+static void xen_blk_drain_io(struct xen_blkif_ring *ring)
{
+ struct xen_blkif *blkif = ring->blkif;
+
atomic_set(&blkif->drain, 1);
do {
- if (atomic_read(&blkif->inflight) == 0)
+ if (atomic_read(&ring->inflight) == 0)
break;
wait_for_completion_interruptible_timeout(
&blkif->drain_complete, HZ);
@@ -1058,12 +1079,12 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) &&
(error == -EOPNOTSUPP)) {
pr_debug("flush diskcache op failed, not supported\n");
- xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0);
+ xen_blkbk_flush_diskcache(XBT_NIL, pending_req->ring->blkif->be, 0);
pending_req->status = BLKIF_RSP_EOPNOTSUPP;
} else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
(error == -EOPNOTSUPP)) {
pr_debug("write barrier op failed, not supported\n");
- xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0);
+ xen_blkbk_barrier(XBT_NIL, pending_req->ring->blkif->be, 0);
pending_req->status = BLKIF_RSP_EOPNOTSUPP;
} else if (error) {
pr_debug("Buffer not up-to-date at end of operation,"
@@ -1097,9 +1118,9 @@ static void end_block_io_op(struct bio *bio)
* and transmute it to the block API to hand it over to the proper block disk.
*/
static int
-__do_block_io_op(struct xen_blkif *blkif)
+__do_block_io_op(struct xen_blkif_ring *ring)
{
- union blkif_back_rings *blk_rings = &blkif->blk_rings;
+ union blkif_back_rings *blk_rings = &ring->blk_rings;
struct blkif_request req;
struct pending_req *pending_req;
RING_IDX rc, rp;
@@ -1112,7 +1133,7 @@ __do_block_io_op(struct xen_blkif *blkif)
if (RING_REQUEST_PROD_OVERFLOW(&blk_rings->common, rp)) {
rc = blk_rings->common.rsp_prod_pvt;
pr_warn("Frontend provided bogus ring requests (%d - %d = %d). Halting ring processing on dev=%04x\n",
- rp, rc, rp - rc, blkif->vbd.pdevice);
+ rp, rc, rp - rc, ring->blkif->vbd.pdevice);
return -EACCES;
}
while (rc != rp) {
@@ -1125,14 +1146,14 @@ __do_block_io_op(struct xen_blkif *blkif)
break;
}
- pending_req = alloc_req(blkif);
+ pending_req = alloc_req(ring);
if (NULL == pending_req) {
- blkif->st_oo_req++;
+ ring->st_oo_req++;
more_to_do = 1;
break;
}
- switch (blkif->blk_protocol) {
+ switch (ring->blkif->blk_protocol) {
case BLKIF_PROTOCOL_NATIVE:
memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req));
break;
@@ -1156,16 +1177,16 @@ __do_block_io_op(struct xen_blkif *blkif)
case BLKIF_OP_WRITE_BARRIER:
case BLKIF_OP_FLUSH_DISKCACHE:
case BLKIF_OP_INDIRECT:
- if (dispatch_rw_block_io(blkif, &req, pending_req))
+ if (dispatch_rw_block_io(ring, &req, pending_req))
goto done;
break;
case BLKIF_OP_DISCARD:
- free_req(blkif, pending_req);
- if (dispatch_discard_io(blkif, &req))
+ free_req(ring, pending_req);
+ if (dispatch_discard_io(ring, &req))
goto done;
break;
default:
- if (dispatch_other_io(blkif, &req, pending_req))
+ if (dispatch_other_io(ring, &req, pending_req))
goto done;
break;
}
@@ -1178,13 +1199,13 @@ done:
}
static int
-do_block_io_op(struct xen_blkif *blkif)
+do_block_io_op(struct xen_blkif_ring *ring)
{
- union blkif_back_rings *blk_rings = &blkif->blk_rings;
+ union blkif_back_rings *blk_rings = &ring->blk_rings;
int more_to_do;
do {
- more_to_do = __do_block_io_op(blkif);
+ more_to_do = __do_block_io_op(ring);
if (more_to_do)
break;
@@ -1197,7 +1218,7 @@ do_block_io_op(struct xen_blkif *blkif)
* Transmutation of the 'struct blkif_request' to a proper 'struct bio'
* and call the 'submit_bio' to pass it to the underlying storage.
*/
-static int dispatch_rw_block_io(struct xen_blkif *blkif,
+static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
struct blkif_request *req,
struct pending_req *pending_req)
{
@@ -1225,17 +1246,17 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
switch (req_operation) {
case BLKIF_OP_READ:
- blkif->st_rd_req++;
+ ring->st_rd_req++;
operation = READ;
break;
case BLKIF_OP_WRITE:
- blkif->st_wr_req++;
+ ring->st_wr_req++;
operation = WRITE_ODIRECT;
break;
case BLKIF_OP_WRITE_BARRIER:
drain = true;
case BLKIF_OP_FLUSH_DISKCACHE:
- blkif->st_f_req++;
+ ring->st_f_req++;
operation = WRITE_FLUSH;
break;
default:
@@ -1260,7 +1281,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
preq.nr_sects = 0;
- pending_req->blkif = blkif;
+ pending_req->ring = ring;
pending_req->id = req->u.rw.id;
pending_req->operation = req_operation;
pending_req->status = BLKIF_RSP_OKAY;
@@ -1287,12 +1308,12 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
goto fail_response;
}
- if (xen_vbd_translate(&preq, blkif, operation) != 0) {
+ if (xen_vbd_translate(&preq, ring->blkif, operation) != 0) {
pr_debug("access denied: %s of [%llu,%llu] on dev=%04x\n",
operation == READ ? "read" : "write",
preq.sector_number,
preq.sector_number + preq.nr_sects,
- blkif->vbd.pdevice);
+ ring->blkif->vbd.pdevice);
goto fail_response;
}
@@ -1304,7 +1325,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
if (((int)preq.sector_number|(int)seg[i].nsec) &
((bdev_logical_block_size(preq.bdev) >> 9) - 1)) {
pr_debug("Misaligned I/O request from domain %d\n",
- blkif->domid);
+ ring->blkif->domid);
goto fail_response;
}
}
@@ -1313,7 +1334,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
* issue the WRITE_FLUSH.
*/
if (drain)
- xen_blk_drain_io(pending_req->blkif);
+ xen_blk_drain_io(pending_req->ring);
/*
* If we have failed at this point, we need to undo the M2P override,
@@ -1328,8 +1349,8 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
* This corresponding xen_blkif_put is done in __end_block_io_op, or
* below (in "!bio") if we are handling a BLKIF_OP_DISCARD.
*/
- xen_blkif_get(blkif);
- atomic_inc(&blkif->inflight);
+ xen_blkif_get(ring->blkif);
+ atomic_inc(&ring->inflight);
for (i = 0; i < nseg; i++) {
while ((bio == NULL) ||
@@ -1377,19 +1398,19 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
blk_finish_plug(&plug);
if (operation == READ)
- blkif->st_rd_sect += preq.nr_sects;
+ ring->st_rd_sect += preq.nr_sects;
else if (operation & WRITE)
- blkif->st_wr_sect += preq.nr_sects;
+ ring->st_wr_sect += preq.nr_sects;
return 0;
fail_flush:
- xen_blkbk_unmap(blkif, pending_req->segments,
+ xen_blkbk_unmap(ring, pending_req->segments,
pending_req->nr_segs);
fail_response:
/* Haven't submitted any bio's yet. */
- make_response(blkif, req->u.rw.id, req_operation, BLKIF_RSP_ERROR);
- free_req(blkif, pending_req);
+ make_response(ring, req->u.rw.id, req_operation, BLKIF_RSP_ERROR);
+ free_req(ring, pending_req);
msleep(1); /* back off a bit */
return -EIO;
@@ -1407,21 +1428,22 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
/*
* Put a response on the ring on how the operation fared.
*/
-static void make_response(struct xen_blkif *blkif, u64 id,
+static void make_response(struct xen_blkif_ring *ring, u64 id,
unsigned short op, int st)
{
struct blkif_response resp;
unsigned long flags;
- union blkif_back_rings *blk_rings = &blkif->blk_rings;
+ union blkif_back_rings *blk_rings;
int notify;
resp.id = id;
resp.operation = op;
resp.status = st;
- spin_lock_irqsave(&blkif->blk_ring_lock, flags);
+ spin_lock_irqsave(&ring->blk_ring_lock, flags);
+ blk_rings = &ring->blk_rings;
/* Place on the response ring for the relevant domain. */
- switch (blkif->blk_protocol) {
+ switch (ring->blkif->blk_protocol) {
case BLKIF_PROTOCOL_NATIVE:
memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
&resp, sizeof(resp));
@@ -1439,9 +1461,9 @@ static void make_response(struct xen_blkif *blkif, u64 id,
}
blk_rings->common.rsp_prod_pvt++;
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
- spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
+ spin_unlock_irqrestore(&ring->blk_ring_lock, flags);
if (notify)
- notify_remote_via_irq(blkif->irq);
+ notify_remote_via_irq(ring->irq);
}
static int __init xen_blkif_init(void)
@@ -1457,6 +1479,9 @@ static int __init xen_blkif_init(void)
xen_blkif_max_ring_order = XENBUS_MAX_RING_GRANT_ORDER;
}
+ if (xenblk_max_queues == 0)
+ xenblk_max_queues = num_online_cpus();
+
rc = xen_blkif_interface_init();
if (rc)
goto failed_init;
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index c929ae2..dea61f6 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -46,6 +46,7 @@
#include <xen/interface/io/protocols.h>
extern unsigned int xen_blkif_max_ring_order;
+extern unsigned int xenblk_max_queues;
/*
* This is the maximum number of segments that would be allowed in indirect
* requests. This value will also be passed to the frontend.
@@ -269,68 +270,79 @@ struct persistent_gnt {
struct list_head remove_node;
};
-struct xen_blkif {
- /* Unique identifier for this interface. */
- domid_t domid;
- unsigned int handle;
+/* Per-ring information. */
+struct xen_blkif_ring {
/* Physical parameters of the comms window. */
unsigned int irq;
- /* Comms information. */
- enum blkif_protocol blk_protocol;
union blkif_back_rings blk_rings;
void *blk_ring;
- /* The VBD attached to this interface. */
- struct xen_vbd vbd;
- /* Back pointer to the backend_info. */
- struct backend_info *be;
/* Private fields. */
spinlock_t blk_ring_lock;
- atomic_t refcnt;
wait_queue_head_t wq;
- /* for barrier (drain) requests */
- struct completion drain_complete;
- atomic_t drain;
atomic_t inflight;
- /* One thread per one blkif. */
+ /* One thread per blkif ring. */
struct task_struct *xenblkd;
unsigned int waiting_reqs;
- /* tree to store persistent grants */
+ /* List of all 'pending_req' available */
+ struct list_head pending_free;
+ /* And its spinlock. */
+ spinlock_t pending_free_lock;
+ wait_queue_head_t pending_free_wq;
+
+ /* Tree to store persistent grants. */
+ spinlock_t pers_gnts_lock;
struct rb_root persistent_gnts;
unsigned int persistent_gnt_c;
atomic_t persistent_gnt_in_use;
unsigned long next_lru;
- /* used by the kworker that offload work from the persistent purge */
+ /* Statistics. */
+ unsigned long st_print;
+ unsigned long long st_rd_req;
+ unsigned long long st_wr_req;
+ unsigned long long st_oo_req;
+ unsigned long long st_f_req;
+ unsigned long long st_ds_req;
+ unsigned long long st_rd_sect;
+ unsigned long long st_wr_sect;
+
+ /* Used by the kworker that offload work from the persistent purge. */
struct list_head persistent_purge_list;
struct work_struct persistent_purge_work;
- /* buffer of free pages to map grant refs */
+ /* Buffer of free pages to map grant refs. */
spinlock_t free_pages_lock;
int free_pages_num;
struct list_head free_pages;
- /* List of all 'pending_req' available */
- struct list_head pending_free;
- /* And its spinlock. */
- spinlock_t pending_free_lock;
- wait_queue_head_t pending_free_wq;
-
- /* statistics */
- unsigned long st_print;
- unsigned long long st_rd_req;
- unsigned long long st_wr_req;
- unsigned long long st_oo_req;
- unsigned long long st_f_req;
- unsigned long long st_ds_req;
- unsigned long long st_rd_sect;
- unsigned long long st_wr_sect;
-
struct work_struct free_work;
/* Thread shutdown wait queue. */
wait_queue_head_t shutdown_wq;
- unsigned int nr_ring_pages;
+ struct xen_blkif *blkif;
+};
+
+struct xen_blkif {
+ /* Unique identifier for this interface. */
+ domid_t domid;
+ unsigned int handle;
+ /* Comms information. */
+ enum blkif_protocol blk_protocol;
+ /* The VBD attached to this interface. */
+ struct xen_vbd vbd;
+ /* Back pointer to the backend_info. */
+ struct backend_info *be;
+ atomic_t refcnt;
+ /* for barrier (drain) requests */
+ struct completion drain_complete;
+ atomic_t drain;
+
+ struct work_struct free_work;
+ unsigned int nr_ring_pages;
+ /* All rings for this device. */
+ struct xen_blkif_ring *rings;
+ unsigned int nr_rings;
};
struct seg_buf {
@@ -352,7 +364,7 @@ struct grant_page {
* response queued for it, with the saved 'id' passed back.
*/
struct pending_req {
- struct xen_blkif *blkif;
+ struct xen_blkif_ring *ring;
u64 id;
int nr_segs;
atomic_t pendcnt;
@@ -394,7 +406,7 @@ int xen_blkif_xenbus_init(void);
irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
int xen_blkif_schedule(void *arg);
int xen_blkif_purge_persistent(void *arg);
-void xen_blkbk_free_caches(struct xen_blkif *blkif);
+void xen_blkbk_free_caches(struct xen_blkif_ring *ring);
int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
struct backend_info *be, int state);
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index f53cff4..876763f 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -86,9 +86,11 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
{
int err;
char name[BLKBACK_NAME_LEN];
+ struct xen_blkif_ring *ring;
+ int i;
/* Not ready to connect? */
- if (!blkif->irq || !blkif->vbd.bdev)
+ if (!blkif->rings || !blkif->rings[0].irq || !blkif->vbd.bdev)
return;
/* Already connected? */
@@ -113,13 +115,55 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
}
invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);
- blkif->xenblkd = kthread_run(xen_blkif_schedule, blkif, "%s", name);
- if (IS_ERR(blkif->xenblkd)) {
- err = PTR_ERR(blkif->xenblkd);
- blkif->xenblkd = NULL;
- xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
- return;
+ for (i = 0; i < blkif->nr_rings; i++) {
+ ring = &blkif->rings[i];
+ ring->xenblkd = kthread_run(xen_blkif_schedule, ring, "%s-%d", name, i);
+ if (IS_ERR(ring->xenblkd)) {
+ err = PTR_ERR(ring->xenblkd);
+ ring->xenblkd = NULL;
+ xenbus_dev_fatal(blkif->be->dev, err,
+ "start %s-%d xenblkd", name, i);
+ goto out;
+ }
+ }
+ return;
+
+out:
+ while (--i >= 0) {
+ ring = &blkif->rings[i];
+ kthread_stop(ring->xenblkd);
+ }
+ return;
+}
+
+static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
+{
+ unsigned int r;
+
+ blkif->rings = kzalloc(blkif->nr_rings * sizeof(struct xen_blkif_ring), GFP_KERNEL);
+ if (!blkif->rings)
+ return -ENOMEM;
+
+ for (r = 0; r < blkif->nr_rings; r++) {
+ struct xen_blkif_ring *ring = &blkif->rings[r];
+
+ spin_lock_init(&ring->blk_ring_lock);
+ init_waitqueue_head(&ring->wq);
+ INIT_LIST_HEAD(&ring->pending_free);
+ INIT_LIST_HEAD(&ring->persistent_purge_list);
+ INIT_WORK(&ring->persistent_purge_work, xen_blkbk_unmap_purged_grants);
+ spin_lock_init(&ring->free_pages_lock);
+ INIT_LIST_HEAD(&ring->free_pages);
+
+ spin_lock_init(&ring->pending_free_lock);
+ init_waitqueue_head(&ring->pending_free_wq);
+ init_waitqueue_head(&ring->shutdown_wq);
+ ring->blkif = blkif;
+ ring->st_print = jiffies;
+ xen_blkif_get(blkif);
}
+
+ return 0;
}
static struct xen_blkif *xen_blkif_alloc(domid_t domid)
@@ -133,41 +177,25 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
return ERR_PTR(-ENOMEM);
blkif->domid = domid;
- spin_lock_init(&blkif->blk_ring_lock);
atomic_set(&blkif->refcnt, 1);
- init_waitqueue_head(&blkif->wq);
init_completion(&blkif->drain_complete);
- atomic_set(&blkif->drain, 0);
- blkif->st_print = jiffies;
- blkif->persistent_gnts.rb_node = NULL;
- spin_lock_init(&blkif->free_pages_lock);
- INIT_LIST_HEAD(&blkif->free_pages);
- INIT_LIST_HEAD(&blkif->persistent_purge_list);
- blkif->free_pages_num = 0;
- atomic_set(&blkif->persistent_gnt_in_use, 0);
- atomic_set(&blkif->inflight, 0);
- INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants);
-
- INIT_LIST_HEAD(&blkif->pending_free);
INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
- spin_lock_init(&blkif->pending_free_lock);
- init_waitqueue_head(&blkif->pending_free_wq);
- init_waitqueue_head(&blkif->shutdown_wq);
return blkif;
}
-static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref,
+static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref,
unsigned int nr_grefs, unsigned int evtchn)
{
int err;
+ struct xen_blkif *blkif = ring->blkif;
/* Already connected through? */
- if (blkif->irq)
+ if (ring->irq)
return 0;
err = xenbus_map_ring_valloc(blkif->be->dev, gref, nr_grefs,
- &blkif->blk_ring);
+ &ring->blk_ring);
if (err < 0)
return err;
@@ -175,24 +203,24 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref,
case BLKIF_PROTOCOL_NATIVE:
{
struct blkif_sring *sring;
- sring = (struct blkif_sring *)blkif->blk_ring;
- BACK_RING_INIT(&blkif->blk_rings.native, sring,
+ sring = (struct blkif_sring *)ring->blk_ring;
+ BACK_RING_INIT(&ring->blk_rings.native, sring,
XEN_PAGE_SIZE * nr_grefs);
break;
}
case BLKIF_PROTOCOL_X86_32:
{
struct blkif_x86_32_sring *sring_x86_32;
- sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
- BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32,
+ sring_x86_32 = (struct blkif_x86_32_sring *)ring->blk_ring;
+ BACK_RING_INIT(&ring->blk_rings.x86_32, sring_x86_32,
XEN_PAGE_SIZE * nr_grefs);
break;
}
case BLKIF_PROTOCOL_X86_64:
{
struct blkif_x86_64_sring *sring_x86_64;
- sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
- BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64,
+ sring_x86_64 = (struct blkif_x86_64_sring *)ring->blk_ring;
+ BACK_RING_INIT(&ring->blk_rings.x86_64, sring_x86_64,
XEN_PAGE_SIZE * nr_grefs);
break;
}
@@ -202,13 +230,13 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref,
err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn,
xen_blkif_be_int, 0,
- "blkif-backend", blkif);
+ "blkif-backend", ring);
if (err < 0) {
- xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
- blkif->blk_rings.common.sring = NULL;
+ xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
+ ring->blk_rings.common.sring = NULL;
return err;
}
- blkif->irq = err;
+ ring->irq = err;
return 0;
}
@@ -216,50 +244,69 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref,
static int xen_blkif_disconnect(struct xen_blkif *blkif)
{
struct pending_req *req, *n;
- int i = 0, j;
+ unsigned int j, r;
- if (blkif->xenblkd) {
- kthread_stop(blkif->xenblkd);
- wake_up(&blkif->shutdown_wq);
- blkif->xenblkd = NULL;
- }
+ for (r = 0; r < blkif->nr_rings; r++) {
+ struct xen_blkif_ring *ring = &blkif->rings[r];
+ unsigned int i = 0;
- /* The above kthread_stop() guarantees that at this point we
- * don't have any discard_io or other_io requests. So, checking
- * for inflight IO is enough.
- */
- if (atomic_read(&blkif->inflight) > 0)
- return -EBUSY;
+ if (ring->xenblkd) {
+ kthread_stop(ring->xenblkd);
+ wake_up(&ring->shutdown_wq);
+ ring->xenblkd = NULL;
+ }
- if (blkif->irq) {
- unbind_from_irqhandler(blkif->irq, blkif);
- blkif->irq = 0;
- }
+ /* The above kthread_stop() guarantees that at this point we
+ * don't have any discard_io or other_io requests. So, checking
+ * for inflight IO is enough.
+ */
+ if (atomic_read(&ring->inflight) > 0)
+ return -EBUSY;
- if (blkif->blk_rings.common.sring) {
- xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
- blkif->blk_rings.common.sring = NULL;
- }
+ if (ring->irq) {
+ unbind_from_irqhandler(ring->irq, ring);
+ ring->irq = 0;
+ }
- /* Remove all persistent grants and the cache of ballooned pages. */
- xen_blkbk_free_caches(blkif);
+ if (ring->blk_rings.common.sring) {
+ xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
+ ring->blk_rings.common.sring = NULL;
+ }
- /* Check that there is no request in use */
- list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
- list_del(&req->free_list);
+ /* Remove all persistent grants and the cache of ballooned pages. */
+ xen_blkbk_free_caches(ring);
- for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
- kfree(req->segments[j]);
+ /* Check that there is no request in use */
+ list_for_each_entry_safe(req, n, &ring->pending_free, free_list) {
+ list_del(&req->free_list);
- for (j = 0; j < MAX_INDIRECT_PAGES; j++)
- kfree(req->indirect_pages[j]);
+ for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
+ kfree(req->segments[j]);
- kfree(req);
- i++;
- }
+ for (j = 0; j < MAX_INDIRECT_PAGES; j++)
+ kfree(req->indirect_pages[j]);
+
+ kfree(req);
+ i++;
+ }
- WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
+ BUG_ON(atomic_read(&ring->persistent_gnt_in_use) != 0);
+ BUG_ON(!list_empty(&ring->persistent_purge_list));
+ BUG_ON(!RB_EMPTY_ROOT(&ring->persistent_gnts));
+ BUG_ON(!list_empty(&ring->free_pages));
+ BUG_ON(ring->free_pages_num != 0);
+ BUG_ON(ring->persistent_gnt_c != 0);
+ WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
+ xen_blkif_put(blkif);
+ }
blkif->nr_ring_pages = 0;
+ /*
+ * blkif->rings was allocated in connect_ring, so we should free it in
+ * here.
+ */
+ kfree(blkif->rings);
+ blkif->rings = NULL;
+ blkif->nr_rings = 0;
return 0;
}
@@ -271,13 +318,6 @@ static void xen_blkif_free(struct xen_blkif *blkif)
xen_vbd_free(&blkif->vbd);
/* Make sure everything is drained before shutting down */
- BUG_ON(blkif->persistent_gnt_c != 0);
- BUG_ON(atomic_read(&blkif->persistent_gnt_in_use) != 0);
- BUG_ON(blkif->free_pages_num != 0);
- BUG_ON(!list_empty(&blkif->persistent_purge_list));
- BUG_ON(!list_empty(&blkif->free_pages));
- BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
-
kmem_cache_free(xen_blkif_cachep, blkif);
}
@@ -296,25 +336,38 @@ int __init xen_blkif_interface_init(void)
* sysfs interface for VBD I/O requests
*/
-#define VBD_SHOW(name, format, args...) \
+#define VBD_SHOW_ALLRING(name, format) \
static ssize_t show_##name(struct device *_dev, \
struct device_attribute *attr, \
char *buf) \
{ \
struct xenbus_device *dev = to_xenbus_device(_dev); \
struct backend_info *be = dev_get_drvdata(&dev->dev); \
+ struct xen_blkif *blkif = be->blkif; \
+ unsigned int i; \
+ unsigned long long result = 0; \
\
- return sprintf(buf, format, ##args); \
+ if (!blkif->rings) \
+ goto out; \
+ \
+ for (i = 0; i < blkif->nr_rings; i++) { \
+ struct xen_blkif_ring *ring = &blkif->rings[i]; \
+ \
+ result += ring->st_##name; \
+ } \
+ \
+out: \
+ return sprintf(buf, format, result); \
} \
static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
-VBD_SHOW(oo_req, "%llu\n", be->blkif->st_oo_req);
-VBD_SHOW(rd_req, "%llu\n", be->blkif->st_rd_req);
-VBD_SHOW(wr_req, "%llu\n", be->blkif->st_wr_req);
-VBD_SHOW(f_req, "%llu\n", be->blkif->st_f_req);
-VBD_SHOW(ds_req, "%llu\n", be->blkif->st_ds_req);
-VBD_SHOW(rd_sect, "%llu\n", be->blkif->st_rd_sect);
-VBD_SHOW(wr_sect, "%llu\n", be->blkif->st_wr_sect);
+VBD_SHOW_ALLRING(oo_req, "%llu\n");
+VBD_SHOW_ALLRING(rd_req, "%llu\n");
+VBD_SHOW_ALLRING(wr_req, "%llu\n");
+VBD_SHOW_ALLRING(f_req, "%llu\n");
+VBD_SHOW_ALLRING(ds_req, "%llu\n");
+VBD_SHOW_ALLRING(rd_sect, "%llu\n");
+VBD_SHOW_ALLRING(wr_sect, "%llu\n");
static struct attribute *xen_vbdstat_attrs[] = {
&dev_attr_oo_req.attr,
@@ -332,6 +385,18 @@ static struct attribute_group xen_vbdstat_group = {
.attrs = xen_vbdstat_attrs,
};
+#define VBD_SHOW(name, format, args...) \
+ static ssize_t show_##name(struct device *_dev, \
+ struct device_attribute *attr, \
+ char *buf) \
+ { \
+ struct xenbus_device *dev = to_xenbus_device(_dev); \
+ struct backend_info *be = dev_get_drvdata(&dev->dev); \
+ \
+ return sprintf(buf, format, ##args); \
+ } \
+ static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+
VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
VBD_SHOW(mode, "%s\n", be->mode);
@@ -440,11 +505,11 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
dev_set_drvdata(&dev->dev, NULL);
- if (be->blkif) {
+ if (be->blkif)
xen_blkif_disconnect(be->blkif);
- xen_blkif_put(be->blkif);
- }
+ /* Put the reference we set in xen_blkif_alloc(). */
+ xen_blkif_put(be->blkif);
kfree(be->mode);
kfree(be);
return 0;
@@ -553,6 +618,12 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
goto fail;
}
+ /* Multi-queue: advertise how many queues are supported by us.*/
+ err = xenbus_printf(XBT_NIL, dev->nodename,
+ "multi-queue-max-queues", "%u", xenblk_max_queues);
+ if (err)
+ pr_warn("Error writing multi-queue-max-queues\n");
+
/* setup back pointer */
be->blkif->be = be;
@@ -708,8 +779,14 @@ static void frontend_changed(struct xenbus_device *dev,
}
err = connect_ring(be);
- if (err)
+ if (err) {
+ /*
+ * Clean up so that memory resources can be used by
+ * other devices. connect_ring reported already error.
+ */
+ xen_blkif_disconnect(be->blkif);
break;
+ }
xen_update_blkif_status(be->blkif);
break;
@@ -825,50 +902,43 @@ again:
xenbus_transaction_end(xbt, 1);
}
-
-static int connect_ring(struct backend_info *be)
+/*
+ * Each ring may have multi pages, depends on "ring-page-order".
+ */
+static int read_per_ring_refs(struct xen_blkif_ring *ring, const char *dir)
{
- struct xenbus_device *dev = be->dev;
unsigned int ring_ref[XENBUS_MAX_RING_GRANTS];
- unsigned int evtchn, nr_grefs, ring_page_order;
- unsigned int pers_grants;
- char protocol[64] = "";
struct pending_req *req, *n;
int err, i, j;
+ struct xen_blkif *blkif = ring->blkif;
+ struct xenbus_device *dev = blkif->be->dev;
+ unsigned int ring_page_order, nr_grefs, evtchn;
- pr_debug("%s %s\n", __func__, dev->otherend);
-
- err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u",
+ err = xenbus_scanf(XBT_NIL, dir, "event-channel", "%u",
&evtchn);
if (err != 1) {
err = -EINVAL;
- xenbus_dev_fatal(dev, err, "reading %s/event-channel",
- dev->otherend);
+ xenbus_dev_fatal(dev, err, "reading %s/event-channel", dir);
return err;
}
- pr_info("event-channel %u\n", evtchn);
err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
&ring_page_order);
if (err != 1) {
- err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref",
- "%u", &ring_ref[0]);
+ err = xenbus_scanf(XBT_NIL, dir, "ring-ref", "%u", &ring_ref[0]);
if (err != 1) {
err = -EINVAL;
- xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
- dev->otherend);
+ xenbus_dev_fatal(dev, err, "reading %s/ring-ref", dir);
return err;
}
nr_grefs = 1;
- pr_info("%s:using single page: ring-ref %d\n", dev->otherend,
- ring_ref[0]);
} else {
unsigned int i;
if (ring_page_order > xen_blkif_max_ring_order) {
err = -EINVAL;
xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceed max:%d",
- dev->otherend, ring_page_order,
+ dir, ring_page_order,
xen_blkif_max_ring_order);
return err;
}
@@ -878,52 +948,23 @@ static int connect_ring(struct backend_info *be)
char ring_ref_name[RINGREF_NAME_LEN];
snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
- err = xenbus_scanf(XBT_NIL, dev->otherend, ring_ref_name,
+ err = xenbus_scanf(XBT_NIL, dir, ring_ref_name,
"%u", &ring_ref[i]);
if (err != 1) {
err = -EINVAL;
xenbus_dev_fatal(dev, err, "reading %s/%s",
- dev->otherend, ring_ref_name);
+ dir, ring_ref_name);
return err;
}
- pr_info("ring-ref%u: %u\n", i, ring_ref[i]);
}
}
-
- be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
- err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
- "%63s", protocol, NULL);
- if (err)
- strcpy(protocol, "unspecified, assuming default");
- else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
- be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
- else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
- be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
- else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
- be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
- else {
- xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
- return -1;
- }
- err = xenbus_gather(XBT_NIL, dev->otherend,
- "feature-persistent", "%u",
- &pers_grants, NULL);
- if (err)
- pers_grants = 0;
-
- be->blkif->vbd.feature_gnt_persistent = pers_grants;
- be->blkif->vbd.overflow_max_grants = 0;
- be->blkif->nr_ring_pages = nr_grefs;
-
- pr_info("ring-pages:%d, event-channel %d, protocol %d (%s) %s\n",
- nr_grefs, evtchn, be->blkif->blk_protocol, protocol,
- pers_grants ? "persistent grants" : "");
+ blkif->nr_ring_pages = nr_grefs;
for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) {
req = kzalloc(sizeof(*req), GFP_KERNEL);
if (!req)
goto fail;
- list_add_tail(&req->free_list, &be->blkif->pending_free);
+ list_add_tail(&req->free_list, &ring->pending_free);
for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
req->segments[j] = kzalloc(sizeof(*req->segments[0]), GFP_KERNEL);
if (!req->segments[j])
@@ -938,7 +979,7 @@ static int connect_ring(struct backend_info *be)
}
/* Map the shared frame, irq etc. */
- err = xen_blkif_map(be->blkif, ring_ref, nr_grefs, evtchn);
+ err = xen_blkif_map(ring, ring_ref, nr_grefs, evtchn);
if (err) {
xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn);
return err;
@@ -947,7 +988,7 @@ static int connect_ring(struct backend_info *be)
return 0;
fail:
- list_for_each_entry_safe(req, n, &be->blkif->pending_free, free_list) {
+ list_for_each_entry_safe(req, n, &ring->pending_free, free_list) {
list_del(&req->free_list);
for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
if (!req->segments[j])
@@ -962,6 +1003,93 @@ fail:
kfree(req);
}
return -ENOMEM;
+
+}
+
+static int connect_ring(struct backend_info *be)
+{
+ struct xenbus_device *dev = be->dev;
+ unsigned int pers_grants;
+ char protocol[64] = "";
+ int err, i;
+ char *xspath;
+ size_t xspathsize;
+ const size_t xenstore_path_ext_size = 11; /* sufficient for "/queue-NNN" */
+ unsigned int requested_num_queues = 0;
+
+ pr_debug("%s %s\n", __func__, dev->otherend);
+
+ be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
+ err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
+ "%63s", protocol, NULL);
+ if (err)
+ strcpy(protocol, "unspecified, assuming default");
+ else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
+ be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
+ else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
+ be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
+ else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
+ be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
+ else {
+ xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
+ return -ENOSYS;
+ }
+ err = xenbus_gather(XBT_NIL, dev->otherend,
+ "feature-persistent", "%u",
+ &pers_grants, NULL);
+ if (err)
+ pers_grants = 0;
+
+ be->blkif->vbd.feature_gnt_persistent = pers_grants;
+ be->blkif->vbd.overflow_max_grants = 0;
+
+ /*
+ * Read the number of hardware queues from frontend.
+ */
+ err = xenbus_scanf(XBT_NIL, dev->otherend, "multi-queue-num-queues",
+ "%u", &requested_num_queues);
+ if (err < 0) {
+ requested_num_queues = 1;
+ } else {
+ if (requested_num_queues > xenblk_max_queues
+ || requested_num_queues == 0) {
+ /* Buggy or malicious guest. */
+ xenbus_dev_fatal(dev, err,
+ "guest requested %u queues, exceeding the maximum of %u.",
+ requested_num_queues, xenblk_max_queues);
+ return -ENOSYS;
+ }
+ }
+ be->blkif->nr_rings = requested_num_queues;
+ if (xen_blkif_alloc_rings(be->blkif))
+ return -ENOMEM;
+
+ pr_info("%s: using %d queues, protocol %d (%s) %s\n", dev->nodename,
+ be->blkif->nr_rings, be->blkif->blk_protocol, protocol,
+ pers_grants ? "persistent grants" : "");
+
+ if (be->blkif->nr_rings == 1)
+ return read_per_ring_refs(&be->blkif->rings[0], dev->otherend);
+ else {
+ xspathsize = strlen(dev->otherend) + xenstore_path_ext_size;
+ xspath = kmalloc(xspathsize, GFP_KERNEL);
+ if (!xspath) {
+ xenbus_dev_fatal(dev, -ENOMEM, "reading ring references");
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < be->blkif->nr_rings; i++) {
+ memset(xspath, 0, xspathsize);
+ snprintf(xspath, xspathsize, "%s/queue-%u", dev->otherend, i);
+ err = read_per_ring_refs(&be->blkif->rings[i], xspath);
+ if (err) {
+ kfree(xspath);
+ return err;
+ }
+ }
+ kfree(xspath);
+ }
+ return 0;
}
static const struct xenbus_device_id xen_blkbk_ids[] = {
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 2fee2ee..8a8dc91 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -60,6 +60,20 @@
#include <asm/xen/hypervisor.h>
+/*
+ * The minimal size of segment supported by the block framework is PAGE_SIZE.
+ * When Linux is using a different page size than Xen, it may not be possible
+ * to put all the data in a single segment.
+ * This can happen when the backend doesn't support indirect descriptor and
+ * therefore the maximum amount of data that a request can carry is
+ * BLKIF_MAX_SEGMENTS_PER_REQUEST * XEN_PAGE_SIZE = 44KB
+ *
+ * Note that we only support one extra request. So the Linux page size
+ * should be <= ( 2 * BLKIF_MAX_SEGMENTS_PER_REQUEST * XEN_PAGE_SIZE) =
+ * 88KB.
+ */
+#define HAS_EXTRA_REQ (BLKIF_MAX_SEGMENTS_PER_REQUEST < XEN_PFN_PER_PAGE)
+
enum blkif_state {
BLKIF_STATE_DISCONNECTED,
BLKIF_STATE_CONNECTED,
@@ -72,6 +86,13 @@ struct grant {
struct list_head node;
};
+enum blk_req_status {
+ REQ_WAITING,
+ REQ_DONE,
+ REQ_ERROR,
+ REQ_EOPNOTSUPP,
+};
+
struct blk_shadow {
struct blkif_request req;
struct request *request;
@@ -79,6 +100,14 @@ struct blk_shadow {
struct grant **indirect_grants;
struct scatterlist *sg;
unsigned int num_sg;
+ enum blk_req_status status;
+
+ #define NO_ASSOCIATED_ID ~0UL
+ /*
+ * Id of the sibling if we ever need 2 requests when handling a
+ * block I/O request
+ */
+ unsigned long associated_id;
};
struct split_bio {
@@ -99,6 +128,10 @@ static unsigned int xen_blkif_max_segments = 32;
module_param_named(max, xen_blkif_max_segments, int, S_IRUGO);
MODULE_PARM_DESC(max, "Maximum amount of segments in indirect requests (default is 32)");
+static unsigned int xen_blkif_max_queues = 4;
+module_param_named(max_queues, xen_blkif_max_queues, uint, S_IRUGO);
+MODULE_PARM_DESC(max_queues, "Maximum number of hardware queues/rings used per virtual disk");
+
/*
* Maximum order of pages to be used for the shared ring between front and
* backend, 4KB page granularity is used.
@@ -114,10 +147,35 @@ MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the
__CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * XENBUS_MAX_RING_GRANTS)
/*
- * ring-ref%i i=(-1UL) would take 11 characters + 'ring-ref' is 8, so 19
- * characters are enough. Define to 20 to keep consist with backend.
+ * ring-ref%u i=(-1UL) would take 11 characters + 'ring-ref' is 8, so 19
+ * characters are enough. Define to 20 to keep consistent with backend.
*/
#define RINGREF_NAME_LEN (20)
+/*
+ * queue-%u would take 7 + 10(UINT_MAX) = 17 characters.
+ */
+#define QUEUE_NAME_LEN (17)
+
+/*
+ * Per-ring info.
+ * Every blkfront device can associate with one or more blkfront_ring_info,
+ * depending on how many hardware queues/rings to be used.
+ */
+struct blkfront_ring_info {
+ /* Lock to protect data in every ring buffer. */
+ spinlock_t ring_lock;
+ struct blkif_front_ring ring;
+ unsigned int ring_ref[XENBUS_MAX_RING_GRANTS];
+ unsigned int evtchn, irq;
+ struct work_struct work;
+ struct gnttab_free_callback callback;
+ struct blk_shadow shadow[BLK_MAX_RING_SIZE];
+ struct list_head indirect_pages;
+ struct list_head grants;
+ unsigned int persistent_gnts_c;
+ unsigned long shadow_free;
+ struct blkfront_info *dev_info;
+};
/*
* We have one of these per vbd, whether ide, scsi or 'other'. They
@@ -126,25 +184,15 @@ MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the
*/
struct blkfront_info
{
- spinlock_t io_lock;
struct mutex mutex;
struct xenbus_device *xbdev;
struct gendisk *gd;
int vdevice;
blkif_vdev_t handle;
enum blkif_state connected;
- int ring_ref[XENBUS_MAX_RING_GRANTS];
+ /* Number of pages per ring buffer. */
unsigned int nr_ring_pages;
- struct blkif_front_ring ring;
- unsigned int evtchn, irq;
struct request_queue *rq;
- struct work_struct work;
- struct gnttab_free_callback callback;
- struct blk_shadow shadow[BLK_MAX_RING_SIZE];
- struct list_head grants;
- struct list_head indirect_pages;
- unsigned int persistent_gnts_c;
- unsigned long shadow_free;
unsigned int feature_flush;
unsigned int feature_discard:1;
unsigned int feature_secdiscard:1;
@@ -155,6 +203,8 @@ struct blkfront_info
unsigned int max_indirect_segments;
int is_ready;
struct blk_mq_tag_set tag_set;
+ struct blkfront_ring_info *rinfo;
+ unsigned int nr_rings;
};
static unsigned int nr_minors;
@@ -198,38 +248,40 @@ static DEFINE_SPINLOCK(minor_lock);
#define GREFS(_psegs) ((_psegs) * GRANTS_PER_PSEG)
-static int blkfront_setup_indirect(struct blkfront_info *info);
-static int blkfront_gather_backend_features(struct blkfront_info *info);
+static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo);
+static void blkfront_gather_backend_features(struct blkfront_info *info);
-static int get_id_from_freelist(struct blkfront_info *info)
+static int get_id_from_freelist(struct blkfront_ring_info *rinfo)
{
- unsigned long free = info->shadow_free;
- BUG_ON(free >= BLK_RING_SIZE(info));
- info->shadow_free = info->shadow[free].req.u.rw.id;
- info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
+ unsigned long free = rinfo->shadow_free;
+
+ BUG_ON(free >= BLK_RING_SIZE(rinfo->dev_info));
+ rinfo->shadow_free = rinfo->shadow[free].req.u.rw.id;
+ rinfo->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
return free;
}
-static int add_id_to_freelist(struct blkfront_info *info,
- unsigned long id)
+static int add_id_to_freelist(struct blkfront_ring_info *rinfo,
+ unsigned long id)
{
- if (info->shadow[id].req.u.rw.id != id)
+ if (rinfo->shadow[id].req.u.rw.id != id)
return -EINVAL;
- if (info->shadow[id].request == NULL)
+ if (rinfo->shadow[id].request == NULL)
return -EINVAL;
- info->shadow[id].req.u.rw.id = info->shadow_free;
- info->shadow[id].request = NULL;
- info->shadow_free = id;
+ rinfo->shadow[id].req.u.rw.id = rinfo->shadow_free;
+ rinfo->shadow[id].request = NULL;
+ rinfo->shadow_free = id;
return 0;
}
-static int fill_grant_buffer(struct blkfront_info *info, int num)
+static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num)
{
+ struct blkfront_info *info = rinfo->dev_info;
struct page *granted_page;
struct grant *gnt_list_entry, *n;
int i = 0;
- while(i < num) {
+ while (i < num) {
gnt_list_entry = kzalloc(sizeof(struct grant), GFP_NOIO);
if (!gnt_list_entry)
goto out_of_memory;
@@ -244,7 +296,7 @@ static int fill_grant_buffer(struct blkfront_info *info, int num)
}
gnt_list_entry->gref = GRANT_INVALID_REF;
- list_add(&gnt_list_entry->node, &info->grants);
+ list_add(&gnt_list_entry->node, &rinfo->grants);
i++;
}
@@ -252,7 +304,7 @@ static int fill_grant_buffer(struct blkfront_info *info, int num)
out_of_memory:
list_for_each_entry_safe(gnt_list_entry, n,
- &info->grants, node) {
+ &rinfo->grants, node) {
list_del(&gnt_list_entry->node);
if (info->feature_persistent)
__free_page(gnt_list_entry->page);
@@ -263,17 +315,17 @@ out_of_memory:
return -ENOMEM;
}
-static struct grant *get_free_grant(struct blkfront_info *info)
+static struct grant *get_free_grant(struct blkfront_ring_info *rinfo)
{
struct grant *gnt_list_entry;
- BUG_ON(list_empty(&info->grants));
- gnt_list_entry = list_first_entry(&info->grants, struct grant,
+ BUG_ON(list_empty(&rinfo->grants));
+ gnt_list_entry = list_first_entry(&rinfo->grants, struct grant,
node);
list_del(&gnt_list_entry->node);
if (gnt_list_entry->gref != GRANT_INVALID_REF)
- info->persistent_gnts_c--;
+ rinfo->persistent_gnts_c--;
return gnt_list_entry;
}
@@ -289,9 +341,10 @@ static inline void grant_foreign_access(const struct grant *gnt_list_entry,
static struct grant *get_grant(grant_ref_t *gref_head,
unsigned long gfn,
- struct blkfront_info *info)
+ struct blkfront_ring_info *rinfo)
{
- struct grant *gnt_list_entry = get_free_grant(info);
+ struct grant *gnt_list_entry = get_free_grant(rinfo);
+ struct blkfront_info *info = rinfo->dev_info;
if (gnt_list_entry->gref != GRANT_INVALID_REF)
return gnt_list_entry;
@@ -312,9 +365,10 @@ static struct grant *get_grant(grant_ref_t *gref_head,
}
static struct grant *get_indirect_grant(grant_ref_t *gref_head,
- struct blkfront_info *info)
+ struct blkfront_ring_info *rinfo)
{
- struct grant *gnt_list_entry = get_free_grant(info);
+ struct grant *gnt_list_entry = get_free_grant(rinfo);
+ struct blkfront_info *info = rinfo->dev_info;
if (gnt_list_entry->gref != GRANT_INVALID_REF)
return gnt_list_entry;
@@ -326,8 +380,8 @@ static struct grant *get_indirect_grant(grant_ref_t *gref_head,
struct page *indirect_page;
/* Fetch a pre-allocated page to use for indirect grefs */
- BUG_ON(list_empty(&info->indirect_pages));
- indirect_page = list_first_entry(&info->indirect_pages,
+ BUG_ON(list_empty(&rinfo->indirect_pages));
+ indirect_page = list_first_entry(&rinfo->indirect_pages,
struct page, lru);
list_del(&indirect_page->lru);
gnt_list_entry->page = indirect_page;
@@ -403,8 +457,8 @@ static void xlbd_release_minors(unsigned int minor, unsigned int nr)
static void blkif_restart_queue_callback(void *arg)
{
- struct blkfront_info *info = (struct blkfront_info *)arg;
- schedule_work(&info->work);
+ struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)arg;
+ schedule_work(&rinfo->work);
}
static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
@@ -456,16 +510,33 @@ static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
return 0;
}
-static int blkif_queue_discard_req(struct request *req)
+static unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo,
+ struct request *req,
+ struct blkif_request **ring_req)
{
- struct blkfront_info *info = req->rq_disk->private_data;
+ unsigned long id;
+
+ *ring_req = RING_GET_REQUEST(&rinfo->ring, rinfo->ring.req_prod_pvt);
+ rinfo->ring.req_prod_pvt++;
+
+ id = get_id_from_freelist(rinfo);
+ rinfo->shadow[id].request = req;
+ rinfo->shadow[id].status = REQ_WAITING;
+ rinfo->shadow[id].associated_id = NO_ASSOCIATED_ID;
+
+ (*ring_req)->u.rw.id = id;
+
+ return id;
+}
+
+static int blkif_queue_discard_req(struct request *req, struct blkfront_ring_info *rinfo)
+{
+ struct blkfront_info *info = rinfo->dev_info;
struct blkif_request *ring_req;
unsigned long id;
/* Fill out a communications ring structure. */
- ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
- id = get_id_from_freelist(info);
- info->shadow[id].request = req;
+ id = blkif_ring_get_request(rinfo, req, &ring_req);
ring_req->operation = BLKIF_OP_DISCARD;
ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
@@ -476,10 +547,8 @@ static int blkif_queue_discard_req(struct request *req)
else
ring_req->u.discard.flag = 0;
- info->ring.req_prod_pvt++;
-
/* Keep a private copy so we can reissue requests when recovering. */
- info->shadow[id].req = *ring_req;
+ rinfo->shadow[id].req = *ring_req;
return 0;
}
@@ -487,7 +556,7 @@ static int blkif_queue_discard_req(struct request *req)
struct setup_rw_req {
unsigned int grant_idx;
struct blkif_request_segment *segments;
- struct blkfront_info *info;
+ struct blkfront_ring_info *rinfo;
struct blkif_request *ring_req;
grant_ref_t gref_head;
unsigned int id;
@@ -495,6 +564,9 @@ struct setup_rw_req {
bool need_copy;
unsigned int bvec_off;
char *bvec_data;
+
+ bool require_extra_req;
+ struct blkif_request *extra_ring_req;
};
static void blkif_setup_rw_req_grant(unsigned long gfn, unsigned int offset,
@@ -507,8 +579,24 @@ static void blkif_setup_rw_req_grant(unsigned long gfn, unsigned int offset,
/* Convenient aliases */
unsigned int grant_idx = setup->grant_idx;
struct blkif_request *ring_req = setup->ring_req;
- struct blkfront_info *info = setup->info;
- struct blk_shadow *shadow = &info->shadow[setup->id];
+ struct blkfront_ring_info *rinfo = setup->rinfo;
+ /*
+ * We always use the shadow of the first request to store the list
+ * of grant associated to the block I/O request. This made the
+ * completion more easy to handle even if the block I/O request is
+ * split.
+ */
+ struct blk_shadow *shadow = &rinfo->shadow[setup->id];
+
+ if (unlikely(setup->require_extra_req &&
+ grant_idx >= BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
+ /*
+ * We are using the second request, setup grant_idx
+ * to be the index of the segment array.
+ */
+ grant_idx -= BLKIF_MAX_SEGMENTS_PER_REQUEST;
+ ring_req = setup->extra_ring_req;
+ }
if ((ring_req->operation == BLKIF_OP_INDIRECT) &&
(grant_idx % GRANTS_PER_INDIRECT_FRAME == 0)) {
@@ -516,15 +604,19 @@ static void blkif_setup_rw_req_grant(unsigned long gfn, unsigned int offset,
kunmap_atomic(setup->segments);
n = grant_idx / GRANTS_PER_INDIRECT_FRAME;
- gnt_list_entry = get_indirect_grant(&setup->gref_head, info);
+ gnt_list_entry = get_indirect_grant(&setup->gref_head, rinfo);
shadow->indirect_grants[n] = gnt_list_entry;
setup->segments = kmap_atomic(gnt_list_entry->page);
ring_req->u.indirect.indirect_grefs[n] = gnt_list_entry->gref;
}
- gnt_list_entry = get_grant(&setup->gref_head, gfn, info);
+ gnt_list_entry = get_grant(&setup->gref_head, gfn, rinfo);
ref = gnt_list_entry->gref;
- shadow->grants_used[grant_idx] = gnt_list_entry;
+ /*
+ * All the grants are stored in the shadow of the first
+ * request. Therefore we have to use the global index.
+ */
+ shadow->grants_used[setup->grant_idx] = gnt_list_entry;
if (setup->need_copy) {
void *shared_data;
@@ -566,16 +658,36 @@ static void blkif_setup_rw_req_grant(unsigned long gfn, unsigned int offset,
(setup->grant_idx)++;
}
-static int blkif_queue_rw_req(struct request *req)
+static void blkif_setup_extra_req(struct blkif_request *first,
+ struct blkif_request *second)
{
- struct blkfront_info *info = req->rq_disk->private_data;
- struct blkif_request *ring_req;
- unsigned long id;
+ uint16_t nr_segments = first->u.rw.nr_segments;
+
+ /*
+ * The second request is only present when the first request uses
+ * all its segments. It's always the continuity of the first one.
+ */
+ first->u.rw.nr_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+
+ second->u.rw.nr_segments = nr_segments - BLKIF_MAX_SEGMENTS_PER_REQUEST;
+ second->u.rw.sector_number = first->u.rw.sector_number +
+ (BLKIF_MAX_SEGMENTS_PER_REQUEST * XEN_PAGE_SIZE) / 512;
+
+ second->u.rw.handle = first->u.rw.handle;
+ second->operation = first->operation;
+}
+
+static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *rinfo)
+{
+ struct blkfront_info *info = rinfo->dev_info;
+ struct blkif_request *ring_req, *extra_ring_req = NULL;
+ unsigned long id, extra_id = NO_ASSOCIATED_ID;
+ bool require_extra_req = false;
int i;
struct setup_rw_req setup = {
.grant_idx = 0,
.segments = NULL,
- .info = info,
+ .rinfo = rinfo,
.need_copy = rq_data_dir(req) && info->feature_persistent,
};
@@ -584,7 +696,6 @@ static int blkif_queue_rw_req(struct request *req)
* existing persistent grants, or if we have to get new grants,
* as there are not sufficiently many free.
*/
- bool new_persistent_gnts;
struct scatterlist *sg;
int num_sg, max_grefs, num_grant;
@@ -596,41 +707,36 @@ static int blkif_queue_rw_req(struct request *req)
*/
max_grefs += INDIRECT_GREFS(max_grefs);
- /* Check if we have enough grants to allocate a requests */
- if (info->persistent_gnts_c < max_grefs) {
- new_persistent_gnts = 1;
- if (gnttab_alloc_grant_references(
- max_grefs - info->persistent_gnts_c,
- &setup.gref_head) < 0) {
+ /*
+ * We have to reserve 'max_grefs' grants because persistent
+ * grants are shared by all rings.
+ */
+ if (max_grefs > 0)
+ if (gnttab_alloc_grant_references(max_grefs, &setup.gref_head) < 0) {
gnttab_request_free_callback(
- &info->callback,
+ &rinfo->callback,
blkif_restart_queue_callback,
- info,
+ rinfo,
max_grefs);
return 1;
}
- } else
- new_persistent_gnts = 0;
/* Fill out a communications ring structure. */
- ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
- id = get_id_from_freelist(info);
- info->shadow[id].request = req;
-
- BUG_ON(info->max_indirect_segments == 0 &&
- GREFS(req->nr_phys_segments) > BLKIF_MAX_SEGMENTS_PER_REQUEST);
- BUG_ON(info->max_indirect_segments &&
- GREFS(req->nr_phys_segments) > info->max_indirect_segments);
+ id = blkif_ring_get_request(rinfo, req, &ring_req);
- num_sg = blk_rq_map_sg(req->q, req, info->shadow[id].sg);
+ num_sg = blk_rq_map_sg(req->q, req, rinfo->shadow[id].sg);
num_grant = 0;
/* Calculate the number of grant used */
- for_each_sg(info->shadow[id].sg, sg, num_sg, i)
+ for_each_sg(rinfo->shadow[id].sg, sg, num_sg, i)
num_grant += gnttab_count_grant(sg->offset, sg->length);
- ring_req->u.rw.id = id;
- info->shadow[id].num_sg = num_sg;
- if (num_grant > BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+ require_extra_req = info->max_indirect_segments == 0 &&
+ num_grant > BLKIF_MAX_SEGMENTS_PER_REQUEST;
+ BUG_ON(!HAS_EXTRA_REQ && require_extra_req);
+
+ rinfo->shadow[id].num_sg = num_sg;
+ if (num_grant > BLKIF_MAX_SEGMENTS_PER_REQUEST &&
+ likely(!require_extra_req)) {
/*
* The indirect operation can only be a BLKIF_OP_READ or
* BLKIF_OP_WRITE
@@ -670,11 +776,31 @@ static int blkif_queue_rw_req(struct request *req)
}
}
ring_req->u.rw.nr_segments = num_grant;
+ if (unlikely(require_extra_req)) {
+ extra_id = blkif_ring_get_request(rinfo, req,
+ &extra_ring_req);
+ /*
+ * Only the first request contains the scatter-gather
+ * list.
+ */
+ rinfo->shadow[extra_id].num_sg = 0;
+
+ blkif_setup_extra_req(ring_req, extra_ring_req);
+
+ /* Link the 2 requests together */
+ rinfo->shadow[extra_id].associated_id = id;
+ rinfo->shadow[id].associated_id = extra_id;
+ }
}
setup.ring_req = ring_req;
setup.id = id;
- for_each_sg(info->shadow[id].sg, sg, num_sg, i) {
+
+ setup.require_extra_req = require_extra_req;
+ if (unlikely(require_extra_req))
+ setup.extra_ring_req = extra_ring_req;
+
+ for_each_sg(rinfo->shadow[id].sg, sg, num_sg, i) {
BUG_ON(sg->offset + sg->length > PAGE_SIZE);
if (setup.need_copy) {
@@ -694,12 +820,12 @@ static int blkif_queue_rw_req(struct request *req)
if (setup.segments)
kunmap_atomic(setup.segments);
- info->ring.req_prod_pvt++;
-
/* Keep a private copy so we can reissue requests when recovering. */
- info->shadow[id].req = *ring_req;
+ rinfo->shadow[id].req = *ring_req;
+ if (unlikely(require_extra_req))
+ rinfo->shadow[extra_id].req = *extra_ring_req;
- if (new_persistent_gnts)
+ if (max_grefs > 0)
gnttab_free_grant_references(setup.gref_head);
return 0;
@@ -711,27 +837,25 @@ static int blkif_queue_rw_req(struct request *req)
*
* @req: a request struct
*/
-static int blkif_queue_request(struct request *req)
+static int blkif_queue_request(struct request *req, struct blkfront_ring_info *rinfo)
{
- struct blkfront_info *info = req->rq_disk->private_data;
-
- if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
+ if (unlikely(rinfo->dev_info->connected != BLKIF_STATE_CONNECTED))
return 1;
if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE)))
- return blkif_queue_discard_req(req);
+ return blkif_queue_discard_req(req, rinfo);
else
- return blkif_queue_rw_req(req);
+ return blkif_queue_rw_req(req, rinfo);
}
-static inline void flush_requests(struct blkfront_info *info)
+static inline void flush_requests(struct blkfront_ring_info *rinfo)
{
int notify;
- RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&rinfo->ring, notify);
if (notify)
- notify_remote_via_irq(info->irq);
+ notify_remote_via_irq(rinfo->irq);
}
static inline bool blkif_request_flush_invalid(struct request *req,
@@ -745,38 +869,50 @@ static inline bool blkif_request_flush_invalid(struct request *req,
}
static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
- const struct blk_mq_queue_data *qd)
+ const struct blk_mq_queue_data *qd)
{
- struct blkfront_info *info = qd->rq->rq_disk->private_data;
+ unsigned long flags;
+ struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)hctx->driver_data;
blk_mq_start_request(qd->rq);
- spin_lock_irq(&info->io_lock);
- if (RING_FULL(&info->ring))
+ spin_lock_irqsave(&rinfo->ring_lock, flags);
+ if (RING_FULL(&rinfo->ring))
goto out_busy;
- if (blkif_request_flush_invalid(qd->rq, info))
+ if (blkif_request_flush_invalid(qd->rq, rinfo->dev_info))
goto out_err;
- if (blkif_queue_request(qd->rq))
+ if (blkif_queue_request(qd->rq, rinfo))
goto out_busy;
- flush_requests(info);
- spin_unlock_irq(&info->io_lock);
+ flush_requests(rinfo);
+ spin_unlock_irqrestore(&rinfo->ring_lock, flags);
return BLK_MQ_RQ_QUEUE_OK;
out_err:
- spin_unlock_irq(&info->io_lock);
+ spin_unlock_irqrestore(&rinfo->ring_lock, flags);
return BLK_MQ_RQ_QUEUE_ERROR;
out_busy:
- spin_unlock_irq(&info->io_lock);
+ spin_unlock_irqrestore(&rinfo->ring_lock, flags);
blk_mq_stop_hw_queue(hctx);
return BLK_MQ_RQ_QUEUE_BUSY;
}
+static int blk_mq_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
+ unsigned int index)
+{
+ struct blkfront_info *info = (struct blkfront_info *)data;
+
+ BUG_ON(info->nr_rings <= index);
+ hctx->driver_data = &info->rinfo[index];
+ return 0;
+}
+
static struct blk_mq_ops blkfront_mq_ops = {
.queue_rq = blkif_queue_rq,
.map_queue = blk_mq_map_queue,
+ .init_hctx = blk_mq_init_hctx,
};
static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
@@ -788,19 +924,28 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
memset(&info->tag_set, 0, sizeof(info->tag_set));
info->tag_set.ops = &blkfront_mq_ops;
- info->tag_set.nr_hw_queues = 1;
- info->tag_set.queue_depth = BLK_RING_SIZE(info);
+ info->tag_set.nr_hw_queues = info->nr_rings;
+ if (HAS_EXTRA_REQ && info->max_indirect_segments == 0) {
+ /*
+ * When indirect descriptior is not supported, the I/O request
+ * will be split between multiple request in the ring.
+ * To avoid problems when sending the request, divide by
+ * 2 the depth of the queue.
+ */
+ info->tag_set.queue_depth = BLK_RING_SIZE(info) / 2;
+ } else
+ info->tag_set.queue_depth = BLK_RING_SIZE(info);
info->tag_set.numa_node = NUMA_NO_NODE;
info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
info->tag_set.cmd_size = 0;
info->tag_set.driver_data = info;
if (blk_mq_alloc_tag_set(&info->tag_set))
- return -1;
+ return -EINVAL;
rq = blk_mq_init_queue(&info->tag_set);
if (IS_ERR(rq)) {
blk_mq_free_tag_set(&info->tag_set);
- return -1;
+ return PTR_ERR(rq);
}
queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
@@ -1028,7 +1173,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
static void xlvbd_release_gendisk(struct blkfront_info *info)
{
- unsigned int minor, nr_minors;
+ unsigned int minor, nr_minors, i;
if (info->rq == NULL)
return;
@@ -1036,11 +1181,15 @@ static void xlvbd_release_gendisk(struct blkfront_info *info)
/* No more blkif_request(). */
blk_mq_stop_hw_queues(info->rq);
- /* No more gnttab callback work. */
- gnttab_cancel_free_callback(&info->callback);
+ for (i = 0; i < info->nr_rings; i++) {
+ struct blkfront_ring_info *rinfo = &info->rinfo[i];
- /* Flush gnttab callback work. Must be done with no locks held. */
- flush_work(&info->work);
+ /* No more gnttab callback work. */
+ gnttab_cancel_free_callback(&rinfo->callback);
+
+ /* Flush gnttab callback work. Must be done with no locks held. */
+ flush_work(&rinfo->work);
+ }
del_gendisk(info->gd);
@@ -1056,88 +1205,87 @@ static void xlvbd_release_gendisk(struct blkfront_info *info)
info->gd = NULL;
}
-/* Must be called with io_lock holded */
-static void kick_pending_request_queues(struct blkfront_info *info)
+/* Already hold rinfo->ring_lock. */
+static inline void kick_pending_request_queues_locked(struct blkfront_ring_info *rinfo)
{
- if (!RING_FULL(&info->ring))
- blk_mq_start_stopped_hw_queues(info->rq, true);
+ if (!RING_FULL(&rinfo->ring))
+ blk_mq_start_stopped_hw_queues(rinfo->dev_info->rq, true);
}
-static void blkif_restart_queue(struct work_struct *work)
+static void kick_pending_request_queues(struct blkfront_ring_info *rinfo)
{
- struct blkfront_info *info = container_of(work, struct blkfront_info, work);
+ unsigned long flags;
- spin_lock_irq(&info->io_lock);
- if (info->connected == BLKIF_STATE_CONNECTED)
- kick_pending_request_queues(info);
- spin_unlock_irq(&info->io_lock);
+ spin_lock_irqsave(&rinfo->ring_lock, flags);
+ kick_pending_request_queues_locked(rinfo);
+ spin_unlock_irqrestore(&rinfo->ring_lock, flags);
}
-static void blkif_free(struct blkfront_info *info, int suspend)
+static void blkif_restart_queue(struct work_struct *work)
{
- struct grant *persistent_gnt;
- struct grant *n;
- int i, j, segs;
+ struct blkfront_ring_info *rinfo = container_of(work, struct blkfront_ring_info, work);
- /* Prevent new requests being issued until we fix things up. */
- spin_lock_irq(&info->io_lock);
- info->connected = suspend ?
- BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
- /* No more blkif_request(). */
- if (info->rq)
- blk_mq_stop_hw_queues(info->rq);
+ if (rinfo->dev_info->connected == BLKIF_STATE_CONNECTED)
+ kick_pending_request_queues(rinfo);
+}
- /* Remove all persistent grants */
- if (!list_empty(&info->grants)) {
- list_for_each_entry_safe(persistent_gnt, n,
- &info->grants, node) {
- list_del(&persistent_gnt->node);
- if (persistent_gnt->gref != GRANT_INVALID_REF) {
- gnttab_end_foreign_access(persistent_gnt->gref,
- 0, 0UL);
- info->persistent_gnts_c--;
- }
- if (info->feature_persistent)
- __free_page(persistent_gnt->page);
- kfree(persistent_gnt);
- }
- }
- BUG_ON(info->persistent_gnts_c != 0);
+static void blkif_free_ring(struct blkfront_ring_info *rinfo)
+{
+ struct grant *persistent_gnt, *n;
+ struct blkfront_info *info = rinfo->dev_info;
+ int i, j, segs;
/*
* Remove indirect pages, this only happens when using indirect
* descriptors but not persistent grants
*/
- if (!list_empty(&info->indirect_pages)) {
+ if (!list_empty(&rinfo->indirect_pages)) {
struct page *indirect_page, *n;
BUG_ON(info->feature_persistent);
- list_for_each_entry_safe(indirect_page, n, &info->indirect_pages, lru) {
+ list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) {
list_del(&indirect_page->lru);
__free_page(indirect_page);
}
}
+ /* Remove all persistent grants. */
+ if (!list_empty(&rinfo->grants)) {
+ list_for_each_entry_safe(persistent_gnt, n,
+ &rinfo->grants, node) {
+ list_del(&persistent_gnt->node);
+ if (persistent_gnt->gref != GRANT_INVALID_REF) {
+ gnttab_end_foreign_access(persistent_gnt->gref,
+ 0, 0UL);
+ rinfo->persistent_gnts_c--;
+ }
+ if (info->feature_persistent)
+ __free_page(persistent_gnt->page);
+ kfree(persistent_gnt);
+ }
+ }
+ BUG_ON(rinfo->persistent_gnts_c != 0);
+
for (i = 0; i < BLK_RING_SIZE(info); i++) {
/*
* Clear persistent grants present in requests already
* on the shared ring
*/
- if (!info->shadow[i].request)
+ if (!rinfo->shadow[i].request)
goto free_shadow;
- segs = info->shadow[i].req.operation == BLKIF_OP_INDIRECT ?
- info->shadow[i].req.u.indirect.nr_segments :
- info->shadow[i].req.u.rw.nr_segments;
+ segs = rinfo->shadow[i].req.operation == BLKIF_OP_INDIRECT ?
+ rinfo->shadow[i].req.u.indirect.nr_segments :
+ rinfo->shadow[i].req.u.rw.nr_segments;
for (j = 0; j < segs; j++) {
- persistent_gnt = info->shadow[i].grants_used[j];
+ persistent_gnt = rinfo->shadow[i].grants_used[j];
gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
if (info->feature_persistent)
__free_page(persistent_gnt->page);
kfree(persistent_gnt);
}
- if (info->shadow[i].req.operation != BLKIF_OP_INDIRECT)
+ if (rinfo->shadow[i].req.operation != BLKIF_OP_INDIRECT)
/*
* If this is not an indirect operation don't try to
* free indirect segments
@@ -1145,42 +1293,59 @@ static void blkif_free(struct blkfront_info *info, int suspend)
goto free_shadow;
for (j = 0; j < INDIRECT_GREFS(segs); j++) {
- persistent_gnt = info->shadow[i].indirect_grants[j];
+ persistent_gnt = rinfo->shadow[i].indirect_grants[j];
gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
__free_page(persistent_gnt->page);
kfree(persistent_gnt);
}
free_shadow:
- kfree(info->shadow[i].grants_used);
- info->shadow[i].grants_used = NULL;
- kfree(info->shadow[i].indirect_grants);
- info->shadow[i].indirect_grants = NULL;
- kfree(info->shadow[i].sg);
- info->shadow[i].sg = NULL;
+ kfree(rinfo->shadow[i].grants_used);
+ rinfo->shadow[i].grants_used = NULL;
+ kfree(rinfo->shadow[i].indirect_grants);
+ rinfo->shadow[i].indirect_grants = NULL;
+ kfree(rinfo->shadow[i].sg);
+ rinfo->shadow[i].sg = NULL;
}
/* No more gnttab callback work. */
- gnttab_cancel_free_callback(&info->callback);
- spin_unlock_irq(&info->io_lock);
+ gnttab_cancel_free_callback(&rinfo->callback);
/* Flush gnttab callback work. Must be done with no locks held. */
- flush_work(&info->work);
+ flush_work(&rinfo->work);
/* Free resources associated with old device channel. */
for (i = 0; i < info->nr_ring_pages; i++) {
- if (info->ring_ref[i] != GRANT_INVALID_REF) {
- gnttab_end_foreign_access(info->ring_ref[i], 0, 0);
- info->ring_ref[i] = GRANT_INVALID_REF;
+ if (rinfo->ring_ref[i] != GRANT_INVALID_REF) {
+ gnttab_end_foreign_access(rinfo->ring_ref[i], 0, 0);
+ rinfo->ring_ref[i] = GRANT_INVALID_REF;
}
}
- free_pages((unsigned long)info->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE));
- info->ring.sring = NULL;
+ free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE));
+ rinfo->ring.sring = NULL;
- if (info->irq)
- unbind_from_irqhandler(info->irq, info);
- info->evtchn = info->irq = 0;
+ if (rinfo->irq)
+ unbind_from_irqhandler(rinfo->irq, rinfo);
+ rinfo->evtchn = rinfo->irq = 0;
+}
+static void blkif_free(struct blkfront_info *info, int suspend)
+{
+ unsigned int i;
+
+ /* Prevent new requests being issued until we fix things up. */
+ info->connected = suspend ?
+ BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
+ /* No more blkif_request(). */
+ if (info->rq)
+ blk_mq_stop_hw_queues(info->rq);
+
+ for (i = 0; i < info->nr_rings; i++)
+ blkif_free_ring(&info->rinfo[i]);
+
+ kfree(info->rinfo);
+ info->rinfo = NULL;
+ info->nr_rings = 0;
}
struct copy_from_grant {
@@ -1209,19 +1374,93 @@ static void blkif_copy_from_grant(unsigned long gfn, unsigned int offset,
kunmap_atomic(shared_data);
}
-static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
+static enum blk_req_status blkif_rsp_to_req_status(int rsp)
+{
+ switch (rsp)
+ {
+ case BLKIF_RSP_OKAY:
+ return REQ_DONE;
+ case BLKIF_RSP_EOPNOTSUPP:
+ return REQ_EOPNOTSUPP;
+ case BLKIF_RSP_ERROR:
+ /* Fallthrough. */
+ default:
+ return REQ_ERROR;
+ }
+}
+
+/*
+ * Get the final status of the block request based on two ring response
+ */
+static int blkif_get_final_status(enum blk_req_status s1,
+ enum blk_req_status s2)
+{
+ BUG_ON(s1 == REQ_WAITING);
+ BUG_ON(s2 == REQ_WAITING);
+
+ if (s1 == REQ_ERROR || s2 == REQ_ERROR)
+ return BLKIF_RSP_ERROR;
+ else if (s1 == REQ_EOPNOTSUPP || s2 == REQ_EOPNOTSUPP)
+ return BLKIF_RSP_EOPNOTSUPP;
+ return BLKIF_RSP_OKAY;
+}
+
+static bool blkif_completion(unsigned long *id,
+ struct blkfront_ring_info *rinfo,
struct blkif_response *bret)
{
int i = 0;
struct scatterlist *sg;
int num_sg, num_grant;
+ struct blkfront_info *info = rinfo->dev_info;
+ struct blk_shadow *s = &rinfo->shadow[*id];
struct copy_from_grant data = {
- .s = s,
.grant_idx = 0,
};
num_grant = s->req.operation == BLKIF_OP_INDIRECT ?
s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments;
+
+ /* The I/O request may be split in two. */
+ if (unlikely(s->associated_id != NO_ASSOCIATED_ID)) {
+ struct blk_shadow *s2 = &rinfo->shadow[s->associated_id];
+
+ /* Keep the status of the current response in shadow. */
+ s->status = blkif_rsp_to_req_status(bret->status);
+
+ /* Wait the second response if not yet here. */
+ if (s2->status == REQ_WAITING)
+ return 0;
+
+ bret->status = blkif_get_final_status(s->status,
+ s2->status);
+
+ /*
+ * All the grants is stored in the first shadow in order
+ * to make the completion code simpler.
+ */
+ num_grant += s2->req.u.rw.nr_segments;
+
+ /*
+ * The two responses may not come in order. Only the
+ * first request will store the scatter-gather list.
+ */
+ if (s2->num_sg != 0) {
+ /* Update "id" with the ID of the first response. */
+ *id = s->associated_id;
+ s = s2;
+ }
+
+ /*
+ * We don't need anymore the second request, so recycling
+ * it now.
+ */
+ if (add_id_to_freelist(rinfo, s->associated_id))
+ WARN(1, "%s: can't recycle the second part (id = %ld) of the request\n",
+ info->gd->disk_name, s->associated_id);
+ }
+
+ data.s = s;
num_sg = s->num_sg;
if (bret->operation == BLKIF_OP_READ && info->feature_persistent) {
@@ -1252,8 +1491,8 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
if (!info->feature_persistent)
pr_alert_ratelimited("backed has not unmapped grant: %u\n",
s->grants_used[i]->gref);
- list_add(&s->grants_used[i]->node, &info->grants);
- info->persistent_gnts_c++;
+ list_add(&s->grants_used[i]->node, &rinfo->grants);
+ rinfo->persistent_gnts_c++;
} else {
/*
* If the grant is not mapped by the backend we end the
@@ -1263,7 +1502,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
*/
gnttab_end_foreign_access(s->grants_used[i]->gref, 0, 0UL);
s->grants_used[i]->gref = GRANT_INVALID_REF;
- list_add_tail(&s->grants_used[i]->node, &info->grants);
+ list_add_tail(&s->grants_used[i]->node, &rinfo->grants);
}
}
if (s->req.operation == BLKIF_OP_INDIRECT) {
@@ -1272,8 +1511,8 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
if (!info->feature_persistent)
pr_alert_ratelimited("backed has not unmapped grant: %u\n",
s->indirect_grants[i]->gref);
- list_add(&s->indirect_grants[i]->node, &info->grants);
- info->persistent_gnts_c++;
+ list_add(&s->indirect_grants[i]->node, &rinfo->grants);
+ rinfo->persistent_gnts_c++;
} else {
struct page *indirect_page;
@@ -1284,13 +1523,15 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
*/
if (!info->feature_persistent) {
indirect_page = s->indirect_grants[i]->page;
- list_add(&indirect_page->lru, &info->indirect_pages);
+ list_add(&indirect_page->lru, &rinfo->indirect_pages);
}
s->indirect_grants[i]->gref = GRANT_INVALID_REF;
- list_add_tail(&s->indirect_grants[i]->node, &info->grants);
+ list_add_tail(&s->indirect_grants[i]->node, &rinfo->grants);
}
}
}
+
+ return 1;
}
static irqreturn_t blkif_interrupt(int irq, void *dev_id)
@@ -1299,24 +1540,22 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
struct blkif_response *bret;
RING_IDX i, rp;
unsigned long flags;
- struct blkfront_info *info = (struct blkfront_info *)dev_id;
+ struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id;
+ struct blkfront_info *info = rinfo->dev_info;
int error;
- spin_lock_irqsave(&info->io_lock, flags);
-
- if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
- spin_unlock_irqrestore(&info->io_lock, flags);
+ if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
return IRQ_HANDLED;
- }
+ spin_lock_irqsave(&rinfo->ring_lock, flags);
again:
- rp = info->ring.sring->rsp_prod;
+ rp = rinfo->ring.sring->rsp_prod;
rmb(); /* Ensure we see queued responses up to 'rp'. */
- for (i = info->ring.rsp_cons; i != rp; i++) {
+ for (i = rinfo->ring.rsp_cons; i != rp; i++) {
unsigned long id;
- bret = RING_GET_RESPONSE(&info->ring, i);
+ bret = RING_GET_RESPONSE(&rinfo->ring, i);
id = bret->id;
/*
* The backend has messed up and given us an id that we would
@@ -1330,12 +1569,18 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
* the id is busted. */
continue;
}
- req = info->shadow[id].request;
+ req = rinfo->shadow[id].request;
- if (bret->operation != BLKIF_OP_DISCARD)
- blkif_completion(&info->shadow[id], info, bret);
+ if (bret->operation != BLKIF_OP_DISCARD) {
+ /*
+ * We may need to wait for an extra response if the
+ * I/O request is split in 2
+ */
+ if (!blkif_completion(&id, rinfo, bret))
+ continue;
+ }
- if (add_id_to_freelist(info, id)) {
+ if (add_id_to_freelist(rinfo, id)) {
WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
info->gd->disk_name, op_name(bret->operation), id);
continue;
@@ -1364,7 +1609,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
error = -EOPNOTSUPP;
}
if (unlikely(bret->status == BLKIF_RSP_ERROR &&
- info->shadow[id].req.u.rw.nr_segments == 0)) {
+ rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
info->gd->disk_name, op_name(bret->operation));
error = -EOPNOTSUPP;
@@ -1389,34 +1634,35 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
}
}
- info->ring.rsp_cons = i;
+ rinfo->ring.rsp_cons = i;
- if (i != info->ring.req_prod_pvt) {
+ if (i != rinfo->ring.req_prod_pvt) {
int more_to_do;
- RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
+ RING_FINAL_CHECK_FOR_RESPONSES(&rinfo->ring, more_to_do);
if (more_to_do)
goto again;
} else
- info->ring.sring->rsp_event = i + 1;
+ rinfo->ring.sring->rsp_event = i + 1;
- kick_pending_request_queues(info);
+ kick_pending_request_queues_locked(rinfo);
- spin_unlock_irqrestore(&info->io_lock, flags);
+ spin_unlock_irqrestore(&rinfo->ring_lock, flags);
return IRQ_HANDLED;
}
static int setup_blkring(struct xenbus_device *dev,
- struct blkfront_info *info)
+ struct blkfront_ring_info *rinfo)
{
struct blkif_sring *sring;
int err, i;
+ struct blkfront_info *info = rinfo->dev_info;
unsigned long ring_size = info->nr_ring_pages * XEN_PAGE_SIZE;
grant_ref_t gref[XENBUS_MAX_RING_GRANTS];
for (i = 0; i < info->nr_ring_pages; i++)
- info->ring_ref[i] = GRANT_INVALID_REF;
+ rinfo->ring_ref[i] = GRANT_INVALID_REF;
sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH,
get_order(ring_size));
@@ -1425,29 +1671,29 @@ static int setup_blkring(struct xenbus_device *dev,
return -ENOMEM;
}
SHARED_RING_INIT(sring);
- FRONT_RING_INIT(&info->ring, sring, ring_size);
+ FRONT_RING_INIT(&rinfo->ring, sring, ring_size);
- err = xenbus_grant_ring(dev, info->ring.sring, info->nr_ring_pages, gref);
+ err = xenbus_grant_ring(dev, rinfo->ring.sring, info->nr_ring_pages, gref);
if (err < 0) {
free_pages((unsigned long)sring, get_order(ring_size));
- info->ring.sring = NULL;
+ rinfo->ring.sring = NULL;
goto fail;
}
for (i = 0; i < info->nr_ring_pages; i++)
- info->ring_ref[i] = gref[i];
+ rinfo->ring_ref[i] = gref[i];
- err = xenbus_alloc_evtchn(dev, &info->evtchn);
+ err = xenbus_alloc_evtchn(dev, &rinfo->evtchn);
if (err)
goto fail;
- err = bind_evtchn_to_irqhandler(info->evtchn, blkif_interrupt, 0,
- "blkif", info);
+ err = bind_evtchn_to_irqhandler(rinfo->evtchn, blkif_interrupt, 0,
+ "blkif", rinfo);
if (err <= 0) {
xenbus_dev_fatal(dev, err,
"bind_evtchn_to_irqhandler failed");
goto fail;
}
- info->irq = err;
+ rinfo->irq = err;
return 0;
fail:
@@ -1455,6 +1701,53 @@ fail:
return err;
}
+/*
+ * Write out per-ring/queue nodes including ring-ref and event-channel, and each
+ * ring buffer may have multi pages depending on ->nr_ring_pages.
+ */
+static int write_per_ring_nodes(struct xenbus_transaction xbt,
+ struct blkfront_ring_info *rinfo, const char *dir)
+{
+ int err;
+ unsigned int i;
+ const char *message = NULL;
+ struct blkfront_info *info = rinfo->dev_info;
+
+ if (info->nr_ring_pages == 1) {
+ err = xenbus_printf(xbt, dir, "ring-ref", "%u", rinfo->ring_ref[0]);
+ if (err) {
+ message = "writing ring-ref";
+ goto abort_transaction;
+ }
+ } else {
+ for (i = 0; i < info->nr_ring_pages; i++) {
+ char ring_ref_name[RINGREF_NAME_LEN];
+
+ snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
+ err = xenbus_printf(xbt, dir, ring_ref_name,
+ "%u", rinfo->ring_ref[i]);
+ if (err) {
+ message = "writing ring-ref";
+ goto abort_transaction;
+ }
+ }
+ }
+
+ err = xenbus_printf(xbt, dir, "event-channel", "%u", rinfo->evtchn);
+ if (err) {
+ message = "writing event-channel";
+ goto abort_transaction;
+ }
+
+ return 0;
+
+abort_transaction:
+ xenbus_transaction_end(xbt, 1);
+ if (message)
+ xenbus_dev_fatal(info->xbdev, err, "%s", message);
+
+ return err;
+}
/* Common code used when first setting up, and when resuming. */
static int talk_to_blkback(struct xenbus_device *dev,
@@ -1462,8 +1755,8 @@ static int talk_to_blkback(struct xenbus_device *dev,
{
const char *message = NULL;
struct xenbus_transaction xbt;
- int err, i;
- unsigned int max_page_order = 0;
+ int err;
+ unsigned int i, max_page_order = 0;
unsigned int ring_page_order = 0;
err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
@@ -1475,10 +1768,14 @@ static int talk_to_blkback(struct xenbus_device *dev,
info->nr_ring_pages = 1 << ring_page_order;
}
- /* Create shared ring, alloc event channel. */
- err = setup_blkring(dev, info);
- if (err)
- goto out;
+ for (i = 0; i < info->nr_rings; i++) {
+ struct blkfront_ring_info *rinfo = &info->rinfo[i];
+
+ /* Create shared ring, alloc event channel. */
+ err = setup_blkring(dev, rinfo);
+ if (err)
+ goto destroy_blkring;
+ }
again:
err = xenbus_transaction_start(&xbt);
@@ -1487,38 +1784,49 @@ again:
goto destroy_blkring;
}
- if (info->nr_ring_pages == 1) {
- err = xenbus_printf(xbt, dev->nodename,
- "ring-ref", "%u", info->ring_ref[0]);
+ if (info->nr_ring_pages > 1) {
+ err = xenbus_printf(xbt, dev->nodename, "ring-page-order", "%u",
+ ring_page_order);
if (err) {
- message = "writing ring-ref";
+ message = "writing ring-page-order";
goto abort_transaction;
}
+ }
+
+ /* We already got the number of queues/rings in _probe */
+ if (info->nr_rings == 1) {
+ err = write_per_ring_nodes(xbt, &info->rinfo[0], dev->nodename);
+ if (err)
+ goto destroy_blkring;
} else {
- err = xenbus_printf(xbt, dev->nodename,
- "ring-page-order", "%u", ring_page_order);
+ char *path;
+ size_t pathsize;
+
+ err = xenbus_printf(xbt, dev->nodename, "multi-queue-num-queues", "%u",
+ info->nr_rings);
if (err) {
- message = "writing ring-page-order";
+ message = "writing multi-queue-num-queues";
goto abort_transaction;
}
- for (i = 0; i < info->nr_ring_pages; i++) {
- char ring_ref_name[RINGREF_NAME_LEN];
+ pathsize = strlen(dev->nodename) + QUEUE_NAME_LEN;
+ path = kmalloc(pathsize, GFP_KERNEL);
+ if (!path) {
+ err = -ENOMEM;
+ message = "ENOMEM while writing ring references";
+ goto abort_transaction;
+ }
- snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
- err = xenbus_printf(xbt, dev->nodename, ring_ref_name,
- "%u", info->ring_ref[i]);
+ for (i = 0; i < info->nr_rings; i++) {
+ memset(path, 0, pathsize);
+ snprintf(path, pathsize, "%s/queue-%u", dev->nodename, i);
+ err = write_per_ring_nodes(xbt, &info->rinfo[i], path);
if (err) {
- message = "writing ring-ref";
- goto abort_transaction;
+ kfree(path);
+ goto destroy_blkring;
}
}
- }
- err = xenbus_printf(xbt, dev->nodename,
- "event-channel", "%u", info->evtchn);
- if (err) {
- message = "writing event-channel";
- goto abort_transaction;
+ kfree(path);
}
err = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
XEN_IO_PROTO_ABI_NATIVE);
@@ -1540,9 +1848,14 @@ again:
goto destroy_blkring;
}
- for (i = 0; i < BLK_RING_SIZE(info); i++)
- info->shadow[i].req.u.rw.id = i+1;
- info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
+ for (i = 0; i < info->nr_rings; i++) {
+ unsigned int j;
+ struct blkfront_ring_info *rinfo = &info->rinfo[i];
+
+ for (j = 0; j < BLK_RING_SIZE(info); j++)
+ rinfo->shadow[j].req.u.rw.id = j + 1;
+ rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
+ }
xenbus_switch_state(dev, XenbusStateInitialised);
return 0;
@@ -1553,7 +1866,10 @@ again:
xenbus_dev_fatal(dev, err, "%s", message);
destroy_blkring:
blkif_free(info, 0);
- out:
+
+ kfree(info);
+ dev_set_drvdata(&dev->dev, NULL);
+
return err;
}
@@ -1567,7 +1883,9 @@ static int blkfront_probe(struct xenbus_device *dev,
const struct xenbus_device_id *id)
{
int err, vdevice;
+ unsigned int r_index;
struct blkfront_info *info;
+ unsigned int backend_max_queues = 0;
/* FIXME: Use dynamic device id if this is not set. */
err = xenbus_scanf(XBT_NIL, dev->nodename,
@@ -1617,15 +1935,39 @@ static int blkfront_probe(struct xenbus_device *dev,
return -ENOMEM;
}
- mutex_init(&info->mutex);
- spin_lock_init(&info->io_lock);
info->xbdev = dev;
+ /* Check if backend supports multiple queues. */
+ err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+ "multi-queue-max-queues", "%u", &backend_max_queues);
+ if (err < 0)
+ backend_max_queues = 1;
+
+ info->nr_rings = min(backend_max_queues, xen_blkif_max_queues);
+ /* We need at least one ring. */
+ if (!info->nr_rings)
+ info->nr_rings = 1;
+
+ info->rinfo = kzalloc(sizeof(struct blkfront_ring_info) * info->nr_rings, GFP_KERNEL);
+ if (!info->rinfo) {
+ xenbus_dev_fatal(dev, -ENOMEM, "allocating ring_info structure");
+ kfree(info);
+ return -ENOMEM;
+ }
+
+ for (r_index = 0; r_index < info->nr_rings; r_index++) {
+ struct blkfront_ring_info *rinfo;
+
+ rinfo = &info->rinfo[r_index];
+ INIT_LIST_HEAD(&rinfo->indirect_pages);
+ INIT_LIST_HEAD(&rinfo->grants);
+ rinfo->dev_info = info;
+ INIT_WORK(&rinfo->work, blkif_restart_queue);
+ spin_lock_init(&rinfo->ring_lock);
+ }
+
+ mutex_init(&info->mutex);
info->vdevice = vdevice;
- INIT_LIST_HEAD(&info->grants);
- INIT_LIST_HEAD(&info->indirect_pages);
- info->persistent_gnts_c = 0;
info->connected = BLKIF_STATE_DISCONNECTED;
- INIT_WORK(&info->work, blkif_restart_queue);
/* Front end dir is a number, which is used as the id. */
info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
@@ -1649,7 +1991,7 @@ static void split_bio_end(struct bio *bio)
static int blkif_recover(struct blkfront_info *info)
{
- int i;
+ unsigned int i, r_index;
struct request *req, *n;
struct blk_shadow *copy;
int rc;
@@ -1660,64 +2002,73 @@ static int blkif_recover(struct blkfront_info *info)
struct split_bio *split_bio;
struct list_head requests;
- /* Stage 1: Make a safe copy of the shadow state. */
- copy = kmemdup(info->shadow, sizeof(info->shadow),
- GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
- if (!copy)
- return -ENOMEM;
-
- /* Stage 2: Set up free list. */
- memset(&info->shadow, 0, sizeof(info->shadow));
- for (i = 0; i < BLK_RING_SIZE(info); i++)
- info->shadow[i].req.u.rw.id = i+1;
- info->shadow_free = info->ring.req_prod_pvt;
- info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
-
- rc = blkfront_gather_backend_features(info);
- if (rc) {
- kfree(copy);
- return rc;
- }
-
+ blkfront_gather_backend_features(info);
segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
blk_queue_max_segments(info->rq, segs);
bio_list_init(&bio_list);
INIT_LIST_HEAD(&requests);
- for (i = 0; i < BLK_RING_SIZE(info); i++) {
- /* Not in use? */
- if (!copy[i].request)
- continue;
- /*
- * Get the bios in the request so we can re-queue them.
- */
- if (copy[i].request->cmd_flags &
- (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
+ for (r_index = 0; r_index < info->nr_rings; r_index++) {
+ struct blkfront_ring_info *rinfo;
+
+ rinfo = &info->rinfo[r_index];
+ /* Stage 1: Make a safe copy of the shadow state. */
+ copy = kmemdup(rinfo->shadow, sizeof(rinfo->shadow),
+ GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
+ if (!copy)
+ return -ENOMEM;
+
+ /* Stage 2: Set up free list. */
+ memset(&rinfo->shadow, 0, sizeof(rinfo->shadow));
+ for (i = 0; i < BLK_RING_SIZE(info); i++)
+ rinfo->shadow[i].req.u.rw.id = i+1;
+ rinfo->shadow_free = rinfo->ring.req_prod_pvt;
+ rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
+
+ rc = blkfront_setup_indirect(rinfo);
+ if (rc) {
+ kfree(copy);
+ return rc;
+ }
+
+ for (i = 0; i < BLK_RING_SIZE(info); i++) {
+ /* Not in use? */
+ if (!copy[i].request)
+ continue;
+
/*
- * Flush operations don't contain bios, so
- * we need to requeue the whole request
+ * Get the bios in the request so we can re-queue them.
*/
- list_add(&copy[i].request->queuelist, &requests);
- continue;
+ if (copy[i].request->cmd_flags &
+ (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
+ /*
+ * Flush operations don't contain bios, so
+ * we need to requeue the whole request
+ */
+ list_add(&copy[i].request->queuelist, &requests);
+ continue;
+ }
+ merge_bio.head = copy[i].request->bio;
+ merge_bio.tail = copy[i].request->biotail;
+ bio_list_merge(&bio_list, &merge_bio);
+ copy[i].request->bio = NULL;
+ blk_end_request_all(copy[i].request, 0);
}
- merge_bio.head = copy[i].request->bio;
- merge_bio.tail = copy[i].request->biotail;
- bio_list_merge(&bio_list, &merge_bio);
- copy[i].request->bio = NULL;
- blk_end_request_all(copy[i].request, 0);
- }
-
- kfree(copy);
+ kfree(copy);
+ }
xenbus_switch_state(info->xbdev, XenbusStateConnected);
- spin_lock_irq(&info->io_lock);
-
/* Now safe for us to use the shared ring */
info->connected = BLKIF_STATE_CONNECTED;
- /* Kick any other new requests queued since we resumed */
- kick_pending_request_queues(info);
+ for (r_index = 0; r_index < info->nr_rings; r_index++) {
+ struct blkfront_ring_info *rinfo;
+
+ rinfo = &info->rinfo[r_index];
+ /* Kick any other new requests queued since we resumed */
+ kick_pending_request_queues(rinfo);
+ }
list_for_each_entry_safe(req, n, &requests, queuelist) {
/* Requeue pending requests (flush or discard) */
@@ -1725,7 +2076,6 @@ static int blkif_recover(struct blkfront_info *info)
BUG_ON(req->nr_phys_segments > segs);
blk_mq_requeue_request(req);
}
- spin_unlock_irq(&info->io_lock);
blk_mq_kick_requeue_list(info->rq);
while ((bio = bio_list_pop(&bio_list)) != NULL) {
@@ -1790,8 +2140,7 @@ static int blkfront_resume(struct xenbus_device *dev)
return err;
}
-static void
-blkfront_closing(struct blkfront_info *info)
+static void blkfront_closing(struct blkfront_info *info)
{
struct xenbus_device *xbdev = info->xbdev;
struct block_device *bdev = NULL;
@@ -1851,18 +2200,29 @@ static void blkfront_setup_discard(struct blkfront_info *info)
info->feature_secdiscard = !!discard_secure;
}
-static int blkfront_setup_indirect(struct blkfront_info *info)
+static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo)
{
unsigned int psegs, grants;
int err, i;
+ struct blkfront_info *info = rinfo->dev_info;
- if (info->max_indirect_segments == 0)
- grants = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+ if (info->max_indirect_segments == 0) {
+ if (!HAS_EXTRA_REQ)
+ grants = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+ else {
+ /*
+ * When an extra req is required, the maximum
+ * grants supported is related to the size of the
+ * Linux block segment.
+ */
+ grants = GRANTS_PER_PSEG;
+ }
+ }
else
grants = info->max_indirect_segments;
psegs = grants / GRANTS_PER_PSEG;
- err = fill_grant_buffer(info,
+ err = fill_grant_buffer(rinfo,
(grants + INDIRECT_GREFS(grants)) * BLK_RING_SIZE(info));
if (err)
goto out_of_memory;
@@ -1875,31 +2235,31 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
*/
int num = INDIRECT_GREFS(grants) * BLK_RING_SIZE(info);
- BUG_ON(!list_empty(&info->indirect_pages));
+ BUG_ON(!list_empty(&rinfo->indirect_pages));
for (i = 0; i < num; i++) {
struct page *indirect_page = alloc_page(GFP_NOIO);
if (!indirect_page)
goto out_of_memory;
- list_add(&indirect_page->lru, &info->indirect_pages);
+ list_add(&indirect_page->lru, &rinfo->indirect_pages);
}
}
for (i = 0; i < BLK_RING_SIZE(info); i++) {
- info->shadow[i].grants_used = kzalloc(
- sizeof(info->shadow[i].grants_used[0]) * grants,
+ rinfo->shadow[i].grants_used = kzalloc(
+ sizeof(rinfo->shadow[i].grants_used[0]) * grants,
GFP_NOIO);
- info->shadow[i].sg = kzalloc(sizeof(info->shadow[i].sg[0]) * psegs, GFP_NOIO);
+ rinfo->shadow[i].sg = kzalloc(sizeof(rinfo->shadow[i].sg[0]) * psegs, GFP_NOIO);
if (info->max_indirect_segments)
- info->shadow[i].indirect_grants = kzalloc(
- sizeof(info->shadow[i].indirect_grants[0]) *
+ rinfo->shadow[i].indirect_grants = kzalloc(
+ sizeof(rinfo->shadow[i].indirect_grants[0]) *
INDIRECT_GREFS(grants),
GFP_NOIO);
- if ((info->shadow[i].grants_used == NULL) ||
- (info->shadow[i].sg == NULL) ||
+ if ((rinfo->shadow[i].grants_used == NULL) ||
+ (rinfo->shadow[i].sg == NULL) ||
(info->max_indirect_segments &&
- (info->shadow[i].indirect_grants == NULL)))
+ (rinfo->shadow[i].indirect_grants == NULL)))
goto out_of_memory;
- sg_init_table(info->shadow[i].sg, psegs);
+ sg_init_table(rinfo->shadow[i].sg, psegs);
}
@@ -1907,16 +2267,16 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
out_of_memory:
for (i = 0; i < BLK_RING_SIZE(info); i++) {
- kfree(info->shadow[i].grants_used);
- info->shadow[i].grants_used = NULL;
- kfree(info->shadow[i].sg);
- info->shadow[i].sg = NULL;
- kfree(info->shadow[i].indirect_grants);
- info->shadow[i].indirect_grants = NULL;
- }
- if (!list_empty(&info->indirect_pages)) {
+ kfree(rinfo->shadow[i].grants_used);
+ rinfo->shadow[i].grants_used = NULL;
+ kfree(rinfo->shadow[i].sg);
+ rinfo->shadow[i].sg = NULL;
+ kfree(rinfo->shadow[i].indirect_grants);
+ rinfo->shadow[i].indirect_grants = NULL;
+ }
+ if (!list_empty(&rinfo->indirect_pages)) {
struct page *indirect_page, *n;
- list_for_each_entry_safe(indirect_page, n, &info->indirect_pages, lru) {
+ list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) {
list_del(&indirect_page->lru);
__free_page(indirect_page);
}
@@ -1927,7 +2287,7 @@ out_of_memory:
/*
* Gather all backend feature-*
*/
-static int blkfront_gather_backend_features(struct blkfront_info *info)
+static void blkfront_gather_backend_features(struct blkfront_info *info)
{
int err;
int barrier, flush, discard, persistent;
@@ -1982,8 +2342,6 @@ static int blkfront_gather_backend_features(struct blkfront_info *info)
else
info->max_indirect_segments = min(indirect_segments,
xen_blkif_max_segments);
-
- return blkfront_setup_indirect(info);
}
/*
@@ -1996,7 +2354,7 @@ static void blkfront_connect(struct blkfront_info *info)
unsigned long sector_size;
unsigned int physical_sector_size;
unsigned int binfo;
- int err;
+ int err, i;
switch (info->connected) {
case BLKIF_STATE_CONNECTED:
@@ -2053,11 +2411,15 @@ static void blkfront_connect(struct blkfront_info *info)
if (err != 1)
physical_sector_size = sector_size;
- err = blkfront_gather_backend_features(info);
- if (err) {
- xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s",
- info->xbdev->otherend);
- return;
+ blkfront_gather_backend_features(info);
+ for (i = 0; i < info->nr_rings; i++) {
+ err = blkfront_setup_indirect(&info->rinfo[i]);
+ if (err) {
+ xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s",
+ info->xbdev->otherend);
+ blkif_free(info, 0);
+ break;
+ }
}
err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size,
@@ -2071,10 +2433,9 @@ static void blkfront_connect(struct blkfront_info *info)
xenbus_switch_state(info->xbdev, XenbusStateConnected);
/* Kick pending requests. */
- spin_lock_irq(&info->io_lock);
info->connected = BLKIF_STATE_CONNECTED;
- kick_pending_request_queues(info);
- spin_unlock_irq(&info->io_lock);
+ for (i = 0; i < info->nr_rings; i++)
+ kick_pending_request_queues(&info->rinfo[i]);
add_disk(info->gd);
@@ -2095,11 +2456,8 @@ static void blkback_changed(struct xenbus_device *dev,
case XenbusStateInitWait:
if (dev->state != XenbusStateInitialising)
break;
- if (talk_to_blkback(dev, info)) {
- kfree(info);
- dev_set_drvdata(&dev->dev, NULL);
+ if (talk_to_blkback(dev, info))
break;
- }
case XenbusStateInitialising:
case XenbusStateInitialised:
case XenbusStateReconfiguring:
@@ -2108,6 +2466,10 @@ static void blkback_changed(struct xenbus_device *dev,
break;
case XenbusStateConnected:
+ if (dev->state != XenbusStateInitialised) {
+ if (talk_to_blkback(dev, info))
+ break;
+ }
blkfront_connect(info);
break;
@@ -2281,6 +2643,7 @@ static struct xenbus_driver blkfront_driver = {
static int __init xlblk_init(void)
{
int ret;
+ int nr_cpus = num_online_cpus();
if (!xen_domain())
return -ENODEV;
@@ -2288,7 +2651,13 @@ static int __init xlblk_init(void)
if (xen_blkif_max_ring_order > XENBUS_MAX_RING_GRANT_ORDER) {
pr_info("Invalid max_ring_order (%d), will use default max: %d.\n",
xen_blkif_max_ring_order, XENBUS_MAX_RING_GRANT_ORDER);
- xen_blkif_max_ring_order = 0;
+ xen_blkif_max_ring_order = XENBUS_MAX_RING_GRANT_ORDER;
+ }
+
+ if (xen_blkif_max_queues > nr_cpus) {
+ pr_info("Invalid max_queues (%d), will use default max: %d.\n",
+ xen_blkif_max_queues, nr_cpus);
+ xen_blkif_max_queues = nr_cpus;
}
if (!xen_has_pv_disk_devices())
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 6b1721f..4f6f94c 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -689,7 +689,7 @@ static loff_t memory_lseek(struct file *file, loff_t offset, int orig)
{
loff_t ret;
- mutex_lock(&file_inode(file)->i_mutex);
+ inode_lock(file_inode(file));
switch (orig) {
case SEEK_CUR:
offset += file->f_pos;
@@ -706,7 +706,7 @@ static loff_t memory_lseek(struct file *file, loff_t offset, int orig)
default:
ret = -EINVAL;
}
- mutex_unlock(&file_inode(file)->i_mutex);
+ inode_unlock(file_inode(file));
return ret;
}
diff --git a/drivers/char/mspec.c b/drivers/char/mspec.c
index f1d7fa4..f3f92d5 100644
--- a/drivers/char/mspec.c
+++ b/drivers/char/mspec.c
@@ -93,14 +93,11 @@ struct vma_data {
spinlock_t lock; /* Serialize access to this structure. */
int count; /* Number of pages allocated. */
enum mspec_page_type type; /* Type of pages allocated. */
- int flags; /* See VMD_xxx below. */
unsigned long vm_start; /* Original (unsplit) base. */
unsigned long vm_end; /* Original (unsplit) end. */
unsigned long maddr[0]; /* Array of MSPEC addresses. */
};
-#define VMD_VMALLOCED 0x1 /* vmalloc'd rather than kmalloc'd */
-
/* used on shub2 to clear FOP cache in the HUB */
static unsigned long scratch_page[MAX_NUMNODES];
#define SH2_AMO_CACHE_ENTRIES 4
@@ -185,10 +182,7 @@ mspec_close(struct vm_area_struct *vma)
"failed to zero page %ld\n", my_page);
}
- if (vdata->flags & VMD_VMALLOCED)
- vfree(vdata);
- else
- kfree(vdata);
+ kvfree(vdata);
}
/*
@@ -256,7 +250,7 @@ mspec_mmap(struct file *file, struct vm_area_struct *vma,
enum mspec_page_type type)
{
struct vma_data *vdata;
- int pages, vdata_size, flags = 0;
+ int pages, vdata_size;
if (vma->vm_pgoff != 0)
return -EINVAL;
@@ -271,16 +265,13 @@ mspec_mmap(struct file *file, struct vm_area_struct *vma,
vdata_size = sizeof(struct vma_data) + pages * sizeof(long);
if (vdata_size <= PAGE_SIZE)
vdata = kzalloc(vdata_size, GFP_KERNEL);
- else {
+ else
vdata = vzalloc(vdata_size);
- flags = VMD_VMALLOCED;
- }
if (!vdata)
return -ENOMEM;
vdata->vm_start = vma->vm_start;
vdata->vm_end = vma->vm_end;
- vdata->flags = flags;
vdata->type = type;
spin_lock_init(&vdata->lock);
atomic_set(&vdata->refcnt, 1);
diff --git a/drivers/char/ps3flash.c b/drivers/char/ps3flash.c
index 0b311fa..b526dc1 100644
--- a/drivers/char/ps3flash.c
+++ b/drivers/char/ps3flash.c
@@ -290,9 +290,9 @@ static int ps3flash_fsync(struct file *file, loff_t start, loff_t end, int datas
{
struct inode *inode = file_inode(file);
int err;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
err = ps3flash_writeback(ps3flash_dev);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 3dd69df..07d4942 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -381,6 +381,7 @@ config CRYPTO_DEV_BFIN_CRC
config CRYPTO_DEV_ATMEL_AES
tristate "Support for Atmel AES hw accelerator"
+ depends on HAS_DMA
depends on AT_XDMAC || AT_HDMAC || COMPILE_TEST
select CRYPTO_AES
select CRYPTO_AEAD
diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
index 5621612..6dd3317 100644
--- a/drivers/crypto/atmel-aes.c
+++ b/drivers/crypto/atmel-aes.c
@@ -280,6 +280,7 @@ static const char *atmel_aes_reg_name(u32 offset, char *tmp, size_t sz)
case AES_GCMHR(2):
case AES_GCMHR(3):
snprintf(tmp, sz, "GCMHR[%u]", (offset - AES_GCMHR(0)) >> 2);
+ break;
default:
snprintf(tmp, sz, "0x%02x", offset);
diff --git a/drivers/crypto/qat/qat_common/qat_hal.c b/drivers/crypto/qat/qat_common/qat_hal.c
index 0ac0ba8..1e480f1 100644
--- a/drivers/crypto/qat/qat_common/qat_hal.c
+++ b/drivers/crypto/qat/qat_common/qat_hal.c
@@ -389,7 +389,7 @@ static int qat_hal_check_ae_alive(struct icp_qat_fw_loader_handle *handle)
{
unsigned int base_cnt, cur_cnt;
unsigned char ae;
- unsigned int times = MAX_RETRY_TIMES;
+ int times = MAX_RETRY_TIMES;
for (ae = 0; ae < handle->hal_handle->ae_max_num; ae++) {
qat_hal_rd_ae_csr(handle, ae, PROFILE_COUNT,
@@ -402,7 +402,7 @@ static int qat_hal_check_ae_alive(struct icp_qat_fw_loader_handle *handle)
cur_cnt &= 0xffff;
} while (times-- && (cur_cnt == base_cnt));
- if (!times) {
+ if (times < 0) {
pr_err("QAT: AE%d is inactive!!\n", ae);
return -EFAULT;
}
@@ -453,7 +453,11 @@ static int qat_hal_init_esram(struct icp_qat_fw_loader_handle *handle)
void __iomem *csr_addr =
(void __iomem *)((uintptr_t)handle->hal_ep_csr_addr_v +
ESRAM_AUTO_INIT_CSR_OFFSET);
- unsigned int csr_val, times = 30;
+ unsigned int csr_val;
+ int times = 30;
+
+ if (handle->pci_dev->device == ADF_C3XXX_PCI_DEVICE_ID)
+ return 0;
csr_val = ADF_CSR_RD(csr_addr, 0);
if ((csr_val & ESRAM_AUTO_TINIT) && (csr_val & ESRAM_AUTO_TINIT_DONE))
@@ -467,7 +471,7 @@ static int qat_hal_init_esram(struct icp_qat_fw_loader_handle *handle)
qat_hal_wait_cycles(handle, 0, ESRAM_AUTO_INIT_USED_CYCLES, 0);
csr_val = ADF_CSR_RD(csr_addr, 0);
} while (!(csr_val & ESRAM_AUTO_TINIT_DONE) && times--);
- if ((!times)) {
+ if ((times < 0)) {
pr_err("QAT: Fail to init eSram!\n");
return -EFAULT;
}
@@ -658,7 +662,7 @@ static int qat_hal_clear_gpr(struct icp_qat_fw_loader_handle *handle)
ret = qat_hal_wait_cycles(handle, ae, 20, 1);
} while (ret && times--);
- if (!times) {
+ if (times < 0) {
pr_err("QAT: clear GPR of AE %d failed", ae);
return -EINVAL;
}
@@ -693,14 +697,12 @@ int qat_hal_init(struct adf_accel_dev *accel_dev)
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
struct adf_bar *misc_bar =
&pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)];
- struct adf_bar *sram_bar =
- &pci_info->pci_bars[hw_data->get_sram_bar_id(hw_data)];
+ struct adf_bar *sram_bar;
handle = kzalloc(sizeof(*handle), GFP_KERNEL);
if (!handle)
return -ENOMEM;
- handle->hal_sram_addr_v = sram_bar->virt_addr;
handle->hal_cap_g_ctl_csr_addr_v =
(void __iomem *)((uintptr_t)misc_bar->virt_addr +
ICP_QAT_CAP_OFFSET);
@@ -714,6 +716,11 @@ int qat_hal_init(struct adf_accel_dev *accel_dev)
(void __iomem *)((uintptr_t)handle->hal_cap_ae_xfer_csr_addr_v +
LOCAL_TO_XFER_REG_OFFSET);
handle->pci_dev = pci_info->pci_dev;
+ if (handle->pci_dev->device != ADF_C3XXX_PCI_DEVICE_ID) {
+ sram_bar =
+ &pci_info->pci_bars[hw_data->get_sram_bar_id(hw_data)];
+ handle->hal_sram_addr_v = sram_bar->virt_addr;
+ }
handle->fw_auth = (handle->pci_dev->device ==
ADF_DH895XCC_PCI_DEVICE_ID) ? false : true;
handle->hal_handle = kzalloc(sizeof(*handle->hal_handle), GFP_KERNEL);
diff --git a/drivers/gpu/drm/drm_hashtab.c b/drivers/gpu/drm/drm_hashtab.c
index c3b80fd..7b30b30 100644
--- a/drivers/gpu/drm/drm_hashtab.c
+++ b/drivers/gpu/drm/drm_hashtab.c
@@ -198,10 +198,7 @@ EXPORT_SYMBOL(drm_ht_remove_item);
void drm_ht_remove(struct drm_open_hash *ht)
{
if (ht->table) {
- if ((PAGE_SIZE / sizeof(*ht->table)) >> ht->order)
- kfree(ht->table);
- else
- vfree(ht->table);
+ kvfree(ht->table);
ht->table = NULL;
}
}
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index aa26f3c..8a8440c 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -5,6 +5,7 @@ menuconfig INFINIBAND
depends on NET
depends on INET
depends on m || IPV6 != m
+ select IRQ_POLL
---help---
Core support for InfiniBand (IB). Make sure to also select
any protocols you wish to use as well as drivers for your
@@ -54,6 +55,15 @@ config INFINIBAND_ADDR_TRANS
depends on INFINIBAND
default y
+config INFINIBAND_ADDR_TRANS_CONFIGFS
+ bool
+ depends on INFINIBAND_ADDR_TRANS && CONFIGFS_FS && !(INFINIBAND=y && CONFIGFS_FS=m)
+ default y
+ ---help---
+ ConfigFS support for RDMA communication manager (CM).
+ This allows the user to config the default GID type that the CM
+ uses for each device, when initiaing new connections.
+
source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/qib/Kconfig"
source "drivers/infiniband/hw/cxgb3/Kconfig"
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index d43a899..f818538 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -8,7 +8,7 @@ obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
$(user_access-y)
-ib_core-y := packer.o ud_header.o verbs.o sysfs.o \
+ib_core-y := packer.o ud_header.o verbs.o cq.o sysfs.o \
device.o fmr_pool.o cache.o netlink.o \
roce_gid_mgmt.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
@@ -24,6 +24,8 @@ iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o
rdma_cm-y := cma.o
+rdma_cm-$(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) += cma_configfs.o
+
rdma_ucm-y := ucma.o
ib_addr-y := addr.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 34b1ada..337353d 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -121,7 +121,8 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
}
EXPORT_SYMBOL(rdma_copy_addr);
-int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
+int rdma_translate_ip(const struct sockaddr *addr,
+ struct rdma_dev_addr *dev_addr,
u16 *vlan_id)
{
struct net_device *dev;
@@ -139,7 +140,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
switch (addr->sa_family) {
case AF_INET:
dev = ip_dev_find(dev_addr->net,
- ((struct sockaddr_in *) addr)->sin_addr.s_addr);
+ ((const struct sockaddr_in *)addr)->sin_addr.s_addr);
if (!dev)
return ret;
@@ -154,7 +155,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
rcu_read_lock();
for_each_netdev_rcu(dev_addr->net, dev) {
if (ipv6_chk_addr(dev_addr->net,
- &((struct sockaddr_in6 *) addr)->sin6_addr,
+ &((const struct sockaddr_in6 *)addr)->sin6_addr,
dev, 1)) {
ret = rdma_copy_addr(dev_addr, dev, NULL);
if (vlan_id)
@@ -198,7 +199,8 @@ static void queue_req(struct addr_req *req)
mutex_unlock(&lock);
}
-static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, void *daddr)
+static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
+ const void *daddr)
{
struct neighbour *n;
int ret;
@@ -222,8 +224,9 @@ static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, v
}
static int addr4_resolve(struct sockaddr_in *src_in,
- struct sockaddr_in *dst_in,
- struct rdma_dev_addr *addr)
+ const struct sockaddr_in *dst_in,
+ struct rdma_dev_addr *addr,
+ struct rtable **prt)
{
__be32 src_ip = src_in->sin_addr.s_addr;
__be32 dst_ip = dst_in->sin_addr.s_addr;
@@ -243,33 +246,29 @@ static int addr4_resolve(struct sockaddr_in *src_in,
src_in->sin_family = AF_INET;
src_in->sin_addr.s_addr = fl4.saddr;
- if (rt->dst.dev->flags & IFF_LOOPBACK) {
- ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
- if (!ret)
- memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
- goto put;
- }
+ /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
+ * routable) and we could set the network type accordingly.
+ */
+ if (rt->rt_uses_gateway)
+ addr->network = RDMA_NETWORK_IPV4;
- /* If the device does ARP internally, return 'done' */
- if (rt->dst.dev->flags & IFF_NOARP) {
- ret = rdma_copy_addr(addr, rt->dst.dev, NULL);
- goto put;
- }
+ addr->hoplimit = ip4_dst_hoplimit(&rt->dst);
- ret = dst_fetch_ha(&rt->dst, addr, &fl4.daddr);
-put:
- ip_rt_put(rt);
+ *prt = rt;
+ return 0;
out:
return ret;
}
#if IS_ENABLED(CONFIG_IPV6)
static int addr6_resolve(struct sockaddr_in6 *src_in,
- struct sockaddr_in6 *dst_in,
- struct rdma_dev_addr *addr)
+ const struct sockaddr_in6 *dst_in,
+ struct rdma_dev_addr *addr,
+ struct dst_entry **pdst)
{
struct flowi6 fl6;
struct dst_entry *dst;
+ struct rt6_info *rt;
int ret;
memset(&fl6, 0, sizeof fl6);
@@ -281,6 +280,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
if ((ret = dst->error))
goto put;
+ rt = (struct rt6_info *)dst;
if (ipv6_addr_any(&fl6.saddr)) {
ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
&fl6.daddr, 0, &fl6.saddr);
@@ -291,43 +291,111 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
src_in->sin6_addr = fl6.saddr;
}
- if (dst->dev->flags & IFF_LOOPBACK) {
- ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
- if (!ret)
- memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
- goto put;
- }
+ /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
+ * routable) and we could set the network type accordingly.
+ */
+ if (rt->rt6i_flags & RTF_GATEWAY)
+ addr->network = RDMA_NETWORK_IPV6;
- /* If the device does ARP internally, return 'done' */
- if (dst->dev->flags & IFF_NOARP) {
- ret = rdma_copy_addr(addr, dst->dev, NULL);
- goto put;
- }
+ addr->hoplimit = ip6_dst_hoplimit(dst);
- ret = dst_fetch_ha(dst, addr, &fl6.daddr);
+ *pdst = dst;
+ return 0;
put:
dst_release(dst);
return ret;
}
#else
static int addr6_resolve(struct sockaddr_in6 *src_in,
- struct sockaddr_in6 *dst_in,
- struct rdma_dev_addr *addr)
+ const struct sockaddr_in6 *dst_in,
+ struct rdma_dev_addr *addr,
+ struct dst_entry **pdst)
{
return -EADDRNOTAVAIL;
}
#endif
+static int addr_resolve_neigh(struct dst_entry *dst,
+ const struct sockaddr *dst_in,
+ struct rdma_dev_addr *addr)
+{
+ if (dst->dev->flags & IFF_LOOPBACK) {
+ int ret;
+
+ ret = rdma_translate_ip(dst_in, addr, NULL);
+ if (!ret)
+ memcpy(addr->dst_dev_addr, addr->src_dev_addr,
+ MAX_ADDR_LEN);
+
+ return ret;
+ }
+
+ /* If the device doesn't do ARP internally */
+ if (!(dst->dev->flags & IFF_NOARP)) {
+ const struct sockaddr_in *dst_in4 =
+ (const struct sockaddr_in *)dst_in;
+ const struct sockaddr_in6 *dst_in6 =
+ (const struct sockaddr_in6 *)dst_in;
+
+ return dst_fetch_ha(dst, addr,
+ dst_in->sa_family == AF_INET ?
+ (const void *)&dst_in4->sin_addr.s_addr :
+ (const void *)&dst_in6->sin6_addr);
+ }
+
+ return rdma_copy_addr(addr, dst->dev, NULL);
+}
+
static int addr_resolve(struct sockaddr *src_in,
- struct sockaddr *dst_in,
- struct rdma_dev_addr *addr)
+ const struct sockaddr *dst_in,
+ struct rdma_dev_addr *addr,
+ bool resolve_neigh)
{
+ struct net_device *ndev;
+ struct dst_entry *dst;
+ int ret;
+
if (src_in->sa_family == AF_INET) {
- return addr4_resolve((struct sockaddr_in *) src_in,
- (struct sockaddr_in *) dst_in, addr);
- } else
- return addr6_resolve((struct sockaddr_in6 *) src_in,
- (struct sockaddr_in6 *) dst_in, addr);
+ struct rtable *rt = NULL;
+ const struct sockaddr_in *dst_in4 =
+ (const struct sockaddr_in *)dst_in;
+
+ ret = addr4_resolve((struct sockaddr_in *)src_in,
+ dst_in4, addr, &rt);
+ if (ret)
+ return ret;
+
+ if (resolve_neigh)
+ ret = addr_resolve_neigh(&rt->dst, dst_in, addr);
+
+ ndev = rt->dst.dev;
+ dev_hold(ndev);
+
+ ip_rt_put(rt);
+ } else {
+ const struct sockaddr_in6 *dst_in6 =
+ (const struct sockaddr_in6 *)dst_in;
+
+ ret = addr6_resolve((struct sockaddr_in6 *)src_in,
+ dst_in6, addr,
+ &dst);
+ if (ret)
+ return ret;
+
+ if (resolve_neigh)
+ ret = addr_resolve_neigh(dst, dst_in, addr);
+
+ ndev = dst->dev;
+ dev_hold(ndev);
+
+ dst_release(dst);
+ }
+
+ addr->bound_dev_if = ndev->ifindex;
+ addr->net = dev_net(ndev);
+ dev_put(ndev);
+
+ return ret;
}
static void process_req(struct work_struct *work)
@@ -343,7 +411,8 @@ static void process_req(struct work_struct *work)
if (req->status == -ENODATA) {
src_in = (struct sockaddr *) &req->src_addr;
dst_in = (struct sockaddr *) &req->dst_addr;
- req->status = addr_resolve(src_in, dst_in, req->addr);
+ req->status = addr_resolve(src_in, dst_in, req->addr,
+ true);
if (req->status && time_after_eq(jiffies, req->timeout))
req->status = -ETIMEDOUT;
else if (req->status == -ENODATA)
@@ -403,7 +472,7 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
req->client = client;
atomic_inc(&client->refcount);
- req->status = addr_resolve(src_in, dst_in, addr);
+ req->status = addr_resolve(src_in, dst_in, addr, true);
switch (req->status) {
case 0:
req->timeout = jiffies;
@@ -425,6 +494,26 @@ err:
}
EXPORT_SYMBOL(rdma_resolve_ip);
+int rdma_resolve_ip_route(struct sockaddr *src_addr,
+ const struct sockaddr *dst_addr,
+ struct rdma_dev_addr *addr)
+{
+ struct sockaddr_storage ssrc_addr = {};
+ struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr;
+
+ if (src_addr) {
+ if (src_addr->sa_family != dst_addr->sa_family)
+ return -EINVAL;
+
+ memcpy(src_in, src_addr, rdma_addr_size(src_addr));
+ } else {
+ src_in->sa_family = dst_addr->sa_family;
+ }
+
+ return addr_resolve(src_in, dst_addr, addr, false);
+}
+EXPORT_SYMBOL(rdma_resolve_ip_route);
+
void rdma_addr_cancel(struct rdma_dev_addr *addr)
{
struct addr_req *req, *temp_req;
@@ -456,8 +545,10 @@ static void resolve_cb(int status, struct sockaddr *src_addr,
complete(&((struct resolve_cb_context *)context)->comp);
}
-int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid,
- u8 *dmac, u16 *vlan_id, int if_index)
+int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
+ const union ib_gid *dgid,
+ u8 *dmac, u16 *vlan_id, int *if_index,
+ int *hoplimit)
{
int ret = 0;
struct rdma_dev_addr dev_addr;
@@ -475,7 +566,8 @@ int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgi
rdma_gid2ip(&dgid_addr._sockaddr, dgid);
memset(&dev_addr, 0, sizeof(dev_addr));
- dev_addr.bound_dev_if = if_index;
+ if (if_index)
+ dev_addr.bound_dev_if = *if_index;
dev_addr.net = &init_net;
ctx.addr = &dev_addr;
@@ -491,12 +583,16 @@ int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgi
dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
if (!dev)
return -ENODEV;
+ if (if_index)
+ *if_index = dev_addr.bound_dev_if;
if (vlan_id)
*vlan_id = rdma_vlan_dev_vlan_id(dev);
+ if (hoplimit)
+ *hoplimit = dev_addr.hoplimit;
dev_put(dev);
return ret;
}
-EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh);
+EXPORT_SYMBOL(rdma_addr_find_l2_eth_by_grh);
int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
{
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 89bebea..53343ff 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -64,6 +64,7 @@ enum gid_attr_find_mask {
GID_ATTR_FIND_MASK_GID = 1UL << 0,
GID_ATTR_FIND_MASK_NETDEV = 1UL << 1,
GID_ATTR_FIND_MASK_DEFAULT = 1UL << 2,
+ GID_ATTR_FIND_MASK_GID_TYPE = 1UL << 3,
};
enum gid_table_entry_props {
@@ -81,10 +82,6 @@ enum gid_table_write_action {
};
struct ib_gid_table_entry {
- /* This lock protects an entry from being
- * read and written simultaneously.
- */
- rwlock_t lock;
unsigned long props;
union ib_gid gid;
struct ib_gid_attr attr;
@@ -109,28 +106,86 @@ struct ib_gid_table {
* are locked by this lock.
**/
struct mutex lock;
+ /* This lock protects the table entries from being
+ * read and written simultaneously.
+ */
+ rwlock_t rwlock;
struct ib_gid_table_entry *data_vec;
};
+static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
+{
+ if (rdma_cap_roce_gid_table(ib_dev, port)) {
+ struct ib_event event;
+
+ event.device = ib_dev;
+ event.element.port_num = port;
+ event.event = IB_EVENT_GID_CHANGE;
+
+ ib_dispatch_event(&event);
+ }
+}
+
+static const char * const gid_type_str[] = {
+ [IB_GID_TYPE_IB] = "IB/RoCE v1",
+ [IB_GID_TYPE_ROCE_UDP_ENCAP] = "RoCE v2",
+};
+
+const char *ib_cache_gid_type_str(enum ib_gid_type gid_type)
+{
+ if (gid_type < ARRAY_SIZE(gid_type_str) && gid_type_str[gid_type])
+ return gid_type_str[gid_type];
+
+ return "Invalid GID type";
+}
+EXPORT_SYMBOL(ib_cache_gid_type_str);
+
+int ib_cache_gid_parse_type_str(const char *buf)
+{
+ unsigned int i;
+ size_t len;
+ int err = -EINVAL;
+
+ len = strlen(buf);
+ if (len == 0)
+ return -EINVAL;
+
+ if (buf[len - 1] == '\n')
+ len--;
+
+ for (i = 0; i < ARRAY_SIZE(gid_type_str); ++i)
+ if (gid_type_str[i] && !strncmp(buf, gid_type_str[i], len) &&
+ len == strlen(gid_type_str[i])) {
+ err = i;
+ break;
+ }
+
+ return err;
+}
+EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
+
+/* This function expects that rwlock will be write locked in all
+ * scenarios and that lock will be locked in sleep-able (RoCE)
+ * scenarios.
+ */
static int write_gid(struct ib_device *ib_dev, u8 port,
struct ib_gid_table *table, int ix,
const union ib_gid *gid,
const struct ib_gid_attr *attr,
enum gid_table_write_action action,
bool default_gid)
+ __releases(&table->rwlock) __acquires(&table->rwlock)
{
int ret = 0;
struct net_device *old_net_dev;
- unsigned long flags;
/* in rdma_cap_roce_gid_table, this funciton should be protected by a
* sleep-able lock.
*/
- write_lock_irqsave(&table->data_vec[ix].lock, flags);
if (rdma_cap_roce_gid_table(ib_dev, port)) {
table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
- write_unlock_irqrestore(&table->data_vec[ix].lock, flags);
+ write_unlock_irq(&table->rwlock);
/* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by
* RoCE providers and thus only updates the cache.
*/
@@ -140,7 +195,7 @@ static int write_gid(struct ib_device *ib_dev, u8 port,
else if (action == GID_TABLE_WRITE_ACTION_DEL)
ret = ib_dev->del_gid(ib_dev, port, ix,
&table->data_vec[ix].context);
- write_lock_irqsave(&table->data_vec[ix].lock, flags);
+ write_lock_irq(&table->rwlock);
}
old_net_dev = table->data_vec[ix].attr.ndev;
@@ -162,17 +217,6 @@ static int write_gid(struct ib_device *ib_dev, u8 port,
table->data_vec[ix].props &= ~GID_TABLE_ENTRY_INVALID;
- write_unlock_irqrestore(&table->data_vec[ix].lock, flags);
-
- if (!ret && rdma_cap_roce_gid_table(ib_dev, port)) {
- struct ib_event event;
-
- event.device = ib_dev;
- event.element.port_num = port;
- event.event = IB_EVENT_GID_CHANGE;
-
- ib_dispatch_event(&event);
- }
return ret;
}
@@ -201,41 +245,58 @@ static int del_gid(struct ib_device *ib_dev, u8 port,
GID_TABLE_WRITE_ACTION_DEL, default_gid);
}
+/* rwlock should be read locked */
static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
const struct ib_gid_attr *val, bool default_gid,
- unsigned long mask)
+ unsigned long mask, int *pempty)
{
- int i;
+ int i = 0;
+ int found = -1;
+ int empty = pempty ? -1 : 0;
- for (i = 0; i < table->sz; i++) {
- unsigned long flags;
- struct ib_gid_attr *attr = &table->data_vec[i].attr;
+ while (i < table->sz && (found < 0 || empty < 0)) {
+ struct ib_gid_table_entry *data = &table->data_vec[i];
+ struct ib_gid_attr *attr = &data->attr;
+ int curr_index = i;
- read_lock_irqsave(&table->data_vec[i].lock, flags);
+ i++;
- if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
- goto next;
+ if (data->props & GID_TABLE_ENTRY_INVALID)
+ continue;
+
+ if (empty < 0)
+ if (!memcmp(&data->gid, &zgid, sizeof(*gid)) &&
+ !memcmp(attr, &zattr, sizeof(*attr)) &&
+ !data->props)
+ empty = curr_index;
+
+ if (found >= 0)
+ continue;
+
+ if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
+ attr->gid_type != val->gid_type)
+ continue;
if (mask & GID_ATTR_FIND_MASK_GID &&
- memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
- goto next;
+ memcmp(gid, &data->gid, sizeof(*gid)))
+ continue;
if (mask & GID_ATTR_FIND_MASK_NETDEV &&
attr->ndev != val->ndev)
- goto next;
+ continue;
if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
- !!(table->data_vec[i].props & GID_TABLE_ENTRY_DEFAULT) !=
+ !!(data->props & GID_TABLE_ENTRY_DEFAULT) !=
default_gid)
- goto next;
+ continue;
- read_unlock_irqrestore(&table->data_vec[i].lock, flags);
- return i;
-next:
- read_unlock_irqrestore(&table->data_vec[i].lock, flags);
+ found = curr_index;
}
- return -1;
+ if (pempty)
+ *pempty = empty;
+
+ return found;
}
static void make_default_gid(struct net_device *dev, union ib_gid *gid)
@@ -252,6 +313,7 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
int ix;
int ret = 0;
struct net_device *idev;
+ int empty;
table = ports_table[port - rdma_start_port(ib_dev)];
@@ -275,22 +337,25 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
}
mutex_lock(&table->lock);
+ write_lock_irq(&table->rwlock);
ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID |
- GID_ATTR_FIND_MASK_NETDEV);
+ GID_ATTR_FIND_MASK_GID_TYPE |
+ GID_ATTR_FIND_MASK_NETDEV, &empty);
if (ix >= 0)
goto out_unlock;
- ix = find_gid(table, &zgid, NULL, false, GID_ATTR_FIND_MASK_GID |
- GID_ATTR_FIND_MASK_DEFAULT);
- if (ix < 0) {
+ if (empty < 0) {
ret = -ENOSPC;
goto out_unlock;
}
- add_gid(ib_dev, port, table, ix, gid, attr, false);
+ ret = add_gid(ib_dev, port, table, empty, gid, attr, false);
+ if (!ret)
+ dispatch_gid_change_event(ib_dev, port);
out_unlock:
+ write_unlock_irq(&table->rwlock);
mutex_unlock(&table->lock);
return ret;
}
@@ -305,17 +370,22 @@ int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
table = ports_table[port - rdma_start_port(ib_dev)];
mutex_lock(&table->lock);
+ write_lock_irq(&table->rwlock);
ix = find_gid(table, gid, attr, false,
GID_ATTR_FIND_MASK_GID |
+ GID_ATTR_FIND_MASK_GID_TYPE |
GID_ATTR_FIND_MASK_NETDEV |
- GID_ATTR_FIND_MASK_DEFAULT);
+ GID_ATTR_FIND_MASK_DEFAULT,
+ NULL);
if (ix < 0)
goto out_unlock;
- del_gid(ib_dev, port, table, ix, false);
+ if (!del_gid(ib_dev, port, table, ix, false))
+ dispatch_gid_change_event(ib_dev, port);
out_unlock:
+ write_unlock_irq(&table->rwlock);
mutex_unlock(&table->lock);
return 0;
}
@@ -326,16 +396,24 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
int ix;
+ bool deleted = false;
table = ports_table[port - rdma_start_port(ib_dev)];
mutex_lock(&table->lock);
+ write_lock_irq(&table->rwlock);
for (ix = 0; ix < table->sz; ix++)
if (table->data_vec[ix].attr.ndev == ndev)
- del_gid(ib_dev, port, table, ix, false);
+ if (!del_gid(ib_dev, port, table, ix, false))
+ deleted = true;
+ write_unlock_irq(&table->rwlock);
mutex_unlock(&table->lock);
+
+ if (deleted)
+ dispatch_gid_change_event(ib_dev, port);
+
return 0;
}
@@ -344,18 +422,14 @@ static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
{
struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
- unsigned long flags;
table = ports_table[port - rdma_start_port(ib_dev)];
if (index < 0 || index >= table->sz)
return -EINVAL;
- read_lock_irqsave(&table->data_vec[index].lock, flags);
- if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID) {
- read_unlock_irqrestore(&table->data_vec[index].lock, flags);
+ if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID)
return -EAGAIN;
- }
memcpy(gid, &table->data_vec[index].gid, sizeof(*gid));
if (attr) {
@@ -364,7 +438,6 @@ static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
dev_hold(attr->ndev);
}
- read_unlock_irqrestore(&table->data_vec[index].lock, flags);
return 0;
}
@@ -378,17 +451,21 @@ static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
struct ib_gid_table *table;
u8 p;
int local_index;
+ unsigned long flags;
for (p = 0; p < ib_dev->phys_port_cnt; p++) {
table = ports_table[p];
- local_index = find_gid(table, gid, val, false, mask);
+ read_lock_irqsave(&table->rwlock, flags);
+ local_index = find_gid(table, gid, val, false, mask, NULL);
if (local_index >= 0) {
if (index)
*index = local_index;
if (port)
*port = p + rdma_start_port(ib_dev);
+ read_unlock_irqrestore(&table->rwlock, flags);
return 0;
}
+ read_unlock_irqrestore(&table->rwlock, flags);
}
return -ENOENT;
@@ -396,11 +473,13 @@ static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
static int ib_cache_gid_find(struct ib_device *ib_dev,
const union ib_gid *gid,
+ enum ib_gid_type gid_type,
struct net_device *ndev, u8 *port,
u16 *index)
{
- unsigned long mask = GID_ATTR_FIND_MASK_GID;
- struct ib_gid_attr gid_attr_val = {.ndev = ndev};
+ unsigned long mask = GID_ATTR_FIND_MASK_GID |
+ GID_ATTR_FIND_MASK_GID_TYPE;
+ struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
if (ndev)
mask |= GID_ATTR_FIND_MASK_NETDEV;
@@ -411,14 +490,17 @@ static int ib_cache_gid_find(struct ib_device *ib_dev,
int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
const union ib_gid *gid,
+ enum ib_gid_type gid_type,
u8 port, struct net_device *ndev,
u16 *index)
{
int local_index;
struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
- unsigned long mask = GID_ATTR_FIND_MASK_GID;
- struct ib_gid_attr val = {.ndev = ndev};
+ unsigned long mask = GID_ATTR_FIND_MASK_GID |
+ GID_ATTR_FIND_MASK_GID_TYPE;
+ struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
+ unsigned long flags;
if (port < rdma_start_port(ib_dev) ||
port > rdma_end_port(ib_dev))
@@ -429,13 +511,16 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
if (ndev)
mask |= GID_ATTR_FIND_MASK_NETDEV;
- local_index = find_gid(table, gid, &val, false, mask);
+ read_lock_irqsave(&table->rwlock, flags);
+ local_index = find_gid(table, gid, &val, false, mask, NULL);
if (local_index >= 0) {
if (index)
*index = local_index;
+ read_unlock_irqrestore(&table->rwlock, flags);
return 0;
}
+ read_unlock_irqrestore(&table->rwlock, flags);
return -ENOENT;
}
EXPORT_SYMBOL(ib_find_cached_gid_by_port);
@@ -472,6 +557,7 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
unsigned int i;
+ unsigned long flags;
bool found = false;
if (!ports_table)
@@ -484,11 +570,10 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
table = ports_table[port - rdma_start_port(ib_dev)];
+ read_lock_irqsave(&table->rwlock, flags);
for (i = 0; i < table->sz; i++) {
struct ib_gid_attr attr;
- unsigned long flags;
- read_lock_irqsave(&table->data_vec[i].lock, flags);
if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
goto next;
@@ -501,11 +586,10 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
found = true;
next:
- read_unlock_irqrestore(&table->data_vec[i].lock, flags);
-
if (found)
break;
}
+ read_unlock_irqrestore(&table->rwlock, flags);
if (!found)
return -ENOENT;
@@ -517,9 +601,9 @@ next:
static struct ib_gid_table *alloc_gid_table(int sz)
{
- unsigned int i;
struct ib_gid_table *table =
kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);
+
if (!table)
return NULL;
@@ -530,9 +614,7 @@ static struct ib_gid_table *alloc_gid_table(int sz)
mutex_init(&table->lock);
table->sz = sz;
-
- for (i = 0; i < sz; i++)
- rwlock_init(&table->data_vec[i].lock);
+ rwlock_init(&table->rwlock);
return table;
@@ -553,30 +635,37 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
struct ib_gid_table *table)
{
int i;
+ bool deleted = false;
if (!table)
return;
+ write_lock_irq(&table->rwlock);
for (i = 0; i < table->sz; ++i) {
if (memcmp(&table->data_vec[i].gid, &zgid,
sizeof(table->data_vec[i].gid)))
- del_gid(ib_dev, port, table, i,
- table->data_vec[i].props &
- GID_ATTR_FIND_MASK_DEFAULT);
+ if (!del_gid(ib_dev, port, table, i,
+ table->data_vec[i].props &
+ GID_ATTR_FIND_MASK_DEFAULT))
+ deleted = true;
}
+ write_unlock_irq(&table->rwlock);
+
+ if (deleted)
+ dispatch_gid_change_event(ib_dev, port);
}
void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
struct net_device *ndev,
+ unsigned long gid_type_mask,
enum ib_cache_gid_default_mode mode)
{
struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
union ib_gid gid;
struct ib_gid_attr gid_attr;
+ struct ib_gid_attr zattr_type = zattr;
struct ib_gid_table *table;
- int ix;
- union ib_gid current_gid;
- struct ib_gid_attr current_gid_attr = {};
+ unsigned int gid_type;
table = ports_table[port - rdma_start_port(ib_dev)];
@@ -584,46 +673,82 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
memset(&gid_attr, 0, sizeof(gid_attr));
gid_attr.ndev = ndev;
- mutex_lock(&table->lock);
- ix = find_gid(table, NULL, NULL, true, GID_ATTR_FIND_MASK_DEFAULT);
-
- /* Coudn't find default GID location */
- WARN_ON(ix < 0);
-
- if (!__ib_cache_gid_get(ib_dev, port, ix,
- &current_gid, &current_gid_attr) &&
- mode == IB_CACHE_GID_DEFAULT_MODE_SET &&
- !memcmp(&gid, &current_gid, sizeof(gid)) &&
- !memcmp(&gid_attr, &current_gid_attr, sizeof(gid_attr)))
- goto unlock;
-
- if ((memcmp(&current_gid, &zgid, sizeof(current_gid)) ||
- memcmp(&current_gid_attr, &zattr,
- sizeof(current_gid_attr))) &&
- del_gid(ib_dev, port, table, ix, true)) {
- pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n",
- ix, gid.raw);
- goto unlock;
- }
+ for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
+ int ix;
+ union ib_gid current_gid;
+ struct ib_gid_attr current_gid_attr = {};
+
+ if (1UL << gid_type & ~gid_type_mask)
+ continue;
+
+ gid_attr.gid_type = gid_type;
+
+ mutex_lock(&table->lock);
+ write_lock_irq(&table->rwlock);
+ ix = find_gid(table, NULL, &gid_attr, true,
+ GID_ATTR_FIND_MASK_GID_TYPE |
+ GID_ATTR_FIND_MASK_DEFAULT,
+ NULL);
+
+ /* Coudn't find default GID location */
+ WARN_ON(ix < 0);
+
+ zattr_type.gid_type = gid_type;
+
+ if (!__ib_cache_gid_get(ib_dev, port, ix,
+ &current_gid, &current_gid_attr) &&
+ mode == IB_CACHE_GID_DEFAULT_MODE_SET &&
+ !memcmp(&gid, &current_gid, sizeof(gid)) &&
+ !memcmp(&gid_attr, &current_gid_attr, sizeof(gid_attr)))
+ goto release;
+
+ if (memcmp(&current_gid, &zgid, sizeof(current_gid)) ||
+ memcmp(&current_gid_attr, &zattr_type,
+ sizeof(current_gid_attr))) {
+ if (del_gid(ib_dev, port, table, ix, true)) {
+ pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n",
+ ix, gid.raw);
+ goto release;
+ } else {
+ dispatch_gid_change_event(ib_dev, port);
+ }
+ }
- if (mode == IB_CACHE_GID_DEFAULT_MODE_SET)
- if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true))
- pr_warn("ib_cache_gid: unable to add default gid %pI6\n",
- gid.raw);
+ if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
+ if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true))
+ pr_warn("ib_cache_gid: unable to add default gid %pI6\n",
+ gid.raw);
+ else
+ dispatch_gid_change_event(ib_dev, port);
+ }
-unlock:
- if (current_gid_attr.ndev)
- dev_put(current_gid_attr.ndev);
- mutex_unlock(&table->lock);
+release:
+ if (current_gid_attr.ndev)
+ dev_put(current_gid_attr.ndev);
+ write_unlock_irq(&table->rwlock);
+ mutex_unlock(&table->lock);
+ }
}
static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
struct ib_gid_table *table)
{
- if (rdma_protocol_roce(ib_dev, port)) {
- struct ib_gid_table_entry *entry = &table->data_vec[0];
+ unsigned int i;
+ unsigned long roce_gid_type_mask;
+ unsigned int num_default_gids;
+ unsigned int current_gid = 0;
+
+ roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+ num_default_gids = hweight_long(roce_gid_type_mask);
+ for (i = 0; i < num_default_gids && i < table->sz; i++) {
+ struct ib_gid_table_entry *entry =
+ &table->data_vec[i];
entry->props |= GID_TABLE_ENTRY_DEFAULT;
+ current_gid = find_next_bit(&roce_gid_type_mask,
+ BITS_PER_LONG,
+ current_gid);
+ entry->attr.gid_type = current_gid++;
}
return 0;
@@ -728,20 +853,30 @@ int ib_get_cached_gid(struct ib_device *device,
union ib_gid *gid,
struct ib_gid_attr *gid_attr)
{
+ int res;
+ unsigned long flags;
+ struct ib_gid_table **ports_table = device->cache.gid_cache;
+ struct ib_gid_table *table = ports_table[port_num - rdma_start_port(device)];
+
if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
return -EINVAL;
- return __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
+ read_lock_irqsave(&table->rwlock, flags);
+ res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
+ read_unlock_irqrestore(&table->rwlock, flags);
+
+ return res;
}
EXPORT_SYMBOL(ib_get_cached_gid);
int ib_find_cached_gid(struct ib_device *device,
const union ib_gid *gid,
+ enum ib_gid_type gid_type,
struct net_device *ndev,
u8 *port_num,
u16 *index)
{
- return ib_cache_gid_find(device, gid, ndev, port_num, index);
+ return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index);
}
EXPORT_SYMBOL(ib_find_cached_gid);
@@ -956,10 +1091,12 @@ static void ib_cache_update(struct ib_device *device,
device->cache.pkey_cache[port - rdma_start_port(device)] = pkey_cache;
if (!use_roce_gid_table) {
+ write_lock(&table->rwlock);
for (i = 0; i < gid_cache->table_len; i++) {
modify_gid(device, port, table, i, gid_cache->table + i,
&zattr, false);
}
+ write_unlock(&table->rwlock);
}
device->cache.lmc_cache[port - rdma_start_port(device)] = tprops->lmc;
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 0a26dd6..1d92e09 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -364,7 +364,7 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
read_lock_irqsave(&cm.device_lock, flags);
list_for_each_entry(cm_dev, &cm.device_list, list) {
if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
- ndev, &p, NULL)) {
+ path->gid_type, ndev, &p, NULL)) {
port = cm_dev->port[p-1];
break;
}
@@ -782,11 +782,11 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);
/* Check if the device started its remove_one */
- spin_lock_irq(&cm.lock);
+ spin_lock_irqsave(&cm.lock, flags);
if (!cm_dev->going_down)
queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
msecs_to_jiffies(wait_time));
- spin_unlock_irq(&cm.lock);
+ spin_unlock_irqrestore(&cm.lock, flags);
cm_id_priv->timewait_info = NULL;
}
@@ -1600,6 +1600,8 @@ static int cm_req_handler(struct cm_work *work)
struct ib_cm_id *cm_id;
struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
struct cm_req_msg *req_msg;
+ union ib_gid gid;
+ struct ib_gid_attr gid_attr;
int ret;
req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1639,11 +1641,31 @@ static int cm_req_handler(struct cm_work *work)
cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
- ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
+ work->path[0].hop_limit = cm_id_priv->av.ah_attr.grh.hop_limit;
+ ret = ib_get_cached_gid(work->port->cm_dev->ib_device,
+ work->port->port_num,
+ cm_id_priv->av.ah_attr.grh.sgid_index,
+ &gid, &gid_attr);
+ if (!ret) {
+ if (gid_attr.ndev) {
+ work->path[0].ifindex = gid_attr.ndev->ifindex;
+ work->path[0].net = dev_net(gid_attr.ndev);
+ dev_put(gid_attr.ndev);
+ }
+ work->path[0].gid_type = gid_attr.gid_type;
+ ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
+ }
if (ret) {
- ib_get_cached_gid(work->port->cm_dev->ib_device,
- work->port->port_num, 0, &work->path[0].sgid,
- NULL);
+ int err = ib_get_cached_gid(work->port->cm_dev->ib_device,
+ work->port->port_num, 0,
+ &work->path[0].sgid,
+ &gid_attr);
+ if (!err && gid_attr.ndev) {
+ work->path[0].ifindex = gid_attr.ndev->ifindex;
+ work->path[0].net = dev_net(gid_attr.ndev);
+ dev_put(gid_attr.ndev);
+ }
+ work->path[0].gid_type = gid_attr.gid_type;
ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
&work->path[0].sgid, sizeof work->path[0].sgid,
NULL, 0);
@@ -3482,6 +3504,7 @@ int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event)
EXPORT_SYMBOL(ib_cm_notify);
static void cm_recv_handler(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_buf *send_buf,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct cm_port *port = mad_agent->context;
@@ -3731,16 +3754,6 @@ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
}
EXPORT_SYMBOL(ib_cm_init_qp_attr);
-static void cm_get_ack_delay(struct cm_device *cm_dev)
-{
- struct ib_device_attr attr;
-
- if (ib_query_device(cm_dev->ib_device, &attr))
- cm_dev->ack_delay = 0; /* acks will rely on packet life time */
- else
- cm_dev->ack_delay = attr.local_ca_ack_delay;
-}
-
static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
char *buf)
{
@@ -3852,7 +3865,7 @@ static void cm_add_one(struct ib_device *ib_device)
return;
cm_dev->ib_device = ib_device;
- cm_get_ack_delay(cm_dev);
+ cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
cm_dev->going_down = 0;
cm_dev->device = device_create(&cm_class, &ib_device->dev,
MKDEV(0, 0), NULL,
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 2d762a2..9729639 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -38,6 +38,7 @@
#include <linux/in6.h>
#include <linux/mutex.h>
#include <linux/random.h>
+#include <linux/igmp.h>
#include <linux/idr.h>
#include <linux/inetdevice.h>
#include <linux/slab.h>
@@ -60,6 +61,8 @@
#include <rdma/ib_sa.h>
#include <rdma/iw_cm.h>
+#include "core_priv.h"
+
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("Generic RDMA CM Agent");
MODULE_LICENSE("Dual BSD/GPL");
@@ -150,6 +153,7 @@ struct cma_device {
struct completion comp;
atomic_t refcount;
struct list_head id_list;
+ enum ib_gid_type *default_gid_type;
};
struct rdma_bind_list {
@@ -185,6 +189,67 @@ enum {
CMA_OPTION_AFONLY,
};
+void cma_ref_dev(struct cma_device *cma_dev)
+{
+ atomic_inc(&cma_dev->refcount);
+}
+
+struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
+ void *cookie)
+{
+ struct cma_device *cma_dev;
+ struct cma_device *found_cma_dev = NULL;
+
+ mutex_lock(&lock);
+
+ list_for_each_entry(cma_dev, &dev_list, list)
+ if (filter(cma_dev->device, cookie)) {
+ found_cma_dev = cma_dev;
+ break;
+ }
+
+ if (found_cma_dev)
+ cma_ref_dev(found_cma_dev);
+ mutex_unlock(&lock);
+ return found_cma_dev;
+}
+
+int cma_get_default_gid_type(struct cma_device *cma_dev,
+ unsigned int port)
+{
+ if (port < rdma_start_port(cma_dev->device) ||
+ port > rdma_end_port(cma_dev->device))
+ return -EINVAL;
+
+ return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)];
+}
+
+int cma_set_default_gid_type(struct cma_device *cma_dev,
+ unsigned int port,
+ enum ib_gid_type default_gid_type)
+{
+ unsigned long supported_gids;
+
+ if (port < rdma_start_port(cma_dev->device) ||
+ port > rdma_end_port(cma_dev->device))
+ return -EINVAL;
+
+ supported_gids = roce_gid_type_mask_support(cma_dev->device, port);
+
+ if (!(supported_gids & 1 << default_gid_type))
+ return -EINVAL;
+
+ cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)] =
+ default_gid_type;
+
+ return 0;
+}
+
+struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
+{
+ return cma_dev->device;
+}
+
/*
* Device removal can occur at anytime, so we need extra handling to
* serialize notifying the user of device removal with other callbacks.
@@ -228,6 +293,7 @@ struct rdma_id_private {
u8 tos;
u8 reuseaddr;
u8 afonly;
+ enum ib_gid_type gid_type;
};
struct cma_multicast {
@@ -239,6 +305,7 @@ struct cma_multicast {
void *context;
struct sockaddr_storage addr;
struct kref mcref;
+ bool igmp_joined;
};
struct cma_work {
@@ -335,18 +402,48 @@ static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
}
-static void cma_attach_to_dev(struct rdma_id_private *id_priv,
- struct cma_device *cma_dev)
+static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
{
- atomic_inc(&cma_dev->refcount);
+ struct in_device *in_dev = NULL;
+
+ if (ndev) {
+ rtnl_lock();
+ in_dev = __in_dev_get_rtnl(ndev);
+ if (in_dev) {
+ if (join)
+ ip_mc_inc_group(in_dev,
+ *(__be32 *)(mgid->raw + 12));
+ else
+ ip_mc_dec_group(in_dev,
+ *(__be32 *)(mgid->raw + 12));
+ }
+ rtnl_unlock();
+ }
+ return (in_dev) ? 0 : -ENODEV;
+}
+
+static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
+ struct cma_device *cma_dev)
+{
+ cma_ref_dev(cma_dev);
id_priv->cma_dev = cma_dev;
+ id_priv->gid_type = 0;
id_priv->id.device = cma_dev->device;
id_priv->id.route.addr.dev_addr.transport =
rdma_node_get_transport(cma_dev->device->node_type);
list_add_tail(&id_priv->list, &cma_dev->id_list);
}
-static inline void cma_deref_dev(struct cma_device *cma_dev)
+static void cma_attach_to_dev(struct rdma_id_private *id_priv,
+ struct cma_device *cma_dev)
+{
+ _cma_attach_to_dev(id_priv, cma_dev);
+ id_priv->gid_type =
+ cma_dev->default_gid_type[id_priv->id.port_num -
+ rdma_start_port(cma_dev->device)];
+}
+
+void cma_deref_dev(struct cma_device *cma_dev)
{
if (atomic_dec_and_test(&cma_dev->refcount))
complete(&cma_dev->comp);
@@ -441,6 +538,7 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
}
static inline int cma_validate_port(struct ib_device *device, u8 port,
+ enum ib_gid_type gid_type,
union ib_gid *gid, int dev_type,
int bound_if_index)
{
@@ -453,10 +551,25 @@ static inline int cma_validate_port(struct ib_device *device, u8 port,
if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
return ret;
- if (dev_type == ARPHRD_ETHER)
+ if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
ndev = dev_get_by_index(&init_net, bound_if_index);
+ if (ndev && ndev->flags & IFF_LOOPBACK) {
+ pr_info("detected loopback device\n");
+ dev_put(ndev);
- ret = ib_find_cached_gid_by_port(device, gid, port, ndev, NULL);
+ if (!device->get_netdev)
+ return -EOPNOTSUPP;
+
+ ndev = device->get_netdev(device, port);
+ if (!ndev)
+ return -ENODEV;
+ }
+ } else {
+ gid_type = IB_GID_TYPE_IB;
+ }
+
+ ret = ib_find_cached_gid_by_port(device, gid, gid_type, port,
+ ndev, NULL);
if (ndev)
dev_put(ndev);
@@ -490,7 +603,10 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
gidp = rdma_protocol_roce(cma_dev->device, port) ?
&iboe_gid : &gid;
- ret = cma_validate_port(cma_dev->device, port, gidp,
+ ret = cma_validate_port(cma_dev->device, port,
+ rdma_protocol_ib(cma_dev->device, port) ?
+ IB_GID_TYPE_IB :
+ listen_id_priv->gid_type, gidp,
dev_addr->dev_type,
dev_addr->bound_dev_if);
if (!ret) {
@@ -509,8 +625,11 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
gidp = rdma_protocol_roce(cma_dev->device, port) ?
&iboe_gid : &gid;
- ret = cma_validate_port(cma_dev->device, port, gidp,
- dev_addr->dev_type,
+ ret = cma_validate_port(cma_dev->device, port,
+ rdma_protocol_ib(cma_dev->device, port) ?
+ IB_GID_TYPE_IB :
+ cma_dev->default_gid_type[port - 1],
+ gidp, dev_addr->dev_type,
dev_addr->bound_dev_if);
if (!ret) {
id_priv->id.port_num = port;
@@ -1437,8 +1556,24 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
id_priv->id.port_num)) {
ib_sa_free_multicast(mc->multicast.ib);
kfree(mc);
- } else
+ } else {
+ if (mc->igmp_joined) {
+ struct rdma_dev_addr *dev_addr =
+ &id_priv->id.route.addr.dev_addr;
+ struct net_device *ndev = NULL;
+
+ if (dev_addr->bound_dev_if)
+ ndev = dev_get_by_index(&init_net,
+ dev_addr->bound_dev_if);
+ if (ndev) {
+ cma_igmp_send(ndev,
+ &mc->multicast.ib->rec.mgid,
+ false);
+ dev_put(ndev);
+ }
+ }
kref_put(&mc->mcref, release_mc);
+ }
}
}
@@ -1896,7 +2031,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
struct rdma_id_private *listen_id, *conn_id;
struct rdma_cm_event event;
int ret;
- struct ib_device_attr attr;
struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
@@ -1938,13 +2072,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr));
memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr));
- ret = ib_query_device(conn_id->id.device, &attr);
- if (ret) {
- mutex_unlock(&conn_id->handler_mutex);
- rdma_destroy_id(new_cm_id);
- goto out;
- }
-
memset(&event, 0, sizeof event);
event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
event.param.conn.private_data = iw_event->private_data;
@@ -2051,7 +2178,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
rdma_addr_size(cma_src_addr(id_priv)));
- cma_attach_to_dev(dev_id_priv, cma_dev);
+ _cma_attach_to_dev(dev_id_priv, cma_dev);
list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
atomic_inc(&id_priv->refcount);
dev_id_priv->internal_id = 1;
@@ -2321,8 +2448,23 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
if (addr->dev_addr.bound_dev_if) {
ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
+ if (!ndev)
+ return -ENODEV;
+
+ if (ndev->flags & IFF_LOOPBACK) {
+ dev_put(ndev);
+ if (!id_priv->id.device->get_netdev)
+ return -EOPNOTSUPP;
+
+ ndev = id_priv->id.device->get_netdev(id_priv->id.device,
+ id_priv->id.port_num);
+ if (!ndev)
+ return -ENODEV;
+ }
+
route->path_rec->net = &init_net;
- route->path_rec->ifindex = addr->dev_addr.bound_dev_if;
+ route->path_rec->ifindex = ndev->ifindex;
+ route->path_rec->gid_type = id_priv->gid_type;
}
if (!ndev) {
ret = -ENODEV;
@@ -2336,7 +2478,14 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr,
&route->path_rec->dgid);
- route->path_rec->hop_limit = 1;
+ /* Use the hint from IP Stack to select GID Type */
+ if (route->path_rec->gid_type < ib_network_to_gid_type(addr->dev_addr.network))
+ route->path_rec->gid_type = ib_network_to_gid_type(addr->dev_addr.network);
+ if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB)
+ /* TODO: get the hoplimit from the inet/inet6 device */
+ route->path_rec->hop_limit = addr->dev_addr.hoplimit;
+ else
+ route->path_rec->hop_limit = 1;
route->path_rec->reversible = 1;
route->path_rec->pkey = cpu_to_be16(0xffff);
route->path_rec->mtu_selector = IB_SA_EQ;
@@ -3534,12 +3683,23 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
event.status = status;
event.param.ud.private_data = mc->context;
if (!status) {
+ struct rdma_dev_addr *dev_addr =
+ &id_priv->id.route.addr.dev_addr;
+ struct net_device *ndev =
+ dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+ enum ib_gid_type gid_type =
+ id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
+ rdma_start_port(id_priv->cma_dev->device)];
+
event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
ib_init_ah_from_mcmember(id_priv->id.device,
id_priv->id.port_num, &multicast->rec,
+ ndev, gid_type,
&event.param.ud.ah_attr);
event.param.ud.qp_num = 0xFFFFFF;
event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
+ if (ndev)
+ dev_put(ndev);
} else
event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
@@ -3672,9 +3832,10 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
{
struct iboe_mcast_work *work;
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
- int err;
+ int err = 0;
struct sockaddr *addr = (struct sockaddr *)&mc->addr;
struct net_device *ndev = NULL;
+ enum ib_gid_type gid_type;
if (cma_zero_addr((struct sockaddr *)&mc->addr))
return -EINVAL;
@@ -3704,9 +3865,25 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
mc->multicast.ib->rec.hop_limit = 1;
mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
+
+ gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
+ rdma_start_port(id_priv->cma_dev->device)];
+ if (addr->sa_family == AF_INET) {
+ if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+ err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
+ true);
+ if (!err) {
+ mc->igmp_joined = true;
+ mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
+ }
+ } else {
+ if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+ err = -ENOTSUPP;
+ }
dev_put(ndev);
- if (!mc->multicast.ib->rec.mtu) {
- err = -EINVAL;
+ if (err || !mc->multicast.ib->rec.mtu) {
+ if (!err)
+ err = -EINVAL;
goto out2;
}
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
@@ -3745,7 +3922,7 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
memcpy(&mc->addr, addr, rdma_addr_size(addr));
mc->context = context;
mc->id_priv = id_priv;
-
+ mc->igmp_joined = false;
spin_lock(&id_priv->lock);
list_add(&mc->list, &id_priv->mc_list);
spin_unlock(&id_priv->lock);
@@ -3790,9 +3967,25 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
if (rdma_cap_ib_mcast(id->device, id->port_num)) {
ib_sa_free_multicast(mc->multicast.ib);
kfree(mc);
- } else if (rdma_protocol_roce(id->device, id->port_num))
+ } else if (rdma_protocol_roce(id->device, id->port_num)) {
+ if (mc->igmp_joined) {
+ struct rdma_dev_addr *dev_addr =
+ &id->route.addr.dev_addr;
+ struct net_device *ndev = NULL;
+
+ if (dev_addr->bound_dev_if)
+ ndev = dev_get_by_index(&init_net,
+ dev_addr->bound_dev_if);
+ if (ndev) {
+ cma_igmp_send(ndev,
+ &mc->multicast.ib->rec.mgid,
+ false);
+ dev_put(ndev);
+ }
+ mc->igmp_joined = false;
+ }
kref_put(&mc->mcref, release_mc);
-
+ }
return;
}
}
@@ -3861,12 +4054,27 @@ static void cma_add_one(struct ib_device *device)
{
struct cma_device *cma_dev;
struct rdma_id_private *id_priv;
+ unsigned int i;
+ unsigned long supported_gids = 0;
cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
if (!cma_dev)
return;
cma_dev->device = device;
+ cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
+ sizeof(*cma_dev->default_gid_type),
+ GFP_KERNEL);
+ if (!cma_dev->default_gid_type) {
+ kfree(cma_dev);
+ return;
+ }
+ for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
+ supported_gids = roce_gid_type_mask_support(device, i);
+ WARN_ON(!supported_gids);
+ cma_dev->default_gid_type[i - rdma_start_port(device)] =
+ find_first_bit(&supported_gids, BITS_PER_LONG);
+ }
init_completion(&cma_dev->comp);
atomic_set(&cma_dev->refcount, 1);
@@ -3946,6 +4154,7 @@ static void cma_remove_one(struct ib_device *device, void *client_data)
mutex_unlock(&lock);
cma_process_remove(cma_dev);
+ kfree(cma_dev->default_gid_type);
kfree(cma_dev);
}
@@ -4079,6 +4288,7 @@ static int __init cma_init(void)
if (ibnl_add_client(RDMA_NL_RDMA_CM, RDMA_NL_RDMA_CM_NUM_OPS, cma_cb_table))
printk(KERN_WARNING "RDMA CMA: failed to add netlink callback\n");
+ cma_configfs_init();
return 0;
@@ -4093,6 +4303,7 @@ err_wq:
static void __exit cma_cleanup(void)
{
+ cma_configfs_exit();
ibnl_remove_client(RDMA_NL_RDMA_CM);
ib_unregister_client(&cma_client);
unregister_netdevice_notifier(&cma_nb);
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
new file mode 100644
index 0000000..18b112a
--- /dev/null
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -0,0 +1,321 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/configfs.h>
+#include <rdma/ib_verbs.h>
+#include "core_priv.h"
+
+struct cma_device;
+
+struct cma_dev_group;
+
+struct cma_dev_port_group {
+ unsigned int port_num;
+ struct cma_dev_group *cma_dev_group;
+ struct config_group group;
+};
+
+struct cma_dev_group {
+ char name[IB_DEVICE_NAME_MAX];
+ struct config_group device_group;
+ struct config_group ports_group;
+ struct config_group *default_dev_group[2];
+ struct config_group **default_ports_group;
+ struct cma_dev_port_group *ports;
+};
+
+static struct cma_dev_port_group *to_dev_port_group(struct config_item *item)
+{
+ struct config_group *group;
+
+ if (!item)
+ return NULL;
+
+ group = container_of(item, struct config_group, cg_item);
+ return container_of(group, struct cma_dev_port_group, group);
+}
+
+static bool filter_by_name(struct ib_device *ib_dev, void *cookie)
+{
+ return !strcmp(ib_dev->name, cookie);
+}
+
+static int cma_configfs_params_get(struct config_item *item,
+ struct cma_device **pcma_dev,
+ struct cma_dev_port_group **pgroup)
+{
+ struct cma_dev_port_group *group = to_dev_port_group(item);
+ struct cma_device *cma_dev;
+
+ if (!group)
+ return -ENODEV;
+
+ cma_dev = cma_enum_devices_by_ibdev(filter_by_name,
+ group->cma_dev_group->name);
+ if (!cma_dev)
+ return -ENODEV;
+
+ *pcma_dev = cma_dev;
+ *pgroup = group;
+
+ return 0;
+}
+
+static void cma_configfs_params_put(struct cma_device *cma_dev)
+{
+ cma_deref_dev(cma_dev);
+}
+
+static ssize_t default_roce_mode_show(struct config_item *item,
+ char *buf)
+{
+ struct cma_device *cma_dev;
+ struct cma_dev_port_group *group;
+ int gid_type;
+ ssize_t ret;
+
+ ret = cma_configfs_params_get(item, &cma_dev, &group);
+ if (ret)
+ return ret;
+
+ gid_type = cma_get_default_gid_type(cma_dev, group->port_num);
+ cma_configfs_params_put(cma_dev);
+
+ if (gid_type < 0)
+ return gid_type;
+
+ return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_type));
+}
+
+static ssize_t default_roce_mode_store(struct config_item *item,
+ const char *buf, size_t count)
+{
+ struct cma_device *cma_dev;
+ struct cma_dev_port_group *group;
+ int gid_type = ib_cache_gid_parse_type_str(buf);
+ ssize_t ret;
+
+ if (gid_type < 0)
+ return -EINVAL;
+
+ ret = cma_configfs_params_get(item, &cma_dev, &group);
+ if (ret)
+ return ret;
+
+ ret = cma_set_default_gid_type(cma_dev, group->port_num, gid_type);
+
+ cma_configfs_params_put(cma_dev);
+
+ return !ret ? strnlen(buf, count) : ret;
+}
+
+CONFIGFS_ATTR(, default_roce_mode);
+
+static struct configfs_attribute *cma_configfs_attributes[] = {
+ &attr_default_roce_mode,
+ NULL,
+};
+
+static struct config_item_type cma_port_group_type = {
+ .ct_attrs = cma_configfs_attributes,
+ .ct_owner = THIS_MODULE
+};
+
+static int make_cma_ports(struct cma_dev_group *cma_dev_group,
+ struct cma_device *cma_dev)
+{
+ struct ib_device *ibdev;
+ unsigned int i;
+ unsigned int ports_num;
+ struct cma_dev_port_group *ports;
+ struct config_group **ports_group;
+ int err;
+
+ ibdev = cma_get_ib_dev(cma_dev);
+
+ if (!ibdev)
+ return -ENODEV;
+
+ ports_num = ibdev->phys_port_cnt;
+ ports = kcalloc(ports_num, sizeof(*cma_dev_group->ports),
+ GFP_KERNEL);
+ ports_group = kcalloc(ports_num + 1, sizeof(*ports_group), GFP_KERNEL);
+
+ if (!ports || !ports_group) {
+ err = -ENOMEM;
+ goto free;
+ }
+
+ for (i = 0; i < ports_num; i++) {
+ char port_str[10];
+
+ ports[i].port_num = i + 1;
+ snprintf(port_str, sizeof(port_str), "%u", i + 1);
+ ports[i].cma_dev_group = cma_dev_group;
+ config_group_init_type_name(&ports[i].group,
+ port_str,
+ &cma_port_group_type);
+ ports_group[i] = &ports[i].group;
+ }
+ ports_group[i] = NULL;
+ cma_dev_group->default_ports_group = ports_group;
+ cma_dev_group->ports = ports;
+
+ return 0;
+free:
+ kfree(ports);
+ kfree(ports_group);
+ cma_dev_group->ports = NULL;
+ cma_dev_group->default_ports_group = NULL;
+ return err;
+}
+
+static void release_cma_dev(struct config_item *item)
+{
+ struct config_group *group = container_of(item, struct config_group,
+ cg_item);
+ struct cma_dev_group *cma_dev_group = container_of(group,
+ struct cma_dev_group,
+ device_group);
+
+ kfree(cma_dev_group);
+};
+
+static void release_cma_ports_group(struct config_item *item)
+{
+ struct config_group *group = container_of(item, struct config_group,
+ cg_item);
+ struct cma_dev_group *cma_dev_group = container_of(group,
+ struct cma_dev_group,
+ ports_group);
+
+ kfree(cma_dev_group->ports);
+ kfree(cma_dev_group->default_ports_group);
+ cma_dev_group->ports = NULL;
+ cma_dev_group->default_ports_group = NULL;
+};
+
+static struct configfs_item_operations cma_ports_item_ops = {
+ .release = release_cma_ports_group
+};
+
+static struct config_item_type cma_ports_group_type = {
+ .ct_item_ops = &cma_ports_item_ops,
+ .ct_owner = THIS_MODULE
+};
+
+static struct configfs_item_operations cma_device_item_ops = {
+ .release = release_cma_dev
+};
+
+static struct config_item_type cma_device_group_type = {
+ .ct_item_ops = &cma_device_item_ops,
+ .ct_owner = THIS_MODULE
+};
+
+static struct config_group *make_cma_dev(struct config_group *group,
+ const char *name)
+{
+ int err = -ENODEV;
+ struct cma_device *cma_dev = cma_enum_devices_by_ibdev(filter_by_name,
+ (void *)name);
+ struct cma_dev_group *cma_dev_group = NULL;
+
+ if (!cma_dev)
+ goto fail;
+
+ cma_dev_group = kzalloc(sizeof(*cma_dev_group), GFP_KERNEL);
+
+ if (!cma_dev_group) {
+ err = -ENOMEM;
+ goto fail;
+ }
+
+ strncpy(cma_dev_group->name, name, sizeof(cma_dev_group->name));
+
+ err = make_cma_ports(cma_dev_group, cma_dev);
+ if (err)
+ goto fail;
+
+ cma_dev_group->ports_group.default_groups =
+ cma_dev_group->default_ports_group;
+ config_group_init_type_name(&cma_dev_group->ports_group, "ports",
+ &cma_ports_group_type);
+
+ cma_dev_group->device_group.default_groups
+ = cma_dev_group->default_dev_group;
+ cma_dev_group->default_dev_group[0] = &cma_dev_group->ports_group;
+ cma_dev_group->default_dev_group[1] = NULL;
+
+ config_group_init_type_name(&cma_dev_group->device_group, name,
+ &cma_device_group_type);
+
+ cma_deref_dev(cma_dev);
+ return &cma_dev_group->device_group;
+
+fail:
+ if (cma_dev)
+ cma_deref_dev(cma_dev);
+ kfree(cma_dev_group);
+ return ERR_PTR(err);
+}
+
+static struct configfs_group_operations cma_subsys_group_ops = {
+ .make_group = make_cma_dev,
+};
+
+static struct config_item_type cma_subsys_type = {
+ .ct_group_ops = &cma_subsys_group_ops,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct configfs_subsystem cma_subsys = {
+ .su_group = {
+ .cg_item = {
+ .ci_namebuf = "rdma_cm",
+ .ci_type = &cma_subsys_type,
+ },
+ },
+};
+
+int __init cma_configfs_init(void)
+{
+ config_group_init(&cma_subsys.su_group);
+ mutex_init(&cma_subsys.su_mutex);
+ return configfs_register_subsystem(&cma_subsys);
+}
+
+void __exit cma_configfs_exit(void)
+{
+ configfs_unregister_subsystem(&cma_subsys);
+}
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 5cf6eb7..eab3221 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -38,6 +38,32 @@
#include <rdma/ib_verbs.h>
+#if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS)
+int cma_configfs_init(void);
+void cma_configfs_exit(void);
+#else
+static inline int cma_configfs_init(void)
+{
+ return 0;
+}
+
+static inline void cma_configfs_exit(void)
+{
+}
+#endif
+struct cma_device;
+void cma_ref_dev(struct cma_device *cma_dev);
+void cma_deref_dev(struct cma_device *cma_dev);
+typedef bool (*cma_device_filter)(struct ib_device *, void *);
+struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
+ void *cookie);
+int cma_get_default_gid_type(struct cma_device *cma_dev,
+ unsigned int port);
+int cma_set_default_gid_type(struct cma_device *cma_dev,
+ unsigned int port,
+ enum ib_gid_type default_gid_type);
+struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev);
+
int ib_device_register_sysfs(struct ib_device *device,
int (*port_callback)(struct ib_device *,
u8, struct kobject *));
@@ -70,8 +96,13 @@ enum ib_cache_gid_default_mode {
IB_CACHE_GID_DEFAULT_MODE_DELETE
};
+int ib_cache_gid_parse_type_str(const char *buf);
+
+const char *ib_cache_gid_type_str(enum ib_gid_type gid_type);
+
void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
struct net_device *ndev,
+ unsigned long gid_type_mask,
enum ib_cache_gid_default_mode mode);
int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
@@ -87,9 +118,23 @@ int roce_gid_mgmt_init(void);
void roce_gid_mgmt_cleanup(void);
int roce_rescan_device(struct ib_device *ib_dev);
+unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port);
int ib_cache_setup_one(struct ib_device *device);
void ib_cache_cleanup_one(struct ib_device *device);
void ib_cache_release_one(struct ib_device *device);
+static inline bool rdma_is_upper_dev_rcu(struct net_device *dev,
+ struct net_device *upper)
+{
+ struct net_device *_upper = NULL;
+ struct list_head *iter;
+
+ netdev_for_each_all_upper_dev_rcu(dev, _upper, iter)
+ if (_upper == upper)
+ break;
+
+ return _upper == upper;
+}
+
#endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
new file mode 100644
index 0000000..a754fc7
--- /dev/null
+++ b/drivers/infiniband/core/cq.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2015 HGST, a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <rdma/ib_verbs.h>
+
+/* # of WCs to poll for with a single call to ib_poll_cq */
+#define IB_POLL_BATCH 16
+
+/* # of WCs to iterate over before yielding */
+#define IB_POLL_BUDGET_IRQ 256
+#define IB_POLL_BUDGET_WORKQUEUE 65536
+
+#define IB_POLL_FLAGS \
+ (IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)
+
+static int __ib_process_cq(struct ib_cq *cq, int budget)
+{
+ int i, n, completed = 0;
+
+ while ((n = ib_poll_cq(cq, IB_POLL_BATCH, cq->wc)) > 0) {
+ for (i = 0; i < n; i++) {
+ struct ib_wc *wc = &cq->wc[i];
+
+ if (wc->wr_cqe)
+ wc->wr_cqe->done(cq, wc);
+ else
+ WARN_ON_ONCE(wc->status == IB_WC_SUCCESS);
+ }
+
+ completed += n;
+
+ if (n != IB_POLL_BATCH ||
+ (budget != -1 && completed >= budget))
+ break;
+ }
+
+ return completed;
+}
+
+/**
+ * ib_process_direct_cq - process a CQ in caller context
+ * @cq: CQ to process
+ * @budget: number of CQEs to poll for
+ *
+ * This function is used to process all outstanding CQ entries on a
+ * %IB_POLL_DIRECT CQ. It does not offload CQ processing to a different
+ * context and does not ask for completion interrupts from the HCA.
+ *
+ * Note: for compatibility reasons -1 can be passed in %budget for unlimited
+ * polling. Do not use this feature in new code, it will be removed soon.
+ */
+int ib_process_cq_direct(struct ib_cq *cq, int budget)
+{
+ WARN_ON_ONCE(cq->poll_ctx != IB_POLL_DIRECT);
+
+ return __ib_process_cq(cq, budget);
+}
+EXPORT_SYMBOL(ib_process_cq_direct);
+
+static void ib_cq_completion_direct(struct ib_cq *cq, void *private)
+{
+ WARN_ONCE(1, "got unsolicited completion for CQ 0x%p\n", cq);
+}
+
+static int ib_poll_handler(struct irq_poll *iop, int budget)
+{
+ struct ib_cq *cq = container_of(iop, struct ib_cq, iop);
+ int completed;
+
+ completed = __ib_process_cq(cq, budget);
+ if (completed < budget) {
+ irq_poll_complete(&cq->iop);
+ if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
+ irq_poll_sched(&cq->iop);
+ }
+
+ return completed;
+}
+
+static void ib_cq_completion_softirq(struct ib_cq *cq, void *private)
+{
+ irq_poll_sched(&cq->iop);
+}
+
+static void ib_cq_poll_work(struct work_struct *work)
+{
+ struct ib_cq *cq = container_of(work, struct ib_cq, work);
+ int completed;
+
+ completed = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE);
+ if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
+ ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
+ queue_work(ib_comp_wq, &cq->work);
+}
+
+static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
+{
+ queue_work(ib_comp_wq, &cq->work);
+}
+
+/**
+ * ib_alloc_cq - allocate a completion queue
+ * @dev: device to allocate the CQ for
+ * @private: driver private data, accessible from cq->cq_context
+ * @nr_cqe: number of CQEs to allocate
+ * @comp_vector: HCA completion vectors for this CQ
+ * @poll_ctx: context to poll the CQ from.
+ *
+ * This is the proper interface to allocate a CQ for in-kernel users. A
+ * CQ allocated with this interface will automatically be polled from the
+ * specified context. The ULP needs must use wr->wr_cqe instead of wr->wr_id
+ * to use this CQ abstraction.
+ */
+struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
+ int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx)
+{
+ struct ib_cq_init_attr cq_attr = {
+ .cqe = nr_cqe,
+ .comp_vector = comp_vector,
+ };
+ struct ib_cq *cq;
+ int ret = -ENOMEM;
+
+ cq = dev->create_cq(dev, &cq_attr, NULL, NULL);
+ if (IS_ERR(cq))
+ return cq;
+
+ cq->device = dev;
+ cq->uobject = NULL;
+ cq->event_handler = NULL;
+ cq->cq_context = private;
+ cq->poll_ctx = poll_ctx;
+ atomic_set(&cq->usecnt, 0);
+
+ cq->wc = kmalloc_array(IB_POLL_BATCH, sizeof(*cq->wc), GFP_KERNEL);
+ if (!cq->wc)
+ goto out_destroy_cq;
+
+ switch (cq->poll_ctx) {
+ case IB_POLL_DIRECT:
+ cq->comp_handler = ib_cq_completion_direct;
+ break;
+ case IB_POLL_SOFTIRQ:
+ cq->comp_handler = ib_cq_completion_softirq;
+
+ irq_poll_init(&cq->iop, IB_POLL_BUDGET_IRQ, ib_poll_handler);
+ ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+ break;
+ case IB_POLL_WORKQUEUE:
+ cq->comp_handler = ib_cq_completion_workqueue;
+ INIT_WORK(&cq->work, ib_cq_poll_work);
+ ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+ break;
+ default:
+ ret = -EINVAL;
+ goto out_free_wc;
+ }
+
+ return cq;
+
+out_free_wc:
+ kfree(cq->wc);
+out_destroy_cq:
+ cq->device->destroy_cq(cq);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(ib_alloc_cq);
+
+/**
+ * ib_free_cq - free a completion queue
+ * @cq: completion queue to free.
+ */
+void ib_free_cq(struct ib_cq *cq)
+{
+ int ret;
+
+ if (WARN_ON_ONCE(atomic_read(&cq->usecnt)))
+ return;
+
+ switch (cq->poll_ctx) {
+ case IB_POLL_DIRECT:
+ break;
+ case IB_POLL_SOFTIRQ:
+ irq_poll_disable(&cq->iop);
+ break;
+ case IB_POLL_WORKQUEUE:
+ flush_work(&cq->work);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+
+ kfree(cq->wc);
+ ret = cq->device->destroy_cq(cq);
+ WARN_ON_ONCE(ret);
+}
+EXPORT_SYMBOL(ib_free_cq);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 179e813..00da80e 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -58,6 +58,7 @@ struct ib_client_data {
bool going_down;
};
+struct workqueue_struct *ib_comp_wq;
struct workqueue_struct *ib_wq;
EXPORT_SYMBOL_GPL(ib_wq);
@@ -325,6 +326,7 @@ int ib_register_device(struct ib_device *device,
{
int ret;
struct ib_client *client;
+ struct ib_udata uhw = {.outlen = 0, .inlen = 0};
mutex_lock(&device_mutex);
@@ -352,6 +354,13 @@ int ib_register_device(struct ib_device *device,
goto out;
}
+ memset(&device->attrs, 0, sizeof(device->attrs));
+ ret = device->query_device(device, &device->attrs, &uhw);
+ if (ret) {
+ printk(KERN_WARNING "Couldn't query the device attributes\n");
+ goto out;
+ }
+
ret = ib_device_register_sysfs(device, port_callback);
if (ret) {
printk(KERN_WARNING "Couldn't register device %s with driver model\n",
@@ -628,25 +637,6 @@ void ib_dispatch_event(struct ib_event *event)
EXPORT_SYMBOL(ib_dispatch_event);
/**
- * ib_query_device - Query IB device attributes
- * @device:Device to query
- * @device_attr:Device attributes
- *
- * ib_query_device() returns the attributes of a device through the
- * @device_attr pointer.
- */
-int ib_query_device(struct ib_device *device,
- struct ib_device_attr *device_attr)
-{
- struct ib_udata uhw = {.outlen = 0, .inlen = 0};
-
- memset(device_attr, 0, sizeof(*device_attr));
-
- return device->query_device(device, device_attr, &uhw);
-}
-EXPORT_SYMBOL(ib_query_device);
-
-/**
* ib_query_port - Query IB port attributes
* @device:Device to query
* @port_num:Port number to query
@@ -825,26 +815,31 @@ EXPORT_SYMBOL(ib_modify_port);
* a specified GID value occurs.
* @device: The device to query.
* @gid: The GID value to search for.
+ * @gid_type: Type of GID.
* @ndev: The ndev related to the GID to search for.
* @port_num: The port number of the device where the GID value was found.
* @index: The index into the GID table where the GID was found. This
* parameter may be NULL.
*/
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
- struct net_device *ndev, u8 *port_num, u16 *index)
+ enum ib_gid_type gid_type, struct net_device *ndev,
+ u8 *port_num, u16 *index)
{
union ib_gid tmp_gid;
int ret, port, i;
for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) {
if (rdma_cap_roce_gid_table(device, port)) {
- if (!ib_find_cached_gid_by_port(device, gid, port,
+ if (!ib_find_cached_gid_by_port(device, gid, gid_type, port,
ndev, index)) {
*port_num = port;
return 0;
}
}
+ if (gid_type != IB_GID_TYPE_IB)
+ continue;
+
for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
ret = ib_query_gid(device, port, i, &tmp_gid, NULL);
if (ret)
@@ -954,10 +949,18 @@ static int __init ib_core_init(void)
if (!ib_wq)
return -ENOMEM;
+ ib_comp_wq = alloc_workqueue("ib-comp-wq",
+ WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM,
+ WQ_UNBOUND_MAX_ACTIVE);
+ if (!ib_comp_wq) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
ret = class_register(&ib_class);
if (ret) {
printk(KERN_WARNING "Couldn't create InfiniBand device class\n");
- goto err;
+ goto err_comp;
}
ret = ibnl_init();
@@ -972,7 +975,8 @@ static int __init ib_core_init(void)
err_sysfs:
class_unregister(&ib_class);
-
+err_comp:
+ destroy_workqueue(ib_comp_wq);
err:
destroy_workqueue(ib_wq);
return ret;
@@ -983,6 +987,7 @@ static void __exit ib_core_cleanup(void)
ib_cache_cleanup();
ibnl_cleanup();
class_unregister(&ib_class);
+ destroy_workqueue(ib_comp_wq);
/* Make sure that any pending umem accounting work is done. */
destroy_workqueue(ib_wq);
}
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index 9f5ad7c..6ac3683 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -212,7 +212,6 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
{
struct ib_device *device;
struct ib_fmr_pool *pool;
- struct ib_device_attr *attr;
int i;
int ret;
int max_remaps;
@@ -228,25 +227,10 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
return ERR_PTR(-ENOSYS);
}
- attr = kmalloc(sizeof *attr, GFP_KERNEL);
- if (!attr) {
- printk(KERN_WARNING PFX "couldn't allocate device attr struct\n");
- return ERR_PTR(-ENOMEM);
- }
-
- ret = ib_query_device(device, attr);
- if (ret) {
- printk(KERN_WARNING PFX "couldn't query device: %d\n", ret);
- kfree(attr);
- return ERR_PTR(ret);
- }
-
- if (!attr->max_map_per_fmr)
+ if (!device->attrs.max_map_per_fmr)
max_remaps = IB_FMR_MAX_REMAPS;
else
- max_remaps = attr->max_map_per_fmr;
-
- kfree(attr);
+ max_remaps = device->attrs.max_map_per_fmr;
pool = kmalloc(sizeof *pool, GFP_KERNEL);
if (!pool) {
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 2281de1..9fa5bf3 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -84,6 +84,9 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
u8 mgmt_class);
static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
struct ib_mad_agent_private *agent_priv);
+static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
+ struct ib_wc *wc);
+static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc);
/*
* Returns a ib_mad_port_private structure or NULL for a device/port
@@ -681,7 +684,7 @@ static void snoop_recv(struct ib_mad_qp_info *qp_info,
atomic_inc(&mad_snoop_priv->refcount);
spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
- mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent,
+ mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent, NULL,
mad_recv_wc);
deref_snoop_agent(mad_snoop_priv);
spin_lock_irqsave(&qp_info->snoop_lock, flags);
@@ -689,12 +692,11 @@ static void snoop_recv(struct ib_mad_qp_info *qp_info,
spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
}
-static void build_smp_wc(struct ib_qp *qp,
- u64 wr_id, u16 slid, u16 pkey_index, u8 port_num,
- struct ib_wc *wc)
+static void build_smp_wc(struct ib_qp *qp, struct ib_cqe *cqe, u16 slid,
+ u16 pkey_index, u8 port_num, struct ib_wc *wc)
{
memset(wc, 0, sizeof *wc);
- wc->wr_id = wr_id;
+ wc->wr_cqe = cqe;
wc->status = IB_WC_SUCCESS;
wc->opcode = IB_WC_RECV;
wc->pkey_index = pkey_index;
@@ -832,7 +834,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
}
build_smp_wc(mad_agent_priv->agent.qp,
- send_wr->wr.wr_id, drslid,
+ send_wr->wr.wr_cqe, drslid,
send_wr->pkey_index,
send_wr->port_num, &mad_wc);
@@ -1039,7 +1041,9 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey;
- mad_send_wr->send_wr.wr.wr_id = (unsigned long) mad_send_wr;
+ mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
+
+ mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list;
mad_send_wr->send_wr.wr.num_sge = 2;
mad_send_wr->send_wr.wr.opcode = IB_WR_SEND;
@@ -1151,8 +1155,9 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
/* Set WR ID to find mad_send_wr upon completion */
qp_info = mad_send_wr->mad_agent_priv->qp_info;
- mad_send_wr->send_wr.wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
+ mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
+ mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
mad_agent = mad_send_wr->send_buf.mad_agent;
sge = mad_send_wr->sg_list;
@@ -1982,9 +1987,9 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
/* user rmpp is in effect
* and this is an active RMPP MAD
*/
- mad_recv_wc->wc->wr_id = 0;
- mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
- mad_recv_wc);
+ mad_agent_priv->agent.recv_handler(
+ &mad_agent_priv->agent, NULL,
+ mad_recv_wc);
atomic_dec(&mad_agent_priv->refcount);
} else {
/* not user rmpp, revert to normal behavior and
@@ -1998,9 +2003,10 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
/* Defined behavior is to complete response before request */
- mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf;
- mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
- mad_recv_wc);
+ mad_agent_priv->agent.recv_handler(
+ &mad_agent_priv->agent,
+ &mad_send_wr->send_buf,
+ mad_recv_wc);
atomic_dec(&mad_agent_priv->refcount);
mad_send_wc.status = IB_WC_SUCCESS;
@@ -2009,7 +2015,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
}
} else {
- mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
+ mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, NULL,
mad_recv_wc);
deref_mad_agent(mad_agent_priv);
}
@@ -2172,13 +2178,14 @@ handle_smi(struct ib_mad_port_private *port_priv,
return handle_ib_smi(port_priv, qp_info, wc, port_num, recv, response);
}
-static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
- struct ib_wc *wc)
+static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
+ struct ib_mad_port_private *port_priv = cq->cq_context;
+ struct ib_mad_list_head *mad_list =
+ container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
struct ib_mad_qp_info *qp_info;
struct ib_mad_private_header *mad_priv_hdr;
struct ib_mad_private *recv, *response = NULL;
- struct ib_mad_list_head *mad_list;
struct ib_mad_agent_private *mad_agent;
int port_num;
int ret = IB_MAD_RESULT_SUCCESS;
@@ -2186,7 +2193,17 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
u16 resp_mad_pkey_index = 0;
bool opa;
- mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
+ if (list_empty_careful(&port_priv->port_list))
+ return;
+
+ if (wc->status != IB_WC_SUCCESS) {
+ /*
+ * Receive errors indicate that the QP has entered the error
+ * state - error handling/shutdown code will cleanup
+ */
+ return;
+ }
+
qp_info = mad_list->mad_queue->qp_info;
dequeue_mad(mad_list);
@@ -2227,7 +2244,7 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
response = alloc_mad_private(mad_size, GFP_KERNEL);
if (!response) {
dev_err(&port_priv->device->dev,
- "ib_mad_recv_done_handler no memory for response buffer\n");
+ "%s: no memory for response buffer\n", __func__);
goto out;
}
@@ -2413,11 +2430,12 @@ done:
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
}
-static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
- struct ib_wc *wc)
+static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
+ struct ib_mad_port_private *port_priv = cq->cq_context;
+ struct ib_mad_list_head *mad_list =
+ container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
struct ib_mad_send_wr_private *mad_send_wr, *queued_send_wr;
- struct ib_mad_list_head *mad_list;
struct ib_mad_qp_info *qp_info;
struct ib_mad_queue *send_queue;
struct ib_send_wr *bad_send_wr;
@@ -2425,7 +2443,14 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
unsigned long flags;
int ret;
- mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
+ if (list_empty_careful(&port_priv->port_list))
+ return;
+
+ if (wc->status != IB_WC_SUCCESS) {
+ if (!ib_mad_send_error(port_priv, wc))
+ return;
+ }
+
mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
mad_list);
send_queue = mad_list->mad_queue;
@@ -2490,24 +2515,15 @@ static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info)
spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
}
-static void mad_error_handler(struct ib_mad_port_private *port_priv,
- struct ib_wc *wc)
+static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
+ struct ib_wc *wc)
{
- struct ib_mad_list_head *mad_list;
- struct ib_mad_qp_info *qp_info;
+ struct ib_mad_list_head *mad_list =
+ container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
+ struct ib_mad_qp_info *qp_info = mad_list->mad_queue->qp_info;
struct ib_mad_send_wr_private *mad_send_wr;
int ret;
- /* Determine if failure was a send or receive */
- mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
- qp_info = mad_list->mad_queue->qp_info;
- if (mad_list->mad_queue == &qp_info->recv_queue)
- /*
- * Receive errors indicate that the QP has entered the error
- * state - error handling/shutdown code will cleanup
- */
- return;
-
/*
* Send errors will transition the QP to SQE - move
* QP to RTS and repost flushed work requests
@@ -2522,10 +2538,9 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv,
mad_send_wr->retry = 0;
ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr,
&bad_send_wr);
- if (ret)
- ib_mad_send_done_handler(port_priv, wc);
- } else
- ib_mad_send_done_handler(port_priv, wc);
+ if (!ret)
+ return false;
+ }
} else {
struct ib_qp_attr *attr;
@@ -2539,42 +2554,14 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv,
kfree(attr);
if (ret)
dev_err(&port_priv->device->dev,
- "mad_error_handler - ib_modify_qp to RTS : %d\n",
- ret);
+ "%s - ib_modify_qp to RTS: %d\n",
+ __func__, ret);
else
mark_sends_for_retry(qp_info);
}
- ib_mad_send_done_handler(port_priv, wc);
}
-}
-/*
- * IB MAD completion callback
- */
-static void ib_mad_completion_handler(struct work_struct *work)
-{
- struct ib_mad_port_private *port_priv;
- struct ib_wc wc;
-
- port_priv = container_of(work, struct ib_mad_port_private, work);
- ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
-
- while (ib_poll_cq(port_priv->cq, 1, &wc) == 1) {
- if (wc.status == IB_WC_SUCCESS) {
- switch (wc.opcode) {
- case IB_WC_SEND:
- ib_mad_send_done_handler(port_priv, &wc);
- break;
- case IB_WC_RECV:
- ib_mad_recv_done_handler(port_priv, &wc);
- break;
- default:
- BUG_ON(1);
- break;
- }
- } else
- mad_error_handler(port_priv, &wc);
- }
+ return true;
}
static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
@@ -2716,7 +2703,7 @@ static void local_completions(struct work_struct *work)
* before request
*/
build_smp_wc(recv_mad_agent->agent.qp,
- (unsigned long) local->mad_send_wr,
+ local->mad_send_wr->send_wr.wr.wr_cqe,
be16_to_cpu(IB_LID_PERMISSIVE),
local->mad_send_wr->send_wr.pkey_index,
recv_mad_agent->agent.port_num, &wc);
@@ -2744,6 +2731,7 @@ static void local_completions(struct work_struct *work)
IB_MAD_SNOOP_RECVS);
recv_mad_agent->agent.recv_handler(
&recv_mad_agent->agent,
+ &local->mad_send_wr->send_buf,
&local->mad_priv->header.recv_wc);
spin_lock_irqsave(&recv_mad_agent->lock, flags);
atomic_dec(&recv_mad_agent->refcount);
@@ -2855,17 +2843,6 @@ static void timeout_sends(struct work_struct *work)
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
}
-static void ib_mad_thread_completion_handler(struct ib_cq *cq, void *arg)
-{
- struct ib_mad_port_private *port_priv = cq->cq_context;
- unsigned long flags;
-
- spin_lock_irqsave(&ib_mad_port_list_lock, flags);
- if (!list_empty(&port_priv->port_list))
- queue_work(port_priv->wq, &port_priv->work);
- spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
-}
-
/*
* Allocate receive MADs and post receive WRs for them
*/
@@ -2913,8 +2890,9 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
break;
}
mad_priv->header.mapping = sg_list.addr;
- recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
mad_priv->header.mad_list.mad_queue = recv_queue;
+ mad_priv->header.mad_list.cqe.done = ib_mad_recv_done;
+ recv_wr.wr_cqe = &mad_priv->header.mad_list.cqe;
/* Post receive WR */
spin_lock_irqsave(&recv_queue->lock, flags);
@@ -3151,7 +3129,6 @@ static int ib_mad_port_open(struct ib_device *device,
unsigned long flags;
char name[sizeof "ib_mad123"];
int has_smi;
- struct ib_cq_init_attr cq_attr = {};
if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE))
return -EFAULT;
@@ -3179,10 +3156,8 @@ static int ib_mad_port_open(struct ib_device *device,
if (has_smi)
cq_size *= 2;
- cq_attr.cqe = cq_size;
- port_priv->cq = ib_create_cq(port_priv->device,
- ib_mad_thread_completion_handler,
- NULL, port_priv, &cq_attr);
+ port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
+ IB_POLL_WORKQUEUE);
if (IS_ERR(port_priv->cq)) {
dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
ret = PTR_ERR(port_priv->cq);
@@ -3211,7 +3186,6 @@ static int ib_mad_port_open(struct ib_device *device,
ret = -ENOMEM;
goto error8;
}
- INIT_WORK(&port_priv->work, ib_mad_completion_handler);
spin_lock_irqsave(&ib_mad_port_list_lock, flags);
list_add_tail(&port_priv->port_list, &ib_mad_port_list);
@@ -3238,7 +3212,7 @@ error7:
error6:
ib_dealloc_pd(port_priv->pd);
error4:
- ib_destroy_cq(port_priv->cq);
+ ib_free_cq(port_priv->cq);
cleanup_recv_queue(&port_priv->qp_info[1]);
cleanup_recv_queue(&port_priv->qp_info[0]);
error3:
@@ -3271,7 +3245,7 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
destroy_mad_qp(&port_priv->qp_info[1]);
destroy_mad_qp(&port_priv->qp_info[0]);
ib_dealloc_pd(port_priv->pd);
- ib_destroy_cq(port_priv->cq);
+ ib_free_cq(port_priv->cq);
cleanup_recv_queue(&port_priv->qp_info[1]);
cleanup_recv_queue(&port_priv->qp_info[0]);
/* XXX: Handle deallocation of MAD registration tables */
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 990698a..28669f6 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -64,6 +64,7 @@
struct ib_mad_list_head {
struct list_head list;
+ struct ib_cqe cqe;
struct ib_mad_queue *mad_queue;
};
@@ -204,7 +205,6 @@ struct ib_mad_port_private {
struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION];
struct list_head agent_list;
struct workqueue_struct *wq;
- struct work_struct work;
struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE];
};
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index bb6685f..250937c 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -723,14 +723,27 @@ EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
struct ib_sa_mcmember_rec *rec,
+ struct net_device *ndev,
+ enum ib_gid_type gid_type,
struct ib_ah_attr *ah_attr)
{
int ret;
u16 gid_index;
u8 p;
- ret = ib_find_cached_gid(device, &rec->port_gid,
- NULL, &p, &gid_index);
+ if (rdma_protocol_roce(device, port_num)) {
+ ret = ib_find_cached_gid_by_port(device, &rec->port_gid,
+ gid_type, port_num,
+ ndev,
+ &gid_index);
+ } else if (rdma_protocol_ib(device, port_num)) {
+ ret = ib_find_cached_gid(device, &rec->port_gid,
+ IB_GID_TYPE_IB, NULL, &p,
+ &gid_index);
+ } else {
+ ret = -EINVAL;
+ }
+
if (ret)
return ret;
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index 178f984..06556c3 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -67,17 +67,53 @@ struct netdev_event_work {
struct netdev_event_work_cmd cmds[ROCE_NETDEV_CALLBACK_SZ];
};
+static const struct {
+ bool (*is_supported)(const struct ib_device *device, u8 port_num);
+ enum ib_gid_type gid_type;
+} PORT_CAP_TO_GID_TYPE[] = {
+ {rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE},
+ {rdma_protocol_roce_udp_encap, IB_GID_TYPE_ROCE_UDP_ENCAP},
+};
+
+#define CAP_TO_GID_TABLE_SIZE ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)
+
+unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
+{
+ int i;
+ unsigned int ret_flags = 0;
+
+ if (!rdma_protocol_roce(ib_dev, port))
+ return 1UL << IB_GID_TYPE_IB;
+
+ for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++)
+ if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port))
+ ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type;
+
+ return ret_flags;
+}
+EXPORT_SYMBOL(roce_gid_type_mask_support);
+
static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
u8 port, union ib_gid *gid,
struct ib_gid_attr *gid_attr)
{
- switch (gid_op) {
- case GID_ADD:
- ib_cache_gid_add(ib_dev, port, gid, gid_attr);
- break;
- case GID_DEL:
- ib_cache_gid_del(ib_dev, port, gid, gid_attr);
- break;
+ int i;
+ unsigned long gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+
+ for (i = 0; i < IB_GID_TYPE_SIZE; i++) {
+ if ((1UL << i) & gid_type_mask) {
+ gid_attr->gid_type = i;
+ switch (gid_op) {
+ case GID_ADD:
+ ib_cache_gid_add(ib_dev, port,
+ gid, gid_attr);
+ break;
+ case GID_DEL:
+ ib_cache_gid_del(ib_dev, port,
+ gid, gid_attr);
+ break;
+ }
+ }
}
}
@@ -103,18 +139,6 @@ static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct net_de
return BONDING_SLAVE_STATE_NA;
}
-static bool is_upper_dev_rcu(struct net_device *dev, struct net_device *upper)
-{
- struct net_device *_upper = NULL;
- struct list_head *iter;
-
- netdev_for_each_all_upper_dev_rcu(dev, _upper, iter)
- if (_upper == upper)
- break;
-
- return _upper == upper;
-}
-
#define REQUIRED_BOND_STATES (BONDING_SLAVE_STATE_ACTIVE | \
BONDING_SLAVE_STATE_NA)
static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
@@ -132,7 +156,7 @@ static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
if (!real_dev)
real_dev = event_ndev;
- res = ((is_upper_dev_rcu(rdma_ndev, event_ndev) &&
+ res = ((rdma_is_upper_dev_rcu(rdma_ndev, event_ndev) &&
(is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) &
REQUIRED_BOND_STATES)) ||
real_dev == rdma_ndev);
@@ -178,7 +202,7 @@ static int upper_device_filter(struct ib_device *ib_dev, u8 port,
return 1;
rcu_read_lock();
- res = is_upper_dev_rcu(rdma_ndev, event_ndev);
+ res = rdma_is_upper_dev_rcu(rdma_ndev, event_ndev);
rcu_read_unlock();
return res;
@@ -203,10 +227,12 @@ static void enum_netdev_default_gids(struct ib_device *ib_dev,
u8 port, struct net_device *event_ndev,
struct net_device *rdma_ndev)
{
+ unsigned long gid_type_mask;
+
rcu_read_lock();
if (!rdma_ndev ||
((rdma_ndev != event_ndev &&
- !is_upper_dev_rcu(rdma_ndev, event_ndev)) ||
+ !rdma_is_upper_dev_rcu(rdma_ndev, event_ndev)) ||
is_eth_active_slave_of_bonding_rcu(rdma_ndev,
netdev_master_upper_dev_get_rcu(rdma_ndev)) ==
BONDING_SLAVE_STATE_INACTIVE)) {
@@ -215,7 +241,9 @@ static void enum_netdev_default_gids(struct ib_device *ib_dev,
}
rcu_read_unlock();
- ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev,
+ gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+
+ ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev, gid_type_mask,
IB_CACHE_GID_DEFAULT_MODE_SET);
}
@@ -234,12 +262,17 @@ static void bond_delete_netdev_default_gids(struct ib_device *ib_dev,
rcu_read_lock();
- if (is_upper_dev_rcu(rdma_ndev, event_ndev) &&
+ if (rdma_is_upper_dev_rcu(rdma_ndev, event_ndev) &&
is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) ==
BONDING_SLAVE_STATE_INACTIVE) {
+ unsigned long gid_type_mask;
+
rcu_read_unlock();
+ gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+
ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev,
+ gid_type_mask,
IB_CACHE_GID_DEFAULT_MODE_DELETE);
} else {
rcu_read_unlock();
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index a95a32b..f334090 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -49,7 +49,9 @@
#include <net/netlink.h>
#include <uapi/rdma/ib_user_sa.h>
#include <rdma/ib_marshall.h>
+#include <rdma/ib_addr.h>
#include "sa.h"
+#include "core_priv.h"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand subnet administration query support");
@@ -715,7 +717,9 @@ static int ib_nl_handle_set_timeout(struct sk_buff *skb,
struct nlattr *tb[LS_NLA_TYPE_MAX];
int ret;
- if (!netlink_capable(skb, CAP_NET_ADMIN))
+ if (!(nlh->nlmsg_flags & NLM_F_REQUEST) ||
+ !(NETLINK_CB(skb).sk) ||
+ !netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
@@ -789,7 +793,9 @@ static int ib_nl_handle_resolve_resp(struct sk_buff *skb,
int found = 0;
int ret;
- if (!netlink_capable(skb, CAP_NET_ADMIN))
+ if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
+ !(NETLINK_CB(skb).sk) ||
+ !netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
spin_lock_irqsave(&ib_nl_request_lock, flags);
@@ -996,7 +1002,8 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
{
int ret;
u16 gid_index;
- int force_grh;
+ int use_roce;
+ struct net_device *ndev = NULL;
memset(ah_attr, 0, sizeof *ah_attr);
ah_attr->dlid = be16_to_cpu(rec->dlid);
@@ -1006,16 +1013,71 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
ah_attr->port_num = port_num;
ah_attr->static_rate = rec->rate;
- force_grh = rdma_cap_eth_ah(device, port_num);
+ use_roce = rdma_cap_eth_ah(device, port_num);
+
+ if (use_roce) {
+ struct net_device *idev;
+ struct net_device *resolved_dev;
+ struct rdma_dev_addr dev_addr = {.bound_dev_if = rec->ifindex,
+ .net = rec->net ? rec->net :
+ &init_net};
+ union {
+ struct sockaddr _sockaddr;
+ struct sockaddr_in _sockaddr_in;
+ struct sockaddr_in6 _sockaddr_in6;
+ } sgid_addr, dgid_addr;
+
+ if (!device->get_netdev)
+ return -EOPNOTSUPP;
+
+ rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid);
+ rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid);
+
+ /* validate the route */
+ ret = rdma_resolve_ip_route(&sgid_addr._sockaddr,
+ &dgid_addr._sockaddr, &dev_addr);
+ if (ret)
+ return ret;
- if (rec->hop_limit > 1 || force_grh) {
- struct net_device *ndev = ib_get_ndev_from_path(rec);
+ if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
+ dev_addr.network == RDMA_NETWORK_IPV6) &&
+ rec->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
+ return -EINVAL;
+
+ idev = device->get_netdev(device, port_num);
+ if (!idev)
+ return -ENODEV;
+
+ resolved_dev = dev_get_by_index(dev_addr.net,
+ dev_addr.bound_dev_if);
+ if (resolved_dev->flags & IFF_LOOPBACK) {
+ dev_put(resolved_dev);
+ resolved_dev = idev;
+ dev_hold(resolved_dev);
+ }
+ ndev = ib_get_ndev_from_path(rec);
+ rcu_read_lock();
+ if ((ndev && ndev != resolved_dev) ||
+ (resolved_dev != idev &&
+ !rdma_is_upper_dev_rcu(idev, resolved_dev)))
+ ret = -EHOSTUNREACH;
+ rcu_read_unlock();
+ dev_put(idev);
+ dev_put(resolved_dev);
+ if (ret) {
+ if (ndev)
+ dev_put(ndev);
+ return ret;
+ }
+ }
+ if (rec->hop_limit > 1 || use_roce) {
ah_attr->ah_flags = IB_AH_GRH;
ah_attr->grh.dgid = rec->dgid;
- ret = ib_find_cached_gid(device, &rec->sgid, ndev, &port_num,
- &gid_index);
+ ret = ib_find_cached_gid_by_port(device, &rec->sgid,
+ rec->gid_type, port_num, ndev,
+ &gid_index);
if (ret) {
if (ndev)
dev_put(ndev);
@@ -1029,9 +1091,10 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
if (ndev)
dev_put(ndev);
}
- if (force_grh) {
+
+ if (use_roce)
memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN);
- }
+
return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_path);
@@ -1157,6 +1220,7 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
mad->data, &rec);
rec.net = NULL;
rec.ifindex = 0;
+ rec.gid_type = IB_GID_TYPE_IB;
memset(rec.dmac, 0, ETH_ALEN);
query->callback(status, &rec, query->context);
} else
@@ -1609,14 +1673,15 @@ static void send_handler(struct ib_mad_agent *agent,
}
static void recv_handler(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_buf *send_buf,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct ib_sa_query *query;
- struct ib_mad_send_buf *mad_buf;
- mad_buf = (void *) (unsigned long) mad_recv_wc->wc->wr_id;
- query = mad_buf->context[0];
+ if (!send_buf)
+ return;
+ query = send_buf->context[0];
if (query->callback) {
if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
query->callback(query,
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index b1f37d4..3de9351 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -37,15 +37,27 @@
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/string.h>
+#include <linux/netdevice.h>
#include <rdma/ib_mad.h>
+#include <rdma/ib_pma.h>
+struct ib_port;
+
+struct gid_attr_group {
+ struct ib_port *port;
+ struct kobject kobj;
+ struct attribute_group ndev;
+ struct attribute_group type;
+};
struct ib_port {
struct kobject kobj;
struct ib_device *ibdev;
+ struct gid_attr_group *gid_attr_group;
struct attribute_group gid_group;
struct attribute_group pkey_group;
u8 port_num;
+ struct attribute_group *pma_table;
};
struct port_attribute {
@@ -65,6 +77,7 @@ struct port_table_attribute {
struct port_attribute attr;
char name[8];
int index;
+ __be16 attr_id;
};
static ssize_t port_attr_show(struct kobject *kobj,
@@ -84,6 +97,24 @@ static const struct sysfs_ops port_sysfs_ops = {
.show = port_attr_show
};
+static ssize_t gid_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct port_attribute *port_attr =
+ container_of(attr, struct port_attribute, attr);
+ struct ib_port *p = container_of(kobj, struct gid_attr_group,
+ kobj)->port;
+
+ if (!port_attr->show)
+ return -EIO;
+
+ return port_attr->show(p, port_attr, buf);
+}
+
+static const struct sysfs_ops gid_attr_sysfs_ops = {
+ .show = gid_attr_show
+};
+
static ssize_t state_show(struct ib_port *p, struct port_attribute *unused,
char *buf)
{
@@ -281,6 +312,46 @@ static struct attribute *port_default_attrs[] = {
NULL
};
+static size_t print_ndev(struct ib_gid_attr *gid_attr, char *buf)
+{
+ if (!gid_attr->ndev)
+ return -EINVAL;
+
+ return sprintf(buf, "%s\n", gid_attr->ndev->name);
+}
+
+static size_t print_gid_type(struct ib_gid_attr *gid_attr, char *buf)
+{
+ return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_attr->gid_type));
+}
+
+static ssize_t _show_port_gid_attr(struct ib_port *p,
+ struct port_attribute *attr,
+ char *buf,
+ size_t (*print)(struct ib_gid_attr *gid_attr,
+ char *buf))
+{
+ struct port_table_attribute *tab_attr =
+ container_of(attr, struct port_table_attribute, attr);
+ union ib_gid gid;
+ struct ib_gid_attr gid_attr = {};
+ ssize_t ret;
+ va_list args;
+
+ ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid,
+ &gid_attr);
+ if (ret)
+ goto err;
+
+ ret = print(&gid_attr, buf);
+
+err:
+ if (gid_attr.ndev)
+ dev_put(gid_attr.ndev);
+ va_end(args);
+ return ret;
+}
+
static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
char *buf)
{
@@ -296,6 +367,19 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
return sprintf(buf, "%pI6\n", gid.raw);
}
+static ssize_t show_port_gid_attr_ndev(struct ib_port *p,
+ struct port_attribute *attr, char *buf)
+{
+ return _show_port_gid_attr(p, attr, buf, print_ndev);
+}
+
+static ssize_t show_port_gid_attr_gid_type(struct ib_port *p,
+ struct port_attribute *attr,
+ char *buf)
+{
+ return _show_port_gid_attr(p, attr, buf, print_gid_type);
+}
+
static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
char *buf)
{
@@ -314,24 +398,32 @@ static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
#define PORT_PMA_ATTR(_name, _counter, _width, _offset) \
struct port_table_attribute port_pma_attr_##_name = { \
.attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \
- .index = (_offset) | ((_width) << 16) | ((_counter) << 24) \
+ .index = (_offset) | ((_width) << 16) | ((_counter) << 24), \
+ .attr_id = IB_PMA_PORT_COUNTERS , \
}
-static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
- char *buf)
+#define PORT_PMA_ATTR_EXT(_name, _width, _offset) \
+struct port_table_attribute port_pma_attr_ext_##_name = { \
+ .attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \
+ .index = (_offset) | ((_width) << 16), \
+ .attr_id = IB_PMA_PORT_COUNTERS_EXT , \
+}
+
+/*
+ * Get a Perfmgmt MAD block of data.
+ * Returns error code or the number of bytes retrieved.
+ */
+static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr,
+ void *data, int offset, size_t size)
{
- struct port_table_attribute *tab_attr =
- container_of(attr, struct port_table_attribute, attr);
- int offset = tab_attr->index & 0xffff;
- int width = (tab_attr->index >> 16) & 0xff;
- struct ib_mad *in_mad = NULL;
- struct ib_mad *out_mad = NULL;
+ struct ib_mad *in_mad;
+ struct ib_mad *out_mad;
size_t mad_size = sizeof(*out_mad);
u16 out_mad_pkey_index = 0;
ssize_t ret;
- if (!p->ibdev->process_mad)
- return sprintf(buf, "N/A (no PMA)\n");
+ if (!dev->process_mad)
+ return -ENOSYS;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
@@ -344,12 +436,13 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
in_mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_PERF_MGMT;
in_mad->mad_hdr.class_version = 1;
in_mad->mad_hdr.method = IB_MGMT_METHOD_GET;
- in_mad->mad_hdr.attr_id = cpu_to_be16(0x12); /* PortCounters */
+ in_mad->mad_hdr.attr_id = attr;
- in_mad->data[41] = p->port_num; /* PortSelect field */
+ if (attr != IB_PMA_CLASS_PORT_INFO)
+ in_mad->data[41] = port_num; /* PortSelect field */
- if ((p->ibdev->process_mad(p->ibdev, IB_MAD_IGNORE_MKEY,
- p->port_num, NULL, NULL,
+ if ((dev->process_mad(dev, IB_MAD_IGNORE_MKEY,
+ port_num, NULL, NULL,
(const struct ib_mad_hdr *)in_mad, mad_size,
(struct ib_mad_hdr *)out_mad, &mad_size,
&out_mad_pkey_index) &
@@ -358,31 +451,54 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
ret = -EINVAL;
goto out;
}
+ memcpy(data, out_mad->data + offset, size);
+ ret = size;
+out:
+ kfree(in_mad);
+ kfree(out_mad);
+ return ret;
+}
+
+static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
+ char *buf)
+{
+ struct port_table_attribute *tab_attr =
+ container_of(attr, struct port_table_attribute, attr);
+ int offset = tab_attr->index & 0xffff;
+ int width = (tab_attr->index >> 16) & 0xff;
+ ssize_t ret;
+ u8 data[8];
+
+ ret = get_perf_mad(p->ibdev, p->port_num, tab_attr->attr_id, &data,
+ 40 + offset / 8, sizeof(data));
+ if (ret < 0)
+ return sprintf(buf, "N/A (no PMA)\n");
switch (width) {
case 4:
- ret = sprintf(buf, "%u\n", (out_mad->data[40 + offset / 8] >>
+ ret = sprintf(buf, "%u\n", (*data >>
(4 - (offset % 8))) & 0xf);
break;
case 8:
- ret = sprintf(buf, "%u\n", out_mad->data[40 + offset / 8]);
+ ret = sprintf(buf, "%u\n", *data);
break;
case 16:
ret = sprintf(buf, "%u\n",
- be16_to_cpup((__be16 *)(out_mad->data + 40 + offset / 8)));
+ be16_to_cpup((__be16 *)data));
break;
case 32:
ret = sprintf(buf, "%u\n",
- be32_to_cpup((__be32 *)(out_mad->data + 40 + offset / 8)));
+ be32_to_cpup((__be32 *)data));
+ break;
+ case 64:
+ ret = sprintf(buf, "%llu\n",
+ be64_to_cpup((__be64 *)data));
break;
+
default:
ret = 0;
}
-out:
- kfree(in_mad);
- kfree(out_mad);
-
return ret;
}
@@ -403,6 +519,18 @@ static PORT_PMA_ATTR(port_rcv_data , 13, 32, 224);
static PORT_PMA_ATTR(port_xmit_packets , 14, 32, 256);
static PORT_PMA_ATTR(port_rcv_packets , 15, 32, 288);
+/*
+ * Counters added by extended set
+ */
+static PORT_PMA_ATTR_EXT(port_xmit_data , 64, 64);
+static PORT_PMA_ATTR_EXT(port_rcv_data , 64, 128);
+static PORT_PMA_ATTR_EXT(port_xmit_packets , 64, 192);
+static PORT_PMA_ATTR_EXT(port_rcv_packets , 64, 256);
+static PORT_PMA_ATTR_EXT(unicast_xmit_packets , 64, 320);
+static PORT_PMA_ATTR_EXT(unicast_rcv_packets , 64, 384);
+static PORT_PMA_ATTR_EXT(multicast_xmit_packets , 64, 448);
+static PORT_PMA_ATTR_EXT(multicast_rcv_packets , 64, 512);
+
static struct attribute *pma_attrs[] = {
&port_pma_attr_symbol_error.attr.attr,
&port_pma_attr_link_error_recovery.attr.attr,
@@ -423,11 +551,65 @@ static struct attribute *pma_attrs[] = {
NULL
};
+static struct attribute *pma_attrs_ext[] = {
+ &port_pma_attr_symbol_error.attr.attr,
+ &port_pma_attr_link_error_recovery.attr.attr,
+ &port_pma_attr_link_downed.attr.attr,
+ &port_pma_attr_port_rcv_errors.attr.attr,
+ &port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
+ &port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
+ &port_pma_attr_port_xmit_discards.attr.attr,
+ &port_pma_attr_port_xmit_constraint_errors.attr.attr,
+ &port_pma_attr_port_rcv_constraint_errors.attr.attr,
+ &port_pma_attr_local_link_integrity_errors.attr.attr,
+ &port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
+ &port_pma_attr_VL15_dropped.attr.attr,
+ &port_pma_attr_ext_port_xmit_data.attr.attr,
+ &port_pma_attr_ext_port_rcv_data.attr.attr,
+ &port_pma_attr_ext_port_xmit_packets.attr.attr,
+ &port_pma_attr_ext_port_rcv_packets.attr.attr,
+ &port_pma_attr_ext_unicast_rcv_packets.attr.attr,
+ &port_pma_attr_ext_unicast_xmit_packets.attr.attr,
+ &port_pma_attr_ext_multicast_rcv_packets.attr.attr,
+ &port_pma_attr_ext_multicast_xmit_packets.attr.attr,
+ NULL
+};
+
+static struct attribute *pma_attrs_noietf[] = {
+ &port_pma_attr_symbol_error.attr.attr,
+ &port_pma_attr_link_error_recovery.attr.attr,
+ &port_pma_attr_link_downed.attr.attr,
+ &port_pma_attr_port_rcv_errors.attr.attr,
+ &port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
+ &port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
+ &port_pma_attr_port_xmit_discards.attr.attr,
+ &port_pma_attr_port_xmit_constraint_errors.attr.attr,
+ &port_pma_attr_port_rcv_constraint_errors.attr.attr,
+ &port_pma_attr_local_link_integrity_errors.attr.attr,
+ &port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
+ &port_pma_attr_VL15_dropped.attr.attr,
+ &port_pma_attr_ext_port_xmit_data.attr.attr,
+ &port_pma_attr_ext_port_rcv_data.attr.attr,
+ &port_pma_attr_ext_port_xmit_packets.attr.attr,
+ &port_pma_attr_ext_port_rcv_packets.attr.attr,
+ NULL
+};
+
static struct attribute_group pma_group = {
.name = "counters",
.attrs = pma_attrs
};
+static struct attribute_group pma_group_ext = {
+ .name = "counters",
+ .attrs = pma_attrs_ext
+};
+
+static struct attribute_group pma_group_noietf = {
+ .name = "counters",
+ .attrs = pma_attrs_noietf
+};
+
static void ib_port_release(struct kobject *kobj)
{
struct ib_port *p = container_of(kobj, struct ib_port, kobj);
@@ -451,12 +633,41 @@ static void ib_port_release(struct kobject *kobj)
kfree(p);
}
+static void ib_port_gid_attr_release(struct kobject *kobj)
+{
+ struct gid_attr_group *g = container_of(kobj, struct gid_attr_group,
+ kobj);
+ struct attribute *a;
+ int i;
+
+ if (g->ndev.attrs) {
+ for (i = 0; (a = g->ndev.attrs[i]); ++i)
+ kfree(a);
+
+ kfree(g->ndev.attrs);
+ }
+
+ if (g->type.attrs) {
+ for (i = 0; (a = g->type.attrs[i]); ++i)
+ kfree(a);
+
+ kfree(g->type.attrs);
+ }
+
+ kfree(g);
+}
+
static struct kobj_type port_type = {
.release = ib_port_release,
.sysfs_ops = &port_sysfs_ops,
.default_attrs = port_default_attrs
};
+static struct kobj_type gid_attr_type = {
+ .sysfs_ops = &gid_attr_sysfs_ops,
+ .release = ib_port_gid_attr_release
+};
+
static struct attribute **
alloc_group_attrs(ssize_t (*show)(struct ib_port *,
struct port_attribute *, char *buf),
@@ -500,6 +711,31 @@ err:
return NULL;
}
+/*
+ * Figure out which counter table to use depending on
+ * the device capabilities.
+ */
+static struct attribute_group *get_counter_table(struct ib_device *dev,
+ int port_num)
+{
+ struct ib_class_port_info cpi;
+
+ if (get_perf_mad(dev, port_num, IB_PMA_CLASS_PORT_INFO,
+ &cpi, 40, sizeof(cpi)) >= 0) {
+
+ if (cpi.capability_mask && IB_PMA_CLASS_CAP_EXT_WIDTH)
+ /* We have extended counters */
+ return &pma_group_ext;
+
+ if (cpi.capability_mask && IB_PMA_CLASS_CAP_EXT_WIDTH_NOIETF)
+ /* But not the IETF ones */
+ return &pma_group_noietf;
+ }
+
+ /* Fall back to normal counters */
+ return &pma_group;
+}
+
static int add_port(struct ib_device *device, int port_num,
int (*port_callback)(struct ib_device *,
u8, struct kobject *))
@@ -528,9 +764,24 @@ static int add_port(struct ib_device *device, int port_num,
return ret;
}
- ret = sysfs_create_group(&p->kobj, &pma_group);
- if (ret)
+ p->gid_attr_group = kzalloc(sizeof(*p->gid_attr_group), GFP_KERNEL);
+ if (!p->gid_attr_group) {
+ ret = -ENOMEM;
goto err_put;
+ }
+
+ p->gid_attr_group->port = p;
+ ret = kobject_init_and_add(&p->gid_attr_group->kobj, &gid_attr_type,
+ &p->kobj, "gid_attrs");
+ if (ret) {
+ kfree(p->gid_attr_group);
+ goto err_put;
+ }
+
+ p->pma_table = get_counter_table(device, port_num);
+ ret = sysfs_create_group(&p->kobj, p->pma_table);
+ if (ret)
+ goto err_put_gid_attrs;
p->gid_group.name = "gids";
p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len);
@@ -543,12 +794,38 @@ static int add_port(struct ib_device *device, int port_num,
if (ret)
goto err_free_gid;
+ p->gid_attr_group->ndev.name = "ndevs";
+ p->gid_attr_group->ndev.attrs = alloc_group_attrs(show_port_gid_attr_ndev,
+ attr.gid_tbl_len);
+ if (!p->gid_attr_group->ndev.attrs) {
+ ret = -ENOMEM;
+ goto err_remove_gid;
+ }
+
+ ret = sysfs_create_group(&p->gid_attr_group->kobj,
+ &p->gid_attr_group->ndev);
+ if (ret)
+ goto err_free_gid_ndev;
+
+ p->gid_attr_group->type.name = "types";
+ p->gid_attr_group->type.attrs = alloc_group_attrs(show_port_gid_attr_gid_type,
+ attr.gid_tbl_len);
+ if (!p->gid_attr_group->type.attrs) {
+ ret = -ENOMEM;
+ goto err_remove_gid_ndev;
+ }
+
+ ret = sysfs_create_group(&p->gid_attr_group->kobj,
+ &p->gid_attr_group->type);
+ if (ret)
+ goto err_free_gid_type;
+
p->pkey_group.name = "pkeys";
p->pkey_group.attrs = alloc_group_attrs(show_port_pkey,
attr.pkey_tbl_len);
if (!p->pkey_group.attrs) {
ret = -ENOMEM;
- goto err_remove_gid;
+ goto err_remove_gid_type;
}
ret = sysfs_create_group(&p->kobj, &p->pkey_group);
@@ -576,6 +853,28 @@ err_free_pkey:
kfree(p->pkey_group.attrs);
p->pkey_group.attrs = NULL;
+err_remove_gid_type:
+ sysfs_remove_group(&p->gid_attr_group->kobj,
+ &p->gid_attr_group->type);
+
+err_free_gid_type:
+ for (i = 0; i < attr.gid_tbl_len; ++i)
+ kfree(p->gid_attr_group->type.attrs[i]);
+
+ kfree(p->gid_attr_group->type.attrs);
+ p->gid_attr_group->type.attrs = NULL;
+
+err_remove_gid_ndev:
+ sysfs_remove_group(&p->gid_attr_group->kobj,
+ &p->gid_attr_group->ndev);
+
+err_free_gid_ndev:
+ for (i = 0; i < attr.gid_tbl_len; ++i)
+ kfree(p->gid_attr_group->ndev.attrs[i]);
+
+ kfree(p->gid_attr_group->ndev.attrs);
+ p->gid_attr_group->ndev.attrs = NULL;
+
err_remove_gid:
sysfs_remove_group(&p->kobj, &p->gid_group);
@@ -587,7 +886,10 @@ err_free_gid:
p->gid_group.attrs = NULL;
err_remove_pma:
- sysfs_remove_group(&p->kobj, &pma_group);
+ sysfs_remove_group(&p->kobj, p->pma_table);
+
+err_put_gid_attrs:
+ kobject_put(&p->gid_attr_group->kobj);
err_put:
kobject_put(&p->kobj);
@@ -614,18 +916,12 @@ static ssize_t show_sys_image_guid(struct device *device,
struct device_attribute *dev_attr, char *buf)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
- struct ib_device_attr attr;
- ssize_t ret;
-
- ret = ib_query_device(dev, &attr);
- if (ret)
- return ret;
return sprintf(buf, "%04x:%04x:%04x:%04x\n",
- be16_to_cpu(((__be16 *) &attr.sys_image_guid)[0]),
- be16_to_cpu(((__be16 *) &attr.sys_image_guid)[1]),
- be16_to_cpu(((__be16 *) &attr.sys_image_guid)[2]),
- be16_to_cpu(((__be16 *) &attr.sys_image_guid)[3]));
+ be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[0]),
+ be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[1]),
+ be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[2]),
+ be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[3]));
}
static ssize_t show_node_guid(struct device *device,
@@ -800,9 +1096,14 @@ static void free_port_list_attributes(struct ib_device *device)
list_for_each_entry_safe(p, t, &device->port_list, entry) {
struct ib_port *port = container_of(p, struct ib_port, kobj);
list_del(&p->entry);
- sysfs_remove_group(p, &pma_group);
+ sysfs_remove_group(p, port->pma_table);
sysfs_remove_group(p, &port->pkey_group);
sysfs_remove_group(p, &port->gid_group);
+ sysfs_remove_group(&port->gid_attr_group->kobj,
+ &port->gid_attr_group->ndev);
+ sysfs_remove_group(&port->gid_attr_group->kobj,
+ &port->gid_attr_group->type);
+ kobject_put(&port->gid_attr_group->kobj);
kobject_put(p);
}
diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c
index 72feee6..19837d2 100644
--- a/drivers/infiniband/core/ud_header.c
+++ b/drivers/infiniband/core/ud_header.c
@@ -35,6 +35,7 @@
#include <linux/string.h>
#include <linux/export.h>
#include <linux/if_ether.h>
+#include <linux/ip.h>
#include <rdma/ib_pack.h>
@@ -116,6 +117,72 @@ static const struct ib_field vlan_table[] = {
.size_bits = 16 }
};
+static const struct ib_field ip4_table[] = {
+ { STRUCT_FIELD(ip4, ver),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 4 },
+ { STRUCT_FIELD(ip4, hdr_len),
+ .offset_words = 0,
+ .offset_bits = 4,
+ .size_bits = 4 },
+ { STRUCT_FIELD(ip4, tos),
+ .offset_words = 0,
+ .offset_bits = 8,
+ .size_bits = 8 },
+ { STRUCT_FIELD(ip4, tot_len),
+ .offset_words = 0,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { STRUCT_FIELD(ip4, id),
+ .offset_words = 1,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { STRUCT_FIELD(ip4, frag_off),
+ .offset_words = 1,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { STRUCT_FIELD(ip4, ttl),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { STRUCT_FIELD(ip4, protocol),
+ .offset_words = 2,
+ .offset_bits = 8,
+ .size_bits = 8 },
+ { STRUCT_FIELD(ip4, check),
+ .offset_words = 2,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { STRUCT_FIELD(ip4, saddr),
+ .offset_words = 3,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { STRUCT_FIELD(ip4, daddr),
+ .offset_words = 4,
+ .offset_bits = 0,
+ .size_bits = 32 }
+};
+
+static const struct ib_field udp_table[] = {
+ { STRUCT_FIELD(udp, sport),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { STRUCT_FIELD(udp, dport),
+ .offset_words = 0,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { STRUCT_FIELD(udp, length),
+ .offset_words = 1,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { STRUCT_FIELD(udp, csum),
+ .offset_words = 1,
+ .offset_bits = 16,
+ .size_bits = 16 }
+};
+
static const struct ib_field grh_table[] = {
{ STRUCT_FIELD(grh, ip_version),
.offset_words = 0,
@@ -213,26 +280,57 @@ static const struct ib_field deth_table[] = {
.size_bits = 24 }
};
+__sum16 ib_ud_ip4_csum(struct ib_ud_header *header)
+{
+ struct iphdr iph;
+
+ iph.ihl = 5;
+ iph.version = 4;
+ iph.tos = header->ip4.tos;
+ iph.tot_len = header->ip4.tot_len;
+ iph.id = header->ip4.id;
+ iph.frag_off = header->ip4.frag_off;
+ iph.ttl = header->ip4.ttl;
+ iph.protocol = header->ip4.protocol;
+ iph.check = 0;
+ iph.saddr = header->ip4.saddr;
+ iph.daddr = header->ip4.daddr;
+
+ return ip_fast_csum((u8 *)&iph, iph.ihl);
+}
+EXPORT_SYMBOL(ib_ud_ip4_csum);
+
/**
* ib_ud_header_init - Initialize UD header structure
* @payload_bytes:Length of packet payload
* @lrh_present: specify if LRH is present
* @eth_present: specify if Eth header is present
* @vlan_present: packet is tagged vlan
- * @grh_present:GRH flag (if non-zero, GRH will be included)
+ * @grh_present: GRH flag (if non-zero, GRH will be included)
+ * @ip_version: if non-zero, IP header, V4 or V6, will be included
+ * @udp_present :if non-zero, UDP header will be included
* @immediate_present: specify if immediate data is present
* @header:Structure to initialize
*/
-void ib_ud_header_init(int payload_bytes,
- int lrh_present,
- int eth_present,
- int vlan_present,
- int grh_present,
- int immediate_present,
- struct ib_ud_header *header)
+int ib_ud_header_init(int payload_bytes,
+ int lrh_present,
+ int eth_present,
+ int vlan_present,
+ int grh_present,
+ int ip_version,
+ int udp_present,
+ int immediate_present,
+ struct ib_ud_header *header)
{
+ grh_present = grh_present && !ip_version;
memset(header, 0, sizeof *header);
+ /*
+ * UDP header without IP header doesn't make sense
+ */
+ if (udp_present && ip_version != 4 && ip_version != 6)
+ return -EINVAL;
+
if (lrh_present) {
u16 packet_length;
@@ -252,7 +350,7 @@ void ib_ud_header_init(int payload_bytes,
if (vlan_present)
header->eth.type = cpu_to_be16(ETH_P_8021Q);
- if (grh_present) {
+ if (ip_version == 6 || grh_present) {
header->grh.ip_version = 6;
header->grh.payload_length =
cpu_to_be16((IB_BTH_BYTES +
@@ -260,8 +358,30 @@ void ib_ud_header_init(int payload_bytes,
payload_bytes +
4 + /* ICRC */
3) & ~3); /* round up */
- header->grh.next_header = 0x1b;
+ header->grh.next_header = udp_present ? IPPROTO_UDP : 0x1b;
+ }
+
+ if (ip_version == 4) {
+ int udp_bytes = udp_present ? IB_UDP_BYTES : 0;
+
+ header->ip4.ver = 4; /* version 4 */
+ header->ip4.hdr_len = 5; /* 5 words */
+ header->ip4.tot_len =
+ cpu_to_be16(IB_IP4_BYTES +
+ udp_bytes +
+ IB_BTH_BYTES +
+ IB_DETH_BYTES +
+ payload_bytes +
+ 4); /* ICRC */
+ header->ip4.protocol = IPPROTO_UDP;
}
+ if (udp_present && ip_version)
+ header->udp.length =
+ cpu_to_be16(IB_UDP_BYTES +
+ IB_BTH_BYTES +
+ IB_DETH_BYTES +
+ payload_bytes +
+ 4); /* ICRC */
if (immediate_present)
header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
@@ -273,8 +393,11 @@ void ib_ud_header_init(int payload_bytes,
header->lrh_present = lrh_present;
header->eth_present = eth_present;
header->vlan_present = vlan_present;
- header->grh_present = grh_present;
+ header->grh_present = grh_present || (ip_version == 6);
+ header->ipv4_present = ip_version == 4;
+ header->udp_present = udp_present;
header->immediate_present = immediate_present;
+ return 0;
}
EXPORT_SYMBOL(ib_ud_header_init);
@@ -311,6 +434,16 @@ int ib_ud_header_pack(struct ib_ud_header *header,
&header->grh, buf + len);
len += IB_GRH_BYTES;
}
+ if (header->ipv4_present) {
+ ib_pack(ip4_table, ARRAY_SIZE(ip4_table),
+ &header->ip4, buf + len);
+ len += IB_IP4_BYTES;
+ }
+ if (header->udp_present) {
+ ib_pack(udp_table, ARRAY_SIZE(udp_table),
+ &header->udp, buf + len);
+ len += IB_UDP_BYTES;
+ }
ib_pack(bth_table, ARRAY_SIZE(bth_table),
&header->bth, buf + len);
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 40becdb..e69bf26 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -232,7 +232,7 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
ib_ucontext_notifier_end_account(context);
}
-static struct mmu_notifier_ops ib_umem_notifiers = {
+static const struct mmu_notifier_ops ib_umem_notifiers = {
.release = ib_umem_notifier_release,
.invalidate_page = ib_umem_notifier_invalidate_page,
.invalidate_range_start = ib_umem_notifier_invalidate_range_start,
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 57f281f..415a318 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -210,6 +210,7 @@ static void send_handler(struct ib_mad_agent *agent,
}
static void recv_handler(struct ib_mad_agent *agent,
+ struct ib_mad_send_buf *send_buf,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct ib_umad_file *file = agent->context;
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 94bbd8c..612ccfd 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -204,6 +204,8 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler,
struct ib_event *event);
void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd);
+int uverbs_dealloc_mw(struct ib_mw *mw);
+
struct ib_uverbs_flow_spec {
union {
union {
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 1c02dea..6ffc9c4 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -291,9 +291,6 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
struct ib_uverbs_get_context cmd;
struct ib_uverbs_get_context_resp resp;
struct ib_udata udata;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- struct ib_device_attr dev_attr;
-#endif
struct ib_ucontext *ucontext;
struct file *filp;
int ret;
@@ -342,10 +339,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
ucontext->odp_mrs_count = 0;
INIT_LIST_HEAD(&ucontext->no_private_counters);
- ret = ib_query_device(ib_dev, &dev_attr);
- if (ret)
- goto err_free;
- if (!(dev_attr.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
+ if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
ucontext->invalidate_range = NULL;
#endif
@@ -447,8 +441,6 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
{
struct ib_uverbs_query_device cmd;
struct ib_uverbs_query_device_resp resp;
- struct ib_device_attr attr;
- int ret;
if (out_len < sizeof resp)
return -ENOSPC;
@@ -456,12 +448,8 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- ret = ib_query_device(ib_dev, &attr);
- if (ret)
- return ret;
-
memset(&resp, 0, sizeof resp);
- copy_query_dev_fields(file, ib_dev, &resp, &attr);
+ copy_query_dev_fields(file, ib_dev, &resp, &ib_dev->attrs);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
@@ -986,11 +974,8 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
}
if (cmd.access_flags & IB_ACCESS_ON_DEMAND) {
- struct ib_device_attr attr;
-
- ret = ib_query_device(pd->device, &attr);
- if (ret || !(attr.device_cap_flags &
- IB_DEVICE_ON_DEMAND_PAGING)) {
+ if (!(pd->device->attrs.device_cap_flags &
+ IB_DEVICE_ON_DEMAND_PAGING)) {
pr_debug("ODP support not available\n");
ret = -EINVAL;
goto err_put;
@@ -1008,7 +993,6 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
mr->pd = pd;
mr->uobject = uobj;
atomic_inc(&pd->usecnt);
- atomic_set(&mr->usecnt, 0);
uobj->object = mr;
ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
@@ -1106,11 +1090,6 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
}
}
- if (atomic_read(&mr->usecnt)) {
- ret = -EBUSY;
- goto put_uobj_pd;
- }
-
old_pd = mr->pd;
ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start,
cmd.length, cmd.hca_va,
@@ -1258,7 +1237,7 @@ err_copy:
idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
err_unalloc:
- ib_dealloc_mw(mw);
+ uverbs_dealloc_mw(mw);
err_put:
put_pd_read(pd);
@@ -1287,7 +1266,7 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
mw = uobj->object;
- ret = ib_dealloc_mw(mw);
+ ret = uverbs_dealloc_mw(mw);
if (!ret)
uobj->live = 0;
@@ -1845,7 +1824,10 @@ static int create_qp(struct ib_uverbs_file *file,
sizeof(cmd->create_flags))
attr.create_flags = cmd->create_flags;
- if (attr.create_flags & ~IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
+ if (attr.create_flags & ~(IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
+ IB_QP_CREATE_CROSS_CHANNEL |
+ IB_QP_CREATE_MANAGED_SEND |
+ IB_QP_CREATE_MANAGED_RECV)) {
ret = -EINVAL;
goto err_put;
}
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index e3ef288..39680ae 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -133,6 +133,17 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
+int uverbs_dealloc_mw(struct ib_mw *mw)
+{
+ struct ib_pd *pd = mw->pd;
+ int ret;
+
+ ret = mw->device->dealloc_mw(mw);
+ if (!ret)
+ atomic_dec(&pd->usecnt);
+ return ret;
+}
+
static void ib_uverbs_release_dev(struct kobject *kobj)
{
struct ib_uverbs_device *dev =
@@ -224,7 +235,7 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
struct ib_mw *mw = uobj->object;
idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
- ib_dealloc_mw(mw);
+ uverbs_dealloc_mw(mw);
kfree(uobj);
}
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
index 7d2f14c..af020f8 100644
--- a/drivers/infiniband/core/uverbs_marshall.c
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -144,5 +144,6 @@ void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
memset(dst->dmac, 0, sizeof(dst->dmac));
dst->net = NULL;
dst->ifindex = 0;
+ dst->gid_type = IB_GID_TYPE_IB;
}
EXPORT_SYMBOL(ib_copy_path_rec_from_user);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 545906d..5af6d02 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -229,12 +229,6 @@ EXPORT_SYMBOL(rdma_port_get_link_layer);
struct ib_pd *ib_alloc_pd(struct ib_device *device)
{
struct ib_pd *pd;
- struct ib_device_attr devattr;
- int rc;
-
- rc = ib_query_device(device, &devattr);
- if (rc)
- return ERR_PTR(rc);
pd = device->alloc_pd(device, NULL, NULL);
if (IS_ERR(pd))
@@ -245,7 +239,7 @@ struct ib_pd *ib_alloc_pd(struct ib_device *device)
pd->local_mr = NULL;
atomic_set(&pd->usecnt, 0);
- if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
+ if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
pd->local_dma_lkey = device->local_dma_lkey;
else {
struct ib_mr *mr;
@@ -311,8 +305,61 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
}
EXPORT_SYMBOL(ib_create_ah);
+static int ib_get_header_version(const union rdma_network_hdr *hdr)
+{
+ const struct iphdr *ip4h = (struct iphdr *)&hdr->roce4grh;
+ struct iphdr ip4h_checked;
+ const struct ipv6hdr *ip6h = (struct ipv6hdr *)&hdr->ibgrh;
+
+ /* If it's IPv6, the version must be 6, otherwise, the first
+ * 20 bytes (before the IPv4 header) are garbled.
+ */
+ if (ip6h->version != 6)
+ return (ip4h->version == 4) ? 4 : 0;
+ /* version may be 6 or 4 because the first 20 bytes could be garbled */
+
+ /* RoCE v2 requires no options, thus header length
+ * must be 5 words
+ */
+ if (ip4h->ihl != 5)
+ return 6;
+
+ /* Verify checksum.
+ * We can't write on scattered buffers so we need to copy to
+ * temp buffer.
+ */
+ memcpy(&ip4h_checked, ip4h, sizeof(ip4h_checked));
+ ip4h_checked.check = 0;
+ ip4h_checked.check = ip_fast_csum((u8 *)&ip4h_checked, 5);
+ /* if IPv4 header checksum is OK, believe it */
+ if (ip4h->check == ip4h_checked.check)
+ return 4;
+ return 6;
+}
+
+static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device,
+ u8 port_num,
+ const struct ib_grh *grh)
+{
+ int grh_version;
+
+ if (rdma_protocol_ib(device, port_num))
+ return RDMA_NETWORK_IB;
+
+ grh_version = ib_get_header_version((union rdma_network_hdr *)grh);
+
+ if (grh_version == 4)
+ return RDMA_NETWORK_IPV4;
+
+ if (grh->next_hdr == IPPROTO_UDP)
+ return RDMA_NETWORK_IPV6;
+
+ return RDMA_NETWORK_ROCE_V1;
+}
+
struct find_gid_index_context {
u16 vlan_id;
+ enum ib_gid_type gid_type;
};
static bool find_gid_index(const union ib_gid *gid,
@@ -322,6 +369,9 @@ static bool find_gid_index(const union ib_gid *gid,
struct find_gid_index_context *ctx =
(struct find_gid_index_context *)context;
+ if (ctx->gid_type != gid_attr->gid_type)
+ return false;
+
if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) ||
(is_vlan_dev(gid_attr->ndev) &&
vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id))
@@ -332,14 +382,49 @@ static bool find_gid_index(const union ib_gid *gid,
static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num,
u16 vlan_id, const union ib_gid *sgid,
+ enum ib_gid_type gid_type,
u16 *gid_index)
{
- struct find_gid_index_context context = {.vlan_id = vlan_id};
+ struct find_gid_index_context context = {.vlan_id = vlan_id,
+ .gid_type = gid_type};
return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index,
&context, gid_index);
}
+static int get_gids_from_rdma_hdr(union rdma_network_hdr *hdr,
+ enum rdma_network_type net_type,
+ union ib_gid *sgid, union ib_gid *dgid)
+{
+ struct sockaddr_in src_in;
+ struct sockaddr_in dst_in;
+ __be32 src_saddr, dst_saddr;
+
+ if (!sgid || !dgid)
+ return -EINVAL;
+
+ if (net_type == RDMA_NETWORK_IPV4) {
+ memcpy(&src_in.sin_addr.s_addr,
+ &hdr->roce4grh.saddr, 4);
+ memcpy(&dst_in.sin_addr.s_addr,
+ &hdr->roce4grh.daddr, 4);
+ src_saddr = src_in.sin_addr.s_addr;
+ dst_saddr = dst_in.sin_addr.s_addr;
+ ipv6_addr_set_v4mapped(src_saddr,
+ (struct in6_addr *)sgid);
+ ipv6_addr_set_v4mapped(dst_saddr,
+ (struct in6_addr *)dgid);
+ return 0;
+ } else if (net_type == RDMA_NETWORK_IPV6 ||
+ net_type == RDMA_NETWORK_IB) {
+ *dgid = hdr->ibgrh.dgid;
+ *sgid = hdr->ibgrh.sgid;
+ return 0;
+ } else {
+ return -EINVAL;
+ }
+}
+
int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
const struct ib_wc *wc, const struct ib_grh *grh,
struct ib_ah_attr *ah_attr)
@@ -347,33 +432,72 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
u32 flow_class;
u16 gid_index;
int ret;
+ enum rdma_network_type net_type = RDMA_NETWORK_IB;
+ enum ib_gid_type gid_type = IB_GID_TYPE_IB;
+ int hoplimit = 0xff;
+ union ib_gid dgid;
+ union ib_gid sgid;
memset(ah_attr, 0, sizeof *ah_attr);
if (rdma_cap_eth_ah(device, port_num)) {
+ if (wc->wc_flags & IB_WC_WITH_NETWORK_HDR_TYPE)
+ net_type = wc->network_hdr_type;
+ else
+ net_type = ib_get_net_type_by_grh(device, port_num, grh);
+ gid_type = ib_network_to_gid_type(net_type);
+ }
+ ret = get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type,
+ &sgid, &dgid);
+ if (ret)
+ return ret;
+
+ if (rdma_protocol_roce(device, port_num)) {
+ int if_index = 0;
u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
wc->vlan_id : 0xffff;
+ struct net_device *idev;
+ struct net_device *resolved_dev;
if (!(wc->wc_flags & IB_WC_GRH))
return -EPROTOTYPE;
- if (!(wc->wc_flags & IB_WC_WITH_SMAC) ||
- !(wc->wc_flags & IB_WC_WITH_VLAN)) {
- ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid,
- ah_attr->dmac,
- wc->wc_flags & IB_WC_WITH_VLAN ?
- NULL : &vlan_id,
- 0);
- if (ret)
- return ret;
+ if (!device->get_netdev)
+ return -EOPNOTSUPP;
+
+ idev = device->get_netdev(device, port_num);
+ if (!idev)
+ return -ENODEV;
+
+ ret = rdma_addr_find_l2_eth_by_grh(&dgid, &sgid,
+ ah_attr->dmac,
+ wc->wc_flags & IB_WC_WITH_VLAN ?
+ NULL : &vlan_id,
+ &if_index, &hoplimit);
+ if (ret) {
+ dev_put(idev);
+ return ret;
}
- ret = get_sgid_index_from_eth(device, port_num, vlan_id,
- &grh->dgid, &gid_index);
+ resolved_dev = dev_get_by_index(&init_net, if_index);
+ if (resolved_dev->flags & IFF_LOOPBACK) {
+ dev_put(resolved_dev);
+ resolved_dev = idev;
+ dev_hold(resolved_dev);
+ }
+ rcu_read_lock();
+ if (resolved_dev != idev && !rdma_is_upper_dev_rcu(idev,
+ resolved_dev))
+ ret = -EHOSTUNREACH;
+ rcu_read_unlock();
+ dev_put(idev);
+ dev_put(resolved_dev);
if (ret)
return ret;
- if (wc->wc_flags & IB_WC_WITH_SMAC)
- memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
+ ret = get_sgid_index_from_eth(device, port_num, vlan_id,
+ &dgid, gid_type, &gid_index);
+ if (ret)
+ return ret;
}
ah_attr->dlid = wc->slid;
@@ -383,10 +507,11 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
if (wc->wc_flags & IB_WC_GRH) {
ah_attr->ah_flags = IB_AH_GRH;
- ah_attr->grh.dgid = grh->sgid;
+ ah_attr->grh.dgid = sgid;
if (!rdma_cap_eth_ah(device, port_num)) {
- ret = ib_find_cached_gid_by_port(device, &grh->dgid,
+ ret = ib_find_cached_gid_by_port(device, &dgid,
+ IB_GID_TYPE_IB,
port_num, NULL,
&gid_index);
if (ret)
@@ -396,7 +521,7 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
ah_attr->grh.sgid_index = (u8) gid_index;
flow_class = be32_to_cpu(grh->version_tclass_flow);
ah_attr->grh.flow_label = flow_class & 0xFFFFF;
- ah_attr->grh.hop_limit = 0xFF;
+ ah_attr->grh.hop_limit = hoplimit;
ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF;
}
return 0;
@@ -1014,6 +1139,7 @@ int ib_resolve_eth_dmac(struct ib_qp *qp,
union ib_gid sgid;
struct ib_gid_attr sgid_attr;
int ifindex;
+ int hop_limit;
ret = ib_query_gid(qp->device,
qp_attr->ah_attr.port_num,
@@ -1028,12 +1154,14 @@ int ib_resolve_eth_dmac(struct ib_qp *qp,
ifindex = sgid_attr.ndev->ifindex;
- ret = rdma_addr_find_dmac_by_grh(&sgid,
- &qp_attr->ah_attr.grh.dgid,
- qp_attr->ah_attr.dmac,
- NULL, ifindex);
+ ret = rdma_addr_find_l2_eth_by_grh(&sgid,
+ &qp_attr->ah_attr.grh.dgid,
+ qp_attr->ah_attr.dmac,
+ NULL, &ifindex, &hop_limit);
dev_put(sgid_attr.ndev);
+
+ qp_attr->ah_attr.grh.hop_limit = hop_limit;
}
}
out:
@@ -1215,29 +1343,17 @@ struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
mr->pd = pd;
mr->uobject = NULL;
atomic_inc(&pd->usecnt);
- atomic_set(&mr->usecnt, 0);
}
return mr;
}
EXPORT_SYMBOL(ib_get_dma_mr);
-int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
-{
- return mr->device->query_mr ?
- mr->device->query_mr(mr, mr_attr) : -ENOSYS;
-}
-EXPORT_SYMBOL(ib_query_mr);
-
int ib_dereg_mr(struct ib_mr *mr)
{
- struct ib_pd *pd;
+ struct ib_pd *pd = mr->pd;
int ret;
- if (atomic_read(&mr->usecnt))
- return -EBUSY;
-
- pd = mr->pd;
ret = mr->device->dereg_mr(mr);
if (!ret)
atomic_dec(&pd->usecnt);
@@ -1273,49 +1389,12 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
mr->pd = pd;
mr->uobject = NULL;
atomic_inc(&pd->usecnt);
- atomic_set(&mr->usecnt, 0);
}
return mr;
}
EXPORT_SYMBOL(ib_alloc_mr);
-/* Memory windows */
-
-struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
-{
- struct ib_mw *mw;
-
- if (!pd->device->alloc_mw)
- return ERR_PTR(-ENOSYS);
-
- mw = pd->device->alloc_mw(pd, type);
- if (!IS_ERR(mw)) {
- mw->device = pd->device;
- mw->pd = pd;
- mw->uobject = NULL;
- mw->type = type;
- atomic_inc(&pd->usecnt);
- }
-
- return mw;
-}
-EXPORT_SYMBOL(ib_alloc_mw);
-
-int ib_dealloc_mw(struct ib_mw *mw)
-{
- struct ib_pd *pd;
- int ret;
-
- pd = mw->pd;
- ret = mw->device->dealloc_mw(mw);
- if (!ret)
- atomic_dec(&pd->usecnt);
-
- return ret;
-}
-EXPORT_SYMBOL(ib_dealloc_mw);
-
/* "Fast" memory regions */
struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
@@ -1530,7 +1609,7 @@ int ib_sg_to_pages(struct ib_mr *mr,
int (*set_page)(struct ib_mr *, u64))
{
struct scatterlist *sg;
- u64 last_end_dma_addr = 0, last_page_addr = 0;
+ u64 last_end_dma_addr = 0;
unsigned int last_page_off = 0;
u64 page_mask = ~((u64)mr->page_size - 1);
int i, ret;
@@ -1572,7 +1651,6 @@ next_page:
mr->length += dma_len;
last_end_dma_addr = end_dma_addr;
- last_page_addr = end_dma_addr & page_mask;
last_page_off = end_dma_addr & ~page_mask;
}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index cb78b1e..f504ba7 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -149,7 +149,7 @@ static int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_en
error = l2t_send(tdev, skb, l2e);
if (error < 0)
kfree_skb(skb);
- return error;
+ return error < 0 ? error : 0;
}
int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb)
@@ -165,7 +165,7 @@ int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb)
error = cxgb3_ofld_send(tdev, skb);
if (error < 0)
kfree_skb(skb);
- return error;
+ return error < 0 ? error : 0;
}
static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb)
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c
index cfe4049..97fbfd2 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c
@@ -115,10 +115,6 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
case T3_SEND_WITH_SE_INV:
wc->opcode = IB_WC_SEND;
break;
- case T3_BIND_MW:
- wc->opcode = IB_WC_BIND_MW;
- break;
-
case T3_LOCAL_INV:
wc->opcode = IB_WC_LOCAL_INV;
break;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c
index 5c36ee2..1d04c87 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_mem.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c
@@ -75,37 +75,6 @@ int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
return ret;
}
-int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
- struct iwch_mr *mhp,
- int shift,
- int npages)
-{
- u32 stag;
- int ret;
-
- /* We could support this... */
- if (npages > mhp->attr.pbl_size)
- return -ENOMEM;
-
- stag = mhp->attr.stag;
- if (cxio_reregister_phys_mem(&rhp->rdev,
- &stag, mhp->attr.pdid,
- mhp->attr.perms,
- mhp->attr.zbva,
- mhp->attr.va_fbo,
- mhp->attr.len,
- shift - 12,
- mhp->attr.pbl_size, mhp->attr.pbl_addr))
- return -ENOMEM;
-
- ret = iwch_finish_mem_reg(mhp, stag);
- if (ret)
- cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
- mhp->attr.pbl_addr);
-
- return ret;
-}
-
int iwch_alloc_pbl(struct iwch_mr *mhp, int npages)
{
mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev,
@@ -130,74 +99,3 @@ int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset)
return cxio_write_pbl(&mhp->rhp->rdev, pages,
mhp->attr.pbl_addr + (offset << 3), npages);
}
-
-int build_phys_page_list(struct ib_phys_buf *buffer_list,
- int num_phys_buf,
- u64 *iova_start,
- u64 *total_size,
- int *npages,
- int *shift,
- __be64 **page_list)
-{
- u64 mask;
- int i, j, n;
-
- mask = 0;
- *total_size = 0;
- for (i = 0; i < num_phys_buf; ++i) {
- if (i != 0 && buffer_list[i].addr & ~PAGE_MASK)
- return -EINVAL;
- if (i != 0 && i != num_phys_buf - 1 &&
- (buffer_list[i].size & ~PAGE_MASK))
- return -EINVAL;
- *total_size += buffer_list[i].size;
- if (i > 0)
- mask |= buffer_list[i].addr;
- else
- mask |= buffer_list[i].addr & PAGE_MASK;
- if (i != num_phys_buf - 1)
- mask |= buffer_list[i].addr + buffer_list[i].size;
- else
- mask |= (buffer_list[i].addr + buffer_list[i].size +
- PAGE_SIZE - 1) & PAGE_MASK;
- }
-
- if (*total_size > 0xFFFFFFFFULL)
- return -ENOMEM;
-
- /* Find largest page shift we can use to cover buffers */
- for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift))
- if ((1ULL << *shift) & mask)
- break;
-
- buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1);
- buffer_list[0].addr &= ~0ull << *shift;
-
- *npages = 0;
- for (i = 0; i < num_phys_buf; ++i)
- *npages += (buffer_list[i].size +
- (1ULL << *shift) - 1) >> *shift;
-
- if (!*npages)
- return -EINVAL;
-
- *page_list = kmalloc(sizeof(u64) * *npages, GFP_KERNEL);
- if (!*page_list)
- return -ENOMEM;
-
- n = 0;
- for (i = 0; i < num_phys_buf; ++i)
- for (j = 0;
- j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift;
- ++j)
- (*page_list)[n++] = cpu_to_be64(buffer_list[i].addr +
- ((u64) j << *shift));
-
- PDBG("%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d\n",
- __func__, (unsigned long long) *iova_start,
- (unsigned long long) mask, *shift, (unsigned long long) *total_size,
- *npages);
-
- return 0;
-
-}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index c34725c..2734820 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -458,9 +458,6 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
u32 mmid;
PDBG("%s ib_mr %p\n", __func__, ib_mr);
- /* There can be no memory windows */
- if (atomic_read(&ib_mr->usecnt))
- return -EINVAL;
mhp = to_iwch_mr(ib_mr);
kfree(mhp->pages);
@@ -479,24 +476,25 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
return 0;
}
-static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf,
- int acc,
- u64 *iova_start)
+static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
{
- __be64 *page_list;
- int shift;
- u64 total_size;
- int npages;
- struct iwch_dev *rhp;
- struct iwch_pd *php;
+ const u64 total_size = 0xffffffff;
+ const u64 mask = (total_size + PAGE_SIZE - 1) & PAGE_MASK;
+ struct iwch_pd *php = to_iwch_pd(pd);
+ struct iwch_dev *rhp = php->rhp;
struct iwch_mr *mhp;
- int ret;
+ __be64 *page_list;
+ int shift = 26, npages, ret, i;
PDBG("%s ib_pd %p\n", __func__, pd);
- php = to_iwch_pd(pd);
- rhp = php->rhp;
+
+ /*
+ * T3 only supports 32 bits of size.
+ */
+ if (sizeof(phys_addr_t) > 4) {
+ pr_warn_once(MOD "Cannot support dma_mrs on this platform.\n");
+ return ERR_PTR(-ENOTSUPP);
+ }
mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
if (!mhp)
@@ -504,22 +502,23 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
mhp->rhp = rhp;
- /* First check that we have enough alignment */
- if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
+ npages = (total_size + (1ULL << shift) - 1) >> shift;
+ if (!npages) {
ret = -EINVAL;
goto err;
}
- if (num_phys_buf > 1 &&
- ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) {
- ret = -EINVAL;
+ page_list = kmalloc_array(npages, sizeof(u64), GFP_KERNEL);
+ if (!page_list) {
+ ret = -ENOMEM;
goto err;
}
- ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start,
- &total_size, &npages, &shift, &page_list);
- if (ret)
- goto err;
+ for (i = 0; i < npages; i++)
+ page_list[i] = cpu_to_be64((u64)i << shift);
+
+ PDBG("%s mask 0x%llx shift %d len %lld pbl_size %d\n",
+ __func__, mask, shift, total_size, npages);
ret = iwch_alloc_pbl(mhp, npages);
if (ret) {
@@ -536,7 +535,7 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
mhp->attr.zbva = 0;
mhp->attr.perms = iwch_ib_to_tpt_access(acc);
- mhp->attr.va_fbo = *iova_start;
+ mhp->attr.va_fbo = 0;
mhp->attr.page_size = shift - 12;
mhp->attr.len = (u32) total_size;
@@ -553,76 +552,8 @@ err_pbl:
err:
kfree(mhp);
return ERR_PTR(ret);
-
-}
-
-static int iwch_reregister_phys_mem(struct ib_mr *mr,
- int mr_rereg_mask,
- struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf,
- int acc, u64 * iova_start)
-{
-
- struct iwch_mr mh, *mhp;
- struct iwch_pd *php;
- struct iwch_dev *rhp;
- __be64 *page_list = NULL;
- int shift = 0;
- u64 total_size;
- int npages = 0;
- int ret;
-
- PDBG("%s ib_mr %p ib_pd %p\n", __func__, mr, pd);
-
- /* There can be no memory windows */
- if (atomic_read(&mr->usecnt))
- return -EINVAL;
-
- mhp = to_iwch_mr(mr);
- rhp = mhp->rhp;
- php = to_iwch_pd(mr->pd);
-
- /* make sure we are on the same adapter */
- if (rhp != php->rhp)
- return -EINVAL;
-
- memcpy(&mh, mhp, sizeof *mhp);
-
- if (mr_rereg_mask & IB_MR_REREG_PD)
- php = to_iwch_pd(pd);
- if (mr_rereg_mask & IB_MR_REREG_ACCESS)
- mh.attr.perms = iwch_ib_to_tpt_access(acc);
- if (mr_rereg_mask & IB_MR_REREG_TRANS) {
- ret = build_phys_page_list(buffer_list, num_phys_buf,
- iova_start,
- &total_size, &npages,
- &shift, &page_list);
- if (ret)
- return ret;
- }
-
- ret = iwch_reregister_mem(rhp, php, &mh, shift, npages);
- kfree(page_list);
- if (ret) {
- return ret;
- }
- if (mr_rereg_mask & IB_MR_REREG_PD)
- mhp->attr.pdid = php->pdid;
- if (mr_rereg_mask & IB_MR_REREG_ACCESS)
- mhp->attr.perms = iwch_ib_to_tpt_access(acc);
- if (mr_rereg_mask & IB_MR_REREG_TRANS) {
- mhp->attr.zbva = 0;
- mhp->attr.va_fbo = *iova_start;
- mhp->attr.page_size = shift - 12;
- mhp->attr.len = (u32) total_size;
- mhp->attr.pbl_size = npages;
- }
-
- return 0;
}
-
static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *udata)
{
@@ -726,28 +657,6 @@ err:
return ERR_PTR(err);
}
-static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
-{
- struct ib_phys_buf bl;
- u64 kva;
- struct ib_mr *ibmr;
-
- PDBG("%s ib_pd %p\n", __func__, pd);
-
- /*
- * T3 only supports 32 bits of size.
- */
- if (sizeof(phys_addr_t) > 4) {
- pr_warn_once(MOD "Cannot support dma_mrs on this platform.\n");
- return ERR_PTR(-ENOTSUPP);
- }
- bl.size = 0xffffffff;
- bl.addr = 0;
- kva = 0;
- ibmr = iwch_register_phys_mem(pd, &bl, 1, acc, &kva);
- return ibmr;
-}
-
static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
{
struct iwch_dev *rhp;
@@ -1452,12 +1361,9 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.resize_cq = iwch_resize_cq;
dev->ibdev.poll_cq = iwch_poll_cq;
dev->ibdev.get_dma_mr = iwch_get_dma_mr;
- dev->ibdev.reg_phys_mr = iwch_register_phys_mem;
- dev->ibdev.rereg_phys_mr = iwch_reregister_phys_mem;
dev->ibdev.reg_user_mr = iwch_reg_user_mr;
dev->ibdev.dereg_mr = iwch_dereg_mr;
dev->ibdev.alloc_mw = iwch_alloc_mw;
- dev->ibdev.bind_mw = iwch_bind_mw;
dev->ibdev.dealloc_mw = iwch_dealloc_mw;
dev->ibdev.alloc_mr = iwch_alloc_mr;
dev->ibdev.map_mr_sg = iwch_map_mr_sg;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 2ac85b8..252c464 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -330,9 +330,6 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr);
int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
-int iwch_bind_mw(struct ib_qp *qp,
- struct ib_mw *mw,
- struct ib_mw_bind *mw_bind);
int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
int iwch_post_zb_read(struct iwch_ep *ep);
@@ -341,21 +338,9 @@ void iwch_unregister_device(struct iwch_dev *dev);
void stop_read_rep_timer(struct iwch_qp *qhp);
int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
struct iwch_mr *mhp, int shift);
-int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
- struct iwch_mr *mhp,
- int shift,
- int npages);
int iwch_alloc_pbl(struct iwch_mr *mhp, int npages);
void iwch_free_pbl(struct iwch_mr *mhp);
int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset);
-int build_phys_page_list(struct ib_phys_buf *buffer_list,
- int num_phys_buf,
- u64 *iova_start,
- u64 *total_size,
- int *npages,
- int *shift,
- __be64 **page_list);
-
#define IWCH_NODE_DESC "cxgb3 Chelsio Communications"
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index d0548fc..d939980 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -526,88 +526,6 @@ out:
return err;
}
-int iwch_bind_mw(struct ib_qp *qp,
- struct ib_mw *mw,
- struct ib_mw_bind *mw_bind)
-{
- struct iwch_dev *rhp;
- struct iwch_mw *mhp;
- struct iwch_qp *qhp;
- union t3_wr *wqe;
- u32 pbl_addr;
- u8 page_size;
- u32 num_wrs;
- unsigned long flag;
- struct ib_sge sgl;
- int err=0;
- enum t3_wr_flags t3_wr_flags;
- u32 idx;
- struct t3_swsq *sqp;
-
- qhp = to_iwch_qp(qp);
- mhp = to_iwch_mw(mw);
- rhp = qhp->rhp;
-
- spin_lock_irqsave(&qhp->lock, flag);
- if (qhp->attr.state > IWCH_QP_STATE_RTS) {
- spin_unlock_irqrestore(&qhp->lock, flag);
- return -EINVAL;
- }
- num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
- qhp->wq.sq_size_log2);
- if (num_wrs == 0) {
- spin_unlock_irqrestore(&qhp->lock, flag);
- return -ENOMEM;
- }
- idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
- PDBG("%s: idx 0x%0x, mw 0x%p, mw_bind 0x%p\n", __func__, idx,
- mw, mw_bind);
- wqe = (union t3_wr *) (qhp->wq.queue + idx);
-
- t3_wr_flags = 0;
- if (mw_bind->send_flags & IB_SEND_SIGNALED)
- t3_wr_flags = T3_COMPLETION_FLAG;
-
- sgl.addr = mw_bind->bind_info.addr;
- sgl.lkey = mw_bind->bind_info.mr->lkey;
- sgl.length = mw_bind->bind_info.length;
- wqe->bind.reserved = 0;
- wqe->bind.type = TPT_VATO;
-
- /* TBD: check perms */
- wqe->bind.perms = iwch_ib_to_tpt_bind_access(
- mw_bind->bind_info.mw_access_flags);
- wqe->bind.mr_stag = cpu_to_be32(mw_bind->bind_info.mr->lkey);
- wqe->bind.mw_stag = cpu_to_be32(mw->rkey);
- wqe->bind.mw_len = cpu_to_be32(mw_bind->bind_info.length);
- wqe->bind.mw_va = cpu_to_be64(mw_bind->bind_info.addr);
- err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
- if (err) {
- spin_unlock_irqrestore(&qhp->lock, flag);
- return err;
- }
- wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
- sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
- sqp->wr_id = mw_bind->wr_id;
- sqp->opcode = T3_BIND_MW;
- sqp->sq_wptr = qhp->wq.sq_wptr;
- sqp->complete = 0;
- sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED);
- wqe->bind.mr_pbl_addr = cpu_to_be32(pbl_addr);
- wqe->bind.mr_pagesz = page_size;
- build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags,
- Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0,
- sizeof(struct t3_bind_mw_wr) >> 3, T3_SOPEOP);
- ++(qhp->wq.wptr);
- ++(qhp->wq.sq_wptr);
- spin_unlock_irqrestore(&qhp->lock, flag);
-
- if (cxio_wq_db_enabled(&qhp->wq))
- ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
-
- return err;
-}
-
static inline void build_term_codes(struct respQ_msg_t *rsp_msg,
u8 *layer_type, u8 *ecode)
{
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 326d07d..cd2ff5f 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -3271,6 +3271,12 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
&ep->com.mapped_local_addr;
+ if (ipv6_addr_type(&sin6->sin6_addr) != IPV6_ADDR_ANY) {
+ err = cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
+ (const u32 *)&sin6->sin6_addr.s6_addr, 1);
+ if (err)
+ return err;
+ }
c4iw_init_wr_wait(&ep->com.wr_wait);
err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0],
ep->stid, &sin6->sin6_addr,
@@ -3282,13 +3288,13 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
0, 0, __func__);
else if (err > 0)
err = net_xmit_errno(err);
- if (err)
+ if (err) {
+ cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
+ (const u32 *)&sin6->sin6_addr.s6_addr, 1);
pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n",
err, ep->stid,
sin6->sin6_addr.s6_addr, ntohs(sin6->sin6_port));
- else
- cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
- (const u32 *)&sin6->sin6_addr.s6_addr, 1);
+ }
return err;
}
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index de9cd69..cf21df4 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -744,9 +744,6 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
case FW_RI_SEND_WITH_SE:
wc->opcode = IB_WC_SEND;
break;
- case FW_RI_BIND_MW:
- wc->opcode = IB_WC_BIND_MW;
- break;
case FW_RI_LOCAL_INV:
wc->opcode = IB_WC_LOCAL_INV;
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 58fce174..8024ea4 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -315,14 +315,12 @@ static int qp_release(struct inode *inode, struct file *file)
static int qp_open(struct inode *inode, struct file *file)
{
struct c4iw_debugfs_data *qpd;
- int ret = 0;
int count = 1;
qpd = kmalloc(sizeof *qpd, GFP_KERNEL);
- if (!qpd) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!qpd)
+ return -ENOMEM;
+
qpd->devp = inode->i_private;
qpd->pos = 0;
@@ -333,8 +331,8 @@ static int qp_open(struct inode *inode, struct file *file)
qpd->bufsize = count * 128;
qpd->buf = vmalloc(qpd->bufsize);
if (!qpd->buf) {
- ret = -ENOMEM;
- goto err1;
+ kfree(qpd);
+ return -ENOMEM;
}
spin_lock_irq(&qpd->devp->lock);
@@ -343,11 +341,7 @@ static int qp_open(struct inode *inode, struct file *file)
qpd->buf[qpd->pos++] = 0;
file->private_data = qpd;
- goto out;
-err1:
- kfree(qpd);
-out:
- return ret;
+ return 0;
}
static const struct file_operations qp_debugfs_fops = {
@@ -781,8 +775,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
pr_err(MOD "%s: unsupported udb/ucq densities %u/%u\n",
pci_name(rdev->lldi.pdev), rdev->lldi.udb_density,
rdev->lldi.ucq_density);
- err = -EINVAL;
- goto err1;
+ return -EINVAL;
}
if (rdev->lldi.vr->qp.start != rdev->lldi.vr->cq.start ||
rdev->lldi.vr->qp.size != rdev->lldi.vr->cq.size) {
@@ -791,8 +784,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
pci_name(rdev->lldi.pdev), rdev->lldi.vr->qp.start,
rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.size,
rdev->lldi.vr->cq.size);
- err = -EINVAL;
- goto err1;
+ return -EINVAL;
}
rdev->qpmask = rdev->lldi.udb_density - 1;
@@ -816,10 +808,8 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
rdev->lldi.db_reg, rdev->lldi.gts_reg,
rdev->qpmask, rdev->cqmask);
- if (c4iw_num_stags(rdev) == 0) {
- err = -EINVAL;
- goto err1;
- }
+ if (c4iw_num_stags(rdev) == 0)
+ return -EINVAL;
rdev->stats.pd.total = T4_MAX_NUM_PD;
rdev->stats.stag.total = rdev->lldi.vr->stag.size;
@@ -831,29 +821,31 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD);
if (err) {
printk(KERN_ERR MOD "error %d initializing resources\n", err);
- goto err1;
+ return err;
}
err = c4iw_pblpool_create(rdev);
if (err) {
printk(KERN_ERR MOD "error %d initializing pbl pool\n", err);
- goto err2;
+ goto destroy_resource;
}
err = c4iw_rqtpool_create(rdev);
if (err) {
printk(KERN_ERR MOD "error %d initializing rqt pool\n", err);
- goto err3;
+ goto destroy_pblpool;
}
err = c4iw_ocqp_pool_create(rdev);
if (err) {
printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err);
- goto err4;
+ goto destroy_rqtpool;
}
rdev->status_page = (struct t4_dev_status_page *)
__get_free_page(GFP_KERNEL);
- if (!rdev->status_page) {
- pr_err(MOD "error allocating status page\n");
- goto err4;
- }
+ if (!rdev->status_page)
+ goto destroy_ocqp_pool;
+ rdev->status_page->qp_start = rdev->lldi.vr->qp.start;
+ rdev->status_page->qp_size = rdev->lldi.vr->qp.size;
+ rdev->status_page->cq_start = rdev->lldi.vr->cq.start;
+ rdev->status_page->cq_size = rdev->lldi.vr->cq.size;
if (c4iw_wr_log) {
rdev->wr_log = kzalloc((1 << c4iw_wr_log_size_order) *
@@ -869,13 +861,14 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
rdev->status_page->db_off = 0;
return 0;
-err4:
+destroy_ocqp_pool:
+ c4iw_ocqp_pool_destroy(rdev);
+destroy_rqtpool:
c4iw_rqtpool_destroy(rdev);
-err3:
+destroy_pblpool:
c4iw_pblpool_destroy(rdev);
-err2:
+destroy_resource:
c4iw_destroy_resource(&rdev->resource);
-err1:
return err;
}
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 00e55fa..fb2de75 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -947,8 +947,6 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr);
int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
-int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
- struct ib_mw_bind *mw_bind);
int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog);
int c4iw_destroy_listen(struct iw_cm_id *cm_id);
@@ -968,17 +966,6 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start,
u64 length, u64 virt, int acc,
struct ib_udata *udata);
struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc);
-struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf,
- int acc,
- u64 *iova_start);
-int c4iw_reregister_phys_mem(struct ib_mr *mr,
- int mr_rereg_mask,
- struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf,
- int acc, u64 *iova_start);
int c4iw_dereg_mr(struct ib_mr *ib_mr);
int c4iw_destroy_cq(struct ib_cq *ib_cq);
struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index e1629ab..7849890 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -392,32 +392,6 @@ static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php,
return ret;
}
-static int reregister_mem(struct c4iw_dev *rhp, struct c4iw_pd *php,
- struct c4iw_mr *mhp, int shift, int npages)
-{
- u32 stag;
- int ret;
-
- if (npages > mhp->attr.pbl_size)
- return -ENOMEM;
-
- stag = mhp->attr.stag;
- ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, mhp->attr.pdid,
- FW_RI_STAG_NSMR, mhp->attr.perms,
- mhp->attr.mw_bind_enable, mhp->attr.zbva,
- mhp->attr.va_fbo, mhp->attr.len, shift - 12,
- mhp->attr.pbl_size, mhp->attr.pbl_addr);
- if (ret)
- return ret;
-
- ret = finish_mem_reg(mhp, stag);
- if (ret)
- dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
- mhp->attr.pbl_addr);
-
- return ret;
-}
-
static int alloc_pbl(struct c4iw_mr *mhp, int npages)
{
mhp->attr.pbl_addr = c4iw_pblpool_alloc(&mhp->rhp->rdev,
@@ -431,228 +405,6 @@ static int alloc_pbl(struct c4iw_mr *mhp, int npages)
return 0;
}
-static int build_phys_page_list(struct ib_phys_buf *buffer_list,
- int num_phys_buf, u64 *iova_start,
- u64 *total_size, int *npages,
- int *shift, __be64 **page_list)
-{
- u64 mask;
- int i, j, n;
-
- mask = 0;
- *total_size = 0;
- for (i = 0; i < num_phys_buf; ++i) {
- if (i != 0 && buffer_list[i].addr & ~PAGE_MASK)
- return -EINVAL;
- if (i != 0 && i != num_phys_buf - 1 &&
- (buffer_list[i].size & ~PAGE_MASK))
- return -EINVAL;
- *total_size += buffer_list[i].size;
- if (i > 0)
- mask |= buffer_list[i].addr;
- else
- mask |= buffer_list[i].addr & PAGE_MASK;
- if (i != num_phys_buf - 1)
- mask |= buffer_list[i].addr + buffer_list[i].size;
- else
- mask |= (buffer_list[i].addr + buffer_list[i].size +
- PAGE_SIZE - 1) & PAGE_MASK;
- }
-
- if (*total_size > 0xFFFFFFFFULL)
- return -ENOMEM;
-
- /* Find largest page shift we can use to cover buffers */
- for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift))
- if ((1ULL << *shift) & mask)
- break;
-
- buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1);
- buffer_list[0].addr &= ~0ull << *shift;
-
- *npages = 0;
- for (i = 0; i < num_phys_buf; ++i)
- *npages += (buffer_list[i].size +
- (1ULL << *shift) - 1) >> *shift;
-
- if (!*npages)
- return -EINVAL;
-
- *page_list = kmalloc(sizeof(u64) * *npages, GFP_KERNEL);
- if (!*page_list)
- return -ENOMEM;
-
- n = 0;
- for (i = 0; i < num_phys_buf; ++i)
- for (j = 0;
- j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift;
- ++j)
- (*page_list)[n++] = cpu_to_be64(buffer_list[i].addr +
- ((u64) j << *shift));
-
- PDBG("%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d\n",
- __func__, (unsigned long long)*iova_start,
- (unsigned long long)mask, *shift, (unsigned long long)*total_size,
- *npages);
-
- return 0;
-
-}
-
-int c4iw_reregister_phys_mem(struct ib_mr *mr, int mr_rereg_mask,
- struct ib_pd *pd, struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start)
-{
-
- struct c4iw_mr mh, *mhp;
- struct c4iw_pd *php;
- struct c4iw_dev *rhp;
- __be64 *page_list = NULL;
- int shift = 0;
- u64 total_size;
- int npages;
- int ret;
-
- PDBG("%s ib_mr %p ib_pd %p\n", __func__, mr, pd);
-
- /* There can be no memory windows */
- if (atomic_read(&mr->usecnt))
- return -EINVAL;
-
- mhp = to_c4iw_mr(mr);
- rhp = mhp->rhp;
- php = to_c4iw_pd(mr->pd);
-
- /* make sure we are on the same adapter */
- if (rhp != php->rhp)
- return -EINVAL;
-
- memcpy(&mh, mhp, sizeof *mhp);
-
- if (mr_rereg_mask & IB_MR_REREG_PD)
- php = to_c4iw_pd(pd);
- if (mr_rereg_mask & IB_MR_REREG_ACCESS) {
- mh.attr.perms = c4iw_ib_to_tpt_access(acc);
- mh.attr.mw_bind_enable = (acc & IB_ACCESS_MW_BIND) ==
- IB_ACCESS_MW_BIND;
- }
- if (mr_rereg_mask & IB_MR_REREG_TRANS) {
- ret = build_phys_page_list(buffer_list, num_phys_buf,
- iova_start,
- &total_size, &npages,
- &shift, &page_list);
- if (ret)
- return ret;
- }
-
- if (mr_exceeds_hw_limits(rhp, total_size)) {
- kfree(page_list);
- return -EINVAL;
- }
-
- ret = reregister_mem(rhp, php, &mh, shift, npages);
- kfree(page_list);
- if (ret)
- return ret;
- if (mr_rereg_mask & IB_MR_REREG_PD)
- mhp->attr.pdid = php->pdid;
- if (mr_rereg_mask & IB_MR_REREG_ACCESS)
- mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
- if (mr_rereg_mask & IB_MR_REREG_TRANS) {
- mhp->attr.zbva = 0;
- mhp->attr.va_fbo = *iova_start;
- mhp->attr.page_size = shift - 12;
- mhp->attr.len = (u32) total_size;
- mhp->attr.pbl_size = npages;
- }
-
- return 0;
-}
-
-struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start)
-{
- __be64 *page_list;
- int shift;
- u64 total_size;
- int npages;
- struct c4iw_dev *rhp;
- struct c4iw_pd *php;
- struct c4iw_mr *mhp;
- int ret;
-
- PDBG("%s ib_pd %p\n", __func__, pd);
- php = to_c4iw_pd(pd);
- rhp = php->rhp;
-
- mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
- if (!mhp)
- return ERR_PTR(-ENOMEM);
-
- mhp->rhp = rhp;
-
- /* First check that we have enough alignment */
- if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
- ret = -EINVAL;
- goto err;
- }
-
- if (num_phys_buf > 1 &&
- ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) {
- ret = -EINVAL;
- goto err;
- }
-
- ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start,
- &total_size, &npages, &shift,
- &page_list);
- if (ret)
- goto err;
-
- if (mr_exceeds_hw_limits(rhp, total_size)) {
- kfree(page_list);
- ret = -EINVAL;
- goto err;
- }
-
- ret = alloc_pbl(mhp, npages);
- if (ret) {
- kfree(page_list);
- goto err;
- }
-
- ret = write_pbl(&mhp->rhp->rdev, page_list, mhp->attr.pbl_addr,
- npages);
- kfree(page_list);
- if (ret)
- goto err_pbl;
-
- mhp->attr.pdid = php->pdid;
- mhp->attr.zbva = 0;
-
- mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
- mhp->attr.va_fbo = *iova_start;
- mhp->attr.page_size = shift - 12;
-
- mhp->attr.len = (u32) total_size;
- mhp->attr.pbl_size = npages;
- ret = register_mem(rhp, php, mhp, shift);
- if (ret)
- goto err_pbl;
-
- return &mhp->ibmr;
-
-err_pbl:
- c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
- mhp->attr.pbl_size << 3);
-
-err:
- kfree(mhp);
- return ERR_PTR(ret);
-
-}
-
struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
{
struct c4iw_dev *rhp;
@@ -952,9 +704,6 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr)
u32 mmid;
PDBG("%s ib_mr %p\n", __func__, ib_mr);
- /* There can be no memory windows */
- if (atomic_read(&ib_mr->usecnt))
- return -EINVAL;
mhp = to_c4iw_mr(ib_mr);
rhp = mhp->rhp;
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 0a7d998..ec04272 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -549,12 +549,9 @@ int c4iw_register_device(struct c4iw_dev *dev)
dev->ibdev.resize_cq = c4iw_resize_cq;
dev->ibdev.poll_cq = c4iw_poll_cq;
dev->ibdev.get_dma_mr = c4iw_get_dma_mr;
- dev->ibdev.reg_phys_mr = c4iw_register_phys_mem;
- dev->ibdev.rereg_phys_mr = c4iw_reregister_phys_mem;
dev->ibdev.reg_user_mr = c4iw_reg_user_mr;
dev->ibdev.dereg_mr = c4iw_dereg_mr;
dev->ibdev.alloc_mw = c4iw_alloc_mw;
- dev->ibdev.bind_mw = c4iw_bind_mw;
dev->ibdev.dealloc_mw = c4iw_dealloc_mw;
dev->ibdev.alloc_mr = c4iw_alloc_mr;
dev->ibdev.map_mr_sg = c4iw_map_mr_sg;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index aa515af..e99345e 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -933,11 +933,6 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
return err;
}
-int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind)
-{
- return -ENOSYS;
-}
-
static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type,
u8 *ecode)
{
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 1092a2d..6126bbe 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -699,4 +699,11 @@ static inline void t4_set_cq_in_error(struct t4_cq *cq)
struct t4_dev_status_page {
u8 db_off;
+ u8 pad1;
+ u16 pad2;
+ u32 pad3;
+ u64 qp_start;
+ u64 qp_size;
+ u64 cq_start;
+ u64 cq_size;
};
diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/cxgb4/user.h
index cbd0ce1..295f422 100644
--- a/drivers/infiniband/hw/cxgb4/user.h
+++ b/drivers/infiniband/hw/cxgb4/user.h
@@ -32,7 +32,7 @@
#ifndef __C4IW_USER_H__
#define __C4IW_USER_H__
-#define C4IW_UVERBS_ABI_VERSION 2
+#define C4IW_UVERBS_ABI_VERSION 3
/*
* Make sure that all structs defined in this file remain laid out so
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 86af713..105246f 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -92,7 +92,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
ah_attr->grh.sgid_index, &sgid, &gid_attr);
if (ret)
return ERR_PTR(ret);
- memset(ah->av.eth.s_mac, 0, ETH_ALEN);
+ eth_zero_addr(ah->av.eth.s_mac);
if (gid_attr.ndev) {
if (is_vlan_dev(gid_attr.ndev))
vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
@@ -104,6 +104,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
ah->av.eth.gid_index = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index);
ah->av.eth.vlan = cpu_to_be16(vlan_tag);
+ ah->av.eth.hop_limit = ah_attr->grh.hop_limit;
if (ah_attr->static_rate) {
ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
while (ah->av.eth.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index b88fc8f..9f8b516 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -811,9 +811,6 @@ repoll:
wc->opcode = IB_WC_MASKED_FETCH_ADD;
wc->byte_len = 8;
break;
- case MLX4_OPCODE_BIND_MW:
- wc->opcode = IB_WC_BIND_MW;
- break;
case MLX4_OPCODE_LSO:
wc->opcode = IB_WC_LSO;
break;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 97d6878..1c7ab6c 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -154,9 +154,9 @@ static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, u8 port_n
return dev;
}
-static int mlx4_ib_update_gids(struct gid_entry *gids,
- struct mlx4_ib_dev *ibdev,
- u8 port_num)
+static int mlx4_ib_update_gids_v1(struct gid_entry *gids,
+ struct mlx4_ib_dev *ibdev,
+ u8 port_num)
{
struct mlx4_cmd_mailbox *mailbox;
int err;
@@ -187,6 +187,63 @@ static int mlx4_ib_update_gids(struct gid_entry *gids,
return err;
}
+static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids,
+ struct mlx4_ib_dev *ibdev,
+ u8 port_num)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ int err;
+ struct mlx4_dev *dev = ibdev->dev;
+ int i;
+ struct {
+ union ib_gid gid;
+ __be32 rsrvd1[2];
+ __be16 rsrvd2;
+ u8 type;
+ u8 version;
+ __be32 rsrvd3;
+ } *gid_tbl;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return -ENOMEM;
+
+ gid_tbl = mailbox->buf;
+ for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
+ memcpy(&gid_tbl[i].gid, &gids[i].gid, sizeof(union ib_gid));
+ if (gids[i].gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
+ gid_tbl[i].version = 2;
+ if (!ipv6_addr_v4mapped((struct in6_addr *)&gids[i].gid))
+ gid_tbl[i].type = 1;
+ else
+ memset(&gid_tbl[i].gid, 0, 12);
+ }
+ }
+
+ err = mlx4_cmd(dev, mailbox->dma,
+ MLX4_SET_PORT_ROCE_ADDR << 8 | port_num,
+ 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+ if (mlx4_is_bonded(dev))
+ err += mlx4_cmd(dev, mailbox->dma,
+ MLX4_SET_PORT_ROCE_ADDR << 8 | 2,
+ 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+
+static int mlx4_ib_update_gids(struct gid_entry *gids,
+ struct mlx4_ib_dev *ibdev,
+ u8 port_num)
+{
+ if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
+ return mlx4_ib_update_gids_v1_v2(gids, ibdev, port_num);
+
+ return mlx4_ib_update_gids_v1(gids, ibdev, port_num);
+}
+
static int mlx4_ib_add_gid(struct ib_device *device,
u8 port_num,
unsigned int index,
@@ -215,7 +272,8 @@ static int mlx4_ib_add_gid(struct ib_device *device,
port_gid_table = &iboe->gids[port_num - 1];
spin_lock_bh(&iboe->lock);
for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
- if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid))) {
+ if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid)) &&
+ (port_gid_table->gids[i].gid_type == attr->gid_type)) {
found = i;
break;
}
@@ -233,6 +291,7 @@ static int mlx4_ib_add_gid(struct ib_device *device,
} else {
*context = port_gid_table->gids[free].ctx;
memcpy(&port_gid_table->gids[free].gid, gid, sizeof(*gid));
+ port_gid_table->gids[free].gid_type = attr->gid_type;
port_gid_table->gids[free].ctx->real_index = free;
port_gid_table->gids[free].ctx->refcount = 1;
hw_update = 1;
@@ -248,8 +307,10 @@ static int mlx4_ib_add_gid(struct ib_device *device,
if (!gids) {
ret = -ENOMEM;
} else {
- for (i = 0; i < MLX4_MAX_PORT_GIDS; i++)
+ for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
+ gids[i].gid_type = port_gid_table->gids[i].gid_type;
+ }
}
}
spin_unlock_bh(&iboe->lock);
@@ -325,6 +386,7 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
int i;
int ret;
unsigned long flags;
+ struct ib_gid_attr attr;
if (port_num > MLX4_MAX_PORTS)
return -EINVAL;
@@ -335,10 +397,13 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num))
return index;
- ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, NULL);
+ ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, &attr);
if (ret)
return ret;
+ if (attr.ndev)
+ dev_put(attr.ndev);
+
if (!memcmp(&gid, &zgid, sizeof(gid)))
return -EINVAL;
@@ -346,7 +411,8 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
port_gid_table = &iboe->gids[port_num - 1];
for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
- if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid))) {
+ if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid)) &&
+ attr.gid_type == port_gid_table->gids[i].gid_type) {
ctx = port_gid_table->gids[i].ctx;
break;
}
@@ -2119,6 +2185,7 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_immutable *immutable)
{
struct ib_port_attr attr;
+ struct mlx4_ib_dev *mdev = to_mdev(ibdev);
int err;
err = mlx4_ib_query_port(ibdev, port_num, &attr);
@@ -2128,10 +2195,15 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND)
+ if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) {
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
- else
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+ } else {
+ if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+ if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
+ RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+ }
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
@@ -2283,7 +2355,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw;
- ibdev->ib_dev.bind_mw = mlx4_ib_bind_mw;
ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw;
ibdev->ib_dev.uverbs_cmd_mask |=
@@ -2423,7 +2494,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (mlx4_ib_init_sriov(ibdev))
goto err_mad;
- if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) {
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE ||
+ dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
if (!iboe->nb.notifier_call) {
iboe->nb.notifier_call = mlx4_ib_netdev_event;
err = register_netdevice_notifier(&iboe->nb);
@@ -2432,6 +2504,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
goto err_notif;
}
}
+ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
+ err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT);
+ if (err) {
+ goto err_notif;
+ }
+ }
}
for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 1caa11e..52ce7b0 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -177,11 +177,18 @@ struct mlx4_ib_wq {
unsigned tail;
};
+enum {
+ MLX4_IB_QP_CREATE_ROCE_V2_GSI = IB_QP_CREATE_RESERVED_START
+};
+
enum mlx4_ib_qp_flags {
MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO,
+
+ /* Mellanox specific flags start from IB_QP_CREATE_RESERVED_START */
+ MLX4_IB_ROCE_V2_GSI_QP = MLX4_IB_QP_CREATE_ROCE_V2_GSI,
MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
MLX4_IB_SRIOV_SQP = 1 << 31,
};
@@ -478,6 +485,7 @@ struct gid_cache_context {
struct gid_entry {
union ib_gid gid;
+ enum ib_gid_type gid_type;
struct gid_cache_context *ctx;
};
@@ -704,8 +712,6 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct ib_udata *udata);
int mlx4_ib_dereg_mr(struct ib_mr *mr);
struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
-int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
- struct ib_mw_bind *mw_bind);
int mlx4_ib_dealloc_mw(struct ib_mw *mw);
struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 4d1e1c6..242b94e 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -366,28 +366,6 @@ err_free:
return ERR_PTR(err);
}
-int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
- struct ib_mw_bind *mw_bind)
-{
- struct ib_bind_mw_wr wr;
- struct ib_send_wr *bad_wr;
- int ret;
-
- memset(&wr, 0, sizeof(wr));
- wr.wr.opcode = IB_WR_BIND_MW;
- wr.wr.wr_id = mw_bind->wr_id;
- wr.wr.send_flags = mw_bind->send_flags;
- wr.mw = mw;
- wr.bind_info = mw_bind->bind_info;
- wr.rkey = ib_inc_rkey(mw->rkey);
-
- ret = mlx4_ib_post_send(qp, &wr.wr, &bad_wr);
- if (!ret)
- mw->rkey = wr.rkey;
-
- return ret;
-}
-
int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
{
struct mlx4_ib_mw *mw = to_mmw(ibmw);
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 13eaaf4..bc5536f 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -32,6 +32,8 @@
*/
#include <linux/log2.h>
+#include <linux/etherdevice.h>
+#include <net/ip.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
@@ -85,6 +87,7 @@ struct mlx4_ib_sqp {
u32 send_psn;
struct ib_ud_header ud_header;
u8 header_buf[MLX4_IB_UD_HEADER_SIZE];
+ struct ib_qp *roce_v2_gsi;
};
enum {
@@ -115,7 +118,6 @@ static const __be32 mlx4_ib_opcode[] = {
[IB_WR_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR),
[IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS),
[IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
- [IB_WR_BIND_MW] = cpu_to_be32(MLX4_OPCODE_BIND_MW),
};
static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
@@ -154,7 +156,10 @@ static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
}
}
}
- return proxy_sqp;
+ if (proxy_sqp)
+ return 1;
+
+ return !!(qp->flags & MLX4_IB_ROCE_V2_GSI_QP);
}
/* used for INIT/CLOSE port logic */
@@ -796,11 +801,13 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (err)
goto err_mtt;
- qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(u64), gfp);
+ qp->sq.wrid = kmalloc_array(qp->sq.wqe_cnt, sizeof(u64),
+ gfp | __GFP_NOWARN);
if (!qp->sq.wrid)
qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
gfp, PAGE_KERNEL);
- qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(u64), gfp);
+ qp->rq.wrid = kmalloc_array(qp->rq.wqe_cnt, sizeof(u64),
+ gfp | __GFP_NOWARN);
if (!qp->rq.wrid)
qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64),
gfp, PAGE_KERNEL);
@@ -1099,9 +1106,9 @@ static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr)
return dev->dev->caps.qp1_proxy[attr->port_num - 1];
}
-struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
+static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
{
struct mlx4_ib_qp *qp = NULL;
int err;
@@ -1120,6 +1127,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
MLX4_IB_SRIOV_TUNNEL_QP |
MLX4_IB_SRIOV_SQP |
MLX4_IB_QP_NETIF |
+ MLX4_IB_QP_CREATE_ROCE_V2_GSI |
MLX4_IB_QP_CREATE_USE_GFP_NOIO))
return ERR_PTR(-EINVAL);
@@ -1128,15 +1136,21 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
return ERR_PTR(-EINVAL);
}
- if (init_attr->create_flags &&
- ((udata && init_attr->create_flags & ~(sup_u_create_flags)) ||
- ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP |
- MLX4_IB_QP_CREATE_USE_GFP_NOIO |
- MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)) &&
- init_attr->qp_type != IB_QPT_UD) ||
- ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) &&
- init_attr->qp_type > IB_QPT_GSI)))
- return ERR_PTR(-EINVAL);
+ if (init_attr->create_flags) {
+ if (udata && init_attr->create_flags & ~(sup_u_create_flags))
+ return ERR_PTR(-EINVAL);
+
+ if ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP |
+ MLX4_IB_QP_CREATE_USE_GFP_NOIO |
+ MLX4_IB_QP_CREATE_ROCE_V2_GSI |
+ MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) &&
+ init_attr->qp_type != IB_QPT_UD) ||
+ (init_attr->create_flags & MLX4_IB_SRIOV_SQP &&
+ init_attr->qp_type > IB_QPT_GSI) ||
+ (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI &&
+ init_attr->qp_type != IB_QPT_GSI))
+ return ERR_PTR(-EINVAL);
+ }
switch (init_attr->qp_type) {
case IB_QPT_XRC_TGT:
@@ -1173,19 +1187,29 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
case IB_QPT_SMI:
case IB_QPT_GSI:
{
+ int sqpn;
+
/* Userspace is not allowed to create special QPs: */
if (udata)
return ERR_PTR(-EINVAL);
+ if (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI) {
+ int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev, 1, 1, &sqpn, 0);
+
+ if (res)
+ return ERR_PTR(res);
+ } else {
+ sqpn = get_sqp_num(to_mdev(pd->device), init_attr);
+ }
err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
- get_sqp_num(to_mdev(pd->device), init_attr),
+ sqpn,
&qp, gfp);
if (err)
return ERR_PTR(err);
qp->port = init_attr->port_num;
- qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
-
+ qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 :
+ init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI ? sqpn : 1;
break;
}
default:
@@ -1196,7 +1220,41 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
return &qp->ibqp;
}
-int mlx4_ib_destroy_qp(struct ib_qp *qp)
+struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata) {
+ struct ib_device *device = pd ? pd->device : init_attr->xrcd->device;
+ struct ib_qp *ibqp;
+ struct mlx4_ib_dev *dev = to_mdev(device);
+
+ ibqp = _mlx4_ib_create_qp(pd, init_attr, udata);
+
+ if (!IS_ERR(ibqp) &&
+ (init_attr->qp_type == IB_QPT_GSI) &&
+ !(init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI)) {
+ struct mlx4_ib_sqp *sqp = to_msqp((to_mqp(ibqp)));
+ int is_eth = rdma_cap_eth_ah(&dev->ib_dev, init_attr->port_num);
+
+ if (is_eth &&
+ dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
+ init_attr->create_flags |= MLX4_IB_QP_CREATE_ROCE_V2_GSI;
+ sqp->roce_v2_gsi = ib_create_qp(pd, init_attr);
+
+ if (IS_ERR(sqp->roce_v2_gsi)) {
+ pr_err("Failed to create GSI QP for RoCEv2 (%ld)\n", PTR_ERR(sqp->roce_v2_gsi));
+ sqp->roce_v2_gsi = NULL;
+ } else {
+ sqp = to_msqp(to_mqp(sqp->roce_v2_gsi));
+ sqp->qp.flags |= MLX4_IB_ROCE_V2_GSI_QP;
+ }
+
+ init_attr->create_flags &= ~MLX4_IB_QP_CREATE_ROCE_V2_GSI;
+ }
+ }
+ return ibqp;
+}
+
+static int _mlx4_ib_destroy_qp(struct ib_qp *qp)
{
struct mlx4_ib_dev *dev = to_mdev(qp->device);
struct mlx4_ib_qp *mqp = to_mqp(qp);
@@ -1225,6 +1283,20 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp)
return 0;
}
+int mlx4_ib_destroy_qp(struct ib_qp *qp)
+{
+ struct mlx4_ib_qp *mqp = to_mqp(qp);
+
+ if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
+ struct mlx4_ib_sqp *sqp = to_msqp(mqp);
+
+ if (sqp->roce_v2_gsi)
+ ib_destroy_qp(sqp->roce_v2_gsi);
+ }
+
+ return _mlx4_ib_destroy_qp(qp);
+}
+
static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type)
{
switch (type) {
@@ -1507,6 +1579,24 @@ static int create_qp_lb_counter(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
return 0;
}
+enum {
+ MLX4_QPC_ROCE_MODE_1 = 0,
+ MLX4_QPC_ROCE_MODE_2 = 2,
+ MLX4_QPC_ROCE_MODE_UNDEFINED = 0xff
+};
+
+static u8 gid_type_to_qpc(enum ib_gid_type gid_type)
+{
+ switch (gid_type) {
+ case IB_GID_TYPE_ROCE:
+ return MLX4_QPC_ROCE_MODE_1;
+ case IB_GID_TYPE_ROCE_UDP_ENCAP:
+ return MLX4_QPC_ROCE_MODE_2;
+ default:
+ return MLX4_QPC_ROCE_MODE_UNDEFINED;
+ }
+}
+
static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
enum ib_qp_state cur_state, enum ib_qp_state new_state)
@@ -1633,6 +1723,14 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
mlx4_ib_steer_qp_reg(dev, qp, 1);
steer_qp = 1;
}
+
+ if (ibqp->qp_type == IB_QPT_GSI) {
+ enum ib_gid_type gid_type = qp->flags & MLX4_IB_ROCE_V2_GSI_QP ?
+ IB_GID_TYPE_ROCE_UDP_ENCAP : IB_GID_TYPE_ROCE;
+ u8 qpc_roce_mode = gid_type_to_qpc(gid_type);
+
+ context->rlkey_roce_mode |= (qpc_roce_mode << 6);
+ }
}
if (attr_mask & IB_QP_PKEY_INDEX) {
@@ -1650,9 +1748,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
u16 vlan = 0xffff;
u8 smac[ETH_ALEN];
int status = 0;
+ int is_eth = rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
+ attr->ah_attr.ah_flags & IB_AH_GRH;
- if (rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
- attr->ah_attr.ah_flags & IB_AH_GRH) {
+ if (is_eth) {
int index = attr->ah_attr.grh.sgid_index;
status = ib_get_cached_gid(ibqp->device, port_num,
@@ -1674,6 +1773,18 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
MLX4_QP_OPTPAR_SCHED_QUEUE);
+
+ if (is_eth &&
+ (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR)) {
+ u8 qpc_roce_mode = gid_type_to_qpc(gid_attr.gid_type);
+
+ if (qpc_roce_mode == MLX4_QPC_ROCE_MODE_UNDEFINED) {
+ err = -EINVAL;
+ goto out;
+ }
+ context->rlkey_roce_mode |= (qpc_roce_mode << 6);
+ }
+
}
if (attr_mask & IB_QP_TIMEOUT) {
@@ -1845,7 +1956,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
sqd_event = 0;
if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
- context->rlkey |= (1 << 4);
+ context->rlkey_roce_mode |= (1 << 4);
/*
* Before passing a kernel QP to the HW, make sure that the
@@ -2022,8 +2133,8 @@ out:
return err;
}
-int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
- int attr_mask, struct ib_udata *udata)
+static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
struct mlx4_ib_qp *qp = to_mqp(ibqp);
@@ -2126,6 +2237,27 @@ out:
return err;
}
+int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct mlx4_ib_qp *mqp = to_mqp(ibqp);
+ int ret;
+
+ ret = _mlx4_ib_modify_qp(ibqp, attr, attr_mask, udata);
+
+ if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
+ struct mlx4_ib_sqp *sqp = to_msqp(mqp);
+ int err = 0;
+
+ if (sqp->roce_v2_gsi)
+ err = ib_modify_qp(sqp->roce_v2_gsi, attr, attr_mask);
+ if (err)
+ pr_err("Failed to modify GSI QP for RoCEv2 (%d)\n",
+ err);
+ }
+ return ret;
+}
+
static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
{
int i;
@@ -2168,7 +2300,7 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
send_size += sizeof (struct mlx4_ib_tunnel_header);
- ib_ud_header_init(send_size, 1, 0, 0, 0, 0, &sqp->ud_header);
+ ib_ud_header_init(send_size, 1, 0, 0, 0, 0, 0, 0, &sqp->ud_header);
if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
sqp->ud_header.lrh.service_level =
@@ -2252,16 +2384,7 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
return 0;
}
-static void mlx4_u64_to_smac(u8 *dst_mac, u64 src_mac)
-{
- int i;
-
- for (i = ETH_ALEN; i; i--) {
- dst_mac[i - 1] = src_mac & 0xff;
- src_mac >>= 8;
- }
-}
-
+#define MLX4_ROCEV2_QP1_SPORT 0xC000
static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
void *wqe, unsigned *mlx_seg_len)
{
@@ -2281,6 +2404,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
bool is_eth;
bool is_vlan = false;
bool is_grh;
+ bool is_udp = false;
+ int ip_version = 0;
send_size = 0;
for (i = 0; i < wr->wr.num_sge; ++i)
@@ -2289,6 +2414,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
is_grh = mlx4_ib_ah_grh_present(ah);
if (is_eth) {
+ struct ib_gid_attr gid_attr;
+
if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
/* When multi-function is enabled, the ib_core gid
* indexes don't necessarily match the hw ones, so
@@ -2302,19 +2429,35 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
err = ib_get_cached_gid(ib_dev,
be32_to_cpu(ah->av.ib.port_pd) >> 24,
ah->av.ib.gid_index, &sgid,
- NULL);
- if (!err && !memcmp(&sgid, &zgid, sizeof(sgid)))
- err = -ENOENT;
- if (err)
+ &gid_attr);
+ if (!err) {
+ if (gid_attr.ndev)
+ dev_put(gid_attr.ndev);
+ if (!memcmp(&sgid, &zgid, sizeof(sgid)))
+ err = -ENOENT;
+ }
+ if (!err) {
+ is_udp = gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
+ if (is_udp) {
+ if (ipv6_addr_v4mapped((struct in6_addr *)&sgid))
+ ip_version = 4;
+ else
+ ip_version = 6;
+ is_grh = false;
+ }
+ } else {
return err;
+ }
}
-
if (ah->av.eth.vlan != cpu_to_be16(0xffff)) {
vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff;
is_vlan = 1;
}
}
- ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header);
+ err = ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh,
+ ip_version, is_udp, 0, &sqp->ud_header);
+ if (err)
+ return err;
if (!is_eth) {
sqp->ud_header.lrh.service_level =
@@ -2323,7 +2466,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f);
}
- if (is_grh) {
+ if (is_grh || (ip_version == 6)) {
sqp->ud_header.grh.traffic_class =
(be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff;
sqp->ud_header.grh.flow_label =
@@ -2352,6 +2495,25 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
ah->av.ib.dgid, 16);
}
+ if (ip_version == 4) {
+ sqp->ud_header.ip4.tos =
+ (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff;
+ sqp->ud_header.ip4.id = 0;
+ sqp->ud_header.ip4.frag_off = htons(IP_DF);
+ sqp->ud_header.ip4.ttl = ah->av.eth.hop_limit;
+
+ memcpy(&sqp->ud_header.ip4.saddr,
+ sgid.raw + 12, 4);
+ memcpy(&sqp->ud_header.ip4.daddr, ah->av.ib.dgid + 12, 4);
+ sqp->ud_header.ip4.check = ib_ud_ip4_csum(&sqp->ud_header);
+ }
+
+ if (is_udp) {
+ sqp->ud_header.udp.dport = htons(ROCE_V2_UDP_DPORT);
+ sqp->ud_header.udp.sport = htons(MLX4_ROCEV2_QP1_SPORT);
+ sqp->ud_header.udp.csum = 0;
+ }
+
mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
if (!is_eth) {
@@ -2380,34 +2542,27 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
if (is_eth) {
struct in6_addr in6;
-
+ u16 ether_type;
u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
+ ether_type = (!is_udp) ? MLX4_IB_IBOE_ETHERTYPE :
+ (ip_version == 4 ? ETH_P_IP : ETH_P_IPV6);
+
mlx->sched_prio = cpu_to_be16(pcp);
+ ether_addr_copy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac);
memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6);
- /* FIXME: cache smac value? */
memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2);
memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4);
memcpy(&in6, sgid.raw, sizeof(in6));
- if (!mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
- u64 mac = atomic64_read(&to_mdev(ib_dev)->iboe.mac[sqp->qp.port - 1]);
- u8 smac[ETH_ALEN];
-
- mlx4_u64_to_smac(smac, mac);
- memcpy(sqp->ud_header.eth.smac_h, smac, ETH_ALEN);
- } else {
- /* use the src mac of the tunnel */
- memcpy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac, ETH_ALEN);
- }
if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
if (!is_vlan) {
- sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
+ sqp->ud_header.eth.type = cpu_to_be16(ether_type);
} else {
- sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
+ sqp->ud_header.vlan.type = cpu_to_be16(ether_type);
sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp);
}
} else {
@@ -2528,25 +2683,6 @@ static void set_reg_seg(struct mlx4_wqe_fmr_seg *fseg,
fseg->reserved[1] = 0;
}
-static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg,
- struct ib_bind_mw_wr *wr)
-{
- bseg->flags1 =
- convert_access(wr->bind_info.mw_access_flags) &
- cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ |
- MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE |
- MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC);
- bseg->flags2 = 0;
- if (wr->mw->type == IB_MW_TYPE_2)
- bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_TYPE_2);
- if (wr->bind_info.mw_access_flags & IB_ZERO_BASED)
- bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_ZERO_BASED);
- bseg->new_rkey = cpu_to_be32(wr->rkey);
- bseg->lkey = cpu_to_be32(wr->bind_info.mr->lkey);
- bseg->addr = cpu_to_be64(wr->bind_info.addr);
- bseg->length = cpu_to_be64(wr->bind_info.length);
-}
-
static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey)
{
memset(iseg, 0, sizeof(*iseg));
@@ -2766,6 +2902,29 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
int i;
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
+ struct mlx4_ib_sqp *sqp = to_msqp(qp);
+
+ if (sqp->roce_v2_gsi) {
+ struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah);
+ struct ib_gid_attr gid_attr;
+ union ib_gid gid;
+
+ if (!ib_get_cached_gid(ibqp->device,
+ be32_to_cpu(ah->av.ib.port_pd) >> 24,
+ ah->av.ib.gid_index, &gid,
+ &gid_attr)) {
+ if (gid_attr.ndev)
+ dev_put(gid_attr.ndev);
+ qp = (gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
+ to_mqp(sqp->roce_v2_gsi) : qp;
+ } else {
+ pr_err("Failed to get gid at index %d. RoCEv2 will not work properly\n",
+ ah->av.ib.gid_index);
+ }
+ }
+ }
+
spin_lock_irqsave(&qp->sq.lock, flags);
if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
err = -EIO;
@@ -2867,13 +3026,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
size += sizeof(struct mlx4_wqe_fmr_seg) / 16;
break;
- case IB_WR_BIND_MW:
- ctrl->srcrb_flags |=
- cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
- set_bind_seg(wqe, bind_mw_wr(wr));
- wqe += sizeof(struct mlx4_wqe_bind_seg);
- size += sizeof(struct mlx4_wqe_bind_seg) / 16;
- break;
default:
/* No extra segments required for sends */
break;
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index c394376..0597f3e 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -171,7 +171,8 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
if (err)
goto err_mtt;
- srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL);
+ srq->wrid = kmalloc_array(srq->msrq.max, sizeof(u64),
+ GFP_KERNEL | __GFP_NOWARN);
if (!srq->wrid) {
srq->wrid = __vmalloc(srq->msrq.max * sizeof(u64),
GFP_KERNEL, PAGE_KERNEL);
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
index 6608058..745efa4 100644
--- a/drivers/infiniband/hw/mlx5/ah.c
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -32,8 +32,10 @@
#include "mlx5_ib.h"
-struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
- struct mlx5_ib_ah *ah)
+static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_ah *ah,
+ struct ib_ah_attr *ah_attr,
+ enum rdma_link_layer ll)
{
if (ah_attr->ah_flags & IB_AH_GRH) {
memcpy(ah->av.rgid, &ah_attr->grh.dgid, 16);
@@ -44,9 +46,20 @@ struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
ah->av.tclass = ah_attr->grh.traffic_class;
}
- ah->av.rlid = cpu_to_be16(ah_attr->dlid);
- ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f;
- ah->av.stat_rate_sl = (ah_attr->static_rate << 4) | (ah_attr->sl & 0xf);
+ ah->av.stat_rate_sl = (ah_attr->static_rate << 4);
+
+ if (ll == IB_LINK_LAYER_ETHERNET) {
+ memcpy(ah->av.rmac, ah_attr->dmac, sizeof(ah_attr->dmac));
+ ah->av.udp_sport =
+ mlx5_get_roce_udp_sport(dev,
+ ah_attr->port_num,
+ ah_attr->grh.sgid_index);
+ ah->av.stat_rate_sl |= (ah_attr->sl & 0x7) << 1;
+ } else {
+ ah->av.rlid = cpu_to_be16(ah_attr->dlid);
+ ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f;
+ ah->av.stat_rate_sl |= (ah_attr->sl & 0xf);
+ }
return &ah->ibah;
}
@@ -54,12 +67,19 @@ struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
{
struct mlx5_ib_ah *ah;
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ enum rdma_link_layer ll;
+
+ ll = pd->device->get_link_layer(pd->device, ah_attr->port_num);
+
+ if (ll == IB_LINK_LAYER_ETHERNET && !(ah_attr->ah_flags & IB_AH_GRH))
+ return ERR_PTR(-EINVAL);
ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
if (!ah)
return ERR_PTR(-ENOMEM);
- return create_ib_ah(ah_attr, ah); /* never fails */
+ return create_ib_ah(dev, ah, ah_attr, ll); /* never fails */
}
int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 92ddae1..fd1de31 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -154,9 +154,6 @@ static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
wc->opcode = IB_WC_MASKED_FETCH_ADD;
wc->byte_len = 8;
break;
- case MLX5_OPCODE_BIND_MW:
- wc->opcode = IB_WC_BIND_MW;
- break;
case MLX5_OPCODE_UMR:
wc->opcode = get_umr_comp(wq, idx);
break;
@@ -171,6 +168,7 @@ enum {
static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
struct mlx5_ib_qp *qp)
{
+ enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1);
struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
struct mlx5_ib_srq *srq;
struct mlx5_ib_wq *wq;
@@ -236,6 +234,22 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
} else {
wc->pkey_index = 0;
}
+
+ if (ll != IB_LINK_LAYER_ETHERNET)
+ return;
+
+ switch (wc->sl & 0x3) {
+ case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH:
+ wc->network_hdr_type = RDMA_NETWORK_IB;
+ break;
+ case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6:
+ wc->network_hdr_type = RDMA_NETWORK_IPV6;
+ break;
+ case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4:
+ wc->network_hdr_type = RDMA_NETWORK_IPV4;
+ break;
+ }
+ wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
}
static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
@@ -760,12 +774,12 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
int eqn;
int err;
- if (attr->flags)
- return ERR_PTR(-EINVAL);
-
if (entries < 0)
return ERR_PTR(-EINVAL);
+ if (check_cq_create_flags(attr->flags))
+ return ERR_PTR(-EOPNOTSUPP);
+
entries = roundup_pow_of_two(entries + 1);
if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))
return ERR_PTR(-EINVAL);
@@ -779,6 +793,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
spin_lock_init(&cq->lock);
cq->resize_buf = NULL;
cq->resize_umem = NULL;
+ cq->create_flags = attr->flags;
if (context) {
err = create_cq_user(dev, udata, context, cq, entries,
@@ -796,6 +811,10 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
cq->cqe_size = cqe_size;
cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
+
+ if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN)
+ cqb->ctx.cqe_sz_flags |= (1 << 1);
+
cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index);
err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn);
if (err)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index b0ec175..ec737e2 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -40,6 +40,8 @@
#include <linux/io-mapping.h>
#include <linux/sched.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_cache.h>
#include <linux/mlx5/vport.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
@@ -66,12 +68,14 @@ static char mlx5_version[] =
DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
+enum {
+ MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3,
+};
+
static enum rdma_link_layer
-mlx5_ib_port_link_layer(struct ib_device *device)
+mlx5_port_type_cap_to_rdma_ll(int port_type_cap)
{
- struct mlx5_ib_dev *dev = to_mdev(device);
-
- switch (MLX5_CAP_GEN(dev->mdev, port_type)) {
+ switch (port_type_cap) {
case MLX5_CAP_PORT_TYPE_IB:
return IB_LINK_LAYER_INFINIBAND;
case MLX5_CAP_PORT_TYPE_ETH:
@@ -81,6 +85,202 @@ mlx5_ib_port_link_layer(struct ib_device *device)
}
}
+static enum rdma_link_layer
+mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type);
+
+ return mlx5_port_type_cap_to_rdma_ll(port_type_cap);
+}
+
+static int mlx5_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+ struct mlx5_ib_dev *ibdev = container_of(this, struct mlx5_ib_dev,
+ roce.nb);
+
+ if ((event != NETDEV_UNREGISTER) && (event != NETDEV_REGISTER))
+ return NOTIFY_DONE;
+
+ write_lock(&ibdev->roce.netdev_lock);
+ if (ndev->dev.parent == &ibdev->mdev->pdev->dev)
+ ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ? NULL : ndev;
+ write_unlock(&ibdev->roce.netdev_lock);
+
+ return NOTIFY_DONE;
+}
+
+static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
+ u8 port_num)
+{
+ struct mlx5_ib_dev *ibdev = to_mdev(device);
+ struct net_device *ndev;
+
+ /* Ensure ndev does not disappear before we invoke dev_hold()
+ */
+ read_lock(&ibdev->roce.netdev_lock);
+ ndev = ibdev->roce.netdev;
+ if (ndev)
+ dev_hold(ndev);
+ read_unlock(&ibdev->roce.netdev_lock);
+
+ return ndev;
+}
+
+static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
+ struct ib_port_attr *props)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ struct net_device *ndev;
+ enum ib_mtu ndev_ib_mtu;
+ u16 qkey_viol_cntr;
+
+ memset(props, 0, sizeof(*props));
+
+ props->port_cap_flags |= IB_PORT_CM_SUP;
+ props->port_cap_flags |= IB_PORT_IP_BASED_GIDS;
+
+ props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev,
+ roce_address_table_size);
+ props->max_mtu = IB_MTU_4096;
+ props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg);
+ props->pkey_tbl_len = 1;
+ props->state = IB_PORT_DOWN;
+ props->phys_state = 3;
+
+ mlx5_query_nic_vport_qkey_viol_cntr(dev->mdev, &qkey_viol_cntr);
+ props->qkey_viol_cntr = qkey_viol_cntr;
+
+ ndev = mlx5_ib_get_netdev(device, port_num);
+ if (!ndev)
+ return 0;
+
+ if (netif_running(ndev) && netif_carrier_ok(ndev)) {
+ props->state = IB_PORT_ACTIVE;
+ props->phys_state = 5;
+ }
+
+ ndev_ib_mtu = iboe_get_mtu(ndev->mtu);
+
+ dev_put(ndev);
+
+ props->active_mtu = min(props->max_mtu, ndev_ib_mtu);
+
+ props->active_width = IB_WIDTH_4X; /* TODO */
+ props->active_speed = IB_SPEED_QDR; /* TODO */
+
+ return 0;
+}
+
+static void ib_gid_to_mlx5_roce_addr(const union ib_gid *gid,
+ const struct ib_gid_attr *attr,
+ void *mlx5_addr)
+{
+#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
+ char *mlx5_addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
+ source_l3_address);
+ void *mlx5_addr_mac = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
+ source_mac_47_32);
+
+ if (!gid)
+ return;
+
+ ether_addr_copy(mlx5_addr_mac, attr->ndev->dev_addr);
+
+ if (is_vlan_dev(attr->ndev)) {
+ MLX5_SET_RA(mlx5_addr, vlan_valid, 1);
+ MLX5_SET_RA(mlx5_addr, vlan_id, vlan_dev_vlan_id(attr->ndev));
+ }
+
+ switch (attr->gid_type) {
+ case IB_GID_TYPE_IB:
+ MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_1);
+ break;
+ case IB_GID_TYPE_ROCE_UDP_ENCAP:
+ MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_2);
+ break;
+
+ default:
+ WARN_ON(true);
+ }
+
+ if (attr->gid_type != IB_GID_TYPE_IB) {
+ if (ipv6_addr_v4mapped((void *)gid))
+ MLX5_SET_RA(mlx5_addr, roce_l3_type,
+ MLX5_ROCE_L3_TYPE_IPV4);
+ else
+ MLX5_SET_RA(mlx5_addr, roce_l3_type,
+ MLX5_ROCE_L3_TYPE_IPV6);
+ }
+
+ if ((attr->gid_type == IB_GID_TYPE_IB) ||
+ !ipv6_addr_v4mapped((void *)gid))
+ memcpy(mlx5_addr_l3_addr, gid, sizeof(*gid));
+ else
+ memcpy(&mlx5_addr_l3_addr[12], &gid->raw[12], 4);
+}
+
+static int set_roce_addr(struct ib_device *device, u8 port_num,
+ unsigned int index,
+ const union ib_gid *gid,
+ const struct ib_gid_attr *attr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ u32 in[MLX5_ST_SZ_DW(set_roce_address_in)];
+ u32 out[MLX5_ST_SZ_DW(set_roce_address_out)];
+ void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
+ enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num);
+
+ if (ll != IB_LINK_LAYER_ETHERNET)
+ return -EINVAL;
+
+ memset(in, 0, sizeof(in));
+
+ ib_gid_to_mlx5_roce_addr(gid, attr, in_addr);
+
+ MLX5_SET(set_roce_address_in, in, roce_address_index, index);
+ MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
+
+ memset(out, 0, sizeof(out));
+ return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
+ unsigned int index, const union ib_gid *gid,
+ const struct ib_gid_attr *attr,
+ __always_unused void **context)
+{
+ return set_roce_addr(device, port_num, index, gid, attr);
+}
+
+static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num,
+ unsigned int index, __always_unused void **context)
+{
+ return set_roce_addr(device, port_num, index, NULL, NULL);
+}
+
+__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
+ int index)
+{
+ struct ib_gid_attr attr;
+ union ib_gid gid;
+
+ if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr))
+ return 0;
+
+ if (!attr.ndev)
+ return 0;
+
+ dev_put(attr.ndev);
+
+ if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
+ return 0;
+
+ return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port));
+}
+
static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
{
return !dev->mdev->issi;
@@ -97,13 +297,35 @@ static int mlx5_get_vport_access_method(struct ib_device *ibdev)
if (mlx5_use_mad_ifc(to_mdev(ibdev)))
return MLX5_VPORT_ACCESS_METHOD_MAD;
- if (mlx5_ib_port_link_layer(ibdev) ==
+ if (mlx5_ib_port_link_layer(ibdev, 1) ==
IB_LINK_LAYER_ETHERNET)
return MLX5_VPORT_ACCESS_METHOD_NIC;
return MLX5_VPORT_ACCESS_METHOD_HCA;
}
+static void get_atomic_caps(struct mlx5_ib_dev *dev,
+ struct ib_device_attr *props)
+{
+ u8 tmp;
+ u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
+ u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
+ u8 atomic_req_8B_endianness_mode =
+ MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianess_mode);
+
+ /* Check if HW supports 8 bytes standard atomic operations and capable
+ * of host endianness respond
+ */
+ tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD;
+ if (((atomic_operations & tmp) == tmp) &&
+ (atomic_size_qp & MLX5_ATOMIC_SIZE_QP_8BYTES) &&
+ (atomic_req_8B_endianness_mode)) {
+ props->atomic_cap = IB_ATOMIC_HCA;
+ } else {
+ props->atomic_cap = IB_ATOMIC_NONE;
+ }
+}
+
static int mlx5_query_system_image_guid(struct ib_device *ibdev,
__be64 *sys_image_guid)
{
@@ -119,13 +341,21 @@ static int mlx5_query_system_image_guid(struct ib_device *ibdev,
case MLX5_VPORT_ACCESS_METHOD_HCA:
err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
- if (!err)
- *sys_image_guid = cpu_to_be64(tmp);
- return err;
+ break;
+
+ case MLX5_VPORT_ACCESS_METHOD_NIC:
+ err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
+ break;
default:
return -EINVAL;
}
+
+ if (!err)
+ *sys_image_guid = cpu_to_be64(tmp);
+
+ return err;
+
}
static int mlx5_query_max_pkeys(struct ib_device *ibdev,
@@ -179,13 +409,20 @@ static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
case MLX5_VPORT_ACCESS_METHOD_HCA:
err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
- if (!err)
- *node_guid = cpu_to_be64(tmp);
- return err;
+ break;
+
+ case MLX5_VPORT_ACCESS_METHOD_NIC:
+ err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
+ break;
default:
return -EINVAL;
}
+
+ if (!err)
+ *node_guid = cpu_to_be64(tmp);
+
+ return err;
}
struct mlx5_reg_node_desc {
@@ -263,6 +500,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (MLX5_CAP_GEN(mdev, block_lb_mc))
props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
+ if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
+ (MLX5_CAP_ETH(dev->mdev, csum_cap)))
+ props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
+
props->vendor_part_id = mdev->pdev->device;
props->hw_ver = mdev->pdev->revision;
@@ -278,7 +519,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->max_sge = min(max_rq_sg, max_sq_sg);
props->max_sge_rd = props->max_sge;
props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
- props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_eq_sz)) - 1;
+ props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
props->max_pd = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
props->max_qp_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
@@ -289,13 +530,15 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
props->max_srq_sge = max_rq_sg - 1;
props->max_fast_reg_page_list_len = (unsigned int)-1;
- props->atomic_cap = IB_ATOMIC_NONE;
+ get_atomic_caps(dev, props);
props->masked_atomic_cap = IB_ATOMIC_NONE;
props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
props->max_mcast_grp;
props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
+ props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz);
+ props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
if (MLX5_CAP_GEN(mdev, pg))
@@ -303,6 +546,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->odp_caps = dev->odp_caps;
#endif
+ if (MLX5_CAP_GEN(mdev, cd))
+ props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
+
return 0;
}
@@ -483,6 +729,9 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
case MLX5_VPORT_ACCESS_METHOD_HCA:
return mlx5_query_hca_port(ibdev, port, props);
+ case MLX5_VPORT_ACCESS_METHOD_NIC:
+ return mlx5_query_port_roce(ibdev, port, props);
+
default:
return -EINVAL;
}
@@ -583,8 +832,8 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_ib_alloc_ucontext_req_v2 req;
- struct mlx5_ib_alloc_ucontext_resp resp;
+ struct mlx5_ib_alloc_ucontext_req_v2 req = {};
+ struct mlx5_ib_alloc_ucontext_resp resp = {};
struct mlx5_ib_ucontext *context;
struct mlx5_uuar_info *uuari;
struct mlx5_uar *uars;
@@ -599,20 +848,22 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
if (!dev->ib_active)
return ERR_PTR(-EAGAIN);
- memset(&req, 0, sizeof(req));
+ if (udata->inlen < sizeof(struct ib_uverbs_cmd_hdr))
+ return ERR_PTR(-EINVAL);
+
reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
ver = 0;
- else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
+ else if (reqlen >= sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
ver = 2;
else
return ERR_PTR(-EINVAL);
- err = ib_copy_from_udata(&req, udata, reqlen);
+ err = ib_copy_from_udata(&req, udata, min(reqlen, sizeof(req)));
if (err)
return ERR_PTR(err);
- if (req.flags || req.reserved)
+ if (req.flags)
return ERR_PTR(-EINVAL);
if (req.total_num_uuars > MLX5_MAX_UUARS)
@@ -621,6 +872,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
if (req.total_num_uuars == 0)
return ERR_PTR(-EINVAL);
+ if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (reqlen > sizeof(req) &&
+ !ib_is_udata_cleared(udata, sizeof(req),
+ reqlen - sizeof(req)))
+ return ERR_PTR(-EOPNOTSUPP);
+
req.total_num_uuars = ALIGN(req.total_num_uuars,
MLX5_NON_FP_BF_REGS_PER_PAGE);
if (req.num_low_latency_uuars > req.total_num_uuars - 1)
@@ -636,6 +895,11 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
+ resp.cqe_version = min_t(__u8,
+ (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
+ req.max_cqe_version);
+ resp.response_length = min(offsetof(typeof(resp), response_length) +
+ sizeof(resp.response_length), udata->outlen);
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context)
@@ -681,22 +945,49 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
#endif
+ if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
+ err = mlx5_core_alloc_transport_domain(dev->mdev,
+ &context->tdn);
+ if (err)
+ goto out_uars;
+ }
+
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
resp.tot_uuars = req.total_num_uuars;
resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
- err = ib_copy_to_udata(udata, &resp,
- sizeof(resp) - sizeof(resp.reserved));
+
+ if (field_avail(typeof(resp), cqe_version, udata->outlen))
+ resp.response_length += sizeof(resp.cqe_version);
+
+ if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
+ resp.comp_mask |=
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
+ resp.hca_core_clock_offset =
+ offsetof(struct mlx5_init_seg, internal_timer_h) %
+ PAGE_SIZE;
+ resp.response_length += sizeof(resp.hca_core_clock_offset) +
+ sizeof(resp.reserved2) +
+ sizeof(resp.reserved3);
+ }
+
+ err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
- goto out_uars;
+ goto out_td;
uuari->ver = ver;
uuari->num_low_latency_uuars = req.num_low_latency_uuars;
uuari->uars = uars;
uuari->num_uars = num_uars;
+ context->cqe_version = resp.cqe_version;
+
return &context->ibucontext;
+out_td:
+ if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
+ mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+
out_uars:
for (i--; i >= 0; i--)
mlx5_cmd_free_uar(dev->mdev, uars[i].index);
@@ -721,6 +1012,9 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
struct mlx5_uuar_info *uuari = &context->uuari;
int i;
+ if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
+ mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+
for (i = 0; i < uuari->num_uars; i++) {
if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
@@ -790,6 +1084,30 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
return -ENOSYS;
+ case MLX5_IB_MMAP_CORE_CLOCK:
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+
+ if (vma->vm_flags & (VM_WRITE | VM_EXEC))
+ return -EPERM;
+
+ /* Don't expose to user-space information it shouldn't have */
+ if (PAGE_SIZE > 4096)
+ return -EOPNOTSUPP;
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ pfn = (dev->mdev->iseg_base +
+ offsetof(struct mlx5_init_seg, internal_timer_h)) >>
+ PAGE_SHIFT;
+ if (io_remap_pfn_range(vma, vma->vm_start, pfn,
+ PAGE_SIZE, vma->vm_page_prot))
+ return -EAGAIN;
+
+ mlx5_ib_dbg(dev, "mapped internal timer at 0x%lx, PA 0x%llx\n",
+ vma->vm_start,
+ (unsigned long long)pfn << PAGE_SHIFT);
+ break;
+
default:
return -EINVAL;
}
@@ -1758,6 +2076,32 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr)
mlx5_ib_dealloc_pd(devr->p0);
}
+static u32 get_core_cap_flags(struct ib_device *ibdev)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
+ u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type);
+ u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version);
+ u32 ret = 0;
+
+ if (ll == IB_LINK_LAYER_INFINIBAND)
+ return RDMA_CORE_PORT_IBA_IB;
+
+ if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
+ return 0;
+
+ if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
+ return 0;
+
+ if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
+ ret |= RDMA_CORE_PORT_IBA_ROCE;
+
+ if (roce_version_cap & MLX5_ROCE_VERSION_2_CAP)
+ ret |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+
+ return ret;
+}
+
static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_immutable *immutable)
{
@@ -1770,20 +2114,50 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+ immutable->core_cap_flags = get_core_cap_flags(ibdev);
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
}
+static int mlx5_enable_roce(struct mlx5_ib_dev *dev)
+{
+ int err;
+
+ dev->roce.nb.notifier_call = mlx5_netdev_event;
+ err = register_netdevice_notifier(&dev->roce.nb);
+ if (err)
+ return err;
+
+ err = mlx5_nic_vport_enable_roce(dev->mdev);
+ if (err)
+ goto err_unregister_netdevice_notifier;
+
+ return 0;
+
+err_unregister_netdevice_notifier:
+ unregister_netdevice_notifier(&dev->roce.nb);
+ return err;
+}
+
+static void mlx5_disable_roce(struct mlx5_ib_dev *dev)
+{
+ mlx5_nic_vport_disable_roce(dev->mdev);
+ unregister_netdevice_notifier(&dev->roce.nb);
+}
+
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
struct mlx5_ib_dev *dev;
+ enum rdma_link_layer ll;
+ int port_type_cap;
int err;
int i;
- /* don't create IB instance over Eth ports, no RoCE yet! */
- if (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH)
+ port_type_cap = MLX5_CAP_GEN(mdev, port_type);
+ ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
+
+ if ((ll == IB_LINK_LAYER_ETHERNET) && !MLX5_CAP_GEN(mdev, roce))
return NULL;
printk_once(KERN_INFO "%s", mlx5_version);
@@ -1794,6 +2168,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->mdev = mdev;
+ rwlock_init(&dev->roce.netdev_lock);
err = get_port_caps(dev);
if (err)
goto err_dealloc;
@@ -1843,7 +2218,12 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.query_device = mlx5_ib_query_device;
dev->ib_dev.query_port = mlx5_ib_query_port;
+ dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
+ if (ll == IB_LINK_LAYER_ETHERNET)
+ dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
dev->ib_dev.query_gid = mlx5_ib_query_gid;
+ dev->ib_dev.add_gid = mlx5_ib_add_gid;
+ dev->ib_dev.del_gid = mlx5_ib_del_gid;
dev->ib_dev.query_pkey = mlx5_ib_query_pkey;
dev->ib_dev.modify_device = mlx5_ib_modify_device;
dev->ib_dev.modify_port = mlx5_ib_modify_port;
@@ -1893,7 +2273,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
}
- if (mlx5_ib_port_link_layer(&dev->ib_dev) ==
+ if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
IB_LINK_LAYER_ETHERNET) {
dev->ib_dev.create_flow = mlx5_ib_create_flow;
dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
@@ -1908,9 +2288,15 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
mutex_init(&dev->flow_db.lock);
mutex_init(&dev->cap_mask_mutex);
+ if (ll == IB_LINK_LAYER_ETHERNET) {
+ err = mlx5_enable_roce(dev);
+ if (err)
+ goto err_dealloc;
+ }
+
err = create_dev_resources(&dev->devr);
if (err)
- goto err_dealloc;
+ goto err_disable_roce;
err = mlx5_ib_odp_init_one(dev);
if (err)
@@ -1947,6 +2333,10 @@ err_odp:
err_rsrc:
destroy_dev_resources(&dev->devr);
+err_disable_roce:
+ if (ll == IB_LINK_LAYER_ETHERNET)
+ mlx5_disable_roce(dev);
+
err_dealloc:
ib_dealloc_device((struct ib_device *)dev);
@@ -1956,11 +2346,14 @@ err_dealloc:
static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
{
struct mlx5_ib_dev *dev = context;
+ enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);
ib_unregister_device(&dev->ib_dev);
destroy_umrc_res(dev);
mlx5_ib_odp_remove_one(dev);
destroy_dev_resources(&dev->devr);
+ if (ll == IB_LINK_LAYER_ETHERNET)
+ mlx5_disable_roce(dev);
ib_dealloc_device(&dev->ib_dev);
}
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 1474ccc..d2b9737 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -42,6 +42,7 @@
#include <linux/mlx5/qp.h>
#include <linux/mlx5/srq.h>
#include <linux/types.h>
+#include <linux/mlx5/transobj.h>
#define mlx5_ib_dbg(dev, format, arg...) \
pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
@@ -55,6 +56,11 @@ pr_err("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
__LINE__, current->pid, ##arg)
+#define field_avail(type, fld, sz) (offsetof(type, fld) + \
+ sizeof(((type *)0)->fld) <= (sz))
+#define MLX5_IB_DEFAULT_UIDX 0xffffff
+#define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index)
+
enum {
MLX5_IB_MMAP_CMD_SHIFT = 8,
MLX5_IB_MMAP_CMD_MASK = 0xff,
@@ -62,7 +68,9 @@ enum {
enum mlx5_ib_mmap_cmd {
MLX5_IB_MMAP_REGULAR_PAGE = 0,
- MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1, /* always last */
+ MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1,
+ /* 5 is chosen in order to be compatible with old versions of libmlx5 */
+ MLX5_IB_MMAP_CORE_CLOCK = 5,
};
enum {
@@ -85,6 +93,15 @@ enum mlx5_ib_mad_ifc_flags {
MLX5_MAD_IFC_NET_VIEW = 4,
};
+enum {
+ MLX5_CROSS_CHANNEL_UUAR = 0,
+};
+
+enum {
+ MLX5_CQE_VERSION_V0,
+ MLX5_CQE_VERSION_V1,
+};
+
struct mlx5_ib_ucontext {
struct ib_ucontext ibucontext;
struct list_head db_page_list;
@@ -93,6 +110,9 @@ struct mlx5_ib_ucontext {
*/
struct mutex db_page_mutex;
struct mlx5_uuar_info uuari;
+ u8 cqe_version;
+ /* Transport Domain number */
+ u32 tdn;
};
static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@@ -201,47 +221,70 @@ struct mlx5_ib_pfault {
struct mlx5_pagefault mpfault;
};
+struct mlx5_ib_ubuffer {
+ struct ib_umem *umem;
+ int buf_size;
+ u64 buf_addr;
+};
+
+struct mlx5_ib_qp_base {
+ struct mlx5_ib_qp *container_mibqp;
+ struct mlx5_core_qp mqp;
+ struct mlx5_ib_ubuffer ubuffer;
+};
+
+struct mlx5_ib_qp_trans {
+ struct mlx5_ib_qp_base base;
+ u16 xrcdn;
+ u8 alt_port;
+ u8 atomic_rd_en;
+ u8 resp_depth;
+};
+
struct mlx5_ib_rq {
+ struct mlx5_ib_qp_base base;
+ struct mlx5_ib_wq *rq;
+ struct mlx5_ib_ubuffer ubuffer;
+ struct mlx5_db *doorbell;
u32 tirn;
+ u8 state;
+};
+
+struct mlx5_ib_sq {
+ struct mlx5_ib_qp_base base;
+ struct mlx5_ib_wq *sq;
+ struct mlx5_ib_ubuffer ubuffer;
+ struct mlx5_db *doorbell;
+ u32 tisn;
+ u8 state;
};
struct mlx5_ib_raw_packet_qp {
+ struct mlx5_ib_sq sq;
struct mlx5_ib_rq rq;
};
struct mlx5_ib_qp {
struct ib_qp ibqp;
union {
- struct mlx5_core_qp mqp;
- struct mlx5_ib_raw_packet_qp raw_packet_qp;
+ struct mlx5_ib_qp_trans trans_qp;
+ struct mlx5_ib_raw_packet_qp raw_packet_qp;
};
-
struct mlx5_buf buf;
struct mlx5_db db;
struct mlx5_ib_wq rq;
- u32 doorbell_qpn;
u8 sq_signal_bits;
u8 fm_cache;
- int sq_max_wqes_per_wr;
- int sq_spare_wqes;
struct mlx5_ib_wq sq;
- struct ib_umem *umem;
- int buf_size;
-
/* serialize qp state modifications
*/
struct mutex mutex;
- u16 xrcdn;
u32 flags;
u8 port;
- u8 alt_port;
- u8 atomic_rd_en;
- u8 resp_depth;
u8 state;
- int mlx_type;
int wq_sig;
int scat_cqe;
int max_inline_data;
@@ -284,6 +327,9 @@ struct mlx5_ib_cq_buf {
enum mlx5_ib_qp_flags {
MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 0,
MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 1,
+ MLX5_IB_QP_CROSS_CHANNEL = 1 << 2,
+ MLX5_IB_QP_MANAGED_SEND = 1 << 3,
+ MLX5_IB_QP_MANAGED_RECV = 1 << 4,
};
struct mlx5_umr_wr {
@@ -326,6 +372,7 @@ struct mlx5_ib_cq {
struct mlx5_ib_cq_buf *resize_buf;
struct ib_umem *resize_umem;
int cqe_size;
+ u32 create_flags;
};
struct mlx5_ib_srq {
@@ -449,9 +496,19 @@ struct mlx5_ib_resources {
struct ib_srq *s1;
};
+struct mlx5_roce {
+ /* Protect mlx5_ib_get_netdev from invoking dev_hold() with a NULL
+ * netdev pointer
+ */
+ rwlock_t netdev_lock;
+ struct net_device *netdev;
+ struct notifier_block nb;
+};
+
struct mlx5_ib_dev {
struct ib_device ib_dev;
struct mlx5_core_dev *mdev;
+ struct mlx5_roce roce;
MLX5_DECLARE_DOORBELL_LOCK(uar_lock);
int num_ports;
/* serialize update of capability mask
@@ -498,7 +555,7 @@ static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq)
static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp)
{
- return container_of(mqp, struct mlx5_ib_qp, mqp);
+ return container_of(mqp, struct mlx5_ib_qp_base, mqp)->container_mibqp;
}
static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr)
@@ -550,8 +607,6 @@ void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index);
int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const void *in_mad, void *response_mad);
-struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
- struct mlx5_ib_ah *ah);
struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
int mlx5_ib_destroy_ah(struct ib_ah *ah);
@@ -578,7 +633,8 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n);
int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
- void *buffer, u32 length);
+ void *buffer, u32 length,
+ struct mlx5_ib_qp_base *base);
struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
const struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
@@ -680,6 +736,9 @@ static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp) {}
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
+ int index);
+
static inline void init_query_mad(struct ib_smp *mad)
{
mad->base_version = 1;
@@ -705,4 +764,28 @@ static inline int is_qp1(enum ib_qp_type qp_type)
#define MLX5_MAX_UMR_SHIFT 16
#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
+static inline u32 check_cq_create_flags(u32 flags)
+{
+ /*
+ * It returns non-zero value for unsupported CQ
+ * create flags, otherwise it returns zero.
+ */
+ return (flags & ~(IB_CQ_FLAGS_IGNORE_OVERRUN |
+ IB_CQ_FLAGS_TIMESTAMP_COMPLETION));
+}
+
+static inline int verify_assign_uidx(u8 cqe_version, u32 cmd_uidx,
+ u32 *user_index)
+{
+ if (cqe_version) {
+ if ((cmd_uidx == MLX5_IB_DEFAULT_UIDX) ||
+ (cmd_uidx & ~MLX5_USER_ASSIGNED_UIDX_MASK))
+ return -EINVAL;
+ *user_index = cmd_uidx;
+ } else {
+ *user_index = MLX5_IB_DEFAULT_UIDX;
+ }
+
+ return 0;
+}
#endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index aa8391e..b8d7636 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -153,14 +153,16 @@ static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev,
static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp,
struct mlx5_ib_pfault *pfault,
- int error) {
+ int error)
+{
struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
- int ret = mlx5_core_page_fault_resume(dev->mdev, qp->mqp.qpn,
+ u32 qpn = qp->trans_qp.base.mqp.qpn;
+ int ret = mlx5_core_page_fault_resume(dev->mdev,
+ qpn,
pfault->mpfault.flags,
error);
if (ret)
- pr_err("Failed to resolve the page fault on QP 0x%x\n",
- qp->mqp.qpn);
+ pr_err("Failed to resolve the page fault on QP 0x%x\n", qpn);
}
/*
@@ -391,6 +393,7 @@ static int mlx5_ib_mr_initiator_pfault_handler(
#if defined(DEBUG)
u32 ctrl_wqe_index, ctrl_qpn;
#endif
+ u32 qpn = qp->trans_qp.base.mqp.qpn;
ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
if (ds * MLX5_WQE_DS_UNITS > wqe_length) {
@@ -401,7 +404,7 @@ static int mlx5_ib_mr_initiator_pfault_handler(
if (ds == 0) {
mlx5_ib_err(dev, "Got WQE with zero DS. wqe_index=%x, qpn=%x\n",
- wqe_index, qp->mqp.qpn);
+ wqe_index, qpn);
return -EFAULT;
}
@@ -411,16 +414,16 @@ static int mlx5_ib_mr_initiator_pfault_handler(
MLX5_WQE_CTRL_WQE_INDEX_SHIFT;
if (wqe_index != ctrl_wqe_index) {
mlx5_ib_err(dev, "Got WQE with invalid wqe_index. wqe_index=0x%x, qpn=0x%x ctrl->wqe_index=0x%x\n",
- wqe_index, qp->mqp.qpn,
+ wqe_index, qpn,
ctrl_wqe_index);
return -EFAULT;
}
ctrl_qpn = (be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_QPN_MASK) >>
MLX5_WQE_CTRL_QPN_SHIFT;
- if (qp->mqp.qpn != ctrl_qpn) {
+ if (qpn != ctrl_qpn) {
mlx5_ib_err(dev, "Got WQE with incorrect QP number. wqe_index=0x%x, qpn=0x%x ctrl->qpn=0x%x\n",
- wqe_index, qp->mqp.qpn,
+ wqe_index, qpn,
ctrl_qpn);
return -EFAULT;
}
@@ -537,6 +540,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
int resume_with_error = 0;
u16 wqe_index = pfault->mpfault.wqe.wqe_index;
int requestor = pfault->mpfault.flags & MLX5_PFAULT_REQUESTOR;
+ u32 qpn = qp->trans_qp.base.mqp.qpn;
buffer = (char *)__get_free_page(GFP_KERNEL);
if (!buffer) {
@@ -546,10 +550,10 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
}
ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer,
- PAGE_SIZE);
+ PAGE_SIZE, &qp->trans_qp.base);
if (ret < 0) {
mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%x, wqe_index=%x, qpn=%x\n",
- -ret, wqe_index, qp->mqp.qpn);
+ -ret, wqe_index, qpn);
resume_with_error = 1;
goto resolve_page_fault;
}
@@ -586,7 +590,8 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
resolve_page_fault:
mlx5_ib_page_fault_resume(qp, pfault, resume_with_error);
mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, flags: 0x%x\n",
- qp->mqp.qpn, resume_with_error, pfault->mpfault.flags);
+ qpn, resume_with_error,
+ pfault->mpfault.flags);
free_page((unsigned long)buffer);
}
@@ -753,7 +758,7 @@ void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp)
qp->disable_page_faults = 1;
spin_lock_init(&qp->disable_page_faults_lock);
- qp->mqp.pfault_handler = mlx5_ib_pfault_handler;
+ qp->trans_qp.base.mqp.pfault_handler = mlx5_ib_pfault_handler;
for (i = 0; i < MLX5_IB_PAGEFAULT_CONTEXTS; ++i)
INIT_WORK(&qp->pagefaults[i].work, mlx5_ib_qp_pfault_action);
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 307bdbc..8fb9c27 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -32,6 +32,8 @@
#include <linux/module.h>
#include <rdma/ib_umem.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_user_verbs.h>
#include "mlx5_ib.h"
#include "user.h"
@@ -114,14 +116,15 @@ void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n)
* Return: the number of bytes copied, or an error code.
*/
int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
- void *buffer, u32 length)
+ void *buffer, u32 length,
+ struct mlx5_ib_qp_base *base)
{
struct ib_device *ibdev = qp->ibqp.device;
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_wq *wq = send ? &qp->sq : &qp->rq;
size_t offset;
size_t wq_end;
- struct ib_umem *umem = qp->umem;
+ struct ib_umem *umem = base->ubuffer.umem;
u32 first_copy_length;
int wqe_length;
int ret;
@@ -172,8 +175,10 @@ static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
struct ib_event event;
- if (type == MLX5_EVENT_TYPE_PATH_MIG)
- to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
+ if (type == MLX5_EVENT_TYPE_PATH_MIG) {
+ /* This event is only valid for trans_qps */
+ to_mibqp(qp)->port = to_mibqp(qp)->trans_qp.alt_port;
+ }
if (ibqp->event_handler) {
event.device = ibqp->device;
@@ -366,7 +371,9 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
static int set_user_buf_size(struct mlx5_ib_dev *dev,
struct mlx5_ib_qp *qp,
- struct mlx5_ib_create_qp *ucmd)
+ struct mlx5_ib_create_qp *ucmd,
+ struct mlx5_ib_qp_base *base,
+ struct ib_qp_init_attr *attr)
{
int desc_sz = 1 << qp->sq.wqe_shift;
@@ -391,8 +398,13 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev,
return -EINVAL;
}
- qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
- (qp->sq.wqe_cnt << 6);
+ if (attr->qp_type == IB_QPT_RAW_PACKET) {
+ base->ubuffer.buf_size = qp->rq.wqe_cnt << qp->rq.wqe_shift;
+ qp->raw_packet_qp.sq.ubuffer.buf_size = qp->sq.wqe_cnt << 6;
+ } else {
+ base->ubuffer.buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
+ (qp->sq.wqe_cnt << 6);
+ }
return 0;
}
@@ -578,8 +590,8 @@ static int to_mlx5_st(enum ib_qp_type type)
case IB_QPT_SMI: return MLX5_QP_ST_QP0;
case IB_QPT_GSI: return MLX5_QP_ST_QP1;
case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6;
- case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE;
case IB_QPT_RAW_PACKET:
+ case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE;
case IB_QPT_MAX:
default: return -EINVAL;
}
@@ -590,13 +602,51 @@ static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn)
return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index;
}
+static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev,
+ struct ib_pd *pd,
+ unsigned long addr, size_t size,
+ struct ib_umem **umem,
+ int *npages, int *page_shift, int *ncont,
+ u32 *offset)
+{
+ int err;
+
+ *umem = ib_umem_get(pd->uobject->context, addr, size, 0, 0);
+ if (IS_ERR(*umem)) {
+ mlx5_ib_dbg(dev, "umem_get failed\n");
+ return PTR_ERR(*umem);
+ }
+
+ mlx5_ib_cont_pages(*umem, addr, npages, page_shift, ncont, NULL);
+
+ err = mlx5_ib_get_buf_offset(addr, *page_shift, offset);
+ if (err) {
+ mlx5_ib_warn(dev, "bad offset\n");
+ goto err_umem;
+ }
+
+ mlx5_ib_dbg(dev, "addr 0x%lx, size %zu, npages %d, page_shift %d, ncont %d, offset %d\n",
+ addr, size, *npages, *page_shift, *ncont, *offset);
+
+ return 0;
+
+err_umem:
+ ib_umem_release(*umem);
+ *umem = NULL;
+
+ return err;
+}
+
static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct mlx5_ib_qp *qp, struct ib_udata *udata,
+ struct ib_qp_init_attr *attr,
struct mlx5_create_qp_mbox_in **in,
- struct mlx5_ib_create_qp_resp *resp, int *inlen)
+ struct mlx5_ib_create_qp_resp *resp, int *inlen,
+ struct mlx5_ib_qp_base *base)
{
struct mlx5_ib_ucontext *context;
struct mlx5_ib_create_qp ucmd;
+ struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer;
int page_shift = 0;
int uar_index;
int npages;
@@ -615,18 +665,23 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
/*
* TBD: should come from the verbs when we have the API
*/
- uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
- if (uuarn < 0) {
- mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
- mlx5_ib_dbg(dev, "reverting to medium latency\n");
- uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
+ if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+ /* In CROSS_CHANNEL CQ and QP must use the same UAR */
+ uuarn = MLX5_CROSS_CHANNEL_UUAR;
+ else {
+ uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
if (uuarn < 0) {
- mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
- mlx5_ib_dbg(dev, "reverting to high latency\n");
- uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
+ mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
+ mlx5_ib_dbg(dev, "reverting to medium latency\n");
+ uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
if (uuarn < 0) {
- mlx5_ib_warn(dev, "uuar allocation failed\n");
- return uuarn;
+ mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
+ mlx5_ib_dbg(dev, "reverting to high latency\n");
+ uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
+ if (uuarn < 0) {
+ mlx5_ib_warn(dev, "uuar allocation failed\n");
+ return uuarn;
+ }
}
}
}
@@ -638,32 +693,20 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
- err = set_user_buf_size(dev, qp, &ucmd);
+ err = set_user_buf_size(dev, qp, &ucmd, base, attr);
if (err)
goto err_uuar;
- if (ucmd.buf_addr && qp->buf_size) {
- qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
- qp->buf_size, 0, 0);
- if (IS_ERR(qp->umem)) {
- mlx5_ib_dbg(dev, "umem_get failed\n");
- err = PTR_ERR(qp->umem);
+ if (ucmd.buf_addr && ubuffer->buf_size) {
+ ubuffer->buf_addr = ucmd.buf_addr;
+ err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr,
+ ubuffer->buf_size,
+ &ubuffer->umem, &npages, &page_shift,
+ &ncont, &offset);
+ if (err)
goto err_uuar;
- }
} else {
- qp->umem = NULL;
- }
-
- if (qp->umem) {
- mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift,
- &ncont, NULL);
- err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset);
- if (err) {
- mlx5_ib_warn(dev, "bad offset\n");
- goto err_umem;
- }
- mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n",
- ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset);
+ ubuffer->umem = NULL;
}
*inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
@@ -672,8 +715,9 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
err = -ENOMEM;
goto err_umem;
}
- if (qp->umem)
- mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
+ if (ubuffer->umem)
+ mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift,
+ (*in)->pas, 0);
(*in)->ctx.log_pg_sz_remote_qpn =
cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
(*in)->ctx.params2 = cpu_to_be32(offset << 6);
@@ -704,29 +748,31 @@ err_free:
kvfree(*in);
err_umem:
- if (qp->umem)
- ib_umem_release(qp->umem);
+ if (ubuffer->umem)
+ ib_umem_release(ubuffer->umem);
err_uuar:
free_uuar(&context->uuari, uuarn);
return err;
}
-static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp)
+static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp,
+ struct mlx5_ib_qp_base *base)
{
struct mlx5_ib_ucontext *context;
context = to_mucontext(pd->uobject->context);
mlx5_ib_db_unmap_user(context, &qp->db);
- if (qp->umem)
- ib_umem_release(qp->umem);
+ if (base->ubuffer.umem)
+ ib_umem_release(base->ubuffer.umem);
free_uuar(&context->uuari, qp->uuarn);
}
static int create_kernel_qp(struct mlx5_ib_dev *dev,
struct ib_qp_init_attr *init_attr,
struct mlx5_ib_qp *qp,
- struct mlx5_create_qp_mbox_in **in, int *inlen)
+ struct mlx5_create_qp_mbox_in **in, int *inlen,
+ struct mlx5_ib_qp_base *base)
{
enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW;
struct mlx5_uuar_info *uuari;
@@ -758,9 +804,9 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
qp->rq.offset = 0;
qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
- qp->buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
+ base->ubuffer.buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
- err = mlx5_buf_alloc(dev->mdev, qp->buf_size, &qp->buf);
+ err = mlx5_buf_alloc(dev->mdev, base->ubuffer.buf_size, &qp->buf);
if (err) {
mlx5_ib_dbg(dev, "err %d\n", err);
goto err_uuar;
@@ -853,19 +899,304 @@ static int is_connected(enum ib_qp_type qp_type)
return 0;
}
+static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq, u32 tdn)
+{
+ u32 in[MLX5_ST_SZ_DW(create_tis_in)];
+ void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+
+ memset(in, 0, sizeof(in));
+
+ MLX5_SET(tisc, tisc, transport_domain, tdn);
+
+ return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn);
+}
+
+static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq)
+{
+ mlx5_core_destroy_tis(dev->mdev, sq->tisn);
+}
+
+static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq, void *qpin,
+ struct ib_pd *pd)
+{
+ struct mlx5_ib_ubuffer *ubuffer = &sq->ubuffer;
+ __be64 *pas;
+ void *in;
+ void *sqc;
+ void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
+ void *wq;
+ int inlen;
+ int err;
+ int page_shift = 0;
+ int npages;
+ int ncont = 0;
+ u32 offset = 0;
+
+ err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr, ubuffer->buf_size,
+ &sq->ubuffer.umem, &npages, &page_shift,
+ &ncont, &offset);
+ if (err)
+ return err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * ncont;
+ in = mlx5_vzalloc(inlen);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_umem;
+ }
+
+ sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
+ MLX5_SET(sqc, sqc, flush_in_error_en, 1);
+ MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
+ MLX5_SET(sqc, sqc, user_index, MLX5_GET(qpc, qpc, user_index));
+ MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd));
+ MLX5_SET(sqc, sqc, tis_lst_sz, 1);
+ MLX5_SET(sqc, sqc, tis_num_0, sq->tisn);
+
+ wq = MLX5_ADDR_OF(sqc, sqc, wq);
+ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+ MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
+ MLX5_SET(wq, wq, uar_page, MLX5_GET(qpc, qpc, uar_page));
+ MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
+ MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
+ MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_sq_size));
+ MLX5_SET(wq, wq, log_wq_pg_sz, page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(wq, wq, page_offset, offset);
+
+ pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
+ mlx5_ib_populate_pas(dev, sq->ubuffer.umem, page_shift, pas, 0);
+
+ err = mlx5_core_create_sq_tracked(dev->mdev, in, inlen, &sq->base.mqp);
+
+ kvfree(in);
+
+ if (err)
+ goto err_umem;
+
+ return 0;
+
+err_umem:
+ ib_umem_release(sq->ubuffer.umem);
+ sq->ubuffer.umem = NULL;
+
+ return err;
+}
+
+static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq)
+{
+ mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
+ ib_umem_release(sq->ubuffer.umem);
+}
+
+static int get_rq_pas_size(void *qpc)
+{
+ u32 log_page_size = MLX5_GET(qpc, qpc, log_page_size) + 12;
+ u32 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride);
+ u32 log_rq_size = MLX5_GET(qpc, qpc, log_rq_size);
+ u32 page_offset = MLX5_GET(qpc, qpc, page_offset);
+ u32 po_quanta = 1 << (log_page_size - 6);
+ u32 rq_sz = 1 << (log_rq_size + 4 + log_rq_stride);
+ u32 page_size = 1 << log_page_size;
+ u32 rq_sz_po = rq_sz + (page_offset * po_quanta);
+ u32 rq_num_pas = (rq_sz_po + page_size - 1) / page_size;
+
+ return rq_num_pas * sizeof(u64);
+}
+
+static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_rq *rq, void *qpin)
+{
+ __be64 *pas;
+ __be64 *qp_pas;
+ void *in;
+ void *rqc;
+ void *wq;
+ void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
+ int inlen;
+ int err;
+ u32 rq_pas_size = get_rq_pas_size(qpc);
+
+ inlen = MLX5_ST_SZ_BYTES(create_rq_in) + rq_pas_size;
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
+ MLX5_SET(rqc, rqc, vsd, 1);
+ MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
+ MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
+ MLX5_SET(rqc, rqc, flush_in_error_en, 1);
+ MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index));
+ MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv));
+
+ wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+ MLX5_SET(wq, wq, end_padding_mode,
+ MLX5_GET64(qpc, qpc, end_padding_mode));
+ MLX5_SET(wq, wq, page_offset, MLX5_GET(qpc, qpc, page_offset));
+ MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
+ MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
+ MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(qpc, qpc, log_rq_stride) + 4);
+ MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(qpc, qpc, log_page_size));
+ MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_rq_size));
+
+ pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
+ qp_pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, qpin, pas);
+ memcpy(pas, qp_pas, rq_pas_size);
+
+ err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rq->base.mqp);
+
+ kvfree(in);
+
+ return err;
+}
+
+static void destroy_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_rq *rq)
+{
+ mlx5_core_destroy_rq_tracked(dev->mdev, &rq->base.mqp);
+}
+
+static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_rq *rq, u32 tdn)
+{
+ u32 *in;
+ void *tirc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_tir_in);
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
+ MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
+ MLX5_SET(tirc, tirc, inline_rqn, rq->base.mqp.qpn);
+ MLX5_SET(tirc, tirc, transport_domain, tdn);
+
+ err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn);
+
+ kvfree(in);
+
+ return err;
+}
+
+static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_rq *rq)
+{
+ mlx5_core_destroy_tir(dev->mdev, rq->tirn);
+}
+
+static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ struct mlx5_create_qp_mbox_in *in,
+ struct ib_pd *pd)
+{
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+ struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+ struct ib_uobject *uobj = pd->uobject;
+ struct ib_ucontext *ucontext = uobj->context;
+ struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext);
+ int err;
+ u32 tdn = mucontext->tdn;
+
+ if (qp->sq.wqe_cnt) {
+ err = create_raw_packet_qp_tis(dev, sq, tdn);
+ if (err)
+ return err;
+
+ err = create_raw_packet_qp_sq(dev, sq, in, pd);
+ if (err)
+ goto err_destroy_tis;
+
+ sq->base.container_mibqp = qp;
+ }
+
+ if (qp->rq.wqe_cnt) {
+ err = create_raw_packet_qp_rq(dev, rq, in);
+ if (err)
+ goto err_destroy_sq;
+
+ rq->base.container_mibqp = qp;
+
+ err = create_raw_packet_qp_tir(dev, rq, tdn);
+ if (err)
+ goto err_destroy_rq;
+ }
+
+ qp->trans_qp.base.mqp.qpn = qp->sq.wqe_cnt ? sq->base.mqp.qpn :
+ rq->base.mqp.qpn;
+
+ return 0;
+
+err_destroy_rq:
+ destroy_raw_packet_qp_rq(dev, rq);
+err_destroy_sq:
+ if (!qp->sq.wqe_cnt)
+ return err;
+ destroy_raw_packet_qp_sq(dev, sq);
+err_destroy_tis:
+ destroy_raw_packet_qp_tis(dev, sq);
+
+ return err;
+}
+
+static void destroy_raw_packet_qp(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_qp *qp)
+{
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+ struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+
+ if (qp->rq.wqe_cnt) {
+ destroy_raw_packet_qp_tir(dev, rq);
+ destroy_raw_packet_qp_rq(dev, rq);
+ }
+
+ if (qp->sq.wqe_cnt) {
+ destroy_raw_packet_qp_sq(dev, sq);
+ destroy_raw_packet_qp_tis(dev, sq);
+ }
+}
+
+static void raw_packet_qp_copy_info(struct mlx5_ib_qp *qp,
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp)
+{
+ struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+
+ sq->sq = &qp->sq;
+ rq->rq = &qp->rq;
+ sq->doorbell = &qp->db;
+ rq->doorbell = &qp->db;
+}
+
static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata, struct mlx5_ib_qp *qp)
{
struct mlx5_ib_resources *devr = &dev->devr;
struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_ib_qp_base *base;
struct mlx5_ib_create_qp_resp resp;
struct mlx5_create_qp_mbox_in *in;
struct mlx5_ib_create_qp ucmd;
int inlen = sizeof(*in);
int err;
+ u32 uidx = MLX5_IB_DEFAULT_UIDX;
+ void *qpc;
+
+ base = init_attr->qp_type == IB_QPT_RAW_PACKET ?
+ &qp->raw_packet_qp.rq.base :
+ &qp->trans_qp.base;
- mlx5_ib_odp_create_qp(qp);
+ if (init_attr->qp_type != IB_QPT_RAW_PACKET)
+ mlx5_ib_odp_create_qp(qp);
mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
@@ -880,6 +1211,21 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
}
}
+ if (init_attr->create_flags &
+ (IB_QP_CREATE_CROSS_CHANNEL |
+ IB_QP_CREATE_MANAGED_SEND |
+ IB_QP_CREATE_MANAGED_RECV)) {
+ if (!MLX5_CAP_GEN(mdev, cd)) {
+ mlx5_ib_dbg(dev, "cross-channel isn't supported\n");
+ return -EINVAL;
+ }
+ if (init_attr->create_flags & IB_QP_CREATE_CROSS_CHANNEL)
+ qp->flags |= MLX5_IB_QP_CROSS_CHANNEL;
+ if (init_attr->create_flags & IB_QP_CREATE_MANAGED_SEND)
+ qp->flags |= MLX5_IB_QP_MANAGED_SEND;
+ if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV)
+ qp->flags |= MLX5_IB_QP_MANAGED_RECV;
+ }
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
@@ -889,6 +1235,11 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
return -EFAULT;
}
+ err = get_qp_user_index(to_mucontext(pd->uobject->context),
+ &ucmd, udata->inlen, &uidx);
+ if (err)
+ return err;
+
qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE);
qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE);
} else {
@@ -918,11 +1269,13 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
ucmd.sq_wqe_count, max_wqes);
return -EINVAL;
}
- err = create_user_qp(dev, pd, qp, udata, &in, &resp, &inlen);
+ err = create_user_qp(dev, pd, qp, udata, init_attr, &in,
+ &resp, &inlen, base);
if (err)
mlx5_ib_dbg(dev, "err %d\n", err);
} else {
- err = create_kernel_qp(dev, init_attr, qp, &in, &inlen);
+ err = create_kernel_qp(dev, init_attr, qp, &in, &inlen,
+ base);
if (err)
mlx5_ib_dbg(dev, "err %d\n", err);
}
@@ -954,6 +1307,13 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_BLOCK_MCAST);
+ if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+ in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_MASTER);
+ if (qp->flags & MLX5_IB_QP_MANAGED_SEND)
+ in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_SEND);
+ if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
+ in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_RECV);
+
if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
int rcqe_sz;
int scqe_sz;
@@ -1018,26 +1378,35 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma);
- err = mlx5_core_create_qp(dev->mdev, &qp->mqp, in, inlen);
+ if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
+ qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+ /* 0xffffff means we ask to work with cqe version 0 */
+ MLX5_SET(qpc, qpc, user_index, uidx);
+ }
+
+ if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
+ qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr;
+ raw_packet_qp_copy_info(qp, &qp->raw_packet_qp);
+ err = create_raw_packet_qp(dev, qp, in, pd);
+ } else {
+ err = mlx5_core_create_qp(dev->mdev, &base->mqp, in, inlen);
+ }
+
if (err) {
mlx5_ib_dbg(dev, "create qp failed\n");
goto err_create;
}
kvfree(in);
- /* Hardware wants QPN written in big-endian order (after
- * shifting) for send doorbell. Precompute this value to save
- * a little bit when posting sends.
- */
- qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
- qp->mqp.event = mlx5_ib_qp_event;
+ base->container_mibqp = qp;
+ base->mqp.event = mlx5_ib_qp_event;
return 0;
err_create:
if (qp->create_type == MLX5_QP_USER)
- destroy_qp_user(pd, qp);
+ destroy_qp_user(pd, qp, base);
else if (qp->create_type == MLX5_QP_KERNEL)
destroy_qp_kernel(dev, qp);
@@ -1129,11 +1498,11 @@ static void get_cqs(struct mlx5_ib_qp *qp,
case IB_QPT_UD:
case IB_QPT_RAW_IPV6:
case IB_QPT_RAW_ETHERTYPE:
+ case IB_QPT_RAW_PACKET:
*send_cq = to_mcq(qp->ibqp.send_cq);
*recv_cq = to_mcq(qp->ibqp.recv_cq);
break;
- case IB_QPT_RAW_PACKET:
case IB_QPT_MAX:
default:
*send_cq = NULL;
@@ -1142,45 +1511,66 @@ static void get_cqs(struct mlx5_ib_qp *qp,
}
}
+static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ u16 operation);
+
static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
{
struct mlx5_ib_cq *send_cq, *recv_cq;
+ struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
struct mlx5_modify_qp_mbox_in *in;
int err;
+ base = qp->ibqp.qp_type == IB_QPT_RAW_PACKET ?
+ &qp->raw_packet_qp.rq.base :
+ &qp->trans_qp.base;
+
in = kzalloc(sizeof(*in), GFP_KERNEL);
if (!in)
return;
if (qp->state != IB_QPS_RESET) {
- mlx5_ib_qp_disable_pagefaults(qp);
- if (mlx5_core_qp_modify(dev->mdev, to_mlx5_state(qp->state),
- MLX5_QP_STATE_RST, in, 0, &qp->mqp))
- mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n",
- qp->mqp.qpn);
+ if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) {
+ mlx5_ib_qp_disable_pagefaults(qp);
+ err = mlx5_core_qp_modify(dev->mdev,
+ MLX5_CMD_OP_2RST_QP, in, 0,
+ &base->mqp);
+ } else {
+ err = modify_raw_packet_qp(dev, qp,
+ MLX5_CMD_OP_2RST_QP);
+ }
+ if (err)
+ mlx5_ib_warn(dev, "mlx5_ib: modify QP 0x%06x to RESET failed\n",
+ base->mqp.qpn);
}
get_cqs(qp, &send_cq, &recv_cq);
if (qp->create_type == MLX5_QP_KERNEL) {
mlx5_ib_lock_cqs(send_cq, recv_cq);
- __mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
+ __mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
if (send_cq != recv_cq)
- __mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
+ __mlx5_ib_cq_clean(send_cq, base->mqp.qpn,
+ NULL);
mlx5_ib_unlock_cqs(send_cq, recv_cq);
}
- err = mlx5_core_destroy_qp(dev->mdev, &qp->mqp);
- if (err)
- mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n", qp->mqp.qpn);
- kfree(in);
+ if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+ destroy_raw_packet_qp(dev, qp);
+ } else {
+ err = mlx5_core_destroy_qp(dev->mdev, &base->mqp);
+ if (err)
+ mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n",
+ base->mqp.qpn);
+ }
+ kfree(in);
if (qp->create_type == MLX5_QP_KERNEL)
destroy_qp_kernel(dev, qp);
else if (qp->create_type == MLX5_QP_USER)
- destroy_qp_user(&get_pd(qp)->ibpd, qp);
+ destroy_qp_user(&get_pd(qp)->ibpd, qp, base);
}
static const char *ib_qp_type_str(enum ib_qp_type type)
@@ -1234,6 +1624,16 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
return ERR_PTR(-EINVAL);
}
dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device);
+
+ if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
+ if (!pd->uobject) {
+ mlx5_ib_dbg(dev, "Raw Packet QP is not supported for kernel consumers\n");
+ return ERR_PTR(-EINVAL);
+ } else if (!to_mucontext(pd->uobject->context)->cqe_version) {
+ mlx5_ib_dbg(dev, "Raw Packet QP is only supported for CQE version > 0\n");
+ return ERR_PTR(-EINVAL);
+ }
+ }
}
switch (init_attr->qp_type) {
@@ -1250,6 +1650,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
}
/* fall through */
+ case IB_QPT_RAW_PACKET:
case IB_QPT_RC:
case IB_QPT_UC:
case IB_QPT_UD:
@@ -1272,19 +1673,19 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
else if (is_qp1(init_attr->qp_type))
qp->ibqp.qp_num = 1;
else
- qp->ibqp.qp_num = qp->mqp.qpn;
+ qp->ibqp.qp_num = qp->trans_qp.base.mqp.qpn;
mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n",
- qp->ibqp.qp_num, qp->mqp.qpn, to_mcq(init_attr->recv_cq)->mcq.cqn,
+ qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn,
+ to_mcq(init_attr->recv_cq)->mcq.cqn,
to_mcq(init_attr->send_cq)->mcq.cqn);
- qp->xrcdn = xrcdn;
+ qp->trans_qp.xrcdn = xrcdn;
break;
case IB_QPT_RAW_IPV6:
case IB_QPT_RAW_ETHERTYPE:
- case IB_QPT_RAW_PACKET:
case IB_QPT_MAX:
default:
mlx5_ib_dbg(dev, "unsupported qp type %d\n",
@@ -1318,12 +1719,12 @@ static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_att
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
dest_rd_atomic = attr->max_dest_rd_atomic;
else
- dest_rd_atomic = qp->resp_depth;
+ dest_rd_atomic = qp->trans_qp.resp_depth;
if (attr_mask & IB_QP_ACCESS_FLAGS)
access_flags = attr->qp_access_flags;
else
- access_flags = qp->atomic_rd_en;
+ access_flags = qp->trans_qp.atomic_rd_en;
if (!dest_rd_atomic)
access_flags &= IB_ACCESS_REMOTE_WRITE;
@@ -1360,21 +1761,42 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
return rate + MLX5_STAT_RATE_OFFSET;
}
-static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
+static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
+ struct mlx5_ib_sq *sq, u8 sl)
+{
+ void *in;
+ void *tisc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_tis_in);
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_tis_in, in, bitmask.prio, 1);
+
+ tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
+ MLX5_SET(tisc, tisc, prio, ((sl & 0x7) << 1));
+
+ err = mlx5_core_modify_tis(dev, sq->tisn, in, inlen);
+
+ kvfree(in);
+
+ return err;
+}
+
+static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ const struct ib_ah_attr *ah,
struct mlx5_qp_path *path, u8 port, int attr_mask,
u32 path_flags, const struct ib_qp_attr *attr)
{
+ enum rdma_link_layer ll = rdma_port_get_link_layer(&dev->ib_dev, port);
int err;
- path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
- path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : 0;
-
if (attr_mask & IB_QP_PKEY_INDEX)
path->pkey_index = attr->pkey_index;
- path->grh_mlid = ah->src_path_bits & 0x7f;
- path->rlid = cpu_to_be16(ah->dlid);
-
if (ah->ah_flags & IB_AH_GRH) {
if (ah->grh.sgid_index >=
dev->mdev->port_caps[port - 1].gid_table_len) {
@@ -1383,7 +1805,27 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
dev->mdev->port_caps[port - 1].gid_table_len);
return -EINVAL;
}
- path->grh_mlid |= 1 << 7;
+ }
+
+ if (ll == IB_LINK_LAYER_ETHERNET) {
+ if (!(ah->ah_flags & IB_AH_GRH))
+ return -EINVAL;
+ memcpy(path->rmac, ah->dmac, sizeof(ah->dmac));
+ path->udp_sport = mlx5_get_roce_udp_sport(dev, port,
+ ah->grh.sgid_index);
+ path->dci_cfi_prio_sl = (ah->sl & 0x7) << 4;
+ } else {
+ path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
+ path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 :
+ 0;
+ path->rlid = cpu_to_be16(ah->dlid);
+ path->grh_mlid = ah->src_path_bits & 0x7f;
+ if (ah->ah_flags & IB_AH_GRH)
+ path->grh_mlid |= 1 << 7;
+ path->dci_cfi_prio_sl = ah->sl & 0xf;
+ }
+
+ if (ah->ah_flags & IB_AH_GRH) {
path->mgid_index = ah->grh.sgid_index;
path->hop_limit = ah->grh.hop_limit;
path->tclass_flowlabel =
@@ -1401,7 +1843,10 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
if (attr_mask & IB_QP_TIMEOUT)
path->ackto_lt = attr->timeout << 3;
- path->sl = ah->sl & 0xf;
+ if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt)
+ return modify_raw_packet_eth_prio(dev->mdev,
+ &qp->raw_packet_qp.sq,
+ ah->sl & 0xf);
return 0;
}
@@ -1549,12 +1994,154 @@ static int ib_mask_to_mlx5_opt(int ib_mask)
return result;
}
+static int modify_raw_packet_qp_rq(struct mlx5_core_dev *dev,
+ struct mlx5_ib_rq *rq, int new_state)
+{
+ void *in;
+ void *rqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_rq_in, in, rq_state, rq->state);
+
+ rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
+ MLX5_SET(rqc, rqc, state, new_state);
+
+ err = mlx5_core_modify_rq(dev, rq->base.mqp.qpn, in, inlen);
+ if (err)
+ goto out;
+
+ rq->state = new_state;
+
+out:
+ kvfree(in);
+ return err;
+}
+
+static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
+ struct mlx5_ib_sq *sq, int new_state)
+{
+ void *in;
+ void *sqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_sq_in, in, sq_state, sq->state);
+
+ sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
+ MLX5_SET(sqc, sqc, state, new_state);
+
+ err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen);
+ if (err)
+ goto out;
+
+ sq->state = new_state;
+
+out:
+ kvfree(in);
+ return err;
+}
+
+static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ u16 operation)
+{
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+ struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+ int rq_state;
+ int sq_state;
+ int err;
+
+ switch (operation) {
+ case MLX5_CMD_OP_RST2INIT_QP:
+ rq_state = MLX5_RQC_STATE_RDY;
+ sq_state = MLX5_SQC_STATE_RDY;
+ break;
+ case MLX5_CMD_OP_2ERR_QP:
+ rq_state = MLX5_RQC_STATE_ERR;
+ sq_state = MLX5_SQC_STATE_ERR;
+ break;
+ case MLX5_CMD_OP_2RST_QP:
+ rq_state = MLX5_RQC_STATE_RST;
+ sq_state = MLX5_SQC_STATE_RST;
+ break;
+ case MLX5_CMD_OP_INIT2INIT_QP:
+ case MLX5_CMD_OP_INIT2RTR_QP:
+ case MLX5_CMD_OP_RTR2RTS_QP:
+ case MLX5_CMD_OP_RTS2RTS_QP:
+ /* Nothing to do here... */
+ return 0;
+ default:
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ if (qp->rq.wqe_cnt) {
+ err = modify_raw_packet_qp_rq(dev->mdev, rq, rq_state);
+ if (err)
+ return err;
+ }
+
+ if (qp->sq.wqe_cnt)
+ return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state);
+
+ return 0;
+}
+
static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
enum ib_qp_state cur_state, enum ib_qp_state new_state)
{
+ static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
+ [MLX5_QP_STATE_RST] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_RST2INIT_QP,
+ },
+ [MLX5_QP_STATE_INIT] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_INIT2INIT_QP,
+ [MLX5_QP_STATE_RTR] = MLX5_CMD_OP_INIT2RTR_QP,
+ },
+ [MLX5_QP_STATE_RTR] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTR2RTS_QP,
+ },
+ [MLX5_QP_STATE_RTS] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTS2RTS_QP,
+ },
+ [MLX5_QP_STATE_SQD] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ },
+ [MLX5_QP_STATE_SQER] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_SQERR2RTS_QP,
+ },
+ [MLX5_QP_STATE_ERR] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ }
+ };
+
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
struct mlx5_ib_cq *send_cq, *recv_cq;
struct mlx5_qp_context *context;
struct mlx5_modify_qp_mbox_in *in;
@@ -1564,6 +2151,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
int sqd_event;
int mlx5_st;
int err;
+ u16 op;
in = kzalloc(sizeof(*in), GFP_KERNEL);
if (!in)
@@ -1623,7 +2211,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
context->pri_path.port = attr->port_num;
if (attr_mask & IB_QP_AV) {
- err = mlx5_set_path(dev, &attr->ah_attr, &context->pri_path,
+ err = mlx5_set_path(dev, qp, &attr->ah_attr, &context->pri_path,
attr_mask & IB_QP_PORT ? attr->port_num : qp->port,
attr_mask, 0, attr);
if (err)
@@ -1634,7 +2222,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
context->pri_path.ackto_lt |= attr->timeout << 3;
if (attr_mask & IB_QP_ALT_PATH) {
- err = mlx5_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
+ err = mlx5_set_path(dev, qp, &attr->alt_ah_attr,
+ &context->alt_path,
attr->alt_port_num, attr_mask, 0, attr);
if (err)
goto out;
@@ -1706,41 +2295,51 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
* again to RTS, and may cause the driver and the device to get out of
* sync. */
if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
- (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR))
+ (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR) &&
+ (qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
mlx5_ib_qp_disable_pagefaults(qp);
+ if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
+ !optab[mlx5_cur][mlx5_new])
+ goto out;
+
+ op = optab[mlx5_cur][mlx5_new];
optpar = ib_mask_to_mlx5_opt(attr_mask);
optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
in->optparam = cpu_to_be32(optpar);
- err = mlx5_core_qp_modify(dev->mdev, to_mlx5_state(cur_state),
- to_mlx5_state(new_state), in, sqd_event,
- &qp->mqp);
+
+ if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET)
+ err = modify_raw_packet_qp(dev, qp, op);
+ else
+ err = mlx5_core_qp_modify(dev->mdev, op, in, sqd_event,
+ &base->mqp);
if (err)
goto out;
- if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+ if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT &&
+ (qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
mlx5_ib_qp_enable_pagefaults(qp);
qp->state = new_state;
if (attr_mask & IB_QP_ACCESS_FLAGS)
- qp->atomic_rd_en = attr->qp_access_flags;
+ qp->trans_qp.atomic_rd_en = attr->qp_access_flags;
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
- qp->resp_depth = attr->max_dest_rd_atomic;
+ qp->trans_qp.resp_depth = attr->max_dest_rd_atomic;
if (attr_mask & IB_QP_PORT)
qp->port = attr->port_num;
if (attr_mask & IB_QP_ALT_PATH)
- qp->alt_port = attr->alt_port_num;
+ qp->trans_qp.alt_port = attr->alt_port_num;
/*
* If we moved a kernel QP to RESET, clean up all old CQ
* entries and reinitialize the QP.
*/
if (new_state == IB_QPS_RESET && !ibqp->uobject) {
- mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
+ mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
ibqp->srq ? to_msrq(ibqp->srq) : NULL);
if (send_cq != recv_cq)
- mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
+ mlx5_ib_cq_clean(send_cq, base->mqp.qpn, NULL);
qp->rq.head = 0;
qp->rq.tail = 0;
@@ -1765,15 +2364,21 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
enum ib_qp_state cur_state, new_state;
int err = -EINVAL;
int port;
+ enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
mutex_lock(&qp->mutex);
cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
+ if (!(cur_state == new_state && cur_state == IB_QPS_RESET)) {
+ port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+ ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port);
+ }
+
if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR &&
!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
- IB_LINK_LAYER_UNSPECIFIED))
+ ll))
goto out;
if ((attr_mask & IB_QP_PORT) &&
@@ -2570,7 +3175,7 @@ static void finish_wqe(struct mlx5_ib_qp *qp,
ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
mlx5_opcode | ((u32)opmod << 24));
- ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8));
+ ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8));
ctrl->fm_ce_se |= fence;
qp->fm_cache = next_fence;
if (unlikely(qp->wq_sig))
@@ -3003,7 +3608,7 @@ static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_at
ib_ah_attr->port_num > MLX5_CAP_GEN(dev, num_ports))
return;
- ib_ah_attr->sl = path->sl & 0xf;
+ ib_ah_attr->sl = path->dci_cfi_prio_sl & 0xf;
ib_ah_attr->dlid = be16_to_cpu(path->rlid);
ib_ah_attr->src_path_bits = path->grh_mlid & 0x7f;
@@ -3021,39 +3626,153 @@ static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_at
}
}
-int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
- struct ib_qp_init_attr *qp_init_attr)
+static int query_raw_packet_qp_sq_state(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq,
+ u8 *sq_state)
+{
+ void *out;
+ void *sqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(query_sq_out);
+ out = mlx5_vzalloc(inlen);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_core_query_sq(dev->mdev, sq->base.mqp.qpn, out);
+ if (err)
+ goto out;
+
+ sqc = MLX5_ADDR_OF(query_sq_out, out, sq_context);
+ *sq_state = MLX5_GET(sqc, sqc, state);
+ sq->state = *sq_state;
+
+out:
+ kvfree(out);
+ return err;
+}
+
+static int query_raw_packet_qp_rq_state(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_rq *rq,
+ u8 *rq_state)
+{
+ void *out;
+ void *rqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(query_rq_out);
+ out = mlx5_vzalloc(inlen);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_core_query_rq(dev->mdev, rq->base.mqp.qpn, out);
+ if (err)
+ goto out;
+
+ rqc = MLX5_ADDR_OF(query_rq_out, out, rq_context);
+ *rq_state = MLX5_GET(rqc, rqc, state);
+ rq->state = *rq_state;
+
+out:
+ kvfree(out);
+ return err;
+}
+
+static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state,
+ struct mlx5_ib_qp *qp, u8 *qp_state)
+{
+ static const u8 sqrq_trans[MLX5_RQ_NUM_STATE][MLX5_SQ_NUM_STATE] = {
+ [MLX5_RQC_STATE_RST] = {
+ [MLX5_SQC_STATE_RST] = IB_QPS_RESET,
+ [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE_BAD,
+ [MLX5_SQC_STATE_ERR] = MLX5_QP_STATE_BAD,
+ [MLX5_SQ_STATE_NA] = IB_QPS_RESET,
+ },
+ [MLX5_RQC_STATE_RDY] = {
+ [MLX5_SQC_STATE_RST] = MLX5_QP_STATE_BAD,
+ [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE,
+ [MLX5_SQC_STATE_ERR] = IB_QPS_SQE,
+ [MLX5_SQ_STATE_NA] = MLX5_QP_STATE,
+ },
+ [MLX5_RQC_STATE_ERR] = {
+ [MLX5_SQC_STATE_RST] = MLX5_QP_STATE_BAD,
+ [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE_BAD,
+ [MLX5_SQC_STATE_ERR] = IB_QPS_ERR,
+ [MLX5_SQ_STATE_NA] = IB_QPS_ERR,
+ },
+ [MLX5_RQ_STATE_NA] = {
+ [MLX5_SQC_STATE_RST] = IB_QPS_RESET,
+ [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE,
+ [MLX5_SQC_STATE_ERR] = MLX5_QP_STATE,
+ [MLX5_SQ_STATE_NA] = MLX5_QP_STATE_BAD,
+ },
+ };
+
+ *qp_state = sqrq_trans[rq_state][sq_state];
+
+ if (*qp_state == MLX5_QP_STATE_BAD) {
+ WARN(1, "Buggy Raw Packet QP state, SQ 0x%x state: 0x%x, RQ 0x%x state: 0x%x",
+ qp->raw_packet_qp.sq.base.mqp.qpn, sq_state,
+ qp->raw_packet_qp.rq.base.mqp.qpn, rq_state);
+ return -EINVAL;
+ }
+
+ if (*qp_state == MLX5_QP_STATE)
+ *qp_state = qp->state;
+
+ return 0;
+}
+
+static int query_raw_packet_qp_state(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_qp *qp,
+ u8 *raw_packet_qp_state)
+{
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+ struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+ int err;
+ u8 sq_state = MLX5_SQ_STATE_NA;
+ u8 rq_state = MLX5_RQ_STATE_NA;
+
+ if (qp->sq.wqe_cnt) {
+ err = query_raw_packet_qp_sq_state(dev, sq, &sq_state);
+ if (err)
+ return err;
+ }
+
+ if (qp->rq.wqe_cnt) {
+ err = query_raw_packet_qp_rq_state(dev, rq, &rq_state);
+ if (err)
+ return err;
+ }
+
+ return sqrq_state_to_qp_state(sq_state, rq_state, qp,
+ raw_packet_qp_state);
+}
+
+static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ struct ib_qp_attr *qp_attr)
{
- struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
- struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_query_qp_mbox_out *outb;
struct mlx5_qp_context *context;
int mlx5_state;
int err = 0;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- /*
- * Wait for any outstanding page faults, in case the user frees memory
- * based upon this query's result.
- */
- flush_workqueue(mlx5_ib_page_fault_wq);
-#endif
-
- mutex_lock(&qp->mutex);
outb = kzalloc(sizeof(*outb), GFP_KERNEL);
- if (!outb) {
- err = -ENOMEM;
- goto out;
- }
+ if (!outb)
+ return -ENOMEM;
+
context = &outb->ctx;
- err = mlx5_core_qp_query(dev->mdev, &qp->mqp, outb, sizeof(*outb));
+ err = mlx5_core_qp_query(dev->mdev, &qp->trans_qp.base.mqp, outb,
+ sizeof(*outb));
if (err)
- goto out_free;
+ goto out;
mlx5_state = be32_to_cpu(context->flags) >> 28;
qp->state = to_ib_qp_state(mlx5_state);
- qp_attr->qp_state = qp->state;
qp_attr->path_mtu = context->mtu_msgmax >> 5;
qp_attr->path_mig_state =
to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
@@ -3087,6 +3806,43 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7;
qp_attr->rnr_retry = (be32_to_cpu(context->params1) >> 13) & 0x7;
qp_attr->alt_timeout = context->alt_path.ackto_lt >> 3;
+
+out:
+ kfree(outb);
+ return err;
+}
+
+int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ int err = 0;
+ u8 raw_packet_qp_state;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ /*
+ * Wait for any outstanding page faults, in case the user frees memory
+ * based upon this query's result.
+ */
+ flush_workqueue(mlx5_ib_page_fault_wq);
+#endif
+
+ mutex_lock(&qp->mutex);
+
+ if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+ err = query_raw_packet_qp_state(dev, qp, &raw_packet_qp_state);
+ if (err)
+ goto out;
+ qp->state = raw_packet_qp_state;
+ qp_attr->port_num = 1;
+ } else {
+ err = query_qp_attr(dev, qp, qp_attr);
+ if (err)
+ goto out;
+ }
+
+ qp_attr->qp_state = qp->state;
qp_attr->cur_qp_state = qp_attr->qp_state;
qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt;
qp_attr->cap.max_recv_sge = qp->rq.max_gs;
@@ -3110,12 +3866,16 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
+ if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+ qp_init_attr->create_flags |= IB_QP_CREATE_CROSS_CHANNEL;
+ if (qp->flags & MLX5_IB_QP_MANAGED_SEND)
+ qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND;
+ if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
+ qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV;
+
qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ?
IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
-out_free:
- kfree(outb);
-
out:
mutex_unlock(&qp->mutex);
return err;
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index e008505..4659256 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -78,28 +78,41 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
struct ib_udata *udata, int buf_size, int *inlen)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct mlx5_ib_create_srq ucmd;
+ struct mlx5_ib_create_srq ucmd = {};
size_t ucmdlen;
+ void *xsrqc;
int err;
int npages;
int page_shift;
int ncont;
u32 offset;
+ u32 uidx = MLX5_IB_DEFAULT_UIDX;
+ int drv_data = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
- ucmdlen =
- (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) <
- sizeof(ucmd)) ? (sizeof(ucmd) -
- sizeof(ucmd.reserved)) : sizeof(ucmd);
+ if (drv_data < 0)
+ return -EINVAL;
+
+ ucmdlen = (drv_data < sizeof(ucmd)) ?
+ drv_data : sizeof(ucmd);
if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) {
mlx5_ib_dbg(dev, "failed copy udata\n");
return -EFAULT;
}
- if (ucmdlen == sizeof(ucmd) &&
- ucmd.reserved != 0)
+ if (ucmd.reserved0 || ucmd.reserved1)
return -EINVAL;
+ if (drv_data > sizeof(ucmd) &&
+ !ib_is_udata_cleared(udata, sizeof(ucmd),
+ drv_data - sizeof(ucmd)))
+ return -EINVAL;
+
+ err = get_srq_user_index(to_mucontext(pd->uobject->context),
+ &ucmd, udata->inlen, &uidx);
+ if (err)
+ return err;
+
srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size,
@@ -138,6 +151,12 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
(*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
(*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
+ if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
+ xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
+ xrc_srq_context_entry);
+ MLX5_SET(xrc_srqc, xsrqc, user_index, uidx);
+ }
+
return 0;
err_in:
@@ -158,6 +177,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
struct mlx5_wqe_srq_next_seg *next;
int page_shift;
int npages;
+ void *xsrqc;
err = mlx5_db_alloc(dev->mdev, &srq->db);
if (err) {
@@ -204,6 +224,13 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
(*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+ if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
+ xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
+ xrc_srq_context_entry);
+ /* 0xffffff means we ask to work with cqe version 0 */
+ MLX5_SET(xrc_srqc, xsrqc, user_index, MLX5_IB_DEFAULT_UIDX);
+ }
+
return 0;
err_in:
diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h
index 76fb7b9..b94a554 100644
--- a/drivers/infiniband/hw/mlx5/user.h
+++ b/drivers/infiniband/hw/mlx5/user.h
@@ -35,6 +35,8 @@
#include <linux/types.h>
+#include "mlx5_ib.h"
+
enum {
MLX5_QP_FLAG_SIGNATURE = 1 << 0,
MLX5_QP_FLAG_SCATTER_CQE = 1 << 1,
@@ -66,7 +68,15 @@ struct mlx5_ib_alloc_ucontext_req_v2 {
__u32 total_num_uuars;
__u32 num_low_latency_uuars;
__u32 flags;
- __u32 reserved;
+ __u32 comp_mask;
+ __u8 max_cqe_version;
+ __u8 reserved0;
+ __u16 reserved1;
+ __u32 reserved2;
+};
+
+enum mlx5_ib_alloc_ucontext_resp_mask {
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0,
};
struct mlx5_ib_alloc_ucontext_resp {
@@ -80,7 +90,13 @@ struct mlx5_ib_alloc_ucontext_resp {
__u32 max_recv_wr;
__u32 max_srq_recv_wr;
__u16 num_ports;
- __u16 reserved;
+ __u16 reserved1;
+ __u32 comp_mask;
+ __u32 response_length;
+ __u8 cqe_version;
+ __u8 reserved2;
+ __u16 reserved3;
+ __u64 hca_core_clock_offset;
};
struct mlx5_ib_alloc_pd_resp {
@@ -110,7 +126,9 @@ struct mlx5_ib_create_srq {
__u64 buf_addr;
__u64 db_addr;
__u32 flags;
- __u32 reserved; /* explicit padding (optional on i386) */
+ __u32 reserved0; /* explicit padding (optional on i386) */
+ __u32 uidx;
+ __u32 reserved1;
};
struct mlx5_ib_create_srq_resp {
@@ -125,9 +143,48 @@ struct mlx5_ib_create_qp {
__u32 rq_wqe_count;
__u32 rq_wqe_shift;
__u32 flags;
+ __u32 uidx;
+ __u32 reserved0;
+ __u64 sq_buf_addr;
};
struct mlx5_ib_create_qp_resp {
__u32 uuar_index;
};
+
+static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext,
+ struct mlx5_ib_create_qp *ucmd,
+ int inlen,
+ u32 *user_index)
+{
+ u8 cqe_version = ucontext->cqe_version;
+
+ if (field_avail(struct mlx5_ib_create_qp, uidx, inlen) &&
+ !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
+ return 0;
+
+ if (!!(field_avail(struct mlx5_ib_create_qp, uidx, inlen) !=
+ !!cqe_version))
+ return -EINVAL;
+
+ return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
+}
+
+static inline int get_srq_user_index(struct mlx5_ib_ucontext *ucontext,
+ struct mlx5_ib_create_srq *ucmd,
+ int inlen,
+ u32 *user_index)
+{
+ u8 cqe_version = ucontext->cqe_version;
+
+ if (field_avail(struct mlx5_ib_create_srq, uidx, inlen) &&
+ !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
+ return 0;
+
+ if (!!(field_avail(struct mlx5_ib_create_srq, uidx, inlen) !=
+ !!cqe_version))
+ return -EINVAL;
+
+ return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
+}
#endif /* MLX5_IB_USER_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index 40ba833..a6531ff 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -608,9 +608,6 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
entry->opcode = IB_WC_FETCH_ADD;
entry->byte_len = MTHCA_ATOMIC_BYTE_LEN;
break;
- case MTHCA_OPCODE_BIND_MW:
- entry->opcode = IB_WC_BIND_MW;
- break;
default:
entry->opcode = MTHCA_OPCODE_INVALID;
break;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index dc2d48c..9866c35 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -898,89 +898,6 @@ static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc)
return &mr->ibmr;
}
-static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf,
- int acc,
- u64 *iova_start)
-{
- struct mthca_mr *mr;
- u64 *page_list;
- u64 total_size;
- unsigned long mask;
- int shift;
- int npages;
- int err;
- int i, j, n;
-
- mask = buffer_list[0].addr ^ *iova_start;
- total_size = 0;
- for (i = 0; i < num_phys_buf; ++i) {
- if (i != 0)
- mask |= buffer_list[i].addr;
- if (i != num_phys_buf - 1)
- mask |= buffer_list[i].addr + buffer_list[i].size;
-
- total_size += buffer_list[i].size;
- }
-
- if (mask & ~PAGE_MASK)
- return ERR_PTR(-EINVAL);
-
- shift = __ffs(mask | 1 << 31);
-
- buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1);
- buffer_list[0].addr &= ~0ull << shift;
-
- mr = kmalloc(sizeof *mr, GFP_KERNEL);
- if (!mr)
- return ERR_PTR(-ENOMEM);
-
- npages = 0;
- for (i = 0; i < num_phys_buf; ++i)
- npages += (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
-
- if (!npages)
- return &mr->ibmr;
-
- page_list = kmalloc(npages * sizeof *page_list, GFP_KERNEL);
- if (!page_list) {
- kfree(mr);
- return ERR_PTR(-ENOMEM);
- }
-
- n = 0;
- for (i = 0; i < num_phys_buf; ++i)
- for (j = 0;
- j < (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
- ++j)
- page_list[n++] = buffer_list[i].addr + ((u64) j << shift);
-
- mthca_dbg(to_mdev(pd->device), "Registering memory at %llx (iova %llx) "
- "in PD %x; shift %d, npages %d.\n",
- (unsigned long long) buffer_list[0].addr,
- (unsigned long long) *iova_start,
- to_mpd(pd)->pd_num,
- shift, npages);
-
- err = mthca_mr_alloc_phys(to_mdev(pd->device),
- to_mpd(pd)->pd_num,
- page_list, shift, npages,
- *iova_start, total_size,
- convert_access(acc), mr);
-
- if (err) {
- kfree(page_list);
- kfree(mr);
- return ERR_PTR(err);
- }
-
- kfree(page_list);
- mr->umem = NULL;
-
- return &mr->ibmr;
-}
-
static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *udata)
{
@@ -1346,7 +1263,6 @@ int mthca_register_device(struct mthca_dev *dev)
dev->ib_dev.destroy_cq = mthca_destroy_cq;
dev->ib_dev.poll_cq = mthca_poll_cq;
dev->ib_dev.get_dma_mr = mthca_get_dma_mr;
- dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr;
dev->ib_dev.reg_user_mr = mthca_reg_user_mr;
dev->ib_dev.dereg_mr = mthca_dereg_mr;
dev->ib_dev.get_port_immutable = mthca_port_immutable;
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 35fe506..96e5fb9 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1485,7 +1485,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
u16 pkey;
ib_ud_header_init(256, /* assume a MAD */ 1, 0, 0,
- mthca_ah_grh_present(to_mah(wr->ah)), 0,
+ mthca_ah_grh_present(to_mah(wr->ah)), 0, 0, 0,
&sqp->ud_header);
err = mthca_read_ah(dev, to_mah(wr->ah), &sqp->ud_header);
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 8a3ad17..cb9f0f2 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -134,7 +134,7 @@ static void record_ird_ord(struct nes_cm_node *, u16, u16);
/* External CM API Interface */
/* instance of function pointers for client API */
/* set address of this instance to cm_core->cm_ops at cm_core alloc */
-static struct nes_cm_ops nes_cm_api = {
+static const struct nes_cm_ops nes_cm_api = {
mini_cm_accelerated,
mini_cm_listen,
mini_cm_del_listen,
@@ -3232,7 +3232,6 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
int passive_state;
struct nes_ib_device *nesibdev;
struct ib_mr *ibmr = NULL;
- struct ib_phys_buf ibphysbuf;
struct nes_pd *nespd;
u64 tagged_offset;
u8 mpa_frame_offset = 0;
@@ -3316,21 +3315,19 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
u64temp = (unsigned long)nesqp;
nesibdev = nesvnic->nesibdev;
nespd = nesqp->nespd;
- ibphysbuf.addr = nesqp->ietf_frame_pbase + mpa_frame_offset;
- ibphysbuf.size = buff_len;
tagged_offset = (u64)(unsigned long)*start_buff;
- ibmr = nesibdev->ibdev.reg_phys_mr((struct ib_pd *)nespd,
- &ibphysbuf, 1,
- IB_ACCESS_LOCAL_WRITE,
- &tagged_offset);
- if (!ibmr) {
+ ibmr = nes_reg_phys_mr(&nespd->ibpd,
+ nesqp->ietf_frame_pbase + mpa_frame_offset,
+ buff_len, IB_ACCESS_LOCAL_WRITE,
+ &tagged_offset);
+ if (IS_ERR(ibmr)) {
nes_debug(NES_DBG_CM, "Unable to register memory region"
"for lSMM for cm_node = %p \n",
cm_node);
pci_free_consistent(nesdev->pcidev,
nesqp->private_data_len + nesqp->ietf_frame_size,
nesqp->ietf_frame, nesqp->ietf_frame_pbase);
- return -ENOMEM;
+ return PTR_ERR(ibmr);
}
ibmr->pd = &nespd->ibpd;
diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h
index 32a6420..147c2c8 100644
--- a/drivers/infiniband/hw/nes/nes_cm.h
+++ b/drivers/infiniband/hw/nes/nes_cm.h
@@ -423,7 +423,7 @@ struct nes_cm_core {
struct timer_list tcp_timer;
- struct nes_cm_ops *api;
+ const struct nes_cm_ops *api;
int (*post_event)(struct nes_cm_event *event);
atomic_t events_posted;
diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c
index 2042c0f..6d3a169 100644
--- a/drivers/infiniband/hw/nes/nes_utils.c
+++ b/drivers/infiniband/hw/nes/nes_utils.c
@@ -727,7 +727,7 @@ int nes_arp_table(struct nes_device *nesdev, u32 ip_addr, u8 *mac_addr, u32 acti
if (action == NES_ARP_DELETE) {
nes_debug(NES_DBG_NETDEV, "DELETE, arp_index=%d\n", arp_index);
nesadapter->arp_table[arp_index].ip_addr = 0;
- memset(nesadapter->arp_table[arp_index].mac_addr, 0x00, ETH_ALEN);
+ eth_zero_addr(nesadapter->arp_table[arp_index].mac_addr);
nes_free_resource(nesadapter, nesadapter->allocated_arps, arp_index);
return arp_index;
}
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 137880a..8c4daf7 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -206,80 +206,6 @@ static int nes_dealloc_mw(struct ib_mw *ibmw)
}
-/**
- * nes_bind_mw
- */
-static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw,
- struct ib_mw_bind *ibmw_bind)
-{
- u64 u64temp;
- struct nes_vnic *nesvnic = to_nesvnic(ibqp->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- /* struct nes_mr *nesmr = to_nesmw(ibmw); */
- struct nes_qp *nesqp = to_nesqp(ibqp);
- struct nes_hw_qp_wqe *wqe;
- unsigned long flags = 0;
- u32 head;
- u32 wqe_misc = 0;
- u32 qsize;
-
- if (nesqp->ibqp_state > IB_QPS_RTS)
- return -EINVAL;
-
- spin_lock_irqsave(&nesqp->lock, flags);
-
- head = nesqp->hwqp.sq_head;
- qsize = nesqp->hwqp.sq_tail;
-
- /* Check for SQ overflow */
- if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) {
- spin_unlock_irqrestore(&nesqp->lock, flags);
- return -ENOMEM;
- }
-
- wqe = &nesqp->hwqp.sq_vbase[head];
- /* nes_debug(NES_DBG_MR, "processing sq wqe at %p, head = %u.\n", wqe, head); */
- nes_fill_init_qp_wqe(wqe, nesqp, head);
- u64temp = ibmw_bind->wr_id;
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX, u64temp);
- wqe_misc = NES_IWARP_SQ_OP_BIND;
-
- wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
-
- if (ibmw_bind->send_flags & IB_SEND_SIGNALED)
- wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL;
-
- if (ibmw_bind->bind_info.mw_access_flags & IB_ACCESS_REMOTE_WRITE)
- wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE;
- if (ibmw_bind->bind_info.mw_access_flags & IB_ACCESS_REMOTE_READ)
- wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_READ;
-
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_MISC_IDX, wqe_misc);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MR_IDX,
- ibmw_bind->bind_info.mr->lkey);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MW_IDX, ibmw->rkey);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_LENGTH_LOW_IDX,
- ibmw_bind->bind_info.length);
- wqe->wqe_words[NES_IWARP_SQ_BIND_WQE_LENGTH_HIGH_IDX] = 0;
- u64temp = (u64)ibmw_bind->bind_info.addr;
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_VA_FBO_LOW_IDX, u64temp);
-
- head++;
- if (head >= qsize)
- head = 0;
-
- nesqp->hwqp.sq_head = head;
- barrier();
-
- nes_write32(nesdev->regs+NES_WQE_ALLOC,
- (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id);
-
- spin_unlock_irqrestore(&nesqp->lock, flags);
-
- return 0;
-}
-
-
/*
* nes_alloc_fast_mr
*/
@@ -2074,9 +2000,8 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
/**
* nes_reg_phys_mr
*/
-static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
- struct ib_phys_buf *buffer_list, int num_phys_buf, int acc,
- u64 * iova_start)
+struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, u64 addr, u64 size,
+ int acc, u64 *iova_start)
{
u64 region_length;
struct nes_pd *nespd = to_nespd(ib_pd);
@@ -2088,13 +2013,10 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
struct nes_vpbl vpbl;
struct nes_root_vpbl root_vpbl;
u32 stag;
- u32 i;
unsigned long mask;
u32 stag_index = 0;
u32 next_stag_index = 0;
u32 driver_key = 0;
- u32 root_pbl_index = 0;
- u32 cur_pbl_index = 0;
int err = 0;
int ret = 0;
u16 pbl_count = 0;
@@ -2113,11 +2035,8 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
next_stag_index >>= 8;
next_stag_index %= nesadapter->max_mr;
- if (num_phys_buf > (1024*512)) {
- return ERR_PTR(-E2BIG);
- }
- if ((buffer_list[0].addr ^ *iova_start) & ~PAGE_MASK)
+ if ((addr ^ *iova_start) & ~PAGE_MASK)
return ERR_PTR(-EINVAL);
err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, nesadapter->max_mr,
@@ -2132,84 +2051,33 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
return ERR_PTR(-ENOMEM);
}
- for (i = 0; i < num_phys_buf; i++) {
+ /* Allocate a 4K buffer for the PBL */
+ vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
+ &vpbl.pbl_pbase);
+ nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%016lX\n",
+ vpbl.pbl_vbase, (unsigned long)vpbl.pbl_pbase);
+ if (!vpbl.pbl_vbase) {
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+ ibmr = ERR_PTR(-ENOMEM);
+ kfree(nesmr);
+ goto reg_phys_err;
+ }
- if ((i & 0x01FF) == 0) {
- if (root_pbl_index == 1) {
- /* Allocate the root PBL */
- root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 8192,
- &root_vpbl.pbl_pbase);
- nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n",
- root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase);
- if (!root_vpbl.pbl_vbase) {
- pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
- vpbl.pbl_pbase);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- kfree(nesmr);
- return ERR_PTR(-ENOMEM);
- }
- root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024, GFP_KERNEL);
- if (!root_vpbl.leaf_vpbl) {
- pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
- root_vpbl.pbl_pbase);
- pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
- vpbl.pbl_pbase);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- kfree(nesmr);
- return ERR_PTR(-ENOMEM);
- }
- root_vpbl.pbl_vbase[0].pa_low = cpu_to_le32((u32)vpbl.pbl_pbase);
- root_vpbl.pbl_vbase[0].pa_high =
- cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
- root_vpbl.leaf_vpbl[0] = vpbl;
- }
- /* Allocate a 4K buffer for the PBL */
- vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
- &vpbl.pbl_pbase);
- nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%016lX\n",
- vpbl.pbl_vbase, (unsigned long)vpbl.pbl_pbase);
- if (!vpbl.pbl_vbase) {
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- ibmr = ERR_PTR(-ENOMEM);
- kfree(nesmr);
- goto reg_phys_err;
- }
- /* Fill in the root table */
- if (1 <= root_pbl_index) {
- root_vpbl.pbl_vbase[root_pbl_index].pa_low =
- cpu_to_le32((u32)vpbl.pbl_pbase);
- root_vpbl.pbl_vbase[root_pbl_index].pa_high =
- cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
- root_vpbl.leaf_vpbl[root_pbl_index] = vpbl;
- }
- root_pbl_index++;
- cur_pbl_index = 0;
- }
- mask = !buffer_list[i].size;
- if (i != 0)
- mask |= buffer_list[i].addr;
- if (i != num_phys_buf - 1)
- mask |= buffer_list[i].addr + buffer_list[i].size;
-
- if (mask & ~PAGE_MASK) {
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- nes_debug(NES_DBG_MR, "Invalid buffer addr or size\n");
- ibmr = ERR_PTR(-EINVAL);
- kfree(nesmr);
- goto reg_phys_err;
- }
+ mask = !size;
- region_length += buffer_list[i].size;
- if ((i != 0) && (single_page)) {
- if ((buffer_list[i-1].addr+PAGE_SIZE) != buffer_list[i].addr)
- single_page = 0;
- }
- vpbl.pbl_vbase[cur_pbl_index].pa_low = cpu_to_le32((u32)buffer_list[i].addr & PAGE_MASK);
- vpbl.pbl_vbase[cur_pbl_index++].pa_high =
- cpu_to_le32((u32)((((u64)buffer_list[i].addr) >> 32)));
+ if (mask & ~PAGE_MASK) {
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+ nes_debug(NES_DBG_MR, "Invalid buffer addr or size\n");
+ ibmr = ERR_PTR(-EINVAL);
+ kfree(nesmr);
+ goto reg_phys_err;
}
+ region_length += size;
+ vpbl.pbl_vbase[0].pa_low = cpu_to_le32((u32)addr & PAGE_MASK);
+ vpbl.pbl_vbase[0].pa_high = cpu_to_le32((u32)((((u64)addr) >> 32)));
+
stag = stag_index << 8;
stag |= driver_key;
stag += (u32)stag_key;
@@ -2219,17 +2087,15 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
stag, (unsigned long)*iova_start, (unsigned long)region_length, stag_index);
/* Make the leaf PBL the root if only one PBL */
- if (root_pbl_index == 1) {
- root_vpbl.pbl_pbase = vpbl.pbl_pbase;
- }
+ root_vpbl.pbl_pbase = vpbl.pbl_pbase;
if (single_page) {
pbl_count = 0;
} else {
- pbl_count = root_pbl_index;
+ pbl_count = 1;
}
ret = nes_reg_mr(nesdev, nespd, stag, region_length, &root_vpbl,
- buffer_list[0].addr, pbl_count, (u16)cur_pbl_index, acc, iova_start,
+ addr, pbl_count, 1, acc, iova_start,
&nesmr->pbls_used, &nesmr->pbl_4k);
if (ret == 0) {
@@ -2242,21 +2108,9 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
ibmr = ERR_PTR(-ENOMEM);
}
- reg_phys_err:
- /* free the resources */
- if (root_pbl_index == 1) {
- /* single PBL case */
- pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, vpbl.pbl_pbase);
- } else {
- for (i=0; i<root_pbl_index; i++) {
- pci_free_consistent(nesdev->pcidev, 4096, root_vpbl.leaf_vpbl[i].pbl_vbase,
- root_vpbl.leaf_vpbl[i].pbl_pbase);
- }
- kfree(root_vpbl.leaf_vpbl);
- pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
- root_vpbl.pbl_pbase);
- }
-
+reg_phys_err:
+ /* single PBL case */
+ pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, vpbl.pbl_pbase);
return ibmr;
}
@@ -2266,17 +2120,13 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
*/
static struct ib_mr *nes_get_dma_mr(struct ib_pd *pd, int acc)
{
- struct ib_phys_buf bl;
u64 kva = 0;
nes_debug(NES_DBG_MR, "\n");
- bl.size = (u64)0xffffffffffULL;
- bl.addr = 0;
- return nes_reg_phys_mr(pd, &bl, 1, acc, &kva);
+ return nes_reg_phys_mr(pd, 0, 0xffffffffffULL, acc, &kva);
}
-
/**
* nes_reg_user_mr
*/
@@ -3888,12 +3738,10 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
nesibdev->ibdev.destroy_cq = nes_destroy_cq;
nesibdev->ibdev.poll_cq = nes_poll_cq;
nesibdev->ibdev.get_dma_mr = nes_get_dma_mr;
- nesibdev->ibdev.reg_phys_mr = nes_reg_phys_mr;
nesibdev->ibdev.reg_user_mr = nes_reg_user_mr;
nesibdev->ibdev.dereg_mr = nes_dereg_mr;
nesibdev->ibdev.alloc_mw = nes_alloc_mw;
nesibdev->ibdev.dealloc_mw = nes_dealloc_mw;
- nesibdev->ibdev.bind_mw = nes_bind_mw;
nesibdev->ibdev.alloc_mr = nes_alloc_mr;
nesibdev->ibdev.map_mr_sg = nes_map_mr_sg;
diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h
index a204b67..7029088 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.h
+++ b/drivers/infiniband/hw/nes/nes_verbs.h
@@ -190,4 +190,8 @@ struct nes_qp {
u8 pau_state;
__u64 nesuqp_addr;
};
+
+struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
+ u64 addr, u64 size, int acc, u64 *iova_start);
+
#endif /* NES_VERBS_H */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index 9820074..3790771 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -152,9 +152,10 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
if ((pd->uctx) &&
(!rdma_is_multicast_addr((struct in6_addr *)attr->grh.dgid.raw)) &&
(!rdma_link_local_addr((struct in6_addr *)attr->grh.dgid.raw))) {
- status = rdma_addr_find_dmac_by_grh(&sgid, &attr->grh.dgid,
- attr->dmac, &vlan_tag,
- sgid_attr.ndev->ifindex);
+ status = rdma_addr_find_l2_eth_by_grh(&sgid, &attr->grh.dgid,
+ attr->dmac, &vlan_tag,
+ &sgid_attr.ndev->ifindex,
+ NULL);
if (status) {
pr_err("%s(): Failed to resolve dmac from gid."
"status = %d\n", __func__, status);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 3afb40b..5738493 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -175,7 +175,6 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
dev->ibdev.req_notify_cq = ocrdma_arm_cq;
dev->ibdev.get_dma_mr = ocrdma_get_dma_mr;
- dev->ibdev.reg_phys_mr = ocrdma_reg_kernel_mr;
dev->ibdev.dereg_mr = ocrdma_dereg_mr;
dev->ibdev.reg_user_mr = ocrdma_reg_user_mr;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 76e96f9..d4c687b 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -3066,169 +3066,6 @@ pl_err:
return ERR_PTR(-ENOMEM);
}
-#define MAX_KERNEL_PBE_SIZE 65536
-static inline int count_kernel_pbes(struct ib_phys_buf *buf_list,
- int buf_cnt, u32 *pbe_size)
-{
- u64 total_size = 0;
- u64 buf_size = 0;
- int i;
- *pbe_size = roundup(buf_list[0].size, PAGE_SIZE);
- *pbe_size = roundup_pow_of_two(*pbe_size);
-
- /* find the smallest PBE size that we can have */
- for (i = 0; i < buf_cnt; i++) {
- /* first addr may not be page aligned, so ignore checking */
- if ((i != 0) && ((buf_list[i].addr & ~PAGE_MASK) ||
- (buf_list[i].size & ~PAGE_MASK))) {
- return 0;
- }
-
- /* if configured PBE size is greater then the chosen one,
- * reduce the PBE size.
- */
- buf_size = roundup(buf_list[i].size, PAGE_SIZE);
- /* pbe_size has to be even multiple of 4K 1,2,4,8...*/
- buf_size = roundup_pow_of_two(buf_size);
- if (*pbe_size > buf_size)
- *pbe_size = buf_size;
-
- total_size += buf_size;
- }
- *pbe_size = *pbe_size > MAX_KERNEL_PBE_SIZE ?
- (MAX_KERNEL_PBE_SIZE) : (*pbe_size);
-
- /* num_pbes = total_size / (*pbe_size); this is implemented below. */
-
- return total_size >> ilog2(*pbe_size);
-}
-
-static void build_kernel_pbes(struct ib_phys_buf *buf_list, int ib_buf_cnt,
- u32 pbe_size, struct ocrdma_pbl *pbl_tbl,
- struct ocrdma_hw_mr *hwmr)
-{
- int i;
- int idx;
- int pbes_per_buf = 0;
- u64 buf_addr = 0;
- int num_pbes;
- struct ocrdma_pbe *pbe;
- int total_num_pbes = 0;
-
- if (!hwmr->num_pbes)
- return;
-
- pbe = (struct ocrdma_pbe *)pbl_tbl->va;
- num_pbes = 0;
-
- /* go through the OS phy regions & fill hw pbe entries into pbls. */
- for (i = 0; i < ib_buf_cnt; i++) {
- buf_addr = buf_list[i].addr;
- pbes_per_buf =
- roundup_pow_of_two(roundup(buf_list[i].size, PAGE_SIZE)) /
- pbe_size;
- hwmr->len += buf_list[i].size;
- /* number of pbes can be more for one OS buf, when
- * buffers are of different sizes.
- * split the ib_buf to one or more pbes.
- */
- for (idx = 0; idx < pbes_per_buf; idx++) {
- /* we program always page aligned addresses,
- * first unaligned address is taken care by fbo.
- */
- if (i == 0) {
- /* for non zero fbo, assign the
- * start of the page.
- */
- pbe->pa_lo =
- cpu_to_le32((u32) (buf_addr & PAGE_MASK));
- pbe->pa_hi =
- cpu_to_le32((u32) upper_32_bits(buf_addr));
- } else {
- pbe->pa_lo =
- cpu_to_le32((u32) (buf_addr & 0xffffffff));
- pbe->pa_hi =
- cpu_to_le32((u32) upper_32_bits(buf_addr));
- }
- buf_addr += pbe_size;
- num_pbes += 1;
- total_num_pbes += 1;
- pbe++;
-
- if (total_num_pbes == hwmr->num_pbes)
- goto mr_tbl_done;
- /* if the pbl is full storing the pbes,
- * move to next pbl.
- */
- if (num_pbes == (hwmr->pbl_size/sizeof(u64))) {
- pbl_tbl++;
- pbe = (struct ocrdma_pbe *)pbl_tbl->va;
- num_pbes = 0;
- }
- }
- }
-mr_tbl_done:
- return;
-}
-
-struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *ibpd,
- struct ib_phys_buf *buf_list,
- int buf_cnt, int acc, u64 *iova_start)
-{
- int status = -ENOMEM;
- struct ocrdma_mr *mr;
- struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
- struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
- u32 num_pbes;
- u32 pbe_size = 0;
-
- if ((acc & IB_ACCESS_REMOTE_WRITE) && !(acc & IB_ACCESS_LOCAL_WRITE))
- return ERR_PTR(-EINVAL);
-
- mr = kzalloc(sizeof(*mr), GFP_KERNEL);
- if (!mr)
- return ERR_PTR(status);
-
- num_pbes = count_kernel_pbes(buf_list, buf_cnt, &pbe_size);
- if (num_pbes == 0) {
- status = -EINVAL;
- goto pbl_err;
- }
- status = ocrdma_get_pbl_info(dev, mr, num_pbes);
- if (status)
- goto pbl_err;
-
- mr->hwmr.pbe_size = pbe_size;
- mr->hwmr.fbo = *iova_start - (buf_list[0].addr & PAGE_MASK);
- mr->hwmr.va = *iova_start;
- mr->hwmr.local_rd = 1;
- mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
- mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
- mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
- mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
- mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0;
-
- status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
- if (status)
- goto pbl_err;
- build_kernel_pbes(buf_list, buf_cnt, pbe_size, mr->hwmr.pbl_table,
- &mr->hwmr);
- status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
- if (status)
- goto mbx_err;
-
- mr->ibmr.lkey = mr->hwmr.lkey;
- if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
- mr->ibmr.rkey = mr->hwmr.lkey;
- return &mr->ibmr;
-
-mbx_err:
- ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
-pbl_err:
- kfree(mr);
- return ERR_PTR(status);
-}
-
static int ocrdma_set_page(struct ib_mr *ibmr, u64 addr)
{
struct ocrdma_mr *mr = get_ocrdma_mr(ibmr);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
index a2f3b4d..8b517fd 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -117,9 +117,6 @@ int ocrdma_post_srq_recv(struct ib_srq *, struct ib_recv_wr *,
int ocrdma_dereg_mr(struct ib_mr *);
struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *, int acc);
-struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start);
struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *);
struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd,
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index 13ef22b..fcdf3791 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -89,14 +89,14 @@ static int create_file(const char *name, umode_t mode,
{
int error;
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
*dentry = lookup_one_len(name, parent, strlen(name));
if (!IS_ERR(*dentry))
error = qibfs_mknod(d_inode(parent), *dentry,
mode, fops, data);
else
error = PTR_ERR(*dentry);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
return error;
}
@@ -481,7 +481,7 @@ static int remove_device_files(struct super_block *sb,
int ret, i;
root = dget(sb->s_root);
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
snprintf(unit, sizeof(unit), "%u", dd->unit);
dir = lookup_one_len(unit, root, strlen(unit));
@@ -491,7 +491,7 @@ static int remove_device_files(struct super_block *sb,
goto bail;
}
- mutex_lock(&d_inode(dir)->i_mutex);
+ inode_lock(d_inode(dir));
remove_file(dir, "counters");
remove_file(dir, "counter_names");
remove_file(dir, "portcounter_names");
@@ -506,13 +506,13 @@ static int remove_device_files(struct super_block *sb,
}
}
remove_file(dir, "flash");
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
ret = simple_rmdir(d_inode(root), dir);
d_delete(dir);
dput(dir);
bail:
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
dput(root);
return ret;
}
diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c
index 294f5c7..5f53304 100644
--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ b/drivers/infiniband/hw/qib/qib_mr.c
@@ -150,10 +150,7 @@ static struct qib_mr *alloc_mr(int count, struct ib_pd *pd)
rval = init_qib_mregion(&mr->mr, pd, count);
if (rval)
goto bail;
- /*
- * ib_reg_phys_mr() will initialize mr->ibmr except for
- * lkey and rkey.
- */
+
rval = qib_alloc_lkey(&mr->mr, 0);
if (rval)
goto bail_mregion;
@@ -171,52 +168,6 @@ bail:
}
/**
- * qib_reg_phys_mr - register a physical memory region
- * @pd: protection domain for this memory region
- * @buffer_list: pointer to the list of physical buffers to register
- * @num_phys_buf: the number of physical buffers to register
- * @iova_start: the starting address passed over IB which maps to this MR
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start)
-{
- struct qib_mr *mr;
- int n, m, i;
- struct ib_mr *ret;
-
- mr = alloc_mr(num_phys_buf, pd);
- if (IS_ERR(mr)) {
- ret = (struct ib_mr *)mr;
- goto bail;
- }
-
- mr->mr.user_base = *iova_start;
- mr->mr.iova = *iova_start;
- mr->mr.access_flags = acc;
-
- m = 0;
- n = 0;
- for (i = 0; i < num_phys_buf; i++) {
- mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr;
- mr->mr.map[m]->segs[n].length = buffer_list[i].size;
- mr->mr.length += buffer_list[i].size;
- n++;
- if (n == QIB_SEGSZ) {
- m++;
- n = 0;
- }
- }
-
- ret = &mr->ibmr;
-
-bail:
- return ret;
-}
-
-/**
* qib_reg_user_mr - register a userspace memory region
* @pd: protection domain for this memory region
* @start: starting userspace address
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 40f85bb..3eff35c 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -100,9 +100,10 @@ static u32 credit_table[31] = {
32768 /* 1E */
};
-static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map)
+static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map,
+ gfp_t gfp)
{
- unsigned long page = get_zeroed_page(GFP_KERNEL);
+ unsigned long page = get_zeroed_page(gfp);
/*
* Free the page if someone raced with us installing it.
@@ -121,7 +122,7 @@ static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map)
* zero/one for QP type IB_QPT_SMI/IB_QPT_GSI.
*/
static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
- enum ib_qp_type type, u8 port)
+ enum ib_qp_type type, u8 port, gfp_t gfp)
{
u32 i, offset, max_scan, qpn;
struct qpn_map *map;
@@ -151,7 +152,7 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
max_scan = qpt->nmaps - !offset;
for (i = 0;;) {
if (unlikely(!map->page)) {
- get_map_page(qpt, map);
+ get_map_page(qpt, map, gfp);
if (unlikely(!map->page))
break;
}
@@ -983,13 +984,21 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
size_t sz;
size_t sg_list_sz;
struct ib_qp *ret;
+ gfp_t gfp;
+
if (init_attr->cap.max_send_sge > ib_qib_max_sges ||
init_attr->cap.max_send_wr > ib_qib_max_qp_wrs ||
- init_attr->create_flags) {
- ret = ERR_PTR(-EINVAL);
- goto bail;
- }
+ init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO))
+ return ERR_PTR(-EINVAL);
+
+ /* GFP_NOIO is applicable in RC QPs only */
+ if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO &&
+ init_attr->qp_type != IB_QPT_RC)
+ return ERR_PTR(-EINVAL);
+
+ gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ?
+ GFP_NOIO : GFP_KERNEL;
/* Check receive queue parameters if no SRQ is specified. */
if (!init_attr->srq) {
@@ -1021,7 +1030,8 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
sz = sizeof(struct qib_sge) *
init_attr->cap.max_send_sge +
sizeof(struct qib_swqe);
- swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
+ swq = __vmalloc((init_attr->cap.max_send_wr + 1) * sz,
+ gfp, PAGE_KERNEL);
if (swq == NULL) {
ret = ERR_PTR(-ENOMEM);
goto bail;
@@ -1037,13 +1047,13 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
} else if (init_attr->cap.max_recv_sge > 1)
sg_list_sz = sizeof(*qp->r_sg_list) *
(init_attr->cap.max_recv_sge - 1);
- qp = kzalloc(sz + sg_list_sz, GFP_KERNEL);
+ qp = kzalloc(sz + sg_list_sz, gfp);
if (!qp) {
ret = ERR_PTR(-ENOMEM);
goto bail_swq;
}
RCU_INIT_POINTER(qp->next, NULL);
- qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), GFP_KERNEL);
+ qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), gfp);
if (!qp->s_hdr) {
ret = ERR_PTR(-ENOMEM);
goto bail_qp;
@@ -1058,8 +1068,16 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
sizeof(struct qib_rwqe);
- qp->r_rq.wq = vmalloc_user(sizeof(struct qib_rwq) +
- qp->r_rq.size * sz);
+ if (gfp != GFP_NOIO)
+ qp->r_rq.wq = vmalloc_user(
+ sizeof(struct qib_rwq) +
+ qp->r_rq.size * sz);
+ else
+ qp->r_rq.wq = __vmalloc(
+ sizeof(struct qib_rwq) +
+ qp->r_rq.size * sz,
+ gfp, PAGE_KERNEL);
+
if (!qp->r_rq.wq) {
ret = ERR_PTR(-ENOMEM);
goto bail_qp;
@@ -1090,7 +1108,7 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
dev = to_idev(ibpd->device);
dd = dd_from_dev(dev);
err = alloc_qpn(dd, &dev->qpn_table, init_attr->qp_type,
- init_attr->port_num);
+ init_attr->port_num, gfp);
if (err < 0) {
ret = ERR_PTR(err);
vfree(qp->r_rq.wq);
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index de6cb6f..baf1e42 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -346,6 +346,7 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
unsigned long flags;
struct qib_lkey_table *rkt;
struct qib_pd *pd;
+ int avoid_schedule = 0;
spin_lock_irqsave(&qp->s_lock, flags);
@@ -438,11 +439,15 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
qp->ibqp.qp_type == IB_QPT_RC) {
if (wqe->length > 0x80000000U)
goto bail_inval_free;
+ if (wqe->length <= qp->pmtu)
+ avoid_schedule = 1;
} else if (wqe->length > (dd_from_ibdev(qp->ibqp.device)->pport +
- qp->port_num - 1)->ibmtu)
+ qp->port_num - 1)->ibmtu) {
goto bail_inval_free;
- else
+ } else {
atomic_inc(&to_iah(ud_wr(wr)->ah)->refcount);
+ avoid_schedule = 1;
+ }
wqe->ssn = qp->s_ssn++;
qp->s_head = next;
@@ -458,7 +463,7 @@ bail_inval_free:
bail_inval:
ret = -EINVAL;
bail:
- if (!ret && !wr->next &&
+ if (!ret && !wr->next && !avoid_schedule &&
!qib_sdma_empty(
dd_from_ibdev(qp->ibqp.device)->pport + qp->port_num - 1)) {
qib_schedule_send(qp);
@@ -2256,7 +2261,6 @@ int qib_register_ib_device(struct qib_devdata *dd)
ibdev->poll_cq = qib_poll_cq;
ibdev->req_notify_cq = qib_req_notify_cq;
ibdev->get_dma_mr = qib_get_dma_mr;
- ibdev->reg_phys_mr = qib_reg_phys_mr;
ibdev->reg_user_mr = qib_reg_user_mr;
ibdev->dereg_mr = qib_dereg_mr;
ibdev->alloc_mr = qib_alloc_mr;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index bc803f3..6c5e777 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -1032,10 +1032,6 @@ int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc);
-struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start);
-
struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/qib/qib_verbs_mcast.c b/drivers/infiniband/hw/qib/qib_verbs_mcast.c
index f8ea069..b2fb528 100644
--- a/drivers/infiniband/hw/qib/qib_verbs_mcast.c
+++ b/drivers/infiniband/hw/qib/qib_verbs_mcast.c
@@ -286,15 +286,13 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
struct qib_ibdev *dev = to_idev(ibqp->device);
struct qib_ibport *ibp = to_iport(ibqp->device, qp->port_num);
struct qib_mcast *mcast = NULL;
- struct qib_mcast_qp *p, *tmp;
+ struct qib_mcast_qp *p, *tmp, *delp = NULL;
struct rb_node *n;
int last = 0;
int ret;
- if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) {
- ret = -EINVAL;
- goto bail;
- }
+ if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET)
+ return -EINVAL;
spin_lock_irq(&ibp->lock);
@@ -303,8 +301,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
while (1) {
if (n == NULL) {
spin_unlock_irq(&ibp->lock);
- ret = -EINVAL;
- goto bail;
+ return -EINVAL;
}
mcast = rb_entry(n, struct qib_mcast, rb_node);
@@ -328,6 +325,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
*/
list_del_rcu(&p->list);
mcast->n_attached--;
+ delp = p;
/* If this was the last attached QP, remove the GID too. */
if (list_empty(&mcast->qp_list)) {
@@ -338,15 +336,16 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
}
spin_unlock_irq(&ibp->lock);
+ /* QP not attached */
+ if (!delp)
+ return -EINVAL;
+ /*
+ * Wait for any list walkers to finish before freeing the
+ * list element.
+ */
+ wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
+ qib_mcast_qp_free(delp);
- if (p) {
- /*
- * Wait for any list walkers to finish before freeing the
- * list element.
- */
- wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
- qib_mcast_qp_free(p);
- }
if (last) {
atomic_dec(&mcast->refcount);
wait_event(mcast->wait, !atomic_read(&mcast->refcount));
@@ -355,11 +354,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
dev->n_mcast_grps_allocated--;
spin_unlock_irq(&dev->n_mcast_grps_lock);
}
-
- ret = 0;
-
-bail:
- return ret;
+ return 0;
}
int qib_mcast_tree_empty(struct qib_ibport *ibp)
diff --git a/drivers/infiniband/hw/usnic/usnic_debugfs.c b/drivers/infiniband/hw/usnic/usnic_debugfs.c
index 5e55b8b..92dc66c 100644
--- a/drivers/infiniband/hw/usnic/usnic_debugfs.c
+++ b/drivers/infiniband/hw/usnic/usnic_debugfs.c
@@ -157,8 +157,9 @@ void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow)
qp_flow,
&flowinfo_ops);
if (IS_ERR_OR_NULL(qp_flow->dbgfs_dentry)) {
- usnic_err("Failed to create dbg fs entry for flow %u\n",
- qp_flow->flow->flow_id);
+ usnic_err("Failed to create dbg fs entry for flow %u with error %ld\n",
+ qp_flow->flow->flow_id,
+ PTR_ERR(qp_flow->dbgfs_dentry));
}
}
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
index fcea3a2..5f44b66 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
@@ -521,7 +521,7 @@ int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp,
if (!status) {
qp_grp->state = new_state;
- usnic_info("Transistioned %u from %s to %s",
+ usnic_info("Transitioned %u from %s to %s",
qp_grp->grp_id,
usnic_ib_qp_grp_state_to_string(old_state),
usnic_ib_qp_grp_state_to_string(new_state));
@@ -575,7 +575,7 @@ alloc_res_chunk_list(struct usnic_vnic *vnic,
return res_chunk_list;
out_free_res:
- for (i--; i > 0; i--)
+ for (i--; i >= 0; i--)
usnic_vnic_put_resources(res_chunk_list[i]);
kfree(res_chunk_list);
return ERR_PTR(err);
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index f8e3211..6cdb4d2 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -51,7 +51,7 @@
static void usnic_ib_fw_string_to_u64(char *fw_ver_str, u64 *fw_ver)
{
- *fw_ver = (u64) *fw_ver_str;
+ *fw_ver = *((u64 *)fw_ver_str);
}
static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp,
@@ -571,20 +571,20 @@ int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
qp_grp = to_uqp_grp(ibqp);
- /* TODO: Future Support All States */
mutex_lock(&qp_grp->vf->pf->usdev_lock);
- if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_INIT) {
- status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_INIT, NULL);
- } else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTR) {
- status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTR, NULL);
- } else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTS) {
- status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTS, NULL);
+ if ((attr_mask & IB_QP_PORT) && attr->port_num != 1) {
+ /* usnic devices only have one port */
+ status = -EINVAL;
+ goto out_unlock;
+ }
+ if (attr_mask & IB_QP_STATE) {
+ status = usnic_ib_qp_grp_modify(qp_grp, attr->qp_state, NULL);
} else {
- usnic_err("Unexpected combination mask: %u state: %u\n",
- attr_mask & IB_QP_STATE, attr->qp_state);
+ usnic_err("Unhandled request, attr_mask=0x%x\n", attr_mask);
status = -EINVAL;
}
+out_unlock:
mutex_unlock(&qp_grp->vf->pf->usdev_lock);
return status;
}
@@ -625,8 +625,8 @@ struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
virt_addr, length);
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
- if (IS_ERR_OR_NULL(mr))
- return ERR_PTR(mr ? PTR_ERR(mr) : -ENOMEM);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
mr->umem = usnic_uiom_reg_get(to_upd(pd)->umem_pd, start, length,
access_flags, 0);
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
index 414eaa5..0d9d2e6a 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
@@ -43,8 +43,6 @@ int usnic_ib_query_device(struct ib_device *ibdev,
struct ib_udata *uhw);
int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props);
-enum rdma_protocol_type
-usnic_ib_query_protocol(struct ib_device *device, u8 port_num);
int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
int qp_attr_mask,
struct ib_qp_init_attr *qp_init_attr);
diff --git a/drivers/infiniband/hw/usnic/usnic_vnic.c b/drivers/infiniband/hw/usnic/usnic_vnic.c
index 66de93f..8875107 100644
--- a/drivers/infiniband/hw/usnic/usnic_vnic.c
+++ b/drivers/infiniband/hw/usnic/usnic_vnic.c
@@ -237,7 +237,7 @@ usnic_vnic_get_resources(struct usnic_vnic *vnic, enum usnic_vnic_res_type type,
struct usnic_vnic_res *res;
int i;
- if (usnic_vnic_res_free_cnt(vnic, type) < cnt || cnt < 1 || !owner)
+ if (usnic_vnic_res_free_cnt(vnic, type) < cnt || cnt < 0 || !owner)
return ERR_PTR(-EINVAL);
ret = kzalloc(sizeof(*ret), GFP_ATOMIC);
@@ -247,26 +247,28 @@ usnic_vnic_get_resources(struct usnic_vnic *vnic, enum usnic_vnic_res_type type,
return ERR_PTR(-ENOMEM);
}
- ret->res = kzalloc(sizeof(*(ret->res))*cnt, GFP_ATOMIC);
- if (!ret->res) {
- usnic_err("Failed to allocate resources for %s. Out of memory\n",
- usnic_vnic_pci_name(vnic));
- kfree(ret);
- return ERR_PTR(-ENOMEM);
- }
+ if (cnt > 0) {
+ ret->res = kcalloc(cnt, sizeof(*(ret->res)), GFP_ATOMIC);
+ if (!ret->res) {
+ usnic_err("Failed to allocate resources for %s. Out of memory\n",
+ usnic_vnic_pci_name(vnic));
+ kfree(ret);
+ return ERR_PTR(-ENOMEM);
+ }
- spin_lock(&vnic->res_lock);
- src = &vnic->chunks[type];
- for (i = 0; i < src->cnt && ret->cnt < cnt; i++) {
- res = src->res[i];
- if (!res->owner) {
- src->free_cnt--;
- res->owner = owner;
- ret->res[ret->cnt++] = res;
+ spin_lock(&vnic->res_lock);
+ src = &vnic->chunks[type];
+ for (i = 0; i < src->cnt && ret->cnt < cnt; i++) {
+ res = src->res[i];
+ if (!res->owner) {
+ src->free_cnt--;
+ res->owner = owner;
+ ret->res[ret->cnt++] = res;
+ }
}
- }
- spin_unlock(&vnic->res_lock);
+ spin_unlock(&vnic->res_lock);
+ }
ret->type = type;
ret->vnic = vnic;
WARN_ON(ret->cnt != cnt);
@@ -281,14 +283,16 @@ void usnic_vnic_put_resources(struct usnic_vnic_res_chunk *chunk)
int i;
struct usnic_vnic *vnic = chunk->vnic;
- spin_lock(&vnic->res_lock);
- while ((i = --chunk->cnt) >= 0) {
- res = chunk->res[i];
- chunk->res[i] = NULL;
- res->owner = NULL;
- vnic->chunks[res->type].free_cnt++;
+ if (chunk->cnt > 0) {
+ spin_lock(&vnic->res_lock);
+ while ((i = --chunk->cnt) >= 0) {
+ res = chunk->res[i];
+ chunk->res[i] = NULL;
+ res->owner = NULL;
+ vnic->chunks[res->type].free_cnt++;
+ }
+ spin_unlock(&vnic->res_lock);
}
- spin_unlock(&vnic->res_lock);
kfree(chunk->res);
kfree(chunk);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 3ede103..a6f3eab 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -495,7 +495,6 @@ void ipoib_dev_cleanup(struct net_device *dev);
void ipoib_mcast_join_task(struct work_struct *work);
void ipoib_mcast_carrier_on_task(struct work_struct *work);
void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
-void ipoib_mcast_free(struct ipoib_mcast *mc);
void ipoib_mcast_restart_task(struct work_struct *work);
int ipoib_mcast_start_thread(struct net_device *dev);
@@ -549,8 +548,9 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
union ib_gid *mgid, int set_qkey);
-int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast);
-struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid);
+void ipoib_mcast_remove_list(struct list_head *remove_list);
+void ipoib_check_and_add_mcast_sendonly(struct ipoib_dev_priv *priv, u8 *mgid,
+ struct list_head *remove_list);
int ipoib_init_qp(struct net_device *dev);
int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 3ae9726..917e46e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -70,7 +70,6 @@ static struct ib_qp_attr ipoib_cm_err_attr = {
#define IPOIB_CM_RX_DRAIN_WRID 0xffffffff
static struct ib_send_wr ipoib_cm_rx_drain_wr = {
- .wr_id = IPOIB_CM_RX_DRAIN_WRID,
.opcode = IB_WR_SEND,
};
@@ -223,6 +222,7 @@ static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
* error" WC will be immediately generated for each WR we post.
*/
p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
+ ipoib_cm_rx_drain_wr.wr_id = IPOIB_CM_RX_DRAIN_WRID;
if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr))
ipoib_warn(priv, "failed to post drain wr\n");
@@ -1522,8 +1522,7 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
int ipoib_cm_dev_init(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- int i, ret;
- struct ib_device_attr attr;
+ int max_srq_sge, i;
INIT_LIST_HEAD(&priv->cm.passive_ids);
INIT_LIST_HEAD(&priv->cm.reap_list);
@@ -1540,19 +1539,13 @@ int ipoib_cm_dev_init(struct net_device *dev)
skb_queue_head_init(&priv->cm.skb_queue);
- ret = ib_query_device(priv->ca, &attr);
- if (ret) {
- printk(KERN_WARNING "ib_query_device() failed with %d\n", ret);
- return ret;
- }
-
- ipoib_dbg(priv, "max_srq_sge=%d\n", attr.max_srq_sge);
+ ipoib_dbg(priv, "max_srq_sge=%d\n", priv->ca->attrs.max_srq_sge);
- attr.max_srq_sge = min_t(int, IPOIB_CM_RX_SG, attr.max_srq_sge);
- ipoib_cm_create_srq(dev, attr.max_srq_sge);
+ max_srq_sge = min_t(int, IPOIB_CM_RX_SG, priv->ca->attrs.max_srq_sge);
+ ipoib_cm_create_srq(dev, max_srq_sge);
if (ipoib_cm_has_srq(dev)) {
- priv->cm.max_cm_mtu = attr.max_srq_sge * PAGE_SIZE - 0x10;
- priv->cm.num_frags = attr.max_srq_sge;
+ priv->cm.max_cm_mtu = max_srq_sge * PAGE_SIZE - 0x10;
+ priv->cm.num_frags = max_srq_sge;
ipoib_dbg(priv, "max_cm_mtu = 0x%x, num_frags=%d\n",
priv->cm.max_cm_mtu, priv->cm.num_frags);
} else {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 078cadd..a53fa5f 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -40,15 +40,11 @@ static void ipoib_get_drvinfo(struct net_device *netdev,
struct ethtool_drvinfo *drvinfo)
{
struct ipoib_dev_priv *priv = netdev_priv(netdev);
- struct ib_device_attr *attr;
-
- attr = kmalloc(sizeof(*attr), GFP_KERNEL);
- if (attr && !ib_query_device(priv->ca, attr))
- snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
- "%d.%d.%d", (int)(attr->fw_ver >> 32),
- (int)(attr->fw_ver >> 16) & 0xffff,
- (int)attr->fw_ver & 0xffff);
- kfree(attr);
+
+ snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+ "%d.%d.%d", (int)(priv->ca->attrs.fw_ver >> 32),
+ (int)(priv->ca->attrs.fw_ver >> 16) & 0xffff,
+ (int)priv->ca->attrs.fw_ver & 0xffff);
strlcpy(drvinfo->bus_info, dev_name(priv->ca->dma_device),
sizeof(drvinfo->bus_info));
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 7d32818..25509bb 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1150,8 +1150,6 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
unsigned long flags;
int i;
LIST_HEAD(remove_list);
- struct ipoib_mcast *mcast, *tmcast;
- struct net_device *dev = priv->dev;
if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
return;
@@ -1179,18 +1177,8 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
lockdep_is_held(&priv->lock))) != NULL) {
/* was the neigh idle for two GC periods */
if (time_after(neigh_obsolete, neigh->alive)) {
- u8 *mgid = neigh->daddr + 4;
- /* Is this multicast ? */
- if (*mgid == 0xff) {
- mcast = __ipoib_mcast_find(dev, mgid);
-
- if (mcast && test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
- list_del(&mcast->list);
- rb_erase(&mcast->rb_node, &priv->multicast_tree);
- list_add_tail(&mcast->list, &remove_list);
- }
- }
+ ipoib_check_and_add_mcast_sendonly(priv, neigh->daddr + 4, &remove_list);
rcu_assign_pointer(*np,
rcu_dereference_protected(neigh->hnext,
@@ -1207,10 +1195,7 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
out_unlock:
spin_unlock_irqrestore(&priv->lock, flags);
- list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
- ipoib_mcast_leave(dev, mcast);
- ipoib_mcast_free(mcast);
- }
+ ipoib_mcast_remove_list(&remove_list);
}
static void ipoib_reap_neigh(struct work_struct *work)
@@ -1777,26 +1762,7 @@ int ipoib_add_pkey_attr(struct net_device *dev)
int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
{
- struct ib_device_attr *device_attr;
- int result = -ENOMEM;
-
- device_attr = kmalloc(sizeof *device_attr, GFP_KERNEL);
- if (!device_attr) {
- printk(KERN_WARNING "%s: allocation of %zu bytes failed\n",
- hca->name, sizeof *device_attr);
- return result;
- }
-
- result = ib_query_device(hca, device_attr);
- if (result) {
- printk(KERN_WARNING "%s: ib_query_device failed (ret = %d)\n",
- hca->name, result);
- kfree(device_attr);
- return result;
- }
- priv->hca_caps = device_attr->device_cap_flags;
-
- kfree(device_attr);
+ priv->hca_caps = hca->attrs.device_cap_flags;
if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
priv->dev->hw_features = NETIF_F_SG |
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index f357ca6..050dfa1 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -106,7 +106,7 @@ static void __ipoib_mcast_schedule_join_thread(struct ipoib_dev_priv *priv,
queue_delayed_work(priv->wq, &priv->mcast_task, 0);
}
-void ipoib_mcast_free(struct ipoib_mcast *mcast)
+static void ipoib_mcast_free(struct ipoib_mcast *mcast)
{
struct net_device *dev = mcast->dev;
int tx_dropped = 0;
@@ -153,7 +153,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
return mcast;
}
-struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
+static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct rb_node *n = priv->multicast_tree.rb_node;
@@ -677,7 +677,7 @@ int ipoib_mcast_stop_thread(struct net_device *dev)
return 0;
}
-int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
+static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret = 0;
@@ -704,6 +704,35 @@ int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
return 0;
}
+/*
+ * Check if the multicast group is sendonly. If so remove it from the maps
+ * and add to the remove list
+ */
+void ipoib_check_and_add_mcast_sendonly(struct ipoib_dev_priv *priv, u8 *mgid,
+ struct list_head *remove_list)
+{
+ /* Is this multicast ? */
+ if (*mgid == 0xff) {
+ struct ipoib_mcast *mcast = __ipoib_mcast_find(priv->dev, mgid);
+
+ if (mcast && test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
+ list_del(&mcast->list);
+ rb_erase(&mcast->rb_node, &priv->multicast_tree);
+ list_add_tail(&mcast->list, remove_list);
+ }
+ }
+}
+
+void ipoib_mcast_remove_list(struct list_head *remove_list)
+{
+ struct ipoib_mcast *mcast, *tmcast;
+
+ list_for_each_entry_safe(mcast, tmcast, remove_list, list) {
+ ipoib_mcast_leave(mcast->dev, mcast);
+ ipoib_mcast_free(mcast);
+ }
+}
+
void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -810,10 +839,7 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
wait_for_completion(&mcast->done);
- list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
- ipoib_mcast_leave(dev, mcast);
- ipoib_mcast_free(mcast);
- }
+ ipoib_mcast_remove_list(&remove_list);
}
static int ipoib_mcast_addr_is_valid(const u8 *addr, const u8 *broadcast)
@@ -939,10 +965,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
wait_for_completion(&mcast->done);
- list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
- ipoib_mcast_leave(mcast->dev, mcast);
- ipoib_mcast_free(mcast);
- }
+ ipoib_mcast_remove_list(&remove_list);
/*
* Double check that we are still up
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 9080161..c827c93 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -644,7 +644,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
ib_conn = &iser_conn->ib_conn;
if (ib_conn->pi_support) {
- u32 sig_caps = ib_conn->device->dev_attr.sig_prot_cap;
+ u32 sig_caps = ib_conn->device->ib_device->attrs.sig_prot_cap;
scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps));
scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP |
@@ -656,7 +656,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
* max fastreg page list length.
*/
shost->sg_tablesize = min_t(unsigned short, shost->sg_tablesize,
- ib_conn->device->dev_attr.max_fast_reg_page_list_len);
+ ib_conn->device->ib_device->attrs.max_fast_reg_page_list_len);
shost->max_sectors = min_t(unsigned int,
1024, (shost->sg_tablesize * PAGE_SIZE) >> 9);
@@ -1059,7 +1059,8 @@ static int __init iser_init(void)
release_wq = alloc_workqueue("release workqueue", 0, 0);
if (!release_wq) {
iser_err("failed to allocate release workqueue\n");
- return -ENOMEM;
+ err = -ENOMEM;
+ goto err_alloc_wq;
}
iscsi_iser_scsi_transport = iscsi_register_transport(
@@ -1067,12 +1068,14 @@ static int __init iser_init(void)
if (!iscsi_iser_scsi_transport) {
iser_err("iscsi_register_transport failed\n");
err = -EINVAL;
- goto register_transport_failure;
+ goto err_reg;
}
return 0;
-register_transport_failure:
+err_reg:
+ destroy_workqueue(release_wq);
+err_alloc_wq:
kmem_cache_destroy(ig.desc_cache);
return err;
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 8a5998e..95f0a64 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -48,6 +48,7 @@
#include <scsi/scsi_transport_iscsi.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>
+#include <scsi/iser.h>
#include <linux/interrupt.h>
#include <linux/wait.h>
@@ -151,46 +152,10 @@
- ISER_MAX_RX_MISC_PDUS) / \
(1 + ISER_INFLIGHT_DATAOUTS))
-#define ISER_WC_BATCH_COUNT 16
#define ISER_SIGNAL_CMD_COUNT 32
-#define ISER_VER 0x10
-#define ISER_WSV 0x08
-#define ISER_RSV 0x04
-
-#define ISER_FASTREG_LI_WRID 0xffffffffffffffffULL
-#define ISER_BEACON_WRID 0xfffffffffffffffeULL
-
-/**
- * struct iser_hdr - iSER header
- *
- * @flags: flags support (zbva, remote_inv)
- * @rsvd: reserved
- * @write_stag: write rkey
- * @write_va: write virtual address
- * @reaf_stag: read rkey
- * @read_va: read virtual address
- */
-struct iser_hdr {
- u8 flags;
- u8 rsvd[3];
- __be32 write_stag;
- __be64 write_va;
- __be32 read_stag;
- __be64 read_va;
-} __attribute__((packed));
-
-
-#define ISER_ZBVA_NOT_SUPPORTED 0x80
-#define ISER_SEND_W_INV_NOT_SUPPORTED 0x40
-
-struct iser_cm_hdr {
- u8 flags;
- u8 rsvd[3];
-} __packed;
-
/* Constant PDU lengths calculations */
-#define ISER_HEADERS_LEN (sizeof(struct iser_hdr) + sizeof(struct iscsi_hdr))
+#define ISER_HEADERS_LEN (sizeof(struct iser_ctrl) + sizeof(struct iscsi_hdr))
#define ISER_RECV_DATA_SEG_LEN 128
#define ISER_RX_PAYLOAD_SIZE (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
@@ -269,7 +234,7 @@ enum iser_desc_type {
#define ISER_MAX_WRS 7
/**
- * struct iser_tx_desc - iSER TX descriptor (for send wr_id)
+ * struct iser_tx_desc - iSER TX descriptor
*
* @iser_header: iser header
* @iscsi_header: iscsi header
@@ -287,12 +252,13 @@ enum iser_desc_type {
* @sig_attrs: Signature attributes
*/
struct iser_tx_desc {
- struct iser_hdr iser_header;
+ struct iser_ctrl iser_header;
struct iscsi_hdr iscsi_header;
enum iser_desc_type type;
u64 dma_addr;
struct ib_sge tx_sg[2];
int num_sge;
+ struct ib_cqe cqe;
bool mapped;
u8 wr_idx;
union iser_wr {
@@ -306,9 +272,10 @@ struct iser_tx_desc {
};
#define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \
- sizeof(u64) + sizeof(struct ib_sge)))
+ sizeof(u64) + sizeof(struct ib_sge) + \
+ sizeof(struct ib_cqe)))
/**
- * struct iser_rx_desc - iSER RX descriptor (for recv wr_id)
+ * struct iser_rx_desc - iSER RX descriptor
*
* @iser_header: iser header
* @iscsi_header: iscsi header
@@ -318,12 +285,32 @@ struct iser_tx_desc {
* @pad: for sense data TODO: Modify to maximum sense length supported
*/
struct iser_rx_desc {
- struct iser_hdr iser_header;
+ struct iser_ctrl iser_header;
struct iscsi_hdr iscsi_header;
char data[ISER_RECV_DATA_SEG_LEN];
u64 dma_addr;
struct ib_sge rx_sg;
+ struct ib_cqe cqe;
char pad[ISER_RX_PAD_SIZE];
+} __packed;
+
+/**
+ * struct iser_login_desc - iSER login descriptor
+ *
+ * @req: pointer to login request buffer
+ * @resp: pointer to login response buffer
+ * @req_dma: DMA address of login request buffer
+ * @rsp_dma: DMA address of login response buffer
+ * @sge: IB sge for login post recv
+ * @cqe: completion handler
+ */
+struct iser_login_desc {
+ void *req;
+ void *rsp;
+ u64 req_dma;
+ u64 rsp_dma;
+ struct ib_sge sge;
+ struct ib_cqe cqe;
} __attribute__((packed));
struct iser_conn;
@@ -333,18 +320,12 @@ struct iscsi_iser_task;
/**
* struct iser_comp - iSER completion context
*
- * @device: pointer to device handle
* @cq: completion queue
- * @wcs: work completion array
- * @tasklet: Tasklet handle
* @active_qps: Number of active QPs attached
* to completion context
*/
struct iser_comp {
- struct iser_device *device;
struct ib_cq *cq;
- struct ib_wc wcs[ISER_WC_BATCH_COUNT];
- struct tasklet_struct tasklet;
int active_qps;
};
@@ -380,7 +361,6 @@ struct iser_reg_ops {
*
* @ib_device: RDMA device
* @pd: Protection Domain for this device
- * @dev_attr: Device attributes container
* @mr: Global DMA memory region
* @event_handler: IB events handle routine
* @ig_list: entry in devices list
@@ -389,18 +369,19 @@ struct iser_reg_ops {
* cpus and device max completion vectors
* @comps: Dinamically allocated array of completion handlers
* @reg_ops: Registration ops
+ * @remote_inv_sup: Remote invalidate is supported on this device
*/
struct iser_device {
struct ib_device *ib_device;
struct ib_pd *pd;
- struct ib_device_attr dev_attr;
struct ib_mr *mr;
struct ib_event_handler event_handler;
struct list_head ig_list;
int refcount;
int comps_used;
struct iser_comp *comps;
- struct iser_reg_ops *reg_ops;
+ const struct iser_reg_ops *reg_ops;
+ bool remote_inv_sup;
};
#define ISER_CHECK_GUARD 0xc0
@@ -475,10 +456,11 @@ struct iser_fr_pool {
* @rx_wr: receive work request for batch posts
* @device: reference to iser device
* @comp: iser completion context
- * @pi_support: Indicate device T10-PI support
- * @beacon: beacon send wr to signal all flush errors were drained
- * @flush_comp: completes when all connection completions consumed
* @fr_pool: connection fast registration poool
+ * @pi_support: Indicate device T10-PI support
+ * @last: last send wr to signal all flush errors were drained
+ * @last_cqe: cqe handler for last wr
+ * @last_comp: completes when all connection completions consumed
*/
struct ib_conn {
struct rdma_cm_id *cma_id;
@@ -488,10 +470,12 @@ struct ib_conn {
struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX];
struct iser_device *device;
struct iser_comp *comp;
- bool pi_support;
- struct ib_send_wr beacon;
- struct completion flush_comp;
struct iser_fr_pool fr_pool;
+ bool pi_support;
+ struct ib_send_wr last;
+ struct ib_cqe last_cqe;
+ struct ib_cqe reg_cqe;
+ struct completion last_comp;
};
/**
@@ -514,11 +498,7 @@ struct ib_conn {
* @up_completion: connection establishment completed
* (state is ISER_CONN_UP)
* @conn_list: entry in ig conn list
- * @login_buf: login data buffer (stores login parameters)
- * @login_req_buf: login request buffer
- * @login_req_dma: login request buffer dma address
- * @login_resp_buf: login response buffer
- * @login_resp_dma: login response buffer dma address
+ * @login_desc: login descriptor
* @rx_desc_head: head of rx_descs cyclic buffer
* @rx_descs: rx buffers array (cyclic buffer)
* @num_rx_descs: number of rx descriptors
@@ -541,15 +521,13 @@ struct iser_conn {
struct completion ib_completion;
struct completion up_completion;
struct list_head conn_list;
-
- char *login_buf;
- char *login_req_buf, *login_resp_buf;
- u64 login_req_dma, login_resp_dma;
+ struct iser_login_desc login_desc;
unsigned int rx_desc_head;
struct iser_rx_desc *rx_descs;
u32 num_rx_descs;
unsigned short scsi_sg_tablesize;
unsigned int scsi_max_sectors;
+ bool snd_w_inv;
};
/**
@@ -579,9 +557,8 @@ struct iscsi_iser_task {
struct iser_page_vec {
u64 *pages;
- int length;
- int offset;
- int data_size;
+ int npages;
+ struct ib_mr fake_mr;
};
/**
@@ -633,12 +610,14 @@ int iser_conn_terminate(struct iser_conn *iser_conn);
void iser_release_work(struct work_struct *work);
-void iser_rcv_completion(struct iser_rx_desc *desc,
- unsigned long dto_xfer_len,
- struct ib_conn *ib_conn);
-
-void iser_snd_completion(struct iser_tx_desc *desc,
- struct ib_conn *ib_conn);
+void iser_err_comp(struct ib_wc *wc, const char *type);
+void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc);
void iser_task_rdma_init(struct iscsi_iser_task *task);
@@ -651,7 +630,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir);
int iser_reg_rdma_mem(struct iscsi_iser_task *task,
- enum iser_data_dir dir);
+ enum iser_data_dir dir,
+ bool all_imm);
void iser_unreg_rdma_mem(struct iscsi_iser_task *task,
enum iser_data_dir dir);
@@ -719,4 +699,28 @@ iser_tx_next_wr(struct iser_tx_desc *tx_desc)
return cur_wr;
}
+static inline struct iser_conn *
+to_iser_conn(struct ib_conn *ib_conn)
+{
+ return container_of(ib_conn, struct iser_conn, ib_conn);
+}
+
+static inline struct iser_rx_desc *
+iser_rx(struct ib_cqe *cqe)
+{
+ return container_of(cqe, struct iser_rx_desc, cqe);
+}
+
+static inline struct iser_tx_desc *
+iser_tx(struct ib_cqe *cqe)
+{
+ return container_of(cqe, struct iser_tx_desc, cqe);
+}
+
+static inline struct iser_login_desc *
+iser_login(struct ib_cqe *cqe)
+{
+ return container_of(cqe, struct iser_login_desc, cqe);
+}
+
#endif
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index ffd00c4..ed54b38 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -51,7 +51,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
struct iscsi_iser_task *iser_task = task->dd_data;
struct iser_mem_reg *mem_reg;
int err;
- struct iser_hdr *hdr = &iser_task->desc.iser_header;
+ struct iser_ctrl *hdr = &iser_task->desc.iser_header;
struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN];
err = iser_dma_map_task_data(iser_task,
@@ -72,7 +72,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
return err;
}
- err = iser_reg_rdma_mem(iser_task, ISER_DIR_IN);
+ err = iser_reg_rdma_mem(iser_task, ISER_DIR_IN, false);
if (err) {
iser_err("Failed to set up Data-IN RDMA\n");
return err;
@@ -104,7 +104,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
struct iscsi_iser_task *iser_task = task->dd_data;
struct iser_mem_reg *mem_reg;
int err;
- struct iser_hdr *hdr = &iser_task->desc.iser_header;
+ struct iser_ctrl *hdr = &iser_task->desc.iser_header;
struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT];
struct ib_sge *tx_dsg = &iser_task->desc.tx_sg[1];
@@ -126,7 +126,8 @@ iser_prepare_write_cmd(struct iscsi_task *task,
return err;
}
- err = iser_reg_rdma_mem(iser_task, ISER_DIR_OUT);
+ err = iser_reg_rdma_mem(iser_task, ISER_DIR_OUT,
+ buf_out->data_len == imm_sz);
if (err != 0) {
iser_err("Failed to register write cmd RDMA mem\n");
return err;
@@ -166,7 +167,7 @@ static void iser_create_send_desc(struct iser_conn *iser_conn,
ib_dma_sync_single_for_cpu(device->ib_device,
tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
- memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
+ memset(&tx_desc->iser_header, 0, sizeof(struct iser_ctrl));
tx_desc->iser_header.flags = ISER_VER;
tx_desc->num_sge = 1;
}
@@ -174,73 +175,63 @@ static void iser_create_send_desc(struct iser_conn *iser_conn,
static void iser_free_login_buf(struct iser_conn *iser_conn)
{
struct iser_device *device = iser_conn->ib_conn.device;
+ struct iser_login_desc *desc = &iser_conn->login_desc;
- if (!iser_conn->login_buf)
+ if (!desc->req)
return;
- if (iser_conn->login_req_dma)
- ib_dma_unmap_single(device->ib_device,
- iser_conn->login_req_dma,
- ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
+ ib_dma_unmap_single(device->ib_device, desc->req_dma,
+ ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
- if (iser_conn->login_resp_dma)
- ib_dma_unmap_single(device->ib_device,
- iser_conn->login_resp_dma,
- ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
+ ib_dma_unmap_single(device->ib_device, desc->rsp_dma,
+ ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
- kfree(iser_conn->login_buf);
+ kfree(desc->req);
+ kfree(desc->rsp);
/* make sure we never redo any unmapping */
- iser_conn->login_req_dma = 0;
- iser_conn->login_resp_dma = 0;
- iser_conn->login_buf = NULL;
+ desc->req = NULL;
+ desc->rsp = NULL;
}
static int iser_alloc_login_buf(struct iser_conn *iser_conn)
{
struct iser_device *device = iser_conn->ib_conn.device;
- int req_err, resp_err;
-
- BUG_ON(device == NULL);
-
- iser_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
- ISER_RX_LOGIN_SIZE, GFP_KERNEL);
- if (!iser_conn->login_buf)
- goto out_err;
-
- iser_conn->login_req_buf = iser_conn->login_buf;
- iser_conn->login_resp_buf = iser_conn->login_buf +
- ISCSI_DEF_MAX_RECV_SEG_LEN;
-
- iser_conn->login_req_dma = ib_dma_map_single(device->ib_device,
- iser_conn->login_req_buf,
- ISCSI_DEF_MAX_RECV_SEG_LEN,
- DMA_TO_DEVICE);
-
- iser_conn->login_resp_dma = ib_dma_map_single(device->ib_device,
- iser_conn->login_resp_buf,
- ISER_RX_LOGIN_SIZE,
- DMA_FROM_DEVICE);
-
- req_err = ib_dma_mapping_error(device->ib_device,
- iser_conn->login_req_dma);
- resp_err = ib_dma_mapping_error(device->ib_device,
- iser_conn->login_resp_dma);
-
- if (req_err || resp_err) {
- if (req_err)
- iser_conn->login_req_dma = 0;
- if (resp_err)
- iser_conn->login_resp_dma = 0;
- goto free_login_buf;
- }
+ struct iser_login_desc *desc = &iser_conn->login_desc;
+
+ desc->req = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN, GFP_KERNEL);
+ if (!desc->req)
+ return -ENOMEM;
+
+ desc->req_dma = ib_dma_map_single(device->ib_device, desc->req,
+ ISCSI_DEF_MAX_RECV_SEG_LEN,
+ DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(device->ib_device,
+ desc->req_dma))
+ goto free_req;
+
+ desc->rsp = kmalloc(ISER_RX_LOGIN_SIZE, GFP_KERNEL);
+ if (!desc->rsp)
+ goto unmap_req;
+
+ desc->rsp_dma = ib_dma_map_single(device->ib_device, desc->rsp,
+ ISER_RX_LOGIN_SIZE,
+ DMA_FROM_DEVICE);
+ if (ib_dma_mapping_error(device->ib_device,
+ desc->rsp_dma))
+ goto free_rsp;
+
return 0;
-free_login_buf:
- iser_free_login_buf(iser_conn);
+free_rsp:
+ kfree(desc->rsp);
+unmap_req:
+ ib_dma_unmap_single(device->ib_device, desc->req_dma,
+ ISCSI_DEF_MAX_RECV_SEG_LEN,
+ DMA_TO_DEVICE);
+free_req:
+ kfree(desc->req);
-out_err:
- iser_err("unable to alloc or map login buf\n");
return -ENOMEM;
}
@@ -280,11 +271,11 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn,
goto rx_desc_dma_map_failed;
rx_desc->dma_addr = dma_addr;
-
+ rx_desc->cqe.done = iser_task_rsp;
rx_sg = &rx_desc->rx_sg;
- rx_sg->addr = rx_desc->dma_addr;
+ rx_sg->addr = rx_desc->dma_addr;
rx_sg->length = ISER_RX_PAYLOAD_SIZE;
- rx_sg->lkey = device->pd->local_dma_lkey;
+ rx_sg->lkey = device->pd->local_dma_lkey;
}
iser_conn->rx_desc_head = 0;
@@ -383,6 +374,7 @@ int iser_send_command(struct iscsi_conn *conn,
/* build the tx desc regd header and add it to the tx desc dto */
tx_desc->type = ISCSI_TX_SCSI_COMMAND;
+ tx_desc->cqe.done = iser_cmd_comp;
iser_create_send_desc(iser_conn, tx_desc);
if (hdr->flags & ISCSI_FLAG_CMD_READ) {
@@ -464,6 +456,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
}
tx_desc->type = ISCSI_TX_DATAOUT;
+ tx_desc->cqe.done = iser_dataout_comp;
tx_desc->iser_header.flags = ISER_VER;
memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr));
@@ -513,6 +506,7 @@ int iser_send_control(struct iscsi_conn *conn,
/* build the tx desc regd header and add it to the tx desc dto */
mdesc->type = ISCSI_TX_CONTROL;
+ mdesc->cqe.done = iser_ctrl_comp;
iser_create_send_desc(iser_conn, mdesc);
device = iser_conn->ib_conn.device;
@@ -520,25 +514,25 @@ int iser_send_control(struct iscsi_conn *conn,
data_seg_len = ntoh24(task->hdr->dlength);
if (data_seg_len > 0) {
+ struct iser_login_desc *desc = &iser_conn->login_desc;
struct ib_sge *tx_dsg = &mdesc->tx_sg[1];
+
if (task != conn->login_task) {
iser_err("data present on non login task!!!\n");
goto send_control_error;
}
- ib_dma_sync_single_for_cpu(device->ib_device,
- iser_conn->login_req_dma, task->data_count,
- DMA_TO_DEVICE);
+ ib_dma_sync_single_for_cpu(device->ib_device, desc->req_dma,
+ task->data_count, DMA_TO_DEVICE);
- memcpy(iser_conn->login_req_buf, task->data, task->data_count);
+ memcpy(desc->req, task->data, task->data_count);
- ib_dma_sync_single_for_device(device->ib_device,
- iser_conn->login_req_dma, task->data_count,
- DMA_TO_DEVICE);
+ ib_dma_sync_single_for_device(device->ib_device, desc->req_dma,
+ task->data_count, DMA_TO_DEVICE);
- tx_dsg->addr = iser_conn->login_req_dma;
- tx_dsg->length = task->data_count;
- tx_dsg->lkey = device->pd->local_dma_lkey;
+ tx_dsg->addr = desc->req_dma;
+ tx_dsg->length = task->data_count;
+ tx_dsg->lkey = device->pd->local_dma_lkey;
mdesc->num_sge = 2;
}
@@ -562,41 +556,126 @@ send_control_error:
return err;
}
-/**
- * iser_rcv_dto_completion - recv DTO completion
- */
-void iser_rcv_completion(struct iser_rx_desc *rx_desc,
- unsigned long rx_xfer_len,
- struct ib_conn *ib_conn)
+void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc)
{
- struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
- ib_conn);
+ struct ib_conn *ib_conn = wc->qp->qp_context;
+ struct iser_conn *iser_conn = to_iser_conn(ib_conn);
+ struct iser_login_desc *desc = iser_login(wc->wr_cqe);
struct iscsi_hdr *hdr;
- u64 rx_dma;
- int rx_buflen, outstanding, count, err;
+ char *data;
+ int length;
- /* differentiate between login to all other PDUs */
- if ((char *)rx_desc == iser_conn->login_resp_buf) {
- rx_dma = iser_conn->login_resp_dma;
- rx_buflen = ISER_RX_LOGIN_SIZE;
- } else {
- rx_dma = rx_desc->dma_addr;
- rx_buflen = ISER_RX_PAYLOAD_SIZE;
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ iser_err_comp(wc, "login_rsp");
+ return;
+ }
+
+ ib_dma_sync_single_for_cpu(ib_conn->device->ib_device,
+ desc->rsp_dma, ISER_RX_LOGIN_SIZE,
+ DMA_FROM_DEVICE);
+
+ hdr = desc->rsp + sizeof(struct iser_ctrl);
+ data = desc->rsp + ISER_HEADERS_LEN;
+ length = wc->byte_len - ISER_HEADERS_LEN;
+
+ iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
+ hdr->itt, length);
+
+ iscsi_iser_recv(iser_conn->iscsi_conn, hdr, data, length);
+
+ ib_dma_sync_single_for_device(ib_conn->device->ib_device,
+ desc->rsp_dma, ISER_RX_LOGIN_SIZE,
+ DMA_FROM_DEVICE);
+
+ ib_conn->post_recv_buf_count--;
+}
+
+static inline void
+iser_inv_desc(struct iser_fr_desc *desc, u32 rkey)
+{
+ if (likely(rkey == desc->rsc.mr->rkey))
+ desc->rsc.mr_valid = 0;
+ else if (likely(rkey == desc->pi_ctx->sig_mr->rkey))
+ desc->pi_ctx->sig_mr_valid = 0;
+}
+
+static int
+iser_check_remote_inv(struct iser_conn *iser_conn,
+ struct ib_wc *wc,
+ struct iscsi_hdr *hdr)
+{
+ if (wc->wc_flags & IB_WC_WITH_INVALIDATE) {
+ struct iscsi_task *task;
+ u32 rkey = wc->ex.invalidate_rkey;
+
+ iser_dbg("conn %p: remote invalidation for rkey %#x\n",
+ iser_conn, rkey);
+
+ if (unlikely(!iser_conn->snd_w_inv)) {
+ iser_err("conn %p: unexepected remote invalidation, "
+ "terminating connection\n", iser_conn);
+ return -EPROTO;
+ }
+
+ task = iscsi_itt_to_ctask(iser_conn->iscsi_conn, hdr->itt);
+ if (likely(task)) {
+ struct iscsi_iser_task *iser_task = task->dd_data;
+ struct iser_fr_desc *desc;
+
+ if (iser_task->dir[ISER_DIR_IN]) {
+ desc = iser_task->rdma_reg[ISER_DIR_IN].mem_h;
+ iser_inv_desc(desc, rkey);
+ }
+
+ if (iser_task->dir[ISER_DIR_OUT]) {
+ desc = iser_task->rdma_reg[ISER_DIR_OUT].mem_h;
+ iser_inv_desc(desc, rkey);
+ }
+ } else {
+ iser_err("failed to get task for itt=%d\n", hdr->itt);
+ return -EINVAL;
+ }
}
- ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
- rx_buflen, DMA_FROM_DEVICE);
+ return 0;
+}
- hdr = &rx_desc->iscsi_header;
+
+void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct ib_conn *ib_conn = wc->qp->qp_context;
+ struct iser_conn *iser_conn = to_iser_conn(ib_conn);
+ struct iser_rx_desc *desc = iser_rx(wc->wr_cqe);
+ struct iscsi_hdr *hdr;
+ int length;
+ int outstanding, count, err;
+
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ iser_err_comp(wc, "task_rsp");
+ return;
+ }
+
+ ib_dma_sync_single_for_cpu(ib_conn->device->ib_device,
+ desc->dma_addr, ISER_RX_PAYLOAD_SIZE,
+ DMA_FROM_DEVICE);
+
+ hdr = &desc->iscsi_header;
+ length = wc->byte_len - ISER_HEADERS_LEN;
iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
- hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN));
+ hdr->itt, length);
+
+ if (iser_check_remote_inv(iser_conn, wc, hdr)) {
+ iscsi_conn_failure(iser_conn->iscsi_conn,
+ ISCSI_ERR_CONN_FAILED);
+ return;
+ }
- iscsi_iser_recv(iser_conn->iscsi_conn, hdr, rx_desc->data,
- rx_xfer_len - ISER_HEADERS_LEN);
+ iscsi_iser_recv(iser_conn->iscsi_conn, hdr, desc->data, length);
- ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
- rx_buflen, DMA_FROM_DEVICE);
+ ib_dma_sync_single_for_device(ib_conn->device->ib_device,
+ desc->dma_addr, ISER_RX_PAYLOAD_SIZE,
+ DMA_FROM_DEVICE);
/* decrementing conn->post_recv_buf_count only --after-- freeing the *
* task eliminates the need to worry on tasks which are completed in *
@@ -604,9 +683,6 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
* for the posted rx bufs refcount to become zero handles everything */
ib_conn->post_recv_buf_count--;
- if (rx_dma == iser_conn->login_resp_dma)
- return;
-
outstanding = ib_conn->post_recv_buf_count;
if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) {
count = min(iser_conn->qp_max_recv_dtos - outstanding,
@@ -617,26 +693,47 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
}
}
-void iser_snd_completion(struct iser_tx_desc *tx_desc,
- struct ib_conn *ib_conn)
+void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc)
{
+ if (unlikely(wc->status != IB_WC_SUCCESS))
+ iser_err_comp(wc, "command");
+}
+
+void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct iser_tx_desc *desc = iser_tx(wc->wr_cqe);
struct iscsi_task *task;
- struct iser_device *device = ib_conn->device;
- if (tx_desc->type == ISCSI_TX_DATAOUT) {
- ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
- ISER_HEADERS_LEN, DMA_TO_DEVICE);
- kmem_cache_free(ig.desc_cache, tx_desc);
- tx_desc = NULL;
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ iser_err_comp(wc, "control");
+ return;
}
- if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL) {
- /* this arithmetic is legal by libiscsi dd_data allocation */
- task = (void *) ((long)(void *)tx_desc -
- sizeof(struct iscsi_task));
- if (task->hdr->itt == RESERVED_ITT)
- iscsi_put_task(task);
- }
+ /* this arithmetic is legal by libiscsi dd_data allocation */
+ task = (void *)desc - sizeof(struct iscsi_task);
+ if (task->hdr->itt == RESERVED_ITT)
+ iscsi_put_task(task);
+}
+
+void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct iser_tx_desc *desc = iser_tx(wc->wr_cqe);
+ struct ib_conn *ib_conn = wc->qp->qp_context;
+ struct iser_device *device = ib_conn->device;
+
+ if (unlikely(wc->status != IB_WC_SUCCESS))
+ iser_err_comp(wc, "dataout");
+
+ ib_dma_unmap_single(device->ib_device, desc->dma_addr,
+ ISER_HEADERS_LEN, DMA_TO_DEVICE);
+ kmem_cache_free(ig.desc_cache, desc);
+}
+
+void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct ib_conn *ib_conn = wc->qp->qp_context;
+
+ complete(&ib_conn->last_comp);
}
void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index ea765fb..9a391cc 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -49,7 +49,7 @@ int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
struct iser_reg_resources *rsc,
struct iser_mem_reg *mem_reg);
-static struct iser_reg_ops fastreg_ops = {
+static const struct iser_reg_ops fastreg_ops = {
.alloc_reg_res = iser_alloc_fastreg_pool,
.free_reg_res = iser_free_fastreg_pool,
.reg_mem = iser_fast_reg_mr,
@@ -58,7 +58,7 @@ static struct iser_reg_ops fastreg_ops = {
.reg_desc_put = iser_reg_desc_put_fr,
};
-static struct iser_reg_ops fmr_ops = {
+static const struct iser_reg_ops fmr_ops = {
.alloc_reg_res = iser_alloc_fmr_pool,
.free_reg_res = iser_free_fmr_pool,
.reg_mem = iser_fast_reg_fmr,
@@ -67,19 +67,24 @@ static struct iser_reg_ops fmr_ops = {
.reg_desc_put = iser_reg_desc_put_fmr,
};
+void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc)
+{
+ iser_err_comp(wc, "memreg");
+}
+
int iser_assign_reg_ops(struct iser_device *device)
{
- struct ib_device_attr *dev_attr = &device->dev_attr;
+ struct ib_device *ib_dev = device->ib_device;
/* Assign function handles - based on FMR support */
- if (device->ib_device->alloc_fmr && device->ib_device->dealloc_fmr &&
- device->ib_device->map_phys_fmr && device->ib_device->unmap_fmr) {
+ if (ib_dev->alloc_fmr && ib_dev->dealloc_fmr &&
+ ib_dev->map_phys_fmr && ib_dev->unmap_fmr) {
iser_info("FMR supported, using FMR for registration\n");
device->reg_ops = &fmr_ops;
- } else
- if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+ } else if (ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
iser_info("FastReg supported, using FastReg for registration\n");
device->reg_ops = &fastreg_ops;
+ device->remote_inv_sup = iser_always_reg;
} else {
iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n");
return -1;
@@ -131,67 +136,6 @@ iser_reg_desc_put_fmr(struct ib_conn *ib_conn,
{
}
-#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0)
-
-/**
- * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
- * and returns the length of resulting physical address array (may be less than
- * the original due to possible compaction).
- *
- * we build a "page vec" under the assumption that the SG meets the RDMA
- * alignment requirements. Other then the first and last SG elements, all
- * the "internal" elements can be compacted into a list whose elements are
- * dma addresses of physical pages. The code supports also the weird case
- * where --few fragments of the same page-- are present in the SG as
- * consecutive elements. Also, it handles one entry SG.
- */
-
-static int iser_sg_to_page_vec(struct iser_data_buf *data,
- struct ib_device *ibdev, u64 *pages,
- int *offset, int *data_size)
-{
- struct scatterlist *sg, *sgl = data->sg;
- u64 start_addr, end_addr, page, chunk_start = 0;
- unsigned long total_sz = 0;
- unsigned int dma_len;
- int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;
-
- /* compute the offset of first element */
- *offset = (u64) sgl[0].offset & ~MASK_4K;
-
- new_chunk = 1;
- cur_page = 0;
- for_each_sg(sgl, sg, data->dma_nents, i) {
- start_addr = ib_sg_dma_address(ibdev, sg);
- if (new_chunk)
- chunk_start = start_addr;
- dma_len = ib_sg_dma_len(ibdev, sg);
- end_addr = start_addr + dma_len;
- total_sz += dma_len;
-
- /* collect page fragments until aligned or end of SG list */
- if (!IS_4K_ALIGNED(end_addr) && i < last_ent) {
- new_chunk = 0;
- continue;
- }
- new_chunk = 1;
-
- /* address of the first page in the contiguous chunk;
- masking relevant for the very first SG entry,
- which might be unaligned */
- page = chunk_start & MASK_4K;
- do {
- pages[cur_page++] = page;
- page += SIZE_4K;
- } while (page < end_addr);
- }
-
- *data_size = total_sz;
- iser_dbg("page_vec->data_size:%d cur_page %d\n",
- *data_size, cur_page);
- return cur_page;
-}
-
static void iser_data_buf_dump(struct iser_data_buf *data,
struct ib_device *ibdev)
{
@@ -210,10 +154,10 @@ static void iser_dump_page_vec(struct iser_page_vec *page_vec)
{
int i;
- iser_err("page vec length %d data size %d\n",
- page_vec->length, page_vec->data_size);
- for (i = 0; i < page_vec->length; i++)
- iser_err("%d %lx\n",i,(unsigned long)page_vec->pages[i]);
+ iser_err("page vec npages %d data length %d\n",
+ page_vec->npages, page_vec->fake_mr.length);
+ for (i = 0; i < page_vec->npages; i++)
+ iser_err("vec[%d]: %llx\n", i, page_vec->pages[i]);
}
int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
@@ -251,7 +195,11 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
struct scatterlist *sg = mem->sg;
reg->sge.lkey = device->pd->local_dma_lkey;
- reg->rkey = device->mr->rkey;
+ /*
+ * FIXME: rework the registration code path to differentiate
+ * rkey/lkey use cases
+ */
+ reg->rkey = device->mr ? device->mr->rkey : 0;
reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]);
reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]);
@@ -262,11 +210,16 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
return 0;
}
-/**
- * iser_reg_page_vec - Register physical memory
- *
- * returns: 0 on success, errno code on failure
- */
+static int iser_set_page(struct ib_mr *mr, u64 addr)
+{
+ struct iser_page_vec *page_vec =
+ container_of(mr, struct iser_page_vec, fake_mr);
+
+ page_vec->pages[page_vec->npages++] = addr;
+
+ return 0;
+}
+
static
int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
struct iser_data_buf *mem,
@@ -280,22 +233,19 @@ int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
struct ib_pool_fmr *fmr;
int ret, plen;
- plen = iser_sg_to_page_vec(mem, device->ib_device,
- page_vec->pages,
- &page_vec->offset,
- &page_vec->data_size);
- page_vec->length = plen;
- if (plen * SIZE_4K < page_vec->data_size) {
+ page_vec->npages = 0;
+ page_vec->fake_mr.page_size = SIZE_4K;
+ plen = ib_sg_to_pages(&page_vec->fake_mr, mem->sg,
+ mem->size, iser_set_page);
+ if (unlikely(plen < mem->size)) {
iser_err("page vec too short to hold this SG\n");
iser_data_buf_dump(mem, device->ib_device);
iser_dump_page_vec(page_vec);
return -EINVAL;
}
- fmr = ib_fmr_pool_map_phys(fmr_pool,
- page_vec->pages,
- page_vec->length,
- page_vec->pages[0]);
+ fmr = ib_fmr_pool_map_phys(fmr_pool, page_vec->pages,
+ page_vec->npages, page_vec->pages[0]);
if (IS_ERR(fmr)) {
ret = PTR_ERR(fmr);
iser_err("ib_fmr_pool_map_phys failed: %d\n", ret);
@@ -304,8 +254,8 @@ int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
reg->sge.lkey = fmr->fmr->lkey;
reg->rkey = fmr->fmr->rkey;
- reg->sge.addr = page_vec->pages[0] + page_vec->offset;
- reg->sge.length = page_vec->data_size;
+ reg->sge.addr = page_vec->fake_mr.iova;
+ reg->sge.length = page_vec->fake_mr.length;
reg->mem_h = fmr;
iser_dbg("fmr reg: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
@@ -413,19 +363,16 @@ iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
*mask |= ISER_CHECK_GUARD;
}
-static void
-iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
+static inline void
+iser_inv_rkey(struct ib_send_wr *inv_wr,
+ struct ib_mr *mr,
+ struct ib_cqe *cqe)
{
- u32 rkey;
-
inv_wr->opcode = IB_WR_LOCAL_INV;
- inv_wr->wr_id = ISER_FASTREG_LI_WRID;
+ inv_wr->wr_cqe = cqe;
inv_wr->ex.invalidate_rkey = mr->rkey;
inv_wr->send_flags = 0;
inv_wr->num_sge = 0;
-
- rkey = ib_inc_rkey(mr->rkey);
- ib_update_fast_reg_key(mr, rkey);
}
static int
@@ -437,7 +384,9 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
{
struct iser_tx_desc *tx_desc = &iser_task->desc;
struct ib_sig_attrs *sig_attrs = &tx_desc->sig_attrs;
+ struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe;
struct ib_sig_handover_wr *wr;
+ struct ib_mr *mr = pi_ctx->sig_mr;
int ret;
memset(sig_attrs, 0, sizeof(*sig_attrs));
@@ -447,17 +396,19 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask);
- if (!pi_ctx->sig_mr_valid)
- iser_inv_rkey(iser_tx_next_wr(tx_desc), pi_ctx->sig_mr);
+ if (pi_ctx->sig_mr_valid)
+ iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe);
+
+ ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
wr = sig_handover_wr(iser_tx_next_wr(tx_desc));
wr->wr.opcode = IB_WR_REG_SIG_MR;
- wr->wr.wr_id = ISER_FASTREG_LI_WRID;
+ wr->wr.wr_cqe = cqe;
wr->wr.sg_list = &data_reg->sge;
wr->wr.num_sge = 1;
wr->wr.send_flags = 0;
wr->sig_attrs = sig_attrs;
- wr->sig_mr = pi_ctx->sig_mr;
+ wr->sig_mr = mr;
if (scsi_prot_sg_count(iser_task->sc))
wr->prot = &prot_reg->sge;
else
@@ -465,10 +416,10 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
wr->access_flags = IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_READ |
IB_ACCESS_REMOTE_WRITE;
- pi_ctx->sig_mr_valid = 0;
+ pi_ctx->sig_mr_valid = 1;
- sig_reg->sge.lkey = pi_ctx->sig_mr->lkey;
- sig_reg->rkey = pi_ctx->sig_mr->rkey;
+ sig_reg->sge.lkey = mr->lkey;
+ sig_reg->rkey = mr->rkey;
sig_reg->sge.addr = 0;
sig_reg->sge.length = scsi_transfer_length(iser_task->sc);
@@ -485,12 +436,15 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
struct iser_mem_reg *reg)
{
struct iser_tx_desc *tx_desc = &iser_task->desc;
+ struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe;
struct ib_mr *mr = rsc->mr;
struct ib_reg_wr *wr;
int n;
- if (!rsc->mr_valid)
- iser_inv_rkey(iser_tx_next_wr(tx_desc), mr);
+ if (rsc->mr_valid)
+ iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe);
+
+ ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
n = ib_map_mr_sg(mr, mem->sg, mem->size, SIZE_4K);
if (unlikely(n != mem->size)) {
@@ -501,7 +455,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
wr = reg_wr(iser_tx_next_wr(tx_desc));
wr->wr.opcode = IB_WR_REG_MR;
- wr->wr.wr_id = ISER_FASTREG_LI_WRID;
+ wr->wr.wr_cqe = cqe;
wr->wr.send_flags = 0;
wr->wr.num_sge = 0;
wr->mr = mr;
@@ -510,7 +464,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
IB_ACCESS_REMOTE_WRITE |
IB_ACCESS_REMOTE_READ;
- rsc->mr_valid = 0;
+ rsc->mr_valid = 1;
reg->sge.lkey = mr->lkey;
reg->rkey = mr->rkey;
@@ -554,7 +508,8 @@ iser_reg_data_sg(struct iscsi_iser_task *task,
}
int iser_reg_rdma_mem(struct iscsi_iser_task *task,
- enum iser_data_dir dir)
+ enum iser_data_dir dir,
+ bool all_imm)
{
struct ib_conn *ib_conn = &task->iser_conn->ib_conn;
struct iser_device *device = ib_conn->device;
@@ -565,8 +520,8 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task,
bool use_dma_key;
int err;
- use_dma_key = (mem->dma_nents == 1 && !iser_always_reg &&
- scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL);
+ use_dma_key = mem->dma_nents == 1 && (all_imm || !iser_always_reg) &&
+ scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL;
if (!use_dma_key) {
desc = device->reg_ops->reg_desc_get(ib_conn);
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 42f4da6..40c0f49 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -44,17 +44,6 @@
#define ISER_MAX_CQ_LEN (ISER_MAX_RX_LEN + ISER_MAX_TX_LEN + \
ISCSI_ISER_MAX_CONN)
-static int iser_cq_poll_limit = 512;
-
-static void iser_cq_tasklet_fn(unsigned long data);
-static void iser_cq_callback(struct ib_cq *cq, void *cq_context);
-
-static void iser_cq_event_callback(struct ib_event *cause, void *context)
-{
- iser_err("cq event %s (%d)\n",
- ib_event_msg(cause->event), cause->event);
-}
-
static void iser_qp_event_callback(struct ib_event *cause, void *context)
{
iser_err("qp event %s (%d)\n",
@@ -78,59 +67,40 @@ static void iser_event_handler(struct ib_event_handler *handler,
*/
static int iser_create_device_ib_res(struct iser_device *device)
{
- struct ib_device_attr *dev_attr = &device->dev_attr;
+ struct ib_device *ib_dev = device->ib_device;
int ret, i, max_cqe;
- ret = ib_query_device(device->ib_device, dev_attr);
- if (ret) {
- pr_warn("Query device failed for %s\n", device->ib_device->name);
- return ret;
- }
-
ret = iser_assign_reg_ops(device);
if (ret)
return ret;
device->comps_used = min_t(int, num_online_cpus(),
- device->ib_device->num_comp_vectors);
+ ib_dev->num_comp_vectors);
device->comps = kcalloc(device->comps_used, sizeof(*device->comps),
GFP_KERNEL);
if (!device->comps)
goto comps_err;
- max_cqe = min(ISER_MAX_CQ_LEN, dev_attr->max_cqe);
+ max_cqe = min(ISER_MAX_CQ_LEN, ib_dev->attrs.max_cqe);
iser_info("using %d CQs, device %s supports %d vectors max_cqe %d\n",
- device->comps_used, device->ib_device->name,
- device->ib_device->num_comp_vectors, max_cqe);
+ device->comps_used, ib_dev->name,
+ ib_dev->num_comp_vectors, max_cqe);
- device->pd = ib_alloc_pd(device->ib_device);
+ device->pd = ib_alloc_pd(ib_dev);
if (IS_ERR(device->pd))
goto pd_err;
for (i = 0; i < device->comps_used; i++) {
- struct ib_cq_init_attr cq_attr = {};
struct iser_comp *comp = &device->comps[i];
- comp->device = device;
- cq_attr.cqe = max_cqe;
- cq_attr.comp_vector = i;
- comp->cq = ib_create_cq(device->ib_device,
- iser_cq_callback,
- iser_cq_event_callback,
- (void *)comp,
- &cq_attr);
+ comp->cq = ib_alloc_cq(ib_dev, comp, max_cqe, i,
+ IB_POLL_SOFTIRQ);
if (IS_ERR(comp->cq)) {
comp->cq = NULL;
goto cq_err;
}
-
- if (ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP))
- goto cq_err;
-
- tasklet_init(&comp->tasklet, iser_cq_tasklet_fn,
- (unsigned long)comp);
}
if (!iser_always_reg) {
@@ -140,11 +110,11 @@ static int iser_create_device_ib_res(struct iser_device *device)
device->mr = ib_get_dma_mr(device->pd, access);
if (IS_ERR(device->mr))
- goto dma_mr_err;
+ goto cq_err;
}
- INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device,
- iser_event_handler);
+ INIT_IB_EVENT_HANDLER(&device->event_handler, ib_dev,
+ iser_event_handler);
if (ib_register_event_handler(&device->event_handler))
goto handler_err;
@@ -153,15 +123,12 @@ static int iser_create_device_ib_res(struct iser_device *device)
handler_err:
if (device->mr)
ib_dereg_mr(device->mr);
-dma_mr_err:
- for (i = 0; i < device->comps_used; i++)
- tasklet_kill(&device->comps[i].tasklet);
cq_err:
for (i = 0; i < device->comps_used; i++) {
struct iser_comp *comp = &device->comps[i];
if (comp->cq)
- ib_destroy_cq(comp->cq);
+ ib_free_cq(comp->cq);
}
ib_dealloc_pd(device->pd);
pd_err:
@@ -182,8 +149,7 @@ static void iser_free_device_ib_res(struct iser_device *device)
for (i = 0; i < device->comps_used; i++) {
struct iser_comp *comp = &device->comps[i];
- tasklet_kill(&comp->tasklet);
- ib_destroy_cq(comp->cq);
+ ib_free_cq(comp->cq);
comp->cq = NULL;
}
@@ -299,7 +265,7 @@ iser_alloc_reg_res(struct ib_device *ib_device,
iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
return ret;
}
- res->mr_valid = 1;
+ res->mr_valid = 0;
return 0;
}
@@ -336,7 +302,7 @@ iser_alloc_pi_ctx(struct ib_device *ib_device,
ret = PTR_ERR(pi_ctx->sig_mr);
goto sig_mr_failure;
}
- pi_ctx->sig_mr_valid = 1;
+ pi_ctx->sig_mr_valid = 0;
desc->pi_ctx->sig_protected = 0;
return 0;
@@ -461,10 +427,9 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn)
*/
static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
{
- struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
- ib_conn);
+ struct iser_conn *iser_conn = to_iser_conn(ib_conn);
struct iser_device *device;
- struct ib_device_attr *dev_attr;
+ struct ib_device *ib_dev;
struct ib_qp_init_attr init_attr;
int ret = -ENOMEM;
int index, min_index = 0;
@@ -472,7 +437,7 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
BUG_ON(ib_conn->device == NULL);
device = ib_conn->device;
- dev_attr = &device->dev_attr;
+ ib_dev = device->ib_device;
memset(&init_attr, 0, sizeof init_attr);
@@ -503,16 +468,16 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
iser_conn->max_cmds =
ISER_GET_MAX_XMIT_CMDS(ISER_QP_SIG_MAX_REQ_DTOS);
} else {
- if (dev_attr->max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
+ if (ib_dev->attrs.max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS + 1;
iser_conn->max_cmds =
ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS);
} else {
- init_attr.cap.max_send_wr = dev_attr->max_qp_wr;
+ init_attr.cap.max_send_wr = ib_dev->attrs.max_qp_wr;
iser_conn->max_cmds =
- ISER_GET_MAX_XMIT_CMDS(dev_attr->max_qp_wr);
+ ISER_GET_MAX_XMIT_CMDS(ib_dev->attrs.max_qp_wr);
iser_dbg("device %s supports max_send_wr %d\n",
- device->ib_device->name, dev_attr->max_qp_wr);
+ device->ib_device->name, ib_dev->attrs.max_qp_wr);
}
}
@@ -724,13 +689,13 @@ int iser_conn_terminate(struct iser_conn *iser_conn)
iser_conn, err);
/* post an indication that all flush errors were consumed */
- err = ib_post_send(ib_conn->qp, &ib_conn->beacon, &bad_wr);
+ err = ib_post_send(ib_conn->qp, &ib_conn->last, &bad_wr);
if (err) {
- iser_err("conn %p failed to post beacon", ib_conn);
+ iser_err("conn %p failed to post last wr", ib_conn);
return 1;
}
- wait_for_completion(&ib_conn->flush_comp);
+ wait_for_completion(&ib_conn->last_comp);
}
return 1;
@@ -756,7 +721,7 @@ iser_calc_scsi_params(struct iser_conn *iser_conn,
sg_tablesize = DIV_ROUND_UP(max_sectors * 512, SIZE_4K);
sup_sg_tablesize = min_t(unsigned, ISCSI_ISER_MAX_SG_TABLESIZE,
- device->dev_attr.max_fast_reg_page_list_len);
+ device->ib_device->attrs.max_fast_reg_page_list_len);
if (sg_tablesize > sup_sg_tablesize) {
sg_tablesize = sup_sg_tablesize;
@@ -799,7 +764,7 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id)
/* connection T10-PI support */
if (iser_pi_enable) {
- if (!(device->dev_attr.device_cap_flags &
+ if (!(device->ib_device->attrs.device_cap_flags &
IB_DEVICE_SIGNATURE_HANDOVER)) {
iser_warn("T10-PI requested but not supported on %s, "
"continue without T10-PI\n",
@@ -841,16 +806,17 @@ static void iser_route_handler(struct rdma_cm_id *cma_id)
goto failure;
memset(&conn_param, 0, sizeof conn_param);
- conn_param.responder_resources = device->dev_attr.max_qp_rd_atom;
+ conn_param.responder_resources = device->ib_device->attrs.max_qp_rd_atom;
conn_param.initiator_depth = 1;
conn_param.retry_count = 7;
conn_param.rnr_retry_count = 6;
memset(&req_hdr, 0, sizeof(req_hdr));
- req_hdr.flags = (ISER_ZBVA_NOT_SUPPORTED |
- ISER_SEND_W_INV_NOT_SUPPORTED);
- conn_param.private_data = (void *)&req_hdr;
- conn_param.private_data_len = sizeof(struct iser_cm_hdr);
+ req_hdr.flags = ISER_ZBVA_NOT_SUP;
+ if (!device->remote_inv_sup)
+ req_hdr.flags |= ISER_SEND_W_INV_NOT_SUP;
+ conn_param.private_data = (void *)&req_hdr;
+ conn_param.private_data_len = sizeof(struct iser_cm_hdr);
ret = rdma_connect(cma_id, &conn_param);
if (ret) {
@@ -863,7 +829,8 @@ failure:
iser_connect_error(cma_id);
}
-static void iser_connected_handler(struct rdma_cm_id *cma_id)
+static void iser_connected_handler(struct rdma_cm_id *cma_id,
+ const void *private_data)
{
struct iser_conn *iser_conn;
struct ib_qp_attr attr;
@@ -877,6 +844,15 @@ static void iser_connected_handler(struct rdma_cm_id *cma_id)
(void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr);
iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num);
+ if (private_data) {
+ u8 flags = *(u8 *)private_data;
+
+ iser_conn->snd_w_inv = !(flags & ISER_SEND_W_INV_NOT_SUP);
+ }
+
+ iser_info("conn %p: negotiated %s invalidation\n",
+ iser_conn, iser_conn->snd_w_inv ? "remote" : "local");
+
iser_conn->state = ISER_CONN_UP;
complete(&iser_conn->up_completion);
}
@@ -928,7 +904,7 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
iser_route_handler(cma_id);
break;
case RDMA_CM_EVENT_ESTABLISHED:
- iser_connected_handler(cma_id);
+ iser_connected_handler(cma_id, event->param.conn.private_data);
break;
case RDMA_CM_EVENT_ADDR_ERROR:
case RDMA_CM_EVENT_ROUTE_ERROR:
@@ -967,14 +943,21 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
void iser_conn_init(struct iser_conn *iser_conn)
{
+ struct ib_conn *ib_conn = &iser_conn->ib_conn;
+
iser_conn->state = ISER_CONN_INIT;
- iser_conn->ib_conn.post_recv_buf_count = 0;
- init_completion(&iser_conn->ib_conn.flush_comp);
init_completion(&iser_conn->stop_completion);
init_completion(&iser_conn->ib_completion);
init_completion(&iser_conn->up_completion);
INIT_LIST_HEAD(&iser_conn->conn_list);
mutex_init(&iser_conn->state_mutex);
+
+ ib_conn->post_recv_buf_count = 0;
+ ib_conn->reg_cqe.done = iser_reg_comp;
+ ib_conn->last_cqe.done = iser_last_comp;
+ ib_conn->last.wr_cqe = &ib_conn->last_cqe;
+ ib_conn->last.opcode = IB_WR_SEND;
+ init_completion(&ib_conn->last_comp);
}
/**
@@ -1000,9 +983,6 @@ int iser_connect(struct iser_conn *iser_conn,
iser_conn->state = ISER_CONN_PENDING;
- ib_conn->beacon.wr_id = ISER_BEACON_WRID;
- ib_conn->beacon.opcode = IB_WR_SEND;
-
ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler,
(void *)iser_conn,
RDMA_PS_TCP, IB_QPT_RC);
@@ -1045,56 +1025,60 @@ connect_failure:
int iser_post_recvl(struct iser_conn *iser_conn)
{
- struct ib_recv_wr rx_wr, *rx_wr_failed;
struct ib_conn *ib_conn = &iser_conn->ib_conn;
- struct ib_sge sge;
+ struct iser_login_desc *desc = &iser_conn->login_desc;
+ struct ib_recv_wr wr, *wr_failed;
int ib_ret;
- sge.addr = iser_conn->login_resp_dma;
- sge.length = ISER_RX_LOGIN_SIZE;
- sge.lkey = ib_conn->device->pd->local_dma_lkey;
+ desc->sge.addr = desc->rsp_dma;
+ desc->sge.length = ISER_RX_LOGIN_SIZE;
+ desc->sge.lkey = ib_conn->device->pd->local_dma_lkey;
- rx_wr.wr_id = (uintptr_t)iser_conn->login_resp_buf;
- rx_wr.sg_list = &sge;
- rx_wr.num_sge = 1;
- rx_wr.next = NULL;
+ desc->cqe.done = iser_login_rsp;
+ wr.wr_cqe = &desc->cqe;
+ wr.sg_list = &desc->sge;
+ wr.num_sge = 1;
+ wr.next = NULL;
ib_conn->post_recv_buf_count++;
- ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed);
+ ib_ret = ib_post_recv(ib_conn->qp, &wr, &wr_failed);
if (ib_ret) {
iser_err("ib_post_recv failed ret=%d\n", ib_ret);
ib_conn->post_recv_buf_count--;
}
+
return ib_ret;
}
int iser_post_recvm(struct iser_conn *iser_conn, int count)
{
- struct ib_recv_wr *rx_wr, *rx_wr_failed;
- int i, ib_ret;
struct ib_conn *ib_conn = &iser_conn->ib_conn;
unsigned int my_rx_head = iser_conn->rx_desc_head;
struct iser_rx_desc *rx_desc;
+ struct ib_recv_wr *wr, *wr_failed;
+ int i, ib_ret;
- for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
- rx_desc = &iser_conn->rx_descs[my_rx_head];
- rx_wr->wr_id = (uintptr_t)rx_desc;
- rx_wr->sg_list = &rx_desc->rx_sg;
- rx_wr->num_sge = 1;
- rx_wr->next = rx_wr + 1;
+ for (wr = ib_conn->rx_wr, i = 0; i < count; i++, wr++) {
+ rx_desc = &iser_conn->rx_descs[my_rx_head];
+ rx_desc->cqe.done = iser_task_rsp;
+ wr->wr_cqe = &rx_desc->cqe;
+ wr->sg_list = &rx_desc->rx_sg;
+ wr->num_sge = 1;
+ wr->next = wr + 1;
my_rx_head = (my_rx_head + 1) & iser_conn->qp_max_recv_dtos_mask;
}
- rx_wr--;
- rx_wr->next = NULL; /* mark end of work requests list */
+ wr--;
+ wr->next = NULL; /* mark end of work requests list */
ib_conn->post_recv_buf_count += count;
- ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed);
+ ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &wr_failed);
if (ib_ret) {
iser_err("ib_post_recv failed ret=%d\n", ib_ret);
ib_conn->post_recv_buf_count -= count;
} else
iser_conn->rx_desc_head = my_rx_head;
+
return ib_ret;
}
@@ -1115,7 +1099,7 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
DMA_TO_DEVICE);
wr->next = NULL;
- wr->wr_id = (uintptr_t)tx_desc;
+ wr->wr_cqe = &tx_desc->cqe;
wr->sg_list = tx_desc->tx_sg;
wr->num_sge = tx_desc->num_sge;
wr->opcode = IB_WR_SEND;
@@ -1129,149 +1113,6 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
return ib_ret;
}
-/**
- * is_iser_tx_desc - Indicate if the completion wr_id
- * is a TX descriptor or not.
- * @iser_conn: iser connection
- * @wr_id: completion WR identifier
- *
- * Since we cannot rely on wc opcode in FLUSH errors
- * we must work around it by checking if the wr_id address
- * falls in the iser connection rx_descs buffer. If so
- * it is an RX descriptor, otherwize it is a TX.
- */
-static inline bool
-is_iser_tx_desc(struct iser_conn *iser_conn, void *wr_id)
-{
- void *start = iser_conn->rx_descs;
- int len = iser_conn->num_rx_descs * sizeof(*iser_conn->rx_descs);
-
- if (wr_id >= start && wr_id < start + len)
- return false;
-
- return true;
-}
-
-/**
- * iser_handle_comp_error() - Handle error completion
- * @ib_conn: connection RDMA resources
- * @wc: work completion
- *
- * Notes: We may handle a FLUSH error completion and in this case
- * we only cleanup in case TX type was DATAOUT. For non-FLUSH
- * error completion we should also notify iscsi layer that
- * connection is failed (in case we passed bind stage).
- */
-static void
-iser_handle_comp_error(struct ib_conn *ib_conn,
- struct ib_wc *wc)
-{
- void *wr_id = (void *)(uintptr_t)wc->wr_id;
- struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
- ib_conn);
-
- if (wc->status != IB_WC_WR_FLUSH_ERR)
- if (iser_conn->iscsi_conn)
- iscsi_conn_failure(iser_conn->iscsi_conn,
- ISCSI_ERR_CONN_FAILED);
-
- if (wc->wr_id == ISER_FASTREG_LI_WRID)
- return;
-
- if (is_iser_tx_desc(iser_conn, wr_id)) {
- struct iser_tx_desc *desc = wr_id;
-
- if (desc->type == ISCSI_TX_DATAOUT)
- kmem_cache_free(ig.desc_cache, desc);
- } else {
- ib_conn->post_recv_buf_count--;
- }
-}
-
-/**
- * iser_handle_wc - handle a single work completion
- * @wc: work completion
- *
- * Soft-IRQ context, work completion can be either
- * SEND or RECV, and can turn out successful or
- * with error (or flush error).
- */
-static void iser_handle_wc(struct ib_wc *wc)
-{
- struct ib_conn *ib_conn;
- struct iser_tx_desc *tx_desc;
- struct iser_rx_desc *rx_desc;
-
- ib_conn = wc->qp->qp_context;
- if (likely(wc->status == IB_WC_SUCCESS)) {
- if (wc->opcode == IB_WC_RECV) {
- rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
- iser_rcv_completion(rx_desc, wc->byte_len,
- ib_conn);
- } else
- if (wc->opcode == IB_WC_SEND) {
- tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
- iser_snd_completion(tx_desc, ib_conn);
- } else {
- iser_err("Unknown wc opcode %d\n", wc->opcode);
- }
- } else {
- if (wc->status != IB_WC_WR_FLUSH_ERR)
- iser_err("%s (%d): wr id %llx vend_err %x\n",
- ib_wc_status_msg(wc->status), wc->status,
- wc->wr_id, wc->vendor_err);
- else
- iser_dbg("%s (%d): wr id %llx\n",
- ib_wc_status_msg(wc->status), wc->status,
- wc->wr_id);
-
- if (wc->wr_id == ISER_BEACON_WRID)
- /* all flush errors were consumed */
- complete(&ib_conn->flush_comp);
- else
- iser_handle_comp_error(ib_conn, wc);
- }
-}
-
-/**
- * iser_cq_tasklet_fn - iSER completion polling loop
- * @data: iSER completion context
- *
- * Soft-IRQ context, polling connection CQ until
- * either CQ was empty or we exausted polling budget
- */
-static void iser_cq_tasklet_fn(unsigned long data)
-{
- struct iser_comp *comp = (struct iser_comp *)data;
- struct ib_cq *cq = comp->cq;
- struct ib_wc *const wcs = comp->wcs;
- int i, n, completed = 0;
-
- while ((n = ib_poll_cq(cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) {
- for (i = 0; i < n; i++)
- iser_handle_wc(&wcs[i]);
-
- completed += n;
- if (completed >= iser_cq_poll_limit)
- break;
- }
-
- /*
- * It is assumed here that arming CQ only once its empty
- * would not cause interrupts to be missed.
- */
- ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
-
- iser_dbg("got %d completions\n", completed);
-}
-
-static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
-{
- struct iser_comp *comp = cq_context;
-
- tasklet_schedule(&comp->tasklet);
-}
-
u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir, sector_t *sector)
{
@@ -1319,3 +1160,21 @@ err:
/* Not alot we can do here, return ambiguous guard error */
return 0x1;
}
+
+void iser_err_comp(struct ib_wc *wc, const char *type)
+{
+ if (wc->status != IB_WC_WR_FLUSH_ERR) {
+ struct iser_conn *iser_conn = to_iser_conn(wc->qp->qp_context);
+
+ iser_err("%s failure: %s (%d) vend_err %x\n", type,
+ ib_wc_status_msg(wc->status), wc->status,
+ wc->vendor_err);
+
+ if (iser_conn->iscsi_conn)
+ iscsi_conn_failure(iser_conn->iscsi_conn,
+ ISCSI_ERR_CONN_FAILED);
+ } else {
+ iser_dbg("%s failure: %s (%d)\n", type,
+ ib_wc_status_msg(wc->status), wc->status);
+ }
+}
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 468c5e1..f121e61 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -29,7 +29,6 @@
#include <target/iscsi/iscsi_transport.h>
#include <linux/semaphore.h>
-#include "isert_proto.h"
#include "ib_isert.h"
#define ISERT_MAX_CONN 8
@@ -95,22 +94,6 @@ isert_qp_event_callback(struct ib_event *e, void *context)
}
}
-static int
-isert_query_device(struct ib_device *ib_dev, struct ib_device_attr *devattr)
-{
- int ret;
-
- ret = ib_query_device(ib_dev, devattr);
- if (ret) {
- isert_err("ib_query_device() failed: %d\n", ret);
- return ret;
- }
- isert_dbg("devattr->max_sge: %d\n", devattr->max_sge);
- isert_dbg("devattr->max_sge_rd: %d\n", devattr->max_sge_rd);
-
- return 0;
-}
-
static struct isert_comp *
isert_comp_get(struct isert_conn *isert_conn)
{
@@ -157,9 +140,9 @@ isert_create_qp(struct isert_conn *isert_conn,
attr.recv_cq = comp->cq;
attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS;
attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1;
- attr.cap.max_send_sge = device->dev_attr.max_sge;
- isert_conn->max_sge = min(device->dev_attr.max_sge,
- device->dev_attr.max_sge_rd);
+ attr.cap.max_send_sge = device->ib_device->attrs.max_sge;
+ isert_conn->max_sge = min(device->ib_device->attrs.max_sge,
+ device->ib_device->attrs.max_sge_rd);
attr.cap.max_recv_sge = 1;
attr.sq_sig_type = IB_SIGNAL_REQ_WR;
attr.qp_type = IB_QPT_RC;
@@ -287,8 +270,7 @@ isert_free_comps(struct isert_device *device)
}
static int
-isert_alloc_comps(struct isert_device *device,
- struct ib_device_attr *attr)
+isert_alloc_comps(struct isert_device *device)
{
int i, max_cqe, ret = 0;
@@ -308,7 +290,7 @@ isert_alloc_comps(struct isert_device *device,
return -ENOMEM;
}
- max_cqe = min(ISER_MAX_CQ_LEN, attr->max_cqe);
+ max_cqe = min(ISER_MAX_CQ_LEN, device->ib_device->attrs.max_cqe);
for (i = 0; i < device->comps_used; i++) {
struct ib_cq_init_attr cq_attr = {};
@@ -344,17 +326,15 @@ out_cq:
static int
isert_create_device_ib_res(struct isert_device *device)
{
- struct ib_device_attr *dev_attr;
+ struct ib_device *ib_dev = device->ib_device;
int ret;
- dev_attr = &device->dev_attr;
- ret = isert_query_device(device->ib_device, dev_attr);
- if (ret)
- goto out;
+ isert_dbg("devattr->max_sge: %d\n", ib_dev->attrs.max_sge);
+ isert_dbg("devattr->max_sge_rd: %d\n", ib_dev->attrs.max_sge_rd);
/* asign function handlers */
- if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
- dev_attr->device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
+ if (ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
+ ib_dev->attrs.device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
device->use_fastreg = 1;
device->reg_rdma_mem = isert_reg_rdma;
device->unreg_rdma_mem = isert_unreg_rdma;
@@ -364,11 +344,11 @@ isert_create_device_ib_res(struct isert_device *device)
device->unreg_rdma_mem = isert_unmap_cmd;
}
- ret = isert_alloc_comps(device, dev_attr);
+ ret = isert_alloc_comps(device);
if (ret)
goto out;
- device->pd = ib_alloc_pd(device->ib_device);
+ device->pd = ib_alloc_pd(ib_dev);
if (IS_ERR(device->pd)) {
ret = PTR_ERR(device->pd);
isert_err("failed to allocate pd, device %p, ret=%d\n",
@@ -377,7 +357,7 @@ isert_create_device_ib_res(struct isert_device *device)
}
/* Check signature cap */
- device->pi_capable = dev_attr->device_cap_flags &
+ device->pi_capable = ib_dev->attrs.device_cap_flags &
IB_DEVICE_SIGNATURE_HANDOVER ? true : false;
return 0;
@@ -676,6 +656,32 @@ out_login_buf:
return ret;
}
+static void
+isert_set_nego_params(struct isert_conn *isert_conn,
+ struct rdma_conn_param *param)
+{
+ struct ib_device_attr *attr = &isert_conn->device->ib_device->attrs;
+
+ /* Set max inflight RDMA READ requests */
+ isert_conn->initiator_depth = min_t(u8, param->initiator_depth,
+ attr->max_qp_init_rd_atom);
+ isert_dbg("Using initiator_depth: %u\n", isert_conn->initiator_depth);
+
+ if (param->private_data) {
+ u8 flags = *(u8 *)param->private_data;
+
+ /*
+ * use remote invalidation if the both initiator
+ * and the HCA support it
+ */
+ isert_conn->snd_w_inv = !(flags & ISER_SEND_W_INV_NOT_SUP) &&
+ (attr->device_cap_flags &
+ IB_DEVICE_MEM_MGT_EXTENSIONS);
+ if (isert_conn->snd_w_inv)
+ isert_info("Using remote invalidation\n");
+ }
+}
+
static int
isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
{
@@ -714,11 +720,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
}
isert_conn->device = device;
- /* Set max inflight RDMA READ requests */
- isert_conn->initiator_depth = min_t(u8,
- event->param.conn.initiator_depth,
- device->dev_attr.max_qp_init_rd_atom);
- isert_dbg("Using initiator_depth: %u\n", isert_conn->initiator_depth);
+ isert_set_nego_params(isert_conn, &event->param.conn);
ret = isert_conn_setup_qp(isert_conn, cma_id);
if (ret)
@@ -1050,8 +1052,8 @@ isert_create_send_desc(struct isert_conn *isert_conn,
ib_dma_sync_single_for_cpu(ib_dev, tx_desc->dma_addr,
ISER_HEADERS_LEN, DMA_TO_DEVICE);
- memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
- tx_desc->iser_header.flags = ISER_VER;
+ memset(&tx_desc->iser_header, 0, sizeof(struct iser_ctrl));
+ tx_desc->iser_header.flags = ISCSI_CTRL;
tx_desc->num_sge = 1;
tx_desc->isert_cmd = isert_cmd;
@@ -1097,7 +1099,14 @@ isert_init_send_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
isert_cmd->rdma_wr.iser_ib_op = ISER_IB_SEND;
send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc;
- send_wr->opcode = IB_WR_SEND;
+
+ if (isert_conn->snd_w_inv && isert_cmd->inv_rkey) {
+ send_wr->opcode = IB_WR_SEND_WITH_INV;
+ send_wr->ex.invalidate_rkey = isert_cmd->inv_rkey;
+ } else {
+ send_wr->opcode = IB_WR_SEND;
+ }
+
send_wr->sg_list = &tx_desc->tx_sg[0];
send_wr->num_sge = isert_cmd->tx_desc.num_sge;
send_wr->send_flags = IB_SEND_SIGNALED;
@@ -1486,6 +1495,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
isert_cmd->read_va = read_va;
isert_cmd->write_stag = write_stag;
isert_cmd->write_va = write_va;
+ isert_cmd->inv_rkey = read_stag ? read_stag : write_stag;
ret = isert_handle_scsi_cmd(isert_conn, isert_cmd, cmd,
rx_desc, (unsigned char *)hdr);
@@ -1543,21 +1553,21 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
static void
isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn)
{
- struct iser_hdr *iser_hdr = &rx_desc->iser_header;
+ struct iser_ctrl *iser_ctrl = &rx_desc->iser_header;
uint64_t read_va = 0, write_va = 0;
uint32_t read_stag = 0, write_stag = 0;
- switch (iser_hdr->flags & 0xF0) {
+ switch (iser_ctrl->flags & 0xF0) {
case ISCSI_CTRL:
- if (iser_hdr->flags & ISER_RSV) {
- read_stag = be32_to_cpu(iser_hdr->read_stag);
- read_va = be64_to_cpu(iser_hdr->read_va);
+ if (iser_ctrl->flags & ISER_RSV) {
+ read_stag = be32_to_cpu(iser_ctrl->read_stag);
+ read_va = be64_to_cpu(iser_ctrl->read_va);
isert_dbg("ISER_RSV: read_stag: 0x%x read_va: 0x%llx\n",
read_stag, (unsigned long long)read_va);
}
- if (iser_hdr->flags & ISER_WSV) {
- write_stag = be32_to_cpu(iser_hdr->write_stag);
- write_va = be64_to_cpu(iser_hdr->write_va);
+ if (iser_ctrl->flags & ISER_WSV) {
+ write_stag = be32_to_cpu(iser_ctrl->write_stag);
+ write_va = be64_to_cpu(iser_ctrl->write_va);
isert_dbg("ISER_WSV: write_stag: 0x%x write_va: 0x%llx\n",
write_stag, (unsigned long long)write_va);
}
@@ -1568,7 +1578,7 @@ isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn)
isert_err("iSER Hello message\n");
break;
default:
- isert_warn("Unknown iSER hdr flags: 0x%02x\n", iser_hdr->flags);
+ isert_warn("Unknown iSER hdr flags: 0x%02x\n", iser_ctrl->flags);
break;
}
@@ -3095,12 +3105,20 @@ isert_rdma_accept(struct isert_conn *isert_conn)
struct rdma_cm_id *cm_id = isert_conn->cm_id;
struct rdma_conn_param cp;
int ret;
+ struct iser_cm_hdr rsp_hdr;
memset(&cp, 0, sizeof(struct rdma_conn_param));
cp.initiator_depth = isert_conn->initiator_depth;
cp.retry_count = 7;
cp.rnr_retry_count = 7;
+ memset(&rsp_hdr, 0, sizeof(rsp_hdr));
+ rsp_hdr.flags = ISERT_ZBVA_NOT_USED;
+ if (!isert_conn->snd_w_inv)
+ rsp_hdr.flags = rsp_hdr.flags | ISERT_SEND_W_INV_NOT_USED;
+ cp.private_data = (void *)&rsp_hdr;
+ cp.private_data_len = sizeof(rsp_hdr);
+
ret = rdma_accept(cm_id, &cp);
if (ret) {
isert_err("rdma_accept() failed with: %d\n", ret);
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index 3d7fbc4..8d50453 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -3,6 +3,8 @@
#include <linux/in6.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
+#include <scsi/iser.h>
+
#define DRV_NAME "isert"
#define PFX DRV_NAME ": "
@@ -31,6 +33,38 @@
#define isert_err(fmt, arg...) \
pr_err(PFX "%s: " fmt, __func__ , ## arg)
+/* Constant PDU lengths calculations */
+#define ISER_HEADERS_LEN (sizeof(struct iser_ctrl) + \
+ sizeof(struct iscsi_hdr))
+#define ISER_RECV_DATA_SEG_LEN 8192
+#define ISER_RX_PAYLOAD_SIZE (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
+#define ISER_RX_LOGIN_SIZE (ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN)
+
+/* QP settings */
+/* Maximal bounds on received asynchronous PDUs */
+#define ISERT_MAX_TX_MISC_PDUS 4 /* NOOP_IN(2) , ASYNC_EVENT(2) */
+
+#define ISERT_MAX_RX_MISC_PDUS 6 /*
+ * NOOP_OUT(2), TEXT(1),
+ * SCSI_TMFUNC(2), LOGOUT(1)
+ */
+
+#define ISCSI_DEF_XMIT_CMDS_MAX 128 /* from libiscsi.h, must be power of 2 */
+
+#define ISERT_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX)
+
+#define ISERT_MIN_POSTED_RX (ISCSI_DEF_XMIT_CMDS_MAX >> 2)
+
+#define ISERT_INFLIGHT_DATAOUTS 8
+
+#define ISERT_QP_MAX_REQ_DTOS (ISCSI_DEF_XMIT_CMDS_MAX * \
+ (1 + ISERT_INFLIGHT_DATAOUTS) + \
+ ISERT_MAX_TX_MISC_PDUS + \
+ ISERT_MAX_RX_MISC_PDUS)
+
+#define ISER_RX_PAD_SIZE (ISER_RECV_DATA_SEG_LEN + 4096 - \
+ (ISER_RX_PAYLOAD_SIZE + sizeof(u64) + sizeof(struct ib_sge)))
+
#define ISCSI_ISER_SG_TABLESIZE 256
#define ISER_FASTREG_LI_WRID 0xffffffffffffffffULL
#define ISER_BEACON_WRID 0xfffffffffffffffeULL
@@ -56,7 +90,7 @@ enum iser_conn_state {
};
struct iser_rx_desc {
- struct iser_hdr iser_header;
+ struct iser_ctrl iser_header;
struct iscsi_hdr iscsi_header;
char data[ISER_RECV_DATA_SEG_LEN];
u64 dma_addr;
@@ -65,7 +99,7 @@ struct iser_rx_desc {
} __packed;
struct iser_tx_desc {
- struct iser_hdr iser_header;
+ struct iser_ctrl iser_header;
struct iscsi_hdr iscsi_header;
enum isert_desc_type type;
u64 dma_addr;
@@ -129,6 +163,7 @@ struct isert_cmd {
uint32_t write_stag;
uint64_t read_va;
uint64_t write_va;
+ uint32_t inv_rkey;
u64 pdu_buf_dma;
u32 pdu_buf_len;
struct isert_conn *conn;
@@ -176,6 +211,7 @@ struct isert_conn {
struct work_struct release_work;
struct ib_recv_wr beacon;
bool logout_posted;
+ bool snd_w_inv;
};
#define ISERT_MAX_CQ 64
@@ -207,7 +243,6 @@ struct isert_device {
struct isert_comp *comps;
int comps_used;
struct list_head dev_node;
- struct ib_device_attr dev_attr;
int (*reg_rdma_mem)(struct iscsi_conn *conn,
struct iscsi_cmd *cmd,
struct isert_rdma_wr *wr);
diff --git a/drivers/infiniband/ulp/isert/isert_proto.h b/drivers/infiniband/ulp/isert/isert_proto.h
deleted file mode 100644
index 4dccd313..0000000
--- a/drivers/infiniband/ulp/isert/isert_proto.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/* From iscsi_iser.h */
-
-struct iser_hdr {
- u8 flags;
- u8 rsvd[3];
- __be32 write_stag; /* write rkey */
- __be64 write_va;
- __be32 read_stag; /* read rkey */
- __be64 read_va;
-} __packed;
-
-/*Constant PDU lengths calculations */
-#define ISER_HEADERS_LEN (sizeof(struct iser_hdr) + sizeof(struct iscsi_hdr))
-
-#define ISER_RECV_DATA_SEG_LEN 8192
-#define ISER_RX_PAYLOAD_SIZE (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
-#define ISER_RX_LOGIN_SIZE (ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN)
-
-/* QP settings */
-/* Maximal bounds on received asynchronous PDUs */
-#define ISERT_MAX_TX_MISC_PDUS 4 /* NOOP_IN(2) , ASYNC_EVENT(2) */
-
-#define ISERT_MAX_RX_MISC_PDUS 6 /* NOOP_OUT(2), TEXT(1), *
- * SCSI_TMFUNC(2), LOGOUT(1) */
-
-#define ISCSI_DEF_XMIT_CMDS_MAX 128 /* from libiscsi.h, must be power of 2 */
-
-#define ISERT_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX)
-
-#define ISERT_MIN_POSTED_RX (ISCSI_DEF_XMIT_CMDS_MAX >> 2)
-
-#define ISERT_INFLIGHT_DATAOUTS 8
-
-#define ISERT_QP_MAX_REQ_DTOS (ISCSI_DEF_XMIT_CMDS_MAX * \
- (1 + ISERT_INFLIGHT_DATAOUTS) + \
- ISERT_MAX_TX_MISC_PDUS + \
- ISERT_MAX_RX_MISC_PDUS)
-
-#define ISER_RX_PAD_SIZE (ISER_RECV_DATA_SEG_LEN + 4096 - \
- (ISER_RX_PAYLOAD_SIZE + sizeof(u64) + sizeof(struct ib_sge)))
-
-#define ISER_VER 0x10
-#define ISER_WSV 0x08
-#define ISER_RSV 0x04
-#define ISCSI_CTRL 0x10
-#define ISER_HELLO 0x20
-#define ISER_HELLORPLY 0x30
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 3db9a65..03022f6 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -132,8 +132,9 @@ MODULE_PARM_DESC(ch_count,
static void srp_add_one(struct ib_device *device);
static void srp_remove_one(struct ib_device *device, void *client_data);
-static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr);
-static void srp_send_completion(struct ib_cq *cq, void *ch_ptr);
+static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
+static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
+ const char *opname);
static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
static struct scsi_transport_template *ib_srp_transport_template;
@@ -445,6 +446,17 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
dev->max_pages_per_mr);
}
+static void srp_drain_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct srp_rdma_ch *ch = cq->cq_context;
+
+ complete(&ch->done);
+}
+
+static struct ib_cqe srp_drain_cqe = {
+ .done = srp_drain_done,
+};
+
/**
* srp_destroy_qp() - destroy an RDMA queue pair
* @ch: SRP RDMA channel.
@@ -457,10 +469,11 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
static void srp_destroy_qp(struct srp_rdma_ch *ch)
{
static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
- static struct ib_recv_wr wr = { .wr_id = SRP_LAST_WR_ID };
+ static struct ib_recv_wr wr = { 0 };
struct ib_recv_wr *bad_wr;
int ret;
+ wr.wr_cqe = &srp_drain_cqe;
/* Destroying a QP and reusing ch->done is only safe if not connected */
WARN_ON_ONCE(ch->connected);
@@ -489,34 +502,27 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
struct ib_fmr_pool *fmr_pool = NULL;
struct srp_fr_pool *fr_pool = NULL;
const int m = dev->use_fast_reg ? 3 : 1;
- struct ib_cq_init_attr cq_attr = {};
int ret;
init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
if (!init_attr)
return -ENOMEM;
- /* + 1 for SRP_LAST_WR_ID */
- cq_attr.cqe = target->queue_size + 1;
- cq_attr.comp_vector = ch->comp_vector;
- recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, ch,
- &cq_attr);
+ /* queue_size + 1 for ib_drain_qp */
+ recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
+ ch->comp_vector, IB_POLL_SOFTIRQ);
if (IS_ERR(recv_cq)) {
ret = PTR_ERR(recv_cq);
goto err;
}
- cq_attr.cqe = m * target->queue_size;
- cq_attr.comp_vector = ch->comp_vector;
- send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, ch,
- &cq_attr);
+ send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
+ ch->comp_vector, IB_POLL_DIRECT);
if (IS_ERR(send_cq)) {
ret = PTR_ERR(send_cq);
goto err_recv_cq;
}
- ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
-
init_attr->event_handler = srp_qp_event;
init_attr->cap.max_send_wr = m * target->queue_size;
init_attr->cap.max_recv_wr = target->queue_size + 1;
@@ -558,9 +564,9 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
if (ch->qp)
srp_destroy_qp(ch);
if (ch->recv_cq)
- ib_destroy_cq(ch->recv_cq);
+ ib_free_cq(ch->recv_cq);
if (ch->send_cq)
- ib_destroy_cq(ch->send_cq);
+ ib_free_cq(ch->send_cq);
ch->qp = qp;
ch->recv_cq = recv_cq;
@@ -580,13 +586,13 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
return 0;
err_qp:
- ib_destroy_qp(qp);
+ srp_destroy_qp(ch);
err_send_cq:
- ib_destroy_cq(send_cq);
+ ib_free_cq(send_cq);
err_recv_cq:
- ib_destroy_cq(recv_cq);
+ ib_free_cq(recv_cq);
err:
kfree(init_attr);
@@ -622,9 +628,10 @@ static void srp_free_ch_ib(struct srp_target_port *target,
if (ch->fmr_pool)
ib_destroy_fmr_pool(ch->fmr_pool);
}
+
srp_destroy_qp(ch);
- ib_destroy_cq(ch->send_cq);
- ib_destroy_cq(ch->recv_cq);
+ ib_free_cq(ch->send_cq);
+ ib_free_cq(ch->recv_cq);
/*
* Avoid that the SCSI error handler tries to use this channel after
@@ -1041,18 +1048,25 @@ out:
return ret <= 0 ? ret : -ENODEV;
}
-static int srp_inv_rkey(struct srp_rdma_ch *ch, u32 rkey)
+static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ srp_handle_qp_err(cq, wc, "INV RKEY");
+}
+
+static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
+ u32 rkey)
{
struct ib_send_wr *bad_wr;
struct ib_send_wr wr = {
.opcode = IB_WR_LOCAL_INV,
- .wr_id = LOCAL_INV_WR_ID_MASK,
.next = NULL,
.num_sge = 0,
.send_flags = 0,
.ex.invalidate_rkey = rkey,
};
+ wr.wr_cqe = &req->reg_cqe;
+ req->reg_cqe.done = srp_inv_rkey_err_done;
return ib_post_send(ch->qp, &wr, &bad_wr);
}
@@ -1074,7 +1088,7 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
struct srp_fr_desc **pfr;
for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
- res = srp_inv_rkey(ch, (*pfr)->mr->rkey);
+ res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
if (res < 0) {
shost_printk(KERN_ERR, target->scsi_host, PFX
"Queueing INV WR for rkey %#x failed (%d)\n",
@@ -1312,7 +1326,13 @@ reset_state:
return 0;
}
+static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ srp_handle_qp_err(cq, wc, "FAST REG");
+}
+
static int srp_map_finish_fr(struct srp_map_state *state,
+ struct srp_request *req,
struct srp_rdma_ch *ch, int sg_nents)
{
struct srp_target_port *target = ch->target;
@@ -1349,9 +1369,11 @@ static int srp_map_finish_fr(struct srp_map_state *state,
if (unlikely(n < 0))
return n;
+ req->reg_cqe.done = srp_reg_mr_err_done;
+
wr.wr.next = NULL;
wr.wr.opcode = IB_WR_REG_MR;
- wr.wr.wr_id = FAST_REG_WR_ID_MASK;
+ wr.wr.wr_cqe = &req->reg_cqe;
wr.wr.num_sge = 0;
wr.wr.send_flags = 0;
wr.mr = desc->mr;
@@ -1455,7 +1477,7 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
while (count) {
int i, n;
- n = srp_map_finish_fr(state, ch, count);
+ n = srp_map_finish_fr(state, req, ch, count);
if (unlikely(n < 0))
return n;
@@ -1524,7 +1546,7 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
#ifdef CONFIG_NEED_SG_DMA_LENGTH
idb_sg->dma_length = idb_sg->length; /* hack^2 */
#endif
- ret = srp_map_finish_fr(&state, ch, 1);
+ ret = srp_map_finish_fr(&state, req, ch, 1);
if (ret < 0)
return ret;
} else if (dev->use_fmr) {
@@ -1719,7 +1741,7 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
struct srp_iu *iu;
- srp_send_completion(ch->send_cq, ch);
+ ib_process_cq_direct(ch->send_cq, -1);
if (list_empty(&ch->free_tx))
return NULL;
@@ -1739,6 +1761,19 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
return iu;
}
+static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
+ struct srp_rdma_ch *ch = cq->cq_context;
+
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ srp_handle_qp_err(cq, wc, "SEND");
+ return;
+ }
+
+ list_add(&iu->list, &ch->free_tx);
+}
+
static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
{
struct srp_target_port *target = ch->target;
@@ -1749,8 +1784,10 @@ static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
list.length = len;
list.lkey = target->lkey;
+ iu->cqe.done = srp_send_done;
+
wr.next = NULL;
- wr.wr_id = (uintptr_t) iu;
+ wr.wr_cqe = &iu->cqe;
wr.sg_list = &list;
wr.num_sge = 1;
wr.opcode = IB_WR_SEND;
@@ -1769,8 +1806,10 @@ static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
list.length = iu->size;
list.lkey = target->lkey;
+ iu->cqe.done = srp_recv_done;
+
wr.next = NULL;
- wr.wr_id = (uintptr_t) iu;
+ wr.wr_cqe = &iu->cqe;
wr.sg_list = &list;
wr.num_sge = 1;
@@ -1902,14 +1941,20 @@ static void srp_process_aer_req(struct srp_rdma_ch *ch,
"problems processing SRP_AER_REQ\n");
}
-static void srp_handle_recv(struct srp_rdma_ch *ch, struct ib_wc *wc)
+static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
+ struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
+ struct srp_rdma_ch *ch = cq->cq_context;
struct srp_target_port *target = ch->target;
struct ib_device *dev = target->srp_host->srp_dev->dev;
- struct srp_iu *iu = (struct srp_iu *) (uintptr_t) wc->wr_id;
int res;
u8 opcode;
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ srp_handle_qp_err(cq, wc, "RECV");
+ return;
+ }
+
ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
DMA_FROM_DEVICE);
@@ -1972,68 +2017,22 @@ static void srp_tl_err_work(struct work_struct *work)
srp_start_tl_fail_timers(target->rport);
}
-static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status,
- bool send_err, struct srp_rdma_ch *ch)
+static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
+ const char *opname)
{
+ struct srp_rdma_ch *ch = cq->cq_context;
struct srp_target_port *target = ch->target;
- if (wr_id == SRP_LAST_WR_ID) {
- complete(&ch->done);
- return;
- }
-
if (ch->connected && !target->qp_in_error) {
- if (wr_id & LOCAL_INV_WR_ID_MASK) {
- shost_printk(KERN_ERR, target->scsi_host, PFX
- "LOCAL_INV failed with status %s (%d)\n",
- ib_wc_status_msg(wc_status), wc_status);
- } else if (wr_id & FAST_REG_WR_ID_MASK) {
- shost_printk(KERN_ERR, target->scsi_host, PFX
- "FAST_REG_MR failed status %s (%d)\n",
- ib_wc_status_msg(wc_status), wc_status);
- } else {
- shost_printk(KERN_ERR, target->scsi_host,
- PFX "failed %s status %s (%d) for iu %p\n",
- send_err ? "send" : "receive",
- ib_wc_status_msg(wc_status), wc_status,
- (void *)(uintptr_t)wr_id);
- }
+ shost_printk(KERN_ERR, target->scsi_host,
+ PFX "failed %s status %s (%d) for CQE %p\n",
+ opname, ib_wc_status_msg(wc->status), wc->status,
+ wc->wr_cqe);
queue_work(system_long_wq, &target->tl_err_work);
}
target->qp_in_error = true;
}
-static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr)
-{
- struct srp_rdma_ch *ch = ch_ptr;
- struct ib_wc wc;
-
- ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
- while (ib_poll_cq(cq, 1, &wc) > 0) {
- if (likely(wc.status == IB_WC_SUCCESS)) {
- srp_handle_recv(ch, &wc);
- } else {
- srp_handle_qp_err(wc.wr_id, wc.status, false, ch);
- }
- }
-}
-
-static void srp_send_completion(struct ib_cq *cq, void *ch_ptr)
-{
- struct srp_rdma_ch *ch = ch_ptr;
- struct ib_wc wc;
- struct srp_iu *iu;
-
- while (ib_poll_cq(cq, 1, &wc) > 0) {
- if (likely(wc.status == IB_WC_SUCCESS)) {
- iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
- list_add(&iu->list, &ch->free_tx);
- } else {
- srp_handle_qp_err(wc.wr_id, wc.status, true, ch);
- }
- }
-}
-
static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
{
struct srp_target_port *target = host_to_target(shost);
@@ -3439,27 +3438,17 @@ free_host:
static void srp_add_one(struct ib_device *device)
{
struct srp_device *srp_dev;
- struct ib_device_attr *dev_attr;
struct srp_host *host;
int mr_page_shift, p;
u64 max_pages_per_mr;
- dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
- if (!dev_attr)
- return;
-
- if (ib_query_device(device, dev_attr)) {
- pr_warn("Query device failed for %s\n", device->name);
- goto free_attr;
- }
-
srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL);
if (!srp_dev)
- goto free_attr;
+ return;
srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
device->map_phys_fmr && device->unmap_fmr);
- srp_dev->has_fr = (dev_attr->device_cap_flags &
+ srp_dev->has_fr = (device->attrs.device_cap_flags &
IB_DEVICE_MEM_MGT_EXTENSIONS);
if (!srp_dev->has_fmr && !srp_dev->has_fr)
dev_warn(&device->dev, "neither FMR nor FR is supported\n");
@@ -3473,23 +3462,23 @@ static void srp_add_one(struct ib_device *device)
* minimum of 4096 bytes. We're unlikely to build large sglists
* out of smaller entries.
*/
- mr_page_shift = max(12, ffs(dev_attr->page_size_cap) - 1);
+ mr_page_shift = max(12, ffs(device->attrs.page_size_cap) - 1);
srp_dev->mr_page_size = 1 << mr_page_shift;
srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
- max_pages_per_mr = dev_attr->max_mr_size;
+ max_pages_per_mr = device->attrs.max_mr_size;
do_div(max_pages_per_mr, srp_dev->mr_page_size);
srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
max_pages_per_mr);
if (srp_dev->use_fast_reg) {
srp_dev->max_pages_per_mr =
min_t(u32, srp_dev->max_pages_per_mr,
- dev_attr->max_fast_reg_page_list_len);
+ device->attrs.max_fast_reg_page_list_len);
}
srp_dev->mr_max_size = srp_dev->mr_page_size *
srp_dev->max_pages_per_mr;
- pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
- device->name, mr_page_shift, dev_attr->max_mr_size,
- dev_attr->max_fast_reg_page_list_len,
+ pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
+ device->name, mr_page_shift, device->attrs.max_mr_size,
+ device->attrs.max_fast_reg_page_list_len,
srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
INIT_LIST_HEAD(&srp_dev->dev_list);
@@ -3517,17 +3506,13 @@ static void srp_add_one(struct ib_device *device)
}
ib_set_client_data(device, &srp_client, srp_dev);
-
- goto free_attr;
+ return;
err_pd:
ib_dealloc_pd(srp_dev->pd);
free_dev:
kfree(srp_dev);
-
-free_attr:
- kfree(dev_attr);
}
static void srp_remove_one(struct ib_device *device, void *client_data)
@@ -3587,8 +3572,6 @@ static int __init srp_init_module(void)
{
int ret;
- BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *));
-
if (srp_sg_tablesize) {
pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
if (!cmd_sg_entries)
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index f6af531..9e05ce4 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -66,11 +66,6 @@ enum {
SRP_TAG_TSK_MGMT = 1U << 31,
SRP_MAX_PAGES_PER_MR = 512,
-
- LOCAL_INV_WR_ID_MASK = 1,
- FAST_REG_WR_ID_MASK = 2,
-
- SRP_LAST_WR_ID = 0xfffffffcU,
};
enum srp_target_state {
@@ -128,6 +123,7 @@ struct srp_request {
struct srp_direct_buf *indirect_desc;
dma_addr_t indirect_dma_addr;
short nmdesc;
+ struct ib_cqe reg_cqe;
};
/**
@@ -231,6 +227,7 @@ struct srp_iu {
void *buf;
size_t size;
enum dma_data_direction direction;
+ struct ib_cqe cqe;
};
/**
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index bc5470c..0c37fee 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -93,6 +93,8 @@ MODULE_PARM_DESC(srpt_service_guid,
static struct ib_client srpt_client;
static void srpt_release_channel(struct srpt_rdma_ch *ch);
static int srpt_queue_status(struct se_cmd *cmd);
+static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc);
+static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc);
/**
* opposite_dma_dir() - Swap DMA_TO_DEVICE and DMA_FROM_DEVICE.
@@ -341,10 +343,10 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot,
memset(iocp, 0, sizeof *iocp);
strcpy(iocp->id_string, SRPT_ID_STRING);
iocp->guid = cpu_to_be64(srpt_service_guid);
- iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
- iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id);
- iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver);
- iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
+ iocp->vendor_id = cpu_to_be32(sdev->device->attrs.vendor_id);
+ iocp->device_id = cpu_to_be32(sdev->device->attrs.vendor_part_id);
+ iocp->device_version = cpu_to_be16(sdev->device->attrs.hw_ver);
+ iocp->subsys_vendor_id = cpu_to_be32(sdev->device->attrs.vendor_id);
iocp->subsys_device_id = 0x0;
iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS);
iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS);
@@ -453,6 +455,7 @@ static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
* srpt_mad_recv_handler() - MAD reception callback function.
*/
static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_buf *send_buf,
struct ib_mad_recv_wc *mad_wc)
{
struct srpt_port *sport = (struct srpt_port *)mad_agent->context;
@@ -778,12 +781,12 @@ static int srpt_post_recv(struct srpt_device *sdev,
struct ib_recv_wr wr, *bad_wr;
BUG_ON(!sdev);
- wr.wr_id = encode_wr_id(SRPT_RECV, ioctx->ioctx.index);
-
list.addr = ioctx->ioctx.dma;
list.length = srp_max_req_size;
list.lkey = sdev->pd->local_dma_lkey;
+ ioctx->ioctx.cqe.done = srpt_recv_done;
+ wr.wr_cqe = &ioctx->ioctx.cqe;
wr.next = NULL;
wr.sg_list = &list;
wr.num_sge = 1;
@@ -819,8 +822,9 @@ static int srpt_post_send(struct srpt_rdma_ch *ch,
list.length = len;
list.lkey = sdev->pd->local_dma_lkey;
+ ioctx->ioctx.cqe.done = srpt_send_done;
wr.next = NULL;
- wr.wr_id = encode_wr_id(SRPT_SEND, ioctx->ioctx.index);
+ wr.wr_cqe = &ioctx->ioctx.cqe;
wr.sg_list = &list;
wr.num_sge = 1;
wr.opcode = IB_WR_SEND;
@@ -1052,13 +1056,13 @@ static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,
BUG_ON(!ch);
BUG_ON(!ioctx);
- BUG_ON(ioctx->n_rdma && !ioctx->rdma_ius);
+ BUG_ON(ioctx->n_rdma && !ioctx->rdma_wrs);
while (ioctx->n_rdma)
- kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge);
+ kfree(ioctx->rdma_wrs[--ioctx->n_rdma].wr.sg_list);
- kfree(ioctx->rdma_ius);
- ioctx->rdma_ius = NULL;
+ kfree(ioctx->rdma_wrs);
+ ioctx->rdma_wrs = NULL;
if (ioctx->mapped_sg_count) {
sg = ioctx->sg;
@@ -1082,7 +1086,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
struct scatterlist *sg, *sg_orig;
int sg_cnt;
enum dma_data_direction dir;
- struct rdma_iu *riu;
+ struct ib_rdma_wr *riu;
struct srp_direct_buf *db;
dma_addr_t dma_addr;
struct ib_sge *sge;
@@ -1109,23 +1113,24 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
ioctx->mapped_sg_count = count;
- if (ioctx->rdma_ius && ioctx->n_rdma_ius)
- nrdma = ioctx->n_rdma_ius;
+ if (ioctx->rdma_wrs && ioctx->n_rdma_wrs)
+ nrdma = ioctx->n_rdma_wrs;
else {
nrdma = (count + SRPT_DEF_SG_PER_WQE - 1) / SRPT_DEF_SG_PER_WQE
+ ioctx->n_rbuf;
- ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu, GFP_KERNEL);
- if (!ioctx->rdma_ius)
+ ioctx->rdma_wrs = kcalloc(nrdma, sizeof(*ioctx->rdma_wrs),
+ GFP_KERNEL);
+ if (!ioctx->rdma_wrs)
goto free_mem;
- ioctx->n_rdma_ius = nrdma;
+ ioctx->n_rdma_wrs = nrdma;
}
db = ioctx->rbufs;
tsize = cmd->data_length;
dma_len = ib_sg_dma_len(dev, &sg[0]);
- riu = ioctx->rdma_ius;
+ riu = ioctx->rdma_wrs;
/*
* For each remote desc - calculate the #ib_sge.
@@ -1139,9 +1144,9 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
rsize = be32_to_cpu(db->len);
raddr = be64_to_cpu(db->va);
- riu->raddr = raddr;
+ riu->remote_addr = raddr;
riu->rkey = be32_to_cpu(db->key);
- riu->sge_cnt = 0;
+ riu->wr.num_sge = 0;
/* calculate how many sge required for this remote_buf */
while (rsize > 0 && tsize > 0) {
@@ -1165,33 +1170,35 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
rsize = 0;
}
- ++riu->sge_cnt;
+ ++riu->wr.num_sge;
- if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) {
+ if (rsize > 0 &&
+ riu->wr.num_sge == SRPT_DEF_SG_PER_WQE) {
++ioctx->n_rdma;
- riu->sge =
- kmalloc(riu->sge_cnt * sizeof *riu->sge,
- GFP_KERNEL);
- if (!riu->sge)
+ riu->wr.sg_list = kmalloc_array(riu->wr.num_sge,
+ sizeof(*riu->wr.sg_list),
+ GFP_KERNEL);
+ if (!riu->wr.sg_list)
goto free_mem;
++riu;
- riu->sge_cnt = 0;
- riu->raddr = raddr;
+ riu->wr.num_sge = 0;
+ riu->remote_addr = raddr;
riu->rkey = be32_to_cpu(db->key);
}
}
++ioctx->n_rdma;
- riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge,
- GFP_KERNEL);
- if (!riu->sge)
+ riu->wr.sg_list = kmalloc_array(riu->wr.num_sge,
+ sizeof(*riu->wr.sg_list),
+ GFP_KERNEL);
+ if (!riu->wr.sg_list)
goto free_mem;
}
db = ioctx->rbufs;
tsize = cmd->data_length;
- riu = ioctx->rdma_ius;
+ riu = ioctx->rdma_wrs;
sg = sg_orig;
dma_len = ib_sg_dma_len(dev, &sg[0]);
dma_addr = ib_sg_dma_address(dev, &sg[0]);
@@ -1200,7 +1207,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
for (i = 0, j = 0;
j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
rsize = be32_to_cpu(db->len);
- sge = riu->sge;
+ sge = riu->wr.sg_list;
k = 0;
while (rsize > 0 && tsize > 0) {
@@ -1232,9 +1239,9 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
}
++k;
- if (k == riu->sge_cnt && rsize > 0 && tsize > 0) {
+ if (k == riu->wr.num_sge && rsize > 0 && tsize > 0) {
++riu;
- sge = riu->sge;
+ sge = riu->wr.sg_list;
k = 0;
} else if (rsize > 0 && tsize > 0)
++sge;
@@ -1277,8 +1284,8 @@ static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch)
ioctx->n_rbuf = 0;
ioctx->rbufs = NULL;
ioctx->n_rdma = 0;
- ioctx->n_rdma_ius = 0;
- ioctx->rdma_ius = NULL;
+ ioctx->n_rdma_wrs = 0;
+ ioctx->rdma_wrs = NULL;
ioctx->mapped_sg_count = 0;
init_completion(&ioctx->tx_done);
ioctx->queue_status_only = false;
@@ -1380,118 +1387,44 @@ out:
}
/**
- * srpt_handle_send_err_comp() - Process an IB_WC_SEND error completion.
- */
-static void srpt_handle_send_err_comp(struct srpt_rdma_ch *ch, u64 wr_id)
-{
- struct srpt_send_ioctx *ioctx;
- enum srpt_command_state state;
- u32 index;
-
- atomic_inc(&ch->sq_wr_avail);
-
- index = idx_from_wr_id(wr_id);
- ioctx = ch->ioctx_ring[index];
- state = srpt_get_cmd_state(ioctx);
-
- WARN_ON(state != SRPT_STATE_CMD_RSP_SENT
- && state != SRPT_STATE_MGMT_RSP_SENT
- && state != SRPT_STATE_NEED_DATA
- && state != SRPT_STATE_DONE);
-
- /* If SRP_RSP sending failed, undo the ch->req_lim change. */
- if (state == SRPT_STATE_CMD_RSP_SENT
- || state == SRPT_STATE_MGMT_RSP_SENT)
- atomic_dec(&ch->req_lim);
-
- srpt_abort_cmd(ioctx);
-}
-
-/**
- * srpt_handle_send_comp() - Process an IB send completion notification.
- */
-static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
- struct srpt_send_ioctx *ioctx)
-{
- enum srpt_command_state state;
-
- atomic_inc(&ch->sq_wr_avail);
-
- state = srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
-
- if (WARN_ON(state != SRPT_STATE_CMD_RSP_SENT
- && state != SRPT_STATE_MGMT_RSP_SENT
- && state != SRPT_STATE_DONE))
- pr_debug("state = %d\n", state);
-
- if (state != SRPT_STATE_DONE) {
- srpt_unmap_sg_to_ib_sge(ch, ioctx);
- transport_generic_free_cmd(&ioctx->cmd, 0);
- } else {
- pr_err("IB completion has been received too late for"
- " wr_id = %u.\n", ioctx->ioctx.index);
- }
-}
-
-/**
- * srpt_handle_rdma_comp() - Process an IB RDMA completion notification.
- *
* XXX: what is now target_execute_cmd used to be asynchronous, and unmapping
* the data that has been transferred via IB RDMA had to be postponed until the
* check_stop_free() callback. None of this is necessary anymore and needs to
* be cleaned up.
*/
-static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
- struct srpt_send_ioctx *ioctx,
- enum srpt_opcode opcode)
+static void srpt_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc)
{
+ struct srpt_rdma_ch *ch = cq->cq_context;
+ struct srpt_send_ioctx *ioctx =
+ container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe);
+
WARN_ON(ioctx->n_rdma <= 0);
atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
- if (opcode == SRPT_RDMA_READ_LAST) {
- if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
- SRPT_STATE_DATA_IN))
- target_execute_cmd(&ioctx->cmd);
- else
- pr_err("%s[%d]: wrong state = %d\n", __func__,
- __LINE__, srpt_get_cmd_state(ioctx));
- } else if (opcode == SRPT_RDMA_ABORT) {
- ioctx->rdma_aborted = true;
- } else {
- WARN(true, "unexpected opcode %d\n", opcode);
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ pr_info("RDMA_READ for ioctx 0x%p failed with status %d\n",
+ ioctx, wc->status);
+ srpt_abort_cmd(ioctx);
+ return;
}
+
+ if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
+ SRPT_STATE_DATA_IN))
+ target_execute_cmd(&ioctx->cmd);
+ else
+ pr_err("%s[%d]: wrong state = %d\n", __func__,
+ __LINE__, srpt_get_cmd_state(ioctx));
}
-/**
- * srpt_handle_rdma_err_comp() - Process an IB RDMA error completion.
- */
-static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch,
- struct srpt_send_ioctx *ioctx,
- enum srpt_opcode opcode)
+static void srpt_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
{
- enum srpt_command_state state;
+ struct srpt_send_ioctx *ioctx =
+ container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe);
- state = srpt_get_cmd_state(ioctx);
- switch (opcode) {
- case SRPT_RDMA_READ_LAST:
- if (ioctx->n_rdma <= 0) {
- pr_err("Received invalid RDMA read"
- " error completion with idx %d\n",
- ioctx->ioctx.index);
- break;
- }
- atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
- if (state == SRPT_STATE_NEED_DATA)
- srpt_abort_cmd(ioctx);
- else
- pr_err("%s[%d]: wrong state = %d\n",
- __func__, __LINE__, state);
- break;
- case SRPT_RDMA_WRITE_LAST:
- break;
- default:
- pr_err("%s[%d]: opcode = %u\n", __func__, __LINE__, opcode);
- break;
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ pr_info("RDMA_WRITE for ioctx 0x%p failed with status %d\n",
+ ioctx, wc->status);
+ srpt_abort_cmd(ioctx);
}
}
@@ -1926,32 +1859,26 @@ out:
return;
}
-static void srpt_process_rcv_completion(struct ib_cq *cq,
- struct srpt_rdma_ch *ch,
- struct ib_wc *wc)
+static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
- struct srpt_device *sdev = ch->sport->sdev;
- struct srpt_recv_ioctx *ioctx;
- u32 index;
+ struct srpt_rdma_ch *ch = cq->cq_context;
+ struct srpt_recv_ioctx *ioctx =
+ container_of(wc->wr_cqe, struct srpt_recv_ioctx, ioctx.cqe);
- index = idx_from_wr_id(wc->wr_id);
if (wc->status == IB_WC_SUCCESS) {
int req_lim;
req_lim = atomic_dec_return(&ch->req_lim);
if (unlikely(req_lim < 0))
pr_err("req_lim = %d < 0\n", req_lim);
- ioctx = sdev->ioctx_ring[index];
srpt_handle_new_iu(ch, ioctx, NULL);
} else {
- pr_info("receiving failed for idx %u with status %d\n",
- index, wc->status);
+ pr_info("receiving failed for ioctx %p with status %d\n",
+ ioctx, wc->status);
}
}
/**
- * srpt_process_send_completion() - Process an IB send completion.
- *
* Note: Although this has not yet been observed during tests, at least in
* theory it is possible that the srpt_get_send_ioctx() call invoked by
* srpt_handle_new_iu() fails. This is possible because the req_lim_delta
@@ -1964,109 +1891,52 @@ static void srpt_process_rcv_completion(struct ib_cq *cq,
* are queued on cmd_wait_list. The code below processes these delayed
* requests one at a time.
*/
-static void srpt_process_send_completion(struct ib_cq *cq,
- struct srpt_rdma_ch *ch,
- struct ib_wc *wc)
+static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
- struct srpt_send_ioctx *send_ioctx;
- uint32_t index;
- enum srpt_opcode opcode;
+ struct srpt_rdma_ch *ch = cq->cq_context;
+ struct srpt_send_ioctx *ioctx =
+ container_of(wc->wr_cqe, struct srpt_send_ioctx, ioctx.cqe);
+ enum srpt_command_state state;
- index = idx_from_wr_id(wc->wr_id);
- opcode = opcode_from_wr_id(wc->wr_id);
- send_ioctx = ch->ioctx_ring[index];
- if (wc->status == IB_WC_SUCCESS) {
- if (opcode == SRPT_SEND)
- srpt_handle_send_comp(ch, send_ioctx);
- else {
- WARN_ON(opcode != SRPT_RDMA_ABORT &&
- wc->opcode != IB_WC_RDMA_READ);
- srpt_handle_rdma_comp(ch, send_ioctx, opcode);
- }
+ state = srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
+
+ WARN_ON(state != SRPT_STATE_CMD_RSP_SENT &&
+ state != SRPT_STATE_MGMT_RSP_SENT);
+
+ atomic_inc(&ch->sq_wr_avail);
+
+ if (wc->status != IB_WC_SUCCESS) {
+ pr_info("sending response for ioctx 0x%p failed"
+ " with status %d\n", ioctx, wc->status);
+
+ atomic_dec(&ch->req_lim);
+ srpt_abort_cmd(ioctx);
+ goto out;
+ }
+
+ if (state != SRPT_STATE_DONE) {
+ srpt_unmap_sg_to_ib_sge(ch, ioctx);
+ transport_generic_free_cmd(&ioctx->cmd, 0);
} else {
- if (opcode == SRPT_SEND) {
- pr_info("sending response for idx %u failed"
- " with status %d\n", index, wc->status);
- srpt_handle_send_err_comp(ch, wc->wr_id);
- } else if (opcode != SRPT_RDMA_MID) {
- pr_info("RDMA t %d for idx %u failed with"
- " status %d\n", opcode, index, wc->status);
- srpt_handle_rdma_err_comp(ch, send_ioctx, opcode);
- }
+ pr_err("IB completion has been received too late for"
+ " wr_id = %u.\n", ioctx->ioctx.index);
}
- while (unlikely(opcode == SRPT_SEND
- && !list_empty(&ch->cmd_wait_list)
- && srpt_get_ch_state(ch) == CH_LIVE
- && (send_ioctx = srpt_get_send_ioctx(ch)) != NULL)) {
+out:
+ while (!list_empty(&ch->cmd_wait_list) &&
+ srpt_get_ch_state(ch) == CH_LIVE &&
+ (ioctx = srpt_get_send_ioctx(ch)) != NULL) {
struct srpt_recv_ioctx *recv_ioctx;
recv_ioctx = list_first_entry(&ch->cmd_wait_list,
struct srpt_recv_ioctx,
wait_list);
list_del(&recv_ioctx->wait_list);
- srpt_handle_new_iu(ch, recv_ioctx, send_ioctx);
- }
-}
-
-static void srpt_process_completion(struct ib_cq *cq, struct srpt_rdma_ch *ch)
-{
- struct ib_wc *const wc = ch->wc;
- int i, n;
-
- WARN_ON(cq != ch->cq);
-
- ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
- while ((n = ib_poll_cq(cq, ARRAY_SIZE(ch->wc), wc)) > 0) {
- for (i = 0; i < n; i++) {
- if (opcode_from_wr_id(wc[i].wr_id) == SRPT_RECV)
- srpt_process_rcv_completion(cq, ch, &wc[i]);
- else
- srpt_process_send_completion(cq, ch, &wc[i]);
- }
+ srpt_handle_new_iu(ch, recv_ioctx, ioctx);
}
}
/**
- * srpt_completion() - IB completion queue callback function.
- *
- * Notes:
- * - It is guaranteed that a completion handler will never be invoked
- * concurrently on two different CPUs for the same completion queue. See also
- * Documentation/infiniband/core_locking.txt and the implementation of
- * handle_edge_irq() in kernel/irq/chip.c.
- * - When threaded IRQs are enabled, completion handlers are invoked in thread
- * context instead of interrupt context.
- */
-static void srpt_completion(struct ib_cq *cq, void *ctx)
-{
- struct srpt_rdma_ch *ch = ctx;
-
- wake_up_interruptible(&ch->wait_queue);
-}
-
-static int srpt_compl_thread(void *arg)
-{
- struct srpt_rdma_ch *ch;
-
- /* Hibernation / freezing of the SRPT kernel thread is not supported. */
- current->flags |= PF_NOFREEZE;
-
- ch = arg;
- BUG_ON(!ch);
- pr_info("Session %s: kernel thread %s (PID %d) started\n",
- ch->sess_name, ch->thread->comm, current->pid);
- while (!kthread_should_stop()) {
- wait_event_interruptible(ch->wait_queue,
- (srpt_process_completion(ch->cq, ch),
- kthread_should_stop()));
- }
- pr_info("Session %s: kernel thread %s (PID %d) stopped\n",
- ch->sess_name, ch->thread->comm, current->pid);
- return 0;
-}
-
-/**
* srpt_create_ch_ib() - Create receive and send completion queues.
*/
static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
@@ -2075,7 +1945,6 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
struct srpt_port *sport = ch->sport;
struct srpt_device *sdev = sport->sdev;
u32 srp_sq_size = sport->port_attrib.srp_sq_size;
- struct ib_cq_init_attr cq_attr = {};
int ret;
WARN_ON(ch->rq_size < 1);
@@ -2086,9 +1955,8 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
goto out;
retry:
- cq_attr.cqe = ch->rq_size + srp_sq_size;
- ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch,
- &cq_attr);
+ ch->cq = ib_alloc_cq(sdev->device, ch, ch->rq_size + srp_sq_size,
+ 0 /* XXX: spread CQs */, IB_POLL_WORKQUEUE);
if (IS_ERR(ch->cq)) {
ret = PTR_ERR(ch->cq);
pr_err("failed to create CQ cqe= %d ret= %d\n",
@@ -2131,18 +1999,6 @@ retry:
if (ret)
goto err_destroy_qp;
- init_waitqueue_head(&ch->wait_queue);
-
- pr_debug("creating thread for session %s\n", ch->sess_name);
-
- ch->thread = kthread_run(srpt_compl_thread, ch, "ib_srpt_compl");
- if (IS_ERR(ch->thread)) {
- pr_err("failed to create kernel thread %ld\n",
- PTR_ERR(ch->thread));
- ch->thread = NULL;
- goto err_destroy_qp;
- }
-
out:
kfree(qp_init);
return ret;
@@ -2150,17 +2006,14 @@ out:
err_destroy_qp:
ib_destroy_qp(ch->qp);
err_destroy_cq:
- ib_destroy_cq(ch->cq);
+ ib_free_cq(ch->cq);
goto out;
}
static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch)
{
- if (ch->thread)
- kthread_stop(ch->thread);
-
ib_destroy_qp(ch->qp);
- ib_destroy_cq(ch->cq);
+ ib_free_cq(ch->cq);
}
/**
@@ -2808,12 +2661,8 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
struct srpt_send_ioctx *ioctx)
{
- struct ib_rdma_wr wr;
struct ib_send_wr *bad_wr;
- struct rdma_iu *riu;
- int i;
- int ret;
- int sq_wr_avail;
+ int sq_wr_avail, ret, i;
enum dma_data_direction dir;
const int n_rdma = ioctx->n_rdma;
@@ -2829,59 +2678,32 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
}
}
- ioctx->rdma_aborted = false;
- ret = 0;
- riu = ioctx->rdma_ius;
- memset(&wr, 0, sizeof wr);
-
- for (i = 0; i < n_rdma; ++i, ++riu) {
- if (dir == DMA_FROM_DEVICE) {
- wr.wr.opcode = IB_WR_RDMA_WRITE;
- wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
- SRPT_RDMA_WRITE_LAST :
- SRPT_RDMA_MID,
- ioctx->ioctx.index);
- } else {
- wr.wr.opcode = IB_WR_RDMA_READ;
- wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
- SRPT_RDMA_READ_LAST :
- SRPT_RDMA_MID,
- ioctx->ioctx.index);
- }
- wr.wr.next = NULL;
- wr.remote_addr = riu->raddr;
- wr.rkey = riu->rkey;
- wr.wr.num_sge = riu->sge_cnt;
- wr.wr.sg_list = riu->sge;
+ for (i = 0; i < n_rdma; i++) {
+ struct ib_send_wr *wr = &ioctx->rdma_wrs[i].wr;
- /* only get completion event for the last rdma write */
- if (i == (n_rdma - 1) && dir == DMA_TO_DEVICE)
- wr.wr.send_flags = IB_SEND_SIGNALED;
+ wr->opcode = (dir == DMA_FROM_DEVICE) ?
+ IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
- ret = ib_post_send(ch->qp, &wr.wr, &bad_wr);
- if (ret)
- break;
+ if (i == n_rdma - 1) {
+ /* only get completion event for the last rdma read */
+ if (dir == DMA_TO_DEVICE) {
+ wr->send_flags = IB_SEND_SIGNALED;
+ ioctx->rdma_cqe.done = srpt_rdma_read_done;
+ } else {
+ ioctx->rdma_cqe.done = srpt_rdma_write_done;
+ }
+ wr->wr_cqe = &ioctx->rdma_cqe;
+ wr->next = NULL;
+ } else {
+ wr->wr_cqe = NULL;
+ wr->next = &ioctx->rdma_wrs[i + 1].wr;
+ }
}
+ ret = ib_post_send(ch->qp, &ioctx->rdma_wrs->wr, &bad_wr);
if (ret)
pr_err("%s[%d]: ib_post_send() returned %d for %d/%d\n",
__func__, __LINE__, ret, i, n_rdma);
- if (ret && i > 0) {
- wr.wr.num_sge = 0;
- wr.wr.wr_id = encode_wr_id(SRPT_RDMA_ABORT, ioctx->ioctx.index);
- wr.wr.send_flags = IB_SEND_SIGNALED;
- while (ch->state == CH_LIVE &&
- ib_post_send(ch->qp, &wr.wr, &bad_wr) != 0) {
- pr_info("Trying to abort failed RDMA transfer [%d]\n",
- ioctx->ioctx.index);
- msleep(1000);
- }
- while (ch->state != CH_RELEASING && !ioctx->rdma_aborted) {
- pr_info("Waiting until RDMA abort finished [%d]\n",
- ioctx->ioctx.index);
- msleep(1000);
- }
- }
out:
if (unlikely(dir == DMA_TO_DEVICE && ret < 0))
atomic_add(n_rdma, &ch->sq_wr_avail);
@@ -3190,14 +3012,11 @@ static void srpt_add_one(struct ib_device *device)
init_waitqueue_head(&sdev->ch_releaseQ);
spin_lock_init(&sdev->spinlock);
- if (ib_query_device(device, &sdev->dev_attr))
- goto free_dev;
-
sdev->pd = ib_alloc_pd(device);
if (IS_ERR(sdev->pd))
goto free_dev;
- sdev->srq_size = min(srpt_srq_size, sdev->dev_attr.max_srq_wr);
+ sdev->srq_size = min(srpt_srq_size, sdev->device->attrs.max_srq_wr);
srq_attr.event_handler = srpt_srq_event;
srq_attr.srq_context = (void *)sdev;
@@ -3211,7 +3030,7 @@ static void srpt_add_one(struct ib_device *device)
goto err_pd;
pr_debug("%s: create SRQ #wr= %d max_allow=%d dev= %s\n",
- __func__, sdev->srq_size, sdev->dev_attr.max_srq_wr,
+ __func__, sdev->srq_size, sdev->device->attrs.max_srq_wr,
device->name);
if (!srpt_service_guid)
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index 5366e0a..09037f2b 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -128,36 +128,6 @@ enum {
DEFAULT_MAX_RDMA_SIZE = 65536,
};
-enum srpt_opcode {
- SRPT_RECV,
- SRPT_SEND,
- SRPT_RDMA_MID,
- SRPT_RDMA_ABORT,
- SRPT_RDMA_READ_LAST,
- SRPT_RDMA_WRITE_LAST,
-};
-
-static inline u64 encode_wr_id(u8 opcode, u32 idx)
-{
- return ((u64)opcode << 32) | idx;
-}
-static inline enum srpt_opcode opcode_from_wr_id(u64 wr_id)
-{
- return wr_id >> 32;
-}
-static inline u32 idx_from_wr_id(u64 wr_id)
-{
- return (u32)wr_id;
-}
-
-struct rdma_iu {
- u64 raddr;
- u32 rkey;
- struct ib_sge *sge;
- u32 sge_cnt;
- int mem_id;
-};
-
/**
* enum srpt_command_state - SCSI command state managed by SRPT.
* @SRPT_STATE_NEW: New command arrived and is being processed.
@@ -189,6 +159,7 @@ enum srpt_command_state {
* @index: Index of the I/O context in its ioctx_ring array.
*/
struct srpt_ioctx {
+ struct ib_cqe cqe;
void *buf;
dma_addr_t dma;
uint32_t index;
@@ -215,32 +186,30 @@ struct srpt_recv_ioctx {
* @sg: Pointer to sg-list associated with this I/O context.
* @sg_cnt: SG-list size.
* @mapped_sg_count: ib_dma_map_sg() return value.
- * @n_rdma_ius: Number of elements in the rdma_ius array.
- * @rdma_ius: Array with information about the RDMA mapping.
+ * @n_rdma_wrs: Number of elements in the rdma_wrs array.
+ * @rdma_wrs: Array with information about the RDMA mapping.
* @tag: Tag of the received SRP information unit.
* @spinlock: Protects 'state'.
* @state: I/O context state.
- * @rdma_aborted: If initiating a multipart RDMA transfer failed, whether
- * the already initiated transfers have finished.
* @cmd: Target core command data structure.
* @sense_data: SCSI sense data.
*/
struct srpt_send_ioctx {
struct srpt_ioctx ioctx;
struct srpt_rdma_ch *ch;
- struct rdma_iu *rdma_ius;
+ struct ib_rdma_wr *rdma_wrs;
+ struct ib_cqe rdma_cqe;
struct srp_direct_buf *rbufs;
struct srp_direct_buf single_rbuf;
struct scatterlist *sg;
struct list_head free_list;
spinlock_t spinlock;
enum srpt_command_state state;
- bool rdma_aborted;
struct se_cmd cmd;
struct completion tx_done;
int sg_cnt;
int mapped_sg_count;
- u16 n_rdma_ius;
+ u16 n_rdma_wrs;
u8 n_rdma;
u8 n_rbuf;
bool queue_status_only;
@@ -267,9 +236,6 @@ enum rdma_ch_state {
/**
* struct srpt_rdma_ch - RDMA channel.
- * @wait_queue: Allows the kernel thread to wait for more work.
- * @thread: Kernel thread that processes the IB queues associated with
- * the channel.
* @cm_id: IB CM ID associated with the channel.
* @qp: IB queue pair used for communicating over this channel.
* @cq: IB completion queue for this channel.
@@ -288,7 +254,6 @@ enum rdma_ch_state {
* @free_list: Head of list with free send I/O contexts.
* @state: channel state. See also enum rdma_ch_state.
* @ioctx_ring: Send ring.
- * @wc: IB work completion array for srpt_process_completion().
* @list: Node for insertion in the srpt_device.rch_list list.
* @cmd_wait_list: List of SCSI commands that arrived before the RTU event. This
* list contains struct srpt_ioctx elements and is protected
@@ -299,8 +264,6 @@ enum rdma_ch_state {
* @release_done: Enables waiting for srpt_release_channel() completion.
*/
struct srpt_rdma_ch {
- wait_queue_head_t wait_queue;
- struct task_struct *thread;
struct ib_cm_id *cm_id;
struct ib_qp *qp;
struct ib_cq *cq;
@@ -317,7 +280,6 @@ struct srpt_rdma_ch {
struct list_head free_list;
enum rdma_ch_state state;
struct srpt_send_ioctx **ioctx_ring;
- struct ib_wc wc[16];
struct list_head list;
struct list_head cmd_wait_list;
struct se_session *sess;
@@ -377,8 +339,6 @@ struct srpt_port {
* @mr: L_Key (local key) with write access to all local memory.
* @srq: Per-HCA SRQ (shared receive queue).
* @cm_id: Connection identifier.
- * @dev_attr: Attributes of the InfiniBand device as obtained during the
- * ib_client.add() callback.
* @srq_size: SRQ size.
* @ioctx_ring: Per-HCA SRQ.
* @rch_list: Per-device channel list -- see also srpt_rdma_ch.list.
@@ -393,7 +353,6 @@ struct srpt_device {
struct ib_pd *pd;
struct ib_srq *srq;
struct ib_cm_id *cm_id;
- struct ib_device_attr dev_attr;
int srq_size;
struct srpt_recv_ioctx **ioctx_ring;
struct list_head rch_list;
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index fd4100d..6727954 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -76,10 +76,13 @@
*/
#include <linux/kernel.h>
+#include <linux/input.h>
+#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/module.h>
#include <linux/usb/input.h>
+#include <linux/usb/quirks.h>
#define DRIVER_AUTHOR "Marko Friedemann <mfr@bmx-chemnitz.de>"
#define DRIVER_DESC "X-Box pad driver"
@@ -125,7 +128,7 @@ static const struct xpad_device {
{ 0x045e, 0x0289, "Microsoft X-Box pad v2 (US)", 0, XTYPE_XBOX },
{ 0x045e, 0x028e, "Microsoft X-Box 360 pad", 0, XTYPE_XBOX360 },
{ 0x045e, 0x02d1, "Microsoft X-Box One pad", 0, XTYPE_XBOXONE },
- { 0x045e, 0x02dd, "Microsoft X-Box One pad (Covert Forces)", 0, XTYPE_XBOXONE },
+ { 0x045e, 0x02dd, "Microsoft X-Box One pad (Firmware 2015)", 0, XTYPE_XBOXONE },
{ 0x045e, 0x0291, "Xbox 360 Wireless Receiver (XBOX)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W },
{ 0x045e, 0x0719, "Xbox 360 Wireless Receiver", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W },
{ 0x044f, 0x0f07, "Thrustmaster, Inc. Controller", 0, XTYPE_XBOX },
@@ -317,21 +320,42 @@ static struct usb_device_id xpad_table[] = {
MODULE_DEVICE_TABLE(usb, xpad_table);
+struct xpad_output_packet {
+ u8 data[XPAD_PKT_LEN];
+ u8 len;
+ bool pending;
+};
+
+#define XPAD_OUT_CMD_IDX 0
+#define XPAD_OUT_FF_IDX 1
+#define XPAD_OUT_LED_IDX (1 + IS_ENABLED(CONFIG_JOYSTICK_XPAD_FF))
+#define XPAD_NUM_OUT_PACKETS (1 + \
+ IS_ENABLED(CONFIG_JOYSTICK_XPAD_FF) + \
+ IS_ENABLED(CONFIG_JOYSTICK_XPAD_LEDS))
+
struct usb_xpad {
struct input_dev *dev; /* input device interface */
+ struct input_dev __rcu *x360w_dev;
struct usb_device *udev; /* usb device */
struct usb_interface *intf; /* usb interface */
- int pad_present;
+ bool pad_present;
+ bool input_created;
struct urb *irq_in; /* urb for interrupt in report */
unsigned char *idata; /* input data */
dma_addr_t idata_dma;
struct urb *irq_out; /* urb for interrupt out report */
+ struct usb_anchor irq_out_anchor;
+ bool irq_out_active; /* we must not use an active URB */
+ u8 odata_serial; /* serial number for xbox one protocol */
unsigned char *odata; /* output data */
dma_addr_t odata_dma;
- struct mutex odata_mutex;
+ spinlock_t odata_lock;
+
+ struct xpad_output_packet out_packets[XPAD_NUM_OUT_PACKETS];
+ int last_out_packet;
#if defined(CONFIG_JOYSTICK_XPAD_LEDS)
struct xpad_led *led;
@@ -343,8 +367,12 @@ struct usb_xpad {
int xtype; /* type of xbox device */
int pad_nr; /* the order x360 pads were attached */
const char *name; /* name of the device */
+ struct work_struct work; /* init/remove device from callback */
};
+static int xpad_init_input(struct usb_xpad *xpad);
+static void xpad_deinit_input(struct usb_xpad *xpad);
+
/*
* xpad_process_packet
*
@@ -424,11 +452,9 @@ static void xpad_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *d
* http://www.free60.org/wiki/Gamepad
*/
-static void xpad360_process_packet(struct usb_xpad *xpad,
+static void xpad360_process_packet(struct usb_xpad *xpad, struct input_dev *dev,
u16 cmd, unsigned char *data)
{
- struct input_dev *dev = xpad->dev;
-
/* digital pad */
if (xpad->mapping & MAP_DPAD_TO_BUTTONS) {
/* dpad as buttons (left, right, up, down) */
@@ -495,7 +521,30 @@ static void xpad360_process_packet(struct usb_xpad *xpad,
input_sync(dev);
}
-static void xpad_identify_controller(struct usb_xpad *xpad);
+static void xpad_presence_work(struct work_struct *work)
+{
+ struct usb_xpad *xpad = container_of(work, struct usb_xpad, work);
+ int error;
+
+ if (xpad->pad_present) {
+ error = xpad_init_input(xpad);
+ if (error) {
+ /* complain only, not much else we can do here */
+ dev_err(&xpad->dev->dev,
+ "unable to init device: %d\n", error);
+ } else {
+ rcu_assign_pointer(xpad->x360w_dev, xpad->dev);
+ }
+ } else {
+ RCU_INIT_POINTER(xpad->x360w_dev, NULL);
+ synchronize_rcu();
+ /*
+ * Now that we are sure xpad360w_process_packet is not
+ * using input device we can get rid of it.
+ */
+ xpad_deinit_input(xpad);
+ }
+}
/*
* xpad360w_process_packet
@@ -513,24 +562,28 @@ static void xpad_identify_controller(struct usb_xpad *xpad);
*/
static void xpad360w_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *data)
{
+ struct input_dev *dev;
+ bool present;
+
/* Presence change */
if (data[0] & 0x08) {
- if (data[1] & 0x80) {
- xpad->pad_present = 1;
- /*
- * Light up the segment corresponding to
- * controller number.
- */
- xpad_identify_controller(xpad);
- } else
- xpad->pad_present = 0;
+ present = (data[1] & 0x80) != 0;
+
+ if (xpad->pad_present != present) {
+ xpad->pad_present = present;
+ schedule_work(&xpad->work);
+ }
}
/* Valid pad data */
- if (!(data[1] & 0x1))
+ if (data[1] != 0x1)
return;
- xpad360_process_packet(xpad, cmd, &data[4]);
+ rcu_read_lock();
+ dev = rcu_dereference(xpad->x360w_dev);
+ if (dev)
+ xpad360_process_packet(xpad, dev, cmd, &data[4]);
+ rcu_read_unlock();
}
/*
@@ -659,7 +712,7 @@ static void xpad_irq_in(struct urb *urb)
switch (xpad->xtype) {
case XTYPE_XBOX360:
- xpad360_process_packet(xpad, 0, xpad->idata);
+ xpad360_process_packet(xpad, xpad->dev, 0, xpad->idata);
break;
case XTYPE_XBOX360W:
xpad360w_process_packet(xpad, 0, xpad->idata);
@@ -678,18 +731,73 @@ exit:
__func__, retval);
}
+/* Callers must hold xpad->odata_lock spinlock */
+static bool xpad_prepare_next_out_packet(struct usb_xpad *xpad)
+{
+ struct xpad_output_packet *pkt, *packet = NULL;
+ int i;
+
+ for (i = 0; i < XPAD_NUM_OUT_PACKETS; i++) {
+ if (++xpad->last_out_packet >= XPAD_NUM_OUT_PACKETS)
+ xpad->last_out_packet = 0;
+
+ pkt = &xpad->out_packets[xpad->last_out_packet];
+ if (pkt->pending) {
+ dev_dbg(&xpad->intf->dev,
+ "%s - found pending output packet %d\n",
+ __func__, xpad->last_out_packet);
+ packet = pkt;
+ break;
+ }
+ }
+
+ if (packet) {
+ memcpy(xpad->odata, packet->data, packet->len);
+ xpad->irq_out->transfer_buffer_length = packet->len;
+ return true;
+ }
+
+ return false;
+}
+
+/* Callers must hold xpad->odata_lock spinlock */
+static int xpad_try_sending_next_out_packet(struct usb_xpad *xpad)
+{
+ int error;
+
+ if (!xpad->irq_out_active && xpad_prepare_next_out_packet(xpad)) {
+ usb_anchor_urb(xpad->irq_out, &xpad->irq_out_anchor);
+ error = usb_submit_urb(xpad->irq_out, GFP_ATOMIC);
+ if (error) {
+ dev_err(&xpad->intf->dev,
+ "%s - usb_submit_urb failed with result %d\n",
+ __func__, error);
+ usb_unanchor_urb(xpad->irq_out);
+ return -EIO;
+ }
+
+ xpad->irq_out_active = true;
+ }
+
+ return 0;
+}
+
static void xpad_irq_out(struct urb *urb)
{
struct usb_xpad *xpad = urb->context;
struct device *dev = &xpad->intf->dev;
- int retval, status;
+ int status = urb->status;
+ int error;
+ unsigned long flags;
- status = urb->status;
+ spin_lock_irqsave(&xpad->odata_lock, flags);
switch (status) {
case 0:
/* success */
- return;
+ xpad->out_packets[xpad->last_out_packet].pending = false;
+ xpad->irq_out_active = xpad_prepare_next_out_packet(xpad);
+ break;
case -ECONNRESET:
case -ENOENT:
@@ -697,19 +805,28 @@ static void xpad_irq_out(struct urb *urb)
/* this urb is terminated, clean up */
dev_dbg(dev, "%s - urb shutting down with status: %d\n",
__func__, status);
- return;
+ xpad->irq_out_active = false;
+ break;
default:
dev_dbg(dev, "%s - nonzero urb status received: %d\n",
__func__, status);
- goto exit;
+ break;
}
-exit:
- retval = usb_submit_urb(urb, GFP_ATOMIC);
- if (retval)
- dev_err(dev, "%s - usb_submit_urb failed with result %d\n",
- __func__, retval);
+ if (xpad->irq_out_active) {
+ usb_anchor_urb(urb, &xpad->irq_out_anchor);
+ error = usb_submit_urb(urb, GFP_ATOMIC);
+ if (error) {
+ dev_err(dev,
+ "%s - usb_submit_urb failed with result %d\n",
+ __func__, error);
+ usb_unanchor_urb(urb);
+ xpad->irq_out_active = false;
+ }
+ }
+
+ spin_unlock_irqrestore(&xpad->odata_lock, flags);
}
static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad)
@@ -721,6 +838,8 @@ static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad)
if (xpad->xtype == XTYPE_UNKNOWN)
return 0;
+ init_usb_anchor(&xpad->irq_out_anchor);
+
xpad->odata = usb_alloc_coherent(xpad->udev, XPAD_PKT_LEN,
GFP_KERNEL, &xpad->odata_dma);
if (!xpad->odata) {
@@ -728,7 +847,7 @@ static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad)
goto fail1;
}
- mutex_init(&xpad->odata_mutex);
+ spin_lock_init(&xpad->odata_lock);
xpad->irq_out = usb_alloc_urb(0, GFP_KERNEL);
if (!xpad->irq_out) {
@@ -755,8 +874,14 @@ static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad)
static void xpad_stop_output(struct usb_xpad *xpad)
{
- if (xpad->xtype != XTYPE_UNKNOWN)
- usb_kill_urb(xpad->irq_out);
+ if (xpad->xtype != XTYPE_UNKNOWN) {
+ if (!usb_wait_anchor_empty_timeout(&xpad->irq_out_anchor,
+ 5000)) {
+ dev_warn(&xpad->intf->dev,
+ "timed out waiting for output URB to complete, killing\n");
+ usb_kill_anchored_urbs(&xpad->irq_out_anchor);
+ }
+ }
}
static void xpad_deinit_output(struct usb_xpad *xpad)
@@ -770,27 +895,60 @@ static void xpad_deinit_output(struct usb_xpad *xpad)
static int xpad_inquiry_pad_presence(struct usb_xpad *xpad)
{
+ struct xpad_output_packet *packet =
+ &xpad->out_packets[XPAD_OUT_CMD_IDX];
+ unsigned long flags;
int retval;
- mutex_lock(&xpad->odata_mutex);
+ spin_lock_irqsave(&xpad->odata_lock, flags);
+
+ packet->data[0] = 0x08;
+ packet->data[1] = 0x00;
+ packet->data[2] = 0x0F;
+ packet->data[3] = 0xC0;
+ packet->data[4] = 0x00;
+ packet->data[5] = 0x00;
+ packet->data[6] = 0x00;
+ packet->data[7] = 0x00;
+ packet->data[8] = 0x00;
+ packet->data[9] = 0x00;
+ packet->data[10] = 0x00;
+ packet->data[11] = 0x00;
+ packet->len = 12;
+ packet->pending = true;
+
+ /* Reset the sequence so we send out presence first */
+ xpad->last_out_packet = -1;
+ retval = xpad_try_sending_next_out_packet(xpad);
+
+ spin_unlock_irqrestore(&xpad->odata_lock, flags);
- xpad->odata[0] = 0x08;
- xpad->odata[1] = 0x00;
- xpad->odata[2] = 0x0F;
- xpad->odata[3] = 0xC0;
- xpad->odata[4] = 0x00;
- xpad->odata[5] = 0x00;
- xpad->odata[6] = 0x00;
- xpad->odata[7] = 0x00;
- xpad->odata[8] = 0x00;
- xpad->odata[9] = 0x00;
- xpad->odata[10] = 0x00;
- xpad->odata[11] = 0x00;
- xpad->irq_out->transfer_buffer_length = 12;
+ return retval;
+}
+
+static int xpad_start_xbox_one(struct usb_xpad *xpad)
+{
+ struct xpad_output_packet *packet =
+ &xpad->out_packets[XPAD_OUT_CMD_IDX];
+ unsigned long flags;
+ int retval;
- retval = usb_submit_urb(xpad->irq_out, GFP_KERNEL);
+ spin_lock_irqsave(&xpad->odata_lock, flags);
- mutex_unlock(&xpad->odata_mutex);
+ /* Xbox one controller needs to be initialized. */
+ packet->data[0] = 0x05;
+ packet->data[1] = 0x20;
+ packet->data[2] = xpad->odata_serial++; /* packet serial */
+ packet->data[3] = 0x01; /* rumble bit enable? */
+ packet->data[4] = 0x00;
+ packet->len = 5;
+ packet->pending = true;
+
+ /* Reset the sequence so we send out start packet first */
+ xpad->last_out_packet = -1;
+ retval = xpad_try_sending_next_out_packet(xpad);
+
+ spin_unlock_irqrestore(&xpad->odata_lock, flags);
return retval;
}
@@ -799,8 +957,11 @@ static int xpad_inquiry_pad_presence(struct usb_xpad *xpad)
static int xpad_play_effect(struct input_dev *dev, void *data, struct ff_effect *effect)
{
struct usb_xpad *xpad = input_get_drvdata(dev);
+ struct xpad_output_packet *packet = &xpad->out_packets[XPAD_OUT_FF_IDX];
__u16 strong;
__u16 weak;
+ int retval;
+ unsigned long flags;
if (effect->type != FF_RUMBLE)
return 0;
@@ -808,69 +969,81 @@ static int xpad_play_effect(struct input_dev *dev, void *data, struct ff_effect
strong = effect->u.rumble.strong_magnitude;
weak = effect->u.rumble.weak_magnitude;
+ spin_lock_irqsave(&xpad->odata_lock, flags);
+
switch (xpad->xtype) {
case XTYPE_XBOX:
- xpad->odata[0] = 0x00;
- xpad->odata[1] = 0x06;
- xpad->odata[2] = 0x00;
- xpad->odata[3] = strong / 256; /* left actuator */
- xpad->odata[4] = 0x00;
- xpad->odata[5] = weak / 256; /* right actuator */
- xpad->irq_out->transfer_buffer_length = 6;
+ packet->data[0] = 0x00;
+ packet->data[1] = 0x06;
+ packet->data[2] = 0x00;
+ packet->data[3] = strong / 256; /* left actuator */
+ packet->data[4] = 0x00;
+ packet->data[5] = weak / 256; /* right actuator */
+ packet->len = 6;
+ packet->pending = true;
break;
case XTYPE_XBOX360:
- xpad->odata[0] = 0x00;
- xpad->odata[1] = 0x08;
- xpad->odata[2] = 0x00;
- xpad->odata[3] = strong / 256; /* left actuator? */
- xpad->odata[4] = weak / 256; /* right actuator? */
- xpad->odata[5] = 0x00;
- xpad->odata[6] = 0x00;
- xpad->odata[7] = 0x00;
- xpad->irq_out->transfer_buffer_length = 8;
+ packet->data[0] = 0x00;
+ packet->data[1] = 0x08;
+ packet->data[2] = 0x00;
+ packet->data[3] = strong / 256; /* left actuator? */
+ packet->data[4] = weak / 256; /* right actuator? */
+ packet->data[5] = 0x00;
+ packet->data[6] = 0x00;
+ packet->data[7] = 0x00;
+ packet->len = 8;
+ packet->pending = true;
break;
case XTYPE_XBOX360W:
- xpad->odata[0] = 0x00;
- xpad->odata[1] = 0x01;
- xpad->odata[2] = 0x0F;
- xpad->odata[3] = 0xC0;
- xpad->odata[4] = 0x00;
- xpad->odata[5] = strong / 256;
- xpad->odata[6] = weak / 256;
- xpad->odata[7] = 0x00;
- xpad->odata[8] = 0x00;
- xpad->odata[9] = 0x00;
- xpad->odata[10] = 0x00;
- xpad->odata[11] = 0x00;
- xpad->irq_out->transfer_buffer_length = 12;
+ packet->data[0] = 0x00;
+ packet->data[1] = 0x01;
+ packet->data[2] = 0x0F;
+ packet->data[3] = 0xC0;
+ packet->data[4] = 0x00;
+ packet->data[5] = strong / 256;
+ packet->data[6] = weak / 256;
+ packet->data[7] = 0x00;
+ packet->data[8] = 0x00;
+ packet->data[9] = 0x00;
+ packet->data[10] = 0x00;
+ packet->data[11] = 0x00;
+ packet->len = 12;
+ packet->pending = true;
break;
case XTYPE_XBOXONE:
- xpad->odata[0] = 0x09; /* activate rumble */
- xpad->odata[1] = 0x08;
- xpad->odata[2] = 0x00;
- xpad->odata[3] = 0x08; /* continuous effect */
- xpad->odata[4] = 0x00; /* simple rumble mode */
- xpad->odata[5] = 0x03; /* L and R actuator only */
- xpad->odata[6] = 0x00; /* TODO: LT actuator */
- xpad->odata[7] = 0x00; /* TODO: RT actuator */
- xpad->odata[8] = strong / 256; /* left actuator */
- xpad->odata[9] = weak / 256; /* right actuator */
- xpad->odata[10] = 0x80; /* length of pulse */
- xpad->odata[11] = 0x00; /* stop period of pulse */
- xpad->irq_out->transfer_buffer_length = 12;
+ packet->data[0] = 0x09; /* activate rumble */
+ packet->data[1] = 0x08;
+ packet->data[2] = xpad->odata_serial++;
+ packet->data[3] = 0x08; /* continuous effect */
+ packet->data[4] = 0x00; /* simple rumble mode */
+ packet->data[5] = 0x03; /* L and R actuator only */
+ packet->data[6] = 0x00; /* TODO: LT actuator */
+ packet->data[7] = 0x00; /* TODO: RT actuator */
+ packet->data[8] = strong / 512; /* left actuator */
+ packet->data[9] = weak / 512; /* right actuator */
+ packet->data[10] = 0x80; /* length of pulse */
+ packet->data[11] = 0x00; /* stop period of pulse */
+ packet->data[12] = 0x00;
+ packet->len = 13;
+ packet->pending = true;
break;
default:
dev_dbg(&xpad->dev->dev,
"%s - rumble command sent to unsupported xpad type: %d\n",
__func__, xpad->xtype);
- return -EINVAL;
+ retval = -EINVAL;
+ goto out;
}
- return usb_submit_urb(xpad->irq_out, GFP_ATOMIC);
+ retval = xpad_try_sending_next_out_packet(xpad);
+
+out:
+ spin_unlock_irqrestore(&xpad->odata_lock, flags);
+ return retval;
}
static int xpad_init_ff(struct usb_xpad *xpad)
@@ -921,36 +1094,44 @@ struct xpad_led {
*/
static void xpad_send_led_command(struct usb_xpad *xpad, int command)
{
+ struct xpad_output_packet *packet =
+ &xpad->out_packets[XPAD_OUT_LED_IDX];
+ unsigned long flags;
+
command %= 16;
- mutex_lock(&xpad->odata_mutex);
+ spin_lock_irqsave(&xpad->odata_lock, flags);
switch (xpad->xtype) {
case XTYPE_XBOX360:
- xpad->odata[0] = 0x01;
- xpad->odata[1] = 0x03;
- xpad->odata[2] = command;
- xpad->irq_out->transfer_buffer_length = 3;
+ packet->data[0] = 0x01;
+ packet->data[1] = 0x03;
+ packet->data[2] = command;
+ packet->len = 3;
+ packet->pending = true;
break;
+
case XTYPE_XBOX360W:
- xpad->odata[0] = 0x00;
- xpad->odata[1] = 0x00;
- xpad->odata[2] = 0x08;
- xpad->odata[3] = 0x40 + command;
- xpad->odata[4] = 0x00;
- xpad->odata[5] = 0x00;
- xpad->odata[6] = 0x00;
- xpad->odata[7] = 0x00;
- xpad->odata[8] = 0x00;
- xpad->odata[9] = 0x00;
- xpad->odata[10] = 0x00;
- xpad->odata[11] = 0x00;
- xpad->irq_out->transfer_buffer_length = 12;
+ packet->data[0] = 0x00;
+ packet->data[1] = 0x00;
+ packet->data[2] = 0x08;
+ packet->data[3] = 0x40 + command;
+ packet->data[4] = 0x00;
+ packet->data[5] = 0x00;
+ packet->data[6] = 0x00;
+ packet->data[7] = 0x00;
+ packet->data[8] = 0x00;
+ packet->data[9] = 0x00;
+ packet->data[10] = 0x00;
+ packet->data[11] = 0x00;
+ packet->len = 12;
+ packet->pending = true;
break;
}
- usb_submit_urb(xpad->irq_out, GFP_KERNEL);
- mutex_unlock(&xpad->odata_mutex);
+ xpad_try_sending_next_out_packet(xpad);
+
+ spin_unlock_irqrestore(&xpad->odata_lock, flags);
}
/*
@@ -959,7 +1140,7 @@ static void xpad_send_led_command(struct usb_xpad *xpad, int command)
*/
static void xpad_identify_controller(struct usb_xpad *xpad)
{
- xpad_send_led_command(xpad, (xpad->pad_nr % 4) + 2);
+ led_set_brightness(&xpad->led->led_cdev, (xpad->pad_nr % 4) + 2);
}
static void xpad_led_set(struct led_classdev *led_cdev,
@@ -1001,14 +1182,7 @@ static int xpad_led_probe(struct usb_xpad *xpad)
if (error)
goto err_free_id;
- if (xpad->xtype == XTYPE_XBOX360) {
- /*
- * Light up the segment corresponding to controller
- * number on wired devices. On wireless we'll do that
- * when they respond to "presence" packet.
- */
- xpad_identify_controller(xpad);
- }
+ xpad_identify_controller(xpad);
return 0;
@@ -1036,37 +1210,73 @@ static void xpad_led_disconnect(struct usb_xpad *xpad) { }
static void xpad_identify_controller(struct usb_xpad *xpad) { }
#endif
-static int xpad_open(struct input_dev *dev)
+static int xpad_start_input(struct usb_xpad *xpad)
{
- struct usb_xpad *xpad = input_get_drvdata(dev);
-
- /* URB was submitted in probe */
- if (xpad->xtype == XTYPE_XBOX360W)
- return 0;
+ int error;
- xpad->irq_in->dev = xpad->udev;
if (usb_submit_urb(xpad->irq_in, GFP_KERNEL))
return -EIO;
if (xpad->xtype == XTYPE_XBOXONE) {
- /* Xbox one controller needs to be initialized. */
- xpad->odata[0] = 0x05;
- xpad->odata[1] = 0x20;
- xpad->irq_out->transfer_buffer_length = 2;
- return usb_submit_urb(xpad->irq_out, GFP_KERNEL);
+ error = xpad_start_xbox_one(xpad);
+ if (error) {
+ usb_kill_urb(xpad->irq_in);
+ return error;
+ }
}
return 0;
}
-static void xpad_close(struct input_dev *dev)
+static void xpad_stop_input(struct usb_xpad *xpad)
{
- struct usb_xpad *xpad = input_get_drvdata(dev);
+ usb_kill_urb(xpad->irq_in);
+}
+
+static int xpad360w_start_input(struct usb_xpad *xpad)
+{
+ int error;
- if (xpad->xtype != XTYPE_XBOX360W)
+ error = usb_submit_urb(xpad->irq_in, GFP_KERNEL);
+ if (error)
+ return -EIO;
+
+ /*
+ * Send presence packet.
+ * This will force the controller to resend connection packets.
+ * This is useful in the case we activate the module after the
+ * adapter has been plugged in, as it won't automatically
+ * send us info about the controllers.
+ */
+ error = xpad_inquiry_pad_presence(xpad);
+ if (error) {
usb_kill_urb(xpad->irq_in);
+ return error;
+ }
- xpad_stop_output(xpad);
+ return 0;
+}
+
+static void xpad360w_stop_input(struct usb_xpad *xpad)
+{
+ usb_kill_urb(xpad->irq_in);
+
+ /* Make sure we are done with presence work if it was scheduled */
+ flush_work(&xpad->work);
+}
+
+static int xpad_open(struct input_dev *dev)
+{
+ struct usb_xpad *xpad = input_get_drvdata(dev);
+
+ return xpad_start_input(xpad);
+}
+
+static void xpad_close(struct input_dev *dev)
+{
+ struct usb_xpad *xpad = input_get_drvdata(dev);
+
+ xpad_stop_input(xpad);
}
static void xpad_set_up_abs(struct input_dev *input_dev, signed short abs)
@@ -1097,8 +1307,11 @@ static void xpad_set_up_abs(struct input_dev *input_dev, signed short abs)
static void xpad_deinit_input(struct usb_xpad *xpad)
{
- xpad_led_disconnect(xpad);
- input_unregister_device(xpad->dev);
+ if (xpad->input_created) {
+ xpad->input_created = false;
+ xpad_led_disconnect(xpad);
+ input_unregister_device(xpad->dev);
+ }
}
static int xpad_init_input(struct usb_xpad *xpad)
@@ -1118,8 +1331,10 @@ static int xpad_init_input(struct usb_xpad *xpad)
input_set_drvdata(input_dev, xpad);
- input_dev->open = xpad_open;
- input_dev->close = xpad_close;
+ if (xpad->xtype != XTYPE_XBOX360W) {
+ input_dev->open = xpad_open;
+ input_dev->close = xpad_close;
+ }
__set_bit(EV_KEY, input_dev->evbit);
@@ -1181,6 +1396,7 @@ static int xpad_init_input(struct usb_xpad *xpad)
if (error)
goto err_disconnect_led;
+ xpad->input_created = true;
return 0;
err_disconnect_led:
@@ -1241,6 +1457,7 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
xpad->mapping = xpad_device[i].mapping;
xpad->xtype = xpad_device[i].xtype;
xpad->name = xpad_device[i].name;
+ INIT_WORK(&xpad->work, xpad_presence_work);
if (xpad->xtype == XTYPE_UNKNOWN) {
if (intf->cur_altsetting->desc.bInterfaceClass == USB_CLASS_VENDOR_SPEC) {
@@ -1277,10 +1494,6 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
usb_set_intfdata(intf, xpad);
- error = xpad_init_input(xpad);
- if (error)
- goto err_deinit_output;
-
if (xpad->xtype == XTYPE_XBOX360W) {
/*
* Submit the int URB immediately rather than waiting for open
@@ -1289,28 +1502,24 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
* exactly the message that a controller has arrived that
* we're waiting for.
*/
- xpad->irq_in->dev = xpad->udev;
- error = usb_submit_urb(xpad->irq_in, GFP_KERNEL);
+ error = xpad360w_start_input(xpad);
if (error)
- goto err_deinit_input;
-
+ goto err_deinit_output;
/*
- * Send presence packet.
- * This will force the controller to resend connection packets.
- * This is useful in the case we activate the module after the
- * adapter has been plugged in, as it won't automatically
- * send us info about the controllers.
+ * Wireless controllers require RESET_RESUME to work properly
+ * after suspend. Ideally this quirk should be in usb core
+ * quirk list, but we have too many vendors producing these
+ * controllers and we'd need to maintain 2 identical lists
+ * here in this driver and in usb core.
*/
- error = xpad_inquiry_pad_presence(xpad);
+ udev->quirks |= USB_QUIRK_RESET_RESUME;
+ } else {
+ error = xpad_init_input(xpad);
if (error)
- goto err_kill_in_urb;
+ goto err_deinit_output;
}
return 0;
-err_kill_in_urb:
- usb_kill_urb(xpad->irq_in);
-err_deinit_input:
- xpad_deinit_input(xpad);
err_deinit_output:
xpad_deinit_output(xpad);
err_free_in_urb:
@@ -1320,19 +1529,24 @@ err_free_idata:
err_free_mem:
kfree(xpad);
return error;
-
}
static void xpad_disconnect(struct usb_interface *intf)
{
- struct usb_xpad *xpad = usb_get_intfdata (intf);
+ struct usb_xpad *xpad = usb_get_intfdata(intf);
+
+ if (xpad->xtype == XTYPE_XBOX360W)
+ xpad360w_stop_input(xpad);
xpad_deinit_input(xpad);
- xpad_deinit_output(xpad);
- if (xpad->xtype == XTYPE_XBOX360W) {
- usb_kill_urb(xpad->irq_in);
- }
+ /*
+ * Now that both input device and LED device are gone we can
+ * stop output URB.
+ */
+ xpad_stop_output(xpad);
+
+ xpad_deinit_output(xpad);
usb_free_urb(xpad->irq_in);
usb_free_coherent(xpad->udev, XPAD_PKT_LEN,
@@ -1343,10 +1557,55 @@ static void xpad_disconnect(struct usb_interface *intf)
usb_set_intfdata(intf, NULL);
}
+static int xpad_suspend(struct usb_interface *intf, pm_message_t message)
+{
+ struct usb_xpad *xpad = usb_get_intfdata(intf);
+ struct input_dev *input = xpad->dev;
+
+ if (xpad->xtype == XTYPE_XBOX360W) {
+ /*
+ * Wireless controllers always listen to input so
+ * they are notified when controller shows up
+ * or goes away.
+ */
+ xpad360w_stop_input(xpad);
+ } else {
+ mutex_lock(&input->mutex);
+ if (input->users)
+ xpad_stop_input(xpad);
+ mutex_unlock(&input->mutex);
+ }
+
+ xpad_stop_output(xpad);
+
+ return 0;
+}
+
+static int xpad_resume(struct usb_interface *intf)
+{
+ struct usb_xpad *xpad = usb_get_intfdata(intf);
+ struct input_dev *input = xpad->dev;
+ int retval = 0;
+
+ if (xpad->xtype == XTYPE_XBOX360W) {
+ retval = xpad360w_start_input(xpad);
+ } else {
+ mutex_lock(&input->mutex);
+ if (input->users)
+ retval = xpad_start_input(xpad);
+ mutex_unlock(&input->mutex);
+ }
+
+ return retval;
+}
+
static struct usb_driver xpad_driver = {
.name = "xpad",
.probe = xpad_probe,
.disconnect = xpad_disconnect,
+ .suspend = xpad_suspend,
+ .resume = xpad_resume,
+ .reset_resume = xpad_resume,
.id_table = xpad_table,
};
diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c
index b9f01bd..2909365 100644
--- a/drivers/input/keyboard/gpio_keys.c
+++ b/drivers/input/keyboard/gpio_keys.c
@@ -630,7 +630,7 @@ gpio_keys_get_devtree_pdata(struct device *dev)
if (!node)
return ERR_PTR(-ENODEV);
- nbuttons = of_get_child_count(node);
+ nbuttons = of_get_available_child_count(node);
if (nbuttons == 0)
return ERR_PTR(-ENODEV);
@@ -645,8 +645,10 @@ gpio_keys_get_devtree_pdata(struct device *dev)
pdata->rep = !!of_get_property(node, "autorepeat", NULL);
+ of_property_read_string(node, "label", &pdata->name);
+
i = 0;
- for_each_child_of_node(node, pp) {
+ for_each_available_child_of_node(node, pp) {
enum of_gpio_flags flags;
button = &pdata->buttons[i++];
diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c
index 2d5794e..2160512 100644
--- a/drivers/input/touchscreen/atmel_mxt_ts.c
+++ b/drivers/input/touchscreen/atmel_mxt_ts.c
@@ -113,8 +113,8 @@ struct t7_config {
#define MXT_T9_DETECT (1 << 7)
struct t9_range {
- u16 x;
- u16 y;
+ __le16 x;
+ __le16 y;
} __packed;
/* MXT_TOUCH_MULTI_T9 orient */
@@ -216,6 +216,7 @@ struct mxt_data {
unsigned int irq;
unsigned int max_x;
unsigned int max_y;
+ bool xy_switch;
bool in_bootloader;
u16 mem_size;
u8 t100_aux_ampl;
@@ -1665,8 +1666,8 @@ static int mxt_read_t9_resolution(struct mxt_data *data)
if (error)
return error;
- le16_to_cpus(&range.x);
- le16_to_cpus(&range.y);
+ data->max_x = get_unaligned_le16(&range.x);
+ data->max_y = get_unaligned_le16(&range.y);
error = __mxt_read_reg(client,
object->start_address + MXT_T9_ORIENT,
@@ -1674,23 +1675,7 @@ static int mxt_read_t9_resolution(struct mxt_data *data)
if (error)
return error;
- /* Handle default values */
- if (range.x == 0)
- range.x = 1023;
-
- if (range.y == 0)
- range.y = 1023;
-
- if (orient & MXT_T9_ORIENT_SWITCH) {
- data->max_x = range.y;
- data->max_y = range.x;
- } else {
- data->max_x = range.x;
- data->max_y = range.y;
- }
-
- dev_dbg(&client->dev,
- "Touchscreen size X%uY%u\n", data->max_x, data->max_y);
+ data->xy_switch = orient & MXT_T9_ORIENT_SWITCH;
return 0;
}
@@ -1708,13 +1693,14 @@ static int mxt_read_t100_config(struct mxt_data *data)
if (!object)
return -EINVAL;
+ /* read touchscreen dimensions */
error = __mxt_read_reg(client,
object->start_address + MXT_T100_XRANGE,
sizeof(range_x), &range_x);
if (error)
return error;
- le16_to_cpus(&range_x);
+ data->max_x = get_unaligned_le16(&range_x);
error = __mxt_read_reg(client,
object->start_address + MXT_T100_YRANGE,
@@ -1722,36 +1708,24 @@ static int mxt_read_t100_config(struct mxt_data *data)
if (error)
return error;
- le16_to_cpus(&range_y);
+ data->max_y = get_unaligned_le16(&range_y);
+ /* read orientation config */
error = __mxt_read_reg(client,
object->start_address + MXT_T100_CFG1,
1, &cfg);
if (error)
return error;
+ data->xy_switch = cfg & MXT_T100_CFG_SWITCHXY;
+
+ /* allocate aux bytes */
error = __mxt_read_reg(client,
object->start_address + MXT_T100_TCHAUX,
1, &tchaux);
if (error)
return error;
- /* Handle default values */
- if (range_x == 0)
- range_x = 1023;
-
- if (range_y == 0)
- range_y = 1023;
-
- if (cfg & MXT_T100_CFG_SWITCHXY) {
- data->max_x = range_y;
- data->max_y = range_x;
- } else {
- data->max_x = range_x;
- data->max_y = range_y;
- }
-
- /* allocate aux bytes */
aux = 6;
if (tchaux & MXT_T100_TCHAUX_VECT)
@@ -1767,9 +1741,6 @@ static int mxt_read_t100_config(struct mxt_data *data)
"T100 aux mappings vect:%u ampl:%u area:%u\n",
data->t100_aux_vect, data->t100_aux_ampl, data->t100_aux_area);
- dev_info(&client->dev,
- "T100 Touchscreen size X%uY%u\n", data->max_x, data->max_y);
-
return 0;
}
@@ -1828,6 +1799,19 @@ static int mxt_initialize_input_device(struct mxt_data *data)
return -EINVAL;
}
+ /* Handle default values and orientation switch */
+ if (data->max_x == 0)
+ data->max_x = 1023;
+
+ if (data->max_y == 0)
+ data->max_y = 1023;
+
+ if (data->xy_switch)
+ swap(data->max_x, data->max_y);
+
+ dev_info(dev, "Touchscreen size X%uY%u\n", data->max_x, data->max_y);
+
+ /* Register input device */
input_dev = input_allocate_device();
if (!input_dev) {
dev_err(dev, "Failed to allocate memory\n");
diff --git a/drivers/lightnvm/Makefile b/drivers/lightnvm/Makefile
index 7e0f42a..a7a0a22 100644
--- a/drivers/lightnvm/Makefile
+++ b/drivers/lightnvm/Makefile
@@ -2,6 +2,6 @@
# Makefile for Open-Channel SSDs.
#
-obj-$(CONFIG_NVM) := core.o
+obj-$(CONFIG_NVM) := core.o sysblk.o
obj-$(CONFIG_NVM_GENNVM) += gennvm.o
obj-$(CONFIG_NVM_RRPC) += rrpc.o
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 8f41b24..33224cb 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -27,6 +27,7 @@
#include <linux/module.h>
#include <linux/miscdevice.h>
#include <linux/lightnvm.h>
+#include <linux/sched/sysctl.h>
#include <uapi/linux/lightnvm.h>
static LIST_HEAD(nvm_targets);
@@ -105,6 +106,9 @@ struct nvmm_type *nvm_init_mgr(struct nvm_dev *dev)
lockdep_assert_held(&nvm_lock);
list_for_each_entry(mt, &nvm_mgrs, list) {
+ if (strncmp(dev->sb.mmtype, mt->name, NVM_MMTYPE_LEN))
+ continue;
+
ret = mt->register_mgr(dev);
if (ret < 0) {
pr_err("nvm: media mgr failed to init (%d) on dev %s\n",
@@ -166,6 +170,20 @@ static struct nvm_dev *nvm_find_nvm_dev(const char *name)
return NULL;
}
+struct nvm_block *nvm_get_blk_unlocked(struct nvm_dev *dev, struct nvm_lun *lun,
+ unsigned long flags)
+{
+ return dev->mt->get_blk_unlocked(dev, lun, flags);
+}
+EXPORT_SYMBOL(nvm_get_blk_unlocked);
+
+/* Assumes that all valid pages have already been moved on release to bm */
+void nvm_put_blk_unlocked(struct nvm_dev *dev, struct nvm_block *blk)
+{
+ return dev->mt->put_blk_unlocked(dev, blk);
+}
+EXPORT_SYMBOL(nvm_put_blk_unlocked);
+
struct nvm_block *nvm_get_blk(struct nvm_dev *dev, struct nvm_lun *lun,
unsigned long flags)
{
@@ -192,6 +210,206 @@ int nvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk)
}
EXPORT_SYMBOL(nvm_erase_blk);
+void nvm_addr_to_generic_mode(struct nvm_dev *dev, struct nvm_rq *rqd)
+{
+ int i;
+
+ if (rqd->nr_pages > 1) {
+ for (i = 0; i < rqd->nr_pages; i++)
+ rqd->ppa_list[i] = dev_to_generic_addr(dev,
+ rqd->ppa_list[i]);
+ } else {
+ rqd->ppa_addr = dev_to_generic_addr(dev, rqd->ppa_addr);
+ }
+}
+EXPORT_SYMBOL(nvm_addr_to_generic_mode);
+
+void nvm_generic_to_addr_mode(struct nvm_dev *dev, struct nvm_rq *rqd)
+{
+ int i;
+
+ if (rqd->nr_pages > 1) {
+ for (i = 0; i < rqd->nr_pages; i++)
+ rqd->ppa_list[i] = generic_to_dev_addr(dev,
+ rqd->ppa_list[i]);
+ } else {
+ rqd->ppa_addr = generic_to_dev_addr(dev, rqd->ppa_addr);
+ }
+}
+EXPORT_SYMBOL(nvm_generic_to_addr_mode);
+
+int nvm_set_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd,
+ struct ppa_addr *ppas, int nr_ppas)
+{
+ int i, plane_cnt, pl_idx;
+
+ if (dev->plane_mode == NVM_PLANE_SINGLE && nr_ppas == 1) {
+ rqd->nr_pages = 1;
+ rqd->ppa_addr = ppas[0];
+
+ return 0;
+ }
+
+ plane_cnt = (1 << dev->plane_mode);
+ rqd->nr_pages = plane_cnt * nr_ppas;
+
+ if (dev->ops->max_phys_sect < rqd->nr_pages)
+ return -EINVAL;
+
+ rqd->ppa_list = nvm_dev_dma_alloc(dev, GFP_KERNEL, &rqd->dma_ppa_list);
+ if (!rqd->ppa_list) {
+ pr_err("nvm: failed to allocate dma memory\n");
+ return -ENOMEM;
+ }
+
+ for (pl_idx = 0; pl_idx < plane_cnt; pl_idx++) {
+ for (i = 0; i < nr_ppas; i++) {
+ ppas[i].g.pl = pl_idx;
+ rqd->ppa_list[(pl_idx * nr_ppas) + i] = ppas[i];
+ }
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(nvm_set_rqd_ppalist);
+
+void nvm_free_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd)
+{
+ if (!rqd->ppa_list)
+ return;
+
+ nvm_dev_dma_free(dev, rqd->ppa_list, rqd->dma_ppa_list);
+}
+EXPORT_SYMBOL(nvm_free_rqd_ppalist);
+
+int nvm_erase_ppa(struct nvm_dev *dev, struct ppa_addr *ppas, int nr_ppas)
+{
+ struct nvm_rq rqd;
+ int ret;
+
+ if (!dev->ops->erase_block)
+ return 0;
+
+ memset(&rqd, 0, sizeof(struct nvm_rq));
+
+ ret = nvm_set_rqd_ppalist(dev, &rqd, ppas, nr_ppas);
+ if (ret)
+ return ret;
+
+ nvm_generic_to_addr_mode(dev, &rqd);
+
+ ret = dev->ops->erase_block(dev, &rqd);
+
+ nvm_free_rqd_ppalist(dev, &rqd);
+
+ return ret;
+}
+EXPORT_SYMBOL(nvm_erase_ppa);
+
+void nvm_end_io(struct nvm_rq *rqd, int error)
+{
+ rqd->error = error;
+ rqd->end_io(rqd);
+}
+EXPORT_SYMBOL(nvm_end_io);
+
+static void nvm_end_io_sync(struct nvm_rq *rqd)
+{
+ struct completion *waiting = rqd->wait;
+
+ rqd->wait = NULL;
+
+ complete(waiting);
+}
+
+int nvm_submit_ppa(struct nvm_dev *dev, struct ppa_addr *ppa, int nr_ppas,
+ int opcode, int flags, void *buf, int len)
+{
+ DECLARE_COMPLETION_ONSTACK(wait);
+ struct nvm_rq rqd;
+ struct bio *bio;
+ int ret;
+ unsigned long hang_check;
+
+ bio = bio_map_kern(dev->q, buf, len, GFP_KERNEL);
+ if (IS_ERR_OR_NULL(bio))
+ return -ENOMEM;
+
+ memset(&rqd, 0, sizeof(struct nvm_rq));
+ ret = nvm_set_rqd_ppalist(dev, &rqd, ppa, nr_ppas);
+ if (ret) {
+ bio_put(bio);
+ return ret;
+ }
+
+ rqd.opcode = opcode;
+ rqd.bio = bio;
+ rqd.wait = &wait;
+ rqd.dev = dev;
+ rqd.end_io = nvm_end_io_sync;
+ rqd.flags = flags;
+ nvm_generic_to_addr_mode(dev, &rqd);
+
+ ret = dev->ops->submit_io(dev, &rqd);
+
+ /* Prevent hang_check timer from firing at us during very long I/O */
+ hang_check = sysctl_hung_task_timeout_secs;
+ if (hang_check)
+ while (!wait_for_completion_io_timeout(&wait, hang_check * (HZ/2)));
+ else
+ wait_for_completion_io(&wait);
+
+ nvm_free_rqd_ppalist(dev, &rqd);
+
+ return rqd.error;
+}
+EXPORT_SYMBOL(nvm_submit_ppa);
+
+static int nvm_init_slc_tbl(struct nvm_dev *dev, struct nvm_id_group *grp)
+{
+ int i;
+
+ dev->lps_per_blk = dev->pgs_per_blk;
+ dev->lptbl = kcalloc(dev->lps_per_blk, sizeof(int), GFP_KERNEL);
+ if (!dev->lptbl)
+ return -ENOMEM;
+
+ /* Just a linear array */
+ for (i = 0; i < dev->lps_per_blk; i++)
+ dev->lptbl[i] = i;
+
+ return 0;
+}
+
+static int nvm_init_mlc_tbl(struct nvm_dev *dev, struct nvm_id_group *grp)
+{
+ int i, p;
+ struct nvm_id_lp_mlc *mlc = &grp->lptbl.mlc;
+
+ if (!mlc->num_pairs)
+ return 0;
+
+ dev->lps_per_blk = mlc->num_pairs;
+ dev->lptbl = kcalloc(dev->lps_per_blk, sizeof(int), GFP_KERNEL);
+ if (!dev->lptbl)
+ return -ENOMEM;
+
+ /* The lower page table encoding consists of a list of bytes, where each
+ * has a lower and an upper half. The first half byte maintains the
+ * increment value and every value after is an offset added to the
+ * previous incrementation value */
+ dev->lptbl[0] = mlc->pairs[0] & 0xF;
+ for (i = 1; i < dev->lps_per_blk; i++) {
+ p = mlc->pairs[i >> 1];
+ if (i & 0x1) /* upper */
+ dev->lptbl[i] = dev->lptbl[i - 1] + ((p & 0xF0) >> 4);
+ else /* lower */
+ dev->lptbl[i] = dev->lptbl[i - 1] + (p & 0xF);
+ }
+
+ return 0;
+}
+
static int nvm_core_init(struct nvm_dev *dev)
{
struct nvm_id *id = &dev->identity;
@@ -206,6 +424,7 @@ static int nvm_core_init(struct nvm_dev *dev)
dev->sec_size = grp->csecs;
dev->oob_size = grp->sos;
dev->sec_per_pg = grp->fpg_sz / grp->csecs;
+ dev->mccap = grp->mccap;
memcpy(&dev->ppaf, &id->ppaf, sizeof(struct nvm_addr_format));
dev->plane_mode = NVM_PLANE_SINGLE;
@@ -216,11 +435,23 @@ static int nvm_core_init(struct nvm_dev *dev)
return -EINVAL;
}
- if (grp->fmtype != 0 && grp->fmtype != 1) {
+ switch (grp->fmtype) {
+ case NVM_ID_FMTYPE_SLC:
+ if (nvm_init_slc_tbl(dev, grp))
+ return -ENOMEM;
+ break;
+ case NVM_ID_FMTYPE_MLC:
+ if (nvm_init_mlc_tbl(dev, grp))
+ return -ENOMEM;
+ break;
+ default:
pr_err("nvm: flash type not supported\n");
return -EINVAL;
}
+ if (!dev->lps_per_blk)
+ pr_info("nvm: lower page programming table missing\n");
+
if (grp->mpos & 0x020202)
dev->plane_mode = NVM_PLANE_DOUBLE;
if (grp->mpos & 0x040404)
@@ -238,6 +469,7 @@ static int nvm_core_init(struct nvm_dev *dev)
dev->nr_chnls;
dev->total_pages = dev->total_blocks * dev->pgs_per_blk;
INIT_LIST_HEAD(&dev->online_targets);
+ mutex_init(&dev->mlock);
return 0;
}
@@ -249,6 +481,8 @@ static void nvm_free(struct nvm_dev *dev)
if (dev->mt)
dev->mt->unregister_mgr(dev);
+
+ kfree(dev->lptbl);
}
static int nvm_init(struct nvm_dev *dev)
@@ -338,9 +572,16 @@ int nvm_register(struct request_queue *q, char *disk_name,
}
}
+ ret = nvm_get_sysblock(dev, &dev->sb);
+ if (!ret)
+ pr_err("nvm: device not initialized.\n");
+ else if (ret < 0)
+ pr_err("nvm: err (%d) on device initialization\n", ret);
+
/* register device with a supported media manager */
down_write(&nvm_lock);
- dev->mt = nvm_init_mgr(dev);
+ if (ret > 0)
+ dev->mt = nvm_init_mgr(dev);
list_add(&dev->devices, &nvm_devices);
up_write(&nvm_lock);
@@ -788,6 +1029,97 @@ static long nvm_ioctl_dev_remove(struct file *file, void __user *arg)
return __nvm_configure_remove(&remove);
}
+static void nvm_setup_nvm_sb_info(struct nvm_sb_info *info)
+{
+ info->seqnr = 1;
+ info->erase_cnt = 0;
+ info->version = 1;
+}
+
+static long __nvm_ioctl_dev_init(struct nvm_ioctl_dev_init *init)
+{
+ struct nvm_dev *dev;
+ struct nvm_sb_info info;
+ int ret;
+
+ down_write(&nvm_lock);
+ dev = nvm_find_nvm_dev(init->dev);
+ up_write(&nvm_lock);
+ if (!dev) {
+ pr_err("nvm: device not found\n");
+ return -EINVAL;
+ }
+
+ nvm_setup_nvm_sb_info(&info);
+
+ strncpy(info.mmtype, init->mmtype, NVM_MMTYPE_LEN);
+ info.fs_ppa.ppa = -1;
+
+ ret = nvm_init_sysblock(dev, &info);
+ if (ret)
+ return ret;
+
+ memcpy(&dev->sb, &info, sizeof(struct nvm_sb_info));
+
+ down_write(&nvm_lock);
+ dev->mt = nvm_init_mgr(dev);
+ up_write(&nvm_lock);
+
+ return 0;
+}
+
+static long nvm_ioctl_dev_init(struct file *file, void __user *arg)
+{
+ struct nvm_ioctl_dev_init init;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (copy_from_user(&init, arg, sizeof(struct nvm_ioctl_dev_init)))
+ return -EFAULT;
+
+ if (init.flags != 0) {
+ pr_err("nvm: no flags supported\n");
+ return -EINVAL;
+ }
+
+ init.dev[DISK_NAME_LEN - 1] = '\0';
+
+ return __nvm_ioctl_dev_init(&init);
+}
+
+static long nvm_ioctl_dev_factory(struct file *file, void __user *arg)
+{
+ struct nvm_ioctl_dev_factory fact;
+ struct nvm_dev *dev;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (copy_from_user(&fact, arg, sizeof(struct nvm_ioctl_dev_factory)))
+ return -EFAULT;
+
+ fact.dev[DISK_NAME_LEN - 1] = '\0';
+
+ if (fact.flags & ~(NVM_FACTORY_NR_BITS - 1))
+ return -EINVAL;
+
+ down_write(&nvm_lock);
+ dev = nvm_find_nvm_dev(fact.dev);
+ up_write(&nvm_lock);
+ if (!dev) {
+ pr_err("nvm: device not found\n");
+ return -EINVAL;
+ }
+
+ if (dev->mt) {
+ dev->mt->unregister_mgr(dev);
+ dev->mt = NULL;
+ }
+
+ return nvm_dev_factory(dev, fact.flags);
+}
+
static long nvm_ctl_ioctl(struct file *file, uint cmd, unsigned long arg)
{
void __user *argp = (void __user *)arg;
@@ -801,6 +1133,10 @@ static long nvm_ctl_ioctl(struct file *file, uint cmd, unsigned long arg)
return nvm_ioctl_dev_create(file, argp);
case NVM_DEV_REMOVE:
return nvm_ioctl_dev_remove(file, argp);
+ case NVM_DEV_INIT:
+ return nvm_ioctl_dev_init(file, argp);
+ case NVM_DEV_FACTORY:
+ return nvm_ioctl_dev_factory(file, argp);
}
return 0;
}
diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c
index a54b339..7fb725b 100644
--- a/drivers/lightnvm/gennvm.c
+++ b/drivers/lightnvm/gennvm.c
@@ -60,7 +60,8 @@ static int gennvm_luns_init(struct nvm_dev *dev, struct gen_nvm *gn)
lun->vlun.lun_id = i % dev->luns_per_chnl;
lun->vlun.chnl_id = i / dev->luns_per_chnl;
lun->vlun.nr_free_blocks = dev->blks_per_lun;
- lun->vlun.nr_inuse_blocks = 0;
+ lun->vlun.nr_open_blocks = 0;
+ lun->vlun.nr_closed_blocks = 0;
lun->vlun.nr_bad_blocks = 0;
}
return 0;
@@ -89,6 +90,7 @@ static int gennvm_block_bb(struct ppa_addr ppa, int nr_blocks, u8 *blks,
list_move_tail(&blk->list, &lun->bb_list);
lun->vlun.nr_bad_blocks++;
+ lun->vlun.nr_free_blocks--;
}
return 0;
@@ -133,15 +135,15 @@ static int gennvm_block_map(u64 slba, u32 nlb, __le64 *entries, void *private)
pba = pba - (dev->sec_per_lun * lun_id);
blk = &lun->vlun.blocks[div_u64(pba, dev->sec_per_blk)];
- if (!blk->type) {
+ if (!blk->state) {
/* at this point, we don't know anything about the
* block. It's up to the FTL on top to re-etablish the
- * block state
+ * block state. The block is assumed to be open.
*/
list_move_tail(&blk->list, &lun->used_list);
- blk->type = 1;
+ blk->state = NVM_BLK_ST_OPEN;
lun->vlun.nr_free_blocks--;
- lun->vlun.nr_inuse_blocks++;
+ lun->vlun.nr_open_blocks++;
}
}
@@ -255,14 +257,14 @@ static void gennvm_unregister(struct nvm_dev *dev)
module_put(THIS_MODULE);
}
-static struct nvm_block *gennvm_get_blk(struct nvm_dev *dev,
+static struct nvm_block *gennvm_get_blk_unlocked(struct nvm_dev *dev,
struct nvm_lun *vlun, unsigned long flags)
{
struct gen_lun *lun = container_of(vlun, struct gen_lun, vlun);
struct nvm_block *blk = NULL;
int is_gc = flags & NVM_IOTYPE_GC;
- spin_lock(&vlun->lock);
+ assert_spin_locked(&vlun->lock);
if (list_empty(&lun->free_list)) {
pr_err_ratelimited("gennvm: lun %u have no free pages available",
@@ -275,83 +277,64 @@ static struct nvm_block *gennvm_get_blk(struct nvm_dev *dev,
blk = list_first_entry(&lun->free_list, struct nvm_block, list);
list_move_tail(&blk->list, &lun->used_list);
- blk->type = 1;
+ blk->state = NVM_BLK_ST_OPEN;
lun->vlun.nr_free_blocks--;
- lun->vlun.nr_inuse_blocks++;
+ lun->vlun.nr_open_blocks++;
out:
+ return blk;
+}
+
+static struct nvm_block *gennvm_get_blk(struct nvm_dev *dev,
+ struct nvm_lun *vlun, unsigned long flags)
+{
+ struct nvm_block *blk;
+
+ spin_lock(&vlun->lock);
+ blk = gennvm_get_blk_unlocked(dev, vlun, flags);
spin_unlock(&vlun->lock);
return blk;
}
-static void gennvm_put_blk(struct nvm_dev *dev, struct nvm_block *blk)
+static void gennvm_put_blk_unlocked(struct nvm_dev *dev, struct nvm_block *blk)
{
struct nvm_lun *vlun = blk->lun;
struct gen_lun *lun = container_of(vlun, struct gen_lun, vlun);
- spin_lock(&vlun->lock);
+ assert_spin_locked(&vlun->lock);
- switch (blk->type) {
- case 1:
+ if (blk->state & NVM_BLK_ST_OPEN) {
list_move_tail(&blk->list, &lun->free_list);
+ lun->vlun.nr_open_blocks--;
lun->vlun.nr_free_blocks++;
- lun->vlun.nr_inuse_blocks--;
- blk->type = 0;
- break;
- case 2:
+ blk->state = NVM_BLK_ST_FREE;
+ } else if (blk->state & NVM_BLK_ST_CLOSED) {
+ list_move_tail(&blk->list, &lun->free_list);
+ lun->vlun.nr_closed_blocks--;
+ lun->vlun.nr_free_blocks++;
+ blk->state = NVM_BLK_ST_FREE;
+ } else if (blk->state & NVM_BLK_ST_BAD) {
list_move_tail(&blk->list, &lun->bb_list);
lun->vlun.nr_bad_blocks++;
- lun->vlun.nr_inuse_blocks--;
- break;
- default:
+ blk->state = NVM_BLK_ST_BAD;
+ } else {
WARN_ON_ONCE(1);
pr_err("gennvm: erroneous block type (%lu -> %u)\n",
- blk->id, blk->type);
+ blk->id, blk->state);
list_move_tail(&blk->list, &lun->bb_list);
lun->vlun.nr_bad_blocks++;
- lun->vlun.nr_inuse_blocks--;
- }
-
- spin_unlock(&vlun->lock);
-}
-
-static void gennvm_addr_to_generic_mode(struct nvm_dev *dev, struct nvm_rq *rqd)
-{
- int i;
-
- if (rqd->nr_pages > 1) {
- for (i = 0; i < rqd->nr_pages; i++)
- rqd->ppa_list[i] = dev_to_generic_addr(dev,
- rqd->ppa_list[i]);
- } else {
- rqd->ppa_addr = dev_to_generic_addr(dev, rqd->ppa_addr);
+ blk->state = NVM_BLK_ST_BAD;
}
}
-static void gennvm_generic_to_addr_mode(struct nvm_dev *dev, struct nvm_rq *rqd)
-{
- int i;
-
- if (rqd->nr_pages > 1) {
- for (i = 0; i < rqd->nr_pages; i++)
- rqd->ppa_list[i] = generic_to_dev_addr(dev,
- rqd->ppa_list[i]);
- } else {
- rqd->ppa_addr = generic_to_dev_addr(dev, rqd->ppa_addr);
- }
-}
-
-static int gennvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
+static void gennvm_put_blk(struct nvm_dev *dev, struct nvm_block *blk)
{
- if (!dev->ops->submit_io)
- return 0;
-
- /* Convert address space */
- gennvm_generic_to_addr_mode(dev, rqd);
+ struct nvm_lun *vlun = blk->lun;
- rqd->dev = dev;
- return dev->ops->submit_io(dev, rqd);
+ spin_lock(&vlun->lock);
+ gennvm_put_blk_unlocked(dev, blk);
+ spin_unlock(&vlun->lock);
}
static void gennvm_blk_set_type(struct nvm_dev *dev, struct ppa_addr *ppa,
@@ -376,7 +359,7 @@ static void gennvm_blk_set_type(struct nvm_dev *dev, struct ppa_addr *ppa,
blk = &lun->vlun.blocks[ppa->g.blk];
/* will be moved to bb list on put_blk from target */
- blk->type = type;
+ blk->state = type;
}
/* mark block bad. It is expected the target recover from the error. */
@@ -390,77 +373,51 @@ static void gennvm_mark_blk_bad(struct nvm_dev *dev, struct nvm_rq *rqd)
if (dev->ops->set_bb_tbl(dev, rqd, 1))
return;
- gennvm_addr_to_generic_mode(dev, rqd);
+ nvm_addr_to_generic_mode(dev, rqd);
/* look up blocks and mark them as bad */
if (rqd->nr_pages > 1)
for (i = 0; i < rqd->nr_pages; i++)
- gennvm_blk_set_type(dev, &rqd->ppa_list[i], 2);
+ gennvm_blk_set_type(dev, &rqd->ppa_list[i],
+ NVM_BLK_ST_BAD);
else
- gennvm_blk_set_type(dev, &rqd->ppa_addr, 2);
+ gennvm_blk_set_type(dev, &rqd->ppa_addr, NVM_BLK_ST_BAD);
}
-static int gennvm_end_io(struct nvm_rq *rqd, int error)
+static void gennvm_end_io(struct nvm_rq *rqd)
{
struct nvm_tgt_instance *ins = rqd->ins;
- int ret = 0;
- switch (error) {
+ switch (rqd->error) {
case NVM_RSP_SUCCESS:
- break;
case NVM_RSP_ERR_EMPTYPAGE:
break;
case NVM_RSP_ERR_FAILWRITE:
gennvm_mark_blk_bad(rqd->dev, rqd);
- default:
- ret++;
}
- ret += ins->tt->end_io(rqd, error);
-
- return ret;
+ ins->tt->end_io(rqd);
}
-static int gennvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk,
- unsigned long flags)
+static int gennvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
{
- int plane_cnt = 0, pl_idx, ret;
- struct ppa_addr addr;
- struct nvm_rq rqd;
-
- if (!dev->ops->erase_block)
- return 0;
-
- addr = block_to_ppa(dev, blk);
-
- if (dev->plane_mode == NVM_PLANE_SINGLE) {
- rqd.nr_pages = 1;
- rqd.ppa_addr = addr;
- } else {
- plane_cnt = (1 << dev->plane_mode);
- rqd.nr_pages = plane_cnt;
-
- rqd.ppa_list = nvm_dev_dma_alloc(dev, GFP_KERNEL,
- &rqd.dma_ppa_list);
- if (!rqd.ppa_list) {
- pr_err("gennvm: failed to allocate dma memory\n");
- return -ENOMEM;
- }
-
- for (pl_idx = 0; pl_idx < plane_cnt; pl_idx++) {
- addr.g.pl = pl_idx;
- rqd.ppa_list[pl_idx] = addr;
- }
- }
+ if (!dev->ops->submit_io)
+ return -ENODEV;
- gennvm_generic_to_addr_mode(dev, &rqd);
+ /* Convert address space */
+ nvm_generic_to_addr_mode(dev, rqd);
- ret = dev->ops->erase_block(dev, &rqd);
+ rqd->dev = dev;
+ rqd->end_io = gennvm_end_io;
+ return dev->ops->submit_io(dev, rqd);
+}
- if (plane_cnt)
- nvm_dev_dma_free(dev, rqd.ppa_list, rqd.dma_ppa_list);
+static int gennvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk,
+ unsigned long flags)
+{
+ struct ppa_addr addr = block_to_ppa(dev, blk);
- return ret;
+ return nvm_erase_ppa(dev, &addr, 1);
}
static struct nvm_lun *gennvm_get_lun(struct nvm_dev *dev, int lunid)
@@ -480,10 +437,11 @@ static void gennvm_lun_info_print(struct nvm_dev *dev)
gennvm_for_each_lun(gn, lun, i) {
spin_lock(&lun->vlun.lock);
- pr_info("%s: lun%8u\t%u\t%u\t%u\n",
+ pr_info("%s: lun%8u\t%u\t%u\t%u\t%u\n",
dev->name, i,
lun->vlun.nr_free_blocks,
- lun->vlun.nr_inuse_blocks,
+ lun->vlun.nr_open_blocks,
+ lun->vlun.nr_closed_blocks,
lun->vlun.nr_bad_blocks);
spin_unlock(&lun->vlun.lock);
@@ -491,21 +449,23 @@ static void gennvm_lun_info_print(struct nvm_dev *dev)
}
static struct nvmm_type gennvm = {
- .name = "gennvm",
- .version = {0, 1, 0},
+ .name = "gennvm",
+ .version = {0, 1, 0},
+
+ .register_mgr = gennvm_register,
+ .unregister_mgr = gennvm_unregister,
- .register_mgr = gennvm_register,
- .unregister_mgr = gennvm_unregister,
+ .get_blk_unlocked = gennvm_get_blk_unlocked,
+ .put_blk_unlocked = gennvm_put_blk_unlocked,
- .get_blk = gennvm_get_blk,
- .put_blk = gennvm_put_blk,
+ .get_blk = gennvm_get_blk,
+ .put_blk = gennvm_put_blk,
- .submit_io = gennvm_submit_io,
- .end_io = gennvm_end_io,
- .erase_blk = gennvm_erase_blk,
+ .submit_io = gennvm_submit_io,
+ .erase_blk = gennvm_erase_blk,
- .get_lun = gennvm_get_lun,
- .lun_info_print = gennvm_lun_info_print,
+ .get_lun = gennvm_get_lun,
+ .lun_info_print = gennvm_lun_info_print,
};
static int __init gennvm_module_init(void)
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index 134e4fa..d8c7595 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -179,16 +179,23 @@ static void rrpc_set_lun_cur(struct rrpc_lun *rlun, struct rrpc_block *rblk)
static struct rrpc_block *rrpc_get_blk(struct rrpc *rrpc, struct rrpc_lun *rlun,
unsigned long flags)
{
+ struct nvm_lun *lun = rlun->parent;
struct nvm_block *blk;
struct rrpc_block *rblk;
- blk = nvm_get_blk(rrpc->dev, rlun->parent, flags);
- if (!blk)
+ spin_lock(&lun->lock);
+ blk = nvm_get_blk_unlocked(rrpc->dev, rlun->parent, flags);
+ if (!blk) {
+ pr_err("nvm: rrpc: cannot get new block from media manager\n");
+ spin_unlock(&lun->lock);
return NULL;
+ }
rblk = &rlun->blocks[blk->id];
- blk->priv = rblk;
+ list_add_tail(&rblk->list, &rlun->open_list);
+ spin_unlock(&lun->lock);
+ blk->priv = rblk;
bitmap_zero(rblk->invalid_pages, rrpc->dev->pgs_per_blk);
rblk->next_page = 0;
rblk->nr_invalid_pages = 0;
@@ -199,7 +206,13 @@ static struct rrpc_block *rrpc_get_blk(struct rrpc *rrpc, struct rrpc_lun *rlun,
static void rrpc_put_blk(struct rrpc *rrpc, struct rrpc_block *rblk)
{
- nvm_put_blk(rrpc->dev, rblk->parent);
+ struct rrpc_lun *rlun = rblk->rlun;
+ struct nvm_lun *lun = rlun->parent;
+
+ spin_lock(&lun->lock);
+ nvm_put_blk_unlocked(rrpc->dev, rblk->parent);
+ list_del(&rblk->list);
+ spin_unlock(&lun->lock);
}
static void rrpc_put_blks(struct rrpc *rrpc)
@@ -287,6 +300,8 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
}
page = mempool_alloc(rrpc->page_pool, GFP_NOIO);
+ if (!page)
+ return -ENOMEM;
while ((slot = find_first_zero_bit(rblk->invalid_pages,
nr_pgs_per_blk)) < nr_pgs_per_blk) {
@@ -328,6 +343,10 @@ try:
goto finished;
}
wait_for_completion_io(&wait);
+ if (bio->bi_error) {
+ rrpc_inflight_laddr_release(rrpc, rqd);
+ goto finished;
+ }
bio_reset(bio);
reinit_completion(&wait);
@@ -350,6 +369,8 @@ try:
wait_for_completion_io(&wait);
rrpc_inflight_laddr_release(rrpc, rqd);
+ if (bio->bi_error)
+ goto finished;
bio_reset(bio);
}
@@ -373,16 +394,26 @@ static void rrpc_block_gc(struct work_struct *work)
struct rrpc *rrpc = gcb->rrpc;
struct rrpc_block *rblk = gcb->rblk;
struct nvm_dev *dev = rrpc->dev;
+ struct nvm_lun *lun = rblk->parent->lun;
+ struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset];
+ mempool_free(gcb, rrpc->gcb_pool);
pr_debug("nvm: block '%lu' being reclaimed\n", rblk->parent->id);
if (rrpc_move_valid_pages(rrpc, rblk))
- goto done;
+ goto put_back;
+
+ if (nvm_erase_blk(dev, rblk->parent))
+ goto put_back;
- nvm_erase_blk(dev, rblk->parent);
rrpc_put_blk(rrpc, rblk);
-done:
- mempool_free(gcb, rrpc->gcb_pool);
+
+ return;
+
+put_back:
+ spin_lock(&rlun->lock);
+ list_add_tail(&rblk->prio, &rlun->prio_list);
+ spin_unlock(&rlun->lock);
}
/* the block with highest number of invalid pages, will be in the beginning
@@ -427,7 +458,7 @@ static void rrpc_lun_gc(struct work_struct *work)
if (nr_blocks_need < rrpc->nr_luns)
nr_blocks_need = rrpc->nr_luns;
- spin_lock(&lun->lock);
+ spin_lock(&rlun->lock);
while (nr_blocks_need > lun->nr_free_blocks &&
!list_empty(&rlun->prio_list)) {
struct rrpc_block *rblock = block_prio_find_max(rlun);
@@ -436,16 +467,16 @@ static void rrpc_lun_gc(struct work_struct *work)
if (!rblock->nr_invalid_pages)
break;
+ gcb = mempool_alloc(rrpc->gcb_pool, GFP_ATOMIC);
+ if (!gcb)
+ break;
+
list_del_init(&rblock->prio);
BUG_ON(!block_is_full(rrpc, rblock));
pr_debug("rrpc: selected block '%lu' for GC\n", block->id);
- gcb = mempool_alloc(rrpc->gcb_pool, GFP_ATOMIC);
- if (!gcb)
- break;
-
gcb->rrpc = rrpc;
gcb->rblk = rblock;
INIT_WORK(&gcb->ws_gc, rrpc_block_gc);
@@ -454,7 +485,7 @@ static void rrpc_lun_gc(struct work_struct *work)
nr_blocks_need--;
}
- spin_unlock(&lun->lock);
+ spin_unlock(&rlun->lock);
/* TODO: Hint that request queue can be started again */
}
@@ -635,12 +666,24 @@ static void rrpc_end_io_write(struct rrpc *rrpc, struct rrpc_rq *rrqd,
lun = rblk->parent->lun;
cmnt_size = atomic_inc_return(&rblk->data_cmnt_size);
- if (unlikely(cmnt_size == rrpc->dev->pgs_per_blk))
+ if (unlikely(cmnt_size == rrpc->dev->pgs_per_blk)) {
+ struct nvm_block *blk = rblk->parent;
+ struct rrpc_lun *rlun = rblk->rlun;
+
+ spin_lock(&lun->lock);
+ lun->nr_open_blocks--;
+ lun->nr_closed_blocks++;
+ blk->state &= ~NVM_BLK_ST_OPEN;
+ blk->state |= NVM_BLK_ST_CLOSED;
+ list_move_tail(&rblk->list, &rlun->closed_list);
+ spin_unlock(&lun->lock);
+
rrpc_run_gc(rrpc, rblk);
+ }
}
}
-static int rrpc_end_io(struct nvm_rq *rqd, int error)
+static void rrpc_end_io(struct nvm_rq *rqd)
{
struct rrpc *rrpc = container_of(rqd->ins, struct rrpc, instance);
struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
@@ -650,11 +693,12 @@ static int rrpc_end_io(struct nvm_rq *rqd, int error)
if (bio_data_dir(rqd->bio) == WRITE)
rrpc_end_io_write(rrpc, rrqd, laddr, npages);
+ bio_put(rqd->bio);
+
if (rrqd->flags & NVM_IOTYPE_GC)
- return 0;
+ return;
rrpc_unlock_rq(rrpc, rqd);
- bio_put(rqd->bio);
if (npages > 1)
nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list);
@@ -662,8 +706,6 @@ static int rrpc_end_io(struct nvm_rq *rqd, int error)
nvm_dev_dma_free(rrpc->dev, rqd->metadata, rqd->dma_metadata);
mempool_free(rqd, rrpc->rq_pool);
-
- return 0;
}
static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
@@ -841,6 +883,13 @@ static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio,
err = nvm_submit_io(rrpc->dev, rqd);
if (err) {
pr_err("rrpc: I/O submission failed: %d\n", err);
+ bio_put(bio);
+ if (!(flags & NVM_IOTYPE_GC)) {
+ rrpc_unlock_rq(rrpc, rqd);
+ if (rqd->nr_pages > 1)
+ nvm_dev_dma_free(rrpc->dev,
+ rqd->ppa_list, rqd->dma_ppa_list);
+ }
return NVM_IO_ERR;
}
@@ -1090,6 +1139,11 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
struct rrpc_lun *rlun;
int i, j;
+ if (dev->pgs_per_blk > MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) {
+ pr_err("rrpc: number of pages per block too high.");
+ return -EINVAL;
+ }
+
spin_lock_init(&rrpc->rev_lock);
rrpc->luns = kcalloc(rrpc->nr_luns, sizeof(struct rrpc_lun),
@@ -1101,16 +1155,13 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
for (i = 0; i < rrpc->nr_luns; i++) {
struct nvm_lun *lun = dev->mt->get_lun(dev, lun_begin + i);
- if (dev->pgs_per_blk >
- MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) {
- pr_err("rrpc: number of pages per block too high.");
- goto err;
- }
-
rlun = &rrpc->luns[i];
rlun->rrpc = rrpc;
rlun->parent = lun;
INIT_LIST_HEAD(&rlun->prio_list);
+ INIT_LIST_HEAD(&rlun->open_list);
+ INIT_LIST_HEAD(&rlun->closed_list);
+
INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
spin_lock_init(&rlun->lock);
@@ -1127,6 +1178,7 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
struct nvm_block *blk = &lun->blocks[j];
rblk->parent = blk;
+ rblk->rlun = rlun;
INIT_LIST_HEAD(&rblk->prio);
spin_lock_init(&rblk->lock);
}
diff --git a/drivers/lightnvm/rrpc.h b/drivers/lightnvm/rrpc.h
index a9696a0..ef13ac7 100644
--- a/drivers/lightnvm/rrpc.h
+++ b/drivers/lightnvm/rrpc.h
@@ -54,7 +54,9 @@ struct rrpc_rq {
struct rrpc_block {
struct nvm_block *parent;
+ struct rrpc_lun *rlun;
struct list_head prio;
+ struct list_head list;
#define MAX_INVALID_PAGES_STORAGE 8
/* Bitmap for invalid page intries */
@@ -73,7 +75,16 @@ struct rrpc_lun {
struct nvm_lun *parent;
struct rrpc_block *cur, *gc_cur;
struct rrpc_block *blocks; /* Reference to block allocation */
- struct list_head prio_list; /* Blocks that may be GC'ed */
+
+ struct list_head prio_list; /* Blocks that may be GC'ed */
+ struct list_head open_list; /* In-use open blocks. These are blocks
+ * that can be both written to and read
+ * from
+ */
+ struct list_head closed_list; /* In-use closed blocks. These are
+ * blocks that can _only_ be read from
+ */
+
struct work_struct ws_gc;
spinlock_t lock;
diff --git a/drivers/lightnvm/sysblk.c b/drivers/lightnvm/sysblk.c
new file mode 100644
index 0000000..321de1f
--- /dev/null
+++ b/drivers/lightnvm/sysblk.c
@@ -0,0 +1,741 @@
+/*
+ * Copyright (C) 2015 Matias Bjorling. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
+ * USA.
+ *
+ */
+
+#include <linux/lightnvm.h>
+
+#define MAX_SYSBLKS 3 /* remember to update mapping scheme on change */
+#define MAX_BLKS_PR_SYSBLK 2 /* 2 blks with 256 pages and 3000 erases
+ * enables ~1.5M updates per sysblk unit
+ */
+
+struct sysblk_scan {
+ /* A row is a collection of flash blocks for a system block. */
+ int nr_rows;
+ int row;
+ int act_blk[MAX_SYSBLKS];
+
+ int nr_ppas;
+ struct ppa_addr ppas[MAX_SYSBLKS * MAX_BLKS_PR_SYSBLK];/* all sysblks */
+};
+
+static inline int scan_ppa_idx(int row, int blkid)
+{
+ return (row * MAX_BLKS_PR_SYSBLK) + blkid;
+}
+
+void nvm_sysblk_to_cpu(struct nvm_sb_info *info, struct nvm_system_block *sb)
+{
+ info->seqnr = be32_to_cpu(sb->seqnr);
+ info->erase_cnt = be32_to_cpu(sb->erase_cnt);
+ info->version = be16_to_cpu(sb->version);
+ strncpy(info->mmtype, sb->mmtype, NVM_MMTYPE_LEN);
+ info->fs_ppa.ppa = be64_to_cpu(sb->fs_ppa);
+}
+
+void nvm_cpu_to_sysblk(struct nvm_system_block *sb, struct nvm_sb_info *info)
+{
+ sb->magic = cpu_to_be32(NVM_SYSBLK_MAGIC);
+ sb->seqnr = cpu_to_be32(info->seqnr);
+ sb->erase_cnt = cpu_to_be32(info->erase_cnt);
+ sb->version = cpu_to_be16(info->version);
+ strncpy(sb->mmtype, info->mmtype, NVM_MMTYPE_LEN);
+ sb->fs_ppa = cpu_to_be64(info->fs_ppa.ppa);
+}
+
+static int nvm_setup_sysblks(struct nvm_dev *dev, struct ppa_addr *sysblk_ppas)
+{
+ int nr_rows = min_t(int, MAX_SYSBLKS, dev->nr_chnls);
+ int i;
+
+ for (i = 0; i < nr_rows; i++)
+ sysblk_ppas[i].ppa = 0;
+
+ /* if possible, place sysblk at first channel, middle channel and last
+ * channel of the device. If not, create only one or two sys blocks
+ */
+ switch (dev->nr_chnls) {
+ case 2:
+ sysblk_ppas[1].g.ch = 1;
+ /* fall-through */
+ case 1:
+ sysblk_ppas[0].g.ch = 0;
+ break;
+ default:
+ sysblk_ppas[0].g.ch = 0;
+ sysblk_ppas[1].g.ch = dev->nr_chnls / 2;
+ sysblk_ppas[2].g.ch = dev->nr_chnls - 1;
+ break;
+ }
+
+ return nr_rows;
+}
+
+void nvm_setup_sysblk_scan(struct nvm_dev *dev, struct sysblk_scan *s,
+ struct ppa_addr *sysblk_ppas)
+{
+ memset(s, 0, sizeof(struct sysblk_scan));
+ s->nr_rows = nvm_setup_sysblks(dev, sysblk_ppas);
+}
+
+static int sysblk_get_host_blks(struct ppa_addr ppa, int nr_blks, u8 *blks,
+ void *private)
+{
+ struct sysblk_scan *s = private;
+ int i, nr_sysblk = 0;
+
+ for (i = 0; i < nr_blks; i++) {
+ if (blks[i] != NVM_BLK_T_HOST)
+ continue;
+
+ if (s->nr_ppas == MAX_BLKS_PR_SYSBLK * MAX_SYSBLKS) {
+ pr_err("nvm: too many host blks\n");
+ return -EINVAL;
+ }
+
+ ppa.g.blk = i;
+
+ s->ppas[scan_ppa_idx(s->row, nr_sysblk)] = ppa;
+ s->nr_ppas++;
+ nr_sysblk++;
+ }
+
+ return 0;
+}
+
+static int nvm_get_all_sysblks(struct nvm_dev *dev, struct sysblk_scan *s,
+ struct ppa_addr *ppas, nvm_bb_update_fn *fn)
+{
+ struct ppa_addr dppa;
+ int i, ret;
+
+ s->nr_ppas = 0;
+
+ for (i = 0; i < s->nr_rows; i++) {
+ dppa = generic_to_dev_addr(dev, ppas[i]);
+ s->row = i;
+
+ ret = dev->ops->get_bb_tbl(dev, dppa, dev->blks_per_lun, fn, s);
+ if (ret) {
+ pr_err("nvm: failed bb tbl for ppa (%u %u)\n",
+ ppas[i].g.ch,
+ ppas[i].g.blk);
+ return ret;
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * scans a block for latest sysblk.
+ * Returns:
+ * 0 - newer sysblk not found. PPA is updated to latest page.
+ * 1 - newer sysblk found and stored in *cur. PPA is updated to
+ * next valid page.
+ * <0- error.
+ */
+static int nvm_scan_block(struct nvm_dev *dev, struct ppa_addr *ppa,
+ struct nvm_system_block *sblk)
+{
+ struct nvm_system_block *cur;
+ int pg, cursz, ret, found = 0;
+
+ /* the full buffer for a flash page is allocated. Only the first of it
+ * contains the system block information
+ */
+ cursz = dev->sec_size * dev->sec_per_pg * dev->nr_planes;
+ cur = kmalloc(cursz, GFP_KERNEL);
+ if (!cur)
+ return -ENOMEM;
+
+ /* perform linear scan through the block */
+ for (pg = 0; pg < dev->lps_per_blk; pg++) {
+ ppa->g.pg = ppa_to_slc(dev, pg);
+
+ ret = nvm_submit_ppa(dev, ppa, 1, NVM_OP_PREAD, NVM_IO_SLC_MODE,
+ cur, cursz);
+ if (ret) {
+ if (ret == NVM_RSP_ERR_EMPTYPAGE) {
+ pr_debug("nvm: sysblk scan empty ppa (%u %u %u %u)\n",
+ ppa->g.ch,
+ ppa->g.lun,
+ ppa->g.blk,
+ ppa->g.pg);
+ break;
+ }
+ pr_err("nvm: read failed (%x) for ppa (%u %u %u %u)",
+ ret,
+ ppa->g.ch,
+ ppa->g.lun,
+ ppa->g.blk,
+ ppa->g.pg);
+ break; /* if we can't read a page, continue to the
+ * next blk
+ */
+ }
+
+ if (be32_to_cpu(cur->magic) != NVM_SYSBLK_MAGIC) {
+ pr_debug("nvm: scan break for ppa (%u %u %u %u)\n",
+ ppa->g.ch,
+ ppa->g.lun,
+ ppa->g.blk,
+ ppa->g.pg);
+ break; /* last valid page already found */
+ }
+
+ if (be32_to_cpu(cur->seqnr) < be32_to_cpu(sblk->seqnr))
+ continue;
+
+ memcpy(sblk, cur, sizeof(struct nvm_system_block));
+ found = 1;
+ }
+
+ kfree(cur);
+
+ return found;
+}
+
+static int nvm_set_bb_tbl(struct nvm_dev *dev, struct sysblk_scan *s, int type)
+{
+ struct nvm_rq rqd;
+ int ret;
+
+ if (s->nr_ppas > dev->ops->max_phys_sect) {
+ pr_err("nvm: unable to update all sysblocks atomically\n");
+ return -EINVAL;
+ }
+
+ memset(&rqd, 0, sizeof(struct nvm_rq));
+
+ nvm_set_rqd_ppalist(dev, &rqd, s->ppas, s->nr_ppas);
+ nvm_generic_to_addr_mode(dev, &rqd);
+
+ ret = dev->ops->set_bb_tbl(dev, &rqd, type);
+ nvm_free_rqd_ppalist(dev, &rqd);
+ if (ret) {
+ pr_err("nvm: sysblk failed bb mark\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int sysblk_get_free_blks(struct ppa_addr ppa, int nr_blks, u8 *blks,
+ void *private)
+{
+ struct sysblk_scan *s = private;
+ struct ppa_addr *sppa;
+ int i, blkid = 0;
+
+ for (i = 0; i < nr_blks; i++) {
+ if (blks[i] == NVM_BLK_T_HOST)
+ return -EEXIST;
+
+ if (blks[i] != NVM_BLK_T_FREE)
+ continue;
+
+ sppa = &s->ppas[scan_ppa_idx(s->row, blkid)];
+ sppa->g.ch = ppa.g.ch;
+ sppa->g.lun = ppa.g.lun;
+ sppa->g.blk = i;
+ s->nr_ppas++;
+ blkid++;
+
+ pr_debug("nvm: use (%u %u %u) as sysblk\n",
+ sppa->g.ch, sppa->g.lun, sppa->g.blk);
+ if (blkid > MAX_BLKS_PR_SYSBLK - 1)
+ return 0;
+ }
+
+ pr_err("nvm: sysblk failed get sysblk\n");
+ return -EINVAL;
+}
+
+static int nvm_write_and_verify(struct nvm_dev *dev, struct nvm_sb_info *info,
+ struct sysblk_scan *s)
+{
+ struct nvm_system_block nvmsb;
+ void *buf;
+ int i, sect, ret, bufsz;
+ struct ppa_addr *ppas;
+
+ nvm_cpu_to_sysblk(&nvmsb, info);
+
+ /* buffer for flash page */
+ bufsz = dev->sec_size * dev->sec_per_pg * dev->nr_planes;
+ buf = kzalloc(bufsz, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ memcpy(buf, &nvmsb, sizeof(struct nvm_system_block));
+
+ ppas = kcalloc(dev->sec_per_pg, sizeof(struct ppa_addr), GFP_KERNEL);
+ if (!ppas) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ /* Write and verify */
+ for (i = 0; i < s->nr_rows; i++) {
+ ppas[0] = s->ppas[scan_ppa_idx(i, s->act_blk[i])];
+
+ pr_debug("nvm: writing sysblk to ppa (%u %u %u %u)\n",
+ ppas[0].g.ch,
+ ppas[0].g.lun,
+ ppas[0].g.blk,
+ ppas[0].g.pg);
+
+ /* Expand to all sectors within a flash page */
+ if (dev->sec_per_pg > 1) {
+ for (sect = 1; sect < dev->sec_per_pg; sect++) {
+ ppas[sect].ppa = ppas[0].ppa;
+ ppas[sect].g.sec = sect;
+ }
+ }
+
+ ret = nvm_submit_ppa(dev, ppas, dev->sec_per_pg, NVM_OP_PWRITE,
+ NVM_IO_SLC_MODE, buf, bufsz);
+ if (ret) {
+ pr_err("nvm: sysblk failed program (%u %u %u)\n",
+ ppas[0].g.ch,
+ ppas[0].g.lun,
+ ppas[0].g.blk);
+ break;
+ }
+
+ ret = nvm_submit_ppa(dev, ppas, dev->sec_per_pg, NVM_OP_PREAD,
+ NVM_IO_SLC_MODE, buf, bufsz);
+ if (ret) {
+ pr_err("nvm: sysblk failed read (%u %u %u)\n",
+ ppas[0].g.ch,
+ ppas[0].g.lun,
+ ppas[0].g.blk);
+ break;
+ }
+
+ if (memcmp(buf, &nvmsb, sizeof(struct nvm_system_block))) {
+ pr_err("nvm: sysblk failed verify (%u %u %u)\n",
+ ppas[0].g.ch,
+ ppas[0].g.lun,
+ ppas[0].g.blk);
+ ret = -EINVAL;
+ break;
+ }
+ }
+
+ kfree(ppas);
+err:
+ kfree(buf);
+
+ return ret;
+}
+
+static int nvm_prepare_new_sysblks(struct nvm_dev *dev, struct sysblk_scan *s)
+{
+ int i, ret;
+ unsigned long nxt_blk;
+ struct ppa_addr *ppa;
+
+ for (i = 0; i < s->nr_rows; i++) {
+ nxt_blk = (s->act_blk[i] + 1) % MAX_BLKS_PR_SYSBLK;
+ ppa = &s->ppas[scan_ppa_idx(i, nxt_blk)];
+ ppa->g.pg = ppa_to_slc(dev, 0);
+
+ ret = nvm_erase_ppa(dev, ppa, 1);
+ if (ret)
+ return ret;
+
+ s->act_blk[i] = nxt_blk;
+ }
+
+ return 0;
+}
+
+int nvm_get_sysblock(struct nvm_dev *dev, struct nvm_sb_info *info)
+{
+ struct ppa_addr sysblk_ppas[MAX_SYSBLKS];
+ struct sysblk_scan s;
+ struct nvm_system_block *cur;
+ int i, j, found = 0;
+ int ret = -ENOMEM;
+
+ /*
+ * 1. setup sysblk locations
+ * 2. get bad block list
+ * 3. filter on host-specific (type 3)
+ * 4. iterate through all and find the highest seq nr.
+ * 5. return superblock information
+ */
+
+ if (!dev->ops->get_bb_tbl)
+ return -EINVAL;
+
+ nvm_setup_sysblk_scan(dev, &s, sysblk_ppas);
+
+ mutex_lock(&dev->mlock);
+ ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, sysblk_get_host_blks);
+ if (ret)
+ goto err_sysblk;
+
+ /* no sysblocks initialized */
+ if (!s.nr_ppas)
+ goto err_sysblk;
+
+ cur = kzalloc(sizeof(struct nvm_system_block), GFP_KERNEL);
+ if (!cur)
+ goto err_sysblk;
+
+ /* find the latest block across all sysblocks */
+ for (i = 0; i < s.nr_rows; i++) {
+ for (j = 0; j < MAX_BLKS_PR_SYSBLK; j++) {
+ struct ppa_addr ppa = s.ppas[scan_ppa_idx(i, j)];
+
+ ret = nvm_scan_block(dev, &ppa, cur);
+ if (ret > 0)
+ found = 1;
+ else if (ret < 0)
+ break;
+ }
+ }
+
+ nvm_sysblk_to_cpu(info, cur);
+
+ kfree(cur);
+err_sysblk:
+ mutex_unlock(&dev->mlock);
+
+ if (found)
+ return 1;
+ return ret;
+}
+
+int nvm_update_sysblock(struct nvm_dev *dev, struct nvm_sb_info *new)
+{
+ /* 1. for each latest superblock
+ * 2. if room
+ * a. write new flash page entry with the updated information
+ * 3. if no room
+ * a. find next available block on lun (linear search)
+ * if none, continue to next lun
+ * if none at all, report error. also report that it wasn't
+ * possible to write to all superblocks.
+ * c. write data to block.
+ */
+ struct ppa_addr sysblk_ppas[MAX_SYSBLKS];
+ struct sysblk_scan s;
+ struct nvm_system_block *cur;
+ int i, j, ppaidx, found = 0;
+ int ret = -ENOMEM;
+
+ if (!dev->ops->get_bb_tbl)
+ return -EINVAL;
+
+ nvm_setup_sysblk_scan(dev, &s, sysblk_ppas);
+
+ mutex_lock(&dev->mlock);
+ ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, sysblk_get_host_blks);
+ if (ret)
+ goto err_sysblk;
+
+ cur = kzalloc(sizeof(struct nvm_system_block), GFP_KERNEL);
+ if (!cur)
+ goto err_sysblk;
+
+ /* Get the latest sysblk for each sysblk row */
+ for (i = 0; i < s.nr_rows; i++) {
+ found = 0;
+ for (j = 0; j < MAX_BLKS_PR_SYSBLK; j++) {
+ ppaidx = scan_ppa_idx(i, j);
+ ret = nvm_scan_block(dev, &s.ppas[ppaidx], cur);
+ if (ret > 0) {
+ s.act_blk[i] = j;
+ found = 1;
+ } else if (ret < 0)
+ break;
+ }
+ }
+
+ if (!found) {
+ pr_err("nvm: no valid sysblks found to update\n");
+ ret = -EINVAL;
+ goto err_cur;
+ }
+
+ /*
+ * All sysblocks found. Check that they have same page id in their flash
+ * blocks
+ */
+ for (i = 1; i < s.nr_rows; i++) {
+ struct ppa_addr l = s.ppas[scan_ppa_idx(0, s.act_blk[0])];
+ struct ppa_addr r = s.ppas[scan_ppa_idx(i, s.act_blk[i])];
+
+ if (l.g.pg != r.g.pg) {
+ pr_err("nvm: sysblks not on same page. Previous update failed.\n");
+ ret = -EINVAL;
+ goto err_cur;
+ }
+ }
+
+ /*
+ * Check that there haven't been another update to the seqnr since we
+ * began
+ */
+ if ((new->seqnr - 1) != be32_to_cpu(cur->seqnr)) {
+ pr_err("nvm: seq is not sequential\n");
+ ret = -EINVAL;
+ goto err_cur;
+ }
+
+ /*
+ * When all pages in a block has been written, a new block is selected
+ * and writing is performed on the new block.
+ */
+ if (s.ppas[scan_ppa_idx(0, s.act_blk[0])].g.pg ==
+ dev->lps_per_blk - 1) {
+ ret = nvm_prepare_new_sysblks(dev, &s);
+ if (ret)
+ goto err_cur;
+ }
+
+ ret = nvm_write_and_verify(dev, new, &s);
+err_cur:
+ kfree(cur);
+err_sysblk:
+ mutex_unlock(&dev->mlock);
+
+ return ret;
+}
+
+int nvm_init_sysblock(struct nvm_dev *dev, struct nvm_sb_info *info)
+{
+ struct ppa_addr sysblk_ppas[MAX_SYSBLKS];
+ struct sysblk_scan s;
+ int ret;
+
+ /*
+ * 1. select master blocks and select first available blks
+ * 2. get bad block list
+ * 3. mark MAX_SYSBLKS block as host-based device allocated.
+ * 4. write and verify data to block
+ */
+
+ if (!dev->ops->get_bb_tbl || !dev->ops->set_bb_tbl)
+ return -EINVAL;
+
+ if (!(dev->mccap & NVM_ID_CAP_SLC) || !dev->lps_per_blk) {
+ pr_err("nvm: memory does not support SLC access\n");
+ return -EINVAL;
+ }
+
+ /* Index all sysblocks and mark them as host-driven */
+ nvm_setup_sysblk_scan(dev, &s, sysblk_ppas);
+
+ mutex_lock(&dev->mlock);
+ ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, sysblk_get_free_blks);
+ if (ret)
+ goto err_mark;
+
+ ret = nvm_set_bb_tbl(dev, &s, NVM_BLK_T_HOST);
+ if (ret)
+ goto err_mark;
+
+ /* Write to the first block of each row */
+ ret = nvm_write_and_verify(dev, info, &s);
+err_mark:
+ mutex_unlock(&dev->mlock);
+ return ret;
+}
+
+struct factory_blks {
+ struct nvm_dev *dev;
+ int flags;
+ unsigned long *blks;
+};
+
+static int factory_nblks(int nblks)
+{
+ /* Round up to nearest BITS_PER_LONG */
+ return (nblks + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1);
+}
+
+static unsigned int factory_blk_offset(struct nvm_dev *dev, int ch, int lun)
+{
+ int nblks = factory_nblks(dev->blks_per_lun);
+
+ return ((ch * dev->luns_per_chnl * nblks) + (lun * nblks)) /
+ BITS_PER_LONG;
+}
+
+static int nvm_factory_blks(struct ppa_addr ppa, int nr_blks, u8 *blks,
+ void *private)
+{
+ struct factory_blks *f = private;
+ struct nvm_dev *dev = f->dev;
+ int i, lunoff;
+
+ lunoff = factory_blk_offset(dev, ppa.g.ch, ppa.g.lun);
+
+ /* non-set bits correspond to the block must be erased */
+ for (i = 0; i < nr_blks; i++) {
+ switch (blks[i]) {
+ case NVM_BLK_T_FREE:
+ if (f->flags & NVM_FACTORY_ERASE_ONLY_USER)
+ set_bit(i, &f->blks[lunoff]);
+ break;
+ case NVM_BLK_T_HOST:
+ if (!(f->flags & NVM_FACTORY_RESET_HOST_BLKS))
+ set_bit(i, &f->blks[lunoff]);
+ break;
+ case NVM_BLK_T_GRWN_BAD:
+ if (!(f->flags & NVM_FACTORY_RESET_GRWN_BBLKS))
+ set_bit(i, &f->blks[lunoff]);
+ break;
+ default:
+ set_bit(i, &f->blks[lunoff]);
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int nvm_fact_get_blks(struct nvm_dev *dev, struct ppa_addr *erase_list,
+ int max_ppas, struct factory_blks *f)
+{
+ struct ppa_addr ppa;
+ int ch, lun, blkid, idx, done = 0, ppa_cnt = 0;
+ unsigned long *offset;
+
+ while (!done) {
+ done = 1;
+ for (ch = 0; ch < dev->nr_chnls; ch++) {
+ for (lun = 0; lun < dev->luns_per_chnl; lun++) {
+ idx = factory_blk_offset(dev, ch, lun);
+ offset = &f->blks[idx];
+
+ blkid = find_first_zero_bit(offset,
+ dev->blks_per_lun);
+ if (blkid >= dev->blks_per_lun)
+ continue;
+ set_bit(blkid, offset);
+
+ ppa.ppa = 0;
+ ppa.g.ch = ch;
+ ppa.g.lun = lun;
+ ppa.g.blk = blkid;
+ pr_debug("nvm: erase ppa (%u %u %u)\n",
+ ppa.g.ch,
+ ppa.g.lun,
+ ppa.g.blk);
+
+ erase_list[ppa_cnt] = ppa;
+ ppa_cnt++;
+ done = 0;
+
+ if (ppa_cnt == max_ppas)
+ return ppa_cnt;
+ }
+ }
+ }
+
+ return ppa_cnt;
+}
+
+static int nvm_fact_get_bb_tbl(struct nvm_dev *dev, struct ppa_addr ppa,
+ nvm_bb_update_fn *fn, void *priv)
+{
+ struct ppa_addr dev_ppa;
+ int ret;
+
+ dev_ppa = generic_to_dev_addr(dev, ppa);
+
+ ret = dev->ops->get_bb_tbl(dev, dev_ppa, dev->blks_per_lun, fn, priv);
+ if (ret)
+ pr_err("nvm: failed bb tbl for ch%u lun%u\n",
+ ppa.g.ch, ppa.g.blk);
+ return ret;
+}
+
+static int nvm_fact_select_blks(struct nvm_dev *dev, struct factory_blks *f)
+{
+ int ch, lun, ret;
+ struct ppa_addr ppa;
+
+ ppa.ppa = 0;
+ for (ch = 0; ch < dev->nr_chnls; ch++) {
+ for (lun = 0; lun < dev->luns_per_chnl; lun++) {
+ ppa.g.ch = ch;
+ ppa.g.lun = lun;
+
+ ret = nvm_fact_get_bb_tbl(dev, ppa, nvm_factory_blks,
+ f);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+int nvm_dev_factory(struct nvm_dev *dev, int flags)
+{
+ struct factory_blks f;
+ struct ppa_addr *ppas;
+ int ppa_cnt, ret = -ENOMEM;
+ int max_ppas = dev->ops->max_phys_sect / dev->nr_planes;
+ struct ppa_addr sysblk_ppas[MAX_SYSBLKS];
+ struct sysblk_scan s;
+
+ f.blks = kzalloc(factory_nblks(dev->blks_per_lun) * dev->nr_luns,
+ GFP_KERNEL);
+ if (!f.blks)
+ return ret;
+
+ ppas = kcalloc(max_ppas, sizeof(struct ppa_addr), GFP_KERNEL);
+ if (!ppas)
+ goto err_blks;
+
+ f.dev = dev;
+ f.flags = flags;
+
+ /* create list of blks to be erased */
+ ret = nvm_fact_select_blks(dev, &f);
+ if (ret)
+ goto err_ppas;
+
+ /* continue to erase until list of blks until empty */
+ while ((ppa_cnt = nvm_fact_get_blks(dev, ppas, max_ppas, &f)) > 0)
+ nvm_erase_ppa(dev, ppas, ppa_cnt);
+
+ /* mark host reserved blocks free */
+ if (flags & NVM_FACTORY_RESET_HOST_BLKS) {
+ nvm_setup_sysblk_scan(dev, &s, sysblk_ppas);
+ mutex_lock(&dev->mlock);
+ ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas,
+ sysblk_get_host_blks);
+ if (!ret)
+ ret = nvm_set_bb_tbl(dev, &s, NVM_BLK_T_FREE);
+ mutex_unlock(&dev->mlock);
+ }
+err_ppas:
+ kfree(ppas);
+err_blks:
+ kfree(f.blks);
+ return ret;
+}
+EXPORT_SYMBOL(nvm_dev_factory);
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 83392f8..22b9e34 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1741,6 +1741,7 @@ static void bch_btree_gc(struct cache_set *c)
do {
ret = btree_root(gc_root, c, &op, &writes, &stats);
closure_sync(&writes);
+ cond_resched();
if (ret && ret != -EAGAIN)
pr_warn("gc failed!");
@@ -2162,8 +2163,10 @@ int bch_btree_insert_check_key(struct btree *b, struct btree_op *op,
rw_lock(true, b, b->level);
if (b->key.ptr[0] != btree_ptr ||
- b->seq != seq + 1)
+ b->seq != seq + 1) {
+ op->lock = b->level;
goto out;
+ }
}
SET_KEY_PTRS(check_key, 1);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 679a093..8d0ead9 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -685,6 +685,8 @@ static void bcache_device_link(struct bcache_device *d, struct cache_set *c,
WARN(sysfs_create_link(&d->kobj, &c->kobj, "cache") ||
sysfs_create_link(&c->kobj, &d->kobj, d->name),
"Couldn't create device <-> cache set symlinks");
+
+ clear_bit(BCACHE_DEV_UNLINK_DONE, &d->flags);
}
static void bcache_device_detach(struct bcache_device *d)
@@ -847,8 +849,11 @@ void bch_cached_dev_run(struct cached_dev *dc)
buf[SB_LABEL_SIZE] = '\0';
env[2] = kasprintf(GFP_KERNEL, "CACHED_LABEL=%s", buf);
- if (atomic_xchg(&dc->running, 1))
+ if (atomic_xchg(&dc->running, 1)) {
+ kfree(env[1]);
+ kfree(env[2]);
return;
+ }
if (!d->c &&
BDEV_STATE(&dc->sb) != BDEV_STATE_NONE) {
@@ -1933,6 +1938,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
else
err = "device busy";
mutex_unlock(&bch_register_lock);
+ if (attr == &ksysfs_register_quiet)
+ goto out;
}
goto err;
}
@@ -1971,8 +1978,7 @@ out:
err_close:
blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
err:
- if (attr != &ksysfs_register_quiet)
- pr_info("error opening %s: %s", path, err);
+ pr_info("error opening %s: %s", path, err);
ret = -EINVAL;
goto out;
}
@@ -2066,8 +2072,10 @@ static int __init bcache_init(void)
closure_debug_init();
bcache_major = register_blkdev(0, "bcache");
- if (bcache_major < 0)
+ if (bcache_major < 0) {
+ unregister_reboot_notifier(&reboot);
return bcache_major;
+ }
if (!(bcache_wq = create_workqueue("bcache")) ||
!(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) ||
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index b23f88d..b9346cd 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -323,6 +323,10 @@ void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode,
static bool dirty_pred(struct keybuf *buf, struct bkey *k)
{
+ struct cached_dev *dc = container_of(buf, struct cached_dev, writeback_keys);
+
+ BUG_ON(KEY_INODE(k) != dc->disk.id);
+
return KEY_DIRTY(k);
}
@@ -372,11 +376,24 @@ next:
}
}
+/*
+ * Returns true if we scanned the entire disk
+ */
static bool refill_dirty(struct cached_dev *dc)
{
struct keybuf *buf = &dc->writeback_keys;
+ struct bkey start = KEY(dc->disk.id, 0, 0);
struct bkey end = KEY(dc->disk.id, MAX_KEY_OFFSET, 0);
- bool searched_from_start = false;
+ struct bkey start_pos;
+
+ /*
+ * make sure keybuf pos is inside the range for this disk - at bringup
+ * we might not be attached yet so this disk's inode nr isn't
+ * initialized then
+ */
+ if (bkey_cmp(&buf->last_scanned, &start) < 0 ||
+ bkey_cmp(&buf->last_scanned, &end) > 0)
+ buf->last_scanned = start;
if (dc->partial_stripes_expensive) {
refill_full_stripes(dc);
@@ -384,14 +401,20 @@ static bool refill_dirty(struct cached_dev *dc)
return false;
}
- if (bkey_cmp(&buf->last_scanned, &end) >= 0) {
- buf->last_scanned = KEY(dc->disk.id, 0, 0);
- searched_from_start = true;
- }
-
+ start_pos = buf->last_scanned;
bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred);
- return bkey_cmp(&buf->last_scanned, &end) >= 0 && searched_from_start;
+ if (bkey_cmp(&buf->last_scanned, &end) < 0)
+ return false;
+
+ /*
+ * If we get to the end start scanning again from the beginning, and
+ * only scan up to where we initially started scanning from:
+ */
+ buf->last_scanned = start;
+ bch_refill_keybuf(dc->disk.c, buf, &start_pos, dirty_pred);
+
+ return bkey_cmp(&buf->last_scanned, &start_pos) >= 0;
}
static int bch_writeback_thread(void *arg)
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index 0a9dab1..073a042 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -63,7 +63,8 @@ static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
static inline void bch_writeback_queue(struct cached_dev *dc)
{
- wake_up_process(dc->writeback_thread);
+ if (!IS_ERR_OR_NULL(dc->writeback_thread))
+ wake_up_process(dc->writeback_thread);
}
static inline void bch_writeback_add(struct cached_dev *dc)
diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
index 154aced..65cc0ac 100644
--- a/drivers/mmc/core/debugfs.c
+++ b/drivers/mmc/core/debugfs.c
@@ -170,7 +170,7 @@ static int mmc_ios_show(struct seq_file *s, void *data)
str = "invalid";
break;
}
- seq_printf(s, "signal voltage:\t%u (%s)\n", ios->chip_select, str);
+ seq_printf(s, "signal voltage:\t%u (%s)\n", ios->signal_voltage, str);
switch (ios->drv_type) {
case MMC_SET_DRIVER_TYPE_A:
diff --git a/drivers/mmc/core/pwrseq_simple.c b/drivers/mmc/core/pwrseq_simple.c
index 2b16263..aba786d 100644
--- a/drivers/mmc/core/pwrseq_simple.c
+++ b/drivers/mmc/core/pwrseq_simple.c
@@ -29,15 +29,18 @@ struct mmc_pwrseq_simple {
static void mmc_pwrseq_simple_set_gpios_value(struct mmc_pwrseq_simple *pwrseq,
int value)
{
- int i;
struct gpio_descs *reset_gpios = pwrseq->reset_gpios;
- int values[reset_gpios->ndescs];
- for (i = 0; i < reset_gpios->ndescs; i++)
- values[i] = value;
+ if (!IS_ERR(reset_gpios)) {
+ int i;
+ int values[reset_gpios->ndescs];
- gpiod_set_array_value_cansleep(reset_gpios->ndescs, reset_gpios->desc,
- values);
+ for (i = 0; i < reset_gpios->ndescs; i++)
+ values[i] = value;
+
+ gpiod_set_array_value_cansleep(
+ reset_gpios->ndescs, reset_gpios->desc, values);
+ }
}
static void mmc_pwrseq_simple_pre_power_on(struct mmc_host *host)
@@ -79,7 +82,8 @@ static void mmc_pwrseq_simple_free(struct mmc_host *host)
struct mmc_pwrseq_simple *pwrseq = container_of(host->pwrseq,
struct mmc_pwrseq_simple, pwrseq);
- gpiod_put_array(pwrseq->reset_gpios);
+ if (!IS_ERR(pwrseq->reset_gpios))
+ gpiod_put_array(pwrseq->reset_gpios);
if (!IS_ERR(pwrseq->ext_clk))
clk_put(pwrseq->ext_clk);
@@ -112,7 +116,9 @@ struct mmc_pwrseq *mmc_pwrseq_simple_alloc(struct mmc_host *host,
}
pwrseq->reset_gpios = gpiod_get_array(dev, "reset", GPIOD_OUT_HIGH);
- if (IS_ERR(pwrseq->reset_gpios)) {
+ if (IS_ERR(pwrseq->reset_gpios) &&
+ PTR_ERR(pwrseq->reset_gpios) != -ENOENT &&
+ PTR_ERR(pwrseq->reset_gpios) != -ENOSYS) {
ret = PTR_ERR(pwrseq->reset_gpios);
goto clk_put;
}
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index f2b164b..bb39a29 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -329,6 +329,7 @@ static int mmc_read_switch(struct mmc_card *card)
card->sw_caps.sd3_bus_mode = status[13];
/* Driver Strengths supported by the card */
card->sw_caps.sd3_drv_type = status[9];
+ card->sw_caps.sd3_curr_limit = status[7] | status[6] << 8;
}
out:
@@ -545,14 +546,25 @@ static int sd_set_current_limit(struct mmc_card *card, u8 *status)
* when we set current limit to 200ma, the card will draw 200ma, and
* when we set current limit to 400/600/800ma, the card will draw its
* maximum 300ma from the host.
+ *
+ * The above is incorrect: if we try to set a current limit that is
+ * not supported by the card, the card can rightfully error out the
+ * attempt, and remain at the default current limit. This results
+ * in a 300mA card being limited to 200mA even though the host
+ * supports 800mA. Failures seen with SanDisk 8GB UHS cards with
+ * an iMX6 host. --rmk
*/
- if (max_current >= 800)
+ if (max_current >= 800 &&
+ card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_800)
current_limit = SD_SET_CURRENT_LIMIT_800;
- else if (max_current >= 600)
+ else if (max_current >= 600 &&
+ card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_600)
current_limit = SD_SET_CURRENT_LIMIT_600;
- else if (max_current >= 400)
+ else if (max_current >= 400 &&
+ card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_400)
current_limit = SD_SET_CURRENT_LIMIT_400;
- else if (max_current >= 200)
+ else if (max_current >= 200 &&
+ card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_200)
current_limit = SD_SET_CURRENT_LIMIT_200;
if (current_limit != SD_SET_CURRENT_NO_CHANGE) {
@@ -626,9 +638,9 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card)
* SDR104 mode SD-cards. Note that tuning is mandatory for SDR104.
*/
if (!mmc_host_is_spi(card->host) &&
- (card->sd_bus_speed == UHS_SDR50_BUS_SPEED ||
- card->sd_bus_speed == UHS_DDR50_BUS_SPEED ||
- card->sd_bus_speed == UHS_SDR104_BUS_SPEED)) {
+ (card->host->ios.timing == MMC_TIMING_UHS_SDR50 ||
+ card->host->ios.timing == MMC_TIMING_UHS_DDR50 ||
+ card->host->ios.timing == MMC_TIMING_UHS_SDR104)) {
err = mmc_execute_tuning(card);
/*
@@ -638,7 +650,7 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card)
* difference between v3.00 and 3.01 spec means that CMD19
* tuning is also available for DDR50 mode.
*/
- if (err && card->sd_bus_speed == UHS_DDR50_BUS_SPEED) {
+ if (err && card->host->ios.timing == MMC_TIMING_UHS_DDR50) {
pr_warn("%s: ddr50 tuning failed\n",
mmc_hostname(card->host));
err = 0;
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index d61ba1a..467b3cf 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -535,8 +535,8 @@ static int mmc_sdio_init_uhs_card(struct mmc_card *card)
* SDR104 mode SD-cards. Note that tuning is mandatory for SDR104.
*/
if (!mmc_host_is_spi(card->host) &&
- ((card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR50) ||
- (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR104)))
+ ((card->host->ios.timing == MMC_TIMING_UHS_SDR50) ||
+ (card->host->ios.timing == MMC_TIMING_UHS_SDR104)))
err = mmc_execute_tuning(card);
out:
return err;
diff --git a/drivers/mmc/core/sdio_cis.c b/drivers/mmc/core/sdio_cis.c
index 8e94e55..6f6fc52 100644
--- a/drivers/mmc/core/sdio_cis.c
+++ b/drivers/mmc/core/sdio_cis.c
@@ -223,6 +223,7 @@ static const struct cis_tpl cis_tpl_list[] = {
{ 0x20, 4, cistpl_manfid },
{ 0x21, 2, /* cistpl_funcid */ },
{ 0x22, 0, cistpl_funce },
+ { 0x91, 2, /* cistpl_sdio_std */ },
};
static int sdio_read_cis(struct mmc_card *card, struct sdio_func *func)
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index fb26674..0d6ca41 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -151,6 +151,7 @@ static struct variant_data variant_nomadik = {
.fifosize = 16 * 4,
.fifohalfsize = 8 * 4,
.clkreg = MCI_CLK_ENABLE,
+ .clkreg_8bit_bus_enable = MCI_ST_8BIT_BUS,
.datalength_bits = 24,
.datactrl_mask_sdio = MCI_ST_DPSM_SDIOEN,
.st_sdio = true,
@@ -1886,7 +1887,7 @@ static struct amba_id mmci_ids[] = {
{
.id = 0x00280180,
.mask = 0x00ffffff,
- .data = &variant_u300,
+ .data = &variant_nomadik,
},
{
.id = 0x00480180,
diff --git a/drivers/mmc/host/tmio_mmc_dma.c b/drivers/mmc/host/tmio_mmc_dma.c
index e4b05db..4a0d6b8 100644
--- a/drivers/mmc/host/tmio_mmc_dma.c
+++ b/drivers/mmc/host/tmio_mmc_dma.c
@@ -94,9 +94,9 @@ static void tmio_mmc_start_dma_rx(struct tmio_mmc_host *host)
desc = NULL;
ret = cookie;
}
+ dev_dbg(&host->pdev->dev, "%s(): mapped %d -> %d, cookie %d, rq %p\n",
+ __func__, host->sg_len, ret, cookie, host->mrq);
}
- dev_dbg(&host->pdev->dev, "%s(): mapped %d -> %d, cookie %d, rq %p\n",
- __func__, host->sg_len, ret, cookie, host->mrq);
pio:
if (!desc) {
@@ -116,8 +116,8 @@ pio:
"DMA failed: %d, falling back to PIO\n", ret);
}
- dev_dbg(&host->pdev->dev, "%s(): desc %p, cookie %d, sg[%d]\n", __func__,
- desc, cookie, host->sg_len);
+ dev_dbg(&host->pdev->dev, "%s(): desc %p, sg[%d]\n", __func__,
+ desc, host->sg_len);
}
static void tmio_mmc_start_dma_tx(struct tmio_mmc_host *host)
@@ -174,9 +174,9 @@ static void tmio_mmc_start_dma_tx(struct tmio_mmc_host *host)
desc = NULL;
ret = cookie;
}
+ dev_dbg(&host->pdev->dev, "%s(): mapped %d -> %d, cookie %d, rq %p\n",
+ __func__, host->sg_len, ret, cookie, host->mrq);
}
- dev_dbg(&host->pdev->dev, "%s(): mapped %d -> %d, cookie %d, rq %p\n",
- __func__, host->sg_len, ret, cookie, host->mrq);
pio:
if (!desc) {
@@ -196,8 +196,7 @@ pio:
"DMA failed: %d, falling back to PIO\n", ret);
}
- dev_dbg(&host->pdev->dev, "%s(): desc %p, cookie %d\n", __func__,
- desc, cookie);
+ dev_dbg(&host->pdev->dev, "%s(): desc %p\n", __func__, desc);
}
void tmio_mmc_start_dma(struct tmio_mmc_host *host,
diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
index 54e056d..ee2b74d 100644
--- a/drivers/mtd/ubi/cdev.c
+++ b/drivers/mtd/ubi/cdev.c
@@ -174,9 +174,9 @@ static int vol_cdev_fsync(struct file *file, loff_t start, loff_t end,
struct ubi_device *ubi = desc->vol->ubi;
struct inode *inode = file_inode(file);
int err;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
err = ubi_sync(ubi->ubi_num);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 2c2baab..d66c690 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -157,6 +157,7 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
[29] = "802.1ad offload support",
[31] = "Modifying loopback source checks using UPDATE_QP support",
[32] = "Loopback source checks support",
+ [33] = "RoCEv2 support"
};
int i;
@@ -626,6 +627,8 @@ out:
return err;
}
+static void disable_unsupported_roce_caps(void *buf);
+
int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
struct mlx4_cmd_mailbox *mailbox;
@@ -738,6 +741,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
if (err)
goto out;
+ if (mlx4_is_mfunc(dev))
+ disable_unsupported_roce_caps(outbox);
MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_QP_OFFSET);
dev_cap->reserved_qps = 1 << (field & 0xf);
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_OFFSET);
@@ -905,6 +910,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
MLX4_GET(dev_cap->bmme_flags, outbox,
QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
+ if (dev_cap->bmme_flags & MLX4_FLAG_ROCE_V1_V2)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ROCE_V1_V2;
if (dev_cap->bmme_flags & MLX4_FLAG_PORT_REMAP)
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_REMAP;
MLX4_GET(field, outbox, QUERY_DEV_CAP_CONFIG_DEV_OFFSET);
@@ -1161,6 +1168,7 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
if (err)
return err;
+ disable_unsupported_roce_caps(outbox->buf);
/* add port mng change event capability and disable mw type 1
* unconditionally to slaves
*/
@@ -1258,6 +1266,21 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
return 0;
}
+static void disable_unsupported_roce_caps(void *buf)
+{
+ u32 flags;
+
+ MLX4_GET(flags, buf, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
+ flags &= ~(1UL << 31);
+ MLX4_PUT(buf, flags, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
+ MLX4_GET(flags, buf, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
+ flags &= ~(1UL << 24);
+ MLX4_PUT(buf, flags, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
+ MLX4_GET(flags, buf, QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
+ flags &= ~(MLX4_FLAG_ROCE_V1_V2);
+ MLX4_PUT(buf, flags, QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
+}
+
int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
@@ -2239,7 +2262,8 @@ struct mlx4_config_dev {
__be32 rsvd1[3];
__be16 vxlan_udp_dport;
__be16 rsvd2;
- __be32 rsvd3;
+ __be16 roce_v2_entropy;
+ __be16 roce_v2_udp_dport;
__be32 roce_flags;
__be32 rsvd4[25];
__be16 rsvd5;
@@ -2248,6 +2272,7 @@ struct mlx4_config_dev {
};
#define MLX4_VXLAN_UDP_DPORT (1 << 0)
+#define MLX4_ROCE_V2_UDP_DPORT BIT(3)
#define MLX4_DISABLE_RX_PORT BIT(18)
static int mlx4_CONFIG_DEV_set(struct mlx4_dev *dev, struct mlx4_config_dev *config_dev)
@@ -2365,6 +2390,18 @@ int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis)
return mlx4_CONFIG_DEV_set(dev, &config_dev);
}
+int mlx4_config_roce_v2_port(struct mlx4_dev *dev, u16 udp_port)
+{
+ struct mlx4_config_dev config_dev;
+
+ memset(&config_dev, 0, sizeof(config_dev));
+ config_dev.update_flags = cpu_to_be32(MLX4_ROCE_V2_UDP_DPORT);
+ config_dev.roce_v2_udp_dport = cpu_to_be16(udp_port);
+
+ return mlx4_CONFIG_DEV_set(dev, &config_dev);
+}
+EXPORT_SYMBOL_GPL(mlx4_config_roce_v2_port);
+
int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2)
{
struct mlx4_cmd_mailbox *mailbox;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 2404c22..7baef52 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -780,7 +780,10 @@ struct mlx4_set_port_general_context {
u16 reserved1;
u8 v_ignore_fcs;
u8 flags;
- u8 ignore_fcs;
+ union {
+ u8 ignore_fcs;
+ u8 roce_mode;
+ };
u8 reserved2;
__be16 mtu;
u8 pptx;
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index f255042..787b7bb 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -1520,6 +1520,8 @@ int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz)
return err;
}
+#define SET_PORT_ROCE_2_FLAGS 0x10
+#define MLX4_SET_PORT_ROCE_V1_V2 0x2
int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx)
{
@@ -1539,6 +1541,11 @@ int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
context->pprx = (pprx * (!pfcrx)) << 7;
context->pfcrx = pfcrx;
+ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
+ context->flags |= SET_PORT_ROCE_2_FLAGS;
+ context->roce_mode |=
+ MLX4_SET_PORT_ROCE_V1_V2 << 4;
+ }
in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE,
MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 168823d..d1cd9c3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -167,6 +167,12 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
}
+ if ((cur_state == MLX4_QP_STATE_RTR) &&
+ (new_state == MLX4_QP_STATE_RTS) &&
+ dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
+ context->roce_entropy =
+ cpu_to_be16(mlx4_qp_roce_entropy(dev, qp->qpn));
+
*(__be32 *) mailbox->buf = cpu_to_be32(optpar);
memcpy(mailbox->buf + 8, context, sizeof *context);
@@ -921,3 +927,23 @@ int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
return 0;
}
EXPORT_SYMBOL_GPL(mlx4_qp_to_ready);
+
+u16 mlx4_qp_roce_entropy(struct mlx4_dev *dev, u32 qpn)
+{
+ struct mlx4_qp_context context;
+ struct mlx4_qp qp;
+ int err;
+
+ qp.qpn = qpn;
+ err = mlx4_qp_query(dev, &qp, &context);
+ if (!err) {
+ u32 dest_qpn = be32_to_cpu(context.remote_qpn) & 0xffffff;
+ u16 folded_dst = folded_qp(dest_qpn);
+ u16 folded_src = folded_qp(qpn);
+
+ return (dest_qpn != qpn) ?
+ ((folded_dst ^ folded_src) | 0xC000) :
+ folded_src | 0xC000;
+ }
+ return 0xdead;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 9ea49a8..aac071a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -39,8 +39,8 @@
#include <linux/mlx5/qp.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/vport.h>
+#include <linux/mlx5/transobj.h>
#include "wq.h"
-#include "transobj.h"
#include "mlx5_core.h"
#define MLX5E_MAX_NUM_TC 8
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index c56d91a..6a3e430 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2241,7 +2241,7 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev)
goto err_unmap_free_uar;
}
- err = mlx5_alloc_transport_domain(mdev, &priv->tdn);
+ err = mlx5_core_alloc_transport_domain(mdev, &priv->tdn);
if (err) {
mlx5_core_err(mdev, "alloc td failed, %d\n", err);
goto err_dealloc_pd;
@@ -2324,7 +2324,7 @@ err_destroy_mkey:
mlx5_core_destroy_mkey(mdev, &priv->mr);
err_dealloc_transport_domain:
- mlx5_dealloc_transport_domain(mdev, priv->tdn);
+ mlx5_core_dealloc_transport_domain(mdev, priv->tdn);
err_dealloc_pd:
mlx5_core_dealloc_pd(mdev, priv->pdn);
@@ -2356,7 +2356,7 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv)
mlx5e_close_drop_rq(priv);
mlx5e_destroy_tises(priv);
mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
- mlx5_dealloc_transport_domain(priv->mdev, priv->tdn);
+ mlx5_core_dealloc_transport_domain(priv->mdev, priv->tdn);
mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
free_netdev(netdev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 23c244a..647a3ca 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -230,6 +230,7 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+ rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n",
eqe_type_str(eqe->type), eqe->type, rsn);
mlx5_rsc_event(dev, rsn, eqe->type);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index b37749a..1545a94 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -78,6 +78,11 @@ struct mlx5_device_context {
void *context;
};
+enum {
+ MLX5_ATOMIC_REQ_MODE_BE = 0x0,
+ MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1,
+};
+
static struct mlx5_profile profile[] = {
[0] = {
.mask = 0,
@@ -387,7 +392,7 @@ query_ex:
return err;
}
-static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz)
+static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz, int opmod)
{
u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)];
int err;
@@ -395,6 +400,7 @@ static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz)
memset(out, 0, sizeof(out));
MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP);
+ MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1);
err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
if (err)
return err;
@@ -404,6 +410,46 @@ static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz)
return err;
}
+static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
+{
+ void *set_ctx;
+ void *set_hca_cap;
+ int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+ int req_endianness;
+ int err;
+
+ if (MLX5_CAP_GEN(dev, atomic)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC,
+ HCA_CAP_OPMOD_GET_CUR);
+ if (err)
+ return err;
+ } else {
+ return 0;
+ }
+
+ req_endianness =
+ MLX5_CAP_ATOMIC(dev,
+ supported_atomic_req_8B_endianess_mode_1);
+
+ if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS)
+ return 0;
+
+ set_ctx = kzalloc(set_sz, GFP_KERNEL);
+ if (!set_ctx)
+ return -ENOMEM;
+
+ set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
+
+ /* Set requestor to host endianness */
+ MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianess_mode,
+ MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS);
+
+ err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC);
+
+ kfree(set_ctx);
+ return err;
+}
+
static int handle_hca_cap(struct mlx5_core_dev *dev)
{
void *set_ctx = NULL;
@@ -445,7 +491,8 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12);
- err = set_caps(dev, set_ctx, set_sz);
+ err = set_caps(dev, set_ctx, set_sz,
+ MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
query_ex:
kfree(set_ctx);
@@ -667,7 +714,6 @@ clean:
return err;
}
-#ifdef CONFIG_MLX5_CORE_EN
static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
{
u32 query_in[MLX5_ST_SZ_DW(query_issi_in)];
@@ -720,7 +766,6 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
return -ENOTSUPP;
}
-#endif
static int map_bf_area(struct mlx5_core_dev *dev)
{
@@ -966,13 +1011,11 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
goto err_pagealloc_cleanup;
}
-#ifdef CONFIG_MLX5_CORE_EN
err = mlx5_core_set_issi(dev);
if (err) {
dev_err(&pdev->dev, "failed to set issi\n");
goto err_disable_hca;
}
-#endif
err = mlx5_satisfy_startup_pages(dev, 1);
if (err) {
@@ -992,6 +1035,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
goto reclaim_boot_pages;
}
+ err = handle_hca_cap_atomic(dev);
+ if (err) {
+ dev_err(&pdev->dev, "handle_hca_cap_atomic failed\n");
+ goto reclaim_boot_pages;
+ }
+
err = mlx5_satisfy_startup_pages(dev, 0);
if (err) {
dev_err(&pdev->dev, "failed to allocate init pages\n");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index 30e2ba3..def2893 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -36,6 +36,7 @@
#include <linux/mlx5/cmd.h>
#include <linux/mlx5/qp.h>
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/transobj.h>
#include "mlx5_core.h"
@@ -67,6 +68,52 @@ void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common)
complete(&common->free);
}
+static u64 qp_allowed_event_types(void)
+{
+ u64 mask;
+
+ mask = BIT(MLX5_EVENT_TYPE_PATH_MIG) |
+ BIT(MLX5_EVENT_TYPE_COMM_EST) |
+ BIT(MLX5_EVENT_TYPE_SQ_DRAINED) |
+ BIT(MLX5_EVENT_TYPE_SRQ_LAST_WQE) |
+ BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR) |
+ BIT(MLX5_EVENT_TYPE_PATH_MIG_FAILED) |
+ BIT(MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) |
+ BIT(MLX5_EVENT_TYPE_WQ_ACCESS_ERROR);
+
+ return mask;
+}
+
+static u64 rq_allowed_event_types(void)
+{
+ u64 mask;
+
+ mask = BIT(MLX5_EVENT_TYPE_SRQ_LAST_WQE) |
+ BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
+
+ return mask;
+}
+
+static u64 sq_allowed_event_types(void)
+{
+ return BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
+}
+
+static bool is_event_type_allowed(int rsc_type, int event_type)
+{
+ switch (rsc_type) {
+ case MLX5_EVENT_QUEUE_TYPE_QP:
+ return BIT(event_type) & qp_allowed_event_types();
+ case MLX5_EVENT_QUEUE_TYPE_RQ:
+ return BIT(event_type) & rq_allowed_event_types();
+ case MLX5_EVENT_QUEUE_TYPE_SQ:
+ return BIT(event_type) & sq_allowed_event_types();
+ default:
+ WARN(1, "Event arrived for unknown resource type");
+ return false;
+ }
+}
+
void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
{
struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, rsn);
@@ -75,8 +122,16 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
if (!common)
return;
+ if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type)) {
+ mlx5_core_warn(dev, "event 0x%.2x is not allowed on resource 0x%.8x\n",
+ event_type, rsn);
+ return;
+ }
+
switch (common->res) {
case MLX5_RES_QP:
+ case MLX5_RES_RQ:
+ case MLX5_RES_SQ:
qp = (struct mlx5_core_qp *)common;
qp->event(qp, event_type);
break;
@@ -177,27 +232,56 @@ void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
}
#endif
+static int create_qprqsq_common(struct mlx5_core_dev *dev,
+ struct mlx5_core_qp *qp,
+ int rsc_type)
+{
+ struct mlx5_qp_table *table = &dev->priv.qp_table;
+ int err;
+
+ qp->common.res = rsc_type;
+ spin_lock_irq(&table->lock);
+ err = radix_tree_insert(&table->tree,
+ qp->qpn | (rsc_type << MLX5_USER_INDEX_LEN),
+ qp);
+ spin_unlock_irq(&table->lock);
+ if (err)
+ return err;
+
+ atomic_set(&qp->common.refcount, 1);
+ init_completion(&qp->common.free);
+ qp->pid = current->pid;
+
+ return 0;
+}
+
+static void destroy_qprqsq_common(struct mlx5_core_dev *dev,
+ struct mlx5_core_qp *qp)
+{
+ struct mlx5_qp_table *table = &dev->priv.qp_table;
+ unsigned long flags;
+
+ spin_lock_irqsave(&table->lock, flags);
+ radix_tree_delete(&table->tree,
+ qp->qpn | (qp->common.res << MLX5_USER_INDEX_LEN));
+ spin_unlock_irqrestore(&table->lock, flags);
+ mlx5_core_put_rsc((struct mlx5_core_rsc_common *)qp);
+ wait_for_completion(&qp->common.free);
+}
+
int mlx5_core_create_qp(struct mlx5_core_dev *dev,
struct mlx5_core_qp *qp,
struct mlx5_create_qp_mbox_in *in,
int inlen)
{
- struct mlx5_qp_table *table = &dev->priv.qp_table;
struct mlx5_create_qp_mbox_out out;
struct mlx5_destroy_qp_mbox_in din;
struct mlx5_destroy_qp_mbox_out dout;
int err;
- void *qpc;
memset(&out, 0, sizeof(out));
in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_QP);
- if (dev->issi) {
- qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
- /* 0xffffff means we ask to work with cqe version 0 */
- MLX5_SET(qpc, qpc, user_index, 0xffffff);
- }
-
err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
if (err) {
mlx5_core_warn(dev, "ret %d\n", err);
@@ -213,24 +297,16 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
qp->qpn = be32_to_cpu(out.qpn) & 0xffffff;
mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn);
- qp->common.res = MLX5_RES_QP;
- spin_lock_irq(&table->lock);
- err = radix_tree_insert(&table->tree, qp->qpn, qp);
- spin_unlock_irq(&table->lock);
- if (err) {
- mlx5_core_warn(dev, "err %d\n", err);
+ err = create_qprqsq_common(dev, qp, MLX5_RES_QP);
+ if (err)
goto err_cmd;
- }
err = mlx5_debug_qp_add(dev, qp);
if (err)
mlx5_core_dbg(dev, "failed adding QP 0x%x to debug file system\n",
qp->qpn);
- qp->pid = current->pid;
- atomic_set(&qp->common.refcount, 1);
atomic_inc(&dev->num_qps);
- init_completion(&qp->common.free);
return 0;
@@ -250,18 +326,11 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
{
struct mlx5_destroy_qp_mbox_in in;
struct mlx5_destroy_qp_mbox_out out;
- struct mlx5_qp_table *table = &dev->priv.qp_table;
- unsigned long flags;
int err;
mlx5_debug_qp_remove(dev, qp);
- spin_lock_irqsave(&table->lock, flags);
- radix_tree_delete(&table->tree, qp->qpn);
- spin_unlock_irqrestore(&table->lock, flags);
-
- mlx5_core_put_rsc((struct mlx5_core_rsc_common *)qp);
- wait_for_completion(&qp->common.free);
+ destroy_qprqsq_common(dev, qp);
memset(&in, 0, sizeof(in));
memset(&out, 0, sizeof(out));
@@ -279,59 +348,15 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
}
EXPORT_SYMBOL_GPL(mlx5_core_destroy_qp);
-int mlx5_core_qp_modify(struct mlx5_core_dev *dev, enum mlx5_qp_state cur_state,
- enum mlx5_qp_state new_state,
+int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation,
struct mlx5_modify_qp_mbox_in *in, int sqd_event,
struct mlx5_core_qp *qp)
{
- static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
- [MLX5_QP_STATE_RST] = {
- [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
- [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
- [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_RST2INIT_QP,
- },
- [MLX5_QP_STATE_INIT] = {
- [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
- [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
- [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_INIT2INIT_QP,
- [MLX5_QP_STATE_RTR] = MLX5_CMD_OP_INIT2RTR_QP,
- },
- [MLX5_QP_STATE_RTR] = {
- [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
- [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
- [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTR2RTS_QP,
- },
- [MLX5_QP_STATE_RTS] = {
- [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
- [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
- [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTS2RTS_QP,
- },
- [MLX5_QP_STATE_SQD] = {
- [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
- [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
- },
- [MLX5_QP_STATE_SQER] = {
- [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
- [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
- [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_SQERR2RTS_QP,
- },
- [MLX5_QP_STATE_ERR] = {
- [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
- [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
- }
- };
-
struct mlx5_modify_qp_mbox_out out;
int err = 0;
- u16 op;
-
- if (cur_state >= MLX5_QP_NUM_STATE || new_state >= MLX5_QP_NUM_STATE ||
- !optab[cur_state][new_state])
- return -EINVAL;
memset(&out, 0, sizeof(out));
- op = optab[cur_state][new_state];
- in->hdr.opcode = cpu_to_be16(op);
+ in->hdr.opcode = cpu_to_be16(operation);
in->qpn = cpu_to_be32(qp->qpn);
err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out));
if (err)
@@ -449,3 +474,67 @@ int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
}
EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
#endif
+
+int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
+ struct mlx5_core_qp *rq)
+{
+ int err;
+ u32 rqn;
+
+ err = mlx5_core_create_rq(dev, in, inlen, &rqn);
+ if (err)
+ return err;
+
+ rq->qpn = rqn;
+ err = create_qprqsq_common(dev, rq, MLX5_RES_RQ);
+ if (err)
+ goto err_destroy_rq;
+
+ return 0;
+
+err_destroy_rq:
+ mlx5_core_destroy_rq(dev, rq->qpn);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_rq_tracked);
+
+void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev,
+ struct mlx5_core_qp *rq)
+{
+ destroy_qprqsq_common(dev, rq);
+ mlx5_core_destroy_rq(dev, rq->qpn);
+}
+EXPORT_SYMBOL(mlx5_core_destroy_rq_tracked);
+
+int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
+ struct mlx5_core_qp *sq)
+{
+ int err;
+ u32 sqn;
+
+ err = mlx5_core_create_sq(dev, in, inlen, &sqn);
+ if (err)
+ return err;
+
+ sq->qpn = sqn;
+ err = create_qprqsq_common(dev, sq, MLX5_RES_SQ);
+ if (err)
+ goto err_destroy_sq;
+
+ return 0;
+
+err_destroy_sq:
+ mlx5_core_destroy_sq(dev, sq->qpn);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_sq_tracked);
+
+void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev,
+ struct mlx5_core_qp *sq)
+{
+ destroy_qprqsq_common(dev, sq);
+ mlx5_core_destroy_sq(dev, sq->qpn);
+}
+EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
index ffada80..04bc522 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
@@ -37,7 +37,7 @@
#include <linux/mlx5/srq.h>
#include <rdma/ib_verbs.h>
#include "mlx5_core.h"
-#include "transobj.h"
+#include <linux/mlx5/transobj.h>
void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type)
{
@@ -241,8 +241,6 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
memcpy(xrc_srqc, srqc, MLX5_ST_SZ_BYTES(srqc));
memcpy(pas, in->pas, pas_size);
- /* 0xffffff means we ask to work with cqe version 0 */
- MLX5_SET(xrc_srqc, xrc_srqc, user_index, 0xffffff);
MLX5_SET(create_xrc_srq_in, create_in, opcode,
MLX5_CMD_OP_CREATE_XRC_SRQ);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
index d7068f5..03a5093 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
@@ -32,9 +32,9 @@
#include <linux/mlx5/driver.h>
#include "mlx5_core.h"
-#include "transobj.h"
+#include <linux/mlx5/transobj.h>
-int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn)
+int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn)
{
u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)];
u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)];
@@ -53,8 +53,9 @@ int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn)
return err;
}
+EXPORT_SYMBOL(mlx5_core_alloc_transport_domain);
-void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn)
+void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn)
{
u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)];
u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)];
@@ -68,6 +69,7 @@ void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn)
mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
}
+EXPORT_SYMBOL(mlx5_core_dealloc_transport_domain);
int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn)
{
@@ -94,6 +96,7 @@ int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen)
memset(out, 0, sizeof(out));
return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
}
+EXPORT_SYMBOL(mlx5_core_modify_rq);
void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn)
{
@@ -108,6 +111,18 @@ void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn)
mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
}
+int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out)
+{
+ u32 in[MLX5_ST_SZ_DW(query_rq_in)] = {0};
+ int outlen = MLX5_ST_SZ_BYTES(query_rq_out);
+
+ MLX5_SET(query_rq_in, in, opcode, MLX5_CMD_OP_QUERY_RQ);
+ MLX5_SET(query_rq_in, in, rqn, rqn);
+
+ return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen);
+}
+EXPORT_SYMBOL(mlx5_core_query_rq);
+
int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn)
{
u32 out[MLX5_ST_SZ_DW(create_sq_out)];
@@ -133,6 +148,7 @@ int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen)
memset(out, 0, sizeof(out));
return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
}
+EXPORT_SYMBOL(mlx5_core_modify_sq);
void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn)
{
@@ -147,6 +163,18 @@ void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn)
mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
}
+int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out)
+{
+ u32 in[MLX5_ST_SZ_DW(query_sq_in)] = {0};
+ int outlen = MLX5_ST_SZ_BYTES(query_sq_out);
+
+ MLX5_SET(query_sq_in, in, opcode, MLX5_CMD_OP_QUERY_SQ);
+ MLX5_SET(query_sq_in, in, sqn, sqn);
+
+ return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen);
+}
+EXPORT_SYMBOL(mlx5_core_query_sq);
+
int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
u32 *tirn)
{
@@ -162,6 +190,7 @@ int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
return err;
}
+EXPORT_SYMBOL(mlx5_core_create_tir);
int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in,
int inlen)
@@ -187,6 +216,7 @@ void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn)
mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
}
+EXPORT_SYMBOL(mlx5_core_destroy_tir);
int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
u32 *tisn)
@@ -203,6 +233,19 @@ int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
return err;
}
+EXPORT_SYMBOL(mlx5_core_create_tis);
+
+int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in,
+ int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_tis_out)] = {0};
+
+ MLX5_SET(modify_tis_in, in, tisn, tisn);
+ MLX5_SET(modify_tis_in, in, opcode, MLX5_CMD_OP_MODIFY_TIS);
+
+ return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_modify_tis);
void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn)
{
@@ -216,6 +259,7 @@ void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn)
mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
}
+EXPORT_SYMBOL(mlx5_core_destroy_tis);
int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
u32 *rmpn)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.h b/drivers/net/ethernet/mellanox/mlx5/core/transobj.h
deleted file mode 100644
index 74cae51..0000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __TRANSOBJ_H__
-#define __TRANSOBJ_H__
-
-int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn);
-void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn);
-int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen,
- u32 *rqn);
-int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen);
-void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn);
-int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen,
- u32 *sqn);
-int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen);
-void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn);
-int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
- u32 *tirn);
-int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in,
- int inlen);
-void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn);
-int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
- u32 *tisn);
-void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn);
-int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
- u32 *rmpn);
-int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen);
-int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn);
-int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out);
-int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm);
-int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen,
- u32 *rmpn);
-int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 rmpn);
-int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u32 *out);
-int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm);
-
-int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
- u32 *rqtn);
-int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in,
- int inlen);
-void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn);
-
-#endif /* __TRANSOBJ_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 076197e..c7398b9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -76,7 +76,7 @@ u8 mlx5_query_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
return MLX5_GET(query_vport_state_out, out, admin_state);
}
-EXPORT_SYMBOL(mlx5_query_vport_admin_state);
+EXPORT_SYMBOL_GPL(mlx5_query_vport_admin_state);
int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
u16 vport, u8 state)
@@ -104,7 +104,7 @@ int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
return err;
}
-EXPORT_SYMBOL(mlx5_modify_vport_admin_state);
+EXPORT_SYMBOL_GPL(mlx5_modify_vport_admin_state);
static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport,
u32 *out, int outlen)
@@ -151,12 +151,9 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
nic_vport_context.permanent_address);
err = mlx5_query_nic_vport_context(mdev, vport, out, outlen);
- if (err)
- goto out;
-
- ether_addr_copy(addr, &out_addr[2]);
+ if (!err)
+ ether_addr_copy(addr, &out_addr[2]);
-out:
kvfree(out);
return err;
}
@@ -197,7 +194,7 @@ int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev,
return err;
}
-EXPORT_SYMBOL(mlx5_modify_nic_vport_mac_address);
+EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_address);
int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
u32 vport,
@@ -430,6 +427,68 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
}
EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_vlans);
+int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev,
+ u64 *system_image_guid)
+{
+ u32 *out;
+ int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+
+ out = mlx5_vzalloc(outlen);
+ if (!out)
+ return -ENOMEM;
+
+ mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+
+ *system_image_guid = MLX5_GET64(query_nic_vport_context_out, out,
+ nic_vport_context.system_image_guid);
+
+ kfree(out);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_system_image_guid);
+
+int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid)
+{
+ u32 *out;
+ int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+
+ out = mlx5_vzalloc(outlen);
+ if (!out)
+ return -ENOMEM;
+
+ mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+
+ *node_guid = MLX5_GET64(query_nic_vport_context_out, out,
+ nic_vport_context.node_guid);
+
+ kfree(out);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid);
+
+int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev,
+ u16 *qkey_viol_cntr)
+{
+ u32 *out;
+ int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+
+ out = mlx5_vzalloc(outlen);
+ if (!out)
+ return -ENOMEM;
+
+ mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+
+ *qkey_viol_cntr = MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.qkey_violation_counter);
+
+ kfree(out);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_qkey_viol_cntr);
+
int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
u8 port_num, u16 vf_num, u16 gid_index,
union ib_gid *gid)
@@ -750,3 +809,44 @@ int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev,
return err;
}
EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_promisc);
+
+enum mlx5_vport_roce_state {
+ MLX5_VPORT_ROCE_DISABLED = 0,
+ MLX5_VPORT_ROCE_ENABLED = 1,
+};
+
+static int mlx5_nic_vport_update_roce_state(struct mlx5_core_dev *mdev,
+ enum mlx5_vport_roce_state state)
+{
+ void *in;
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ int err;
+
+ in = mlx5_vzalloc(inlen);
+ if (!in) {
+ mlx5_core_warn(mdev, "failed to allocate inbox\n");
+ return -ENOMEM;
+ }
+
+ MLX5_SET(modify_nic_vport_context_in, in, field_select.roce_en, 1);
+ MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.roce_en,
+ state);
+
+ err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+
+ kvfree(in);
+
+ return err;
+}
+
+int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev)
+{
+ return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED);
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_enable_roce);
+
+int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev)
+{
+ return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED);
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce);
diff --git a/drivers/ntb/hw/Kconfig b/drivers/ntb/hw/Kconfig
index 4d5535c..7116472 100644
--- a/drivers/ntb/hw/Kconfig
+++ b/drivers/ntb/hw/Kconfig
@@ -1 +1,2 @@
+source "drivers/ntb/hw/amd/Kconfig"
source "drivers/ntb/hw/intel/Kconfig"
diff --git a/drivers/ntb/hw/Makefile b/drivers/ntb/hw/Makefile
index 175d7c9..532e085 100644
--- a/drivers/ntb/hw/Makefile
+++ b/drivers/ntb/hw/Makefile
@@ -1 +1,2 @@
+obj-$(CONFIG_NTB_AMD) += amd/
obj-$(CONFIG_NTB_INTEL) += intel/
diff --git a/drivers/ntb/hw/amd/Kconfig b/drivers/ntb/hw/amd/Kconfig
new file mode 100644
index 0000000..cfe903c
--- /dev/null
+++ b/drivers/ntb/hw/amd/Kconfig
@@ -0,0 +1,7 @@
+config NTB_AMD
+ tristate "AMD Non-Transparent Bridge support"
+ depends on X86_64
+ help
+ This driver supports AMD NTB on capable Zeppelin hardware.
+
+ If unsure, say N.
diff --git a/drivers/ntb/hw/amd/Makefile b/drivers/ntb/hw/amd/Makefile
new file mode 100644
index 0000000..ad54da9
--- /dev/null
+++ b/drivers/ntb/hw/amd/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_NTB_AMD) += ntb_hw_amd.o
diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c
new file mode 100644
index 0000000..588803a
--- /dev/null
+++ b/drivers/ntb/hw/amd/ntb_hw_amd.c
@@ -0,0 +1,1143 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * BSD LICENSE
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copy
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of AMD Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * AMD PCIe NTB Linux driver
+ *
+ * Contact Information:
+ * Xiangliang Yu <Xiangliang.Yu@amd.com>
+ */
+
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/acpi.h>
+#include <linux/pci.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/ntb.h>
+
+#include "ntb_hw_amd.h"
+
+#define NTB_NAME "ntb_hw_amd"
+#define NTB_DESC "AMD(R) PCI-E Non-Transparent Bridge Driver"
+#define NTB_VER "1.0"
+
+MODULE_DESCRIPTION(NTB_DESC);
+MODULE_VERSION(NTB_VER);
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("AMD Inc.");
+
+static const struct file_operations amd_ntb_debugfs_info;
+static struct dentry *debugfs_dir;
+
+static int ndev_mw_to_bar(struct amd_ntb_dev *ndev, int idx)
+{
+ if (idx < 0 || idx > ndev->mw_count)
+ return -EINVAL;
+
+ return 1 << idx;
+}
+
+static int amd_ntb_mw_count(struct ntb_dev *ntb)
+{
+ return ntb_ndev(ntb)->mw_count;
+}
+
+static int amd_ntb_mw_get_range(struct ntb_dev *ntb, int idx,
+ phys_addr_t *base,
+ resource_size_t *size,
+ resource_size_t *align,
+ resource_size_t *align_size)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+ int bar;
+
+ bar = ndev_mw_to_bar(ndev, idx);
+ if (bar < 0)
+ return bar;
+
+ if (base)
+ *base = pci_resource_start(ndev->ntb.pdev, bar);
+
+ if (size)
+ *size = pci_resource_len(ndev->ntb.pdev, bar);
+
+ if (align)
+ *align = SZ_4K;
+
+ if (align_size)
+ *align_size = 1;
+
+ return 0;
+}
+
+static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
+ dma_addr_t addr, resource_size_t size)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+ unsigned long xlat_reg, limit_reg = 0;
+ resource_size_t mw_size;
+ void __iomem *mmio, *peer_mmio;
+ u64 base_addr, limit, reg_val;
+ int bar;
+
+ bar = ndev_mw_to_bar(ndev, idx);
+ if (bar < 0)
+ return bar;
+
+ mw_size = pci_resource_len(ndev->ntb.pdev, bar);
+
+ /* make sure the range fits in the usable mw size */
+ if (size > mw_size)
+ return -EINVAL;
+
+ mmio = ndev->self_mmio;
+ peer_mmio = ndev->peer_mmio;
+
+ base_addr = pci_resource_start(ndev->ntb.pdev, bar);
+
+ if (bar != 1) {
+ xlat_reg = AMD_BAR23XLAT_OFFSET + ((bar - 2) << 3);
+ limit_reg = AMD_BAR23LMT_OFFSET + ((bar - 2) << 3);
+
+ /* Set the limit if supported */
+ limit = base_addr + size;
+
+ /* set and verify setting the translation address */
+ write64(addr, peer_mmio + xlat_reg);
+ reg_val = read64(peer_mmio + xlat_reg);
+ if (reg_val != addr) {
+ write64(0, peer_mmio + xlat_reg);
+ return -EIO;
+ }
+
+ /* set and verify setting the limit */
+ write64(limit, mmio + limit_reg);
+ reg_val = read64(mmio + limit_reg);
+ if (reg_val != limit) {
+ write64(base_addr, mmio + limit_reg);
+ write64(0, peer_mmio + xlat_reg);
+ return -EIO;
+ }
+ } else {
+ xlat_reg = AMD_BAR1XLAT_OFFSET;
+ limit_reg = AMD_BAR1LMT_OFFSET;
+
+ /* split bar addr range must all be 32 bit */
+ if (addr & (~0ull << 32))
+ return -EINVAL;
+ if ((addr + size) & (~0ull << 32))
+ return -EINVAL;
+
+ /* Set the limit if supported */
+ limit = base_addr + size;
+
+ /* set and verify setting the translation address */
+ write64(addr, peer_mmio + xlat_reg);
+ reg_val = read64(peer_mmio + xlat_reg);
+ if (reg_val != addr) {
+ write64(0, peer_mmio + xlat_reg);
+ return -EIO;
+ }
+
+ /* set and verify setting the limit */
+ writel(limit, mmio + limit_reg);
+ reg_val = readl(mmio + limit_reg);
+ if (reg_val != limit) {
+ writel(base_addr, mmio + limit_reg);
+ writel(0, peer_mmio + xlat_reg);
+ return -EIO;
+ }
+ }
+
+ return 0;
+}
+
+static int amd_link_is_up(struct amd_ntb_dev *ndev)
+{
+ if (!ndev->peer_sta)
+ return NTB_LNK_STA_ACTIVE(ndev->cntl_sta);
+
+ /* If peer_sta is reset or D0 event, the ISR has
+ * started a timer to check link status of hardware.
+ * So here just clear status bit. And if peer_sta is
+ * D3 or PME_TO, D0/reset event will be happened when
+ * system wakeup/poweron, so do nothing here.
+ */
+ if (ndev->peer_sta & AMD_PEER_RESET_EVENT)
+ ndev->peer_sta &= ~AMD_PEER_RESET_EVENT;
+ else if (ndev->peer_sta & AMD_PEER_D0_EVENT)
+ ndev->peer_sta = 0;
+
+ return 0;
+}
+
+static int amd_ntb_link_is_up(struct ntb_dev *ntb,
+ enum ntb_speed *speed,
+ enum ntb_width *width)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+ int ret = 0;
+
+ if (amd_link_is_up(ndev)) {
+ if (speed)
+ *speed = NTB_LNK_STA_SPEED(ndev->lnk_sta);
+ if (width)
+ *width = NTB_LNK_STA_WIDTH(ndev->lnk_sta);
+
+ dev_dbg(ndev_dev(ndev), "link is up.\n");
+
+ ret = 1;
+ } else {
+ if (speed)
+ *speed = NTB_SPEED_NONE;
+ if (width)
+ *width = NTB_WIDTH_NONE;
+
+ dev_dbg(ndev_dev(ndev), "link is down.\n");
+ }
+
+ return ret;
+}
+
+static int amd_ntb_link_enable(struct ntb_dev *ntb,
+ enum ntb_speed max_speed,
+ enum ntb_width max_width)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+ void __iomem *mmio = ndev->self_mmio;
+ u32 ntb_ctl;
+
+ /* Enable event interrupt */
+ ndev->int_mask &= ~AMD_EVENT_INTMASK;
+ writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
+
+ if (ndev->ntb.topo == NTB_TOPO_SEC)
+ return -EINVAL;
+ dev_dbg(ndev_dev(ndev), "Enabling Link.\n");
+
+ ntb_ctl = readl(mmio + AMD_CNTL_OFFSET);
+ ntb_ctl |= (PMM_REG_CTL | SMM_REG_CTL);
+ writel(ntb_ctl, mmio + AMD_CNTL_OFFSET);
+
+ return 0;
+}
+
+static int amd_ntb_link_disable(struct ntb_dev *ntb)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+ void __iomem *mmio = ndev->self_mmio;
+ u32 ntb_ctl;
+
+ /* Disable event interrupt */
+ ndev->int_mask |= AMD_EVENT_INTMASK;
+ writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
+
+ if (ndev->ntb.topo == NTB_TOPO_SEC)
+ return -EINVAL;
+ dev_dbg(ndev_dev(ndev), "Enabling Link.\n");
+
+ ntb_ctl = readl(mmio + AMD_CNTL_OFFSET);
+ ntb_ctl &= ~(PMM_REG_CTL | SMM_REG_CTL);
+ writel(ntb_ctl, mmio + AMD_CNTL_OFFSET);
+
+ return 0;
+}
+
+static u64 amd_ntb_db_valid_mask(struct ntb_dev *ntb)
+{
+ return ntb_ndev(ntb)->db_valid_mask;
+}
+
+static int amd_ntb_db_vector_count(struct ntb_dev *ntb)
+{
+ return ntb_ndev(ntb)->db_count;
+}
+
+static u64 amd_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+ if (db_vector < 0 || db_vector > ndev->db_count)
+ return 0;
+
+ return ntb_ndev(ntb)->db_valid_mask & (1 << db_vector);
+}
+
+static u64 amd_ntb_db_read(struct ntb_dev *ntb)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+ void __iomem *mmio = ndev->self_mmio;
+
+ return (u64)readw(mmio + AMD_DBSTAT_OFFSET);
+}
+
+static int amd_ntb_db_clear(struct ntb_dev *ntb, u64 db_bits)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+ void __iomem *mmio = ndev->self_mmio;
+
+ writew((u16)db_bits, mmio + AMD_DBSTAT_OFFSET);
+
+ return 0;
+}
+
+static int amd_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+ void __iomem *mmio = ndev->self_mmio;
+ unsigned long flags;
+
+ if (db_bits & ~ndev->db_valid_mask)
+ return -EINVAL;
+
+ spin_lock_irqsave(&ndev->db_mask_lock, flags);
+ ndev->db_mask |= db_bits;
+ writew((u16)ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
+ spin_unlock_irqrestore(&ndev->db_mask_lock, flags);
+
+ return 0;
+}
+
+static int amd_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+ void __iomem *mmio = ndev->self_mmio;
+ unsigned long flags;
+
+ if (db_bits & ~ndev->db_valid_mask)
+ return -EINVAL;
+
+ spin_lock_irqsave(&ndev->db_mask_lock, flags);
+ ndev->db_mask &= ~db_bits;
+ writew((u16)ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
+ spin_unlock_irqrestore(&ndev->db_mask_lock, flags);
+
+ return 0;
+}
+
+static int amd_ntb_peer_db_addr(struct ntb_dev *ntb,
+ phys_addr_t *db_addr,
+ resource_size_t *db_size)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+ if (db_addr)
+ *db_addr = (phys_addr_t)(ndev->peer_mmio + AMD_DBREQ_OFFSET);
+ if (db_size)
+ *db_size = sizeof(u32);
+
+ return 0;
+}
+
+static int amd_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+ void __iomem *mmio = ndev->self_mmio;
+
+ writew((u16)db_bits, mmio + AMD_DBREQ_OFFSET);
+
+ return 0;
+}
+
+static int amd_ntb_spad_count(struct ntb_dev *ntb)
+{
+ return ntb_ndev(ntb)->spad_count;
+}
+
+static u32 amd_ntb_spad_read(struct ntb_dev *ntb, int idx)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+ void __iomem *mmio = ndev->self_mmio;
+ u32 offset;
+
+ if (idx < 0 || idx >= ndev->spad_count)
+ return 0;
+
+ offset = ndev->self_spad + (idx << 2);
+ return readl(mmio + AMD_SPAD_OFFSET + offset);
+}
+
+static int amd_ntb_spad_write(struct ntb_dev *ntb,
+ int idx, u32 val)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+ void __iomem *mmio = ndev->self_mmio;
+ u32 offset;
+
+ if (idx < 0 || idx >= ndev->spad_count)
+ return -EINVAL;
+
+ offset = ndev->self_spad + (idx << 2);
+ writel(val, mmio + AMD_SPAD_OFFSET + offset);
+
+ return 0;
+}
+
+static int amd_ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
+ phys_addr_t *spad_addr)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+ if (idx < 0 || idx >= ndev->spad_count)
+ return -EINVAL;
+
+ if (spad_addr)
+ *spad_addr = (phys_addr_t)(ndev->self_mmio + AMD_SPAD_OFFSET +
+ ndev->peer_spad + (idx << 2));
+ return 0;
+}
+
+static u32 amd_ntb_peer_spad_read(struct ntb_dev *ntb, int idx)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+ void __iomem *mmio = ndev->self_mmio;
+ u32 offset;
+
+ if (idx < 0 || idx >= ndev->spad_count)
+ return -EINVAL;
+
+ offset = ndev->peer_spad + (idx << 2);
+ return readl(mmio + AMD_SPAD_OFFSET + offset);
+}
+
+static int amd_ntb_peer_spad_write(struct ntb_dev *ntb,
+ int idx, u32 val)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+ void __iomem *mmio = ndev->self_mmio;
+ u32 offset;
+
+ if (idx < 0 || idx >= ndev->spad_count)
+ return -EINVAL;
+
+ offset = ndev->peer_spad + (idx << 2);
+ writel(val, mmio + AMD_SPAD_OFFSET + offset);
+
+ return 0;
+}
+
+static const struct ntb_dev_ops amd_ntb_ops = {
+ .mw_count = amd_ntb_mw_count,
+ .mw_get_range = amd_ntb_mw_get_range,
+ .mw_set_trans = amd_ntb_mw_set_trans,
+ .link_is_up = amd_ntb_link_is_up,
+ .link_enable = amd_ntb_link_enable,
+ .link_disable = amd_ntb_link_disable,
+ .db_valid_mask = amd_ntb_db_valid_mask,
+ .db_vector_count = amd_ntb_db_vector_count,
+ .db_vector_mask = amd_ntb_db_vector_mask,
+ .db_read = amd_ntb_db_read,
+ .db_clear = amd_ntb_db_clear,
+ .db_set_mask = amd_ntb_db_set_mask,
+ .db_clear_mask = amd_ntb_db_clear_mask,
+ .peer_db_addr = amd_ntb_peer_db_addr,
+ .peer_db_set = amd_ntb_peer_db_set,
+ .spad_count = amd_ntb_spad_count,
+ .spad_read = amd_ntb_spad_read,
+ .spad_write = amd_ntb_spad_write,
+ .peer_spad_addr = amd_ntb_peer_spad_addr,
+ .peer_spad_read = amd_ntb_peer_spad_read,
+ .peer_spad_write = amd_ntb_peer_spad_write,
+};
+
+static void amd_ack_smu(struct amd_ntb_dev *ndev, u32 bit)
+{
+ void __iomem *mmio = ndev->self_mmio;
+ int reg;
+
+ reg = readl(mmio + AMD_SMUACK_OFFSET);
+ reg |= bit;
+ writel(reg, mmio + AMD_SMUACK_OFFSET);
+
+ ndev->peer_sta |= bit;
+}
+
+static void amd_handle_event(struct amd_ntb_dev *ndev, int vec)
+{
+ void __iomem *mmio = ndev->self_mmio;
+ u32 status;
+
+ status = readl(mmio + AMD_INTSTAT_OFFSET);
+ if (!(status & AMD_EVENT_INTMASK))
+ return;
+
+ dev_dbg(ndev_dev(ndev), "status = 0x%x and vec = %d\n", status, vec);
+
+ status &= AMD_EVENT_INTMASK;
+ switch (status) {
+ case AMD_PEER_FLUSH_EVENT:
+ dev_info(ndev_dev(ndev), "Flush is done.\n");
+ break;
+ case AMD_PEER_RESET_EVENT:
+ amd_ack_smu(ndev, AMD_PEER_RESET_EVENT);
+
+ /* link down first */
+ ntb_link_event(&ndev->ntb);
+ /* polling peer status */
+ schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
+
+ break;
+ case AMD_PEER_D3_EVENT:
+ case AMD_PEER_PMETO_EVENT:
+ amd_ack_smu(ndev, status);
+
+ /* link down */
+ ntb_link_event(&ndev->ntb);
+
+ break;
+ case AMD_PEER_D0_EVENT:
+ mmio = ndev->peer_mmio;
+ status = readl(mmio + AMD_PMESTAT_OFFSET);
+ /* check if this is WAKEUP event */
+ if (status & 0x1)
+ dev_info(ndev_dev(ndev), "Wakeup is done.\n");
+
+ amd_ack_smu(ndev, AMD_PEER_D0_EVENT);
+
+ /* start a timer to poll link status */
+ schedule_delayed_work(&ndev->hb_timer,
+ AMD_LINK_HB_TIMEOUT);
+ break;
+ default:
+ dev_info(ndev_dev(ndev), "event status = 0x%x.\n", status);
+ break;
+ }
+}
+
+static irqreturn_t ndev_interrupt(struct amd_ntb_dev *ndev, int vec)
+{
+ dev_dbg(ndev_dev(ndev), "vec %d\n", vec);
+
+ if (vec > (AMD_DB_CNT - 1) || (ndev->msix_vec_count == 1))
+ amd_handle_event(ndev, vec);
+
+ if (vec < AMD_DB_CNT)
+ ntb_db_event(&ndev->ntb, vec);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t ndev_vec_isr(int irq, void *dev)
+{
+ struct amd_ntb_vec *nvec = dev;
+
+ return ndev_interrupt(nvec->ndev, nvec->num);
+}
+
+static irqreturn_t ndev_irq_isr(int irq, void *dev)
+{
+ struct amd_ntb_dev *ndev = dev;
+
+ return ndev_interrupt(ndev, irq - ndev_pdev(ndev)->irq);
+}
+
+static int ndev_init_isr(struct amd_ntb_dev *ndev,
+ int msix_min, int msix_max)
+{
+ struct pci_dev *pdev;
+ int rc, i, msix_count, node;
+
+ pdev = ndev_pdev(ndev);
+
+ node = dev_to_node(&pdev->dev);
+
+ ndev->db_mask = ndev->db_valid_mask;
+
+ /* Try to set up msix irq */
+ ndev->vec = kzalloc_node(msix_max * sizeof(*ndev->vec),
+ GFP_KERNEL, node);
+ if (!ndev->vec)
+ goto err_msix_vec_alloc;
+
+ ndev->msix = kzalloc_node(msix_max * sizeof(*ndev->msix),
+ GFP_KERNEL, node);
+ if (!ndev->msix)
+ goto err_msix_alloc;
+
+ for (i = 0; i < msix_max; ++i)
+ ndev->msix[i].entry = i;
+
+ msix_count = pci_enable_msix_range(pdev, ndev->msix,
+ msix_min, msix_max);
+ if (msix_count < 0)
+ goto err_msix_enable;
+
+ /* NOTE: Disable MSIX if msix count is less than 16 because of
+ * hardware limitation.
+ */
+ if (msix_count < msix_min) {
+ pci_disable_msix(pdev);
+ goto err_msix_enable;
+ }
+
+ for (i = 0; i < msix_count; ++i) {
+ ndev->vec[i].ndev = ndev;
+ ndev->vec[i].num = i;
+ rc = request_irq(ndev->msix[i].vector, ndev_vec_isr, 0,
+ "ndev_vec_isr", &ndev->vec[i]);
+ if (rc)
+ goto err_msix_request;
+ }
+
+ dev_dbg(ndev_dev(ndev), "Using msix interrupts\n");
+ ndev->db_count = msix_min;
+ ndev->msix_vec_count = msix_max;
+ return 0;
+
+err_msix_request:
+ while (i-- > 0)
+ free_irq(ndev->msix[i].vector, ndev);
+ pci_disable_msix(pdev);
+err_msix_enable:
+ kfree(ndev->msix);
+err_msix_alloc:
+ kfree(ndev->vec);
+err_msix_vec_alloc:
+ ndev->msix = NULL;
+ ndev->vec = NULL;
+
+ /* Try to set up msi irq */
+ rc = pci_enable_msi(pdev);
+ if (rc)
+ goto err_msi_enable;
+
+ rc = request_irq(pdev->irq, ndev_irq_isr, 0,
+ "ndev_irq_isr", ndev);
+ if (rc)
+ goto err_msi_request;
+
+ dev_dbg(ndev_dev(ndev), "Using msi interrupts\n");
+ ndev->db_count = 1;
+ ndev->msix_vec_count = 1;
+ return 0;
+
+err_msi_request:
+ pci_disable_msi(pdev);
+err_msi_enable:
+
+ /* Try to set up intx irq */
+ pci_intx(pdev, 1);
+
+ rc = request_irq(pdev->irq, ndev_irq_isr, IRQF_SHARED,
+ "ndev_irq_isr", ndev);
+ if (rc)
+ goto err_intx_request;
+
+ dev_dbg(ndev_dev(ndev), "Using intx interrupts\n");
+ ndev->db_count = 1;
+ ndev->msix_vec_count = 1;
+ return 0;
+
+err_intx_request:
+ return rc;
+}
+
+static void ndev_deinit_isr(struct amd_ntb_dev *ndev)
+{
+ struct pci_dev *pdev;
+ void __iomem *mmio = ndev->self_mmio;
+ int i;
+
+ pdev = ndev_pdev(ndev);
+
+ /* Mask all doorbell interrupts */
+ ndev->db_mask = ndev->db_valid_mask;
+ writel(ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
+
+ if (ndev->msix) {
+ i = ndev->msix_vec_count;
+ while (i--)
+ free_irq(ndev->msix[i].vector, &ndev->vec[i]);
+ pci_disable_msix(pdev);
+ kfree(ndev->msix);
+ kfree(ndev->vec);
+ } else {
+ free_irq(pdev->irq, ndev);
+ if (pci_dev_msi_enabled(pdev))
+ pci_disable_msi(pdev);
+ else
+ pci_intx(pdev, 0);
+ }
+}
+
+static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *offp)
+{
+ struct amd_ntb_dev *ndev;
+ void __iomem *mmio;
+ char *buf;
+ size_t buf_size;
+ ssize_t ret, off;
+ union { u64 v64; u32 v32; u16 v16; } u;
+
+ ndev = filp->private_data;
+ mmio = ndev->self_mmio;
+
+ buf_size = min(count, 0x800ul);
+
+ buf = kmalloc(buf_size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ off = 0;
+
+ off += scnprintf(buf + off, buf_size - off,
+ "NTB Device Information:\n");
+
+ off += scnprintf(buf + off, buf_size - off,
+ "Connection Topology -\t%s\n",
+ ntb_topo_string(ndev->ntb.topo));
+
+ off += scnprintf(buf + off, buf_size - off,
+ "LNK STA -\t\t%#06x\n", ndev->lnk_sta);
+
+ if (!amd_link_is_up(ndev)) {
+ off += scnprintf(buf + off, buf_size - off,
+ "Link Status -\t\tDown\n");
+ } else {
+ off += scnprintf(buf + off, buf_size - off,
+ "Link Status -\t\tUp\n");
+ off += scnprintf(buf + off, buf_size - off,
+ "Link Speed -\t\tPCI-E Gen %u\n",
+ NTB_LNK_STA_SPEED(ndev->lnk_sta));
+ off += scnprintf(buf + off, buf_size - off,
+ "Link Width -\t\tx%u\n",
+ NTB_LNK_STA_WIDTH(ndev->lnk_sta));
+ }
+
+ off += scnprintf(buf + off, buf_size - off,
+ "Memory Window Count -\t%u\n", ndev->mw_count);
+ off += scnprintf(buf + off, buf_size - off,
+ "Scratchpad Count -\t%u\n", ndev->spad_count);
+ off += scnprintf(buf + off, buf_size - off,
+ "Doorbell Count -\t%u\n", ndev->db_count);
+ off += scnprintf(buf + off, buf_size - off,
+ "MSIX Vector Count -\t%u\n", ndev->msix_vec_count);
+
+ off += scnprintf(buf + off, buf_size - off,
+ "Doorbell Valid Mask -\t%#llx\n", ndev->db_valid_mask);
+
+ u.v32 = readl(ndev->self_mmio + AMD_DBMASK_OFFSET);
+ off += scnprintf(buf + off, buf_size - off,
+ "Doorbell Mask -\t\t\t%#06x\n", u.v32);
+
+ u.v32 = readl(mmio + AMD_DBSTAT_OFFSET);
+ off += scnprintf(buf + off, buf_size - off,
+ "Doorbell Bell -\t\t\t%#06x\n", u.v32);
+
+ off += scnprintf(buf + off, buf_size - off,
+ "\nNTB Incoming XLAT:\n");
+
+ u.v64 = read64(mmio + AMD_BAR1XLAT_OFFSET);
+ off += scnprintf(buf + off, buf_size - off,
+ "XLAT1 -\t\t%#018llx\n", u.v64);
+
+ u.v64 = read64(ndev->self_mmio + AMD_BAR23XLAT_OFFSET);
+ off += scnprintf(buf + off, buf_size - off,
+ "XLAT23 -\t\t%#018llx\n", u.v64);
+
+ u.v64 = read64(ndev->self_mmio + AMD_BAR45XLAT_OFFSET);
+ off += scnprintf(buf + off, buf_size - off,
+ "XLAT45 -\t\t%#018llx\n", u.v64);
+
+ u.v32 = readl(mmio + AMD_BAR1LMT_OFFSET);
+ off += scnprintf(buf + off, buf_size - off,
+ "LMT1 -\t\t\t%#06x\n", u.v32);
+
+ u.v64 = read64(ndev->self_mmio + AMD_BAR23LMT_OFFSET);
+ off += scnprintf(buf + off, buf_size - off,
+ "LMT23 -\t\t\t%#018llx\n", u.v64);
+
+ u.v64 = read64(ndev->self_mmio + AMD_BAR45LMT_OFFSET);
+ off += scnprintf(buf + off, buf_size - off,
+ "LMT45 -\t\t\t%#018llx\n", u.v64);
+
+ ret = simple_read_from_buffer(ubuf, count, offp, buf, off);
+ kfree(buf);
+ return ret;
+}
+
+static void ndev_init_debugfs(struct amd_ntb_dev *ndev)
+{
+ if (!debugfs_dir) {
+ ndev->debugfs_dir = NULL;
+ ndev->debugfs_info = NULL;
+ } else {
+ ndev->debugfs_dir =
+ debugfs_create_dir(ndev_name(ndev), debugfs_dir);
+ if (!ndev->debugfs_dir)
+ ndev->debugfs_info = NULL;
+ else
+ ndev->debugfs_info =
+ debugfs_create_file("info", S_IRUSR,
+ ndev->debugfs_dir, ndev,
+ &amd_ntb_debugfs_info);
+ }
+}
+
+static void ndev_deinit_debugfs(struct amd_ntb_dev *ndev)
+{
+ debugfs_remove_recursive(ndev->debugfs_dir);
+}
+
+static inline void ndev_init_struct(struct amd_ntb_dev *ndev,
+ struct pci_dev *pdev)
+{
+ ndev->ntb.pdev = pdev;
+ ndev->ntb.topo = NTB_TOPO_NONE;
+ ndev->ntb.ops = &amd_ntb_ops;
+ ndev->int_mask = AMD_EVENT_INTMASK;
+ spin_lock_init(&ndev->db_mask_lock);
+}
+
+static int amd_poll_link(struct amd_ntb_dev *ndev)
+{
+ void __iomem *mmio = ndev->peer_mmio;
+ u32 reg, stat;
+ int rc;
+
+ reg = readl(mmio + AMD_SIDEINFO_OFFSET);
+ reg &= NTB_LIN_STA_ACTIVE_BIT;
+
+ dev_dbg(ndev_dev(ndev), "%s: reg_val = 0x%x.\n", __func__, reg);
+
+ if (reg == ndev->cntl_sta)
+ return 0;
+
+ ndev->cntl_sta = reg;
+
+ rc = pci_read_config_dword(ndev->ntb.pdev,
+ AMD_LINK_STATUS_OFFSET, &stat);
+ if (rc)
+ return 0;
+ ndev->lnk_sta = stat;
+
+ return 1;
+}
+
+static void amd_link_hb(struct work_struct *work)
+{
+ struct amd_ntb_dev *ndev = hb_ndev(work);
+
+ if (amd_poll_link(ndev))
+ ntb_link_event(&ndev->ntb);
+
+ if (!amd_link_is_up(ndev))
+ schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
+}
+
+static int amd_init_isr(struct amd_ntb_dev *ndev)
+{
+ return ndev_init_isr(ndev, AMD_DB_CNT, AMD_MSIX_VECTOR_CNT);
+}
+
+static void amd_init_side_info(struct amd_ntb_dev *ndev)
+{
+ void __iomem *mmio = ndev->self_mmio;
+ unsigned int reg;
+
+ reg = readl(mmio + AMD_SIDEINFO_OFFSET);
+ if (!(reg & AMD_SIDE_READY)) {
+ reg |= AMD_SIDE_READY;
+ writel(reg, mmio + AMD_SIDEINFO_OFFSET);
+ }
+}
+
+static void amd_deinit_side_info(struct amd_ntb_dev *ndev)
+{
+ void __iomem *mmio = ndev->self_mmio;
+ unsigned int reg;
+
+ reg = readl(mmio + AMD_SIDEINFO_OFFSET);
+ if (reg & AMD_SIDE_READY) {
+ reg &= ~AMD_SIDE_READY;
+ writel(reg, mmio + AMD_SIDEINFO_OFFSET);
+ readl(mmio + AMD_SIDEINFO_OFFSET);
+ }
+}
+
+static int amd_init_ntb(struct amd_ntb_dev *ndev)
+{
+ void __iomem *mmio = ndev->self_mmio;
+
+ ndev->mw_count = AMD_MW_CNT;
+ ndev->spad_count = AMD_SPADS_CNT;
+ ndev->db_count = AMD_DB_CNT;
+
+ switch (ndev->ntb.topo) {
+ case NTB_TOPO_PRI:
+ case NTB_TOPO_SEC:
+ ndev->spad_count >>= 1;
+ if (ndev->ntb.topo == NTB_TOPO_PRI) {
+ ndev->self_spad = 0;
+ ndev->peer_spad = 0x20;
+ } else {
+ ndev->self_spad = 0x20;
+ ndev->peer_spad = 0;
+ }
+
+ INIT_DELAYED_WORK(&ndev->hb_timer, amd_link_hb);
+ schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
+
+ break;
+ default:
+ dev_err(ndev_dev(ndev), "AMD NTB does not support B2B mode.\n");
+ return -EINVAL;
+ }
+
+ ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
+
+ /* Mask event interrupts */
+ writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
+
+ return 0;
+}
+
+static enum ntb_topo amd_get_topo(struct amd_ntb_dev *ndev)
+{
+ void __iomem *mmio = ndev->self_mmio;
+ u32 info;
+
+ info = readl(mmio + AMD_SIDEINFO_OFFSET);
+ if (info & AMD_SIDE_MASK)
+ return NTB_TOPO_SEC;
+ else
+ return NTB_TOPO_PRI;
+}
+
+static int amd_init_dev(struct amd_ntb_dev *ndev)
+{
+ struct pci_dev *pdev;
+ int rc = 0;
+
+ pdev = ndev_pdev(ndev);
+
+ ndev->ntb.topo = amd_get_topo(ndev);
+ dev_dbg(ndev_dev(ndev), "AMD NTB topo is %s\n",
+ ntb_topo_string(ndev->ntb.topo));
+
+ rc = amd_init_ntb(ndev);
+ if (rc)
+ return rc;
+
+ rc = amd_init_isr(ndev);
+ if (rc) {
+ dev_err(ndev_dev(ndev), "fail to init isr.\n");
+ return rc;
+ }
+
+ ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
+
+ return 0;
+}
+
+static void amd_deinit_dev(struct amd_ntb_dev *ndev)
+{
+ cancel_delayed_work_sync(&ndev->hb_timer);
+
+ ndev_deinit_isr(ndev);
+}
+
+static int amd_ntb_init_pci(struct amd_ntb_dev *ndev,
+ struct pci_dev *pdev)
+{
+ int rc;
+
+ pci_set_drvdata(pdev, ndev);
+
+ rc = pci_enable_device(pdev);
+ if (rc)
+ goto err_pci_enable;
+
+ rc = pci_request_regions(pdev, NTB_NAME);
+ if (rc)
+ goto err_pci_regions;
+
+ pci_set_master(pdev);
+
+ rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (rc) {
+ rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ if (rc)
+ goto err_dma_mask;
+ dev_warn(ndev_dev(ndev), "Cannot DMA highmem\n");
+ }
+
+ rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (rc) {
+ rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+ if (rc)
+ goto err_dma_mask;
+ dev_warn(ndev_dev(ndev), "Cannot DMA consistent highmem\n");
+ }
+
+ ndev->self_mmio = pci_iomap(pdev, 0, 0);
+ if (!ndev->self_mmio) {
+ rc = -EIO;
+ goto err_dma_mask;
+ }
+ ndev->peer_mmio = ndev->self_mmio + AMD_PEER_OFFSET;
+
+ return 0;
+
+err_dma_mask:
+ pci_clear_master(pdev);
+err_pci_regions:
+ pci_disable_device(pdev);
+err_pci_enable:
+ pci_set_drvdata(pdev, NULL);
+ return rc;
+}
+
+static void amd_ntb_deinit_pci(struct amd_ntb_dev *ndev)
+{
+ struct pci_dev *pdev = ndev_pdev(ndev);
+
+ pci_iounmap(pdev, ndev->self_mmio);
+
+ pci_clear_master(pdev);
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+ pci_set_drvdata(pdev, NULL);
+}
+
+static int amd_ntb_pci_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ struct amd_ntb_dev *ndev;
+ int rc, node;
+
+ node = dev_to_node(&pdev->dev);
+
+ ndev = kzalloc_node(sizeof(*ndev), GFP_KERNEL, node);
+ if (!ndev) {
+ rc = -ENOMEM;
+ goto err_ndev;
+ }
+
+ ndev_init_struct(ndev, pdev);
+
+ rc = amd_ntb_init_pci(ndev, pdev);
+ if (rc)
+ goto err_init_pci;
+
+ rc = amd_init_dev(ndev);
+ if (rc)
+ goto err_init_dev;
+
+ /* write side info */
+ amd_init_side_info(ndev);
+
+ amd_poll_link(ndev);
+
+ ndev_init_debugfs(ndev);
+
+ rc = ntb_register_device(&ndev->ntb);
+ if (rc)
+ goto err_register;
+
+ dev_info(&pdev->dev, "NTB device registered.\n");
+
+ return 0;
+
+err_register:
+ ndev_deinit_debugfs(ndev);
+ amd_deinit_dev(ndev);
+err_init_dev:
+ amd_ntb_deinit_pci(ndev);
+err_init_pci:
+ kfree(ndev);
+err_ndev:
+ return rc;
+}
+
+static void amd_ntb_pci_remove(struct pci_dev *pdev)
+{
+ struct amd_ntb_dev *ndev = pci_get_drvdata(pdev);
+
+ ntb_unregister_device(&ndev->ntb);
+ ndev_deinit_debugfs(ndev);
+ amd_deinit_side_info(ndev);
+ amd_deinit_dev(ndev);
+ amd_ntb_deinit_pci(ndev);
+ kfree(ndev);
+}
+
+static const struct file_operations amd_ntb_debugfs_info = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = ndev_debugfs_read,
+};
+
+static const struct pci_device_id amd_ntb_pci_tbl[] = {
+ {PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_NTB)},
+ {0}
+};
+MODULE_DEVICE_TABLE(pci, amd_ntb_pci_tbl);
+
+static struct pci_driver amd_ntb_pci_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = amd_ntb_pci_tbl,
+ .probe = amd_ntb_pci_probe,
+ .remove = amd_ntb_pci_remove,
+};
+
+static int __init amd_ntb_pci_driver_init(void)
+{
+ pr_info("%s %s\n", NTB_DESC, NTB_VER);
+
+ if (debugfs_initialized())
+ debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+ return pci_register_driver(&amd_ntb_pci_driver);
+}
+module_init(amd_ntb_pci_driver_init);
+
+static void __exit amd_ntb_pci_driver_exit(void)
+{
+ pci_unregister_driver(&amd_ntb_pci_driver);
+ debugfs_remove_recursive(debugfs_dir);
+}
+module_exit(amd_ntb_pci_driver_exit);
diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.h b/drivers/ntb/hw/amd/ntb_hw_amd.h
new file mode 100644
index 0000000..2eac3cd
--- /dev/null
+++ b/drivers/ntb/hw/amd/ntb_hw_amd.h
@@ -0,0 +1,217 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * BSD LICENSE
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copy
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of AMD Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * AMD PCIe NTB Linux driver
+ *
+ * Contact Information:
+ * Xiangliang Yu <Xiangliang.Yu@amd.com>
+ */
+
+#ifndef NTB_HW_AMD_H
+#define NTB_HW_AMD_H
+
+#include <linux/ntb.h>
+#include <linux/pci.h>
+
+#define PCI_DEVICE_ID_AMD_NTB 0x145B
+#define AMD_LINK_HB_TIMEOUT msecs_to_jiffies(1000)
+#define AMD_LINK_STATUS_OFFSET 0x68
+#define NTB_LIN_STA_ACTIVE_BIT 0x00000002
+#define NTB_LNK_STA_SPEED_MASK 0x000F0000
+#define NTB_LNK_STA_WIDTH_MASK 0x03F00000
+#define NTB_LNK_STA_ACTIVE(x) (!!((x) & NTB_LIN_STA_ACTIVE_BIT))
+#define NTB_LNK_STA_SPEED(x) (((x) & NTB_LNK_STA_SPEED_MASK) >> 16)
+#define NTB_LNK_STA_WIDTH(x) (((x) & NTB_LNK_STA_WIDTH_MASK) >> 20)
+
+#ifndef read64
+#ifdef readq
+#define read64 readq
+#else
+#define read64 _read64
+static inline u64 _read64(void __iomem *mmio)
+{
+ u64 low, high;
+
+ low = readl(mmio);
+ high = readl(mmio + sizeof(u32));
+ return low | (high << 32);
+}
+#endif
+#endif
+
+#ifndef write64
+#ifdef writeq
+#define write64 writeq
+#else
+#define write64 _write64
+static inline void _write64(u64 val, void __iomem *mmio)
+{
+ writel(val, mmio);
+ writel(val >> 32, mmio + sizeof(u32));
+}
+#endif
+#endif
+
+enum {
+ /* AMD NTB Capability */
+ AMD_MW_CNT = 3,
+ AMD_DB_CNT = 16,
+ AMD_MSIX_VECTOR_CNT = 24,
+ AMD_SPADS_CNT = 16,
+
+ /* AMD NTB register offset */
+ AMD_CNTL_OFFSET = 0x200,
+
+ /* NTB control register bits */
+ PMM_REG_CTL = BIT(21),
+ SMM_REG_CTL = BIT(20),
+ SMM_REG_ACC_PATH = BIT(18),
+ PMM_REG_ACC_PATH = BIT(17),
+ NTB_CLK_EN = BIT(16),
+
+ AMD_STA_OFFSET = 0x204,
+ AMD_PGSLV_OFFSET = 0x208,
+ AMD_SPAD_MUX_OFFSET = 0x20C,
+ AMD_SPAD_OFFSET = 0x210,
+ AMD_RSMU_HCID = 0x250,
+ AMD_RSMU_SIID = 0x254,
+ AMD_PSION_OFFSET = 0x300,
+ AMD_SSION_OFFSET = 0x330,
+ AMD_MMINDEX_OFFSET = 0x400,
+ AMD_MMDATA_OFFSET = 0x404,
+ AMD_SIDEINFO_OFFSET = 0x408,
+
+ AMD_SIDE_MASK = BIT(0),
+ AMD_SIDE_READY = BIT(1),
+
+ /* limit register */
+ AMD_ROMBARLMT_OFFSET = 0x410,
+ AMD_BAR1LMT_OFFSET = 0x414,
+ AMD_BAR23LMT_OFFSET = 0x418,
+ AMD_BAR45LMT_OFFSET = 0x420,
+ /* xlat address */
+ AMD_POMBARXLAT_OFFSET = 0x428,
+ AMD_BAR1XLAT_OFFSET = 0x430,
+ AMD_BAR23XLAT_OFFSET = 0x438,
+ AMD_BAR45XLAT_OFFSET = 0x440,
+ /* doorbell and interrupt */
+ AMD_DBFM_OFFSET = 0x450,
+ AMD_DBREQ_OFFSET = 0x454,
+ AMD_MIRRDBSTAT_OFFSET = 0x458,
+ AMD_DBMASK_OFFSET = 0x45C,
+ AMD_DBSTAT_OFFSET = 0x460,
+ AMD_INTMASK_OFFSET = 0x470,
+ AMD_INTSTAT_OFFSET = 0x474,
+
+ /* event type */
+ AMD_PEER_FLUSH_EVENT = BIT(0),
+ AMD_PEER_RESET_EVENT = BIT(1),
+ AMD_PEER_D3_EVENT = BIT(2),
+ AMD_PEER_PMETO_EVENT = BIT(3),
+ AMD_PEER_D0_EVENT = BIT(4),
+ AMD_EVENT_INTMASK = (AMD_PEER_FLUSH_EVENT |
+ AMD_PEER_RESET_EVENT | AMD_PEER_D3_EVENT |
+ AMD_PEER_PMETO_EVENT | AMD_PEER_D0_EVENT),
+
+ AMD_PMESTAT_OFFSET = 0x480,
+ AMD_PMSGTRIG_OFFSET = 0x490,
+ AMD_LTRLATENCY_OFFSET = 0x494,
+ AMD_FLUSHTRIG_OFFSET = 0x498,
+
+ /* SMU register*/
+ AMD_SMUACK_OFFSET = 0x4A0,
+ AMD_SINRST_OFFSET = 0x4A4,
+ AMD_RSPNUM_OFFSET = 0x4A8,
+ AMD_SMU_SPADMUTEX = 0x4B0,
+ AMD_SMU_SPADOFFSET = 0x4B4,
+
+ AMD_PEER_OFFSET = 0x400,
+};
+
+struct amd_ntb_dev;
+
+struct amd_ntb_vec {
+ struct amd_ntb_dev *ndev;
+ int num;
+};
+
+struct amd_ntb_dev {
+ struct ntb_dev ntb;
+
+ u32 ntb_side;
+ u32 lnk_sta;
+ u32 cntl_sta;
+ u32 peer_sta;
+
+ unsigned char mw_count;
+ unsigned char spad_count;
+ unsigned char db_count;
+ unsigned char msix_vec_count;
+
+ u64 db_valid_mask;
+ u64 db_mask;
+ u32 int_mask;
+
+ struct msix_entry *msix;
+ struct amd_ntb_vec *vec;
+
+ /* synchronize rmw access of db_mask and hw reg */
+ spinlock_t db_mask_lock;
+
+ void __iomem *self_mmio;
+ void __iomem *peer_mmio;
+ unsigned int self_spad;
+ unsigned int peer_spad;
+
+ struct delayed_work hb_timer;
+
+ struct dentry *debugfs_dir;
+ struct dentry *debugfs_info;
+};
+
+#define ndev_pdev(ndev) ((ndev)->ntb.pdev)
+#define ndev_name(ndev) pci_name(ndev_pdev(ndev))
+#define ndev_dev(ndev) (&ndev_pdev(ndev)->dev)
+#define ntb_ndev(__ntb) container_of(__ntb, struct amd_ntb_dev, ntb)
+#define hb_ndev(__work) container_of(__work, struct amd_ntb_dev, hb_timer.work)
+
+#endif
diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c
index a198f82..40d04ef 100644
--- a/drivers/ntb/hw/intel/ntb_hw_intel.c
+++ b/drivers/ntb/hw/intel/ntb_hw_intel.c
@@ -875,7 +875,7 @@ static int intel_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
limit_reg = bar2_off(ndev->xlat_reg->bar2_limit, bar);
if (bar < 4 || !ndev->bar4_split) {
- base = ioread64(mmio + base_reg);
+ base = ioread64(mmio + base_reg) & NTB_BAR_MASK_64;
/* Set the limit if supported, if size is not mw_size */
if (limit_reg && size != mw_size)
@@ -906,7 +906,7 @@ static int intel_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
if ((addr + size) & (~0ull << 32))
return -EINVAL;
- base = ioread32(mmio + base_reg);
+ base = ioread32(mmio + base_reg) & NTB_BAR_MASK_32;
/* Set the limit if supported, if size is not mw_size */
if (limit_reg && size != mw_size)
diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.h b/drivers/ntb/hw/intel/ntb_hw_intel.h
index 2eb4add..3ec149c 100644
--- a/drivers/ntb/hw/intel/ntb_hw_intel.h
+++ b/drivers/ntb/hw/intel/ntb_hw_intel.h
@@ -245,6 +245,9 @@
#define NTB_UNSAFE_DB BIT_ULL(0)
#define NTB_UNSAFE_SPAD BIT_ULL(1)
+#define NTB_BAR_MASK_64 ~(0xfull)
+#define NTB_BAR_MASK_32 ~(0xfu)
+
struct intel_ntb_dev;
struct intel_ntb_reg {
@@ -334,7 +337,8 @@ struct intel_ntb_dev {
#define ndev_pdev(ndev) ((ndev)->ntb.pdev)
#define ndev_name(ndev) pci_name(ndev_pdev(ndev))
#define ndev_dev(ndev) (&ndev_pdev(ndev)->dev)
-#define ntb_ndev(ntb) container_of(ntb, struct intel_ntb_dev, ntb)
-#define hb_ndev(work) container_of(work, struct intel_ntb_dev, hb_timer.work)
+#define ntb_ndev(__ntb) container_of(__ntb, struct intel_ntb_dev, ntb)
+#define hb_ndev(__work) container_of(__work, struct intel_ntb_dev, \
+ hb_timer.work)
#endif
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index 60654d5..ec4775f 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -171,12 +171,14 @@ struct ntb_transport_qp {
u64 rx_err_ver;
u64 rx_memcpy;
u64 rx_async;
+ u64 dma_rx_prep_err;
u64 tx_bytes;
u64 tx_pkts;
u64 tx_ring_full;
u64 tx_err_no_buf;
u64 tx_memcpy;
u64 tx_async;
+ u64 dma_tx_prep_err;
};
struct ntb_transport_mw {
@@ -249,6 +251,8 @@ enum {
#define QP_TO_MW(nt, qp) ((qp) % nt->mw_count)
#define NTB_QP_DEF_NUM_ENTRIES 100
#define NTB_LINK_DOWN_TIMEOUT 10
+#define DMA_RETRIES 20
+#define DMA_OUT_RESOURCE_TO 50
static void ntb_transport_rxc_db(unsigned long data);
static const struct ntb_ctx_ops ntb_transport_ops;
@@ -501,6 +505,12 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count,
out_offset += snprintf(buf + out_offset, out_count - out_offset,
"free tx - \t%u\n",
ntb_transport_tx_free_entry(qp));
+ out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ "DMA tx prep err - \t%llu\n",
+ qp->dma_tx_prep_err);
+ out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ "DMA rx prep err - \t%llu\n",
+ qp->dma_rx_prep_err);
out_offset += snprintf(buf + out_offset, out_count - out_offset,
"\n");
@@ -726,6 +736,8 @@ static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
qp->tx_err_no_buf = 0;
qp->tx_memcpy = 0;
qp->tx_async = 0;
+ qp->dma_tx_prep_err = 0;
+ qp->dma_rx_prep_err = 0;
}
static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp)
@@ -1228,6 +1240,7 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset)
struct dmaengine_unmap_data *unmap;
dma_cookie_t cookie;
void *buf = entry->buf;
+ int retries = 0;
len = entry->len;
@@ -1263,11 +1276,21 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset)
unmap->from_cnt = 1;
- txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
- unmap->addr[0], len,
- DMA_PREP_INTERRUPT);
- if (!txd)
+ for (retries = 0; retries < DMA_RETRIES; retries++) {
+ txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
+ unmap->addr[0], len,
+ DMA_PREP_INTERRUPT);
+ if (txd)
+ break;
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(DMA_OUT_RESOURCE_TO);
+ }
+
+ if (!txd) {
+ qp->dma_rx_prep_err++;
goto err_get_unmap;
+ }
txd->callback = ntb_rx_copy_callback;
txd->callback_param = entry;
@@ -1460,6 +1483,7 @@ static void ntb_async_tx(struct ntb_transport_qp *qp,
void __iomem *offset;
size_t len = entry->len;
void *buf = entry->buf;
+ int retries = 0;
offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index;
hdr = offset + qp->tx_max_frame - sizeof(struct ntb_payload_header);
@@ -1494,10 +1518,20 @@ static void ntb_async_tx(struct ntb_transport_qp *qp,
unmap->to_cnt = 1;
- txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0], len,
- DMA_PREP_INTERRUPT);
- if (!txd)
+ for (retries = 0; retries < DMA_RETRIES; retries++) {
+ txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0],
+ len, DMA_PREP_INTERRUPT);
+ if (txd)
+ break;
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(DMA_OUT_RESOURCE_TO);
+ }
+
+ if (!txd) {
+ qp->dma_tx_prep_err++;
goto err_get_unmap;
+ }
txd->callback = ntb_tx_copy_callback;
txd->callback_param = entry;
@@ -1532,7 +1566,7 @@ static int ntb_process_tx(struct ntb_transport_qp *qp,
if (entry->len > qp->tx_max_frame - sizeof(struct ntb_payload_header)) {
if (qp->tx_handler)
- qp->tx_handler(qp->cb_data, qp, NULL, -EIO);
+ qp->tx_handler(qp, qp->cb_data, NULL, -EIO);
ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry,
&qp->tx_free_q);
diff --git a/drivers/ntb/test/Kconfig b/drivers/ntb/test/Kconfig
index 01852f9..a5d0eda 100644
--- a/drivers/ntb/test/Kconfig
+++ b/drivers/ntb/test/Kconfig
@@ -17,3 +17,11 @@ config NTB_TOOL
functioning at a basic level.
If unsure, say N.
+
+config NTB_PERF
+ tristate "NTB RAW Perf Measuring Tool"
+ help
+ This is a tool to measure raw NTB performance by transferring data
+ to and from the window without additional software interaction.
+
+ If unsure, say N.
diff --git a/drivers/ntb/test/Makefile b/drivers/ntb/test/Makefile
index 0ea32a3..9e77e0b 100644
--- a/drivers/ntb/test/Makefile
+++ b/drivers/ntb/test/Makefile
@@ -1,2 +1,3 @@
obj-$(CONFIG_NTB_PINGPONG) += ntb_pingpong.o
obj-$(CONFIG_NTB_TOOL) += ntb_tool.o
+obj-$(CONFIG_NTB_PERF) += ntb_perf.o
diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
new file mode 100644
index 0000000..c8a37ba
--- /dev/null
+++ b/drivers/ntb/test/ntb_perf.c
@@ -0,0 +1,748 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copy
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * PCIe NTB Perf Linux driver
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/time.h>
+#include <linux/timer.h>
+#include <linux/dma-mapping.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/sizes.h>
+#include <linux/ntb.h>
+
+#define DRIVER_NAME "ntb_perf"
+#define DRIVER_DESCRIPTION "PCIe NTB Performance Measurement Tool"
+
+#define DRIVER_LICENSE "Dual BSD/GPL"
+#define DRIVER_VERSION "1.0"
+#define DRIVER_AUTHOR "Dave Jiang <dave.jiang@intel.com>"
+
+#define PERF_LINK_DOWN_TIMEOUT 10
+#define PERF_VERSION 0xffff0001
+#define MAX_THREADS 32
+#define MAX_TEST_SIZE SZ_1M
+#define MAX_SRCS 32
+#define DMA_OUT_RESOURCE_TO 50
+#define DMA_RETRIES 20
+#define SZ_4G (1ULL << 32)
+#define MAX_SEG_ORDER 20 /* no larger than 1M for kmalloc buffer */
+
+MODULE_LICENSE(DRIVER_LICENSE);
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
+
+static struct dentry *perf_debugfs_dir;
+
+static unsigned int seg_order = 19; /* 512K */
+module_param(seg_order, uint, 0644);
+MODULE_PARM_DESC(seg_order, "size order [n^2] of buffer segment for testing");
+
+static unsigned int run_order = 32; /* 4G */
+module_param(run_order, uint, 0644);
+MODULE_PARM_DESC(run_order, "size order [n^2] of total data to transfer");
+
+static bool use_dma; /* default to 0 */
+module_param(use_dma, bool, 0644);
+MODULE_PARM_DESC(use_dma, "Using DMA engine to measure performance");
+
+struct perf_mw {
+ phys_addr_t phys_addr;
+ resource_size_t phys_size;
+ resource_size_t xlat_align;
+ resource_size_t xlat_align_size;
+ void __iomem *vbase;
+ size_t xlat_size;
+ size_t buf_size;
+ void *virt_addr;
+ dma_addr_t dma_addr;
+};
+
+struct perf_ctx;
+
+struct pthr_ctx {
+ struct task_struct *thread;
+ struct perf_ctx *perf;
+ atomic_t dma_sync;
+ struct dma_chan *dma_chan;
+ int dma_prep_err;
+ int src_idx;
+ void *srcs[MAX_SRCS];
+};
+
+struct perf_ctx {
+ struct ntb_dev *ntb;
+ spinlock_t db_lock;
+ struct perf_mw mw;
+ bool link_is_up;
+ struct work_struct link_cleanup;
+ struct delayed_work link_work;
+ struct dentry *debugfs_node_dir;
+ struct dentry *debugfs_run;
+ struct dentry *debugfs_threads;
+ u8 perf_threads;
+ bool run;
+ struct pthr_ctx pthr_ctx[MAX_THREADS];
+ atomic_t tsync;
+};
+
+enum {
+ VERSION = 0,
+ MW_SZ_HIGH,
+ MW_SZ_LOW,
+ SPAD_MSG,
+ SPAD_ACK,
+ MAX_SPAD
+};
+
+static void perf_link_event(void *ctx)
+{
+ struct perf_ctx *perf = ctx;
+
+ if (ntb_link_is_up(perf->ntb, NULL, NULL) == 1)
+ schedule_delayed_work(&perf->link_work, 2*HZ);
+ else
+ schedule_work(&perf->link_cleanup);
+}
+
+static void perf_db_event(void *ctx, int vec)
+{
+ struct perf_ctx *perf = ctx;
+ u64 db_bits, db_mask;
+
+ db_mask = ntb_db_vector_mask(perf->ntb, vec);
+ db_bits = ntb_db_read(perf->ntb);
+
+ dev_dbg(&perf->ntb->dev, "doorbell vec %d mask %#llx bits %#llx\n",
+ vec, db_mask, db_bits);
+}
+
+static const struct ntb_ctx_ops perf_ops = {
+ .link_event = perf_link_event,
+ .db_event = perf_db_event,
+};
+
+static void perf_copy_callback(void *data)
+{
+ struct pthr_ctx *pctx = data;
+
+ atomic_dec(&pctx->dma_sync);
+}
+
+static ssize_t perf_copy(struct pthr_ctx *pctx, char *dst,
+ char *src, size_t size)
+{
+ struct perf_ctx *perf = pctx->perf;
+ struct dma_async_tx_descriptor *txd;
+ struct dma_chan *chan = pctx->dma_chan;
+ struct dma_device *device;
+ struct dmaengine_unmap_data *unmap;
+ dma_cookie_t cookie;
+ size_t src_off, dst_off;
+ struct perf_mw *mw = &perf->mw;
+ u64 vbase, dst_vaddr;
+ dma_addr_t dst_phys;
+ int retries = 0;
+
+ if (!use_dma) {
+ memcpy_toio(dst, src, size);
+ return size;
+ }
+
+ if (!chan) {
+ dev_err(&perf->ntb->dev, "DMA engine does not exist\n");
+ return -EINVAL;
+ }
+
+ device = chan->device;
+ src_off = (size_t)src & ~PAGE_MASK;
+ dst_off = (size_t)dst & ~PAGE_MASK;
+
+ if (!is_dma_copy_aligned(device, src_off, dst_off, size))
+ return -ENODEV;
+
+ vbase = (u64)(u64 *)mw->vbase;
+ dst_vaddr = (u64)(u64 *)dst;
+ dst_phys = mw->phys_addr + (dst_vaddr - vbase);
+
+ unmap = dmaengine_get_unmap_data(device->dev, 1, GFP_NOWAIT);
+ if (!unmap)
+ return -ENOMEM;
+
+ unmap->len = size;
+ unmap->addr[0] = dma_map_page(device->dev, virt_to_page(src),
+ src_off, size, DMA_TO_DEVICE);
+ if (dma_mapping_error(device->dev, unmap->addr[0]))
+ goto err_get_unmap;
+
+ unmap->to_cnt = 1;
+
+ do {
+ txd = device->device_prep_dma_memcpy(chan, dst_phys,
+ unmap->addr[0],
+ size, DMA_PREP_INTERRUPT);
+ if (!txd) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(DMA_OUT_RESOURCE_TO);
+ }
+ } while (!txd && (++retries < DMA_RETRIES));
+
+ if (!txd) {
+ pctx->dma_prep_err++;
+ goto err_get_unmap;
+ }
+
+ txd->callback = perf_copy_callback;
+ txd->callback_param = pctx;
+ dma_set_unmap(txd, unmap);
+
+ cookie = dmaengine_submit(txd);
+ if (dma_submit_error(cookie))
+ goto err_set_unmap;
+
+ atomic_inc(&pctx->dma_sync);
+ dma_async_issue_pending(chan);
+
+ return size;
+
+err_set_unmap:
+ dmaengine_unmap_put(unmap);
+err_get_unmap:
+ dmaengine_unmap_put(unmap);
+ return 0;
+}
+
+static int perf_move_data(struct pthr_ctx *pctx, char *dst, char *src,
+ u64 buf_size, u64 win_size, u64 total)
+{
+ int chunks, total_chunks, i;
+ int copied_chunks = 0;
+ u64 copied = 0, result;
+ char *tmp = dst;
+ u64 perf, diff_us;
+ ktime_t kstart, kstop, kdiff;
+
+ chunks = div64_u64(win_size, buf_size);
+ total_chunks = div64_u64(total, buf_size);
+ kstart = ktime_get();
+
+ for (i = 0; i < total_chunks; i++) {
+ result = perf_copy(pctx, tmp, src, buf_size);
+ copied += result;
+ copied_chunks++;
+ if (copied_chunks == chunks) {
+ tmp = dst;
+ copied_chunks = 0;
+ } else
+ tmp += buf_size;
+
+ /* Probably should schedule every 4GB to prevent soft hang. */
+ if (((copied % SZ_4G) == 0) && !use_dma) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(1);
+ }
+ }
+
+ if (use_dma) {
+ pr_info("%s: All DMA descriptors submitted\n", current->comm);
+ while (atomic_read(&pctx->dma_sync) != 0)
+ msleep(20);
+ }
+
+ kstop = ktime_get();
+ kdiff = ktime_sub(kstop, kstart);
+ diff_us = ktime_to_us(kdiff);
+
+ pr_info("%s: copied %llu bytes\n", current->comm, copied);
+
+ pr_info("%s: lasted %llu usecs\n", current->comm, diff_us);
+
+ perf = div64_u64(copied, diff_us);
+
+ pr_info("%s: MBytes/s: %llu\n", current->comm, perf);
+
+ return 0;
+}
+
+static bool perf_dma_filter_fn(struct dma_chan *chan, void *node)
+{
+ return dev_to_node(&chan->dev->device) == (int)(unsigned long)node;
+}
+
+static int ntb_perf_thread(void *data)
+{
+ struct pthr_ctx *pctx = data;
+ struct perf_ctx *perf = pctx->perf;
+ struct pci_dev *pdev = perf->ntb->pdev;
+ struct perf_mw *mw = &perf->mw;
+ char *dst;
+ u64 win_size, buf_size, total;
+ void *src;
+ int rc, node, i;
+ struct dma_chan *dma_chan = NULL;
+
+ pr_info("kthread %s starting...\n", current->comm);
+
+ node = dev_to_node(&pdev->dev);
+
+ if (use_dma && !pctx->dma_chan) {
+ dma_cap_mask_t dma_mask;
+
+ dma_cap_zero(dma_mask);
+ dma_cap_set(DMA_MEMCPY, dma_mask);
+ dma_chan = dma_request_channel(dma_mask, perf_dma_filter_fn,
+ (void *)(unsigned long)node);
+ if (!dma_chan) {
+ pr_warn("%s: cannot acquire DMA channel, quitting\n",
+ current->comm);
+ return -ENODEV;
+ }
+ pctx->dma_chan = dma_chan;
+ }
+
+ for (i = 0; i < MAX_SRCS; i++) {
+ pctx->srcs[i] = kmalloc_node(MAX_TEST_SIZE, GFP_KERNEL, node);
+ if (!pctx->srcs[i]) {
+ rc = -ENOMEM;
+ goto err;
+ }
+ }
+
+ win_size = mw->phys_size;
+ buf_size = 1ULL << seg_order;
+ total = 1ULL << run_order;
+
+ if (buf_size > MAX_TEST_SIZE)
+ buf_size = MAX_TEST_SIZE;
+
+ dst = (char *)mw->vbase;
+
+ atomic_inc(&perf->tsync);
+ while (atomic_read(&perf->tsync) != perf->perf_threads)
+ schedule();
+
+ src = pctx->srcs[pctx->src_idx];
+ pctx->src_idx = (pctx->src_idx + 1) & (MAX_SRCS - 1);
+
+ rc = perf_move_data(pctx, dst, src, buf_size, win_size, total);
+
+ atomic_dec(&perf->tsync);
+
+ if (rc < 0) {
+ pr_err("%s: failed\n", current->comm);
+ rc = -ENXIO;
+ goto err;
+ }
+
+ for (i = 0; i < MAX_SRCS; i++) {
+ kfree(pctx->srcs[i]);
+ pctx->srcs[i] = NULL;
+ }
+
+ return 0;
+
+err:
+ for (i = 0; i < MAX_SRCS; i++) {
+ kfree(pctx->srcs[i]);
+ pctx->srcs[i] = NULL;
+ }
+
+ if (dma_chan) {
+ dma_release_channel(dma_chan);
+ pctx->dma_chan = NULL;
+ }
+
+ return rc;
+}
+
+static void perf_free_mw(struct perf_ctx *perf)
+{
+ struct perf_mw *mw = &perf->mw;
+ struct pci_dev *pdev = perf->ntb->pdev;
+
+ if (!mw->virt_addr)
+ return;
+
+ ntb_mw_clear_trans(perf->ntb, 0);
+ dma_free_coherent(&pdev->dev, mw->buf_size,
+ mw->virt_addr, mw->dma_addr);
+ mw->xlat_size = 0;
+ mw->buf_size = 0;
+ mw->virt_addr = NULL;
+}
+
+static int perf_set_mw(struct perf_ctx *perf, resource_size_t size)
+{
+ struct perf_mw *mw = &perf->mw;
+ size_t xlat_size, buf_size;
+
+ if (!size)
+ return -EINVAL;
+
+ xlat_size = round_up(size, mw->xlat_align_size);
+ buf_size = round_up(size, mw->xlat_align);
+
+ if (mw->xlat_size == xlat_size)
+ return 0;
+
+ if (mw->buf_size)
+ perf_free_mw(perf);
+
+ mw->xlat_size = xlat_size;
+ mw->buf_size = buf_size;
+
+ mw->virt_addr = dma_alloc_coherent(&perf->ntb->pdev->dev, buf_size,
+ &mw->dma_addr, GFP_KERNEL);
+ if (!mw->virt_addr) {
+ mw->xlat_size = 0;
+ mw->buf_size = 0;
+ }
+
+ return 0;
+}
+
+static void perf_link_work(struct work_struct *work)
+{
+ struct perf_ctx *perf =
+ container_of(work, struct perf_ctx, link_work.work);
+ struct ntb_dev *ndev = perf->ntb;
+ struct pci_dev *pdev = ndev->pdev;
+ u32 val;
+ u64 size;
+ int rc;
+
+ dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__);
+
+ size = perf->mw.phys_size;
+ ntb_peer_spad_write(ndev, MW_SZ_HIGH, upper_32_bits(size));
+ ntb_peer_spad_write(ndev, MW_SZ_LOW, lower_32_bits(size));
+ ntb_peer_spad_write(ndev, VERSION, PERF_VERSION);
+
+ /* now read what peer wrote */
+ val = ntb_spad_read(ndev, VERSION);
+ if (val != PERF_VERSION) {
+ dev_dbg(&pdev->dev, "Remote version = %#x\n", val);
+ goto out;
+ }
+
+ val = ntb_spad_read(ndev, MW_SZ_HIGH);
+ size = (u64)val << 32;
+
+ val = ntb_spad_read(ndev, MW_SZ_LOW);
+ size |= val;
+
+ dev_dbg(&pdev->dev, "Remote MW size = %#llx\n", size);
+
+ rc = perf_set_mw(perf, size);
+ if (rc)
+ goto out1;
+
+ perf->link_is_up = true;
+
+ return;
+
+out1:
+ perf_free_mw(perf);
+
+out:
+ if (ntb_link_is_up(ndev, NULL, NULL) == 1)
+ schedule_delayed_work(&perf->link_work,
+ msecs_to_jiffies(PERF_LINK_DOWN_TIMEOUT));
+}
+
+static void perf_link_cleanup(struct work_struct *work)
+{
+ struct perf_ctx *perf = container_of(work,
+ struct perf_ctx,
+ link_cleanup);
+
+ dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__);
+
+ if (!perf->link_is_up)
+ cancel_delayed_work_sync(&perf->link_work);
+}
+
+static int perf_setup_mw(struct ntb_dev *ntb, struct perf_ctx *perf)
+{
+ struct perf_mw *mw;
+ int rc;
+
+ mw = &perf->mw;
+
+ rc = ntb_mw_get_range(ntb, 0, &mw->phys_addr, &mw->phys_size,
+ &mw->xlat_align, &mw->xlat_align_size);
+ if (rc)
+ return rc;
+
+ perf->mw.vbase = ioremap_wc(mw->phys_addr, mw->phys_size);
+ if (!mw->vbase)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *offp)
+{
+ struct perf_ctx *perf = filp->private_data;
+ char *buf;
+ ssize_t ret, out_offset;
+
+ if (!perf)
+ return 0;
+
+ buf = kmalloc(64, GFP_KERNEL);
+ out_offset = snprintf(buf, 64, "%d\n", perf->run);
+ ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset);
+ kfree(buf);
+
+ return ret;
+}
+
+static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
+ size_t count, loff_t *offp)
+{
+ struct perf_ctx *perf = filp->private_data;
+ int node, i;
+
+ if (!perf->link_is_up)
+ return 0;
+
+ if (perf->perf_threads == 0)
+ return 0;
+
+ if (atomic_read(&perf->tsync) == 0)
+ perf->run = false;
+
+ if (perf->run) {
+ /* lets stop the threads */
+ perf->run = false;
+ for (i = 0; i < MAX_THREADS; i++) {
+ if (perf->pthr_ctx[i].thread) {
+ kthread_stop(perf->pthr_ctx[i].thread);
+ perf->pthr_ctx[i].thread = NULL;
+ } else
+ break;
+ }
+ } else {
+ perf->run = true;
+
+ if (perf->perf_threads > MAX_THREADS) {
+ perf->perf_threads = MAX_THREADS;
+ pr_info("Reset total threads to: %u\n", MAX_THREADS);
+ }
+
+ /* no greater than 1M */
+ if (seg_order > MAX_SEG_ORDER) {
+ seg_order = MAX_SEG_ORDER;
+ pr_info("Fix seg_order to %u\n", seg_order);
+ }
+
+ if (run_order < seg_order) {
+ run_order = seg_order;
+ pr_info("Fix run_order to %u\n", run_order);
+ }
+
+ node = dev_to_node(&perf->ntb->pdev->dev);
+ /* launch kernel thread */
+ for (i = 0; i < perf->perf_threads; i++) {
+ struct pthr_ctx *pctx;
+
+ pctx = &perf->pthr_ctx[i];
+ atomic_set(&pctx->dma_sync, 0);
+ pctx->perf = perf;
+ pctx->thread =
+ kthread_create_on_node(ntb_perf_thread,
+ (void *)pctx,
+ node, "ntb_perf %d", i);
+ if (pctx->thread)
+ wake_up_process(pctx->thread);
+ else {
+ perf->run = false;
+ for (i = 0; i < MAX_THREADS; i++) {
+ if (pctx->thread) {
+ kthread_stop(pctx->thread);
+ pctx->thread = NULL;
+ }
+ }
+ }
+
+ if (perf->run == false)
+ return -ENXIO;
+ }
+
+ }
+
+ return count;
+}
+
+static const struct file_operations ntb_perf_debugfs_run = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = debugfs_run_read,
+ .write = debugfs_run_write,
+};
+
+static int perf_debugfs_setup(struct perf_ctx *perf)
+{
+ struct pci_dev *pdev = perf->ntb->pdev;
+
+ if (!debugfs_initialized())
+ return -ENODEV;
+
+ if (!perf_debugfs_dir) {
+ perf_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+ if (!perf_debugfs_dir)
+ return -ENODEV;
+ }
+
+ perf->debugfs_node_dir = debugfs_create_dir(pci_name(pdev),
+ perf_debugfs_dir);
+ if (!perf->debugfs_node_dir)
+ return -ENODEV;
+
+ perf->debugfs_run = debugfs_create_file("run", S_IRUSR | S_IWUSR,
+ perf->debugfs_node_dir, perf,
+ &ntb_perf_debugfs_run);
+ if (!perf->debugfs_run)
+ return -ENODEV;
+
+ perf->debugfs_threads = debugfs_create_u8("threads", S_IRUSR | S_IWUSR,
+ perf->debugfs_node_dir,
+ &perf->perf_threads);
+ if (!perf->debugfs_threads)
+ return -ENODEV;
+
+ return 0;
+}
+
+static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
+{
+ struct pci_dev *pdev = ntb->pdev;
+ struct perf_ctx *perf;
+ int node;
+ int rc = 0;
+
+ node = dev_to_node(&pdev->dev);
+
+ perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node);
+ if (!perf) {
+ rc = -ENOMEM;
+ goto err_perf;
+ }
+
+ perf->ntb = ntb;
+ perf->perf_threads = 1;
+ atomic_set(&perf->tsync, 0);
+ perf->run = false;
+ spin_lock_init(&perf->db_lock);
+ perf_setup_mw(ntb, perf);
+ INIT_DELAYED_WORK(&perf->link_work, perf_link_work);
+ INIT_WORK(&perf->link_cleanup, perf_link_cleanup);
+
+ rc = ntb_set_ctx(ntb, perf, &perf_ops);
+ if (rc)
+ goto err_ctx;
+
+ perf->link_is_up = false;
+ ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+ ntb_link_event(ntb);
+
+ rc = perf_debugfs_setup(perf);
+ if (rc)
+ goto err_ctx;
+
+ return 0;
+
+err_ctx:
+ cancel_delayed_work_sync(&perf->link_work);
+ cancel_work_sync(&perf->link_cleanup);
+ kfree(perf);
+err_perf:
+ return rc;
+}
+
+static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb)
+{
+ struct perf_ctx *perf = ntb->ctx;
+ int i;
+
+ dev_dbg(&perf->ntb->dev, "%s called\n", __func__);
+
+ cancel_delayed_work_sync(&perf->link_work);
+ cancel_work_sync(&perf->link_cleanup);
+
+ ntb_clear_ctx(ntb);
+ ntb_link_disable(ntb);
+
+ debugfs_remove_recursive(perf_debugfs_dir);
+ perf_debugfs_dir = NULL;
+
+ if (use_dma) {
+ for (i = 0; i < MAX_THREADS; i++) {
+ struct pthr_ctx *pctx = &perf->pthr_ctx[i];
+
+ if (pctx->dma_chan)
+ dma_release_channel(pctx->dma_chan);
+ }
+ }
+
+ kfree(perf);
+}
+
+static struct ntb_client perf_client = {
+ .ops = {
+ .probe = perf_probe,
+ .remove = perf_remove,
+ },
+};
+module_ntb_client(perf_client);
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index 002a94a..5d62373 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -8,3 +8,14 @@ config BLK_DEV_NVME
To compile this driver as a module, choose M here: the
module will be called nvme.
+
+config BLK_DEV_NVME_SCSI
+ bool "SCSI emulation for NVMe device nodes"
+ depends on BLK_DEV_NVME
+ ---help---
+ This adds support for the SG_IO ioctl on the NVMe character
+ and block devices nodes, as well a a translation for a small
+ number of selected SCSI commands to NVMe commands to the NVMe
+ driver. If you don't know what this means you probably want
+ to say N here, and if you know what it means you probably
+ want to say N as well.
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index a5fe239..51bf908 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -1,5 +1,6 @@
obj-$(CONFIG_BLK_DEV_NVME) += nvme.o
-lightnvm-$(CONFIG_NVM) := lightnvm.o
-nvme-y += pci.o scsi.o $(lightnvm-y)
+lightnvm-$(CONFIG_NVM) := lightnvm.o
+nvme-y += core.o pci.o $(lightnvm-y)
+nvme-$(CONFIG_BLK_DEV_NVME_SCSI) += scsi.o
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
new file mode 100644
index 0000000..c5bf001
--- /dev/null
+++ b/drivers/nvme/host/core.c
@@ -0,0 +1,1472 @@
+/*
+ * NVM Express device driver
+ * Copyright (c) 2011-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/hdreg.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/list_sort.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/pr.h>
+#include <linux/ptrace.h>
+#include <linux/nvme_ioctl.h>
+#include <linux/t10-pi.h>
+#include <scsi/sg.h>
+#include <asm/unaligned.h>
+
+#include "nvme.h"
+
+#define NVME_MINORS (1U << MINORBITS)
+
+static int nvme_major;
+module_param(nvme_major, int, 0);
+
+static int nvme_char_major;
+module_param(nvme_char_major, int, 0);
+
+static LIST_HEAD(nvme_ctrl_list);
+DEFINE_SPINLOCK(dev_list_lock);
+
+static struct class *nvme_class;
+
+static void nvme_free_ns(struct kref *kref)
+{
+ struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);
+
+ if (ns->type == NVME_NS_LIGHTNVM)
+ nvme_nvm_unregister(ns->queue, ns->disk->disk_name);
+
+ spin_lock(&dev_list_lock);
+ ns->disk->private_data = NULL;
+ spin_unlock(&dev_list_lock);
+
+ nvme_put_ctrl(ns->ctrl);
+ put_disk(ns->disk);
+ kfree(ns);
+}
+
+static void nvme_put_ns(struct nvme_ns *ns)
+{
+ kref_put(&ns->kref, nvme_free_ns);
+}
+
+static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk)
+{
+ struct nvme_ns *ns;
+
+ spin_lock(&dev_list_lock);
+ ns = disk->private_data;
+ if (ns && !kref_get_unless_zero(&ns->kref))
+ ns = NULL;
+ spin_unlock(&dev_list_lock);
+
+ return ns;
+}
+
+void nvme_requeue_req(struct request *req)
+{
+ unsigned long flags;
+
+ blk_mq_requeue_request(req);
+ spin_lock_irqsave(req->q->queue_lock, flags);
+ if (!blk_queue_stopped(req->q))
+ blk_mq_kick_requeue_list(req->q);
+ spin_unlock_irqrestore(req->q->queue_lock, flags);
+}
+
+struct request *nvme_alloc_request(struct request_queue *q,
+ struct nvme_command *cmd, unsigned int flags)
+{
+ bool write = cmd->common.opcode & 1;
+ struct request *req;
+
+ req = blk_mq_alloc_request(q, write, flags);
+ if (IS_ERR(req))
+ return req;
+
+ req->cmd_type = REQ_TYPE_DRV_PRIV;
+ req->cmd_flags |= REQ_FAILFAST_DRIVER;
+ req->__data_len = 0;
+ req->__sector = (sector_t) -1;
+ req->bio = req->biotail = NULL;
+
+ req->cmd = (unsigned char *)cmd;
+ req->cmd_len = sizeof(struct nvme_command);
+ req->special = (void *)0;
+
+ return req;
+}
+
+/*
+ * Returns 0 on success. If the result is negative, it's a Linux error code;
+ * if the result is positive, it's an NVM Express status code
+ */
+int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+ void *buffer, unsigned bufflen, u32 *result, unsigned timeout)
+{
+ struct request *req;
+ int ret;
+
+ req = nvme_alloc_request(q, cmd, 0);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
+
+ if (buffer && bufflen) {
+ ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
+ if (ret)
+ goto out;
+ }
+
+ blk_execute_rq(req->q, NULL, req, 0);
+ if (result)
+ *result = (u32)(uintptr_t)req->special;
+ ret = req->errors;
+ out:
+ blk_mq_free_request(req);
+ return ret;
+}
+
+int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+ void *buffer, unsigned bufflen)
+{
+ return __nvme_submit_sync_cmd(q, cmd, buffer, bufflen, NULL, 0);
+}
+
+int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
+ void __user *ubuffer, unsigned bufflen,
+ void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
+ u32 *result, unsigned timeout)
+{
+ bool write = cmd->common.opcode & 1;
+ struct nvme_ns *ns = q->queuedata;
+ struct gendisk *disk = ns ? ns->disk : NULL;
+ struct request *req;
+ struct bio *bio = NULL;
+ void *meta = NULL;
+ int ret;
+
+ req = nvme_alloc_request(q, cmd, 0);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
+
+ if (ubuffer && bufflen) {
+ ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
+ GFP_KERNEL);
+ if (ret)
+ goto out;
+ bio = req->bio;
+
+ if (!disk)
+ goto submit;
+ bio->bi_bdev = bdget_disk(disk, 0);
+ if (!bio->bi_bdev) {
+ ret = -ENODEV;
+ goto out_unmap;
+ }
+
+ if (meta_buffer) {
+ struct bio_integrity_payload *bip;
+
+ meta = kmalloc(meta_len, GFP_KERNEL);
+ if (!meta) {
+ ret = -ENOMEM;
+ goto out_unmap;
+ }
+
+ if (write) {
+ if (copy_from_user(meta, meta_buffer,
+ meta_len)) {
+ ret = -EFAULT;
+ goto out_free_meta;
+ }
+ }
+
+ bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
+ if (IS_ERR(bip)) {
+ ret = PTR_ERR(bip);
+ goto out_free_meta;
+ }
+
+ bip->bip_iter.bi_size = meta_len;
+ bip->bip_iter.bi_sector = meta_seed;
+
+ ret = bio_integrity_add_page(bio, virt_to_page(meta),
+ meta_len, offset_in_page(meta));
+ if (ret != meta_len) {
+ ret = -ENOMEM;
+ goto out_free_meta;
+ }
+ }
+ }
+ submit:
+ blk_execute_rq(req->q, disk, req, 0);
+ ret = req->errors;
+ if (result)
+ *result = (u32)(uintptr_t)req->special;
+ if (meta && !ret && !write) {
+ if (copy_to_user(meta_buffer, meta, meta_len))
+ ret = -EFAULT;
+ }
+ out_free_meta:
+ kfree(meta);
+ out_unmap:
+ if (bio) {
+ if (disk && bio->bi_bdev)
+ bdput(bio->bi_bdev);
+ blk_rq_unmap_user(bio);
+ }
+ out:
+ blk_mq_free_request(req);
+ return ret;
+}
+
+int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
+ void __user *ubuffer, unsigned bufflen, u32 *result,
+ unsigned timeout)
+{
+ return __nvme_submit_user_cmd(q, cmd, ubuffer, bufflen, NULL, 0, 0,
+ result, timeout);
+}
+
+int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
+{
+ struct nvme_command c = { };
+ int error;
+
+ /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
+ c.identify.opcode = nvme_admin_identify;
+ c.identify.cns = cpu_to_le32(1);
+
+ *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
+ if (!*id)
+ return -ENOMEM;
+
+ error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
+ sizeof(struct nvme_id_ctrl));
+ if (error)
+ kfree(*id);
+ return error;
+}
+
+static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list)
+{
+ struct nvme_command c = { };
+
+ c.identify.opcode = nvme_admin_identify;
+ c.identify.cns = cpu_to_le32(2);
+ c.identify.nsid = cpu_to_le32(nsid);
+ return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
+}
+
+int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
+ struct nvme_id_ns **id)
+{
+ struct nvme_command c = { };
+ int error;
+
+ /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
+ c.identify.opcode = nvme_admin_identify,
+ c.identify.nsid = cpu_to_le32(nsid),
+
+ *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
+ if (!*id)
+ return -ENOMEM;
+
+ error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
+ sizeof(struct nvme_id_ns));
+ if (error)
+ kfree(*id);
+ return error;
+}
+
+int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
+ dma_addr_t dma_addr, u32 *result)
+{
+ struct nvme_command c;
+
+ memset(&c, 0, sizeof(c));
+ c.features.opcode = nvme_admin_get_features;
+ c.features.nsid = cpu_to_le32(nsid);
+ c.features.prp1 = cpu_to_le64(dma_addr);
+ c.features.fid = cpu_to_le32(fid);
+
+ return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0);
+}
+
+int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
+ dma_addr_t dma_addr, u32 *result)
+{
+ struct nvme_command c;
+
+ memset(&c, 0, sizeof(c));
+ c.features.opcode = nvme_admin_set_features;
+ c.features.prp1 = cpu_to_le64(dma_addr);
+ c.features.fid = cpu_to_le32(fid);
+ c.features.dword11 = cpu_to_le32(dword11);
+
+ return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0);
+}
+
+int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
+{
+ struct nvme_command c = { };
+ int error;
+
+ c.common.opcode = nvme_admin_get_log_page,
+ c.common.nsid = cpu_to_le32(0xFFFFFFFF),
+ c.common.cdw10[0] = cpu_to_le32(
+ (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
+ NVME_LOG_SMART),
+
+ *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
+ if (!*log)
+ return -ENOMEM;
+
+ error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
+ sizeof(struct nvme_smart_log));
+ if (error)
+ kfree(*log);
+ return error;
+}
+
+int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
+{
+ u32 q_count = (*count - 1) | ((*count - 1) << 16);
+ u32 result;
+ int status, nr_io_queues;
+
+ status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count, 0,
+ &result);
+ if (status)
+ return status;
+
+ nr_io_queues = min(result & 0xffff, result >> 16) + 1;
+ *count = min(*count, nr_io_queues);
+ return 0;
+}
+
+static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
+{
+ struct nvme_user_io io;
+ struct nvme_command c;
+ unsigned length, meta_len;
+ void __user *metadata;
+
+ if (copy_from_user(&io, uio, sizeof(io)))
+ return -EFAULT;
+
+ switch (io.opcode) {
+ case nvme_cmd_write:
+ case nvme_cmd_read:
+ case nvme_cmd_compare:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ length = (io.nblocks + 1) << ns->lba_shift;
+ meta_len = (io.nblocks + 1) * ns->ms;
+ metadata = (void __user *)(uintptr_t)io.metadata;
+
+ if (ns->ext) {
+ length += meta_len;
+ meta_len = 0;
+ } else if (meta_len) {
+ if ((io.metadata & 3) || !io.metadata)
+ return -EINVAL;
+ }
+
+ memset(&c, 0, sizeof(c));
+ c.rw.opcode = io.opcode;
+ c.rw.flags = io.flags;
+ c.rw.nsid = cpu_to_le32(ns->ns_id);
+ c.rw.slba = cpu_to_le64(io.slba);
+ c.rw.length = cpu_to_le16(io.nblocks);
+ c.rw.control = cpu_to_le16(io.control);
+ c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);
+ c.rw.reftag = cpu_to_le32(io.reftag);
+ c.rw.apptag = cpu_to_le16(io.apptag);
+ c.rw.appmask = cpu_to_le16(io.appmask);
+
+ return __nvme_submit_user_cmd(ns->queue, &c,
+ (void __user *)(uintptr_t)io.addr, length,
+ metadata, meta_len, io.slba, NULL, 0);
+}
+
+static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
+ struct nvme_passthru_cmd __user *ucmd)
+{
+ struct nvme_passthru_cmd cmd;
+ struct nvme_command c;
+ unsigned timeout = 0;
+ int status;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
+ return -EFAULT;
+
+ memset(&c, 0, sizeof(c));
+ c.common.opcode = cmd.opcode;
+ c.common.flags = cmd.flags;
+ c.common.nsid = cpu_to_le32(cmd.nsid);
+ c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
+ c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
+ c.common.cdw10[0] = cpu_to_le32(cmd.cdw10);
+ c.common.cdw10[1] = cpu_to_le32(cmd.cdw11);
+ c.common.cdw10[2] = cpu_to_le32(cmd.cdw12);
+ c.common.cdw10[3] = cpu_to_le32(cmd.cdw13);
+ c.common.cdw10[4] = cpu_to_le32(cmd.cdw14);
+ c.common.cdw10[5] = cpu_to_le32(cmd.cdw15);
+
+ if (cmd.timeout_ms)
+ timeout = msecs_to_jiffies(cmd.timeout_ms);
+
+ status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
+ (void __user *)(uintptr_t)cmd.addr, cmd.data_len,
+ &cmd.result, timeout);
+ if (status >= 0) {
+ if (put_user(cmd.result, &ucmd->result))
+ return -EFAULT;
+ }
+
+ return status;
+}
+
+static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
+{
+ struct nvme_ns *ns = bdev->bd_disk->private_data;
+
+ switch (cmd) {
+ case NVME_IOCTL_ID:
+ force_successful_syscall_return();
+ return ns->ns_id;
+ case NVME_IOCTL_ADMIN_CMD:
+ return nvme_user_cmd(ns->ctrl, NULL, (void __user *)arg);
+ case NVME_IOCTL_IO_CMD:
+ return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
+ case NVME_IOCTL_SUBMIT_IO:
+ return nvme_submit_io(ns, (void __user *)arg);
+#ifdef CONFIG_BLK_DEV_NVME_SCSI
+ case SG_GET_VERSION_NUM:
+ return nvme_sg_get_version_num((void __user *)arg);
+ case SG_IO:
+ return nvme_sg_io(ns, (void __user *)arg);
+#endif
+ default:
+ return -ENOTTY;
+ }
+}
+
+#ifdef CONFIG_COMPAT
+static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
+{
+ switch (cmd) {
+ case SG_IO:
+ return -ENOIOCTLCMD;
+ }
+ return nvme_ioctl(bdev, mode, cmd, arg);
+}
+#else
+#define nvme_compat_ioctl NULL
+#endif
+
+static int nvme_open(struct block_device *bdev, fmode_t mode)
+{
+ return nvme_get_ns_from_disk(bdev->bd_disk) ? 0 : -ENXIO;
+}
+
+static void nvme_release(struct gendisk *disk, fmode_t mode)
+{
+ nvme_put_ns(disk->private_data);
+}
+
+static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+ /* some standard values */
+ geo->heads = 1 << 6;
+ geo->sectors = 1 << 5;
+ geo->cylinders = get_capacity(bdev->bd_disk) >> 11;
+ return 0;
+}
+
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+static void nvme_init_integrity(struct nvme_ns *ns)
+{
+ struct blk_integrity integrity;
+
+ switch (ns->pi_type) {
+ case NVME_NS_DPS_PI_TYPE3:
+ integrity.profile = &t10_pi_type3_crc;
+ break;
+ case NVME_NS_DPS_PI_TYPE1:
+ case NVME_NS_DPS_PI_TYPE2:
+ integrity.profile = &t10_pi_type1_crc;
+ break;
+ default:
+ integrity.profile = NULL;
+ break;
+ }
+ integrity.tuple_size = ns->ms;
+ blk_integrity_register(ns->disk, &integrity);
+ blk_queue_max_integrity_segments(ns->queue, 1);
+}
+#else
+static void nvme_init_integrity(struct nvme_ns *ns)
+{
+}
+#endif /* CONFIG_BLK_DEV_INTEGRITY */
+
+static void nvme_config_discard(struct nvme_ns *ns)
+{
+ u32 logical_block_size = queue_logical_block_size(ns->queue);
+ ns->queue->limits.discard_zeroes_data = 0;
+ ns->queue->limits.discard_alignment = logical_block_size;
+ ns->queue->limits.discard_granularity = logical_block_size;
+ blk_queue_max_discard_sectors(ns->queue, 0xffffffff);
+ queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
+}
+
+static int nvme_revalidate_disk(struct gendisk *disk)
+{
+ struct nvme_ns *ns = disk->private_data;
+ struct nvme_id_ns *id;
+ u8 lbaf, pi_type;
+ u16 old_ms;
+ unsigned short bs;
+
+ if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) {
+ dev_warn(ns->ctrl->dev, "%s: Identify failure nvme%dn%d\n",
+ __func__, ns->ctrl->instance, ns->ns_id);
+ return -ENODEV;
+ }
+ if (id->ncap == 0) {
+ kfree(id);
+ return -ENODEV;
+ }
+
+ if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) {
+ if (nvme_nvm_register(ns->queue, disk->disk_name)) {
+ dev_warn(ns->ctrl->dev,
+ "%s: LightNVM init failure\n", __func__);
+ kfree(id);
+ return -ENODEV;
+ }
+ ns->type = NVME_NS_LIGHTNVM;
+ }
+
+ if (ns->ctrl->vs >= NVME_VS(1, 1))
+ memcpy(ns->eui, id->eui64, sizeof(ns->eui));
+ if (ns->ctrl->vs >= NVME_VS(1, 2))
+ memcpy(ns->uuid, id->nguid, sizeof(ns->uuid));
+
+ old_ms = ns->ms;
+ lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
+ ns->lba_shift = id->lbaf[lbaf].ds;
+ ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
+ ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
+
+ /*
+ * If identify namespace failed, use default 512 byte block size so
+ * block layer can use before failing read/write for 0 capacity.
+ */
+ if (ns->lba_shift == 0)
+ ns->lba_shift = 9;
+ bs = 1 << ns->lba_shift;
+ /* XXX: PI implementation requires metadata equal t10 pi tuple size */
+ pi_type = ns->ms == sizeof(struct t10_pi_tuple) ?
+ id->dps & NVME_NS_DPS_PI_MASK : 0;
+
+ blk_mq_freeze_queue(disk->queue);
+ if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
+ ns->ms != old_ms ||
+ bs != queue_logical_block_size(disk->queue) ||
+ (ns->ms && ns->ext)))
+ blk_integrity_unregister(disk);
+
+ ns->pi_type = pi_type;
+ blk_queue_logical_block_size(ns->queue, bs);
+
+ if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
+ nvme_init_integrity(ns);
+ if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
+ set_capacity(disk, 0);
+ else
+ set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
+
+ if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM)
+ nvme_config_discard(ns);
+ blk_mq_unfreeze_queue(disk->queue);
+
+ kfree(id);
+ return 0;
+}
+
+static char nvme_pr_type(enum pr_type type)
+{
+ switch (type) {
+ case PR_WRITE_EXCLUSIVE:
+ return 1;
+ case PR_EXCLUSIVE_ACCESS:
+ return 2;
+ case PR_WRITE_EXCLUSIVE_REG_ONLY:
+ return 3;
+ case PR_EXCLUSIVE_ACCESS_REG_ONLY:
+ return 4;
+ case PR_WRITE_EXCLUSIVE_ALL_REGS:
+ return 5;
+ case PR_EXCLUSIVE_ACCESS_ALL_REGS:
+ return 6;
+ default:
+ return 0;
+ }
+};
+
+static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
+ u64 key, u64 sa_key, u8 op)
+{
+ struct nvme_ns *ns = bdev->bd_disk->private_data;
+ struct nvme_command c;
+ u8 data[16] = { 0, };
+
+ put_unaligned_le64(key, &data[0]);
+ put_unaligned_le64(sa_key, &data[8]);
+
+ memset(&c, 0, sizeof(c));
+ c.common.opcode = op;
+ c.common.nsid = cpu_to_le32(ns->ns_id);
+ c.common.cdw10[0] = cpu_to_le32(cdw10);
+
+ return nvme_submit_sync_cmd(ns->queue, &c, data, 16);
+}
+
+static int nvme_pr_register(struct block_device *bdev, u64 old,
+ u64 new, unsigned flags)
+{
+ u32 cdw10;
+
+ if (flags & ~PR_FL_IGNORE_KEY)
+ return -EOPNOTSUPP;
+
+ cdw10 = old ? 2 : 0;
+ cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0;
+ cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */
+ return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register);
+}
+
+static int nvme_pr_reserve(struct block_device *bdev, u64 key,
+ enum pr_type type, unsigned flags)
+{
+ u32 cdw10;
+
+ if (flags & ~PR_FL_IGNORE_KEY)
+ return -EOPNOTSUPP;
+
+ cdw10 = nvme_pr_type(type) << 8;
+ cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0);
+ return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire);
+}
+
+static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
+ enum pr_type type, bool abort)
+{
+ u32 cdw10 = nvme_pr_type(type) << 8 | abort ? 2 : 1;
+ return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire);
+}
+
+static int nvme_pr_clear(struct block_device *bdev, u64 key)
+{
+ u32 cdw10 = 1 | (key ? 1 << 3 : 0);
+ return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register);
+}
+
+static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
+{
+ u32 cdw10 = nvme_pr_type(type) << 8 | key ? 1 << 3 : 0;
+ return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
+}
+
+static const struct pr_ops nvme_pr_ops = {
+ .pr_register = nvme_pr_register,
+ .pr_reserve = nvme_pr_reserve,
+ .pr_release = nvme_pr_release,
+ .pr_preempt = nvme_pr_preempt,
+ .pr_clear = nvme_pr_clear,
+};
+
+static const struct block_device_operations nvme_fops = {
+ .owner = THIS_MODULE,
+ .ioctl = nvme_ioctl,
+ .compat_ioctl = nvme_compat_ioctl,
+ .open = nvme_open,
+ .release = nvme_release,
+ .getgeo = nvme_getgeo,
+ .revalidate_disk= nvme_revalidate_disk,
+ .pr_ops = &nvme_pr_ops,
+};
+
+static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
+{
+ unsigned long timeout =
+ ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
+ u32 csts, bit = enabled ? NVME_CSTS_RDY : 0;
+ int ret;
+
+ while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
+ if ((csts & NVME_CSTS_RDY) == bit)
+ break;
+
+ msleep(100);
+ if (fatal_signal_pending(current))
+ return -EINTR;
+ if (time_after(jiffies, timeout)) {
+ dev_err(ctrl->dev,
+ "Device not ready; aborting %s\n", enabled ?
+ "initialisation" : "reset");
+ return -ENODEV;
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * If the device has been passed off to us in an enabled state, just clear
+ * the enabled bit. The spec says we should set the 'shutdown notification
+ * bits', but doing so may cause the device to complete commands to the
+ * admin queue ... and we don't know what memory that might be pointing at!
+ */
+int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
+{
+ int ret;
+
+ ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
+ ctrl->ctrl_config &= ~NVME_CC_ENABLE;
+
+ ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
+ if (ret)
+ return ret;
+ return nvme_wait_ready(ctrl, cap, false);
+}
+
+int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
+{
+ /*
+ * Default to a 4K page size, with the intention to update this
+ * path in the future to accomodate architectures with differing
+ * kernel and IO page sizes.
+ */
+ unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12, page_shift = 12;
+ int ret;
+
+ if (page_shift < dev_page_min) {
+ dev_err(ctrl->dev,
+ "Minimum device page size %u too large for host (%u)\n",
+ 1 << dev_page_min, 1 << page_shift);
+ return -ENODEV;
+ }
+
+ ctrl->page_size = 1 << page_shift;
+
+ ctrl->ctrl_config = NVME_CC_CSS_NVM;
+ ctrl->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
+ ctrl->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
+ ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
+ ctrl->ctrl_config |= NVME_CC_ENABLE;
+
+ ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
+ if (ret)
+ return ret;
+ return nvme_wait_ready(ctrl, cap, true);
+}
+
+int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
+{
+ unsigned long timeout = SHUTDOWN_TIMEOUT + jiffies;
+ u32 csts;
+ int ret;
+
+ ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
+ ctrl->ctrl_config |= NVME_CC_SHN_NORMAL;
+
+ ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
+ if (ret)
+ return ret;
+
+ while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
+ if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_CMPLT)
+ break;
+
+ msleep(100);
+ if (fatal_signal_pending(current))
+ return -EINTR;
+ if (time_after(jiffies, timeout)) {
+ dev_err(ctrl->dev,
+ "Device shutdown incomplete; abort shutdown\n");
+ return -ENODEV;
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * Initialize the cached copies of the Identify data and various controller
+ * register in our nvme_ctrl structure. This should be called as soon as
+ * the admin queue is fully up and running.
+ */
+int nvme_init_identify(struct nvme_ctrl *ctrl)
+{
+ struct nvme_id_ctrl *id;
+ u64 cap;
+ int ret, page_shift;
+
+ ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
+ if (ret) {
+ dev_err(ctrl->dev, "Reading VS failed (%d)\n", ret);
+ return ret;
+ }
+
+ ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &cap);
+ if (ret) {
+ dev_err(ctrl->dev, "Reading CAP failed (%d)\n", ret);
+ return ret;
+ }
+ page_shift = NVME_CAP_MPSMIN(cap) + 12;
+
+ if (ctrl->vs >= NVME_VS(1, 1))
+ ctrl->subsystem = NVME_CAP_NSSRC(cap);
+
+ ret = nvme_identify_ctrl(ctrl, &id);
+ if (ret) {
+ dev_err(ctrl->dev, "Identify Controller failed (%d)\n", ret);
+ return -EIO;
+ }
+
+ ctrl->oncs = le16_to_cpup(&id->oncs);
+ atomic_set(&ctrl->abort_limit, id->acl + 1);
+ ctrl->vwc = id->vwc;
+ memcpy(ctrl->serial, id->sn, sizeof(id->sn));
+ memcpy(ctrl->model, id->mn, sizeof(id->mn));
+ memcpy(ctrl->firmware_rev, id->fr, sizeof(id->fr));
+ if (id->mdts)
+ ctrl->max_hw_sectors = 1 << (id->mdts + page_shift - 9);
+ else
+ ctrl->max_hw_sectors = UINT_MAX;
+
+ if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && id->vs[3]) {
+ unsigned int max_hw_sectors;
+
+ ctrl->stripe_size = 1 << (id->vs[3] + page_shift);
+ max_hw_sectors = ctrl->stripe_size >> (page_shift - 9);
+ if (ctrl->max_hw_sectors) {
+ ctrl->max_hw_sectors = min(max_hw_sectors,
+ ctrl->max_hw_sectors);
+ } else {
+ ctrl->max_hw_sectors = max_hw_sectors;
+ }
+ }
+
+ kfree(id);
+ return 0;
+}
+
+static int nvme_dev_open(struct inode *inode, struct file *file)
+{
+ struct nvme_ctrl *ctrl;
+ int instance = iminor(inode);
+ int ret = -ENODEV;
+
+ spin_lock(&dev_list_lock);
+ list_for_each_entry(ctrl, &nvme_ctrl_list, node) {
+ if (ctrl->instance != instance)
+ continue;
+
+ if (!ctrl->admin_q) {
+ ret = -EWOULDBLOCK;
+ break;
+ }
+ if (!kref_get_unless_zero(&ctrl->kref))
+ break;
+ file->private_data = ctrl;
+ ret = 0;
+ break;
+ }
+ spin_unlock(&dev_list_lock);
+
+ return ret;
+}
+
+static int nvme_dev_release(struct inode *inode, struct file *file)
+{
+ nvme_put_ctrl(file->private_data);
+ return 0;
+}
+
+static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
+{
+ struct nvme_ns *ns;
+ int ret;
+
+ mutex_lock(&ctrl->namespaces_mutex);
+ if (list_empty(&ctrl->namespaces)) {
+ ret = -ENOTTY;
+ goto out_unlock;
+ }
+
+ ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list);
+ if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
+ dev_warn(ctrl->dev,
+ "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ dev_warn(ctrl->dev,
+ "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
+ kref_get(&ns->kref);
+ mutex_unlock(&ctrl->namespaces_mutex);
+
+ ret = nvme_user_cmd(ctrl, ns, argp);
+ nvme_put_ns(ns);
+ return ret;
+
+out_unlock:
+ mutex_unlock(&ctrl->namespaces_mutex);
+ return ret;
+}
+
+static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ struct nvme_ctrl *ctrl = file->private_data;
+ void __user *argp = (void __user *)arg;
+
+ switch (cmd) {
+ case NVME_IOCTL_ADMIN_CMD:
+ return nvme_user_cmd(ctrl, NULL, argp);
+ case NVME_IOCTL_IO_CMD:
+ return nvme_dev_user_cmd(ctrl, argp);
+ case NVME_IOCTL_RESET:
+ dev_warn(ctrl->dev, "resetting controller\n");
+ return ctrl->ops->reset_ctrl(ctrl);
+ case NVME_IOCTL_SUBSYS_RESET:
+ return nvme_reset_subsystem(ctrl);
+ default:
+ return -ENOTTY;
+ }
+}
+
+static const struct file_operations nvme_dev_fops = {
+ .owner = THIS_MODULE,
+ .open = nvme_dev_open,
+ .release = nvme_dev_release,
+ .unlocked_ioctl = nvme_dev_ioctl,
+ .compat_ioctl = nvme_dev_ioctl,
+};
+
+static ssize_t nvme_sysfs_reset(struct device *dev,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+ int ret;
+
+ ret = ctrl->ops->reset_ctrl(ctrl);
+ if (ret < 0)
+ return ret;
+ return count;
+}
+static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);
+
+static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_ns *ns = dev_to_disk(dev)->private_data;
+ return sprintf(buf, "%pU\n", ns->uuid);
+}
+static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL);
+
+static ssize_t eui_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_ns *ns = dev_to_disk(dev)->private_data;
+ return sprintf(buf, "%8phd\n", ns->eui);
+}
+static DEVICE_ATTR(eui, S_IRUGO, eui_show, NULL);
+
+static ssize_t nsid_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_ns *ns = dev_to_disk(dev)->private_data;
+ return sprintf(buf, "%d\n", ns->ns_id);
+}
+static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL);
+
+static struct attribute *nvme_ns_attrs[] = {
+ &dev_attr_uuid.attr,
+ &dev_attr_eui.attr,
+ &dev_attr_nsid.attr,
+ NULL,
+};
+
+static umode_t nvme_attrs_are_visible(struct kobject *kobj,
+ struct attribute *a, int n)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct nvme_ns *ns = dev_to_disk(dev)->private_data;
+
+ if (a == &dev_attr_uuid.attr) {
+ if (!memchr_inv(ns->uuid, 0, sizeof(ns->uuid)))
+ return 0;
+ }
+ if (a == &dev_attr_eui.attr) {
+ if (!memchr_inv(ns->eui, 0, sizeof(ns->eui)))
+ return 0;
+ }
+ return a->mode;
+}
+
+static const struct attribute_group nvme_ns_attr_group = {
+ .attrs = nvme_ns_attrs,
+ .is_visible = nvme_attrs_are_visible,
+};
+
+#define nvme_show_function(field) \
+static ssize_t field##_show(struct device *dev, \
+ struct device_attribute *attr, char *buf) \
+{ \
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev); \
+ return sprintf(buf, "%.*s\n", (int)sizeof(ctrl->field), ctrl->field); \
+} \
+static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);
+
+nvme_show_function(model);
+nvme_show_function(serial);
+nvme_show_function(firmware_rev);
+
+static struct attribute *nvme_dev_attrs[] = {
+ &dev_attr_reset_controller.attr,
+ &dev_attr_model.attr,
+ &dev_attr_serial.attr,
+ &dev_attr_firmware_rev.attr,
+ NULL
+};
+
+static struct attribute_group nvme_dev_attrs_group = {
+ .attrs = nvme_dev_attrs,
+};
+
+static const struct attribute_group *nvme_dev_attr_groups[] = {
+ &nvme_dev_attrs_group,
+ NULL,
+};
+
+static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+ struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
+ struct nvme_ns *nsb = container_of(b, struct nvme_ns, list);
+
+ return nsa->ns_id - nsb->ns_id;
+}
+
+static struct nvme_ns *nvme_find_ns(struct nvme_ctrl *ctrl, unsigned nsid)
+{
+ struct nvme_ns *ns;
+
+ lockdep_assert_held(&ctrl->namespaces_mutex);
+
+ list_for_each_entry(ns, &ctrl->namespaces, list) {
+ if (ns->ns_id == nsid)
+ return ns;
+ if (ns->ns_id > nsid)
+ break;
+ }
+ return NULL;
+}
+
+static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
+{
+ struct nvme_ns *ns;
+ struct gendisk *disk;
+ int node = dev_to_node(ctrl->dev);
+
+ lockdep_assert_held(&ctrl->namespaces_mutex);
+
+ ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
+ if (!ns)
+ return;
+
+ ns->queue = blk_mq_init_queue(ctrl->tagset);
+ if (IS_ERR(ns->queue))
+ goto out_free_ns;
+ queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue);
+ queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
+ ns->queue->queuedata = ns;
+ ns->ctrl = ctrl;
+
+ disk = alloc_disk_node(0, node);
+ if (!disk)
+ goto out_free_queue;
+
+ kref_init(&ns->kref);
+ ns->ns_id = nsid;
+ ns->disk = disk;
+ ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */
+
+ blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
+ if (ctrl->max_hw_sectors) {
+ blk_queue_max_hw_sectors(ns->queue, ctrl->max_hw_sectors);
+ blk_queue_max_segments(ns->queue,
+ (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1);
+ }
+ if (ctrl->stripe_size)
+ blk_queue_chunk_sectors(ns->queue, ctrl->stripe_size >> 9);
+ if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
+ blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA);
+ blk_queue_virt_boundary(ns->queue, ctrl->page_size - 1);
+
+ disk->major = nvme_major;
+ disk->first_minor = 0;
+ disk->fops = &nvme_fops;
+ disk->private_data = ns;
+ disk->queue = ns->queue;
+ disk->driverfs_dev = ctrl->device;
+ disk->flags = GENHD_FL_EXT_DEVT;
+ sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, nsid);
+
+ if (nvme_revalidate_disk(ns->disk))
+ goto out_free_disk;
+
+ list_add_tail(&ns->list, &ctrl->namespaces);
+ kref_get(&ctrl->kref);
+ if (ns->type == NVME_NS_LIGHTNVM)
+ return;
+
+ add_disk(ns->disk);
+ if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
+ &nvme_ns_attr_group))
+ pr_warn("%s: failed to create sysfs group for identification\n",
+ ns->disk->disk_name);
+ return;
+ out_free_disk:
+ kfree(disk);
+ out_free_queue:
+ blk_cleanup_queue(ns->queue);
+ out_free_ns:
+ kfree(ns);
+}
+
+static void nvme_ns_remove(struct nvme_ns *ns)
+{
+ bool kill = nvme_io_incapable(ns->ctrl) &&
+ !blk_queue_dying(ns->queue);
+
+ lockdep_assert_held(&ns->ctrl->namespaces_mutex);
+
+ if (kill) {
+ blk_set_queue_dying(ns->queue);
+
+ /*
+ * The controller was shutdown first if we got here through
+ * device removal. The shutdown may requeue outstanding
+ * requests. These need to be aborted immediately so
+ * del_gendisk doesn't block indefinitely for their completion.
+ */
+ blk_mq_abort_requeue_list(ns->queue);
+ }
+ if (ns->disk->flags & GENHD_FL_UP) {
+ if (blk_get_integrity(ns->disk))
+ blk_integrity_unregister(ns->disk);
+ sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
+ &nvme_ns_attr_group);
+ del_gendisk(ns->disk);
+ }
+ if (kill || !blk_queue_dying(ns->queue)) {
+ blk_mq_abort_requeue_list(ns->queue);
+ blk_cleanup_queue(ns->queue);
+ }
+ list_del_init(&ns->list);
+ nvme_put_ns(ns);
+}
+
+static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid)
+{
+ struct nvme_ns *ns;
+
+ ns = nvme_find_ns(ctrl, nsid);
+ if (ns) {
+ if (revalidate_disk(ns->disk))
+ nvme_ns_remove(ns);
+ } else
+ nvme_alloc_ns(ctrl, nsid);
+}
+
+static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
+{
+ struct nvme_ns *ns;
+ __le32 *ns_list;
+ unsigned i, j, nsid, prev = 0, num_lists = DIV_ROUND_UP(nn, 1024);
+ int ret = 0;
+
+ ns_list = kzalloc(0x1000, GFP_KERNEL);
+ if (!ns_list)
+ return -ENOMEM;
+
+ for (i = 0; i < num_lists; i++) {
+ ret = nvme_identify_ns_list(ctrl, prev, ns_list);
+ if (ret)
+ goto out;
+
+ for (j = 0; j < min(nn, 1024U); j++) {
+ nsid = le32_to_cpu(ns_list[j]);
+ if (!nsid)
+ goto out;
+
+ nvme_validate_ns(ctrl, nsid);
+
+ while (++prev < nsid) {
+ ns = nvme_find_ns(ctrl, prev);
+ if (ns)
+ nvme_ns_remove(ns);
+ }
+ }
+ nn -= j;
+ }
+ out:
+ kfree(ns_list);
+ return ret;
+}
+
+static void __nvme_scan_namespaces(struct nvme_ctrl *ctrl, unsigned nn)
+{
+ struct nvme_ns *ns, *next;
+ unsigned i;
+
+ lockdep_assert_held(&ctrl->namespaces_mutex);
+
+ for (i = 1; i <= nn; i++)
+ nvme_validate_ns(ctrl, i);
+
+ list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
+ if (ns->ns_id > nn)
+ nvme_ns_remove(ns);
+ }
+}
+
+void nvme_scan_namespaces(struct nvme_ctrl *ctrl)
+{
+ struct nvme_id_ctrl *id;
+ unsigned nn;
+
+ if (nvme_identify_ctrl(ctrl, &id))
+ return;
+
+ mutex_lock(&ctrl->namespaces_mutex);
+ nn = le32_to_cpu(id->nn);
+ if (ctrl->vs >= NVME_VS(1, 1) &&
+ !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
+ if (!nvme_scan_ns_list(ctrl, nn))
+ goto done;
+ }
+ __nvme_scan_namespaces(ctrl, le32_to_cpup(&id->nn));
+ done:
+ list_sort(NULL, &ctrl->namespaces, ns_cmp);
+ mutex_unlock(&ctrl->namespaces_mutex);
+ kfree(id);
+}
+
+void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
+{
+ struct nvme_ns *ns, *next;
+
+ mutex_lock(&ctrl->namespaces_mutex);
+ list_for_each_entry_safe(ns, next, &ctrl->namespaces, list)
+ nvme_ns_remove(ns);
+ mutex_unlock(&ctrl->namespaces_mutex);
+}
+
+static DEFINE_IDA(nvme_instance_ida);
+
+static int nvme_set_instance(struct nvme_ctrl *ctrl)
+{
+ int instance, error;
+
+ do {
+ if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL))
+ return -ENODEV;
+
+ spin_lock(&dev_list_lock);
+ error = ida_get_new(&nvme_instance_ida, &instance);
+ spin_unlock(&dev_list_lock);
+ } while (error == -EAGAIN);
+
+ if (error)
+ return -ENODEV;
+
+ ctrl->instance = instance;
+ return 0;
+}
+
+static void nvme_release_instance(struct nvme_ctrl *ctrl)
+{
+ spin_lock(&dev_list_lock);
+ ida_remove(&nvme_instance_ida, ctrl->instance);
+ spin_unlock(&dev_list_lock);
+}
+
+void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
+ {
+ device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance));
+
+ spin_lock(&dev_list_lock);
+ list_del(&ctrl->node);
+ spin_unlock(&dev_list_lock);
+}
+
+static void nvme_free_ctrl(struct kref *kref)
+{
+ struct nvme_ctrl *ctrl = container_of(kref, struct nvme_ctrl, kref);
+
+ put_device(ctrl->device);
+ nvme_release_instance(ctrl);
+
+ ctrl->ops->free_ctrl(ctrl);
+}
+
+void nvme_put_ctrl(struct nvme_ctrl *ctrl)
+{
+ kref_put(&ctrl->kref, nvme_free_ctrl);
+}
+
+/*
+ * Initialize a NVMe controller structures. This needs to be called during
+ * earliest initialization so that we have the initialized structured around
+ * during probing.
+ */
+int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
+ const struct nvme_ctrl_ops *ops, unsigned long quirks)
+{
+ int ret;
+
+ INIT_LIST_HEAD(&ctrl->namespaces);
+ mutex_init(&ctrl->namespaces_mutex);
+ kref_init(&ctrl->kref);
+ ctrl->dev = dev;
+ ctrl->ops = ops;
+ ctrl->quirks = quirks;
+
+ ret = nvme_set_instance(ctrl);
+ if (ret)
+ goto out;
+
+ ctrl->device = device_create_with_groups(nvme_class, ctrl->dev,
+ MKDEV(nvme_char_major, ctrl->instance),
+ dev, nvme_dev_attr_groups,
+ "nvme%d", ctrl->instance);
+ if (IS_ERR(ctrl->device)) {
+ ret = PTR_ERR(ctrl->device);
+ goto out_release_instance;
+ }
+ get_device(ctrl->device);
+ dev_set_drvdata(ctrl->device, ctrl);
+
+ spin_lock(&dev_list_lock);
+ list_add_tail(&ctrl->node, &nvme_ctrl_list);
+ spin_unlock(&dev_list_lock);
+
+ return 0;
+out_release_instance:
+ nvme_release_instance(ctrl);
+out:
+ return ret;
+}
+
+void nvme_stop_queues(struct nvme_ctrl *ctrl)
+{
+ struct nvme_ns *ns;
+
+ mutex_lock(&ctrl->namespaces_mutex);
+ list_for_each_entry(ns, &ctrl->namespaces, list) {
+ spin_lock_irq(ns->queue->queue_lock);
+ queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue);
+ spin_unlock_irq(ns->queue->queue_lock);
+
+ blk_mq_cancel_requeue_work(ns->queue);
+ blk_mq_stop_hw_queues(ns->queue);
+ }
+ mutex_unlock(&ctrl->namespaces_mutex);
+}
+
+void nvme_start_queues(struct nvme_ctrl *ctrl)
+{
+ struct nvme_ns *ns;
+
+ mutex_lock(&ctrl->namespaces_mutex);
+ list_for_each_entry(ns, &ctrl->namespaces, list) {
+ queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, ns->queue);
+ blk_mq_start_stopped_hw_queues(ns->queue, true);
+ blk_mq_kick_requeue_list(ns->queue);
+ }
+ mutex_unlock(&ctrl->namespaces_mutex);
+}
+
+int __init nvme_core_init(void)
+{
+ int result;
+
+ result = register_blkdev(nvme_major, "nvme");
+ if (result < 0)
+ return result;
+ else if (result > 0)
+ nvme_major = result;
+
+ result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
+ &nvme_dev_fops);
+ if (result < 0)
+ goto unregister_blkdev;
+ else if (result > 0)
+ nvme_char_major = result;
+
+ nvme_class = class_create(THIS_MODULE, "nvme");
+ if (IS_ERR(nvme_class)) {
+ result = PTR_ERR(nvme_class);
+ goto unregister_chrdev;
+ }
+
+ return 0;
+
+ unregister_chrdev:
+ __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+ unregister_blkdev:
+ unregister_blkdev(nvme_major, "nvme");
+ return result;
+}
+
+void nvme_core_exit(void)
+{
+ unregister_blkdev(nvme_major, "nvme");
+ class_destroy(nvme_class);
+ __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+}
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 1af54ea..5cd3725 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -146,6 +146,16 @@ struct nvme_nvm_command {
};
};
+struct nvme_nvm_lp_mlc {
+ __u16 num_pairs;
+ __u8 pairs[886];
+};
+
+struct nvme_nvm_lp_tbl {
+ __u8 id[8];
+ struct nvme_nvm_lp_mlc mlc;
+};
+
struct nvme_nvm_id_group {
__u8 mtype;
__u8 fmtype;
@@ -169,7 +179,8 @@ struct nvme_nvm_id_group {
__le32 mpos;
__le32 mccap;
__le16 cpar;
- __u8 reserved[906];
+ __u8 reserved[10];
+ struct nvme_nvm_lp_tbl lptbl;
} __packed;
struct nvme_nvm_addr_format {
@@ -266,6 +277,15 @@ static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
dst->mccap = le32_to_cpu(src->mccap);
dst->cpar = le16_to_cpu(src->cpar);
+
+ if (dst->fmtype == NVM_ID_FMTYPE_MLC) {
+ memcpy(dst->lptbl.id, src->lptbl.id, 8);
+ dst->lptbl.mlc.num_pairs =
+ le16_to_cpu(src->lptbl.mlc.num_pairs);
+ /* 4 bits per pair */
+ memcpy(dst->lptbl.mlc.pairs, src->lptbl.mlc.pairs,
+ dst->lptbl.mlc.num_pairs >> 1);
+ }
}
return 0;
@@ -274,7 +294,6 @@ static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id)
{
struct nvme_ns *ns = nvmdev->q->queuedata;
- struct nvme_dev *dev = ns->dev;
struct nvme_nvm_id *nvme_nvm_id;
struct nvme_nvm_command c = {};
int ret;
@@ -287,7 +306,7 @@ static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id)
if (!nvme_nvm_id)
return -ENOMEM;
- ret = nvme_submit_sync_cmd(dev->admin_q, (struct nvme_command *)&c,
+ ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c,
nvme_nvm_id, sizeof(struct nvme_nvm_id));
if (ret) {
ret = -EIO;
@@ -312,9 +331,8 @@ static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb,
nvm_l2p_update_fn *update_l2p, void *priv)
{
struct nvme_ns *ns = nvmdev->q->queuedata;
- struct nvme_dev *dev = ns->dev;
struct nvme_nvm_command c = {};
- u32 len = queue_max_hw_sectors(dev->admin_q) << 9;
+ u32 len = queue_max_hw_sectors(ns->ctrl->admin_q) << 9;
u32 nlb_pr_rq = len / sizeof(u64);
u64 cmd_slba = slba;
void *entries;
@@ -332,10 +350,10 @@ static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb,
c.l2p.slba = cpu_to_le64(cmd_slba);
c.l2p.nlb = cpu_to_le32(cmd_nlb);
- ret = nvme_submit_sync_cmd(dev->admin_q,
+ ret = nvme_submit_sync_cmd(ns->ctrl->admin_q,
(struct nvme_command *)&c, entries, len);
if (ret) {
- dev_err(dev->dev, "L2P table transfer failed (%d)\n",
+ dev_err(ns->ctrl->dev, "L2P table transfer failed (%d)\n",
ret);
ret = -EIO;
goto out;
@@ -361,7 +379,7 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
{
struct request_queue *q = nvmdev->q;
struct nvme_ns *ns = q->queuedata;
- struct nvme_dev *dev = ns->dev;
+ struct nvme_ctrl *ctrl = ns->ctrl;
struct nvme_nvm_command c = {};
struct nvme_nvm_bb_tbl *bb_tbl;
int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blocks;
@@ -375,41 +393,36 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
if (!bb_tbl)
return -ENOMEM;
- ret = nvme_submit_sync_cmd(dev->admin_q, (struct nvme_command *)&c,
+ ret = nvme_submit_sync_cmd(ctrl->admin_q, (struct nvme_command *)&c,
bb_tbl, tblsz);
if (ret) {
- dev_err(dev->dev, "get bad block table failed (%d)\n", ret);
+ dev_err(ctrl->dev, "get bad block table failed (%d)\n", ret);
ret = -EIO;
goto out;
}
if (bb_tbl->tblid[0] != 'B' || bb_tbl->tblid[1] != 'B' ||
bb_tbl->tblid[2] != 'L' || bb_tbl->tblid[3] != 'T') {
- dev_err(dev->dev, "bbt format mismatch\n");
+ dev_err(ctrl->dev, "bbt format mismatch\n");
ret = -EINVAL;
goto out;
}
if (le16_to_cpu(bb_tbl->verid) != 1) {
ret = -EINVAL;
- dev_err(dev->dev, "bbt version not supported\n");
+ dev_err(ctrl->dev, "bbt version not supported\n");
goto out;
}
if (le32_to_cpu(bb_tbl->tblks) != nr_blocks) {
ret = -EINVAL;
- dev_err(dev->dev, "bbt unsuspected blocks returned (%u!=%u)",
+ dev_err(ctrl->dev, "bbt unsuspected blocks returned (%u!=%u)",
le32_to_cpu(bb_tbl->tblks), nr_blocks);
goto out;
}
ppa = dev_to_generic_addr(nvmdev, ppa);
ret = update_bbtbl(ppa, nr_blocks, bb_tbl->blk, priv);
- if (ret) {
- ret = -EINTR;
- goto out;
- }
-
out:
kfree(bb_tbl);
return ret;
@@ -419,7 +432,6 @@ static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct nvm_rq *rqd,
int type)
{
struct nvme_ns *ns = nvmdev->q->queuedata;
- struct nvme_dev *dev = ns->dev;
struct nvme_nvm_command c = {};
int ret = 0;
@@ -429,10 +441,10 @@ static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct nvm_rq *rqd,
c.set_bb.nlb = cpu_to_le16(rqd->nr_pages - 1);
c.set_bb.value = type;
- ret = nvme_submit_sync_cmd(dev->admin_q, (struct nvme_command *)&c,
+ ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c,
NULL, 0);
if (ret)
- dev_err(dev->dev, "set bad block table failed (%d)\n", ret);
+ dev_err(ns->ctrl->dev, "set bad block table failed (%d)\n", ret);
return ret;
}
@@ -453,11 +465,8 @@ static inline void nvme_nvm_rqtocmd(struct request *rq, struct nvm_rq *rqd,
static void nvme_nvm_end_io(struct request *rq, int error)
{
struct nvm_rq *rqd = rq->end_io_data;
- struct nvm_dev *dev = rqd->dev;
- if (dev->mt && dev->mt->end_io(rqd, error))
- pr_err("nvme: err status: %x result: %lx\n",
- rq->errors, (unsigned long)rq->special);
+ nvm_end_io(rqd, error);
kfree(rq->cmd);
blk_mq_free_request(rq);
@@ -520,9 +529,8 @@ static int nvme_nvm_erase_block(struct nvm_dev *dev, struct nvm_rq *rqd)
static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name)
{
struct nvme_ns *ns = nvmdev->q->queuedata;
- struct nvme_dev *dev = ns->dev;
- return dma_pool_create(name, dev->dev, PAGE_SIZE, PAGE_SIZE, 0);
+ return dma_pool_create(name, ns->ctrl->dev, PAGE_SIZE, PAGE_SIZE, 0);
}
static void nvme_nvm_destroy_dma_pool(void *pool)
@@ -580,8 +588,9 @@ void nvme_nvm_unregister(struct request_queue *q, char *disk_name)
int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
{
- struct nvme_dev *dev = ns->dev;
- struct pci_dev *pdev = to_pci_dev(dev->dev);
+ struct nvme_ctrl *ctrl = ns->ctrl;
+ /* XXX: this is poking into PCI structures from generic code! */
+ struct pci_dev *pdev = to_pci_dev(ctrl->dev);
/* QEMU NVMe simulator - PCI ID + Vendor specific bit */
if (pdev->vendor == PCI_VENDOR_ID_CNEX &&
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 044253d..4fb5bb7 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -19,58 +19,77 @@
#include <linux/kref.h>
#include <linux/blk-mq.h>
+enum {
+ /*
+ * Driver internal status code for commands that were cancelled due
+ * to timeouts or controller shutdown. The value is negative so
+ * that it a) doesn't overlap with the unsigned hardware error codes,
+ * and b) can easily be tested for.
+ */
+ NVME_SC_CANCELLED = -EINTR,
+};
+
extern unsigned char nvme_io_timeout;
#define NVME_IO_TIMEOUT (nvme_io_timeout * HZ)
+extern unsigned char admin_timeout;
+#define ADMIN_TIMEOUT (admin_timeout * HZ)
+
+extern unsigned char shutdown_timeout;
+#define SHUTDOWN_TIMEOUT (shutdown_timeout * HZ)
+
enum {
NVME_NS_LBA = 0,
NVME_NS_LIGHTNVM = 1,
};
/*
- * Represents an NVM Express device. Each nvme_dev is a PCI function.
+ * List of workarounds for devices that required behavior not specified in
+ * the standard.
*/
-struct nvme_dev {
- struct list_head node;
- struct nvme_queue **queues;
+enum nvme_quirks {
+ /*
+ * Prefers I/O aligned to a stripe size specified in a vendor
+ * specific Identify field.
+ */
+ NVME_QUIRK_STRIPE_SIZE = (1 << 0),
+
+ /*
+ * The controller doesn't handle Identify value others than 0 or 1
+ * correctly.
+ */
+ NVME_QUIRK_IDENTIFY_CNS = (1 << 1),
+};
+
+struct nvme_ctrl {
+ const struct nvme_ctrl_ops *ops;
struct request_queue *admin_q;
- struct blk_mq_tag_set tagset;
- struct blk_mq_tag_set admin_tagset;
- u32 __iomem *dbs;
struct device *dev;
- struct dma_pool *prp_page_pool;
- struct dma_pool *prp_small_pool;
+ struct kref kref;
int instance;
- unsigned queue_count;
- unsigned online_queues;
- unsigned max_qid;
- int q_depth;
- u32 db_stride;
- u32 ctrl_config;
- struct msix_entry *entry;
- struct nvme_bar __iomem *bar;
+ struct blk_mq_tag_set *tagset;
struct list_head namespaces;
- struct kref kref;
- struct device *device;
- struct work_struct reset_work;
- struct work_struct probe_work;
- struct work_struct scan_work;
+ struct mutex namespaces_mutex;
+ struct device *device; /* char device */
+ struct list_head node;
+
char name[12];
char serial[20];
char model[40];
char firmware_rev[8];
- bool subsystem;
+
+ u32 ctrl_config;
+
+ u32 page_size;
u32 max_hw_sectors;
u32 stripe_size;
- u32 page_size;
- void __iomem *cmb;
- dma_addr_t cmb_dma_addr;
- u64 cmb_size;
- u32 cmbsz;
u16 oncs;
- u16 abort_limit;
+ atomic_t abort_limit;
u8 event_limit;
u8 vwc;
+ u32 vs;
+ bool subsystem;
+ unsigned long quirks;
};
/*
@@ -79,11 +98,14 @@ struct nvme_dev {
struct nvme_ns {
struct list_head list;
- struct nvme_dev *dev;
+ struct nvme_ctrl *ctrl;
struct request_queue *queue;
struct gendisk *disk;
struct kref kref;
+ u8 eui[8];
+ u8 uuid[16];
+
unsigned ns_id;
int lba_shift;
u16 ms;
@@ -94,41 +116,156 @@ struct nvme_ns {
u32 mode_select_block_len;
};
-/*
- * The nvme_iod describes the data in an I/O, including the list of PRP
- * entries. You can't see it in this data structure because C doesn't let
- * me express that. Use nvme_alloc_iod to ensure there's enough space
- * allocated to store the PRP list.
- */
-struct nvme_iod {
- unsigned long private; /* For the use of the submitter of the I/O */
- int npages; /* In the PRP list. 0 means small pool in use */
- int offset; /* Of PRP list */
- int nents; /* Used in scatterlist */
- int length; /* Of data, in bytes */
- dma_addr_t first_dma;
- struct scatterlist meta_sg[1]; /* metadata requires single contiguous buffer */
- struct scatterlist sg[0];
+struct nvme_ctrl_ops {
+ int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
+ int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
+ int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
+ bool (*io_incapable)(struct nvme_ctrl *ctrl);
+ int (*reset_ctrl)(struct nvme_ctrl *ctrl);
+ void (*free_ctrl)(struct nvme_ctrl *ctrl);
};
+static inline bool nvme_ctrl_ready(struct nvme_ctrl *ctrl)
+{
+ u32 val = 0;
+
+ if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &val))
+ return false;
+ return val & NVME_CSTS_RDY;
+}
+
+static inline bool nvme_io_incapable(struct nvme_ctrl *ctrl)
+{
+ u32 val = 0;
+
+ if (ctrl->ops->io_incapable(ctrl))
+ return false;
+ if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &val))
+ return false;
+ return val & NVME_CSTS_CFS;
+}
+
+static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl)
+{
+ if (!ctrl->subsystem)
+ return -ENOTTY;
+ return ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65);
+}
+
static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
{
return (sector >> (ns->lba_shift - 9));
}
+static inline void nvme_setup_flush(struct nvme_ns *ns,
+ struct nvme_command *cmnd)
+{
+ memset(cmnd, 0, sizeof(*cmnd));
+ cmnd->common.opcode = nvme_cmd_flush;
+ cmnd->common.nsid = cpu_to_le32(ns->ns_id);
+}
+
+static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
+ struct nvme_command *cmnd)
+{
+ u16 control = 0;
+ u32 dsmgmt = 0;
+
+ if (req->cmd_flags & REQ_FUA)
+ control |= NVME_RW_FUA;
+ if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
+ control |= NVME_RW_LR;
+
+ if (req->cmd_flags & REQ_RAHEAD)
+ dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
+
+ memset(cmnd, 0, sizeof(*cmnd));
+ cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
+ cmnd->rw.command_id = req->tag;
+ cmnd->rw.nsid = cpu_to_le32(ns->ns_id);
+ cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
+ cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
+
+ if (ns->ms) {
+ switch (ns->pi_type) {
+ case NVME_NS_DPS_PI_TYPE3:
+ control |= NVME_RW_PRINFO_PRCHK_GUARD;
+ break;
+ case NVME_NS_DPS_PI_TYPE1:
+ case NVME_NS_DPS_PI_TYPE2:
+ control |= NVME_RW_PRINFO_PRCHK_GUARD |
+ NVME_RW_PRINFO_PRCHK_REF;
+ cmnd->rw.reftag = cpu_to_le32(
+ nvme_block_nr(ns, blk_rq_pos(req)));
+ break;
+ }
+ if (!blk_integrity_rq(req))
+ control |= NVME_RW_PRINFO_PRACT;
+ }
+
+ cmnd->rw.control = cpu_to_le16(control);
+ cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
+}
+
+
+static inline int nvme_error_status(u16 status)
+{
+ switch (status & 0x7ff) {
+ case NVME_SC_SUCCESS:
+ return 0;
+ case NVME_SC_CAP_EXCEEDED:
+ return -ENOSPC;
+ default:
+ return -EIO;
+ }
+}
+
+static inline bool nvme_req_needs_retry(struct request *req, u16 status)
+{
+ return !(status & NVME_SC_DNR || blk_noretry_request(req)) &&
+ (jiffies - req->start_time) < req->timeout;
+}
+
+int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap);
+int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap);
+int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl);
+int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
+ const struct nvme_ctrl_ops *ops, unsigned long quirks);
+void nvme_uninit_ctrl(struct nvme_ctrl *ctrl);
+void nvme_put_ctrl(struct nvme_ctrl *ctrl);
+int nvme_init_identify(struct nvme_ctrl *ctrl);
+
+void nvme_scan_namespaces(struct nvme_ctrl *ctrl);
+void nvme_remove_namespaces(struct nvme_ctrl *ctrl);
+
+void nvme_stop_queues(struct nvme_ctrl *ctrl);
+void nvme_start_queues(struct nvme_ctrl *ctrl);
+
+struct request *nvme_alloc_request(struct request_queue *q,
+ struct nvme_command *cmd, unsigned int flags);
+void nvme_requeue_req(struct request *req);
int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
void *buf, unsigned bufflen);
int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
- void *buffer, void __user *ubuffer, unsigned bufflen,
+ void *buffer, unsigned bufflen, u32 *result, unsigned timeout);
+int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
+ void __user *ubuffer, unsigned bufflen, u32 *result,
+ unsigned timeout);
+int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
+ void __user *ubuffer, unsigned bufflen,
+ void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
u32 *result, unsigned timeout);
-int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id);
-int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid,
+int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id);
+int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
struct nvme_id_ns **id);
-int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log);
-int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
+int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log);
+int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
dma_addr_t dma_addr, u32 *result);
-int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
+int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
dma_addr_t dma_addr, u32 *result);
+int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
+
+extern spinlock_t dev_list_lock;
struct sg_io_hdr;
@@ -154,4 +291,7 @@ static inline int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *i
}
#endif /* CONFIG_NVM */
+int __init nvme_core_init(void);
+void nvme_core_exit(void);
+
#endif /* _NVME_H */
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index f5c0e26..72ef832 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -12,6 +12,7 @@
* more details.
*/
+#include <linux/aer.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
@@ -28,10 +29,10 @@
#include <linux/kdev_t.h>
#include <linux/kthread.h>
#include <linux/kernel.h>
-#include <linux/list_sort.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
+#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/poison.h>
#include <linux/ptrace.h>
@@ -39,23 +40,24 @@
#include <linux/slab.h>
#include <linux/t10-pi.h>
#include <linux/types.h>
-#include <linux/pr.h>
-#include <scsi/sg.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <asm/unaligned.h>
-#include <uapi/linux/nvme_ioctl.h>
#include "nvme.h"
-#define NVME_MINORS (1U << MINORBITS)
#define NVME_Q_DEPTH 1024
#define NVME_AQ_DEPTH 256
#define SQ_SIZE(depth) (depth * sizeof(struct nvme_command))
#define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion))
-#define ADMIN_TIMEOUT (admin_timeout * HZ)
-#define SHUTDOWN_TIMEOUT (shutdown_timeout * HZ)
+
+/*
+ * We handle AEN commands ourselves and don't even let the
+ * block layer know about them.
+ */
+#define NVME_NR_AEN_COMMANDS 1
+#define NVME_AQ_BLKMQ_DEPTH (NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS)
-static unsigned char admin_timeout = 60;
+unsigned char admin_timeout = 60;
module_param(admin_timeout, byte, 0644);
MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
@@ -63,16 +65,10 @@ unsigned char nvme_io_timeout = 30;
module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
-static unsigned char shutdown_timeout = 5;
+unsigned char shutdown_timeout = 5;
module_param(shutdown_timeout, byte, 0644);
MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
-static int nvme_major;
-module_param(nvme_major, int, 0);
-
-static int nvme_char_major;
-module_param(nvme_char_major, int, 0);
-
static int use_threaded_interrupts;
module_param(use_threaded_interrupts, int, 0);
@@ -80,28 +76,60 @@ static bool use_cmb_sqes = true;
module_param(use_cmb_sqes, bool, 0644);
MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");
-static DEFINE_SPINLOCK(dev_list_lock);
static LIST_HEAD(dev_list);
static struct task_struct *nvme_thread;
static struct workqueue_struct *nvme_workq;
static wait_queue_head_t nvme_kthread_wait;
-static struct class *nvme_class;
+struct nvme_dev;
+struct nvme_queue;
-static int __nvme_reset(struct nvme_dev *dev);
static int nvme_reset(struct nvme_dev *dev);
static void nvme_process_cq(struct nvme_queue *nvmeq);
-static void nvme_dead_ctrl(struct nvme_dev *dev);
+static void nvme_remove_dead_ctrl(struct nvme_dev *dev);
+static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
-struct async_cmd_info {
- struct kthread_work work;
- struct kthread_worker *worker;
- struct request *req;
- u32 result;
- int status;
- void *ctx;
+/*
+ * Represents an NVM Express device. Each nvme_dev is a PCI function.
+ */
+struct nvme_dev {
+ struct list_head node;
+ struct nvme_queue **queues;
+ struct blk_mq_tag_set tagset;
+ struct blk_mq_tag_set admin_tagset;
+ u32 __iomem *dbs;
+ struct device *dev;
+ struct dma_pool *prp_page_pool;
+ struct dma_pool *prp_small_pool;
+ unsigned queue_count;
+ unsigned online_queues;
+ unsigned max_qid;
+ int q_depth;
+ u32 db_stride;
+ struct msix_entry *entry;
+ void __iomem *bar;
+ struct work_struct reset_work;
+ struct work_struct scan_work;
+ struct work_struct remove_work;
+ struct mutex shutdown_lock;
+ bool subsystem;
+ void __iomem *cmb;
+ dma_addr_t cmb_dma_addr;
+ u64 cmb_size;
+ u32 cmbsz;
+ unsigned long flags;
+
+#define NVME_CTRL_RESETTING 0
+
+ struct nvme_ctrl ctrl;
+ struct completion ioq_wait;
};
+static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl)
+{
+ return container_of(ctrl, struct nvme_dev, ctrl);
+}
+
/*
* An NVM Express queue. Each device has at least two (one for admin
* commands and one for I/O commands).
@@ -126,7 +154,24 @@ struct nvme_queue {
u16 qid;
u8 cq_phase;
u8 cqe_seen;
- struct async_cmd_info cmdinfo;
+};
+
+/*
+ * The nvme_iod describes the data in an I/O, including the list of PRP
+ * entries. You can't see it in this data structure because C doesn't let
+ * me express that. Use nvme_init_iod to ensure there's enough space
+ * allocated to store the PRP list.
+ */
+struct nvme_iod {
+ struct nvme_queue *nvmeq;
+ int aborted;
+ int npages; /* In the PRP list. 0 means small pool in use */
+ int nents; /* Used in scatterlist */
+ int length; /* Of data, in bytes */
+ dma_addr_t first_dma;
+ struct scatterlist meta_sg; /* metadata requires single contiguous buffer */
+ struct scatterlist *sg;
+ struct scatterlist inline_sg[0];
};
/*
@@ -148,23 +193,11 @@ static inline void _nvme_check_size(void)
BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
}
-typedef void (*nvme_completion_fn)(struct nvme_queue *, void *,
- struct nvme_completion *);
-
-struct nvme_cmd_info {
- nvme_completion_fn fn;
- void *ctx;
- int aborted;
- struct nvme_queue *nvmeq;
- struct nvme_iod iod[0];
-};
-
/*
* Max size of iod being embedded in the request payload
*/
#define NVME_INT_PAGES 2
-#define NVME_INT_BYTES(dev) (NVME_INT_PAGES * (dev)->page_size)
-#define NVME_INT_MASK 0x01
+#define NVME_INT_BYTES(dev) (NVME_INT_PAGES * (dev)->ctrl.page_size)
/*
* Will slightly overestimate the number of pages needed. This is OK
@@ -173,19 +206,22 @@ struct nvme_cmd_info {
*/
static int nvme_npages(unsigned size, struct nvme_dev *dev)
{
- unsigned nprps = DIV_ROUND_UP(size + dev->page_size, dev->page_size);
+ unsigned nprps = DIV_ROUND_UP(size + dev->ctrl.page_size,
+ dev->ctrl.page_size);
return DIV_ROUND_UP(8 * nprps, PAGE_SIZE - 8);
}
-static unsigned int nvme_cmd_size(struct nvme_dev *dev)
+static unsigned int nvme_iod_alloc_size(struct nvme_dev *dev,
+ unsigned int size, unsigned int nseg)
{
- unsigned int ret = sizeof(struct nvme_cmd_info);
-
- ret += sizeof(struct nvme_iod);
- ret += sizeof(__le64 *) * nvme_npages(NVME_INT_BYTES(dev), dev);
- ret += sizeof(struct scatterlist) * NVME_INT_PAGES;
+ return sizeof(__le64 *) * nvme_npages(size, dev) +
+ sizeof(struct scatterlist) * nseg;
+}
- return ret;
+static unsigned int nvme_cmd_size(struct nvme_dev *dev)
+{
+ return sizeof(struct nvme_iod) +
+ nvme_iod_alloc_size(dev, NVME_INT_BYTES(dev), NVME_INT_PAGES);
}
static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
@@ -215,11 +251,11 @@ static int nvme_admin_init_request(void *data, struct request *req,
unsigned int numa_node)
{
struct nvme_dev *dev = data;
- struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req);
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct nvme_queue *nvmeq = dev->queues[0];
BUG_ON(!nvmeq);
- cmd->nvmeq = nvmeq;
+ iod->nvmeq = nvmeq;
return 0;
}
@@ -242,148 +278,36 @@ static int nvme_init_request(void *data, struct request *req,
unsigned int numa_node)
{
struct nvme_dev *dev = data;
- struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req);
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1];
BUG_ON(!nvmeq);
- cmd->nvmeq = nvmeq;
+ iod->nvmeq = nvmeq;
return 0;
}
-static void nvme_set_info(struct nvme_cmd_info *cmd, void *ctx,
- nvme_completion_fn handler)
-{
- cmd->fn = handler;
- cmd->ctx = ctx;
- cmd->aborted = 0;
- blk_mq_start_request(blk_mq_rq_from_pdu(cmd));
-}
-
-static void *iod_get_private(struct nvme_iod *iod)
-{
- return (void *) (iod->private & ~0x1UL);
-}
-
-/*
- * If bit 0 is set, the iod is embedded in the request payload.
- */
-static bool iod_should_kfree(struct nvme_iod *iod)
-{
- return (iod->private & NVME_INT_MASK) == 0;
-}
-
-/* Special values must be less than 0x1000 */
-#define CMD_CTX_BASE ((void *)POISON_POINTER_DELTA)
-#define CMD_CTX_CANCELLED (0x30C + CMD_CTX_BASE)
-#define CMD_CTX_COMPLETED (0x310 + CMD_CTX_BASE)
-#define CMD_CTX_INVALID (0x314 + CMD_CTX_BASE)
-
-static void special_completion(struct nvme_queue *nvmeq, void *ctx,
- struct nvme_completion *cqe)
-{
- if (ctx == CMD_CTX_CANCELLED)
- return;
- if (ctx == CMD_CTX_COMPLETED) {
- dev_warn(nvmeq->q_dmadev,
- "completed id %d twice on queue %d\n",
- cqe->command_id, le16_to_cpup(&cqe->sq_id));
- return;
- }
- if (ctx == CMD_CTX_INVALID) {
- dev_warn(nvmeq->q_dmadev,
- "invalid id %d completed on queue %d\n",
- cqe->command_id, le16_to_cpup(&cqe->sq_id));
- return;
- }
- dev_warn(nvmeq->q_dmadev, "Unknown special completion %p\n", ctx);
-}
-
-static void *cancel_cmd_info(struct nvme_cmd_info *cmd, nvme_completion_fn *fn)
-{
- void *ctx;
-
- if (fn)
- *fn = cmd->fn;
- ctx = cmd->ctx;
- cmd->fn = special_completion;
- cmd->ctx = CMD_CTX_CANCELLED;
- return ctx;
-}
-
-static void async_req_completion(struct nvme_queue *nvmeq, void *ctx,
- struct nvme_completion *cqe)
+static void nvme_complete_async_event(struct nvme_dev *dev,
+ struct nvme_completion *cqe)
{
- u32 result = le32_to_cpup(&cqe->result);
- u16 status = le16_to_cpup(&cqe->status) >> 1;
+ u16 status = le16_to_cpu(cqe->status) >> 1;
+ u32 result = le32_to_cpu(cqe->result);
if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ)
- ++nvmeq->dev->event_limit;
+ ++dev->ctrl.event_limit;
if (status != NVME_SC_SUCCESS)
return;
switch (result & 0xff07) {
case NVME_AER_NOTICE_NS_CHANGED:
- dev_info(nvmeq->q_dmadev, "rescanning\n");
- schedule_work(&nvmeq->dev->scan_work);
+ dev_info(dev->dev, "rescanning\n");
+ queue_work(nvme_workq, &dev->scan_work);
default:
- dev_warn(nvmeq->q_dmadev, "async event result %08x\n", result);
+ dev_warn(dev->dev, "async event result %08x\n", result);
}
}
-static void abort_completion(struct nvme_queue *nvmeq, void *ctx,
- struct nvme_completion *cqe)
-{
- struct request *req = ctx;
-
- u16 status = le16_to_cpup(&cqe->status) >> 1;
- u32 result = le32_to_cpup(&cqe->result);
-
- blk_mq_free_request(req);
-
- dev_warn(nvmeq->q_dmadev, "Abort status:%x result:%x", status, result);
- ++nvmeq->dev->abort_limit;
-}
-
-static void async_completion(struct nvme_queue *nvmeq, void *ctx,
- struct nvme_completion *cqe)
-{
- struct async_cmd_info *cmdinfo = ctx;
- cmdinfo->result = le32_to_cpup(&cqe->result);
- cmdinfo->status = le16_to_cpup(&cqe->status) >> 1;
- queue_kthread_work(cmdinfo->worker, &cmdinfo->work);
- blk_mq_free_request(cmdinfo->req);
-}
-
-static inline struct nvme_cmd_info *get_cmd_from_tag(struct nvme_queue *nvmeq,
- unsigned int tag)
-{
- struct request *req = blk_mq_tag_to_rq(*nvmeq->tags, tag);
-
- return blk_mq_rq_to_pdu(req);
-}
-
-/*
- * Called with local interrupts disabled and the q_lock held. May not sleep.
- */
-static void *nvme_finish_cmd(struct nvme_queue *nvmeq, int tag,
- nvme_completion_fn *fn)
-{
- struct nvme_cmd_info *cmd = get_cmd_from_tag(nvmeq, tag);
- void *ctx;
- if (tag >= nvmeq->q_depth) {
- *fn = special_completion;
- return CMD_CTX_INVALID;
- }
- if (fn)
- *fn = cmd->fn;
- ctx = cmd->ctx;
- cmd->fn = special_completion;
- cmd->ctx = CMD_CTX_COMPLETED;
- return ctx;
-}
-
/**
- * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
+ * __nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
* @nvmeq: The queue to use
* @cmd: The command to send
*
@@ -405,69 +329,44 @@ static void __nvme_submit_cmd(struct nvme_queue *nvmeq,
nvmeq->sq_tail = tail;
}
-static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
-{
- unsigned long flags;
- spin_lock_irqsave(&nvmeq->q_lock, flags);
- __nvme_submit_cmd(nvmeq, cmd);
- spin_unlock_irqrestore(&nvmeq->q_lock, flags);
-}
-
-static __le64 **iod_list(struct nvme_iod *iod)
+static __le64 **iod_list(struct request *req)
{
- return ((void *)iod) + iod->offset;
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ return (__le64 **)(iod->sg + req->nr_phys_segments);
}
-static inline void iod_init(struct nvme_iod *iod, unsigned nbytes,
- unsigned nseg, unsigned long private)
+static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
{
- iod->private = private;
- iod->offset = offsetof(struct nvme_iod, sg[nseg]);
- iod->npages = -1;
- iod->length = nbytes;
- iod->nents = 0;
-}
-
-static struct nvme_iod *
-__nvme_alloc_iod(unsigned nseg, unsigned bytes, struct nvme_dev *dev,
- unsigned long priv, gfp_t gfp)
-{
- struct nvme_iod *iod = kmalloc(sizeof(struct nvme_iod) +
- sizeof(__le64 *) * nvme_npages(bytes, dev) +
- sizeof(struct scatterlist) * nseg, gfp);
-
- if (iod)
- iod_init(iod, bytes, nseg, priv);
-
- return iod;
-}
-
-static struct nvme_iod *nvme_alloc_iod(struct request *rq, struct nvme_dev *dev,
- gfp_t gfp)
-{
- unsigned size = !(rq->cmd_flags & REQ_DISCARD) ? blk_rq_bytes(rq) :
- sizeof(struct nvme_dsm_range);
- struct nvme_iod *iod;
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(rq);
+ int nseg = rq->nr_phys_segments;
+ unsigned size;
- if (rq->nr_phys_segments <= NVME_INT_PAGES &&
- size <= NVME_INT_BYTES(dev)) {
- struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(rq);
+ if (rq->cmd_flags & REQ_DISCARD)
+ size = sizeof(struct nvme_dsm_range);
+ else
+ size = blk_rq_bytes(rq);
- iod = cmd->iod;
- iod_init(iod, size, rq->nr_phys_segments,
- (unsigned long) rq | NVME_INT_MASK);
- return iod;
+ if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
+ iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC);
+ if (!iod->sg)
+ return BLK_MQ_RQ_QUEUE_BUSY;
+ } else {
+ iod->sg = iod->inline_sg;
}
- return __nvme_alloc_iod(rq->nr_phys_segments, size, dev,
- (unsigned long) rq, gfp);
+ iod->aborted = 0;
+ iod->npages = -1;
+ iod->nents = 0;
+ iod->length = size;
+ return 0;
}
-static void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod)
+static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
{
- const int last_prp = dev->page_size / 8 - 1;
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ const int last_prp = dev->ctrl.page_size / 8 - 1;
int i;
- __le64 **list = iod_list(iod);
+ __le64 **list = iod_list(req);
dma_addr_t prp_dma = iod->first_dma;
if (iod->npages == 0)
@@ -479,20 +378,8 @@ static void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod)
prp_dma = next_prp_dma;
}
- if (iod_should_kfree(iod))
- kfree(iod);
-}
-
-static int nvme_error_status(u16 status)
-{
- switch (status & 0x7ff) {
- case NVME_SC_SUCCESS:
- return 0;
- case NVME_SC_CAP_EXCEEDED:
- return -ENOSPC;
- default:
- return -EIO;
- }
+ if (iod->sg != iod->inline_sg)
+ kfree(iod->sg);
}
#ifdef CONFIG_BLK_DEV_INTEGRITY
@@ -549,27 +436,6 @@ static void nvme_dif_remap(struct request *req,
}
kunmap_atomic(pmap);
}
-
-static void nvme_init_integrity(struct nvme_ns *ns)
-{
- struct blk_integrity integrity;
-
- switch (ns->pi_type) {
- case NVME_NS_DPS_PI_TYPE3:
- integrity.profile = &t10_pi_type3_crc;
- break;
- case NVME_NS_DPS_PI_TYPE1:
- case NVME_NS_DPS_PI_TYPE2:
- integrity.profile = &t10_pi_type1_crc;
- break;
- default:
- integrity.profile = NULL;
- break;
- }
- integrity.tuple_size = ns->ms;
- blk_integrity_register(ns->disk, &integrity);
- blk_queue_max_integrity_segments(ns->queue, 1);
-}
#else /* CONFIG_BLK_DEV_INTEGRITY */
static void nvme_dif_remap(struct request *req,
void (*dif_swap)(u32 p, u32 v, struct t10_pi_tuple *pi))
@@ -581,91 +447,27 @@ static void nvme_dif_prep(u32 p, u32 v, struct t10_pi_tuple *pi)
static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi)
{
}
-static void nvme_init_integrity(struct nvme_ns *ns)
-{
-}
#endif
-static void req_completion(struct nvme_queue *nvmeq, void *ctx,
- struct nvme_completion *cqe)
-{
- struct nvme_iod *iod = ctx;
- struct request *req = iod_get_private(iod);
- struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req);
- u16 status = le16_to_cpup(&cqe->status) >> 1;
- bool requeue = false;
- int error = 0;
-
- if (unlikely(status)) {
- if (!(status & NVME_SC_DNR || blk_noretry_request(req))
- && (jiffies - req->start_time) < req->timeout) {
- unsigned long flags;
-
- requeue = true;
- blk_mq_requeue_request(req);
- spin_lock_irqsave(req->q->queue_lock, flags);
- if (!blk_queue_stopped(req->q))
- blk_mq_kick_requeue_list(req->q);
- spin_unlock_irqrestore(req->q->queue_lock, flags);
- goto release_iod;
- }
-
- if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
- if (cmd_rq->ctx == CMD_CTX_CANCELLED)
- error = -EINTR;
- else
- error = status;
- } else {
- error = nvme_error_status(status);
- }
- }
-
- if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
- u32 result = le32_to_cpup(&cqe->result);
- req->special = (void *)(uintptr_t)result;
- }
-
- if (cmd_rq->aborted)
- dev_warn(nvmeq->dev->dev,
- "completing aborted command with status:%04x\n",
- error);
-
-release_iod:
- if (iod->nents) {
- dma_unmap_sg(nvmeq->dev->dev, iod->sg, iod->nents,
- rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
- if (blk_integrity_rq(req)) {
- if (!rq_data_dir(req))
- nvme_dif_remap(req, nvme_dif_complete);
- dma_unmap_sg(nvmeq->dev->dev, iod->meta_sg, 1,
- rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
- }
- }
- nvme_free_iod(nvmeq->dev, iod);
-
- if (likely(!requeue))
- blk_mq_complete_request(req, error);
-}
-
-/* length is in bytes. gfp flags indicates whether we may sleep. */
-static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod,
- int total_len, gfp_t gfp)
+static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req,
+ int total_len)
{
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct dma_pool *pool;
int length = total_len;
struct scatterlist *sg = iod->sg;
int dma_len = sg_dma_len(sg);
u64 dma_addr = sg_dma_address(sg);
- u32 page_size = dev->page_size;
+ u32 page_size = dev->ctrl.page_size;
int offset = dma_addr & (page_size - 1);
__le64 *prp_list;
- __le64 **list = iod_list(iod);
+ __le64 **list = iod_list(req);
dma_addr_t prp_dma;
int nprps, i;
length -= (page_size - offset);
if (length <= 0)
- return total_len;
+ return true;
dma_len -= (page_size - offset);
if (dma_len) {
@@ -678,7 +480,7 @@ static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod,
if (length <= page_size) {
iod->first_dma = dma_addr;
- return total_len;
+ return true;
}
nprps = DIV_ROUND_UP(length, page_size);
@@ -690,11 +492,11 @@ static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod,
iod->npages = 1;
}
- prp_list = dma_pool_alloc(pool, gfp, &prp_dma);
+ prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
if (!prp_list) {
iod->first_dma = dma_addr;
iod->npages = -1;
- return (total_len - length) + page_size;
+ return false;
}
list[0] = prp_list;
iod->first_dma = prp_dma;
@@ -702,9 +504,9 @@ static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod,
for (;;) {
if (i == page_size >> 3) {
__le64 *old_prp_list = prp_list;
- prp_list = dma_pool_alloc(pool, gfp, &prp_dma);
+ prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
if (!prp_list)
- return total_len - length;
+ return false;
list[iod->npages++] = prp_list;
prp_list[0] = old_prp_list[i - 1];
old_prp_list[i - 1] = cpu_to_le64(prp_dma);
@@ -724,115 +526,105 @@ static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod,
dma_len = sg_dma_len(sg);
}
- return total_len;
+ return true;
}
-static void nvme_submit_priv(struct nvme_queue *nvmeq, struct request *req,
- struct nvme_iod *iod)
+static int nvme_map_data(struct nvme_dev *dev, struct request *req,
+ struct nvme_command *cmnd)
{
- struct nvme_command cmnd;
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ struct request_queue *q = req->q;
+ enum dma_data_direction dma_dir = rq_data_dir(req) ?
+ DMA_TO_DEVICE : DMA_FROM_DEVICE;
+ int ret = BLK_MQ_RQ_QUEUE_ERROR;
- memcpy(&cmnd, req->cmd, sizeof(cmnd));
- cmnd.rw.command_id = req->tag;
- if (req->nr_phys_segments) {
- cmnd.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
- cmnd.rw.prp2 = cpu_to_le64(iod->first_dma);
- }
+ sg_init_table(iod->sg, req->nr_phys_segments);
+ iod->nents = blk_rq_map_sg(q, req, iod->sg);
+ if (!iod->nents)
+ goto out;
- __nvme_submit_cmd(nvmeq, &cmnd);
-}
+ ret = BLK_MQ_RQ_QUEUE_BUSY;
+ if (!dma_map_sg(dev->dev, iod->sg, iod->nents, dma_dir))
+ goto out;
-/*
- * We reuse the small pool to allocate the 16-byte range here as it is not
- * worth having a special pool for these or additional cases to handle freeing
- * the iod.
- */
-static void nvme_submit_discard(struct nvme_queue *nvmeq, struct nvme_ns *ns,
- struct request *req, struct nvme_iod *iod)
-{
- struct nvme_dsm_range *range =
- (struct nvme_dsm_range *)iod_list(iod)[0];
- struct nvme_command cmnd;
+ if (!nvme_setup_prps(dev, req, blk_rq_bytes(req)))
+ goto out_unmap;
- range->cattr = cpu_to_le32(0);
- range->nlb = cpu_to_le32(blk_rq_bytes(req) >> ns->lba_shift);
- range->slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
+ ret = BLK_MQ_RQ_QUEUE_ERROR;
+ if (blk_integrity_rq(req)) {
+ if (blk_rq_count_integrity_sg(q, req->bio) != 1)
+ goto out_unmap;
- memset(&cmnd, 0, sizeof(cmnd));
- cmnd.dsm.opcode = nvme_cmd_dsm;
- cmnd.dsm.command_id = req->tag;
- cmnd.dsm.nsid = cpu_to_le32(ns->ns_id);
- cmnd.dsm.prp1 = cpu_to_le64(iod->first_dma);
- cmnd.dsm.nr = 0;
- cmnd.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
+ sg_init_table(&iod->meta_sg, 1);
+ if (blk_rq_map_integrity_sg(q, req->bio, &iod->meta_sg) != 1)
+ goto out_unmap;
- __nvme_submit_cmd(nvmeq, &cmnd);
-}
+ if (rq_data_dir(req))
+ nvme_dif_remap(req, nvme_dif_prep);
-static void nvme_submit_flush(struct nvme_queue *nvmeq, struct nvme_ns *ns,
- int cmdid)
-{
- struct nvme_command cmnd;
+ if (!dma_map_sg(dev->dev, &iod->meta_sg, 1, dma_dir))
+ goto out_unmap;
+ }
- memset(&cmnd, 0, sizeof(cmnd));
- cmnd.common.opcode = nvme_cmd_flush;
- cmnd.common.command_id = cmdid;
- cmnd.common.nsid = cpu_to_le32(ns->ns_id);
+ cmnd->rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
+ cmnd->rw.prp2 = cpu_to_le64(iod->first_dma);
+ if (blk_integrity_rq(req))
+ cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg));
+ return BLK_MQ_RQ_QUEUE_OK;
- __nvme_submit_cmd(nvmeq, &cmnd);
+out_unmap:
+ dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
+out:
+ return ret;
}
-static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod,
- struct nvme_ns *ns)
+static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
{
- struct request *req = iod_get_private(iod);
- struct nvme_command cmnd;
- u16 control = 0;
- u32 dsmgmt = 0;
-
- if (req->cmd_flags & REQ_FUA)
- control |= NVME_RW_FUA;
- if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
- control |= NVME_RW_LR;
-
- if (req->cmd_flags & REQ_RAHEAD)
- dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
-
- memset(&cmnd, 0, sizeof(cmnd));
- cmnd.rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
- cmnd.rw.command_id = req->tag;
- cmnd.rw.nsid = cpu_to_le32(ns->ns_id);
- cmnd.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
- cmnd.rw.prp2 = cpu_to_le64(iod->first_dma);
- cmnd.rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
- cmnd.rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
-
- if (ns->ms) {
- switch (ns->pi_type) {
- case NVME_NS_DPS_PI_TYPE3:
- control |= NVME_RW_PRINFO_PRCHK_GUARD;
- break;
- case NVME_NS_DPS_PI_TYPE1:
- case NVME_NS_DPS_PI_TYPE2:
- control |= NVME_RW_PRINFO_PRCHK_GUARD |
- NVME_RW_PRINFO_PRCHK_REF;
- cmnd.rw.reftag = cpu_to_le32(
- nvme_block_nr(ns, blk_rq_pos(req)));
- break;
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ enum dma_data_direction dma_dir = rq_data_dir(req) ?
+ DMA_TO_DEVICE : DMA_FROM_DEVICE;
+
+ if (iod->nents) {
+ dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
+ if (blk_integrity_rq(req)) {
+ if (!rq_data_dir(req))
+ nvme_dif_remap(req, nvme_dif_complete);
+ dma_unmap_sg(dev->dev, &iod->meta_sg, 1, dma_dir);
}
- if (blk_integrity_rq(req))
- cmnd.rw.metadata =
- cpu_to_le64(sg_dma_address(iod->meta_sg));
- else
- control |= NVME_RW_PRINFO_PRACT;
}
- cmnd.rw.control = cpu_to_le16(control);
- cmnd.rw.dsmgmt = cpu_to_le32(dsmgmt);
+ nvme_free_iod(dev, req);
+}
- __nvme_submit_cmd(nvmeq, &cmnd);
+/*
+ * We reuse the small pool to allocate the 16-byte range here as it is not
+ * worth having a special pool for these or additional cases to handle freeing
+ * the iod.
+ */
+static int nvme_setup_discard(struct nvme_queue *nvmeq, struct nvme_ns *ns,
+ struct request *req, struct nvme_command *cmnd)
+{
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ struct nvme_dsm_range *range;
- return 0;
+ range = dma_pool_alloc(nvmeq->dev->prp_small_pool, GFP_ATOMIC,
+ &iod->first_dma);
+ if (!range)
+ return BLK_MQ_RQ_QUEUE_BUSY;
+ iod_list(req)[0] = (__le64 *)range;
+ iod->npages = 0;
+
+ range->cattr = cpu_to_le32(0);
+ range->nlb = cpu_to_le32(blk_rq_bytes(req) >> ns->lba_shift);
+ range->slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
+
+ memset(cmnd, 0, sizeof(*cmnd));
+ cmnd->dsm.opcode = nvme_cmd_dsm;
+ cmnd->dsm.nsid = cpu_to_le32(ns->ns_id);
+ cmnd->dsm.prp1 = cpu_to_le64(iod->first_dma);
+ cmnd->dsm.nr = 0;
+ cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
+ return BLK_MQ_RQ_QUEUE_OK;
}
/*
@@ -845,9 +637,8 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
struct nvme_queue *nvmeq = hctx->driver_data;
struct nvme_dev *dev = nvmeq->dev;
struct request *req = bd->rq;
- struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req);
- struct nvme_iod *iod;
- enum dma_data_direction dma_dir;
+ struct nvme_command cmnd;
+ int ret = BLK_MQ_RQ_QUEUE_OK;
/*
* If formated with metadata, require the block layer provide a buffer
@@ -857,91 +648,72 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
if (ns && ns->ms && !blk_integrity_rq(req)) {
if (!(ns->pi_type && ns->ms == 8) &&
req->cmd_type != REQ_TYPE_DRV_PRIV) {
- blk_mq_complete_request(req, -EFAULT);
+ blk_mq_end_request(req, -EFAULT);
return BLK_MQ_RQ_QUEUE_OK;
}
}
- iod = nvme_alloc_iod(req, dev, GFP_ATOMIC);
- if (!iod)
- return BLK_MQ_RQ_QUEUE_BUSY;
+ ret = nvme_init_iod(req, dev);
+ if (ret)
+ return ret;
if (req->cmd_flags & REQ_DISCARD) {
- void *range;
- /*
- * We reuse the small pool to allocate the 16-byte range here
- * as it is not worth having a special pool for these or
- * additional cases to handle freeing the iod.
- */
- range = dma_pool_alloc(dev->prp_small_pool, GFP_ATOMIC,
- &iod->first_dma);
- if (!range)
- goto retry_cmd;
- iod_list(iod)[0] = (__le64 *)range;
- iod->npages = 0;
- } else if (req->nr_phys_segments) {
- dma_dir = rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
+ ret = nvme_setup_discard(nvmeq, ns, req, &cmnd);
+ } else {
+ if (req->cmd_type == REQ_TYPE_DRV_PRIV)
+ memcpy(&cmnd, req->cmd, sizeof(cmnd));
+ else if (req->cmd_flags & REQ_FLUSH)
+ nvme_setup_flush(ns, &cmnd);
+ else
+ nvme_setup_rw(ns, req, &cmnd);
- sg_init_table(iod->sg, req->nr_phys_segments);
- iod->nents = blk_rq_map_sg(req->q, req, iod->sg);
- if (!iod->nents)
- goto error_cmd;
+ if (req->nr_phys_segments)
+ ret = nvme_map_data(dev, req, &cmnd);
+ }
- if (!dma_map_sg(nvmeq->q_dmadev, iod->sg, iod->nents, dma_dir))
- goto retry_cmd;
+ if (ret)
+ goto out;
- if (blk_rq_bytes(req) !=
- nvme_setup_prps(dev, iod, blk_rq_bytes(req), GFP_ATOMIC)) {
- dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
- goto retry_cmd;
- }
- if (blk_integrity_rq(req)) {
- if (blk_rq_count_integrity_sg(req->q, req->bio) != 1) {
- dma_unmap_sg(dev->dev, iod->sg, iod->nents,
- dma_dir);
- goto error_cmd;
- }
+ cmnd.common.command_id = req->tag;
+ blk_mq_start_request(req);
- sg_init_table(iod->meta_sg, 1);
- if (blk_rq_map_integrity_sg(
- req->q, req->bio, iod->meta_sg) != 1) {
- dma_unmap_sg(dev->dev, iod->sg, iod->nents,
- dma_dir);
- goto error_cmd;
- }
+ spin_lock_irq(&nvmeq->q_lock);
+ __nvme_submit_cmd(nvmeq, &cmnd);
+ nvme_process_cq(nvmeq);
+ spin_unlock_irq(&nvmeq->q_lock);
+ return BLK_MQ_RQ_QUEUE_OK;
+out:
+ nvme_free_iod(dev, req);
+ return ret;
+}
- if (rq_data_dir(req))
- nvme_dif_remap(req, nvme_dif_prep);
+static void nvme_complete_rq(struct request *req)
+{
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ struct nvme_dev *dev = iod->nvmeq->dev;
+ int error = 0;
- if (!dma_map_sg(nvmeq->q_dmadev, iod->meta_sg, 1, dma_dir)) {
- dma_unmap_sg(dev->dev, iod->sg, iod->nents,
- dma_dir);
- goto error_cmd;
- }
+ nvme_unmap_data(dev, req);
+
+ if (unlikely(req->errors)) {
+ if (nvme_req_needs_retry(req, req->errors)) {
+ nvme_requeue_req(req);
+ return;
}
- }
- nvme_set_info(cmd, iod, req_completion);
- spin_lock_irq(&nvmeq->q_lock);
- if (req->cmd_type == REQ_TYPE_DRV_PRIV)
- nvme_submit_priv(nvmeq, req, iod);
- else if (req->cmd_flags & REQ_DISCARD)
- nvme_submit_discard(nvmeq, ns, req, iod);
- else if (req->cmd_flags & REQ_FLUSH)
- nvme_submit_flush(nvmeq, ns, req->tag);
- else
- nvme_submit_iod(nvmeq, iod, ns);
+ if (req->cmd_type == REQ_TYPE_DRV_PRIV)
+ error = req->errors;
+ else
+ error = nvme_error_status(req->errors);
+ }
- nvme_process_cq(nvmeq);
- spin_unlock_irq(&nvmeq->q_lock);
- return BLK_MQ_RQ_QUEUE_OK;
+ if (unlikely(iod->aborted)) {
+ dev_warn(dev->dev,
+ "completing aborted command with status: %04x\n",
+ req->errors);
+ }
- error_cmd:
- nvme_free_iod(dev, iod);
- return BLK_MQ_RQ_QUEUE_ERROR;
- retry_cmd:
- nvme_free_iod(dev, iod);
- return BLK_MQ_RQ_QUEUE_BUSY;
+ blk_mq_end_request(req, error);
}
static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
@@ -952,20 +724,47 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
phase = nvmeq->cq_phase;
for (;;) {
- void *ctx;
- nvme_completion_fn fn;
struct nvme_completion cqe = nvmeq->cqes[head];
- if ((le16_to_cpu(cqe.status) & 1) != phase)
+ u16 status = le16_to_cpu(cqe.status);
+ struct request *req;
+
+ if ((status & 1) != phase)
break;
nvmeq->sq_head = le16_to_cpu(cqe.sq_head);
if (++head == nvmeq->q_depth) {
head = 0;
phase = !phase;
}
+
if (tag && *tag == cqe.command_id)
*tag = -1;
- ctx = nvme_finish_cmd(nvmeq, cqe.command_id, &fn);
- fn(nvmeq, ctx, &cqe);
+
+ if (unlikely(cqe.command_id >= nvmeq->q_depth)) {
+ dev_warn(nvmeq->q_dmadev,
+ "invalid id %d completed on queue %d\n",
+ cqe.command_id, le16_to_cpu(cqe.sq_id));
+ continue;
+ }
+
+ /*
+ * AEN requests are special as they don't time out and can
+ * survive any kind of queue freeze and often don't respond to
+ * aborts. We don't even bother to allocate a struct request
+ * for them but rather special case them here.
+ */
+ if (unlikely(nvmeq->qid == 0 &&
+ cqe.command_id >= NVME_AQ_BLKMQ_DEPTH)) {
+ nvme_complete_async_event(nvmeq->dev, &cqe);
+ continue;
+ }
+
+ req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id);
+ if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
+ u32 result = le32_to_cpu(cqe.result);
+ req->special = (void *)(uintptr_t)result;
+ }
+ blk_mq_complete_request(req, status >> 1);
+
}
/* If the controller ignores the cq head doorbell and continuously
@@ -1028,112 +827,15 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
return 0;
}
-/*
- * Returns 0 on success. If the result is negative, it's a Linux error code;
- * if the result is positive, it's an NVM Express status code
- */
-int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
- void *buffer, void __user *ubuffer, unsigned bufflen,
- u32 *result, unsigned timeout)
-{
- bool write = cmd->common.opcode & 1;
- struct bio *bio = NULL;
- struct request *req;
- int ret;
-
- req = blk_mq_alloc_request(q, write, 0);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- req->cmd_type = REQ_TYPE_DRV_PRIV;
- req->cmd_flags |= REQ_FAILFAST_DRIVER;
- req->__data_len = 0;
- req->__sector = (sector_t) -1;
- req->bio = req->biotail = NULL;
-
- req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
-
- req->cmd = (unsigned char *)cmd;
- req->cmd_len = sizeof(struct nvme_command);
- req->special = (void *)0;
-
- if (buffer && bufflen) {
- ret = blk_rq_map_kern(q, req, buffer, bufflen,
- __GFP_DIRECT_RECLAIM);
- if (ret)
- goto out;
- } else if (ubuffer && bufflen) {
- ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
- __GFP_DIRECT_RECLAIM);
- if (ret)
- goto out;
- bio = req->bio;
- }
-
- blk_execute_rq(req->q, NULL, req, 0);
- if (bio)
- blk_rq_unmap_user(bio);
- if (result)
- *result = (u32)(uintptr_t)req->special;
- ret = req->errors;
- out:
- blk_mq_free_request(req);
- return ret;
-}
-
-int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
- void *buffer, unsigned bufflen)
-{
- return __nvme_submit_sync_cmd(q, cmd, buffer, NULL, bufflen, NULL, 0);
-}
-
-static int nvme_submit_async_admin_req(struct nvme_dev *dev)
+static void nvme_submit_async_event(struct nvme_dev *dev)
{
- struct nvme_queue *nvmeq = dev->queues[0];
struct nvme_command c;
- struct nvme_cmd_info *cmd_info;
- struct request *req;
-
- req = blk_mq_alloc_request(dev->admin_q, WRITE,
- BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- req->cmd_flags |= REQ_NO_TIMEOUT;
- cmd_info = blk_mq_rq_to_pdu(req);
- nvme_set_info(cmd_info, NULL, async_req_completion);
memset(&c, 0, sizeof(c));
c.common.opcode = nvme_admin_async_event;
- c.common.command_id = req->tag;
-
- blk_mq_free_request(req);
- __nvme_submit_cmd(nvmeq, &c);
- return 0;
-}
-
-static int nvme_submit_admin_async_cmd(struct nvme_dev *dev,
- struct nvme_command *cmd,
- struct async_cmd_info *cmdinfo, unsigned timeout)
-{
- struct nvme_queue *nvmeq = dev->queues[0];
- struct request *req;
- struct nvme_cmd_info *cmd_rq;
-
- req = blk_mq_alloc_request(dev->admin_q, WRITE, 0);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- req->timeout = timeout;
- cmd_rq = blk_mq_rq_to_pdu(req);
- cmdinfo->req = req;
- nvme_set_info(cmd_rq, cmdinfo, async_completion);
- cmdinfo->status = -EINTR;
+ c.common.command_id = NVME_AQ_BLKMQ_DEPTH + --dev->ctrl.event_limit;
- cmd->common.command_id = req->tag;
-
- nvme_submit_cmd(nvmeq, cmd);
- return 0;
+ __nvme_submit_cmd(dev->queues[0], &c);
}
static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
@@ -1144,7 +846,7 @@ static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
c.delete_queue.opcode = opcode;
c.delete_queue.qid = cpu_to_le16(id);
- return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
+ return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
}
static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
@@ -1165,7 +867,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
c.create_cq.cq_flags = cpu_to_le16(flags);
c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);
- return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
+ return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
}
static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
@@ -1186,7 +888,7 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
c.create_sq.sq_flags = cpu_to_le16(flags);
c.create_sq.cqid = cpu_to_le16(qid);
- return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
+ return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
}
static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid)
@@ -1199,195 +901,111 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid);
}
-int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id)
-{
- struct nvme_command c = { };
- int error;
-
- /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
- c.identify.opcode = nvme_admin_identify;
- c.identify.cns = cpu_to_le32(1);
-
- *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
- if (!*id)
- return -ENOMEM;
-
- error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
- sizeof(struct nvme_id_ctrl));
- if (error)
- kfree(*id);
- return error;
-}
-
-int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid,
- struct nvme_id_ns **id)
-{
- struct nvme_command c = { };
- int error;
-
- /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
- c.identify.opcode = nvme_admin_identify,
- c.identify.nsid = cpu_to_le32(nsid),
-
- *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
- if (!*id)
- return -ENOMEM;
-
- error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
- sizeof(struct nvme_id_ns));
- if (error)
- kfree(*id);
- return error;
-}
-
-int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
- dma_addr_t dma_addr, u32 *result)
-{
- struct nvme_command c;
-
- memset(&c, 0, sizeof(c));
- c.features.opcode = nvme_admin_get_features;
- c.features.nsid = cpu_to_le32(nsid);
- c.features.prp1 = cpu_to_le64(dma_addr);
- c.features.fid = cpu_to_le32(fid);
-
- return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, NULL, 0,
- result, 0);
-}
-
-int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
- dma_addr_t dma_addr, u32 *result)
-{
- struct nvme_command c;
-
- memset(&c, 0, sizeof(c));
- c.features.opcode = nvme_admin_set_features;
- c.features.prp1 = cpu_to_le64(dma_addr);
- c.features.fid = cpu_to_le32(fid);
- c.features.dword11 = cpu_to_le32(dword11);
-
- return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, NULL, 0,
- result, 0);
-}
-
-int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log)
+static void abort_endio(struct request *req, int error)
{
- struct nvme_command c = { };
- int error;
-
- c.common.opcode = nvme_admin_get_log_page,
- c.common.nsid = cpu_to_le32(0xFFFFFFFF),
- c.common.cdw10[0] = cpu_to_le32(
- (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
- NVME_LOG_SMART),
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ struct nvme_queue *nvmeq = iod->nvmeq;
+ u32 result = (u32)(uintptr_t)req->special;
+ u16 status = req->errors;
- *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
- if (!*log)
- return -ENOMEM;
+ dev_warn(nvmeq->q_dmadev, "Abort status:%x result:%x", status, result);
+ atomic_inc(&nvmeq->dev->ctrl.abort_limit);
- error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
- sizeof(struct nvme_smart_log));
- if (error)
- kfree(*log);
- return error;
+ blk_mq_free_request(req);
}
-/**
- * nvme_abort_req - Attempt aborting a request
- *
- * Schedule controller reset if the command was already aborted once before and
- * still hasn't been returned to the driver, or if this is the admin queue.
- */
-static void nvme_abort_req(struct request *req)
+static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
{
- struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req);
- struct nvme_queue *nvmeq = cmd_rq->nvmeq;
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ struct nvme_queue *nvmeq = iod->nvmeq;
struct nvme_dev *dev = nvmeq->dev;
struct request *abort_req;
- struct nvme_cmd_info *abort_cmd;
struct nvme_command cmd;
- if (!nvmeq->qid || cmd_rq->aborted) {
- spin_lock(&dev_list_lock);
- if (!__nvme_reset(dev)) {
- dev_warn(dev->dev,
- "I/O %d QID %d timeout, reset controller\n",
- req->tag, nvmeq->qid);
- }
- spin_unlock(&dev_list_lock);
- return;
+ /*
+ * Shutdown immediately if controller times out while starting. The
+ * reset work will see the pci device disabled when it gets the forced
+ * cancellation error. All outstanding requests are completed on
+ * shutdown, so we return BLK_EH_HANDLED.
+ */
+ if (test_bit(NVME_CTRL_RESETTING, &dev->flags)) {
+ dev_warn(dev->dev,
+ "I/O %d QID %d timeout, disable controller\n",
+ req->tag, nvmeq->qid);
+ nvme_dev_disable(dev, false);
+ req->errors = NVME_SC_CANCELLED;
+ return BLK_EH_HANDLED;
}
- if (!dev->abort_limit)
- return;
+ /*
+ * Shutdown the controller immediately and schedule a reset if the
+ * command was already aborted once before and still hasn't been
+ * returned to the driver, or if this is the admin queue.
+ */
+ if (!nvmeq->qid || iod->aborted) {
+ dev_warn(dev->dev,
+ "I/O %d QID %d timeout, reset controller\n",
+ req->tag, nvmeq->qid);
+ nvme_dev_disable(dev, false);
+ queue_work(nvme_workq, &dev->reset_work);
- abort_req = blk_mq_alloc_request(dev->admin_q, WRITE,
- BLK_MQ_REQ_NOWAIT);
- if (IS_ERR(abort_req))
- return;
+ /*
+ * Mark the request as handled, since the inline shutdown
+ * forces all outstanding requests to complete.
+ */
+ req->errors = NVME_SC_CANCELLED;
+ return BLK_EH_HANDLED;
+ }
- abort_cmd = blk_mq_rq_to_pdu(abort_req);
- nvme_set_info(abort_cmd, abort_req, abort_completion);
+ iod->aborted = 1;
+
+ if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) {
+ atomic_inc(&dev->ctrl.abort_limit);
+ return BLK_EH_RESET_TIMER;
+ }
memset(&cmd, 0, sizeof(cmd));
cmd.abort.opcode = nvme_admin_abort_cmd;
cmd.abort.cid = req->tag;
cmd.abort.sqid = cpu_to_le16(nvmeq->qid);
- cmd.abort.command_id = abort_req->tag;
- --dev->abort_limit;
- cmd_rq->aborted = 1;
+ dev_warn(nvmeq->q_dmadev, "I/O %d QID %d timeout, aborting\n",
+ req->tag, nvmeq->qid);
- dev_warn(nvmeq->q_dmadev, "Aborting I/O %d QID %d\n", req->tag,
- nvmeq->qid);
- nvme_submit_cmd(dev->queues[0], &cmd);
+ abort_req = nvme_alloc_request(dev->ctrl.admin_q, &cmd,
+ BLK_MQ_REQ_NOWAIT);
+ if (IS_ERR(abort_req)) {
+ atomic_inc(&dev->ctrl.abort_limit);
+ return BLK_EH_RESET_TIMER;
+ }
+
+ abort_req->timeout = ADMIN_TIMEOUT;
+ abort_req->end_io_data = NULL;
+ blk_execute_rq_nowait(abort_req->q, NULL, abort_req, 0, abort_endio);
+
+ /*
+ * The aborted req will be completed on receiving the abort req.
+ * We enable the timer again. If hit twice, it'll cause a device reset,
+ * as the device then is in a faulty state.
+ */
+ return BLK_EH_RESET_TIMER;
}
static void nvme_cancel_queue_ios(struct request *req, void *data, bool reserved)
{
struct nvme_queue *nvmeq = data;
- void *ctx;
- nvme_completion_fn fn;
- struct nvme_cmd_info *cmd;
- struct nvme_completion cqe;
+ int status;
if (!blk_mq_request_started(req))
return;
- cmd = blk_mq_rq_to_pdu(req);
-
- if (cmd->ctx == CMD_CTX_CANCELLED)
- return;
+ dev_warn(nvmeq->q_dmadev,
+ "Cancelling I/O %d QID %d\n", req->tag, nvmeq->qid);
+ status = NVME_SC_ABORT_REQ;
if (blk_queue_dying(req->q))
- cqe.status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1);
- else
- cqe.status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
-
-
- dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d QID %d\n",
- req->tag, nvmeq->qid);
- ctx = cancel_cmd_info(cmd, &fn);
- fn(nvmeq, ctx, &cqe);
-}
-
-static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
-{
- struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req);
- struct nvme_queue *nvmeq = cmd->nvmeq;
-
- dev_warn(nvmeq->q_dmadev, "Timeout I/O %d QID %d\n", req->tag,
- nvmeq->qid);
- spin_lock_irq(&nvmeq->q_lock);
- nvme_abort_req(req);
- spin_unlock_irq(&nvmeq->q_lock);
-
- /*
- * The aborted req will be completed on receiving the abort req.
- * We enable the timer again. If hit twice, it'll cause a device reset,
- * as the device then is in a faulty state.
- */
- return BLK_EH_RESET_TIMER;
+ status |= NVME_SC_DNR;
+ blk_mq_complete_request(req, status);
}
static void nvme_free_queue(struct nvme_queue *nvmeq)
@@ -1430,8 +1048,8 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
nvmeq->cq_vector = -1;
spin_unlock_irq(&nvmeq->q_lock);
- if (!nvmeq->qid && nvmeq->dev->admin_q)
- blk_mq_freeze_queue_start(nvmeq->dev->admin_q);
+ if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
+ blk_mq_stop_hw_queues(nvmeq->dev->ctrl.admin_q);
irq_set_affinity_hint(vector, NULL);
free_irq(vector, nvmeq);
@@ -1447,21 +1065,20 @@ static void nvme_clear_queue(struct nvme_queue *nvmeq)
spin_unlock_irq(&nvmeq->q_lock);
}
-static void nvme_disable_queue(struct nvme_dev *dev, int qid)
+static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
{
- struct nvme_queue *nvmeq = dev->queues[qid];
+ struct nvme_queue *nvmeq = dev->queues[0];
if (!nvmeq)
return;
if (nvme_suspend_queue(nvmeq))
return;
- /* Don't tell the adapter to delete the admin queue.
- * Don't tell a removed adapter to delete IO queues. */
- if (qid && readl(&dev->bar->csts) != -1) {
- adapter_delete_sq(dev, qid);
- adapter_delete_cq(dev, qid);
- }
+ if (shutdown)
+ nvme_shutdown_ctrl(&dev->ctrl);
+ else
+ nvme_disable_ctrl(&dev->ctrl, lo_hi_readq(
+ dev->bar + NVME_REG_CAP));
spin_lock_irq(&nvmeq->q_lock);
nvme_process_cq(nvmeq);
@@ -1472,11 +1089,12 @@ static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
int entry_size)
{
int q_depth = dev->q_depth;
- unsigned q_size_aligned = roundup(q_depth * entry_size, dev->page_size);
+ unsigned q_size_aligned = roundup(q_depth * entry_size,
+ dev->ctrl.page_size);
if (q_size_aligned * nr_io_queues > dev->cmb_size) {
u64 mem_per_q = div_u64(dev->cmb_size, nr_io_queues);
- mem_per_q = round_down(mem_per_q, dev->page_size);
+ mem_per_q = round_down(mem_per_q, dev->ctrl.page_size);
q_depth = div_u64(mem_per_q, entry_size);
/*
@@ -1495,8 +1113,8 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
int qid, int depth)
{
if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) {
- unsigned offset = (qid - 1) *
- roundup(SQ_SIZE(depth), dev->page_size);
+ unsigned offset = (qid - 1) * roundup(SQ_SIZE(depth),
+ dev->ctrl.page_size);
nvmeq->sq_dma_addr = dev->cmb_dma_addr + offset;
nvmeq->sq_cmds_io = dev->cmb + offset;
} else {
@@ -1527,7 +1145,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
nvmeq->q_dmadev = dev->dev;
nvmeq->dev = dev;
snprintf(nvmeq->irqname, sizeof(nvmeq->irqname), "nvme%dq%d",
- dev->instance, qid);
+ dev->ctrl.instance, qid);
spin_lock_init(&nvmeq->q_lock);
nvmeq->cq_head = 0;
nvmeq->cq_phase = 1;
@@ -1604,79 +1222,9 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
return result;
}
-static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled)
-{
- unsigned long timeout;
- u32 bit = enabled ? NVME_CSTS_RDY : 0;
-
- timeout = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
-
- while ((readl(&dev->bar->csts) & NVME_CSTS_RDY) != bit) {
- msleep(100);
- if (fatal_signal_pending(current))
- return -EINTR;
- if (time_after(jiffies, timeout)) {
- dev_err(dev->dev,
- "Device not ready; aborting %s\n", enabled ?
- "initialisation" : "reset");
- return -ENODEV;
- }
- }
-
- return 0;
-}
-
-/*
- * If the device has been passed off to us in an enabled state, just clear
- * the enabled bit. The spec says we should set the 'shutdown notification
- * bits', but doing so may cause the device to complete commands to the
- * admin queue ... and we don't know what memory that might be pointing at!
- */
-static int nvme_disable_ctrl(struct nvme_dev *dev, u64 cap)
-{
- dev->ctrl_config &= ~NVME_CC_SHN_MASK;
- dev->ctrl_config &= ~NVME_CC_ENABLE;
- writel(dev->ctrl_config, &dev->bar->cc);
-
- return nvme_wait_ready(dev, cap, false);
-}
-
-static int nvme_enable_ctrl(struct nvme_dev *dev, u64 cap)
-{
- dev->ctrl_config &= ~NVME_CC_SHN_MASK;
- dev->ctrl_config |= NVME_CC_ENABLE;
- writel(dev->ctrl_config, &dev->bar->cc);
-
- return nvme_wait_ready(dev, cap, true);
-}
-
-static int nvme_shutdown_ctrl(struct nvme_dev *dev)
-{
- unsigned long timeout;
-
- dev->ctrl_config &= ~NVME_CC_SHN_MASK;
- dev->ctrl_config |= NVME_CC_SHN_NORMAL;
-
- writel(dev->ctrl_config, &dev->bar->cc);
-
- timeout = SHUTDOWN_TIMEOUT + jiffies;
- while ((readl(&dev->bar->csts) & NVME_CSTS_SHST_MASK) !=
- NVME_CSTS_SHST_CMPLT) {
- msleep(100);
- if (fatal_signal_pending(current))
- return -EINTR;
- if (time_after(jiffies, timeout)) {
- dev_err(dev->dev,
- "Device shutdown incomplete; abort shutdown\n");
- return -ENODEV;
- }
- }
-
- return 0;
-}
-
static struct blk_mq_ops nvme_mq_admin_ops = {
.queue_rq = nvme_queue_rq,
+ .complete = nvme_complete_rq,
.map_queue = blk_mq_map_queue,
.init_hctx = nvme_admin_init_hctx,
.exit_hctx = nvme_admin_exit_hctx,
@@ -1686,6 +1234,7 @@ static struct blk_mq_ops nvme_mq_admin_ops = {
static struct blk_mq_ops nvme_mq_ops = {
.queue_rq = nvme_queue_rq,
+ .complete = nvme_complete_rq,
.map_queue = blk_mq_map_queue,
.init_hctx = nvme_init_hctx,
.init_request = nvme_init_request,
@@ -1695,19 +1244,23 @@ static struct blk_mq_ops nvme_mq_ops = {
static void nvme_dev_remove_admin(struct nvme_dev *dev)
{
- if (dev->admin_q && !blk_queue_dying(dev->admin_q)) {
- blk_cleanup_queue(dev->admin_q);
+ if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q)) {
+ blk_cleanup_queue(dev->ctrl.admin_q);
blk_mq_free_tag_set(&dev->admin_tagset);
}
}
static int nvme_alloc_admin_tags(struct nvme_dev *dev)
{
- if (!dev->admin_q) {
+ if (!dev->ctrl.admin_q) {
dev->admin_tagset.ops = &nvme_mq_admin_ops;
dev->admin_tagset.nr_hw_queues = 1;
- dev->admin_tagset.queue_depth = NVME_AQ_DEPTH - 1;
- dev->admin_tagset.reserved_tags = 1;
+
+ /*
+ * Subtract one to leave an empty queue entry for 'Full Queue'
+ * condition. See NVM-Express 1.2 specification, section 4.1.2.
+ */
+ dev->admin_tagset.queue_depth = NVME_AQ_BLKMQ_DEPTH - 1;
dev->admin_tagset.timeout = ADMIN_TIMEOUT;
dev->admin_tagset.numa_node = dev_to_node(dev->dev);
dev->admin_tagset.cmd_size = nvme_cmd_size(dev);
@@ -1716,18 +1269,18 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
if (blk_mq_alloc_tag_set(&dev->admin_tagset))
return -ENOMEM;
- dev->admin_q = blk_mq_init_queue(&dev->admin_tagset);
- if (IS_ERR(dev->admin_q)) {
+ dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset);
+ if (IS_ERR(dev->ctrl.admin_q)) {
blk_mq_free_tag_set(&dev->admin_tagset);
return -ENOMEM;
}
- if (!blk_get_queue(dev->admin_q)) {
+ if (!blk_get_queue(dev->ctrl.admin_q)) {
nvme_dev_remove_admin(dev);
- dev->admin_q = NULL;
+ dev->ctrl.admin_q = NULL;
return -ENODEV;
}
} else
- blk_mq_unfreeze_queue(dev->admin_q);
+ blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);
return 0;
}
@@ -1736,31 +1289,17 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
{
int result;
u32 aqa;
- u64 cap = lo_hi_readq(&dev->bar->cap);
+ u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
struct nvme_queue *nvmeq;
- /*
- * default to a 4K page size, with the intention to update this
- * path in the future to accomodate architectures with differing
- * kernel and IO page sizes.
- */
- unsigned page_shift = 12;
- unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12;
-
- if (page_shift < dev_page_min) {
- dev_err(dev->dev,
- "Minimum device page size (%u) too large for "
- "host (%u)\n", 1 << dev_page_min,
- 1 << page_shift);
- return -ENODEV;
- }
- dev->subsystem = readl(&dev->bar->vs) >= NVME_VS(1, 1) ?
+ dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1) ?
NVME_CAP_NSSRC(cap) : 0;
- if (dev->subsystem && (readl(&dev->bar->csts) & NVME_CSTS_NSSRO))
- writel(NVME_CSTS_NSSRO, &dev->bar->csts);
+ if (dev->subsystem &&
+ (readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO))
+ writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS);
- result = nvme_disable_ctrl(dev, cap);
+ result = nvme_disable_ctrl(&dev->ctrl, cap);
if (result < 0)
return result;
@@ -1774,18 +1313,11 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
aqa = nvmeq->q_depth - 1;
aqa |= aqa << 16;
- dev->page_size = 1 << page_shift;
-
- dev->ctrl_config = NVME_CC_CSS_NVM;
- dev->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
- dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
- dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
-
- writel(aqa, &dev->bar->aqa);
- lo_hi_writeq(nvmeq->sq_dma_addr, &dev->bar->asq);
- lo_hi_writeq(nvmeq->cq_dma_addr, &dev->bar->acq);
+ writel(aqa, dev->bar + NVME_REG_AQA);
+ lo_hi_writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ);
+ lo_hi_writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ);
- result = nvme_enable_ctrl(dev, cap);
+ result = nvme_enable_ctrl(&dev->ctrl, cap);
if (result)
goto free_nvmeq;
@@ -1803,406 +1335,6 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
return result;
}
-static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
-{
- struct nvme_dev *dev = ns->dev;
- struct nvme_user_io io;
- struct nvme_command c;
- unsigned length, meta_len;
- int status, write;
- dma_addr_t meta_dma = 0;
- void *meta = NULL;
- void __user *metadata;
-
- if (copy_from_user(&io, uio, sizeof(io)))
- return -EFAULT;
-
- switch (io.opcode) {
- case nvme_cmd_write:
- case nvme_cmd_read:
- case nvme_cmd_compare:
- break;
- default:
- return -EINVAL;
- }
-
- length = (io.nblocks + 1) << ns->lba_shift;
- meta_len = (io.nblocks + 1) * ns->ms;
- metadata = (void __user *)(uintptr_t)io.metadata;
- write = io.opcode & 1;
-
- if (ns->ext) {
- length += meta_len;
- meta_len = 0;
- }
- if (meta_len) {
- if (((io.metadata & 3) || !io.metadata) && !ns->ext)
- return -EINVAL;
-
- meta = dma_alloc_coherent(dev->dev, meta_len,
- &meta_dma, GFP_KERNEL);
-
- if (!meta) {
- status = -ENOMEM;
- goto unmap;
- }
- if (write) {
- if (copy_from_user(meta, metadata, meta_len)) {
- status = -EFAULT;
- goto unmap;
- }
- }
- }
-
- memset(&c, 0, sizeof(c));
- c.rw.opcode = io.opcode;
- c.rw.flags = io.flags;
- c.rw.nsid = cpu_to_le32(ns->ns_id);
- c.rw.slba = cpu_to_le64(io.slba);
- c.rw.length = cpu_to_le16(io.nblocks);
- c.rw.control = cpu_to_le16(io.control);
- c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);
- c.rw.reftag = cpu_to_le32(io.reftag);
- c.rw.apptag = cpu_to_le16(io.apptag);
- c.rw.appmask = cpu_to_le16(io.appmask);
- c.rw.metadata = cpu_to_le64(meta_dma);
-
- status = __nvme_submit_sync_cmd(ns->queue, &c, NULL,
- (void __user *)(uintptr_t)io.addr, length, NULL, 0);
- unmap:
- if (meta) {
- if (status == NVME_SC_SUCCESS && !write) {
- if (copy_to_user(metadata, meta, meta_len))
- status = -EFAULT;
- }
- dma_free_coherent(dev->dev, meta_len, meta, meta_dma);
- }
- return status;
-}
-
-static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
- struct nvme_passthru_cmd __user *ucmd)
-{
- struct nvme_passthru_cmd cmd;
- struct nvme_command c;
- unsigned timeout = 0;
- int status;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
- if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
- return -EFAULT;
-
- memset(&c, 0, sizeof(c));
- c.common.opcode = cmd.opcode;
- c.common.flags = cmd.flags;
- c.common.nsid = cpu_to_le32(cmd.nsid);
- c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
- c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
- c.common.cdw10[0] = cpu_to_le32(cmd.cdw10);
- c.common.cdw10[1] = cpu_to_le32(cmd.cdw11);
- c.common.cdw10[2] = cpu_to_le32(cmd.cdw12);
- c.common.cdw10[3] = cpu_to_le32(cmd.cdw13);
- c.common.cdw10[4] = cpu_to_le32(cmd.cdw14);
- c.common.cdw10[5] = cpu_to_le32(cmd.cdw15);
-
- if (cmd.timeout_ms)
- timeout = msecs_to_jiffies(cmd.timeout_ms);
-
- status = __nvme_submit_sync_cmd(ns ? ns->queue : dev->admin_q, &c,
- NULL, (void __user *)(uintptr_t)cmd.addr, cmd.data_len,
- &cmd.result, timeout);
- if (status >= 0) {
- if (put_user(cmd.result, &ucmd->result))
- return -EFAULT;
- }
-
- return status;
-}
-
-static int nvme_subsys_reset(struct nvme_dev *dev)
-{
- if (!dev->subsystem)
- return -ENOTTY;
-
- writel(0x4E564D65, &dev->bar->nssr); /* "NVMe" */
- return 0;
-}
-
-static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
- unsigned long arg)
-{
- struct nvme_ns *ns = bdev->bd_disk->private_data;
-
- switch (cmd) {
- case NVME_IOCTL_ID:
- force_successful_syscall_return();
- return ns->ns_id;
- case NVME_IOCTL_ADMIN_CMD:
- return nvme_user_cmd(ns->dev, NULL, (void __user *)arg);
- case NVME_IOCTL_IO_CMD:
- return nvme_user_cmd(ns->dev, ns, (void __user *)arg);
- case NVME_IOCTL_SUBMIT_IO:
- return nvme_submit_io(ns, (void __user *)arg);
- case SG_GET_VERSION_NUM:
- return nvme_sg_get_version_num((void __user *)arg);
- case SG_IO:
- return nvme_sg_io(ns, (void __user *)arg);
- default:
- return -ENOTTY;
- }
-}
-
-#ifdef CONFIG_COMPAT
-static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
- unsigned int cmd, unsigned long arg)
-{
- switch (cmd) {
- case SG_IO:
- return -ENOIOCTLCMD;
- }
- return nvme_ioctl(bdev, mode, cmd, arg);
-}
-#else
-#define nvme_compat_ioctl NULL
-#endif
-
-static void nvme_free_dev(struct kref *kref);
-static void nvme_free_ns(struct kref *kref)
-{
- struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);
-
- if (ns->type == NVME_NS_LIGHTNVM)
- nvme_nvm_unregister(ns->queue, ns->disk->disk_name);
-
- spin_lock(&dev_list_lock);
- ns->disk->private_data = NULL;
- spin_unlock(&dev_list_lock);
-
- kref_put(&ns->dev->kref, nvme_free_dev);
- put_disk(ns->disk);
- kfree(ns);
-}
-
-static int nvme_open(struct block_device *bdev, fmode_t mode)
-{
- int ret = 0;
- struct nvme_ns *ns;
-
- spin_lock(&dev_list_lock);
- ns = bdev->bd_disk->private_data;
- if (!ns)
- ret = -ENXIO;
- else if (!kref_get_unless_zero(&ns->kref))
- ret = -ENXIO;
- spin_unlock(&dev_list_lock);
-
- return ret;
-}
-
-static void nvme_release(struct gendisk *disk, fmode_t mode)
-{
- struct nvme_ns *ns = disk->private_data;
- kref_put(&ns->kref, nvme_free_ns);
-}
-
-static int nvme_getgeo(struct block_device *bd, struct hd_geometry *geo)
-{
- /* some standard values */
- geo->heads = 1 << 6;
- geo->sectors = 1 << 5;
- geo->cylinders = get_capacity(bd->bd_disk) >> 11;
- return 0;
-}
-
-static void nvme_config_discard(struct nvme_ns *ns)
-{
- u32 logical_block_size = queue_logical_block_size(ns->queue);
- ns->queue->limits.discard_zeroes_data = 0;
- ns->queue->limits.discard_alignment = logical_block_size;
- ns->queue->limits.discard_granularity = logical_block_size;
- blk_queue_max_discard_sectors(ns->queue, 0xffffffff);
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
-}
-
-static int nvme_revalidate_disk(struct gendisk *disk)
-{
- struct nvme_ns *ns = disk->private_data;
- struct nvme_dev *dev = ns->dev;
- struct nvme_id_ns *id;
- u8 lbaf, pi_type;
- u16 old_ms;
- unsigned short bs;
-
- if (nvme_identify_ns(dev, ns->ns_id, &id)) {
- dev_warn(dev->dev, "%s: Identify failure nvme%dn%d\n", __func__,
- dev->instance, ns->ns_id);
- return -ENODEV;
- }
- if (id->ncap == 0) {
- kfree(id);
- return -ENODEV;
- }
-
- if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) {
- if (nvme_nvm_register(ns->queue, disk->disk_name)) {
- dev_warn(dev->dev,
- "%s: LightNVM init failure\n", __func__);
- kfree(id);
- return -ENODEV;
- }
- ns->type = NVME_NS_LIGHTNVM;
- }
-
- old_ms = ns->ms;
- lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
- ns->lba_shift = id->lbaf[lbaf].ds;
- ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
- ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
-
- /*
- * If identify namespace failed, use default 512 byte block size so
- * block layer can use before failing read/write for 0 capacity.
- */
- if (ns->lba_shift == 0)
- ns->lba_shift = 9;
- bs = 1 << ns->lba_shift;
-
- /* XXX: PI implementation requires metadata equal t10 pi tuple size */
- pi_type = ns->ms == sizeof(struct t10_pi_tuple) ?
- id->dps & NVME_NS_DPS_PI_MASK : 0;
-
- blk_mq_freeze_queue(disk->queue);
- if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
- ns->ms != old_ms ||
- bs != queue_logical_block_size(disk->queue) ||
- (ns->ms && ns->ext)))
- blk_integrity_unregister(disk);
-
- ns->pi_type = pi_type;
- blk_queue_logical_block_size(ns->queue, bs);
-
- if (ns->ms && !ns->ext)
- nvme_init_integrity(ns);
-
- if ((ns->ms && !(ns->ms == 8 && ns->pi_type) &&
- !blk_get_integrity(disk)) ||
- ns->type == NVME_NS_LIGHTNVM)
- set_capacity(disk, 0);
- else
- set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
-
- if (dev->oncs & NVME_CTRL_ONCS_DSM)
- nvme_config_discard(ns);
- blk_mq_unfreeze_queue(disk->queue);
-
- kfree(id);
- return 0;
-}
-
-static char nvme_pr_type(enum pr_type type)
-{
- switch (type) {
- case PR_WRITE_EXCLUSIVE:
- return 1;
- case PR_EXCLUSIVE_ACCESS:
- return 2;
- case PR_WRITE_EXCLUSIVE_REG_ONLY:
- return 3;
- case PR_EXCLUSIVE_ACCESS_REG_ONLY:
- return 4;
- case PR_WRITE_EXCLUSIVE_ALL_REGS:
- return 5;
- case PR_EXCLUSIVE_ACCESS_ALL_REGS:
- return 6;
- default:
- return 0;
- }
-};
-
-static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
- u64 key, u64 sa_key, u8 op)
-{
- struct nvme_ns *ns = bdev->bd_disk->private_data;
- struct nvme_command c;
- u8 data[16] = { 0, };
-
- put_unaligned_le64(key, &data[0]);
- put_unaligned_le64(sa_key, &data[8]);
-
- memset(&c, 0, sizeof(c));
- c.common.opcode = op;
- c.common.nsid = cpu_to_le32(ns->ns_id);
- c.common.cdw10[0] = cpu_to_le32(cdw10);
-
- return nvme_submit_sync_cmd(ns->queue, &c, data, 16);
-}
-
-static int nvme_pr_register(struct block_device *bdev, u64 old,
- u64 new, unsigned flags)
-{
- u32 cdw10;
-
- if (flags & ~PR_FL_IGNORE_KEY)
- return -EOPNOTSUPP;
-
- cdw10 = old ? 2 : 0;
- cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0;
- cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */
- return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register);
-}
-
-static int nvme_pr_reserve(struct block_device *bdev, u64 key,
- enum pr_type type, unsigned flags)
-{
- u32 cdw10;
-
- if (flags & ~PR_FL_IGNORE_KEY)
- return -EOPNOTSUPP;
-
- cdw10 = nvme_pr_type(type) << 8;
- cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0);
- return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire);
-}
-
-static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
- enum pr_type type, bool abort)
-{
- u32 cdw10 = nvme_pr_type(type) << 8 | abort ? 2 : 1;
- return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire);
-}
-
-static int nvme_pr_clear(struct block_device *bdev, u64 key)
-{
- u32 cdw10 = 1 | (key ? 1 << 3 : 0);
- return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register);
-}
-
-static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
-{
- u32 cdw10 = nvme_pr_type(type) << 8 | key ? 1 << 3 : 0;
- return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
-}
-
-static const struct pr_ops nvme_pr_ops = {
- .pr_register = nvme_pr_register,
- .pr_reserve = nvme_pr_reserve,
- .pr_release = nvme_pr_release,
- .pr_preempt = nvme_pr_preempt,
- .pr_clear = nvme_pr_clear,
-};
-
-static const struct block_device_operations nvme_fops = {
- .owner = THIS_MODULE,
- .ioctl = nvme_ioctl,
- .compat_ioctl = nvme_compat_ioctl,
- .open = nvme_open,
- .release = nvme_release,
- .getgeo = nvme_getgeo,
- .revalidate_disk= nvme_revalidate_disk,
- .pr_ops = &nvme_pr_ops,
-};
-
static int nvme_kthread(void *data)
{
struct nvme_dev *dev, *next;
@@ -2212,14 +1344,20 @@ static int nvme_kthread(void *data)
spin_lock(&dev_list_lock);
list_for_each_entry_safe(dev, next, &dev_list, node) {
int i;
- u32 csts = readl(&dev->bar->csts);
+ u32 csts = readl(dev->bar + NVME_REG_CSTS);
+
+ /*
+ * Skip controllers currently under reset.
+ */
+ if (work_pending(&dev->reset_work) || work_busy(&dev->reset_work))
+ continue;
if ((dev->subsystem && (csts & NVME_CSTS_NSSRO)) ||
csts & NVME_CSTS_CFS) {
- if (!__nvme_reset(dev)) {
+ if (queue_work(nvme_workq, &dev->reset_work)) {
dev_warn(dev->dev,
"Failed status: %x, reset controller\n",
- readl(&dev->bar->csts));
+ readl(dev->bar + NVME_REG_CSTS));
}
continue;
}
@@ -2230,11 +1368,8 @@ static int nvme_kthread(void *data)
spin_lock_irq(&nvmeq->q_lock);
nvme_process_cq(nvmeq);
- while ((i == 0) && (dev->event_limit > 0)) {
- if (nvme_submit_async_admin_req(dev))
- break;
- dev->event_limit--;
- }
+ while (i == 0 && dev->ctrl.event_limit > 0)
+ nvme_submit_async_event(dev);
spin_unlock_irq(&nvmeq->q_lock);
}
}
@@ -2244,127 +1379,33 @@ static int nvme_kthread(void *data)
return 0;
}
-static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
-{
- struct nvme_ns *ns;
- struct gendisk *disk;
- int node = dev_to_node(dev->dev);
-
- ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
- if (!ns)
- return;
-
- ns->queue = blk_mq_init_queue(&dev->tagset);
- if (IS_ERR(ns->queue))
- goto out_free_ns;
- queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue);
- queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
- ns->dev = dev;
- ns->queue->queuedata = ns;
-
- disk = alloc_disk_node(0, node);
- if (!disk)
- goto out_free_queue;
-
- kref_init(&ns->kref);
- ns->ns_id = nsid;
- ns->disk = disk;
- ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */
- list_add_tail(&ns->list, &dev->namespaces);
-
- blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
- if (dev->max_hw_sectors) {
- blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors);
- blk_queue_max_segments(ns->queue,
- (dev->max_hw_sectors / (dev->page_size >> 9)) + 1);
- }
- if (dev->stripe_size)
- blk_queue_chunk_sectors(ns->queue, dev->stripe_size >> 9);
- if (dev->vwc & NVME_CTRL_VWC_PRESENT)
- blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA);
- blk_queue_virt_boundary(ns->queue, dev->page_size - 1);
-
- disk->major = nvme_major;
- disk->first_minor = 0;
- disk->fops = &nvme_fops;
- disk->private_data = ns;
- disk->queue = ns->queue;
- disk->driverfs_dev = dev->device;
- disk->flags = GENHD_FL_EXT_DEVT;
- sprintf(disk->disk_name, "nvme%dn%d", dev->instance, nsid);
-
- /*
- * Initialize capacity to 0 until we establish the namespace format and
- * setup integrity extentions if necessary. The revalidate_disk after
- * add_disk allows the driver to register with integrity if the format
- * requires it.
- */
- set_capacity(disk, 0);
- if (nvme_revalidate_disk(ns->disk))
- goto out_free_disk;
-
- kref_get(&dev->kref);
- if (ns->type != NVME_NS_LIGHTNVM) {
- add_disk(ns->disk);
- if (ns->ms) {
- struct block_device *bd = bdget_disk(ns->disk, 0);
- if (!bd)
- return;
- if (blkdev_get(bd, FMODE_READ, NULL)) {
- bdput(bd);
- return;
- }
- blkdev_reread_part(bd);
- blkdev_put(bd, FMODE_READ);
- }
- }
- return;
- out_free_disk:
- kfree(disk);
- list_del(&ns->list);
- out_free_queue:
- blk_cleanup_queue(ns->queue);
- out_free_ns:
- kfree(ns);
-}
-
-/*
- * Create I/O queues. Failing to create an I/O queue is not an issue,
- * we can continue with less than the desired amount of queues, and
- * even a controller without I/O queues an still be used to issue
- * admin commands. This might be useful to upgrade a buggy firmware
- * for example.
- */
-static void nvme_create_io_queues(struct nvme_dev *dev)
+static int nvme_create_io_queues(struct nvme_dev *dev)
{
unsigned i;
+ int ret = 0;
- for (i = dev->queue_count; i <= dev->max_qid; i++)
- if (!nvme_alloc_queue(dev, i, dev->q_depth))
+ for (i = dev->queue_count; i <= dev->max_qid; i++) {
+ if (!nvme_alloc_queue(dev, i, dev->q_depth)) {
+ ret = -ENOMEM;
break;
+ }
+ }
- for (i = dev->online_queues; i <= dev->queue_count - 1; i++)
- if (nvme_create_queue(dev->queues[i], i)) {
+ for (i = dev->online_queues; i <= dev->queue_count - 1; i++) {
+ ret = nvme_create_queue(dev->queues[i], i);
+ if (ret) {
nvme_free_queues(dev, i);
break;
}
-}
-
-static int set_queue_count(struct nvme_dev *dev, int count)
-{
- int status;
- u32 result;
- u32 q_count = (count - 1) | ((count - 1) << 16);
-
- status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES, q_count, 0,
- &result);
- if (status < 0)
- return status;
- if (status > 0) {
- dev_err(dev->dev, "Could not set queue count (%d)\n", status);
- return 0;
}
- return min(result & 0xffff, result >> 16) + 1;
+
+ /*
+ * Ignore failing Create SQ/CQ commands, we can continue with less
+ * than the desired aount of queues, and even a controller without
+ * I/O queues an still be used to issue admin commands. This might
+ * be useful to upgrade a buggy firmware for example.
+ */
+ return ret >= 0 ? 0 : ret;
}
static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
@@ -2379,11 +1420,11 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
if (!use_cmb_sqes)
return NULL;
- dev->cmbsz = readl(&dev->bar->cmbsz);
+ dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ);
if (!(NVME_CMB_SZ(dev->cmbsz)))
return NULL;
- cmbloc = readl(&dev->bar->cmbloc);
+ cmbloc = readl(dev->bar + NVME_REG_CMBLOC);
szu = (u64)1 << (12 + 4 * NVME_CMB_SZU(dev->cmbsz));
size = szu * NVME_CMB_SZ(dev->cmbsz);
@@ -2431,11 +1472,20 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
int result, i, vecs, nr_io_queues, size;
nr_io_queues = num_possible_cpus();
- result = set_queue_count(dev, nr_io_queues);
- if (result <= 0)
+ result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
+ if (result < 0)
return result;
- if (result < nr_io_queues)
- nr_io_queues = result;
+
+ /*
+ * Degraded controllers might return an error when setting the queue
+ * count. We still want to be able to bring them online and offer
+ * access to the admin queue, as that might be only way to fix them up.
+ */
+ if (result > 0) {
+ dev_err(dev->dev, "Could not set queue count (%d)\n", result);
+ nr_io_queues = 0;
+ result = 0;
+ }
if (dev->cmb && NVME_CMB_SQS(dev->cmbsz)) {
result = nvme_cmb_qdepth(dev, nr_io_queues,
@@ -2457,7 +1507,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
return -ENOMEM;
size = db_bar_size(dev, nr_io_queues);
} while (1);
- dev->dbs = ((void __iomem *)dev->bar) + 4096;
+ dev->dbs = dev->bar + 4096;
adminq->q_db = dev->dbs;
}
@@ -2501,115 +1551,115 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
/* Free previously allocated queues that are no longer usable */
nvme_free_queues(dev, nr_io_queues + 1);
- nvme_create_io_queues(dev);
-
- return 0;
+ return nvme_create_io_queues(dev);
free_queues:
nvme_free_queues(dev, 1);
return result;
}
-static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
+static void nvme_set_irq_hints(struct nvme_dev *dev)
{
- struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
- struct nvme_ns *nsb = container_of(b, struct nvme_ns, list);
+ struct nvme_queue *nvmeq;
+ int i;
- return nsa->ns_id - nsb->ns_id;
-}
+ for (i = 0; i < dev->online_queues; i++) {
+ nvmeq = dev->queues[i];
-static struct nvme_ns *nvme_find_ns(struct nvme_dev *dev, unsigned nsid)
-{
- struct nvme_ns *ns;
+ if (!nvmeq->tags || !(*nvmeq->tags))
+ continue;
- list_for_each_entry(ns, &dev->namespaces, list) {
- if (ns->ns_id == nsid)
- return ns;
- if (ns->ns_id > nsid)
- break;
+ irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector,
+ blk_mq_tags_cpumask(*nvmeq->tags));
}
- return NULL;
}
-static inline bool nvme_io_incapable(struct nvme_dev *dev)
+static void nvme_dev_scan(struct work_struct *work)
{
- return (!dev->bar || readl(&dev->bar->csts) & NVME_CSTS_CFS ||
- dev->online_queues < 2);
+ struct nvme_dev *dev = container_of(work, struct nvme_dev, scan_work);
+
+ if (!dev->tagset.tags)
+ return;
+ nvme_scan_namespaces(&dev->ctrl);
+ nvme_set_irq_hints(dev);
}
-static void nvme_ns_remove(struct nvme_ns *ns)
+static void nvme_del_queue_end(struct request *req, int error)
{
- bool kill = nvme_io_incapable(ns->dev) && !blk_queue_dying(ns->queue);
+ struct nvme_queue *nvmeq = req->end_io_data;
- if (kill) {
- blk_set_queue_dying(ns->queue);
-
- /*
- * The controller was shutdown first if we got here through
- * device removal. The shutdown may requeue outstanding
- * requests. These need to be aborted immediately so
- * del_gendisk doesn't block indefinitely for their completion.
- */
- blk_mq_abort_requeue_list(ns->queue);
- }
- if (ns->disk->flags & GENHD_FL_UP)
- del_gendisk(ns->disk);
- if (kill || !blk_queue_dying(ns->queue)) {
- blk_mq_abort_requeue_list(ns->queue);
- blk_cleanup_queue(ns->queue);
- }
- list_del_init(&ns->list);
- kref_put(&ns->kref, nvme_free_ns);
+ blk_mq_free_request(req);
+ complete(&nvmeq->dev->ioq_wait);
}
-static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn)
+static void nvme_del_cq_end(struct request *req, int error)
{
- struct nvme_ns *ns, *next;
- unsigned i;
+ struct nvme_queue *nvmeq = req->end_io_data;
- for (i = 1; i <= nn; i++) {
- ns = nvme_find_ns(dev, i);
- if (ns) {
- if (revalidate_disk(ns->disk))
- nvme_ns_remove(ns);
- } else
- nvme_alloc_ns(dev, i);
- }
- list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
- if (ns->ns_id > nn)
- nvme_ns_remove(ns);
+ if (!error) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&nvmeq->q_lock, flags);
+ nvme_process_cq(nvmeq);
+ spin_unlock_irqrestore(&nvmeq->q_lock, flags);
}
- list_sort(NULL, &dev->namespaces, ns_cmp);
+
+ nvme_del_queue_end(req, error);
}
-static void nvme_set_irq_hints(struct nvme_dev *dev)
+static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
{
- struct nvme_queue *nvmeq;
- int i;
+ struct request_queue *q = nvmeq->dev->ctrl.admin_q;
+ struct request *req;
+ struct nvme_command cmd;
- for (i = 0; i < dev->online_queues; i++) {
- nvmeq = dev->queues[i];
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.delete_queue.opcode = opcode;
+ cmd.delete_queue.qid = cpu_to_le16(nvmeq->qid);
- if (!nvmeq->tags || !(*nvmeq->tags))
- continue;
+ req = nvme_alloc_request(q, &cmd, BLK_MQ_REQ_NOWAIT);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
- irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector,
- blk_mq_tags_cpumask(*nvmeq->tags));
- }
+ req->timeout = ADMIN_TIMEOUT;
+ req->end_io_data = nvmeq;
+
+ blk_execute_rq_nowait(q, NULL, req, false,
+ opcode == nvme_admin_delete_cq ?
+ nvme_del_cq_end : nvme_del_queue_end);
+ return 0;
}
-static void nvme_dev_scan(struct work_struct *work)
+static void nvme_disable_io_queues(struct nvme_dev *dev)
{
- struct nvme_dev *dev = container_of(work, struct nvme_dev, scan_work);
- struct nvme_id_ctrl *ctrl;
+ int pass;
+ unsigned long timeout;
+ u8 opcode = nvme_admin_delete_sq;
- if (!dev->tagset.tags)
- return;
- if (nvme_identify_ctrl(dev, &ctrl))
- return;
- nvme_scan_namespaces(dev, le32_to_cpup(&ctrl->nn));
- kfree(ctrl);
- nvme_set_irq_hints(dev);
+ for (pass = 0; pass < 2; pass++) {
+ int sent = 0, i = dev->queue_count - 1;
+
+ reinit_completion(&dev->ioq_wait);
+ retry:
+ timeout = ADMIN_TIMEOUT;
+ for (; i > 0; i--) {
+ struct nvme_queue *nvmeq = dev->queues[i];
+
+ if (!pass)
+ nvme_suspend_queue(nvmeq);
+ if (nvme_delete_queue(nvmeq, opcode))
+ break;
+ ++sent;
+ }
+ while (sent--) {
+ timeout = wait_for_completion_io_timeout(&dev->ioq_wait, timeout);
+ if (timeout == 0)
+ return;
+ if (i)
+ goto retry;
+ }
+ opcode = nvme_admin_delete_cq;
+ }
}
/*
@@ -2620,42 +1670,7 @@ static void nvme_dev_scan(struct work_struct *work)
*/
static int nvme_dev_add(struct nvme_dev *dev)
{
- struct pci_dev *pdev = to_pci_dev(dev->dev);
- int res;
- struct nvme_id_ctrl *ctrl;
- int shift = NVME_CAP_MPSMIN(lo_hi_readq(&dev->bar->cap)) + 12;
-
- res = nvme_identify_ctrl(dev, &ctrl);
- if (res) {
- dev_err(dev->dev, "Identify Controller failed (%d)\n", res);
- return -EIO;
- }
-
- dev->oncs = le16_to_cpup(&ctrl->oncs);
- dev->abort_limit = ctrl->acl + 1;
- dev->vwc = ctrl->vwc;
- memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
- memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
- memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
- if (ctrl->mdts)
- dev->max_hw_sectors = 1 << (ctrl->mdts + shift - 9);
- else
- dev->max_hw_sectors = UINT_MAX;
- if ((pdev->vendor == PCI_VENDOR_ID_INTEL) &&
- (pdev->device == 0x0953) && ctrl->vs[3]) {
- unsigned int max_hw_sectors;
-
- dev->stripe_size = 1 << (ctrl->vs[3] + shift);
- max_hw_sectors = dev->stripe_size >> (shift - 9);
- if (dev->max_hw_sectors) {
- dev->max_hw_sectors = min(max_hw_sectors,
- dev->max_hw_sectors);
- } else
- dev->max_hw_sectors = max_hw_sectors;
- }
- kfree(ctrl);
-
- if (!dev->tagset.tags) {
+ if (!dev->ctrl.tagset) {
dev->tagset.ops = &nvme_mq_ops;
dev->tagset.nr_hw_queues = dev->online_queues - 1;
dev->tagset.timeout = NVME_IO_TIMEOUT;
@@ -2668,8 +1683,9 @@ static int nvme_dev_add(struct nvme_dev *dev)
if (blk_mq_alloc_tag_set(&dev->tagset))
return 0;
+ dev->ctrl.tagset = &dev->tagset;
}
- schedule_work(&dev->scan_work);
+ queue_work(nvme_workq, &dev->scan_work);
return 0;
}
@@ -2699,7 +1715,7 @@ static int nvme_dev_map(struct nvme_dev *dev)
if (!dev->bar)
goto disable;
- if (readl(&dev->bar->csts) == -1) {
+ if (readl(dev->bar + NVME_REG_CSTS) == -1) {
result = -ENODEV;
goto unmap;
}
@@ -2714,10 +1730,11 @@ static int nvme_dev_map(struct nvme_dev *dev)
goto unmap;
}
- cap = lo_hi_readq(&dev->bar->cap);
+ cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
+
dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH);
dev->db_stride = 1 << NVME_CAP_STRIDE(cap);
- dev->dbs = ((void __iomem *)dev->bar) + 4096;
+ dev->dbs = dev->bar + 4096;
/*
* Temporary fix for the Apple controller found in the MacBook8,1 and
@@ -2730,9 +1747,11 @@ static int nvme_dev_map(struct nvme_dev *dev)
dev->q_depth);
}
- if (readl(&dev->bar->vs) >= NVME_VS(1, 2))
+ if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 2))
dev->cmb = nvme_map_cmb(dev);
+ pci_enable_pcie_error_reporting(pdev);
+ pci_save_state(pdev);
return 0;
unmap:
@@ -2760,152 +1779,34 @@ static void nvme_dev_unmap(struct nvme_dev *dev)
pci_release_regions(pdev);
}
- if (pci_is_enabled(pdev))
+ if (pci_is_enabled(pdev)) {
+ pci_disable_pcie_error_reporting(pdev);
pci_disable_device(pdev);
-}
-
-struct nvme_delq_ctx {
- struct task_struct *waiter;
- struct kthread_worker *worker;
- atomic_t refcount;
-};
-
-static void nvme_wait_dq(struct nvme_delq_ctx *dq, struct nvme_dev *dev)
-{
- dq->waiter = current;
- mb();
-
- for (;;) {
- set_current_state(TASK_KILLABLE);
- if (!atomic_read(&dq->refcount))
- break;
- if (!schedule_timeout(ADMIN_TIMEOUT) ||
- fatal_signal_pending(current)) {
- /*
- * Disable the controller first since we can't trust it
- * at this point, but leave the admin queue enabled
- * until all queue deletion requests are flushed.
- * FIXME: This may take a while if there are more h/w
- * queues than admin tags.
- */
- set_current_state(TASK_RUNNING);
- nvme_disable_ctrl(dev, lo_hi_readq(&dev->bar->cap));
- nvme_clear_queue(dev->queues[0]);
- flush_kthread_worker(dq->worker);
- nvme_disable_queue(dev, 0);
- return;
- }
}
- set_current_state(TASK_RUNNING);
-}
-
-static void nvme_put_dq(struct nvme_delq_ctx *dq)
-{
- atomic_dec(&dq->refcount);
- if (dq->waiter)
- wake_up_process(dq->waiter);
-}
-
-static struct nvme_delq_ctx *nvme_get_dq(struct nvme_delq_ctx *dq)
-{
- atomic_inc(&dq->refcount);
- return dq;
-}
-
-static void nvme_del_queue_end(struct nvme_queue *nvmeq)
-{
- struct nvme_delq_ctx *dq = nvmeq->cmdinfo.ctx;
- nvme_put_dq(dq);
-
- spin_lock_irq(&nvmeq->q_lock);
- nvme_process_cq(nvmeq);
- spin_unlock_irq(&nvmeq->q_lock);
-}
-
-static int adapter_async_del_queue(struct nvme_queue *nvmeq, u8 opcode,
- kthread_work_func_t fn)
-{
- struct nvme_command c;
-
- memset(&c, 0, sizeof(c));
- c.delete_queue.opcode = opcode;
- c.delete_queue.qid = cpu_to_le16(nvmeq->qid);
-
- init_kthread_work(&nvmeq->cmdinfo.work, fn);
- return nvme_submit_admin_async_cmd(nvmeq->dev, &c, &nvmeq->cmdinfo,
- ADMIN_TIMEOUT);
-}
-
-static void nvme_del_cq_work_handler(struct kthread_work *work)
-{
- struct nvme_queue *nvmeq = container_of(work, struct nvme_queue,
- cmdinfo.work);
- nvme_del_queue_end(nvmeq);
-}
-
-static int nvme_delete_cq(struct nvme_queue *nvmeq)
-{
- return adapter_async_del_queue(nvmeq, nvme_admin_delete_cq,
- nvme_del_cq_work_handler);
-}
-
-static void nvme_del_sq_work_handler(struct kthread_work *work)
-{
- struct nvme_queue *nvmeq = container_of(work, struct nvme_queue,
- cmdinfo.work);
- int status = nvmeq->cmdinfo.status;
-
- if (!status)
- status = nvme_delete_cq(nvmeq);
- if (status)
- nvme_del_queue_end(nvmeq);
-}
-
-static int nvme_delete_sq(struct nvme_queue *nvmeq)
-{
- return adapter_async_del_queue(nvmeq, nvme_admin_delete_sq,
- nvme_del_sq_work_handler);
}
-static void nvme_del_queue_start(struct kthread_work *work)
+static int nvme_dev_list_add(struct nvme_dev *dev)
{
- struct nvme_queue *nvmeq = container_of(work, struct nvme_queue,
- cmdinfo.work);
- if (nvme_delete_sq(nvmeq))
- nvme_del_queue_end(nvmeq);
-}
+ bool start_thread = false;
-static void nvme_disable_io_queues(struct nvme_dev *dev)
-{
- int i;
- DEFINE_KTHREAD_WORKER_ONSTACK(worker);
- struct nvme_delq_ctx dq;
- struct task_struct *kworker_task = kthread_run(kthread_worker_fn,
- &worker, "nvme%d", dev->instance);
-
- if (IS_ERR(kworker_task)) {
- dev_err(dev->dev,
- "Failed to create queue del task\n");
- for (i = dev->queue_count - 1; i > 0; i--)
- nvme_disable_queue(dev, i);
- return;
+ spin_lock(&dev_list_lock);
+ if (list_empty(&dev_list) && IS_ERR_OR_NULL(nvme_thread)) {
+ start_thread = true;
+ nvme_thread = NULL;
}
+ list_add(&dev->node, &dev_list);
+ spin_unlock(&dev_list_lock);
- dq.waiter = NULL;
- atomic_set(&dq.refcount, 0);
- dq.worker = &worker;
- for (i = dev->queue_count - 1; i > 0; i--) {
- struct nvme_queue *nvmeq = dev->queues[i];
+ if (start_thread) {
+ nvme_thread = kthread_run(nvme_kthread, NULL, "nvme");
+ wake_up_all(&nvme_kthread_wait);
+ } else
+ wait_event_killable(nvme_kthread_wait, nvme_thread);
- if (nvme_suspend_queue(nvmeq))
- continue;
- nvmeq->cmdinfo.ctx = nvme_get_dq(&dq);
- nvmeq->cmdinfo.worker = dq.worker;
- init_kthread_work(&nvmeq->cmdinfo.work, nvme_del_queue_start);
- queue_kthread_work(dq.worker, &nvmeq->cmdinfo.work);
- }
- nvme_wait_dq(&dq, dev);
- kthread_stop(kworker_task);
+ if (IS_ERR_OR_NULL(nvme_thread))
+ return nvme_thread ? PTR_ERR(nvme_thread) : -EINTR;
+
+ return 0;
}
/*
@@ -2928,44 +1829,17 @@ static void nvme_dev_list_remove(struct nvme_dev *dev)
kthread_stop(tmp);
}
-static void nvme_freeze_queues(struct nvme_dev *dev)
-{
- struct nvme_ns *ns;
-
- list_for_each_entry(ns, &dev->namespaces, list) {
- blk_mq_freeze_queue_start(ns->queue);
-
- spin_lock_irq(ns->queue->queue_lock);
- queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue);
- spin_unlock_irq(ns->queue->queue_lock);
-
- blk_mq_cancel_requeue_work(ns->queue);
- blk_mq_stop_hw_queues(ns->queue);
- }
-}
-
-static void nvme_unfreeze_queues(struct nvme_dev *dev)
-{
- struct nvme_ns *ns;
-
- list_for_each_entry(ns, &dev->namespaces, list) {
- queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, ns->queue);
- blk_mq_unfreeze_queue(ns->queue);
- blk_mq_start_stopped_hw_queues(ns->queue, true);
- blk_mq_kick_requeue_list(ns->queue);
- }
-}
-
-static void nvme_dev_shutdown(struct nvme_dev *dev)
+static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
{
int i;
u32 csts = -1;
nvme_dev_list_remove(dev);
+ mutex_lock(&dev->shutdown_lock);
if (dev->bar) {
- nvme_freeze_queues(dev);
- csts = readl(&dev->bar->csts);
+ nvme_stop_queues(&dev->ctrl);
+ csts = readl(dev->bar + NVME_REG_CSTS);
}
if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) {
for (i = dev->queue_count - 1; i >= 0; i--) {
@@ -2974,30 +1848,13 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
}
} else {
nvme_disable_io_queues(dev);
- nvme_shutdown_ctrl(dev);
- nvme_disable_queue(dev, 0);
+ nvme_disable_admin_queue(dev, shutdown);
}
nvme_dev_unmap(dev);
for (i = dev->queue_count - 1; i >= 0; i--)
nvme_clear_queue(dev->queues[i]);
-}
-
-static void nvme_dev_remove(struct nvme_dev *dev)
-{
- struct nvme_ns *ns, *next;
-
- if (nvme_io_incapable(dev)) {
- /*
- * If the device is not capable of IO (surprise hot-removal,
- * for example), we need to quiesce prior to deleting the
- * namespaces. This will end outstanding requests and prevent
- * attempts to sync dirty data.
- */
- nvme_dev_shutdown(dev);
- }
- list_for_each_entry_safe(ns, next, &dev->namespaces, list)
- nvme_ns_remove(ns);
+ mutex_unlock(&dev->shutdown_lock);
}
static int nvme_setup_prp_pools(struct nvme_dev *dev)
@@ -3023,119 +1880,36 @@ static void nvme_release_prp_pools(struct nvme_dev *dev)
dma_pool_destroy(dev->prp_small_pool);
}
-static DEFINE_IDA(nvme_instance_ida);
-
-static int nvme_set_instance(struct nvme_dev *dev)
-{
- int instance, error;
-
- do {
- if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL))
- return -ENODEV;
-
- spin_lock(&dev_list_lock);
- error = ida_get_new(&nvme_instance_ida, &instance);
- spin_unlock(&dev_list_lock);
- } while (error == -EAGAIN);
-
- if (error)
- return -ENODEV;
-
- dev->instance = instance;
- return 0;
-}
-
-static void nvme_release_instance(struct nvme_dev *dev)
+static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
{
- spin_lock(&dev_list_lock);
- ida_remove(&nvme_instance_ida, dev->instance);
- spin_unlock(&dev_list_lock);
-}
-
-static void nvme_free_dev(struct kref *kref)
-{
- struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref);
+ struct nvme_dev *dev = to_nvme_dev(ctrl);
put_device(dev->dev);
- put_device(dev->device);
- nvme_release_instance(dev);
if (dev->tagset.tags)
blk_mq_free_tag_set(&dev->tagset);
- if (dev->admin_q)
- blk_put_queue(dev->admin_q);
+ if (dev->ctrl.admin_q)
+ blk_put_queue(dev->ctrl.admin_q);
kfree(dev->queues);
kfree(dev->entry);
kfree(dev);
}
-static int nvme_dev_open(struct inode *inode, struct file *f)
+static void nvme_reset_work(struct work_struct *work)
{
- struct nvme_dev *dev;
- int instance = iminor(inode);
- int ret = -ENODEV;
-
- spin_lock(&dev_list_lock);
- list_for_each_entry(dev, &dev_list, node) {
- if (dev->instance == instance) {
- if (!dev->admin_q) {
- ret = -EWOULDBLOCK;
- break;
- }
- if (!kref_get_unless_zero(&dev->kref))
- break;
- f->private_data = dev;
- ret = 0;
- break;
- }
- }
- spin_unlock(&dev_list_lock);
-
- return ret;
-}
+ struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work);
+ int result;
-static int nvme_dev_release(struct inode *inode, struct file *f)
-{
- struct nvme_dev *dev = f->private_data;
- kref_put(&dev->kref, nvme_free_dev);
- return 0;
-}
+ if (WARN_ON(test_bit(NVME_CTRL_RESETTING, &dev->flags)))
+ goto out;
-static long nvme_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
-{
- struct nvme_dev *dev = f->private_data;
- struct nvme_ns *ns;
-
- switch (cmd) {
- case NVME_IOCTL_ADMIN_CMD:
- return nvme_user_cmd(dev, NULL, (void __user *)arg);
- case NVME_IOCTL_IO_CMD:
- if (list_empty(&dev->namespaces))
- return -ENOTTY;
- ns = list_first_entry(&dev->namespaces, struct nvme_ns, list);
- return nvme_user_cmd(dev, ns, (void __user *)arg);
- case NVME_IOCTL_RESET:
- dev_warn(dev->dev, "resetting controller\n");
- return nvme_reset(dev);
- case NVME_IOCTL_SUBSYS_RESET:
- return nvme_subsys_reset(dev);
- default:
- return -ENOTTY;
- }
-}
+ /*
+ * If we're called to reset a live controller first shut it down before
+ * moving on.
+ */
+ if (dev->bar)
+ nvme_dev_disable(dev, false);
-static const struct file_operations nvme_dev_fops = {
- .owner = THIS_MODULE,
- .open = nvme_dev_open,
- .release = nvme_dev_release,
- .unlocked_ioctl = nvme_dev_ioctl,
- .compat_ioctl = nvme_dev_ioctl,
-};
-
-static void nvme_probe_work(struct work_struct *work)
-{
- struct nvme_dev *dev = container_of(work, struct nvme_dev, probe_work);
- bool start_thread = false;
- int result;
+ set_bit(NVME_CTRL_RESETTING, &dev->flags);
result = nvme_dev_map(dev);
if (result)
@@ -3145,35 +1919,24 @@ static void nvme_probe_work(struct work_struct *work)
if (result)
goto unmap;
- spin_lock(&dev_list_lock);
- if (list_empty(&dev_list) && IS_ERR_OR_NULL(nvme_thread)) {
- start_thread = true;
- nvme_thread = NULL;
- }
- list_add(&dev->node, &dev_list);
- spin_unlock(&dev_list_lock);
-
- if (start_thread) {
- nvme_thread = kthread_run(nvme_kthread, NULL, "nvme");
- wake_up_all(&nvme_kthread_wait);
- } else
- wait_event_killable(nvme_kthread_wait, nvme_thread);
-
- if (IS_ERR_OR_NULL(nvme_thread)) {
- result = nvme_thread ? PTR_ERR(nvme_thread) : -EINTR;
- goto disable;
- }
-
nvme_init_queue(dev->queues[0], 0);
result = nvme_alloc_admin_tags(dev);
if (result)
goto disable;
+ result = nvme_init_identify(&dev->ctrl);
+ if (result)
+ goto free_tags;
+
result = nvme_setup_io_queues(dev);
if (result)
goto free_tags;
- dev->event_limit = 1;
+ dev->ctrl.event_limit = NVME_NR_AEN_COMMANDS;
+
+ result = nvme_dev_list_add(dev);
+ if (result)
+ goto remove;
/*
* Keep the controller around but remove all namespaces if we don't have
@@ -3181,117 +1944,98 @@ static void nvme_probe_work(struct work_struct *work)
*/
if (dev->online_queues < 2) {
dev_warn(dev->dev, "IO queues not created\n");
- nvme_dev_remove(dev);
+ nvme_remove_namespaces(&dev->ctrl);
} else {
- nvme_unfreeze_queues(dev);
+ nvme_start_queues(&dev->ctrl);
nvme_dev_add(dev);
}
+ clear_bit(NVME_CTRL_RESETTING, &dev->flags);
return;
+ remove:
+ nvme_dev_list_remove(dev);
free_tags:
nvme_dev_remove_admin(dev);
- blk_put_queue(dev->admin_q);
- dev->admin_q = NULL;
+ blk_put_queue(dev->ctrl.admin_q);
+ dev->ctrl.admin_q = NULL;
dev->queues[0]->tags = NULL;
disable:
- nvme_disable_queue(dev, 0);
- nvme_dev_list_remove(dev);
+ nvme_disable_admin_queue(dev, false);
unmap:
nvme_dev_unmap(dev);
out:
- if (!work_busy(&dev->reset_work))
- nvme_dead_ctrl(dev);
+ nvme_remove_dead_ctrl(dev);
}
-static int nvme_remove_dead_ctrl(void *arg)
+static void nvme_remove_dead_ctrl_work(struct work_struct *work)
{
- struct nvme_dev *dev = (struct nvme_dev *)arg;
+ struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work);
struct pci_dev *pdev = to_pci_dev(dev->dev);
if (pci_get_drvdata(pdev))
pci_stop_and_remove_bus_device_locked(pdev);
- kref_put(&dev->kref, nvme_free_dev);
- return 0;
+ nvme_put_ctrl(&dev->ctrl);
}
-static void nvme_dead_ctrl(struct nvme_dev *dev)
+static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
{
- dev_warn(dev->dev, "Device failed to resume\n");
- kref_get(&dev->kref);
- if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d",
- dev->instance))) {
- dev_err(dev->dev,
- "Failed to start controller remove task\n");
- kref_put(&dev->kref, nvme_free_dev);
- }
+ dev_warn(dev->dev, "Removing after probe failure\n");
+ kref_get(&dev->ctrl.kref);
+ if (!schedule_work(&dev->remove_work))
+ nvme_put_ctrl(&dev->ctrl);
}
-static void nvme_reset_work(struct work_struct *ws)
+static int nvme_reset(struct nvme_dev *dev)
{
- struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work);
- bool in_probe = work_busy(&dev->probe_work);
-
- nvme_dev_shutdown(dev);
+ if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
+ return -ENODEV;
- /* Synchronize with device probe so that work will see failure status
- * and exit gracefully without trying to schedule another reset */
- flush_work(&dev->probe_work);
+ if (!queue_work(nvme_workq, &dev->reset_work))
+ return -EBUSY;
- /* Fail this device if reset occured during probe to avoid
- * infinite initialization loops. */
- if (in_probe) {
- nvme_dead_ctrl(dev);
- return;
- }
- /* Schedule device resume asynchronously so the reset work is available
- * to cleanup errors that may occur during reinitialization */
- schedule_work(&dev->probe_work);
+ flush_work(&dev->reset_work);
+ return 0;
}
-static int __nvme_reset(struct nvme_dev *dev)
+static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
{
- if (work_pending(&dev->reset_work))
- return -EBUSY;
- list_del_init(&dev->node);
- queue_work(nvme_workq, &dev->reset_work);
+ *val = readl(to_nvme_dev(ctrl)->bar + off);
return 0;
}
-static int nvme_reset(struct nvme_dev *dev)
+static int nvme_pci_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
{
- int ret;
-
- if (!dev->admin_q || blk_queue_dying(dev->admin_q))
- return -ENODEV;
-
- spin_lock(&dev_list_lock);
- ret = __nvme_reset(dev);
- spin_unlock(&dev_list_lock);
-
- if (!ret) {
- flush_work(&dev->reset_work);
- flush_work(&dev->probe_work);
- return 0;
- }
+ writel(val, to_nvme_dev(ctrl)->bar + off);
+ return 0;
+}
- return ret;
+static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
+{
+ *val = readq(to_nvme_dev(ctrl)->bar + off);
+ return 0;
}
-static ssize_t nvme_sysfs_reset(struct device *dev,
- struct device_attribute *attr, const char *buf,
- size_t count)
+static bool nvme_pci_io_incapable(struct nvme_ctrl *ctrl)
{
- struct nvme_dev *ndev = dev_get_drvdata(dev);
- int ret;
+ struct nvme_dev *dev = to_nvme_dev(ctrl);
- ret = nvme_reset(ndev);
- if (ret < 0)
- return ret;
+ return !dev->bar || dev->online_queues < 2;
+}
- return count;
+static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl)
+{
+ return nvme_reset(to_nvme_dev(ctrl));
}
-static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);
+
+static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
+ .reg_read32 = nvme_pci_reg_read32,
+ .reg_write32 = nvme_pci_reg_write32,
+ .reg_read64 = nvme_pci_reg_read64,
+ .io_incapable = nvme_pci_io_incapable,
+ .reset_ctrl = nvme_pci_reset_ctrl,
+ .free_ctrl = nvme_pci_free_ctrl,
+};
static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
@@ -3314,46 +2058,30 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (!dev->queues)
goto free;
- INIT_LIST_HEAD(&dev->namespaces);
- INIT_WORK(&dev->reset_work, nvme_reset_work);
dev->dev = get_device(&pdev->dev);
pci_set_drvdata(pdev, dev);
- result = nvme_set_instance(dev);
- if (result)
- goto put_pci;
+
+ INIT_LIST_HEAD(&dev->node);
+ INIT_WORK(&dev->scan_work, nvme_dev_scan);
+ INIT_WORK(&dev->reset_work, nvme_reset_work);
+ INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
+ mutex_init(&dev->shutdown_lock);
+ init_completion(&dev->ioq_wait);
result = nvme_setup_prp_pools(dev);
if (result)
- goto release;
-
- kref_init(&dev->kref);
- dev->device = device_create(nvme_class, &pdev->dev,
- MKDEV(nvme_char_major, dev->instance),
- dev, "nvme%d", dev->instance);
- if (IS_ERR(dev->device)) {
- result = PTR_ERR(dev->device);
- goto release_pools;
- }
- get_device(dev->device);
- dev_set_drvdata(dev->device, dev);
+ goto put_pci;
- result = device_create_file(dev->device, &dev_attr_reset_controller);
+ result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops,
+ id->driver_data);
if (result)
- goto put_dev;
+ goto release_pools;
- INIT_LIST_HEAD(&dev->node);
- INIT_WORK(&dev->scan_work, nvme_dev_scan);
- INIT_WORK(&dev->probe_work, nvme_probe_work);
- schedule_work(&dev->probe_work);
+ queue_work(nvme_workq, &dev->reset_work);
return 0;
- put_dev:
- device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance));
- put_device(dev->device);
release_pools:
nvme_release_prp_pools(dev);
- release:
- nvme_release_instance(dev);
put_pci:
put_device(dev->dev);
free:
@@ -3368,15 +2096,15 @@ static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
struct nvme_dev *dev = pci_get_drvdata(pdev);
if (prepare)
- nvme_dev_shutdown(dev);
+ nvme_dev_disable(dev, false);
else
- schedule_work(&dev->probe_work);
+ queue_work(nvme_workq, &dev->reset_work);
}
static void nvme_shutdown(struct pci_dev *pdev)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);
- nvme_dev_shutdown(dev);
+ nvme_dev_disable(dev, true);
}
static void nvme_remove(struct pci_dev *pdev)
@@ -3388,34 +2116,25 @@ static void nvme_remove(struct pci_dev *pdev)
spin_unlock(&dev_list_lock);
pci_set_drvdata(pdev, NULL);
- flush_work(&dev->probe_work);
flush_work(&dev->reset_work);
flush_work(&dev->scan_work);
- device_remove_file(dev->device, &dev_attr_reset_controller);
- nvme_dev_remove(dev);
- nvme_dev_shutdown(dev);
+ nvme_remove_namespaces(&dev->ctrl);
+ nvme_uninit_ctrl(&dev->ctrl);
+ nvme_dev_disable(dev, true);
nvme_dev_remove_admin(dev);
- device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance));
nvme_free_queues(dev, 0);
nvme_release_cmb(dev);
nvme_release_prp_pools(dev);
- kref_put(&dev->kref, nvme_free_dev);
+ nvme_put_ctrl(&dev->ctrl);
}
-/* These functions are yet to be implemented */
-#define nvme_error_detected NULL
-#define nvme_dump_registers NULL
-#define nvme_link_reset NULL
-#define nvme_slot_reset NULL
-#define nvme_error_resume NULL
-
#ifdef CONFIG_PM_SLEEP
static int nvme_suspend(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct nvme_dev *ndev = pci_get_drvdata(pdev);
- nvme_dev_shutdown(ndev);
+ nvme_dev_disable(ndev, true);
return 0;
}
@@ -3424,17 +2143,53 @@ static int nvme_resume(struct device *dev)
struct pci_dev *pdev = to_pci_dev(dev);
struct nvme_dev *ndev = pci_get_drvdata(pdev);
- schedule_work(&ndev->probe_work);
+ queue_work(nvme_workq, &ndev->reset_work);
return 0;
}
#endif
static SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume);
+static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+{
+ struct nvme_dev *dev = pci_get_drvdata(pdev);
+
+ /*
+ * A frozen channel requires a reset. When detected, this method will
+ * shutdown the controller to quiesce. The controller will be restarted
+ * after the slot reset through driver's slot_reset callback.
+ */
+ dev_warn(&pdev->dev, "error detected: state:%d\n", state);
+ switch (state) {
+ case pci_channel_io_normal:
+ return PCI_ERS_RESULT_CAN_RECOVER;
+ case pci_channel_io_frozen:
+ nvme_dev_disable(dev, false);
+ return PCI_ERS_RESULT_NEED_RESET;
+ case pci_channel_io_perm_failure:
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+ return PCI_ERS_RESULT_NEED_RESET;
+}
+
+static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
+{
+ struct nvme_dev *dev = pci_get_drvdata(pdev);
+
+ dev_info(&pdev->dev, "restart after slot reset\n");
+ pci_restore_state(pdev);
+ queue_work(nvme_workq, &dev->reset_work);
+ return PCI_ERS_RESULT_RECOVERED;
+}
+
+static void nvme_error_resume(struct pci_dev *pdev)
+{
+ pci_cleanup_aer_uncorrect_error_status(pdev);
+}
+
static const struct pci_error_handlers nvme_err_handler = {
.error_detected = nvme_error_detected,
- .mmio_enabled = nvme_dump_registers,
- .link_reset = nvme_link_reset,
.slot_reset = nvme_slot_reset,
.resume = nvme_error_resume,
.reset_notify = nvme_reset_notify,
@@ -3444,6 +2199,10 @@ static const struct pci_error_handlers nvme_err_handler = {
#define PCI_CLASS_STORAGE_EXPRESS 0x010802
static const struct pci_device_id nvme_id_table[] = {
+ { PCI_VDEVICE(INTEL, 0x0953),
+ .driver_data = NVME_QUIRK_STRIPE_SIZE, },
+ { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */
+ .driver_data = NVME_QUIRK_IDENTIFY_CNS, },
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
{ 0, }
@@ -3468,40 +2227,21 @@ static int __init nvme_init(void)
init_waitqueue_head(&nvme_kthread_wait);
- nvme_workq = create_singlethread_workqueue("nvme");
+ nvme_workq = alloc_workqueue("nvme", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
if (!nvme_workq)
return -ENOMEM;
- result = register_blkdev(nvme_major, "nvme");
+ result = nvme_core_init();
if (result < 0)
goto kill_workq;
- else if (result > 0)
- nvme_major = result;
-
- result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
- &nvme_dev_fops);
- if (result < 0)
- goto unregister_blkdev;
- else if (result > 0)
- nvme_char_major = result;
-
- nvme_class = class_create(THIS_MODULE, "nvme");
- if (IS_ERR(nvme_class)) {
- result = PTR_ERR(nvme_class);
- goto unregister_chrdev;
- }
result = pci_register_driver(&nvme_driver);
if (result)
- goto destroy_class;
+ goto core_exit;
return 0;
- destroy_class:
- class_destroy(nvme_class);
- unregister_chrdev:
- __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
- unregister_blkdev:
- unregister_blkdev(nvme_major, "nvme");
+ core_exit:
+ nvme_core_exit();
kill_workq:
destroy_workqueue(nvme_workq);
return result;
@@ -3510,10 +2250,8 @@ static int __init nvme_init(void)
static void __exit nvme_exit(void)
{
pci_unregister_driver(&nvme_driver);
- unregister_blkdev(nvme_major, "nvme");
+ nvme_core_exit();
destroy_workqueue(nvme_workq);
- class_destroy(nvme_class);
- __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
BUG_ON(nvme_thread && !IS_ERR(nvme_thread));
_nvme_check_size();
}
diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c
index c3d8d38..e947e29 100644
--- a/drivers/nvme/host/scsi.c
+++ b/drivers/nvme/host/scsi.c
@@ -524,7 +524,7 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
struct sg_io_hdr *hdr, u8 *inq_response,
int alloc_len)
{
- struct nvme_dev *dev = ns->dev;
+ struct nvme_ctrl *ctrl = ns->ctrl;
struct nvme_id_ns *id_ns;
int res;
int nvme_sc;
@@ -532,10 +532,10 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
u8 resp_data_format = 0x02;
u8 protect;
u8 cmdque = 0x01 << 1;
- u8 fw_offset = sizeof(dev->firmware_rev);
+ u8 fw_offset = sizeof(ctrl->firmware_rev);
/* nvme ns identify - use DPS value for PROTECT field */
- nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
+ nvme_sc = nvme_identify_ns(ctrl, ns->ns_id, &id_ns);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
return res;
@@ -553,12 +553,12 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
inq_response[5] = protect; /* sccs=0 | acc=0 | tpgs=0 | pc3=0 */
inq_response[7] = cmdque; /* wbus16=0 | sync=0 | vs=0 */
strncpy(&inq_response[8], "NVMe ", 8);
- strncpy(&inq_response[16], dev->model, 16);
+ strncpy(&inq_response[16], ctrl->model, 16);
- while (dev->firmware_rev[fw_offset - 1] == ' ' && fw_offset > 4)
+ while (ctrl->firmware_rev[fw_offset - 1] == ' ' && fw_offset > 4)
fw_offset--;
fw_offset -= 4;
- strncpy(&inq_response[32], dev->firmware_rev + fw_offset, 4);
+ strncpy(&inq_response[32], ctrl->firmware_rev + fw_offset, 4);
xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
@@ -588,82 +588,113 @@ static int nvme_trans_unit_serial_page(struct nvme_ns *ns,
struct sg_io_hdr *hdr, u8 *inq_response,
int alloc_len)
{
- struct nvme_dev *dev = ns->dev;
int xfer_len;
memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
inq_response[1] = INQ_UNIT_SERIAL_NUMBER_PAGE; /* Page Code */
inq_response[3] = INQ_SERIAL_NUMBER_LENGTH; /* Page Length */
- strncpy(&inq_response[4], dev->serial, INQ_SERIAL_NUMBER_LENGTH);
+ strncpy(&inq_response[4], ns->ctrl->serial, INQ_SERIAL_NUMBER_LENGTH);
xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
}
-static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *inq_response, int alloc_len)
+static int nvme_fill_device_id_eui64(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+ u8 *inq_response, int alloc_len)
{
- struct nvme_dev *dev = ns->dev;
- int res;
- int nvme_sc;
- int xfer_len;
- __be32 tmp_id = cpu_to_be32(ns->ns_id);
+ struct nvme_id_ns *id_ns;
+ int nvme_sc, res;
+ size_t len;
+ void *eui;
- memset(inq_response, 0, alloc_len);
- inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE; /* Page Code */
- if (readl(&dev->bar->vs) >= NVME_VS(1, 1)) {
- struct nvme_id_ns *id_ns;
- void *eui;
- int len;
+ nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
+ res = nvme_trans_status_code(hdr, nvme_sc);
+ if (res)
+ return res;
- nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- return res;
+ eui = id_ns->eui64;
+ len = sizeof(id_ns->eui64);
- eui = id_ns->eui64;
- len = sizeof(id_ns->eui64);
- if (readl(&dev->bar->vs) >= NVME_VS(1, 2)) {
- if (bitmap_empty(eui, len * 8)) {
- eui = id_ns->nguid;
- len = sizeof(id_ns->nguid);
- }
- }
+ if (ns->ctrl->vs >= NVME_VS(1, 2)) {
if (bitmap_empty(eui, len * 8)) {
- kfree(id_ns);
- goto scsi_string;
+ eui = id_ns->nguid;
+ len = sizeof(id_ns->nguid);
}
+ }
- inq_response[3] = 4 + len; /* Page Length */
- /* Designation Descriptor start */
- inq_response[4] = 0x01; /* Proto ID=0h | Code set=1h */
- inq_response[5] = 0x02; /* PIV=0b | Asso=00b | Designator Type=2h */
- inq_response[6] = 0x00; /* Rsvd */
- inq_response[7] = len; /* Designator Length */
- memcpy(&inq_response[8], eui, len);
- kfree(id_ns);
- } else {
- scsi_string:
- if (alloc_len < 72) {
- return nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- }
- inq_response[3] = 0x48; /* Page Length */
- /* Designation Descriptor start */
- inq_response[4] = 0x03; /* Proto ID=0h | Code set=3h */
- inq_response[5] = 0x08; /* PIV=0b | Asso=00b | Designator Type=8h */
- inq_response[6] = 0x00; /* Rsvd */
- inq_response[7] = 0x44; /* Designator Length */
-
- sprintf(&inq_response[8], "%04x", to_pci_dev(dev->dev)->vendor);
- memcpy(&inq_response[12], dev->model, sizeof(dev->model));
- sprintf(&inq_response[52], "%04x", tmp_id);
- memcpy(&inq_response[56], dev->serial, sizeof(dev->serial));
+ if (bitmap_empty(eui, len * 8)) {
+ res = -EOPNOTSUPP;
+ goto out_free_id;
}
- xfer_len = alloc_len;
- return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
+
+ memset(inq_response, 0, alloc_len);
+ inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;
+ inq_response[3] = 4 + len; /* Page Length */
+
+ /* Designation Descriptor start */
+ inq_response[4] = 0x01; /* Proto ID=0h | Code set=1h */
+ inq_response[5] = 0x02; /* PIV=0b | Asso=00b | Designator Type=2h */
+ inq_response[6] = 0x00; /* Rsvd */
+ inq_response[7] = len; /* Designator Length */
+ memcpy(&inq_response[8], eui, len);
+
+ res = nvme_trans_copy_to_user(hdr, inq_response, alloc_len);
+out_free_id:
+ kfree(id_ns);
+ return res;
+}
+
+static int nvme_fill_device_id_scsi_string(struct nvme_ns *ns,
+ struct sg_io_hdr *hdr, u8 *inq_response, int alloc_len)
+{
+ struct nvme_ctrl *ctrl = ns->ctrl;
+ struct nvme_id_ctrl *id_ctrl;
+ int nvme_sc, res;
+
+ if (alloc_len < 72) {
+ return nvme_trans_completion(hdr,
+ SAM_STAT_CHECK_CONDITION,
+ ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
+ SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+ }
+
+ nvme_sc = nvme_identify_ctrl(ctrl, &id_ctrl);
+ res = nvme_trans_status_code(hdr, nvme_sc);
+ if (res)
+ return res;
+
+ memset(inq_response, 0, alloc_len);
+ inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;
+ inq_response[3] = 0x48; /* Page Length */
+
+ /* Designation Descriptor start */
+ inq_response[4] = 0x03; /* Proto ID=0h | Code set=3h */
+ inq_response[5] = 0x08; /* PIV=0b | Asso=00b | Designator Type=8h */
+ inq_response[6] = 0x00; /* Rsvd */
+ inq_response[7] = 0x44; /* Designator Length */
+
+ sprintf(&inq_response[8], "%04x", le16_to_cpu(id_ctrl->vid));
+ memcpy(&inq_response[12], ctrl->model, sizeof(ctrl->model));
+ sprintf(&inq_response[52], "%04x", cpu_to_be32(ns->ns_id));
+ memcpy(&inq_response[56], ctrl->serial, sizeof(ctrl->serial));
+
+ res = nvme_trans_copy_to_user(hdr, inq_response, alloc_len);
+ kfree(id_ctrl);
+ return res;
+}
+
+static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+ u8 *resp, int alloc_len)
+{
+ int res;
+
+ if (ns->ctrl->vs >= NVME_VS(1, 1)) {
+ res = nvme_fill_device_id_eui64(ns, hdr, resp, alloc_len);
+ if (res != -EOPNOTSUPP)
+ return res;
+ }
+
+ return nvme_fill_device_id_scsi_string(ns, hdr, resp, alloc_len);
}
static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
@@ -672,7 +703,7 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *inq_response;
int res;
int nvme_sc;
- struct nvme_dev *dev = ns->dev;
+ struct nvme_ctrl *ctrl = ns->ctrl;
struct nvme_id_ctrl *id_ctrl;
struct nvme_id_ns *id_ns;
int xfer_len;
@@ -688,7 +719,7 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
if (inq_response == NULL)
return -ENOMEM;
- nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
+ nvme_sc = nvme_identify_ns(ctrl, ns->ns_id, &id_ns);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
goto out_free_inq;
@@ -704,7 +735,7 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
app_chk = protect << 1;
ref_chk = protect;
- nvme_sc = nvme_identify_ctrl(dev, &id_ctrl);
+ nvme_sc = nvme_identify_ctrl(ctrl, &id_ctrl);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
goto out_free_inq;
@@ -815,7 +846,6 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
int res;
int xfer_len;
u8 *log_response;
- struct nvme_dev *dev = ns->dev;
struct nvme_smart_log *smart_log;
u8 temp_c;
u16 temp_k;
@@ -824,7 +854,7 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
if (log_response == NULL)
return -ENOMEM;
- res = nvme_get_log_page(dev, &smart_log);
+ res = nvme_get_log_page(ns->ctrl, &smart_log);
if (res < 0)
goto out_free_response;
@@ -862,7 +892,6 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
int res;
int xfer_len;
u8 *log_response;
- struct nvme_dev *dev = ns->dev;
struct nvme_smart_log *smart_log;
u32 feature_resp;
u8 temp_c_cur, temp_c_thresh;
@@ -872,7 +901,7 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
if (log_response == NULL)
return -ENOMEM;
- res = nvme_get_log_page(dev, &smart_log);
+ res = nvme_get_log_page(ns->ctrl, &smart_log);
if (res < 0)
goto out_free_response;
@@ -886,7 +915,7 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
kfree(smart_log);
/* Get Features for Temp Threshold */
- res = nvme_get_features(dev, NVME_FEAT_TEMP_THRESH, 0, 0,
+ res = nvme_get_features(ns->ctrl, NVME_FEAT_TEMP_THRESH, 0, 0,
&feature_resp);
if (res != NVME_SC_SUCCESS)
temp_c_thresh = LOG_TEMP_UNKNOWN;
@@ -948,7 +977,6 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr,
{
int res;
int nvme_sc;
- struct nvme_dev *dev = ns->dev;
struct nvme_id_ns *id_ns;
u8 flbas;
u32 lba_length;
@@ -958,7 +986,7 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr,
else if (llbaa > 0 && len < MODE_PAGE_LLBAA_BLK_DES_LEN)
return -EINVAL;
- nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
+ nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
return res;
@@ -1014,14 +1042,13 @@ static int nvme_trans_fill_caching_page(struct nvme_ns *ns,
{
int res = 0;
int nvme_sc;
- struct nvme_dev *dev = ns->dev;
u32 feature_resp;
u8 vwc;
if (len < MODE_PAGE_CACHING_LEN)
return -EINVAL;
- nvme_sc = nvme_get_features(dev, NVME_FEAT_VOLATILE_WC, 0, 0,
+ nvme_sc = nvme_get_features(ns->ctrl, NVME_FEAT_VOLATILE_WC, 0, 0,
&feature_resp);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
@@ -1207,12 +1234,11 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr,
{
int res;
int nvme_sc;
- struct nvme_dev *dev = ns->dev;
struct nvme_id_ctrl *id_ctrl;
int lowest_pow_st; /* max npss = lowest power consumption */
unsigned ps_desired = 0;
- nvme_sc = nvme_identify_ctrl(dev, &id_ctrl);
+ nvme_sc = nvme_identify_ctrl(ns->ctrl, &id_ctrl);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
return res;
@@ -1256,7 +1282,7 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr,
SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
break;
}
- nvme_sc = nvme_set_features(dev, NVME_FEAT_POWER_MGMT, ps_desired, 0,
+ nvme_sc = nvme_set_features(ns->ctrl, NVME_FEAT_POWER_MGMT, ps_desired, 0,
NULL);
return nvme_trans_status_code(hdr, nvme_sc);
}
@@ -1280,7 +1306,6 @@ static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr
u8 buffer_id)
{
int nvme_sc;
- struct nvme_dev *dev = ns->dev;
struct nvme_command c;
if (hdr->iovec_count > 0) {
@@ -1297,7 +1322,7 @@ static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr
c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1);
c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS);
- nvme_sc = __nvme_submit_sync_cmd(dev->admin_q, &c, NULL,
+ nvme_sc = nvme_submit_user_cmd(ns->ctrl->admin_q, &c,
hdr->dxferp, tot_len, NULL, 0);
return nvme_trans_status_code(hdr, nvme_sc);
}
@@ -1364,14 +1389,13 @@ static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
{
int res = 0;
int nvme_sc;
- struct nvme_dev *dev = ns->dev;
unsigned dword11;
switch (page_code) {
case MODE_PAGE_CACHING:
dword11 = ((mode_page[2] & CACHING_MODE_PAGE_WCE_MASK) ? 1 : 0);
- nvme_sc = nvme_set_features(dev, NVME_FEAT_VOLATILE_WC, dword11,
- 0, NULL);
+ nvme_sc = nvme_set_features(ns->ctrl, NVME_FEAT_VOLATILE_WC,
+ dword11, 0, NULL);
res = nvme_trans_status_code(hdr, nvme_sc);
break;
case MODE_PAGE_CONTROL:
@@ -1473,7 +1497,6 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
{
int res = 0;
int nvme_sc;
- struct nvme_dev *dev = ns->dev;
u8 flbas;
/*
@@ -1486,7 +1509,7 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
if (ns->mode_select_num_blocks == 0 || ns->mode_select_block_len == 0) {
struct nvme_id_ns *id_ns;
- nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
+ nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
return res;
@@ -1570,7 +1593,6 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
{
int res;
int nvme_sc;
- struct nvme_dev *dev = ns->dev;
struct nvme_id_ns *id_ns;
u8 i;
u8 flbas, nlbaf;
@@ -1579,7 +1601,7 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
struct nvme_command c;
/* Loop thru LBAF's in id_ns to match reqd lbaf, put in cdw10 */
- nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
+ nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
return res;
@@ -1611,7 +1633,7 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
c.format.nsid = cpu_to_le32(ns->ns_id);
c.format.cdw10 = cpu_to_le32(cdw10);
- nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
+ nvme_sc = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, NULL, 0);
res = nvme_trans_status_code(hdr, nvme_sc);
kfree(id_ns);
@@ -1704,7 +1726,7 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
nvme_sc = NVME_SC_LBA_RANGE;
break;
}
- nvme_sc = __nvme_submit_sync_cmd(ns->queue, &c, NULL,
+ nvme_sc = nvme_submit_user_cmd(ns->queue, &c,
next_mapping_addr, unit_len, NULL, 0);
if (nvme_sc)
break;
@@ -2040,7 +2062,6 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u32 alloc_len;
u32 resp_size;
u32 xfer_len;
- struct nvme_dev *dev = ns->dev;
struct nvme_id_ns *id_ns;
u8 *response;
@@ -2052,7 +2073,7 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
resp_size = READ_CAP_10_RESP_SIZE;
}
- nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
+ nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
return res;
@@ -2080,7 +2101,6 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
int nvme_sc;
u32 alloc_len, xfer_len, resp_size;
u8 *response;
- struct nvme_dev *dev = ns->dev;
struct nvme_id_ctrl *id_ctrl;
u32 ll_length, lun_id;
u8 lun_id_offset = REPORT_LUNS_FIRST_LUN_OFFSET;
@@ -2094,7 +2114,7 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
case ALL_LUNS_RETURNED:
case ALL_WELL_KNOWN_LUNS_RETURNED:
case RESTRICTED_LUNS_RETURNED:
- nvme_sc = nvme_identify_ctrl(dev, &id_ctrl);
+ nvme_sc = nvme_identify_ctrl(ns->ctrl, &id_ctrl);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
return res;
@@ -2295,9 +2315,7 @@ static int nvme_trans_test_unit_ready(struct nvme_ns *ns,
struct sg_io_hdr *hdr,
u8 *cmd)
{
- struct nvme_dev *dev = ns->dev;
-
- if (!(readl(&dev->bar->csts) & NVME_CSTS_RDY))
+ if (nvme_ctrl_ready(ns->ctrl))
return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
NOT_READY, SCSI_ASC_LUN_NOT_READY,
SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c
index dd92c5e..b48ac630 100644
--- a/drivers/oprofile/oprofilefs.c
+++ b/drivers/oprofile/oprofilefs.c
@@ -138,22 +138,22 @@ static int __oprofilefs_create_file(struct dentry *root, char const *name,
struct dentry *dentry;
struct inode *inode;
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
dentry = d_alloc_name(root, name);
if (!dentry) {
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
return -ENOMEM;
}
inode = oprofilefs_get_inode(root->d_sb, S_IFREG | perm);
if (!inode) {
dput(dentry);
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
return -ENOMEM;
}
inode->i_fop = fops;
inode->i_private = priv;
d_add(dentry, inode);
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
return 0;
}
@@ -215,22 +215,22 @@ struct dentry *oprofilefs_mkdir(struct dentry *parent, char const *name)
struct dentry *dentry;
struct inode *inode;
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
dentry = d_alloc_name(parent, name);
if (!dentry) {
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
return NULL;
}
inode = oprofilefs_get_inode(parent->d_sb, S_IFDIR | 0755);
if (!inode) {
dput(dentry);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
return NULL;
}
inode->i_op = &simple_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
d_add(dentry, inode);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
return dentry;
}
diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c
index 2940bd7..25aba16 100644
--- a/drivers/scsi/3w-xxxx.c
+++ b/drivers/scsi/3w-xxxx.c
@@ -1045,6 +1045,9 @@ static int tw_chrdev_open(struct inode *inode, struct file *file)
static const struct file_operations tw_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = tw_chrdev_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = tw_chrdev_ioctl,
+#endif
.open = tw_chrdev_open,
.release = NULL,
.llseek = noop_llseek,
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index c1fe0d2..e2f31c9 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -1106,6 +1106,7 @@ config SCSI_IPR
tristate "IBM Power Linux RAID adapter support"
depends on PCI && SCSI && ATA
select FW_LOADER
+ select IRQ_POLL
---help---
This driver supports the IBM Power Linux family RAID adapters.
This includes IBM pSeries 5712, 5703, 5709, and 570A, as well
@@ -1620,23 +1621,6 @@ config ATARI_SCSI
ST-DMA, replacing ACSI). It does NOT support other schemes, like
in the Hades (without DMA).
-config ATARI_SCSI_TOSHIBA_DELAY
- bool "Long delays for Toshiba CD-ROMs"
- depends on ATARI_SCSI
- help
- This option increases the delay after a SCSI arbitration to
- accommodate some flaky Toshiba CD-ROM drives. Say Y if you intend to
- use a Toshiba CD-ROM drive; otherwise, the option is not needed and
- would impact performance a bit, so say N.
-
-config ATARI_SCSI_RESET_BOOT
- bool "Reset SCSI-devices at boottime"
- depends on ATARI_SCSI
- help
- Reset the devices on your Atari whenever it boots. This makes the
- boot process fractionally longer but may assist recovery from errors
- that leave the devices with SCSI operations partway completed.
-
config MAC_SCSI
tristate "Macintosh NCR5380 SCSI"
depends on MAC && SCSI=y
diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c
index a777e5c..d728672 100644
--- a/drivers/scsi/NCR5380.c
+++ b/drivers/scsi/NCR5380.c
@@ -1,17 +1,17 @@
-/*
+/*
* NCR 5380 generic driver routines. These should make it *trivial*
- * to implement 5380 SCSI drivers under Linux with a non-trantor
- * architecture.
+ * to implement 5380 SCSI drivers under Linux with a non-trantor
+ * architecture.
*
- * Note that these routines also work with NR53c400 family chips.
+ * Note that these routines also work with NR53c400 family chips.
*
* Copyright 1993, Drew Eckhardt
- * Visionary Computing
- * (Unix and Linux consulting and custom programming)
- * drew@colorado.edu
- * +1 (303) 666-5836
+ * Visionary Computing
+ * (Unix and Linux consulting and custom programming)
+ * drew@colorado.edu
+ * +1 (303) 666-5836
*
- * For more information, please consult
+ * For more information, please consult
*
* NCR 5380 Family
* SCSI Protocol Controller
@@ -25,84 +25,28 @@
*/
/*
- * Revision 1.10 1998/9/2 Alan Cox
- * (alan@lxorguk.ukuu.org.uk)
- * Fixed up the timer lockups reported so far. Things still suck. Looking
- * forward to 2.3 and per device request queues. Then it'll be possible to
- * SMP thread this beast and improve life no end.
-
- * Revision 1.9 1997/7/27 Ronald van Cuijlenborg
- * (ronald.van.cuijlenborg@tip.nl or nutty@dds.nl)
- * (hopefully) fixed and enhanced USLEEP
- * added support for DTC3181E card (for Mustek scanner)
- *
-
- * Revision 1.8 Ingmar Baumgart
- * (ingmar@gonzo.schwaben.de)
- * added support for NCR53C400a card
- *
-
- * Revision 1.7 1996/3/2 Ray Van Tassle (rayvt@comm.mot.com)
- * added proc_info
- * added support needed for DTC 3180/3280
- * fixed a couple of bugs
- *
-
- * Revision 1.5 1994/01/19 09:14:57 drew
- * Fixed udelay() hack that was being used on DATAOUT phases
- * instead of a proper wait for the final handshake.
- *
- * Revision 1.4 1994/01/19 06:44:25 drew
- * *** empty log message ***
- *
- * Revision 1.3 1994/01/19 05:24:40 drew
- * Added support for TCR LAST_BYTE_SENT bit.
- *
- * Revision 1.2 1994/01/15 06:14:11 drew
- * REAL DMA support, bug fixes.
- *
- * Revision 1.1 1994/01/15 06:00:54 drew
- * Initial revision
- *
+ * With contributions from Ray Van Tassle, Ingmar Baumgart,
+ * Ronald van Cuijlenborg, Alan Cox and others.
*/
/*
- * Further development / testing that should be done :
+ * Further development / testing that should be done :
* 1. Cleanup the NCR5380_transfer_dma function and DMA operation complete
- * code so that everything does the same thing that's done at the
- * end of a pseudo-DMA read operation.
+ * code so that everything does the same thing that's done at the
+ * end of a pseudo-DMA read operation.
*
* 2. Fix REAL_DMA (interrupt driven, polled works fine) -
- * basically, transfer size needs to be reduced by one
- * and the last byte read as is done with PSEUDO_DMA.
- *
- * 4. Test SCSI-II tagged queueing (I have no devices which support
- * tagged queueing)
- *
- * 5. Test linked command handling code after Eric is ready with
- * the high level code.
+ * basically, transfer size needs to be reduced by one
+ * and the last byte read as is done with PSEUDO_DMA.
+ *
+ * 4. Test SCSI-II tagged queueing (I have no devices which support
+ * tagged queueing)
*/
-#include <scsi/scsi_dbg.h>
-#include <scsi/scsi_transport_spi.h>
-
-#if (NDEBUG & NDEBUG_LISTS)
-#define LIST(x,y) {printk("LINE:%d Adding %p to %p\n", __LINE__, (void*)(x), (void*)(y)); if ((x)==(y)) udelay(5); }
-#define REMOVE(w,x,y,z) {printk("LINE:%d Removing: %p->%p %p->%p \n", __LINE__, (void*)(w), (void*)(x), (void*)(y), (void*)(z)); if ((x)==(y)) udelay(5); }
-#else
-#define LIST(x,y)
-#define REMOVE(w,x,y,z)
-#endif
#ifndef notyet
-#undef LINKED
#undef REAL_DMA
#endif
-#ifdef REAL_DMA_POLL
-#undef READ_OVERRUNS
-#define READ_OVERRUNS
-#endif
-
#ifdef BOARD_REQUIRES_NO_DELAY
#define io_recovery_delay(x)
#else
@@ -112,44 +56,28 @@
/*
* Design
*
- * This is a generic 5380 driver. To use it on a different platform,
+ * This is a generic 5380 driver. To use it on a different platform,
* one simply writes appropriate system specific macros (ie, data
- * transfer - some PC's will use the I/O bus, 68K's must use
+ * transfer - some PC's will use the I/O bus, 68K's must use
* memory mapped) and drops this file in their 'C' wrapper.
*
- * (Note from hch: unfortunately it was not enough for the different
- * m68k folks and instead of improving this driver they copied it
- * and hacked it up for their needs. As a consequence they lost
- * most updates to this driver. Maybe someone will fix all these
- * drivers to use a common core one day..)
- *
- * As far as command queueing, two queues are maintained for
+ * As far as command queueing, two queues are maintained for
* each 5380 in the system - commands that haven't been issued yet,
- * and commands that are currently executing. This means that an
- * unlimited number of commands may be queued, letting
- * more commands propagate from the higher driver levels giving higher
- * throughput. Note that both I_T_L and I_T_L_Q nexuses are supported,
- * allowing multiple commands to propagate all the way to a SCSI-II device
+ * and commands that are currently executing. This means that an
+ * unlimited number of commands may be queued, letting
+ * more commands propagate from the higher driver levels giving higher
+ * throughput. Note that both I_T_L and I_T_L_Q nexuses are supported,
+ * allowing multiple commands to propagate all the way to a SCSI-II device
* while a command is already executing.
*
*
- * Issues specific to the NCR5380 :
+ * Issues specific to the NCR5380 :
*
- * When used in a PIO or pseudo-dma mode, the NCR5380 is a braindead
- * piece of hardware that requires you to sit in a loop polling for
- * the REQ signal as long as you are connected. Some devices are
- * brain dead (ie, many TEXEL CD ROM drives) and won't disconnect
- * while doing long seek operations.
- *
- * The workaround for this is to keep track of devices that have
- * disconnected. If the device hasn't disconnected, for commands that
- * should disconnect, we do something like
- *
- * while (!REQ is asserted) { sleep for N usecs; poll for M usecs }
- *
- * Some tweaking of N and M needs to be done. An algorithm based
- * on "time to data" would give the best results as long as short time
- * to datas (ie, on the same track) were considered, however these
+ * When used in a PIO or pseudo-dma mode, the NCR5380 is a braindead
+ * piece of hardware that requires you to sit in a loop polling for
+ * the REQ signal as long as you are connected. Some devices are
+ * brain dead (ie, many TEXEL CD ROM drives) and won't disconnect
+ * while doing long seek operations. [...] These
* broken devices are the exception rather than the rule and I'd rather
* spend my time optimizing for the normal case.
*
@@ -159,23 +87,23 @@
* which is started from a workqueue for each NCR5380 host in the
* system. It attempts to establish I_T_L or I_T_L_Q nexuses by
* removing the commands from the issue queue and calling
- * NCR5380_select() if a nexus is not established.
+ * NCR5380_select() if a nexus is not established.
*
* Once a nexus is established, the NCR5380_information_transfer()
* phase goes through the various phases as instructed by the target.
* if the target goes into MSG IN and sends a DISCONNECT message,
* the command structure is placed into the per instance disconnected
- * queue, and NCR5380_main tries to find more work. If the target is
+ * queue, and NCR5380_main tries to find more work. If the target is
* idle for too long, the system will try to sleep.
*
* If a command has disconnected, eventually an interrupt will trigger,
* calling NCR5380_intr() which will in turn call NCR5380_reselect
* to reestablish a nexus. This will run main if necessary.
*
- * On command termination, the done function will be called as
+ * On command termination, the done function will be called as
* appropriate.
*
- * SCSI pointers are maintained in the SCp field of SCSI command
+ * SCSI pointers are maintained in the SCp field of SCSI command
* structures, being initialized after the command is connected
* in NCR5380_select, and set as appropriate in NCR5380_information_transfer.
* Note that in violation of the standard, an implicit SAVE POINTERS operation
@@ -185,73 +113,48 @@
/*
* Using this file :
* This file a skeleton Linux SCSI driver for the NCR 5380 series
- * of chips. To use it, you write an architecture specific functions
+ * of chips. To use it, you write an architecture specific functions
* and macros and include this file in your driver.
*
- * These macros control options :
- * AUTOPROBE_IRQ - if defined, the NCR5380_probe_irq() function will be
- * defined.
- *
+ * These macros control options :
+ * AUTOPROBE_IRQ - if defined, the NCR5380_probe_irq() function will be
+ * defined.
+ *
* AUTOSENSE - if defined, REQUEST SENSE will be performed automatically
- * for commands that return with a CHECK CONDITION status.
+ * for commands that return with a CHECK CONDITION status.
*
* DIFFERENTIAL - if defined, NCR53c81 chips will use external differential
- * transceivers.
+ * transceivers.
*
* DONT_USE_INTR - if defined, never use interrupts, even if we probe or
- * override-configure an IRQ.
- *
- * LIMIT_TRANSFERSIZE - if defined, limit the pseudo-dma transfers to 512
- * bytes at a time. Since interrupts are disabled by default during
- * these transfers, we might need this to give reasonable interrupt
- * service time if the transfer size gets too large.
- *
- * LINKED - if defined, linked commands are supported.
+ * override-configure an IRQ.
*
* PSEUDO_DMA - if defined, PSEUDO DMA is used during the data transfer phases.
*
* REAL_DMA - if defined, REAL DMA is used during the data transfer phases.
*
* REAL_DMA_POLL - if defined, REAL DMA is used but the driver doesn't
- * rely on phase mismatch and EOP interrupts to determine end
- * of phase.
- *
- * UNSAFE - leave interrupts enabled during pseudo-DMA transfers. You
- * only really want to use this if you're having a problem with
- * dropped characters during high speed communications, and even
- * then, you're going to be better off twiddling with transfersize
- * in the high level code.
- *
- * Defaults for these will be provided although the user may want to adjust
- * these to allocate CPU resources to the SCSI driver or "real" code.
- *
- * USLEEP_SLEEP - amount of time, in jiffies, to sleep
- *
- * USLEEP_POLL - amount of time, in jiffies, to poll
+ * rely on phase mismatch and EOP interrupts to determine end
+ * of phase.
*
* These macros MUST be defined :
- * NCR5380_local_declare() - declare any local variables needed for your
- * transfer routines.
*
- * NCR5380_setup(instance) - initialize any local variables needed from a given
- * instance of the host adapter for NCR5380_{read,write,pread,pwrite}
- *
* NCR5380_read(register) - read from the specified register
*
- * NCR5380_write(register, value) - write to the specific register
+ * NCR5380_write(register, value) - write to the specific register
*
- * NCR5380_implementation_fields - additional fields needed for this
- * specific implementation of the NCR5380
+ * NCR5380_implementation_fields - additional fields needed for this
+ * specific implementation of the NCR5380
*
* Either real DMA *or* pseudo DMA may be implemented
- * REAL functions :
+ * REAL functions :
* NCR5380_REAL_DMA should be defined if real DMA is to be used.
- * Note that the DMA setup functions should return the number of bytes
- * that they were able to program the controller for.
+ * Note that the DMA setup functions should return the number of bytes
+ * that they were able to program the controller for.
*
- * Also note that generic i386/PC versions of these macros are
- * available as NCR5380_i386_dma_write_setup,
- * NCR5380_i386_dma_read_setup, and NCR5380_i386_dma_residual.
+ * Also note that generic i386/PC versions of these macros are
+ * available as NCR5380_i386_dma_write_setup,
+ * NCR5380_i386_dma_read_setup, and NCR5380_i386_dma_residual.
*
* NCR5380_dma_write_setup(instance, src, count) - initialize
* NCR5380_dma_read_setup(instance, dst, count) - initialize
@@ -262,25 +165,25 @@
* NCR5380_pread(instance, dst, count);
*
* The generic driver is initialized by calling NCR5380_init(instance),
- * after setting the appropriate host specific fields and ID. If the
+ * after setting the appropriate host specific fields and ID. If the
* driver wishes to autoprobe for an IRQ line, the NCR5380_probe_irq(instance,
* possible) function may be used.
*/
-static int do_abort(struct Scsi_Host *host);
-static void do_reset(struct Scsi_Host *host);
+static int do_abort(struct Scsi_Host *);
+static void do_reset(struct Scsi_Host *);
-/*
- * initialize_SCp - init the scsi pointer field
- * @cmd: command block to set up
+/**
+ * initialize_SCp - init the scsi pointer field
+ * @cmd: command block to set up
*
- * Set up the internal fields in the SCSI command.
+ * Set up the internal fields in the SCSI command.
*/
static inline void initialize_SCp(struct scsi_cmnd *cmd)
{
- /*
- * Initialize the Scsi Pointer field so that all of the commands in the
+ /*
+ * Initialize the Scsi Pointer field so that all of the commands in the
* various queues are valid.
*/
@@ -295,120 +198,123 @@ static inline void initialize_SCp(struct scsi_cmnd *cmd)
cmd->SCp.ptr = NULL;
cmd->SCp.this_residual = 0;
}
+
+ cmd->SCp.Status = 0;
+ cmd->SCp.Message = 0;
}
/**
- * NCR5380_poll_politely - wait for NCR5380 status bits
- * @instance: controller to poll
- * @reg: 5380 register to poll
- * @bit: Bitmask to check
- * @val: Value required to exit
- *
- * Polls the NCR5380 in a reasonably efficient manner waiting for
- * an event to occur, after a short quick poll we begin giving the
- * CPU back in non IRQ contexts
- *
- * Returns the value of the register or a negative error code.
+ * NCR5380_poll_politely2 - wait for two chip register values
+ * @instance: controller to poll
+ * @reg1: 5380 register to poll
+ * @bit1: Bitmask to check
+ * @val1: Expected value
+ * @reg2: Second 5380 register to poll
+ * @bit2: Second bitmask to check
+ * @val2: Second expected value
+ * @wait: Time-out in jiffies
+ *
+ * Polls the chip in a reasonably efficient manner waiting for an
+ * event to occur. After a short quick poll we begin to yield the CPU
+ * (if possible). In irq contexts the time-out is arbitrarily limited.
+ * Callers may hold locks as long as they are held in irq mode.
+ *
+ * Returns 0 if either or both event(s) occurred otherwise -ETIMEDOUT.
*/
-
-static int NCR5380_poll_politely(struct Scsi_Host *instance, int reg, int bit, int val, int t)
+
+static int NCR5380_poll_politely2(struct Scsi_Host *instance,
+ int reg1, int bit1, int val1,
+ int reg2, int bit2, int val2, int wait)
{
- NCR5380_local_declare();
- int n = 500; /* At about 8uS a cycle for the cpu access */
- unsigned long end = jiffies + t;
- int r;
-
- NCR5380_setup(instance);
-
- while( n-- > 0)
- {
- r = NCR5380_read(reg);
- if((r & bit) == val)
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ unsigned long deadline = jiffies + wait;
+ unsigned long n;
+
+ /* Busy-wait for up to 10 ms */
+ n = min(10000U, jiffies_to_usecs(wait));
+ n *= hostdata->accesses_per_ms;
+ n /= 2000;
+ do {
+ if ((NCR5380_read(reg1) & bit1) == val1)
+ return 0;
+ if ((NCR5380_read(reg2) & bit2) == val2)
return 0;
cpu_relax();
- }
-
- /* t time yet ? */
- while(time_before(jiffies, end))
- {
- r = NCR5380_read(reg);
- if((r & bit) == val)
+ } while (n--);
+
+ if (irqs_disabled() || in_interrupt())
+ return -ETIMEDOUT;
+
+ /* Repeatedly sleep for 1 ms until deadline */
+ while (time_is_after_jiffies(deadline)) {
+ schedule_timeout_uninterruptible(1);
+ if ((NCR5380_read(reg1) & bit1) == val1)
+ return 0;
+ if ((NCR5380_read(reg2) & bit2) == val2)
return 0;
- if(!in_interrupt())
- cond_resched();
- else
- cpu_relax();
}
+
return -ETIMEDOUT;
}
-static struct {
- unsigned char value;
- const char *name;
-} phases[] __maybe_unused = {
- {PHASE_DATAOUT, "DATAOUT"},
- {PHASE_DATAIN, "DATAIN"},
- {PHASE_CMDOUT, "CMDOUT"},
- {PHASE_STATIN, "STATIN"},
- {PHASE_MSGOUT, "MSGOUT"},
- {PHASE_MSGIN, "MSGIN"},
- {PHASE_UNKNOWN, "UNKNOWN"}
-};
+static inline int NCR5380_poll_politely(struct Scsi_Host *instance,
+ int reg, int bit, int val, int wait)
+{
+ return NCR5380_poll_politely2(instance, reg, bit, val,
+ reg, bit, val, wait);
+}
#if NDEBUG
static struct {
unsigned char mask;
const char *name;
-} signals[] = {
- {SR_DBP, "PARITY"},
- {SR_RST, "RST"},
- {SR_BSY, "BSY"},
- {SR_REQ, "REQ"},
- {SR_MSG, "MSG"},
- {SR_CD, "CD"},
- {SR_IO, "IO"},
- {SR_SEL, "SEL"},
+} signals[] = {
+ {SR_DBP, "PARITY"},
+ {SR_RST, "RST"},
+ {SR_BSY, "BSY"},
+ {SR_REQ, "REQ"},
+ {SR_MSG, "MSG"},
+ {SR_CD, "CD"},
+ {SR_IO, "IO"},
+ {SR_SEL, "SEL"},
{0, NULL}
-},
+},
basrs[] = {
- {BASR_ATN, "ATN"},
- {BASR_ACK, "ACK"},
+ {BASR_ATN, "ATN"},
+ {BASR_ACK, "ACK"},
{0, NULL}
-},
-icrs[] = {
- {ICR_ASSERT_RST, "ASSERT RST"},
- {ICR_ASSERT_ACK, "ASSERT ACK"},
- {ICR_ASSERT_BSY, "ASSERT BSY"},
- {ICR_ASSERT_SEL, "ASSERT SEL"},
- {ICR_ASSERT_ATN, "ASSERT ATN"},
- {ICR_ASSERT_DATA, "ASSERT DATA"},
+},
+icrs[] = {
+ {ICR_ASSERT_RST, "ASSERT RST"},
+ {ICR_ASSERT_ACK, "ASSERT ACK"},
+ {ICR_ASSERT_BSY, "ASSERT BSY"},
+ {ICR_ASSERT_SEL, "ASSERT SEL"},
+ {ICR_ASSERT_ATN, "ASSERT ATN"},
+ {ICR_ASSERT_DATA, "ASSERT DATA"},
{0, NULL}
-},
-mrs[] = {
- {MR_BLOCK_DMA_MODE, "MODE BLOCK DMA"},
- {MR_TARGET, "MODE TARGET"},
- {MR_ENABLE_PAR_CHECK, "MODE PARITY CHECK"},
- {MR_ENABLE_PAR_INTR, "MODE PARITY INTR"},
- {MR_MONITOR_BSY, "MODE MONITOR BSY"},
- {MR_DMA_MODE, "MODE DMA"},
- {MR_ARBITRATE, "MODE ARBITRATION"},
+},
+mrs[] = {
+ {MR_BLOCK_DMA_MODE, "MODE BLOCK DMA"},
+ {MR_TARGET, "MODE TARGET"},
+ {MR_ENABLE_PAR_CHECK, "MODE PARITY CHECK"},
+ {MR_ENABLE_PAR_INTR, "MODE PARITY INTR"},
+ {MR_ENABLE_EOP_INTR, "MODE EOP INTR"},
+ {MR_MONITOR_BSY, "MODE MONITOR BSY"},
+ {MR_DMA_MODE, "MODE DMA"},
+ {MR_ARBITRATE, "MODE ARBITRATION"},
{0, NULL}
};
/**
- * NCR5380_print - print scsi bus signals
- * @instance: adapter state to dump
- *
- * Print the SCSI bus signals for debugging purposes
+ * NCR5380_print - print scsi bus signals
+ * @instance: adapter state to dump
*
- * Locks: caller holds hostdata lock (not essential)
+ * Print the SCSI bus signals for debugging purposes
*/
static void NCR5380_print(struct Scsi_Host *instance)
{
- NCR5380_local_declare();
unsigned char status, data, basr, mr, icr, i;
- NCR5380_setup(instance);
data = NCR5380_read(CURRENT_SCSI_DATA_REG);
status = NCR5380_read(STATUS_REG);
@@ -435,117 +341,56 @@ static void NCR5380_print(struct Scsi_Host *instance)
printk("\n");
}
+static struct {
+ unsigned char value;
+ const char *name;
+} phases[] = {
+ {PHASE_DATAOUT, "DATAOUT"},
+ {PHASE_DATAIN, "DATAIN"},
+ {PHASE_CMDOUT, "CMDOUT"},
+ {PHASE_STATIN, "STATIN"},
+ {PHASE_MSGOUT, "MSGOUT"},
+ {PHASE_MSGIN, "MSGIN"},
+ {PHASE_UNKNOWN, "UNKNOWN"}
+};
-/*
- * NCR5380_print_phase - show SCSI phase
- * @instance: adapter to dump
- *
- * Print the current SCSI phase for debugging purposes
+/**
+ * NCR5380_print_phase - show SCSI phase
+ * @instance: adapter to dump
*
- * Locks: none
+ * Print the current SCSI phase for debugging purposes
*/
static void NCR5380_print_phase(struct Scsi_Host *instance)
{
- NCR5380_local_declare();
unsigned char status;
int i;
- NCR5380_setup(instance);
status = NCR5380_read(STATUS_REG);
if (!(status & SR_REQ))
- printk("scsi%d : REQ not asserted, phase unknown.\n", instance->host_no);
+ shost_printk(KERN_DEBUG, instance, "REQ not asserted, phase unknown.\n");
else {
- for (i = 0; (phases[i].value != PHASE_UNKNOWN) && (phases[i].value != (status & PHASE_MASK)); ++i);
- printk("scsi%d : phase %s\n", instance->host_no, phases[i].name);
+ for (i = 0; (phases[i].value != PHASE_UNKNOWN) &&
+ (phases[i].value != (status & PHASE_MASK)); ++i)
+ ;
+ shost_printk(KERN_DEBUG, instance, "phase %s\n", phases[i].name);
}
}
#endif
-/*
- * These need tweaking, and would probably work best as per-device
- * flags initialized differently for disk, tape, cd, etc devices.
- * People with broken devices are free to experiment as to what gives
- * the best results for them.
- *
- * USLEEP_SLEEP should be a minimum seek time.
- *
- * USLEEP_POLL should be a maximum rotational latency.
- */
-#ifndef USLEEP_SLEEP
-/* 20 ms (reasonable hard disk speed) */
-#define USLEEP_SLEEP msecs_to_jiffies(20)
-#endif
-/* 300 RPM (floppy speed) */
-#ifndef USLEEP_POLL
-#define USLEEP_POLL msecs_to_jiffies(200)
-#endif
-#ifndef USLEEP_WAITLONG
-/* RvC: (reasonable time to wait on select error) */
-#define USLEEP_WAITLONG USLEEP_SLEEP
-#endif
-/*
- * Function : int should_disconnect (unsigned char cmd)
- *
- * Purpose : decide whether a command would normally disconnect or
- * not, since if it won't disconnect we should go to sleep.
- *
- * Input : cmd - opcode of SCSI command
- *
- * Returns : DISCONNECT_LONG if we should disconnect for a really long
- * time (ie always, sleep, look for REQ active, sleep),
- * DISCONNECT_TIME_TO_DATA if we would only disconnect for a normal
- * time-to-data delay, DISCONNECT_NONE if this command would return
- * immediately.
- *
- * Future sleep algorithms based on time to data can exploit
- * something like this so they can differentiate between "normal"
- * (ie, read, write, seek) and unusual commands (ie, * format).
- *
- * Note : We don't deal with commands that handle an immediate disconnect,
- *
- */
-
-static int should_disconnect(unsigned char cmd)
-{
- switch (cmd) {
- case READ_6:
- case WRITE_6:
- case SEEK_6:
- case READ_10:
- case WRITE_10:
- case SEEK_10:
- return DISCONNECT_TIME_TO_DATA;
- case FORMAT_UNIT:
- case SEARCH_HIGH:
- case SEARCH_LOW:
- case SEARCH_EQUAL:
- return DISCONNECT_LONG;
- default:
- return DISCONNECT_NONE;
- }
-}
-
-static void NCR5380_set_timer(struct NCR5380_hostdata *hostdata, unsigned long timeout)
-{
- hostdata->time_expires = jiffies + timeout;
- schedule_delayed_work(&hostdata->coroutine, timeout);
-}
-
-
-static int probe_irq __initdata = 0;
+static int probe_irq __initdata;
/**
- * probe_intr - helper for IRQ autoprobe
- * @irq: interrupt number
- * @dev_id: unused
- * @regs: unused
+ * probe_intr - helper for IRQ autoprobe
+ * @irq: interrupt number
+ * @dev_id: unused
+ * @regs: unused
*
- * Set a flag to indicate the IRQ in question was received. This is
- * used by the IRQ probe code.
+ * Set a flag to indicate the IRQ in question was received. This is
+ * used by the IRQ probe code.
*/
-
+
static irqreturn_t __init probe_intr(int irq, void *dev_id)
{
probe_irq = irq;
@@ -553,24 +398,20 @@ static irqreturn_t __init probe_intr(int irq, void *dev_id)
}
/**
- * NCR5380_probe_irq - find the IRQ of an NCR5380
- * @instance: NCR5380 controller
- * @possible: bitmask of ISA IRQ lines
+ * NCR5380_probe_irq - find the IRQ of an NCR5380
+ * @instance: NCR5380 controller
+ * @possible: bitmask of ISA IRQ lines
*
- * Autoprobe for the IRQ line used by the NCR5380 by triggering an IRQ
- * and then looking to see what interrupt actually turned up.
- *
- * Locks: none, irqs must be enabled on entry
+ * Autoprobe for the IRQ line used by the NCR5380 by triggering an IRQ
+ * and then looking to see what interrupt actually turned up.
*/
static int __init __maybe_unused NCR5380_probe_irq(struct Scsi_Host *instance,
int possible)
{
- NCR5380_local_declare();
- struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
unsigned long timeout;
int trying_irqs, i, mask;
- NCR5380_setup(instance);
for (trying_irqs = 0, i = 1, mask = 2; i < 16; ++i, mask <<= 1)
if ((mask & possible) && (request_irq(i, &probe_intr, 0, "NCR-probe", NULL) == 0))
@@ -581,7 +422,7 @@ static int __init __maybe_unused NCR5380_probe_irq(struct Scsi_Host *instance,
/*
* A interrupt is triggered whenever BSY = false, SEL = true
- * and a bit set in the SELECT_ENABLE_REG is asserted on the
+ * and a bit set in the SELECT_ENABLE_REG is asserted on the
* SCSI bus.
*
* Note that the bus is only driven when the phase control signals
@@ -596,7 +437,7 @@ static int __init __maybe_unused NCR5380_probe_irq(struct Scsi_Host *instance,
while (probe_irq == NO_IRQ && time_before(jiffies, timeout))
schedule_timeout_uninterruptible(1);
-
+
NCR5380_write(SELECT_ENABLE_REG, 0);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
@@ -608,12 +449,10 @@ static int __init __maybe_unused NCR5380_probe_irq(struct Scsi_Host *instance,
}
/**
- * NCR58380_info - report driver and host information
- * @instance: relevant scsi host instance
- *
- * For use as the host template info() handler.
+ * NCR58380_info - report driver and host information
+ * @instance: relevant scsi host instance
*
- * Locks: none
+ * For use as the host template info() handler.
*/
static const char *NCR5380_info(struct Scsi_Host *instance)
@@ -633,20 +472,14 @@ static void prepare_info(struct Scsi_Host *instance)
"can_queue %d, cmd_per_lun %d, "
"sg_tablesize %d, this_id %d, "
"flags { %s%s%s}, "
-#if defined(USLEEP_POLL) && defined(USLEEP_WAITLONG)
- "USLEEP_POLL %lu, USLEEP_WAITLONG %lu, "
-#endif
"options { %s} ",
instance->hostt->name, instance->io_port, instance->n_io_port,
instance->base, instance->irq,
instance->can_queue, instance->cmd_per_lun,
instance->sg_tablesize, instance->this_id,
- hostdata->flags & FLAG_NCR53C400 ? "NCR53C400 " : "",
- hostdata->flags & FLAG_DTC3181E ? "DTC3181E " : "",
+ hostdata->flags & FLAG_NO_DMA_FIXUP ? "NO_DMA_FIXUP " : "",
hostdata->flags & FLAG_NO_PSEUDO_DMA ? "NO_PSEUDO_DMA " : "",
-#if defined(USLEEP_POLL) && defined(USLEEP_WAITLONG)
- USLEEP_POLL, USLEEP_WAITLONG,
-#endif
+ hostdata->flags & FLAG_TOSHIBA_DELAY ? "TOSHIBA_DELAY " : "",
#ifdef AUTOPROBE_IRQ
"AUTOPROBE_IRQ "
#endif
@@ -665,46 +498,10 @@ static void prepare_info(struct Scsi_Host *instance)
#ifdef PSEUDO_DMA
"PSEUDO_DMA "
#endif
-#ifdef UNSAFE
- "UNSAFE "
-#endif
-#ifdef NCR53C400
- "NCR53C400 "
-#endif
"");
}
-/**
- * NCR5380_print_status - dump controller info
- * @instance: controller to dump
- *
- * Print commands in the various queues, called from NCR5380_abort
- * and NCR5380_debug to aid debugging.
- *
- * Locks: called functions disable irqs
- */
-
-static void NCR5380_print_status(struct Scsi_Host *instance)
-{
- NCR5380_dprint(NDEBUG_ANY, instance);
- NCR5380_dprint_phase(NDEBUG_ANY, instance);
-}
-
#ifdef PSEUDO_DMA
-/******************************************/
-/*
- * /proc/scsi/[dtc pas16 t128 generic]/[0-ASC_NUM_BOARD_SUPPORTED]
- *
- * *buffer: I/O buffer
- * **start: if inout == FALSE pointer into buffer where user read should start
- * offset: current offset
- * length: length of buffer
- * hostno: Scsi_Host host_no
- * inout: TRUE - user is writing; FALSE - user is reading
- *
- * Return the number of bytes read from or written
- */
-
static int __maybe_unused NCR5380_write_info(struct Scsi_Host *instance,
char *buffer, int length)
{
@@ -714,104 +511,41 @@ static int __maybe_unused NCR5380_write_info(struct Scsi_Host *instance,
hostdata->spin_max_w = 0;
return 0;
}
-#endif
-
-static
-void lprint_Scsi_Cmnd(struct scsi_cmnd *cmd, struct seq_file *m);
-static
-void lprint_command(unsigned char *cmd, struct seq_file *m);
-static
-void lprint_opcode(int opcode, struct seq_file *m);
static int __maybe_unused NCR5380_show_info(struct seq_file *m,
- struct Scsi_Host *instance)
+ struct Scsi_Host *instance)
{
- struct NCR5380_hostdata *hostdata;
- struct scsi_cmnd *ptr;
-
- hostdata = (struct NCR5380_hostdata *) instance->hostdata;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
-#ifdef PSEUDO_DMA
seq_printf(m, "Highwater I/O busy spin counts: write %d, read %d\n",
hostdata->spin_max_w, hostdata->spin_max_r);
-#endif
- spin_lock_irq(instance->host_lock);
- if (!hostdata->connected)
- seq_printf(m, "scsi%d: no currently connected command\n", instance->host_no);
- else
- lprint_Scsi_Cmnd((struct scsi_cmnd *) hostdata->connected, m);
- seq_printf(m, "scsi%d: issue_queue\n", instance->host_no);
- for (ptr = (struct scsi_cmnd *) hostdata->issue_queue; ptr; ptr = (struct scsi_cmnd *) ptr->host_scribble)
- lprint_Scsi_Cmnd(ptr, m);
-
- seq_printf(m, "scsi%d: disconnected_queue\n", instance->host_no);
- for (ptr = (struct scsi_cmnd *) hostdata->disconnected_queue; ptr; ptr = (struct scsi_cmnd *) ptr->host_scribble)
- lprint_Scsi_Cmnd(ptr, m);
- spin_unlock_irq(instance->host_lock);
return 0;
}
-
-static void lprint_Scsi_Cmnd(struct scsi_cmnd *cmd, struct seq_file *m)
-{
- seq_printf(m, "scsi%d : destination target %d, lun %llu\n", cmd->device->host->host_no, cmd->device->id, cmd->device->lun);
- seq_puts(m, " command = ");
- lprint_command(cmd->cmnd, m);
-}
-
-static void lprint_command(unsigned char *command, struct seq_file *m)
-{
- int i, s;
- lprint_opcode(command[0], m);
- for (i = 1, s = COMMAND_SIZE(command[0]); i < s; ++i)
- seq_printf(m, "%02x ", command[i]);
- seq_putc(m, '\n');
-}
-
-static void lprint_opcode(int opcode, struct seq_file *m)
-{
- seq_printf(m, "%2d (0x%02x)", opcode, opcode);
-}
-
+#endif
/**
- * NCR5380_init - initialise an NCR5380
- * @instance: adapter to configure
- * @flags: control flags
+ * NCR5380_init - initialise an NCR5380
+ * @instance: adapter to configure
+ * @flags: control flags
*
- * Initializes *instance and corresponding 5380 chip,
- * with flags OR'd into the initial flags value.
+ * Initializes *instance and corresponding 5380 chip,
+ * with flags OR'd into the initial flags value.
*
- * Notes : I assume that the host, hostno, and id bits have been
- * set correctly. I don't care about the irq and other fields.
+ * Notes : I assume that the host, hostno, and id bits have been
+ * set correctly. I don't care about the irq and other fields.
*
- * Returns 0 for success
- *
- * Locks: interrupts must be enabled when we are called
+ * Returns 0 for success
*/
static int NCR5380_init(struct Scsi_Host *instance, int flags)
{
- NCR5380_local_declare();
- int i, pass;
- unsigned long timeout;
- struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
-
- if(in_interrupt())
- printk(KERN_ERR "NCR5380_init called with interrupts off!\n");
- /*
- * On NCR53C400 boards, NCR5380 registers are mapped 8 past
- * the base address.
- */
-
-#ifdef NCR53C400
- if (flags & FLAG_NCR53C400)
- instance->NCR5380_instance_name += NCR53C400_address_adjust;
-#endif
-
- NCR5380_setup(instance);
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ int i;
+ unsigned long deadline;
- hostdata->aborted = 0;
+ hostdata->host = instance;
hostdata->id_mask = 1 << instance->this_id;
+ hostdata->id_higher_mask = 0;
for (i = hostdata->id_mask; i <= 0x80; i <<= 1)
if (i > hostdata->id_mask)
hostdata->id_higher_mask |= i;
@@ -820,21 +554,21 @@ static int NCR5380_init(struct Scsi_Host *instance, int flags)
#ifdef REAL_DMA
hostdata->dmalen = 0;
#endif
- hostdata->targets_present = 0;
+ spin_lock_init(&hostdata->lock);
hostdata->connected = NULL;
- hostdata->issue_queue = NULL;
- hostdata->disconnected_queue = NULL;
-
- INIT_DELAYED_WORK(&hostdata->coroutine, NCR5380_main);
-
- /* The CHECK code seems to break the 53C400. Will check it later maybe */
- if (flags & FLAG_NCR53C400)
- hostdata->flags = FLAG_HAS_LAST_BYTE_SENT | flags;
- else
- hostdata->flags = FLAG_CHECK_LAST_BYTE_SENT | flags;
+ hostdata->sensing = NULL;
+ INIT_LIST_HEAD(&hostdata->autosense);
+ INIT_LIST_HEAD(&hostdata->unissued);
+ INIT_LIST_HEAD(&hostdata->disconnected);
- hostdata->host = instance;
- hostdata->time_expires = 0;
+ hostdata->flags = flags;
+
+ INIT_WORK(&hostdata->main_task, NCR5380_main);
+ hostdata->work_q = alloc_workqueue("ncr5380_%d",
+ WQ_UNBOUND | WQ_MEM_RECLAIM,
+ 1, instance->host_no);
+ if (!hostdata->work_q)
+ return -ENOMEM;
prepare_info(instance);
@@ -843,43 +577,69 @@ static int NCR5380_init(struct Scsi_Host *instance, int flags)
NCR5380_write(TARGET_COMMAND_REG, 0);
NCR5380_write(SELECT_ENABLE_REG, 0);
-#ifdef NCR53C400
- if (hostdata->flags & FLAG_NCR53C400) {
- NCR5380_write(C400_CONTROL_STATUS_REG, CSR_BASE);
- }
-#endif
+ /* Calibrate register polling loop */
+ i = 0;
+ deadline = jiffies + 1;
+ do {
+ cpu_relax();
+ } while (time_is_after_jiffies(deadline));
+ deadline += msecs_to_jiffies(256);
+ do {
+ NCR5380_read(STATUS_REG);
+ ++i;
+ cpu_relax();
+ } while (time_is_after_jiffies(deadline));
+ hostdata->accesses_per_ms = i / 256;
- /*
- * Detect and correct bus wedge problems.
- *
- * If the system crashed, it may have crashed in a state
- * where a SCSI command was still executing, and the
- * SCSI bus is not in a BUS FREE STATE.
- *
- * If this is the case, we'll try to abort the currently
- * established nexus which we know nothing about, and that
- * failing, do a hard reset of the SCSI bus
- */
+ return 0;
+}
+
+/**
+ * NCR5380_maybe_reset_bus - Detect and correct bus wedge problems.
+ * @instance: adapter to check
+ *
+ * If the system crashed, it may have crashed with a connected target and
+ * the SCSI bus busy. Check for BUS FREE phase. If not, try to abort the
+ * currently established nexus, which we know nothing about. Failing that
+ * do a bus reset.
+ *
+ * Note that a bus reset will cause the chip to assert IRQ.
+ *
+ * Returns 0 if successful, otherwise -ENXIO.
+ */
+
+static int NCR5380_maybe_reset_bus(struct Scsi_Host *instance)
+{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ int pass;
for (pass = 1; (NCR5380_read(STATUS_REG) & SR_BSY) && pass <= 6; ++pass) {
switch (pass) {
case 1:
case 3:
case 5:
- printk(KERN_INFO "scsi%d: SCSI bus busy, waiting up to five seconds\n", instance->host_no);
- timeout = jiffies + 5 * HZ;
- NCR5380_poll_politely(instance, STATUS_REG, SR_BSY, 0, 5*HZ);
+ shost_printk(KERN_ERR, instance, "SCSI bus busy, waiting up to five seconds\n");
+ NCR5380_poll_politely(instance,
+ STATUS_REG, SR_BSY, 0, 5 * HZ);
break;
case 2:
- printk(KERN_WARNING "scsi%d: bus busy, attempting abort\n", instance->host_no);
+ shost_printk(KERN_ERR, instance, "bus busy, attempting abort\n");
do_abort(instance);
break;
case 4:
- printk(KERN_WARNING "scsi%d: bus busy, attempting reset\n", instance->host_no);
+ shost_printk(KERN_ERR, instance, "bus busy, attempting reset\n");
do_reset(instance);
+ /* Wait after a reset; the SCSI standard calls for
+ * 250ms, we wait 500ms to be on the safe side.
+ * But some Toshiba CD-ROMs need ten times that.
+ */
+ if (hostdata->flags & FLAG_TOSHIBA_DELAY)
+ msleep(2500);
+ else
+ msleep(500);
break;
case 6:
- printk(KERN_ERR "scsi%d: bus locked solid or invalid override\n", instance->host_no);
+ shost_printk(KERN_ERR, instance, "bus locked solid\n");
return -ENXIO;
}
}
@@ -887,450 +647,513 @@ static int NCR5380_init(struct Scsi_Host *instance, int flags)
}
/**
- * NCR5380_exit - remove an NCR5380
- * @instance: adapter to remove
+ * NCR5380_exit - remove an NCR5380
+ * @instance: adapter to remove
+ *
+ * Assumes that no more work can be queued (e.g. by NCR5380_intr).
*/
static void NCR5380_exit(struct Scsi_Host *instance)
{
- struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
- cancel_delayed_work_sync(&hostdata->coroutine);
+ cancel_work_sync(&hostdata->main_task);
+ destroy_workqueue(hostdata->work_q);
}
/**
- * NCR5380_queue_command - queue a command
- * @cmd: SCSI command
- * @done: completion handler
- *
- * cmd is added to the per instance issue_queue, with minor
- * twiddling done to the host specific fields of cmd. If the
- * main coroutine is not running, it is restarted.
+ * complete_cmd - finish processing a command and return it to the SCSI ML
+ * @instance: the host instance
+ * @cmd: command to complete
+ */
+
+static void complete_cmd(struct Scsi_Host *instance,
+ struct scsi_cmnd *cmd)
+{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+
+ dsprintk(NDEBUG_QUEUES, instance, "complete_cmd: cmd %p\n", cmd);
+
+ if (hostdata->sensing == cmd) {
+ /* Autosense processing ends here */
+ if ((cmd->result & 0xff) != SAM_STAT_GOOD) {
+ scsi_eh_restore_cmnd(cmd, &hostdata->ses);
+ set_host_byte(cmd, DID_ERROR);
+ } else
+ scsi_eh_restore_cmnd(cmd, &hostdata->ses);
+ hostdata->sensing = NULL;
+ }
+
+ hostdata->busy[scmd_id(cmd)] &= ~(1 << cmd->device->lun);
+
+ cmd->scsi_done(cmd);
+}
+
+/**
+ * NCR5380_queue_command - queue a command
+ * @instance: the relevant SCSI adapter
+ * @cmd: SCSI command
*
- * Locks: host lock taken by caller
+ * cmd is added to the per-instance issue queue, with minor
+ * twiddling done to the host specific fields of cmd. If the
+ * main coroutine is not running, it is restarted.
*/
-static int NCR5380_queue_command_lck(struct scsi_cmnd *cmd, void (*done) (struct scsi_cmnd *))
+static int NCR5380_queue_command(struct Scsi_Host *instance,
+ struct scsi_cmnd *cmd)
{
- struct Scsi_Host *instance = cmd->device->host;
- struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
- struct scsi_cmnd *tmp;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
+ unsigned long flags;
#if (NDEBUG & NDEBUG_NO_WRITE)
switch (cmd->cmnd[0]) {
case WRITE_6:
case WRITE_10:
- printk("scsi%d : WRITE attempted with NO_WRITE debugging flag set\n", instance->host_no);
+ shost_printk(KERN_DEBUG, instance, "WRITE attempted with NDEBUG_NO_WRITE set\n");
cmd->result = (DID_ERROR << 16);
- done(cmd);
+ cmd->scsi_done(cmd);
return 0;
}
-#endif /* (NDEBUG & NDEBUG_NO_WRITE) */
-
- /*
- * We use the host_scribble field as a pointer to the next command
- * in a queue
- */
+#endif /* (NDEBUG & NDEBUG_NO_WRITE) */
- cmd->host_scribble = NULL;
- cmd->scsi_done = done;
cmd->result = 0;
- /*
- * Insert the cmd into the issue queue. Note that REQUEST SENSE
+ spin_lock_irqsave(&hostdata->lock, flags);
+
+ /*
+ * Insert the cmd into the issue queue. Note that REQUEST SENSE
* commands are added to the head of the queue since any command will
- * clear the contingent allegiance condition that exists and the
+ * clear the contingent allegiance condition that exists and the
* sense data is only guaranteed to be valid while the condition exists.
*/
- if (!(hostdata->issue_queue) || (cmd->cmnd[0] == REQUEST_SENSE)) {
- LIST(cmd, hostdata->issue_queue);
- cmd->host_scribble = (unsigned char *) hostdata->issue_queue;
- hostdata->issue_queue = cmd;
- } else {
- for (tmp = (struct scsi_cmnd *) hostdata->issue_queue; tmp->host_scribble; tmp = (struct scsi_cmnd *) tmp->host_scribble);
- LIST(cmd, tmp);
- tmp->host_scribble = (unsigned char *) cmd;
- }
- dprintk(NDEBUG_QUEUES, "scsi%d : command added to %s of queue\n", instance->host_no, (cmd->cmnd[0] == REQUEST_SENSE) ? "head" : "tail");
+ if (cmd->cmnd[0] == REQUEST_SENSE)
+ list_add(&ncmd->list, &hostdata->unissued);
+ else
+ list_add_tail(&ncmd->list, &hostdata->unissued);
+
+ spin_unlock_irqrestore(&hostdata->lock, flags);
+
+ dsprintk(NDEBUG_QUEUES, instance, "command %p added to %s of queue\n",
+ cmd, (cmd->cmnd[0] == REQUEST_SENSE) ? "head" : "tail");
- /* Run the coroutine if it isn't already running. */
/* Kick off command processing */
- schedule_delayed_work(&hostdata->coroutine, 0);
+ queue_work(hostdata->work_q, &hostdata->main_task);
return 0;
}
-static DEF_SCSI_QCMD(NCR5380_queue_command)
+/**
+ * dequeue_next_cmd - dequeue a command for processing
+ * @instance: the scsi host instance
+ *
+ * Priority is given to commands on the autosense queue. These commands
+ * need autosense because of a CHECK CONDITION result.
+ *
+ * Returns a command pointer if a command is found for a target that is
+ * not already busy. Otherwise returns NULL.
+ */
+
+static struct scsi_cmnd *dequeue_next_cmd(struct Scsi_Host *instance)
+{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ struct NCR5380_cmd *ncmd;
+ struct scsi_cmnd *cmd;
+
+ if (list_empty(&hostdata->autosense)) {
+ list_for_each_entry(ncmd, &hostdata->unissued, list) {
+ cmd = NCR5380_to_scmd(ncmd);
+ dsprintk(NDEBUG_QUEUES, instance, "dequeue: cmd=%p target=%d busy=0x%02x lun=%llu\n",
+ cmd, scmd_id(cmd), hostdata->busy[scmd_id(cmd)], cmd->device->lun);
+
+ if (!(hostdata->busy[scmd_id(cmd)] & (1 << cmd->device->lun))) {
+ list_del(&ncmd->list);
+ dsprintk(NDEBUG_QUEUES, instance,
+ "dequeue: removed %p from issue queue\n", cmd);
+ return cmd;
+ }
+ }
+ } else {
+ /* Autosense processing begins here */
+ ncmd = list_first_entry(&hostdata->autosense,
+ struct NCR5380_cmd, list);
+ list_del(&ncmd->list);
+ cmd = NCR5380_to_scmd(ncmd);
+ dsprintk(NDEBUG_QUEUES, instance,
+ "dequeue: removed %p from autosense queue\n", cmd);
+ scsi_eh_prep_cmnd(cmd, &hostdata->ses, NULL, 0, ~0);
+ hostdata->sensing = cmd;
+ return cmd;
+ }
+ return NULL;
+}
+
+static void requeue_cmd(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
+{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
+
+ if (hostdata->sensing) {
+ scsi_eh_restore_cmnd(cmd, &hostdata->ses);
+ list_add(&ncmd->list, &hostdata->autosense);
+ hostdata->sensing = NULL;
+ } else
+ list_add(&ncmd->list, &hostdata->unissued);
+}
/**
- * NCR5380_main - NCR state machines
- *
- * NCR5380_main is a coroutine that runs as long as more work can
- * be done on the NCR5380 host adapters in a system. Both
- * NCR5380_queue_command() and NCR5380_intr() will try to start it
- * in case it is not running.
- *
- * Locks: called as its own thread with no locks held. Takes the
- * host lock and called routines may take the isa dma lock.
+ * NCR5380_main - NCR state machines
+ *
+ * NCR5380_main is a coroutine that runs as long as more work can
+ * be done on the NCR5380 host adapters in a system. Both
+ * NCR5380_queue_command() and NCR5380_intr() will try to start it
+ * in case it is not running.
*/
static void NCR5380_main(struct work_struct *work)
{
struct NCR5380_hostdata *hostdata =
- container_of(work, struct NCR5380_hostdata, coroutine.work);
+ container_of(work, struct NCR5380_hostdata, main_task);
struct Scsi_Host *instance = hostdata->host;
- struct scsi_cmnd *tmp, *prev;
+ struct scsi_cmnd *cmd;
int done;
-
- spin_lock_irq(instance->host_lock);
+
do {
- /* Lock held here */
done = 1;
- if (!hostdata->connected && !hostdata->selecting) {
- dprintk(NDEBUG_MAIN, "scsi%d : not connected\n", instance->host_no);
- /*
- * Search through the issue_queue for a command destined
- * for a target that's not busy.
- */
- for (tmp = (struct scsi_cmnd *) hostdata->issue_queue, prev = NULL; tmp; prev = tmp, tmp = (struct scsi_cmnd *) tmp->host_scribble)
- {
- if (prev != tmp)
- dprintk(NDEBUG_LISTS, "MAIN tmp=%p target=%d busy=%d lun=%llu\n", tmp, tmp->device->id, hostdata->busy[tmp->device->id], tmp->device->lun);
- /* When we find one, remove it from the issue queue. */
- if (!(hostdata->busy[tmp->device->id] &
- (1 << (u8)(tmp->device->lun & 0xff)))) {
- if (prev) {
- REMOVE(prev, prev->host_scribble, tmp, tmp->host_scribble);
- prev->host_scribble = tmp->host_scribble;
- } else {
- REMOVE(-1, hostdata->issue_queue, tmp, tmp->host_scribble);
- hostdata->issue_queue = (struct scsi_cmnd *) tmp->host_scribble;
- }
- tmp->host_scribble = NULL;
- /*
- * Attempt to establish an I_T_L nexus here.
- * On success, instance->hostdata->connected is set.
- * On failure, we must add the command back to the
- * issue queue so we can keep trying.
- */
- dprintk(NDEBUG_MAIN|NDEBUG_QUEUES, "scsi%d : main() : command for target %d lun %llu removed from issue_queue\n", instance->host_no, tmp->device->id, tmp->device->lun);
-
- /*
- * A successful selection is defined as one that
- * leaves us with the command connected and
- * in hostdata->connected, OR has terminated the
- * command.
- *
- * With successful commands, we fall through
- * and see if we can do an information transfer,
- * with failures we will restart.
- */
- hostdata->selecting = NULL;
- /* RvC: have to preset this to indicate a new command is being performed */
+ spin_lock_irq(&hostdata->lock);
+ while (!hostdata->connected &&
+ (cmd = dequeue_next_cmd(instance))) {
- /*
- * REQUEST SENSE commands are issued without tagged
- * queueing, even on SCSI-II devices because the
- * contingent allegiance condition exists for the
- * entire unit.
- */
+ dsprintk(NDEBUG_MAIN, instance, "main: dequeued %p\n", cmd);
- if (!NCR5380_select(instance, tmp)) {
- break;
- } else {
- LIST(tmp, hostdata->issue_queue);
- tmp->host_scribble = (unsigned char *) hostdata->issue_queue;
- hostdata->issue_queue = tmp;
- done = 0;
- dprintk(NDEBUG_MAIN|NDEBUG_QUEUES, "scsi%d : main(): select() failed, returned to issue_queue\n", instance->host_no);
- }
- /* lock held here still */
- } /* if target/lun is not busy */
- } /* for */
- /* exited locked */
- } /* if (!hostdata->connected) */
- if (hostdata->selecting) {
- tmp = (struct scsi_cmnd *) hostdata->selecting;
- /* Selection will drop and retake the lock */
- if (!NCR5380_select(instance, tmp)) {
- /* Ok ?? */
+ /*
+ * Attempt to establish an I_T_L nexus here.
+ * On success, instance->hostdata->connected is set.
+ * On failure, we must add the command back to the
+ * issue queue so we can keep trying.
+ */
+ /*
+ * REQUEST SENSE commands are issued without tagged
+ * queueing, even on SCSI-II devices because the
+ * contingent allegiance condition exists for the
+ * entire unit.
+ */
+
+ cmd = NCR5380_select(instance, cmd);
+ if (!cmd) {
+ dsprintk(NDEBUG_MAIN, instance, "main: select complete\n");
} else {
- /* RvC: device failed, so we wait a long time
- this is needed for Mustek scanners, that
- do not respond to commands immediately
- after a scan */
- printk(KERN_DEBUG "scsi%d: device %d did not respond in time\n", instance->host_no, tmp->device->id);
- LIST(tmp, hostdata->issue_queue);
- tmp->host_scribble = (unsigned char *) hostdata->issue_queue;
- hostdata->issue_queue = tmp;
- NCR5380_set_timer(hostdata, USLEEP_WAITLONG);
+ dsprintk(NDEBUG_MAIN | NDEBUG_QUEUES, instance,
+ "main: select failed, returning %p to queue\n", cmd);
+ requeue_cmd(instance, cmd);
}
- } /* if hostdata->selecting */
+ }
if (hostdata->connected
#ifdef REAL_DMA
&& !hostdata->dmalen
#endif
- && (!hostdata->time_expires || time_before_eq(hostdata->time_expires, jiffies))
) {
- dprintk(NDEBUG_MAIN, "scsi%d : main() : performing information transfer\n", instance->host_no);
+ dsprintk(NDEBUG_MAIN, instance, "main: performing information transfer\n");
NCR5380_information_transfer(instance);
- dprintk(NDEBUG_MAIN, "scsi%d : main() : done set false\n", instance->host_no);
done = 0;
- } else
- break;
+ }
+ spin_unlock_irq(&hostdata->lock);
+ if (!done)
+ cond_resched();
} while (!done);
-
- spin_unlock_irq(instance->host_lock);
}
#ifndef DONT_USE_INTR
/**
- * NCR5380_intr - generic NCR5380 irq handler
- * @irq: interrupt number
- * @dev_id: device info
- *
- * Handle interrupts, reestablishing I_T_L or I_T_L_Q nexuses
- * from the disconnected queue, and restarting NCR5380_main()
- * as required.
- *
- * Locks: takes the needed instance locks
+ * NCR5380_intr - generic NCR5380 irq handler
+ * @irq: interrupt number
+ * @dev_id: device info
+ *
+ * Handle interrupts, reestablishing I_T_L or I_T_L_Q nexuses
+ * from the disconnected queue, and restarting NCR5380_main()
+ * as required.
+ *
+ * The chip can assert IRQ in any of six different conditions. The IRQ flag
+ * is then cleared by reading the Reset Parity/Interrupt Register (RPIR).
+ * Three of these six conditions are latched in the Bus and Status Register:
+ * - End of DMA (cleared by ending DMA Mode)
+ * - Parity error (cleared by reading RPIR)
+ * - Loss of BSY (cleared by reading RPIR)
+ * Two conditions have flag bits that are not latched:
+ * - Bus phase mismatch (non-maskable in DMA Mode, cleared by ending DMA Mode)
+ * - Bus reset (non-maskable)
+ * The remaining condition has no flag bit at all:
+ * - Selection/reselection
+ *
+ * Hence, establishing the cause(s) of any interrupt is partly guesswork.
+ * In "The DP8490 and DP5380 Comparison Guide", National Semiconductor
+ * claimed that "the design of the [DP8490] interrupt logic ensures
+ * interrupts will not be lost (they can be on the DP5380)."
+ * The L5380/53C80 datasheet from LOGIC Devices has more details.
+ *
+ * Checking for bus reset by reading RST is futile because of interrupt
+ * latency, but a bus reset will reset chip logic. Checking for parity error
+ * is unnecessary because that interrupt is never enabled. A Loss of BSY
+ * condition will clear DMA Mode. We can tell when this occurs because the
+ * the Busy Monitor interrupt is enabled together with DMA Mode.
*/
-static irqreturn_t NCR5380_intr(int dummy, void *dev_id)
+static irqreturn_t NCR5380_intr(int irq, void *dev_id)
{
- NCR5380_local_declare();
struct Scsi_Host *instance = dev_id;
- struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
- int done;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ int handled = 0;
unsigned char basr;
unsigned long flags;
- dprintk(NDEBUG_INTR, "scsi : NCR5380 irq %d triggered\n",
- instance->irq);
+ spin_lock_irqsave(&hostdata->lock, flags);
+
+ basr = NCR5380_read(BUS_AND_STATUS_REG);
+ if (basr & BASR_IRQ) {
+ unsigned char mr = NCR5380_read(MODE_REG);
+ unsigned char sr = NCR5380_read(STATUS_REG);
+
+ dsprintk(NDEBUG_INTR, instance, "IRQ %d, BASR 0x%02x, SR 0x%02x, MR 0x%02x\n",
+ irq, basr, sr, mr);
- do {
- done = 1;
- spin_lock_irqsave(instance->host_lock, flags);
- /* Look for pending interrupts */
- NCR5380_setup(instance);
- basr = NCR5380_read(BUS_AND_STATUS_REG);
- /* XXX dispatch to appropriate routine if found and done=0 */
- if (basr & BASR_IRQ) {
- NCR5380_dprint(NDEBUG_INTR, instance);
- if ((NCR5380_read(STATUS_REG) & (SR_SEL | SR_IO)) == (SR_SEL | SR_IO)) {
- done = 0;
- dprintk(NDEBUG_INTR, "scsi%d : SEL interrupt\n", instance->host_no);
- NCR5380_reselect(instance);
- (void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
- } else if (basr & BASR_PARITY_ERROR) {
- dprintk(NDEBUG_INTR, "scsi%d : PARITY interrupt\n", instance->host_no);
- (void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
- } else if ((NCR5380_read(STATUS_REG) & SR_RST) == SR_RST) {
- dprintk(NDEBUG_INTR, "scsi%d : RESET interrupt\n", instance->host_no);
- (void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
- } else {
#if defined(REAL_DMA)
- /*
- * We should only get PHASE MISMATCH and EOP interrupts
- * if we have DMA enabled, so do a sanity check based on
- * the current setting of the MODE register.
- */
+ if ((mr & MR_DMA_MODE) || (mr & MR_MONITOR_BSY)) {
+ /* Probably End of DMA, Phase Mismatch or Loss of BSY.
+ * We ack IRQ after clearing Mode Register. Workarounds
+ * for End of DMA errata need to happen in DMA Mode.
+ */
- if ((NCR5380_read(MODE_REG) & MR_DMA) && ((basr & BASR_END_DMA_TRANSFER) || !(basr & BASR_PHASE_MATCH))) {
- int transferred;
+ dsprintk(NDEBUG_INTR, instance, "interrupt in DMA mode\n");
- if (!hostdata->connected)
- panic("scsi%d : received end of DMA interrupt with no connected cmd\n", instance->hostno);
+ int transferred;
- transferred = (hostdata->dmalen - NCR5380_dma_residual(instance));
- hostdata->connected->SCp.this_residual -= transferred;
- hostdata->connected->SCp.ptr += transferred;
- hostdata->dmalen = 0;
+ if (!hostdata->connected)
+ panic("scsi%d : DMA interrupt with no connected cmd\n",
+ instance->hostno);
- (void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-
- /* FIXME: we need to poll briefly then defer a workqueue task ! */
- NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG, BASR_ACK, 0, 2*HZ);
+ transferred = hostdata->dmalen - NCR5380_dma_residual(instance);
+ hostdata->connected->SCp.this_residual -= transferred;
+ hostdata->connected->SCp.ptr += transferred;
+ hostdata->dmalen = 0;
- NCR5380_write(MODE_REG, MR_BASE);
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- }
-#else
- dprintk(NDEBUG_INTR, "scsi : unknown interrupt, BASR 0x%X, MR 0x%X, SR 0x%x\n", basr, NCR5380_read(MODE_REG), NCR5380_read(STATUS_REG));
- (void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-#endif
+ /* FIXME: we need to poll briefly then defer a workqueue task ! */
+ NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG, BASR_ACK, 0, 2 * HZ);
+
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+ NCR5380_write(MODE_REG, MR_BASE);
+ NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+ } else
+#endif /* REAL_DMA */
+ if ((NCR5380_read(CURRENT_SCSI_DATA_REG) & hostdata->id_mask) &&
+ (sr & (SR_SEL | SR_IO | SR_BSY | SR_RST)) == (SR_SEL | SR_IO)) {
+ /* Probably reselected */
+ NCR5380_write(SELECT_ENABLE_REG, 0);
+ NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+
+ dsprintk(NDEBUG_INTR, instance, "interrupt with SEL and IO\n");
+
+ if (!hostdata->connected) {
+ NCR5380_reselect(instance);
+ queue_work(hostdata->work_q, &hostdata->main_task);
}
- } /* if BASR_IRQ */
- spin_unlock_irqrestore(instance->host_lock, flags);
- if(!done)
- schedule_delayed_work(&hostdata->coroutine, 0);
- } while (!done);
- return IRQ_HANDLED;
+ if (!hostdata->connected)
+ NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+ } else {
+ /* Probably Bus Reset */
+ NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+
+ dsprintk(NDEBUG_INTR, instance, "unknown interrupt\n");
+ }
+ handled = 1;
+ } else {
+ shost_printk(KERN_NOTICE, instance, "interrupt without IRQ bit\n");
+ }
+
+ spin_unlock_irqrestore(&hostdata->lock, flags);
+
+ return IRQ_RETVAL(handled);
}
-#endif
+#endif
-/*
+/*
* Function : int NCR5380_select(struct Scsi_Host *instance,
- * struct scsi_cmnd *cmd)
+ * struct scsi_cmnd *cmd)
*
* Purpose : establishes I_T_L or I_T_L_Q nexus for new or existing command,
- * including ARBITRATION, SELECTION, and initial message out for
- * IDENTIFY and queue messages.
- *
- * Inputs : instance - instantiation of the 5380 driver on which this
- * target lives, cmd - SCSI command to execute.
- *
- * Returns : -1 if selection could not execute for some reason,
- * 0 if selection succeeded or failed because the target
- * did not respond.
- *
- * Side effects :
- * If bus busy, arbitration failed, etc, NCR5380_select() will exit
- * with registers as they should have been on entry - ie
- * SELECT_ENABLE will be set appropriately, the NCR5380
- * will cease to drive any SCSI bus signals.
- *
- * If successful : I_T_L or I_T_L_Q nexus will be established,
- * instance->connected will be set to cmd.
- * SELECT interrupt will be disabled.
- *
- * If failed (no target) : cmd->scsi_done() will be called, and the
- * cmd->result host byte set to DID_BAD_TARGET.
- *
- * Locks: caller holds hostdata lock in IRQ mode
+ * including ARBITRATION, SELECTION, and initial message out for
+ * IDENTIFY and queue messages.
+ *
+ * Inputs : instance - instantiation of the 5380 driver on which this
+ * target lives, cmd - SCSI command to execute.
+ *
+ * Returns cmd if selection failed but should be retried,
+ * NULL if selection failed and should not be retried, or
+ * NULL if selection succeeded (hostdata->connected == cmd).
+ *
+ * Side effects :
+ * If bus busy, arbitration failed, etc, NCR5380_select() will exit
+ * with registers as they should have been on entry - ie
+ * SELECT_ENABLE will be set appropriately, the NCR5380
+ * will cease to drive any SCSI bus signals.
+ *
+ * If successful : I_T_L or I_T_L_Q nexus will be established,
+ * instance->connected will be set to cmd.
+ * SELECT interrupt will be disabled.
+ *
+ * If failed (no target) : cmd->scsi_done() will be called, and the
+ * cmd->result host byte set to DID_BAD_TARGET.
*/
-
-static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
+
+static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *instance,
+ struct scsi_cmnd *cmd)
{
- NCR5380_local_declare();
- struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
unsigned char tmp[3], phase;
unsigned char *data;
int len;
- unsigned long timeout;
- unsigned char value;
int err;
- NCR5380_setup(instance);
-
- if (hostdata->selecting)
- goto part2;
-
- hostdata->restart_select = 0;
NCR5380_dprint(NDEBUG_ARBITRATION, instance);
- dprintk(NDEBUG_ARBITRATION, "scsi%d : starting arbitration, id = %d\n", instance->host_no, instance->this_id);
+ dsprintk(NDEBUG_ARBITRATION, instance, "starting arbitration, id = %d\n",
+ instance->this_id);
+
+ /*
+ * Arbitration and selection phases are slow and involve dropping the
+ * lock, so we have to watch out for EH. An exception handler may
+ * change 'selecting' to NULL. This function will then return NULL
+ * so that the caller will forget about 'cmd'. (During information
+ * transfer phases, EH may change 'connected' to NULL.)
+ */
+ hostdata->selecting = cmd;
- /*
- * Set the phase bits to 0, otherwise the NCR5380 won't drive the
+ /*
+ * Set the phase bits to 0, otherwise the NCR5380 won't drive the
* data bus during SELECTION.
*/
NCR5380_write(TARGET_COMMAND_REG, 0);
- /*
+ /*
* Start arbitration.
*/
NCR5380_write(OUTPUT_DATA_REG, hostdata->id_mask);
NCR5380_write(MODE_REG, MR_ARBITRATE);
+ /* The chip now waits for BUS FREE phase. Then after the 800 ns
+ * Bus Free Delay, arbitration will begin.
+ */
- /* We can be relaxed here, interrupts are on, we are
- in workqueue context, the birds are singing in the trees */
- spin_unlock_irq(instance->host_lock);
- err = NCR5380_poll_politely(instance, INITIATOR_COMMAND_REG, ICR_ARBITRATION_PROGRESS, ICR_ARBITRATION_PROGRESS, 5*HZ);
- spin_lock_irq(instance->host_lock);
+ spin_unlock_irq(&hostdata->lock);
+ err = NCR5380_poll_politely2(instance, MODE_REG, MR_ARBITRATE, 0,
+ INITIATOR_COMMAND_REG, ICR_ARBITRATION_PROGRESS,
+ ICR_ARBITRATION_PROGRESS, HZ);
+ spin_lock_irq(&hostdata->lock);
+ if (!(NCR5380_read(MODE_REG) & MR_ARBITRATE)) {
+ /* Reselection interrupt */
+ goto out;
+ }
if (err < 0) {
- printk(KERN_DEBUG "scsi: arbitration timeout at %d\n", __LINE__);
NCR5380_write(MODE_REG, MR_BASE);
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- goto failed;
+ shost_printk(KERN_ERR, instance,
+ "select: arbitration timeout\n");
+ goto out;
}
+ spin_unlock_irq(&hostdata->lock);
- dprintk(NDEBUG_ARBITRATION, "scsi%d : arbitration complete\n", instance->host_no);
-
- /*
- * The arbitration delay is 2.2us, but this is a minimum and there is
- * no maximum so we can safely sleep for ceil(2.2) usecs to accommodate
- * the integral nature of udelay().
- *
- */
-
+ /* The SCSI-2 arbitration delay is 2.4 us */
udelay(3);
/* Check for lost arbitration */
- if ((NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST) || (NCR5380_read(CURRENT_SCSI_DATA_REG) & hostdata->id_higher_mask) || (NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST)) {
- NCR5380_write(MODE_REG, MR_BASE);
- dprintk(NDEBUG_ARBITRATION, "scsi%d : lost arbitration, deasserting MR_ARBITRATE\n", instance->host_no);
- goto failed;
- }
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_SEL);
-
- if (!(hostdata->flags & FLAG_DTC3181E) &&
- /* RvC: DTC3181E has some trouble with this
- * so we simply removed it. Seems to work with
- * only Mustek scanner attached
- */
+ if ((NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST) ||
+ (NCR5380_read(CURRENT_SCSI_DATA_REG) & hostdata->id_higher_mask) ||
(NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST)) {
NCR5380_write(MODE_REG, MR_BASE);
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- dprintk(NDEBUG_ARBITRATION, "scsi%d : lost arbitration, deasserting ICR_ASSERT_SEL\n", instance->host_no);
- goto failed;
+ dsprintk(NDEBUG_ARBITRATION, instance, "lost arbitration, deasserting MR_ARBITRATE\n");
+ spin_lock_irq(&hostdata->lock);
+ goto out;
}
- /*
- * Again, bus clear + bus settle time is 1.2us, however, this is
+
+ /* After/during arbitration, BSY should be asserted.
+ * IBM DPES-31080 Version S31Q works now
+ * Tnx to Thomas_Roesch@m2.maus.de for finding this! (Roman)
+ */
+ NCR5380_write(INITIATOR_COMMAND_REG,
+ ICR_BASE | ICR_ASSERT_SEL | ICR_ASSERT_BSY);
+
+ /*
+ * Again, bus clear + bus settle time is 1.2us, however, this is
* a minimum so we'll udelay ceil(1.2)
*/
- udelay(2);
+ if (hostdata->flags & FLAG_TOSHIBA_DELAY)
+ udelay(15);
+ else
+ udelay(2);
+
+ spin_lock_irq(&hostdata->lock);
+
+ /* NCR5380_reselect() clears MODE_REG after a reselection interrupt */
+ if (!(NCR5380_read(MODE_REG) & MR_ARBITRATE))
+ goto out;
+
+ if (!hostdata->selecting) {
+ NCR5380_write(MODE_REG, MR_BASE);
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+ goto out;
+ }
- dprintk(NDEBUG_ARBITRATION, "scsi%d : won arbitration\n", instance->host_no);
+ dsprintk(NDEBUG_ARBITRATION, instance, "won arbitration\n");
- /*
- * Now that we have won arbitration, start Selection process, asserting
+ /*
+ * Now that we have won arbitration, start Selection process, asserting
* the host and target ID's on the SCSI bus.
*/
- NCR5380_write(OUTPUT_DATA_REG, (hostdata->id_mask | (1 << scmd_id(cmd))));
+ NCR5380_write(OUTPUT_DATA_REG, hostdata->id_mask | (1 << scmd_id(cmd)));
- /*
+ /*
* Raise ATN while SEL is true before BSY goes false from arbitration,
* since this is the only way to guarantee that we'll get a MESSAGE OUT
* phase immediately after selection.
*/
- NCR5380_write(INITIATOR_COMMAND_REG, (ICR_BASE | ICR_ASSERT_BSY | ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_SEL));
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_BSY |
+ ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_SEL);
NCR5380_write(MODE_REG, MR_BASE);
- /*
+ /*
* Reselect interrupts must be turned off prior to the dropping of BSY,
* otherwise we will trigger an interrupt.
*/
NCR5380_write(SELECT_ENABLE_REG, 0);
+ spin_unlock_irq(&hostdata->lock);
+
/*
- * The initiator shall then wait at least two deskew delays and release
+ * The initiator shall then wait at least two deskew delays and release
* the BSY signal.
*/
- udelay(1); /* wingel -- wait two bus deskew delay >2*45ns */
+ udelay(1); /* wingel -- wait two bus deskew delay >2*45ns */
/* Reset BSY */
- NCR5380_write(INITIATOR_COMMAND_REG, (ICR_BASE | ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_SEL));
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA |
+ ICR_ASSERT_ATN | ICR_ASSERT_SEL);
- /*
+ /*
* Something weird happens when we cease to drive BSY - looks
- * like the board/chip is letting us do another read before the
+ * like the board/chip is letting us do another read before the
* appropriate propagation delay has expired, and we're confusing
* a BSY signal from ourselves as the target's response to SELECTION.
*
* A small delay (the 'C++' frontend breaks the pipeline with an
* unnecessary jump, making it work on my 386-33/Trantor T128, the
- * tighter 'C' code breaks and requires this) solves the problem -
- * the 1 us delay is arbitrary, and only used because this delay will
- * be the same on other platforms and since it works here, it should
+ * tighter 'C' code breaks and requires this) solves the problem -
+ * the 1 us delay is arbitrary, and only used because this delay will
+ * be the same on other platforms and since it works here, it should
* work there.
*
* wingel suggests that this could be due to failing to wait
@@ -1339,50 +1162,43 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
udelay(1);
- dprintk(NDEBUG_SELECTION, "scsi%d : selecting target %d\n", instance->host_no, scmd_id(cmd));
+ dsprintk(NDEBUG_SELECTION, instance, "selecting target %d\n", scmd_id(cmd));
- /*
- * The SCSI specification calls for a 250 ms timeout for the actual
+ /*
+ * The SCSI specification calls for a 250 ms timeout for the actual
* selection.
*/
- timeout = jiffies + msecs_to_jiffies(250);
-
- /*
- * XXX very interesting - we're seeing a bounce where the BSY we
- * asserted is being reflected / still asserted (propagation delay?)
- * and it's detecting as true. Sigh.
- */
-
- hostdata->select_time = 0; /* we count the clock ticks at which we polled */
- hostdata->selecting = cmd;
+ err = NCR5380_poll_politely(instance, STATUS_REG, SR_BSY, SR_BSY,
+ msecs_to_jiffies(250));
-part2:
- /* RvC: here we enter after a sleeping period, or immediately after
- execution of part 1
- we poll only once ech clock tick */
- value = NCR5380_read(STATUS_REG) & (SR_BSY | SR_IO);
-
- if (!value && (hostdata->select_time < HZ/4)) {
- /* RvC: we still must wait for a device response */
- hostdata->select_time++; /* after 25 ticks the device has failed */
- NCR5380_set_timer(hostdata, 1);
- return 0; /* RvC: we return here with hostdata->selecting set,
- to go to sleep */
- }
-
- hostdata->selecting = NULL;/* clear this pointer, because we passed the
- waiting period */
if ((NCR5380_read(STATUS_REG) & (SR_SEL | SR_IO)) == (SR_SEL | SR_IO)) {
+ spin_lock_irq(&hostdata->lock);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
NCR5380_reselect(instance);
- printk("scsi%d : reselection after won arbitration?\n", instance->host_no);
+ if (!hostdata->connected)
+ NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+ shost_printk(KERN_ERR, instance, "reselection after won arbitration?\n");
+ goto out;
+ }
+
+ if (err < 0) {
+ spin_lock_irq(&hostdata->lock);
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- return -1;
+ /* Can't touch cmd if it has been reclaimed by the scsi ML */
+ if (hostdata->selecting) {
+ cmd->result = DID_BAD_TARGET << 16;
+ complete_cmd(instance, cmd);
+ dsprintk(NDEBUG_SELECTION, instance, "target did not respond within 250ms\n");
+ cmd = NULL;
+ }
+ goto out;
}
- /*
- * No less than two deskew delays after the initiator detects the
- * BSY signal is true, it shall release the SEL signal and may
+
+ /*
+ * No less than two deskew delays after the initiator detects the
+ * BSY signal is true, it shall release the SEL signal and may
* change the DATA BUS. -wingel
*/
@@ -1390,53 +1206,38 @@ part2:
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
- if (!(NCR5380_read(STATUS_REG) & SR_BSY)) {
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- if (hostdata->targets_present & (1 << scmd_id(cmd))) {
- printk(KERN_DEBUG "scsi%d : weirdness\n", instance->host_no);
- if (hostdata->restart_select)
- printk(KERN_DEBUG "\trestart select\n");
- NCR5380_dprint(NDEBUG_SELECTION, instance);
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- return -1;
- }
- cmd->result = DID_BAD_TARGET << 16;
- cmd->scsi_done(cmd);
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- dprintk(NDEBUG_SELECTION, "scsi%d : target did not respond within 250ms\n", instance->host_no);
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- return 0;
- }
- hostdata->targets_present |= (1 << scmd_id(cmd));
-
/*
- * Since we followed the SCSI spec, and raised ATN while SEL
+ * Since we followed the SCSI spec, and raised ATN while SEL
* was true but before BSY was false during selection, the information
* transfer phase should be a MESSAGE OUT phase so that we can send the
* IDENTIFY message.
- *
+ *
* If SCSI-II tagged queuing is enabled, we also send a SIMPLE_QUEUE_TAG
* message (2 bytes) with a tag ID that we increment with every command
* until it wraps back to 0.
*
* XXX - it turns out that there are some broken SCSI-II devices,
- * which claim to support tagged queuing but fail when more than
- * some number of commands are issued at once.
+ * which claim to support tagged queuing but fail when more than
+ * some number of commands are issued at once.
*/
/* Wait for start of REQ/ACK handshake */
- spin_unlock_irq(instance->host_lock);
err = NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, HZ);
- spin_lock_irq(instance->host_lock);
-
- if(err) {
- printk(KERN_ERR "scsi%d: timeout at NCR5380.c:%d\n", instance->host_no, __LINE__);
+ spin_lock_irq(&hostdata->lock);
+ if (err < 0) {
+ shost_printk(KERN_ERR, instance, "select: REQ timeout\n");
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- goto failed;
+ goto out;
+ }
+ if (!hostdata->selecting) {
+ do_abort(instance);
+ goto out;
}
- dprintk(NDEBUG_SELECTION, "scsi%d : target %d selected, going into MESSAGE OUT phase.\n", instance->host_no, cmd->device->id);
+ dsprintk(NDEBUG_SELECTION, instance, "target %d selected, going into MESSAGE OUT phase.\n",
+ scmd_id(cmd));
tmp[0] = IDENTIFY(((instance->irq == NO_IRQ) ? 0 : 1), cmd->device->lun);
len = 1;
@@ -1446,104 +1247,82 @@ part2:
data = tmp;
phase = PHASE_MSGOUT;
NCR5380_transfer_pio(instance, &phase, &len, &data);
- dprintk(NDEBUG_SELECTION, "scsi%d : nexus established.\n", instance->host_no);
+ dsprintk(NDEBUG_SELECTION, instance, "nexus established.\n");
/* XXX need to handle errors here */
+
hostdata->connected = cmd;
- hostdata->busy[cmd->device->id] |= (1 << (cmd->device->lun & 0xFF));
+ hostdata->busy[cmd->device->id] |= 1 << cmd->device->lun;
initialize_SCp(cmd);
- return 0;
-
- /* Selection failed */
-failed:
- return -1;
+ cmd = NULL;
+out:
+ if (!hostdata->selecting)
+ return NULL;
+ hostdata->selecting = NULL;
+ return cmd;
}
-/*
- * Function : int NCR5380_transfer_pio (struct Scsi_Host *instance,
- * unsigned char *phase, int *count, unsigned char **data)
+/*
+ * Function : int NCR5380_transfer_pio (struct Scsi_Host *instance,
+ * unsigned char *phase, int *count, unsigned char **data)
*
* Purpose : transfers data in given phase using polled I/O
*
- * Inputs : instance - instance of driver, *phase - pointer to
- * what phase is expected, *count - pointer to number of
- * bytes to transfer, **data - pointer to data pointer.
- *
+ * Inputs : instance - instance of driver, *phase - pointer to
+ * what phase is expected, *count - pointer to number of
+ * bytes to transfer, **data - pointer to data pointer.
+ *
* Returns : -1 when different phase is entered without transferring
- * maximum number of bytes, 0 if all bytes or transferred or exit
- * is in same phase.
+ * maximum number of bytes, 0 if all bytes are transferred or exit
+ * is in same phase.
*
- * Also, *phase, *count, *data are modified in place.
+ * Also, *phase, *count, *data are modified in place.
*
* XXX Note : handling for bus free may be useful.
*/
/*
- * Note : this code is not as quick as it could be, however it
+ * Note : this code is not as quick as it could be, however it
* IS 100% reliable, and for the actual data transfer where speed
* counts, we will always do a pseudo DMA or DMA transfer.
*/
-static int NCR5380_transfer_pio(struct Scsi_Host *instance, unsigned char *phase, int *count, unsigned char **data) {
- NCR5380_local_declare();
+static int NCR5380_transfer_pio(struct Scsi_Host *instance,
+ unsigned char *phase, int *count,
+ unsigned char **data)
+{
unsigned char p = *phase, tmp;
int c = *count;
unsigned char *d = *data;
- /*
- * RvC: some administrative data to process polling time
- */
- int break_allowed = 0;
- struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
- NCR5380_setup(instance);
-
- if (!(p & SR_IO))
- dprintk(NDEBUG_PIO, "scsi%d : pio write %d bytes\n", instance->host_no, c);
- else
- dprintk(NDEBUG_PIO, "scsi%d : pio read %d bytes\n", instance->host_no, c);
- /*
- * The NCR5380 chip will only drive the SCSI bus when the
+ /*
+ * The NCR5380 chip will only drive the SCSI bus when the
* phase specified in the appropriate bits of the TARGET COMMAND
* REGISTER match the STATUS REGISTER
*/
- NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(p));
+ NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(p));
- /* RvC: don't know if this is necessary, but other SCSI I/O is short
- * so breaks are not necessary there
- */
- if ((p == PHASE_DATAIN) || (p == PHASE_DATAOUT)) {
- break_allowed = 1;
- }
do {
- /*
- * Wait for assertion of REQ, after which the phase bits will be
- * valid
- */
-
- /* RvC: we simply poll once, after that we stop temporarily
- * and let the device buffer fill up
- * if breaking is not allowed, we keep polling as long as needed
+ /*
+ * Wait for assertion of REQ, after which the phase bits will be
+ * valid
*/
- /* FIXME */
- while (!((tmp = NCR5380_read(STATUS_REG)) & SR_REQ) && !break_allowed);
- if (!(tmp & SR_REQ)) {
- /* timeout condition */
- NCR5380_set_timer(hostdata, USLEEP_SLEEP);
+ if (NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, HZ) < 0)
break;
- }
- dprintk(NDEBUG_HANDSHAKE, "scsi%d : REQ detected\n", instance->host_no);
+ dsprintk(NDEBUG_HANDSHAKE, instance, "REQ asserted\n");
/* Check for phase mismatch */
- if ((tmp & PHASE_MASK) != p) {
- dprintk(NDEBUG_HANDSHAKE, "scsi%d : phase mismatch\n", instance->host_no);
- NCR5380_dprint_phase(NDEBUG_HANDSHAKE, instance);
+ if ((NCR5380_read(STATUS_REG) & PHASE_MASK) != p) {
+ dsprintk(NDEBUG_PIO, instance, "phase mismatch\n");
+ NCR5380_dprint_phase(NDEBUG_PIO, instance);
break;
}
+
/* Do actual transfer from SCSI bus to / from memory */
if (!(p & SR_IO))
NCR5380_write(OUTPUT_DATA_REG, *d);
@@ -1552,7 +1331,7 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance, unsigned char *phase
++d;
- /*
+ /*
* The SCSI standard suggests that in MSGOUT phase, the initiator
* should drop ATN on the last byte of the message phase
* after REQ has been asserted for the handshake but before
@@ -1563,29 +1342,34 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance, unsigned char *phase
if (!((p & SR_MSG) && c > 1)) {
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA);
NCR5380_dprint(NDEBUG_PIO, instance);
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA | ICR_ASSERT_ACK);
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
+ ICR_ASSERT_DATA | ICR_ASSERT_ACK);
} else {
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA | ICR_ASSERT_ATN);
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
+ ICR_ASSERT_DATA | ICR_ASSERT_ATN);
NCR5380_dprint(NDEBUG_PIO, instance);
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
+ ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
}
} else {
NCR5380_dprint(NDEBUG_PIO, instance);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ACK);
}
- /* FIXME - if this fails bus reset ?? */
- NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, 0, 5*HZ);
- dprintk(NDEBUG_HANDSHAKE, "scsi%d : req false, handshake complete\n", instance->host_no);
+ if (NCR5380_poll_politely(instance,
+ STATUS_REG, SR_REQ, 0, 5 * HZ) < 0)
+ break;
+
+ dsprintk(NDEBUG_HANDSHAKE, instance, "REQ negated, handshake complete\n");
/*
- * We have several special cases to consider during REQ/ACK handshaking :
- * 1. We were in MSGOUT phase, and we are on the last byte of the
- * message. ATN must be dropped as ACK is dropped.
+ * We have several special cases to consider during REQ/ACK handshaking :
+ * 1. We were in MSGOUT phase, and we are on the last byte of the
+ * message. ATN must be dropped as ACK is dropped.
*
- * 2. We are in a MSGIN phase, and we are on the last byte of the
- * message. We must exit with ACK asserted, so that the calling
- * code may raise ATN before dropping ACK to reject the message.
+ * 2. We are in a MSGIN phase, and we are on the last byte of the
+ * message. We must exit with ACK asserted, so that the calling
+ * code may raise ATN before dropping ACK to reject the message.
*
* 3. ACK and ATN are clear and the target may proceed as normal.
*/
@@ -1597,12 +1381,16 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance, unsigned char *phase
}
} while (--c);
- dprintk(NDEBUG_PIO, "scsi%d : residual %d\n", instance->host_no, c);
+ dsprintk(NDEBUG_PIO, instance, "residual %d\n", c);
*count = c;
*data = d;
tmp = NCR5380_read(STATUS_REG);
- if (tmp & SR_REQ)
+ /* The phase read from the bus is valid if either REQ is (already)
+ * asserted or if ACK hasn't been released yet. The latter applies if
+ * we're in MSG IN, DATA IN or STATUS and all bytes have been received.
+ */
+ if ((tmp & SR_REQ) || ((tmp & SR_IO) && c == 0))
*phase = tmp & PHASE_MASK;
else
*phase = PHASE_UNKNOWN;
@@ -1614,79 +1402,80 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance, unsigned char *phase
}
/**
- * do_reset - issue a reset command
- * @host: adapter to reset
+ * do_reset - issue a reset command
+ * @instance: adapter to reset
*
- * Issue a reset sequence to the NCR5380 and try and get the bus
- * back into sane shape.
+ * Issue a reset sequence to the NCR5380 and try and get the bus
+ * back into sane shape.
*
- * Locks: caller holds queue lock
+ * This clears the reset interrupt flag because there may be no handler for
+ * it. When the driver is initialized, the NCR5380_intr() handler has not yet
+ * been installed. And when in EH we may have released the ST DMA interrupt.
*/
-
-static void do_reset(struct Scsi_Host *host) {
- NCR5380_local_declare();
- NCR5380_setup(host);
- NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(NCR5380_read(STATUS_REG) & PHASE_MASK));
+static void do_reset(struct Scsi_Host *instance)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ NCR5380_write(TARGET_COMMAND_REG,
+ PHASE_SR_TO_TCR(NCR5380_read(STATUS_REG) & PHASE_MASK));
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST);
- udelay(25);
+ udelay(50);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+ (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+ local_irq_restore(flags);
}
-/*
- * Function : do_abort (Scsi_Host *host)
- *
- * Purpose : abort the currently established nexus. Should only be
- * called from a routine which can drop into a
- *
- * Returns : 0 on success, -1 on failure.
- *
- * Locks: queue lock held by caller
- * FIXME: sort this out and get new_eh running
+/**
+ * do_abort - abort the currently established nexus by going to
+ * MESSAGE OUT phase and sending an ABORT message.
+ * @instance: relevant scsi host instance
+ *
+ * Returns 0 on success, -1 on failure.
*/
-static int do_abort(struct Scsi_Host *host) {
- NCR5380_local_declare();
+static int do_abort(struct Scsi_Host *instance)
+{
unsigned char *msgptr, phase, tmp;
int len;
int rc;
- NCR5380_setup(host);
-
/* Request message out phase */
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
- /*
- * Wait for the target to indicate a valid phase by asserting
- * REQ. Once this happens, we'll have either a MSGOUT phase
- * and can immediately send the ABORT message, or we'll have some
+ /*
+ * Wait for the target to indicate a valid phase by asserting
+ * REQ. Once this happens, we'll have either a MSGOUT phase
+ * and can immediately send the ABORT message, or we'll have some
* other phase and will have to source/sink data.
- *
+ *
* We really don't care what value was on the bus or what value
* the target sees, so we just handshake.
*/
- rc = NCR5380_poll_politely(host, STATUS_REG, SR_REQ, SR_REQ, 60 * HZ);
-
- if(rc < 0)
- return -1;
+ rc = NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, 10 * HZ);
+ if (rc < 0)
+ goto timeout;
+
+ tmp = NCR5380_read(STATUS_REG) & PHASE_MASK;
- tmp = (unsigned char)rc;
-
NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(tmp));
- if ((tmp & PHASE_MASK) != PHASE_MSGOUT) {
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
- rc = NCR5380_poll_politely(host, STATUS_REG, SR_REQ, 0, 3*HZ);
+ if (tmp != PHASE_MSGOUT) {
+ NCR5380_write(INITIATOR_COMMAND_REG,
+ ICR_BASE | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
+ rc = NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, 0, 3 * HZ);
+ if (rc < 0)
+ goto timeout;
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
- if(rc == -1)
- return -1;
}
+
tmp = ABORT;
msgptr = &tmp;
len = 1;
phase = PHASE_MSGOUT;
- NCR5380_transfer_pio(host, &phase, &len, &msgptr);
+ NCR5380_transfer_pio(instance, &phase, &len, &msgptr);
/*
* If we got here, and the command completed successfully,
@@ -1694,32 +1483,37 @@ static int do_abort(struct Scsi_Host *host) {
*/
return len ? -1 : 0;
+
+timeout:
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+ return -1;
}
#if defined(REAL_DMA) || defined(PSEUDO_DMA) || defined (REAL_DMA_POLL)
-/*
- * Function : int NCR5380_transfer_dma (struct Scsi_Host *instance,
- * unsigned char *phase, int *count, unsigned char **data)
+/*
+ * Function : int NCR5380_transfer_dma (struct Scsi_Host *instance,
+ * unsigned char *phase, int *count, unsigned char **data)
*
* Purpose : transfers data in given phase using either real
- * or pseudo DMA.
+ * or pseudo DMA.
*
- * Inputs : instance - instance of driver, *phase - pointer to
- * what phase is expected, *count - pointer to number of
- * bytes to transfer, **data - pointer to data pointer.
- *
- * Returns : -1 when different phase is entered without transferring
- * maximum number of bytes, 0 if all bytes or transferred or exit
- * is in same phase.
+ * Inputs : instance - instance of driver, *phase - pointer to
+ * what phase is expected, *count - pointer to number of
+ * bytes to transfer, **data - pointer to data pointer.
*
- * Also, *phase, *count, *data are modified in place.
+ * Returns : -1 when different phase is entered without transferring
+ * maximum number of bytes, 0 if all bytes or transferred or exit
+ * is in same phase.
*
- * Locks: io_request lock held by caller
+ * Also, *phase, *count, *data are modified in place.
*/
-static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase, int *count, unsigned char **data) {
- NCR5380_local_declare();
+static int NCR5380_transfer_dma(struct Scsi_Host *instance,
+ unsigned char *phase, int *count,
+ unsigned char **data)
+{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
register int c = *count;
register unsigned char p = *phase;
register unsigned char *d = *data;
@@ -1730,54 +1524,47 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase
unsigned char saved_data = 0, overrun = 0, residue;
#endif
- struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
-
- NCR5380_setup(instance);
-
if ((tmp = (NCR5380_read(STATUS_REG) & PHASE_MASK)) != p) {
*phase = tmp;
return -1;
}
#if defined(REAL_DMA) || defined(REAL_DMA_POLL)
-#ifdef READ_OVERRUNS
if (p & SR_IO) {
- c -= 2;
+ if (!(hostdata->flags & FLAG_NO_DMA_FIXUPS))
+ c -= 2;
}
-#endif
- dprintk(NDEBUG_DMA, "scsi%d : initializing DMA channel %d for %s, %d bytes %s %0x\n", instance->host_no, instance->dma_channel, (p & SR_IO) ? "reading" : "writing", c, (p & SR_IO) ? "to" : "from", (unsigned) d);
hostdata->dma_len = (p & SR_IO) ? NCR5380_dma_read_setup(instance, d, c) : NCR5380_dma_write_setup(instance, d, c);
+
+ dsprintk(NDEBUG_DMA, instance, "initializing DMA %s: length %d, address %p\n",
+ (p & SR_IO) ? "receive" : "send", c, *data);
#endif
NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(p));
#ifdef REAL_DMA
- NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_ENABLE_EOP_INTR | MR_MONITOR_BSY);
+ NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY |
+ MR_ENABLE_EOP_INTR);
#elif defined(REAL_DMA_POLL)
- NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE);
+ NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY);
#else
/*
* Note : on my sample board, watch-dog timeouts occurred when interrupts
- * were not disabled for the duration of a single DMA transfer, from
+ * were not disabled for the duration of a single DMA transfer, from
* before the setting of DMA mode to after transfer of the last byte.
*/
-#if defined(PSEUDO_DMA) && defined(UNSAFE)
- spin_unlock_irq(instance->host_lock);
-#endif
- /* KLL May need eop and parity in 53c400 */
- if (hostdata->flags & FLAG_NCR53C400)
- NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE |
- MR_ENABLE_PAR_CHECK | MR_ENABLE_PAR_INTR |
- MR_ENABLE_EOP_INTR | MR_MONITOR_BSY);
+ if (hostdata->flags & FLAG_NO_DMA_FIXUP)
+ NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY |
+ MR_ENABLE_EOP_INTR);
else
- NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE);
+ NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY);
#endif /* def REAL_DMA */
dprintk(NDEBUG_DMA, "scsi%d : mode reg = 0x%X\n", instance->host_no, NCR5380_read(MODE_REG));
- /*
- * On the PAS16 at least I/O recovery delays are not needed here.
- * Everyone else seems to want them.
+ /*
+ * On the PAS16 at least I/O recovery delays are not needed here.
+ * Everyone else seems to want them.
*/
if (p & SR_IO) {
@@ -1797,49 +1584,49 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase
} while ((tmp & BASR_PHASE_MATCH) && !(tmp & (BASR_BUSY_ERROR | BASR_END_DMA_TRANSFER)));
/*
- At this point, either we've completed DMA, or we have a phase mismatch,
- or we've unexpectedly lost BUSY (which is a real error).
-
- For write DMAs, we want to wait until the last byte has been
- transferred out over the bus before we turn off DMA mode. Alas, there
- seems to be no terribly good way of doing this on a 5380 under all
- conditions. For non-scatter-gather operations, we can wait until REQ
- and ACK both go false, or until a phase mismatch occurs. Gather-writes
- are nastier, since the device will be expecting more data than we
- are prepared to send it, and REQ will remain asserted. On a 53C8[01] we
- could test LAST BIT SENT to assure transfer (I imagine this is precisely
- why this signal was added to the newer chips) but on the older 538[01]
- this signal does not exist. The workaround for this lack is a watchdog;
- we bail out of the wait-loop after a modest amount of wait-time if
- the usual exit conditions are not met. Not a terribly clean or
- correct solution :-%
-
- Reads are equally tricky due to a nasty characteristic of the NCR5380.
- If the chip is in DMA mode for an READ, it will respond to a target's
- REQ by latching the SCSI data into the INPUT DATA register and asserting
- ACK, even if it has _already_ been notified by the DMA controller that
- the current DMA transfer has completed! If the NCR5380 is then taken
- out of DMA mode, this already-acknowledged byte is lost.
-
- This is not a problem for "one DMA transfer per command" reads, because
- the situation will never arise... either all of the data is DMA'ed
- properly, or the target switches to MESSAGE IN phase to signal a
- disconnection (either operation bringing the DMA to a clean halt).
- However, in order to handle scatter-reads, we must work around the
- problem. The chosen fix is to DMA N-2 bytes, then check for the
- condition before taking the NCR5380 out of DMA mode. One or two extra
- bytes are transferred via PIO as necessary to fill out the original
- request.
+ * At this point, either we've completed DMA, or we have a phase mismatch,
+ * or we've unexpectedly lost BUSY (which is a real error).
+ *
+ * For DMA sends, we want to wait until the last byte has been
+ * transferred out over the bus before we turn off DMA mode. Alas, there
+ * seems to be no terribly good way of doing this on a 5380 under all
+ * conditions. For non-scatter-gather operations, we can wait until REQ
+ * and ACK both go false, or until a phase mismatch occurs. Gather-sends
+ * are nastier, since the device will be expecting more data than we
+ * are prepared to send it, and REQ will remain asserted. On a 53C8[01] we
+ * could test Last Byte Sent to assure transfer (I imagine this is precisely
+ * why this signal was added to the newer chips) but on the older 538[01]
+ * this signal does not exist. The workaround for this lack is a watchdog;
+ * we bail out of the wait-loop after a modest amount of wait-time if
+ * the usual exit conditions are not met. Not a terribly clean or
+ * correct solution :-%
+ *
+ * DMA receive is equally tricky due to a nasty characteristic of the NCR5380.
+ * If the chip is in DMA receive mode, it will respond to a target's
+ * REQ by latching the SCSI data into the INPUT DATA register and asserting
+ * ACK, even if it has _already_ been notified by the DMA controller that
+ * the current DMA transfer has completed! If the NCR5380 is then taken
+ * out of DMA mode, this already-acknowledged byte is lost. This is
+ * not a problem for "one DMA transfer per READ command", because
+ * the situation will never arise... either all of the data is DMA'ed
+ * properly, or the target switches to MESSAGE IN phase to signal a
+ * disconnection (either operation bringing the DMA to a clean halt).
+ * However, in order to handle scatter-receive, we must work around the
+ * problem. The chosen fix is to DMA N-2 bytes, then check for the
+ * condition before taking the NCR5380 out of DMA mode. One or two extra
+ * bytes are transferred via PIO as necessary to fill out the original
+ * request.
*/
if (p & SR_IO) {
-#ifdef READ_OVERRUNS
- udelay(10);
- if (((NCR5380_read(BUS_AND_STATUS_REG) & (BASR_PHASE_MATCH | BASR_ACK)) == (BASR_PHASE_MATCH | BASR_ACK))) {
- saved_data = NCR5380_read(INPUT_DATA_REGISTER);
- overrun = 1;
+ if (!(hostdata->flags & FLAG_NO_DMA_FIXUPS)) {
+ udelay(10);
+ if ((NCR5380_read(BUS_AND_STATUS_REG) & (BASR_PHASE_MATCH | BASR_ACK)) ==
+ (BASR_PHASE_MATCH | BASR_ACK)) {
+ saved_data = NCR5380_read(INPUT_DATA_REGISTER);
+ overrun = 1;
+ }
}
-#endif
} else {
int limit = 100;
while (((tmp = NCR5380_read(BUS_AND_STATUS_REG)) & BASR_ACK) || (NCR5380_read(STATUS_REG) & SR_REQ)) {
@@ -1850,7 +1637,8 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase
}
}
- dprintk(NDEBUG_DMA, "scsi%d : polled DMA transfer complete, basr 0x%X, sr 0x%X\n", instance->host_no, tmp, NCR5380_read(STATUS_REG));
+ dsprintk(NDEBUG_DMA, "polled DMA transfer complete, basr 0x%02x, sr 0x%02x\n",
+ tmp, NCR5380_read(STATUS_REG));
NCR5380_write(MODE_REG, MR_BASE);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
@@ -1861,8 +1649,8 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase
*data += c;
*phase = NCR5380_read(STATUS_REG) & PHASE_MASK;
-#ifdef READ_OVERRUNS
- if (*phase == p && (p & SR_IO) && residue == 0) {
+ if (!(hostdata->flags & FLAG_NO_DMA_FIXUPS) &&
+ *phase == p && (p & SR_IO) && residue == 0) {
if (overrun) {
dprintk(NDEBUG_DMA, "Got an input overrun, using saved byte\n");
**data = saved_data;
@@ -1877,7 +1665,6 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase
NCR5380_transfer_pio(instance, phase, &cnt, data);
*count -= toPIO - cnt;
}
-#endif
dprintk(NDEBUG_DMA, "Return with data ptr = 0x%X, count %d, last 0x%X, next 0x%X\n", *data, *count, *(*data + *count - 1), *(*data + *count));
return 0;
@@ -1886,95 +1673,64 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase
return 0;
#else /* defined(REAL_DMA_POLL) */
if (p & SR_IO) {
-#ifdef DMA_WORKS_RIGHT
- foo = NCR5380_pread(instance, d, c);
-#else
- int diff = 1;
- if (hostdata->flags & FLAG_NCR53C400) {
- diff = 0;
- }
- if (!(foo = NCR5380_pread(instance, d, c - diff))) {
+ foo = NCR5380_pread(instance, d,
+ hostdata->flags & FLAG_NO_DMA_FIXUP ? c : c - 1);
+ if (!foo && !(hostdata->flags & FLAG_NO_DMA_FIXUP)) {
/*
- * We can't disable DMA mode after successfully transferring
+ * We can't disable DMA mode after successfully transferring
* what we plan to be the last byte, since that would open up
- * a race condition where if the target asserted REQ before
+ * a race condition where if the target asserted REQ before
* we got the DMA mode reset, the NCR5380 would have latched
* an additional byte into the INPUT DATA register and we'd
* have dropped it.
- *
- * The workaround was to transfer one fewer bytes than we
- * intended to with the pseudo-DMA read function, wait for
+ *
+ * The workaround was to transfer one fewer bytes than we
+ * intended to with the pseudo-DMA read function, wait for
* the chip to latch the last byte, read it, and then disable
* pseudo-DMA mode.
- *
+ *
* After REQ is asserted, the NCR5380 asserts DRQ and ACK.
* REQ is deasserted when ACK is asserted, and not reasserted
* until ACK goes false. Since the NCR5380 won't lower ACK
* until DACK is asserted, which won't happen unless we twiddle
- * the DMA port or we take the NCR5380 out of DMA mode, we
- * can guarantee that we won't handshake another extra
+ * the DMA port or we take the NCR5380 out of DMA mode, we
+ * can guarantee that we won't handshake another extra
* byte.
*/
- if (!(hostdata->flags & FLAG_NCR53C400)) {
- while (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_DRQ));
- /* Wait for clean handshake */
- while (NCR5380_read(STATUS_REG) & SR_REQ);
- d[c - 1] = NCR5380_read(INPUT_DATA_REG);
+ if (NCR5380_poll_politely(instance, BUS_AND_STATUS_REG,
+ BASR_DRQ, BASR_DRQ, HZ) < 0) {
+ foo = -1;
+ shost_printk(KERN_ERR, instance, "PDMA read: DRQ timeout\n");
+ }
+ if (NCR5380_poll_politely(instance, STATUS_REG,
+ SR_REQ, 0, HZ) < 0) {
+ foo = -1;
+ shost_printk(KERN_ERR, instance, "PDMA read: !REQ timeout\n");
}
+ d[c - 1] = NCR5380_read(INPUT_DATA_REG);
}
-#endif
} else {
-#ifdef DMA_WORKS_RIGHT
foo = NCR5380_pwrite(instance, d, c);
-#else
- int timeout;
- dprintk(NDEBUG_C400_PWRITE, "About to pwrite %d bytes\n", c);
- if (!(foo = NCR5380_pwrite(instance, d, c))) {
+ if (!foo && !(hostdata->flags & FLAG_NO_DMA_FIXUP)) {
/*
- * Wait for the last byte to be sent. If REQ is being asserted for
- * the byte we're interested, we'll ACK it and it will go false.
+ * Wait for the last byte to be sent. If REQ is being asserted for
+ * the byte we're interested, we'll ACK it and it will go false.
*/
- if (!(hostdata->flags & FLAG_HAS_LAST_BYTE_SENT)) {
- timeout = 20000;
- while (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_DRQ) && (NCR5380_read(BUS_AND_STATUS_REG) & BASR_PHASE_MATCH));
-
- if (!timeout)
- dprintk(NDEBUG_LAST_BYTE_SENT, "scsi%d : timed out on last byte\n", instance->host_no);
-
- if (hostdata->flags & FLAG_CHECK_LAST_BYTE_SENT) {
- hostdata->flags &= ~FLAG_CHECK_LAST_BYTE_SENT;
- if (NCR5380_read(TARGET_COMMAND_REG) & TCR_LAST_BYTE_SENT) {
- hostdata->flags |= FLAG_HAS_LAST_BYTE_SENT;
- dprintk(NDEBUG_LAST_BYTE_SENT, "scsi%d : last byte sent works\n", instance->host_no);
- }
- }
- } else {
- dprintk(NDEBUG_C400_PWRITE, "Waiting for LASTBYTE\n");
- while (!(NCR5380_read(TARGET_COMMAND_REG) & TCR_LAST_BYTE_SENT));
- dprintk(NDEBUG_C400_PWRITE, "Got LASTBYTE\n");
+ if (NCR5380_poll_politely2(instance,
+ BUS_AND_STATUS_REG, BASR_DRQ, BASR_DRQ,
+ BUS_AND_STATUS_REG, BASR_PHASE_MATCH, 0, HZ) < 0) {
+ foo = -1;
+ shost_printk(KERN_ERR, instance, "PDMA write: DRQ and phase timeout\n");
}
}
-#endif
}
NCR5380_write(MODE_REG, MR_BASE);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-
- if ((!(p & SR_IO)) && (hostdata->flags & FLAG_NCR53C400)) {
- dprintk(NDEBUG_C400_PWRITE, "53C400w: Checking for IRQ\n");
- if (NCR5380_read(BUS_AND_STATUS_REG) & BASR_IRQ) {
- dprintk(NDEBUG_C400_PWRITE, "53C400w: got it, reading reset interrupt reg\n");
- NCR5380_read(RESET_PARITY_INTERRUPT_REG);
- } else {
- printk("53C400w: IRQ NOT THERE!\n");
- }
- }
+ NCR5380_read(RESET_PARITY_INTERRUPT_REG);
*data = d + c;
*count = 0;
*phase = NCR5380_read(STATUS_REG) & PHASE_MASK;
-#if defined(PSEUDO_DMA) && defined(UNSAFE)
- spin_lock_irq(instance->host_lock);
-#endif /* defined(REAL_DMA_POLL) */
return foo;
#endif /* def REAL_DMA */
}
@@ -1983,25 +1739,23 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase
/*
* Function : NCR5380_information_transfer (struct Scsi_Host *instance)
*
- * Purpose : run through the various SCSI phases and do as the target
- * directs us to. Operates on the currently connected command,
- * instance->connected.
+ * Purpose : run through the various SCSI phases and do as the target
+ * directs us to. Operates on the currently connected command,
+ * instance->connected.
*
* Inputs : instance, instance for which we are doing commands
*
- * Side effects : SCSI things happen, the disconnected queue will be
- * modified if a command disconnects, *instance->connected will
- * change.
- *
- * XXX Note : we need to watch for bus free or a reset condition here
- * to recover from an unexpected bus free condition.
+ * Side effects : SCSI things happen, the disconnected queue will be
+ * modified if a command disconnects, *instance->connected will
+ * change.
*
- * Locks: io_request_lock held by caller in IRQ mode
+ * XXX Note : we need to watch for bus free or a reset condition here
+ * to recover from an unexpected bus free condition.
*/
-static void NCR5380_information_transfer(struct Scsi_Host *instance) {
- NCR5380_local_declare();
- struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *)instance->hostdata;
+static void NCR5380_information_transfer(struct Scsi_Host *instance)
+{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
unsigned char msgout = NOP;
int sink = 0;
int len;
@@ -2010,13 +1764,11 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
#endif
unsigned char *data;
unsigned char phase, tmp, extended_msg[10], old_phase = 0xff;
- struct scsi_cmnd *cmd = (struct scsi_cmnd *) hostdata->connected;
- /* RvC: we need to set the end of the polling time */
- unsigned long poll_time = jiffies + USLEEP_POLL;
+ struct scsi_cmnd *cmd;
- NCR5380_setup(instance);
+ while ((cmd = hostdata->connected)) {
+ struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
- while (1) {
tmp = NCR5380_read(STATUS_REG);
/* We only have a valid SCSI phase when REQ is asserted */
if (tmp & SR_REQ) {
@@ -2028,24 +1780,28 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
if (sink && (phase != PHASE_MSGOUT)) {
NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(tmp));
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
- while (NCR5380_read(STATUS_REG) & SR_REQ);
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN |
+ ICR_ASSERT_ACK);
+ while (NCR5380_read(STATUS_REG) & SR_REQ)
+ ;
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
+ ICR_ASSERT_ATN);
sink = 0;
continue;
}
+
switch (phase) {
- case PHASE_DATAIN:
case PHASE_DATAOUT:
#if (NDEBUG & NDEBUG_NO_DATAOUT)
- printk("scsi%d : NDEBUG_NO_DATAOUT set, attempted DATAOUT aborted\n", instance->host_no);
+ shost_printk(KERN_DEBUG, instance, "NDEBUG_NO_DATAOUT set, attempted DATAOUT aborted\n");
sink = 1;
do_abort(instance);
cmd->result = DID_ERROR << 16;
- cmd->scsi_done(cmd);
+ complete_cmd(instance, cmd);
return;
#endif
- /*
+ case PHASE_DATAIN:
+ /*
* If there is no room left in the current buffer in the
* scatter-gather list, move onto the next one.
*/
@@ -2055,10 +1811,13 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
--cmd->SCp.buffers_residual;
cmd->SCp.this_residual = cmd->SCp.buffer->length;
cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
- dprintk(NDEBUG_INFORMATION, "scsi%d : %d bytes and %d buffers left\n", instance->host_no, cmd->SCp.this_residual, cmd->SCp.buffers_residual);
+ dsprintk(NDEBUG_INFORMATION, instance, "%d bytes and %d buffers left\n",
+ cmd->SCp.this_residual,
+ cmd->SCp.buffers_residual);
}
+
/*
- * The preferred transfer method is going to be
+ * The preferred transfer method is going to be
* PSEUDO-DMA for systems that are strictly PIO,
* since we can let the hardware do the handshaking.
*
@@ -2068,50 +1827,39 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
*/
#if defined(PSEUDO_DMA) || defined(REAL_DMA_POLL)
- /* KLL
- * PSEUDO_DMA is defined here. If this is the g_NCR5380
- * driver then it will always be defined, so the
- * FLAG_NO_PSEUDO_DMA is used to inhibit PDMA in the base
- * NCR5380 case. I think this is a fairly clean solution.
- * We supplement these 2 if's with the flag.
- */
-#ifdef NCR5380_dma_xfer_len
- if (!cmd->device->borken && !(hostdata->flags & FLAG_NO_PSEUDO_DMA) && (transfersize = NCR5380_dma_xfer_len(instance, cmd)) != 0) {
-#else
- transfersize = cmd->transfersize;
-
-#ifdef LIMIT_TRANSFERSIZE /* If we have problems with interrupt service */
- if (transfersize > 512)
- transfersize = 512;
-#endif /* LIMIT_TRANSFERSIZE */
-
- if (!cmd->device->borken && transfersize && !(hostdata->flags & FLAG_NO_PSEUDO_DMA) && cmd->SCp.this_residual && !(cmd->SCp.this_residual % transfersize)) {
- /* Limit transfers to 32K, for xx400 & xx406
- * pseudoDMA that transfers in 128 bytes blocks. */
- if (transfersize > 32 * 1024)
- transfersize = 32 * 1024;
-#endif
+ transfersize = 0;
+ if (!cmd->device->borken &&
+ !(hostdata->flags & FLAG_NO_PSEUDO_DMA))
+ transfersize = NCR5380_dma_xfer_len(instance, cmd, phase);
+
+ if (transfersize) {
len = transfersize;
- if (NCR5380_transfer_dma(instance, &phase, &len, (unsigned char **) &cmd->SCp.ptr)) {
+ if (NCR5380_transfer_dma(instance, &phase,
+ &len, (unsigned char **)&cmd->SCp.ptr)) {
/*
- * If the watchdog timer fires, all future accesses to this
- * device will use the polled-IO.
+ * If the watchdog timer fires, all future
+ * accesses to this device will use the
+ * polled-IO.
*/
scmd_printk(KERN_INFO, cmd,
- "switching to slow handshake\n");
+ "switching to slow handshake\n");
cmd->device->borken = 1;
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
sink = 1;
do_abort(instance);
cmd->result = DID_ERROR << 16;
- cmd->scsi_done(cmd);
+ complete_cmd(instance, cmd);
/* XXX - need to source or sink data here, as appropriate */
} else
cmd->SCp.this_residual -= transfersize - len;
} else
#endif /* defined(PSEUDO_DMA) || defined(REAL_DMA_POLL) */
- NCR5380_transfer_pio(instance, &phase, (int *) &cmd->SCp.this_residual, (unsigned char **)
- &cmd->SCp.ptr);
+ {
+ spin_unlock_irq(&hostdata->lock);
+ NCR5380_transfer_pio(instance, &phase,
+ (int *)&cmd->SCp.this_residual,
+ (unsigned char **)&cmd->SCp.ptr);
+ spin_lock_irq(&hostdata->lock);
+ }
break;
case PHASE_MSGIN:
len = 1;
@@ -2120,101 +1868,42 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
cmd->SCp.Message = tmp;
switch (tmp) {
- /*
- * Linking lets us reduce the time required to get the
- * next command out to the device, hopefully this will
- * mean we don't waste another revolution due to the delays
- * required by ARBITRATION and another SELECTION.
- *
- * In the current implementation proposal, low level drivers
- * merely have to start the next command, pointed to by
- * next_link, done() is called as with unlinked commands.
- */
-#ifdef LINKED
- case LINKED_CMD_COMPLETE:
- case LINKED_FLG_CMD_COMPLETE:
- /* Accept message by clearing ACK */
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- dprintk(NDEBUG_LINKED, "scsi%d : target %d lun %llu linked command complete.\n", instance->host_no, cmd->device->id, cmd->device->lun);
- /*
- * Sanity check : A linked command should only terminate with
- * one of these messages if there are more linked commands
- * available.
- */
- if (!cmd->next_link) {
- printk("scsi%d : target %d lun %llu linked command complete, no next_link\n" instance->host_no, cmd->device->id, cmd->device->lun);
- sink = 1;
- do_abort(instance);
- return;
- }
- initialize_SCp(cmd->next_link);
- /* The next command is still part of this process */
- cmd->next_link->tag = cmd->tag;
- cmd->result = cmd->SCp.Status | (cmd->SCp.Message << 8);
- dprintk(NDEBUG_LINKED, "scsi%d : target %d lun %llu linked request done, calling scsi_done().\n", instance->host_no, cmd->device->id, cmd->device->lun);
- cmd->scsi_done(cmd);
- cmd = hostdata->connected;
- break;
-#endif /* def LINKED */
case ABORT:
case COMMAND_COMPLETE:
/* Accept message by clearing ACK */
sink = 1;
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- hostdata->connected = NULL;
- dprintk(NDEBUG_QUEUES, "scsi%d : command for target %d, lun %llu completed\n", instance->host_no, cmd->device->id, cmd->device->lun);
- hostdata->busy[cmd->device->id] &= ~(1 << (cmd->device->lun & 0xFF));
-
- /*
- * I'm not sure what the correct thing to do here is :
- *
- * If the command that just executed is NOT a request
- * sense, the obvious thing to do is to set the result
- * code to the values of the stored parameters.
- *
- * If it was a REQUEST SENSE command, we need some way
- * to differentiate between the failure code of the original
- * and the failure code of the REQUEST sense - the obvious
- * case is success, where we fall through and leave the result
- * code unchanged.
- *
- * The non-obvious place is where the REQUEST SENSE failed
- */
-
- if (cmd->cmnd[0] != REQUEST_SENSE)
- cmd->result = cmd->SCp.Status | (cmd->SCp.Message << 8);
- else if (status_byte(cmd->SCp.Status) != GOOD)
- cmd->result = (cmd->result & 0x00ffff) | (DID_ERROR << 16);
+ dsprintk(NDEBUG_QUEUES, instance,
+ "COMMAND COMPLETE %p target %d lun %llu\n",
+ cmd, scmd_id(cmd), cmd->device->lun);
- if ((cmd->cmnd[0] == REQUEST_SENSE) &&
- hostdata->ses.cmd_len) {
- scsi_eh_restore_cmnd(cmd, &hostdata->ses);
- hostdata->ses.cmd_len = 0 ;
- }
-
- if ((cmd->cmnd[0] != REQUEST_SENSE) && (status_byte(cmd->SCp.Status) == CHECK_CONDITION)) {
- scsi_eh_prep_cmnd(cmd, &hostdata->ses, NULL, 0, ~0);
-
- dprintk(NDEBUG_AUTOSENSE, "scsi%d : performing request sense\n", instance->host_no);
+ hostdata->connected = NULL;
- LIST(cmd, hostdata->issue_queue);
- cmd->host_scribble = (unsigned char *)
- hostdata->issue_queue;
- hostdata->issue_queue = (struct scsi_cmnd *) cmd;
- dprintk(NDEBUG_QUEUES, "scsi%d : REQUEST SENSE added to head of issue queue\n", instance->host_no);
- } else {
- cmd->scsi_done(cmd);
+ cmd->result &= ~0xffff;
+ cmd->result |= cmd->SCp.Status;
+ cmd->result |= cmd->SCp.Message << 8;
+
+ if (cmd->cmnd[0] == REQUEST_SENSE)
+ complete_cmd(instance, cmd);
+ else {
+ if (cmd->SCp.Status == SAM_STAT_CHECK_CONDITION ||
+ cmd->SCp.Status == SAM_STAT_COMMAND_TERMINATED) {
+ dsprintk(NDEBUG_QUEUES, instance, "autosense: adding cmd %p to tail of autosense queue\n",
+ cmd);
+ list_add_tail(&ncmd->list,
+ &hostdata->autosense);
+ } else
+ complete_cmd(instance, cmd);
}
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- /*
- * Restore phase bits to 0 so an interrupted selection,
+ /*
+ * Restore phase bits to 0 so an interrupted selection,
* arbitration can resume.
*/
NCR5380_write(TARGET_COMMAND_REG, 0);
- while ((NCR5380_read(STATUS_REG) & SR_BSY) && !hostdata->connected)
- barrier();
+ /* Enable reselect interrupts */
+ NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
return;
case MESSAGE_REJECT:
/* Accept message by clearing ACK */
@@ -2229,38 +1918,33 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
default:
break;
}
- case DISCONNECT:{
- /* Accept message by clearing ACK */
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- cmd->device->disconnect = 1;
- LIST(cmd, hostdata->disconnected_queue);
- cmd->host_scribble = (unsigned char *)
- hostdata->disconnected_queue;
- hostdata->connected = NULL;
- hostdata->disconnected_queue = cmd;
- dprintk(NDEBUG_QUEUES, "scsi%d : command for target %d lun %llu was moved from connected to" " the disconnected_queue\n", instance->host_no, cmd->device->id, cmd->device->lun);
- /*
- * Restore phase bits to 0 so an interrupted selection,
- * arbitration can resume.
- */
- NCR5380_write(TARGET_COMMAND_REG, 0);
-
- /* Enable reselect interrupts */
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- /* Wait for bus free to avoid nasty timeouts - FIXME timeout !*/
- /* NCR538_poll_politely(instance, STATUS_REG, SR_BSY, 0, 30 * HZ); */
- while ((NCR5380_read(STATUS_REG) & SR_BSY) && !hostdata->connected)
- barrier();
- return;
- }
- /*
+ break;
+ case DISCONNECT:
+ /* Accept message by clearing ACK */
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+ hostdata->connected = NULL;
+ list_add(&ncmd->list, &hostdata->disconnected);
+ dsprintk(NDEBUG_INFORMATION | NDEBUG_QUEUES,
+ instance, "connected command %p for target %d lun %llu moved to disconnected queue\n",
+ cmd, scmd_id(cmd), cmd->device->lun);
+
+ /*
+ * Restore phase bits to 0 so an interrupted selection,
+ * arbitration can resume.
+ */
+ NCR5380_write(TARGET_COMMAND_REG, 0);
+
+ /* Enable reselect interrupts */
+ NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+ return;
+ /*
* The SCSI data pointer is *IMPLICITLY* saved on a disconnect
- * operation, in violation of the SCSI spec so we can safely
+ * operation, in violation of the SCSI spec so we can safely
* ignore SAVE/RESTORE pointers calls.
*
- * Unfortunately, some disks violate the SCSI spec and
+ * Unfortunately, some disks violate the SCSI spec and
* don't issue the required SAVE_POINTERS message before
- * disconnecting, and we have to break spec to remain
+ * disconnecting, and we have to break spec to remain
* compatible.
*/
case SAVE_POINTERS:
@@ -2269,31 +1953,28 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
break;
case EXTENDED_MESSAGE:
-/*
- * Extended messages are sent in the following format :
- * Byte
- * 0 EXTENDED_MESSAGE == 1
- * 1 length (includes one byte for code, doesn't
- * include first two bytes)
- * 2 code
- * 3..length+1 arguments
- *
- * Start the extended message buffer with the EXTENDED_MESSAGE
- * byte, since spi_print_msg() wants the whole thing.
- */
+ /*
+ * Start the message buffer with the EXTENDED_MESSAGE
+ * byte, since spi_print_msg() wants the whole thing.
+ */
extended_msg[0] = EXTENDED_MESSAGE;
/* Accept first byte by clearing ACK */
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- dprintk(NDEBUG_EXTENDED, "scsi%d : receiving extended message\n", instance->host_no);
+
+ spin_unlock_irq(&hostdata->lock);
+
+ dsprintk(NDEBUG_EXTENDED, instance, "receiving extended message\n");
len = 2;
data = extended_msg + 1;
phase = PHASE_MSGIN;
NCR5380_transfer_pio(instance, &phase, &len, &data);
+ dsprintk(NDEBUG_EXTENDED, instance, "length %d, code 0x%02x\n",
+ (int)extended_msg[1],
+ (int)extended_msg[2]);
- dprintk(NDEBUG_EXTENDED, "scsi%d : length=%d, code=0x%02x\n", instance->host_no, (int) extended_msg[1], (int) extended_msg[2]);
-
- if (!len && extended_msg[1] <= (sizeof(extended_msg) - 1)) {
+ if (!len && extended_msg[1] > 0 &&
+ extended_msg[1] <= sizeof(extended_msg) - 2) {
/* Accept third byte by clearing ACK */
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
len = extended_msg[1] - 1;
@@ -2301,7 +1982,8 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
phase = PHASE_MSGIN;
NCR5380_transfer_pio(instance, &phase, &len, &data);
- dprintk(NDEBUG_EXTENDED, "scsi%d : message received, residual %d\n", instance->host_no, len);
+ dsprintk(NDEBUG_EXTENDED, instance, "message received, residual %d\n",
+ len);
switch (extended_msg[2]) {
case EXTENDED_SDTR:
@@ -2311,34 +1993,42 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
tmp = 0;
}
} else if (len) {
- printk("scsi%d: error receiving extended message\n", instance->host_no);
+ shost_printk(KERN_ERR, instance, "error receiving extended message\n");
tmp = 0;
} else {
- printk("scsi%d: extended message code %02x length %d is too long\n", instance->host_no, extended_msg[2], extended_msg[1]);
+ shost_printk(KERN_NOTICE, instance, "extended message code %02x length %d is too long\n",
+ extended_msg[2], extended_msg[1]);
tmp = 0;
}
+
+ spin_lock_irq(&hostdata->lock);
+ if (!hostdata->connected)
+ return;
+
/* Fall through to reject message */
- /*
- * If we get something weird that we aren't expecting,
+ /*
+ * If we get something weird that we aren't expecting,
* reject it.
*/
default:
if (!tmp) {
- printk("scsi%d: rejecting message ", instance->host_no);
+ shost_printk(KERN_ERR, instance, "rejecting message ");
spi_print_msg(extended_msg);
printk("\n");
} else if (tmp != EXTENDED_MESSAGE)
scmd_printk(KERN_INFO, cmd,
- "rejecting unknown message %02x\n",tmp);
+ "rejecting unknown message %02x\n",
+ tmp);
else
scmd_printk(KERN_INFO, cmd,
- "rejecting unknown extended message code %02x, length %d\n", extended_msg[1], extended_msg[0]);
+ "rejecting unknown extended message code %02x, length %d\n",
+ extended_msg[1], extended_msg[0]);
msgout = MESSAGE_REJECT;
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
break;
- } /* switch (tmp) */
+ } /* switch (tmp) */
break;
case PHASE_MSGOUT:
len = 1;
@@ -2346,10 +2036,9 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
hostdata->last_message = msgout;
NCR5380_transfer_pio(instance, &phase, &len, &data);
if (msgout == ABORT) {
- hostdata->busy[cmd->device->id] &= ~(1 << (cmd->device->lun & 0xFF));
hostdata->connected = NULL;
cmd->result = DID_ERROR << 16;
- cmd->scsi_done(cmd);
+ complete_cmd(instance, cmd);
NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
return;
}
@@ -2358,17 +2047,12 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
case PHASE_CMDOUT:
len = cmd->cmd_len;
data = cmd->cmnd;
- /*
- * XXX for performance reasons, on machines with a
- * PSEUDO-DMA architecture we should probably
- * use the dma transfer function.
+ /*
+ * XXX for performance reasons, on machines with a
+ * PSEUDO-DMA architecture we should probably
+ * use the dma transfer function.
*/
NCR5380_transfer_pio(instance, &phase, &len, &data);
- if (!cmd->device->disconnect && should_disconnect(cmd->cmnd[0])) {
- NCR5380_set_timer(hostdata, USLEEP_SLEEP);
- dprintk(NDEBUG_USLEEP, "scsi%d : issued command, sleeping until %lu\n", instance->host_no, hostdata->time_expires);
- return;
- }
break;
case PHASE_STATIN:
len = 1;
@@ -2377,46 +2061,37 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
cmd->SCp.Status = tmp;
break;
default:
- printk("scsi%d : unknown phase\n", instance->host_no);
+ shost_printk(KERN_ERR, instance, "unknown phase\n");
NCR5380_dprint(NDEBUG_ANY, instance);
- } /* switch(phase) */
- } /* if (tmp * SR_REQ) */
- else {
- /* RvC: go to sleep if polling time expired
- */
- if (!cmd->device->disconnect && time_after_eq(jiffies, poll_time)) {
- NCR5380_set_timer(hostdata, USLEEP_SLEEP);
- dprintk(NDEBUG_USLEEP, "scsi%d : poll timed out, sleeping until %lu\n", instance->host_no, hostdata->time_expires);
- return;
- }
+ } /* switch(phase) */
+ } else {
+ spin_unlock_irq(&hostdata->lock);
+ NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, HZ);
+ spin_lock_irq(&hostdata->lock);
}
- } /* while (1) */
+ }
}
/*
* Function : void NCR5380_reselect (struct Scsi_Host *instance)
*
- * Purpose : does reselection, initializing the instance->connected
- * field to point to the scsi_cmnd for which the I_T_L or I_T_L_Q
- * nexus has been reestablished,
- *
- * Inputs : instance - this instance of the NCR5380.
+ * Purpose : does reselection, initializing the instance->connected
+ * field to point to the scsi_cmnd for which the I_T_L or I_T_L_Q
+ * nexus has been reestablished,
*
- * Locks: io_request_lock held by caller if IRQ driven
+ * Inputs : instance - this instance of the NCR5380.
*/
-static void NCR5380_reselect(struct Scsi_Host *instance) {
- NCR5380_local_declare();
- struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *)
- instance->hostdata;
+static void NCR5380_reselect(struct Scsi_Host *instance)
+{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
unsigned char target_mask;
unsigned char lun, phase;
int len;
unsigned char msg[3];
unsigned char *data;
- struct scsi_cmnd *tmp = NULL, *prev;
- int abort = 0;
- NCR5380_setup(instance);
+ struct NCR5380_cmd *ncmd;
+ struct scsi_cmnd *tmp;
/*
* Disable arbitration, etc. since the host adapter obviously
@@ -2424,12 +2099,12 @@ static void NCR5380_reselect(struct Scsi_Host *instance) {
*/
NCR5380_write(MODE_REG, MR_BASE);
- hostdata->restart_select = 1;
target_mask = NCR5380_read(CURRENT_SCSI_DATA_REG) & ~(hostdata->id_mask);
- dprintk(NDEBUG_SELECTION, "scsi%d : reselect\n", instance->host_no);
- /*
+ dsprintk(NDEBUG_RESELECTION, instance, "reselect\n");
+
+ /*
* At this point, we have detected that our SCSI ID is on the bus,
* SEL is true and BSY was false for at least one bus settle delay
* (400 ns).
@@ -2439,103 +2114,110 @@ static void NCR5380_reselect(struct Scsi_Host *instance) {
*/
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_BSY);
-
- /* FIXME: timeout too long, must fail to workqueue */
- if(NCR5380_poll_politely(instance, STATUS_REG, SR_SEL, 0, 2*HZ)<0)
- abort = 1;
-
+ if (NCR5380_poll_politely(instance,
+ STATUS_REG, SR_SEL, 0, 2 * HZ) < 0) {
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+ return;
+ }
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
/*
* Wait for target to go into MSGIN.
- * FIXME: timeout needed and fail to work queeu
*/
- if(NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, 2*HZ))
- abort = 1;
+ if (NCR5380_poll_politely(instance,
+ STATUS_REG, SR_REQ, SR_REQ, 2 * HZ) < 0) {
+ do_abort(instance);
+ return;
+ }
len = 1;
data = msg;
phase = PHASE_MSGIN;
NCR5380_transfer_pio(instance, &phase, &len, &data);
+ if (len) {
+ do_abort(instance);
+ return;
+ }
+
if (!(msg[0] & 0x80)) {
- printk(KERN_ERR "scsi%d : expecting IDENTIFY message, got ", instance->host_no);
+ shost_printk(KERN_ERR, instance, "expecting IDENTIFY message, got ");
spi_print_msg(msg);
- abort = 1;
- } else {
- /* Accept message by clearing ACK */
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- lun = (msg[0] & 0x07);
+ printk("\n");
+ do_abort(instance);
+ return;
+ }
+ lun = msg[0] & 0x07;
- /*
- * We need to add code for SCSI-II to track which devices have
- * I_T_L_Q nexuses established, and which have simple I_T_L
- * nexuses so we can chose to do additional data transfer.
- */
+ /*
+ * We need to add code for SCSI-II to track which devices have
+ * I_T_L_Q nexuses established, and which have simple I_T_L
+ * nexuses so we can chose to do additional data transfer.
+ */
- /*
- * Find the command corresponding to the I_T_L or I_T_L_Q nexus we
- * just reestablished, and remove it from the disconnected queue.
- */
+ /*
+ * Find the command corresponding to the I_T_L or I_T_L_Q nexus we
+ * just reestablished, and remove it from the disconnected queue.
+ */
+ tmp = NULL;
+ list_for_each_entry(ncmd, &hostdata->disconnected, list) {
+ struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd);
- for (tmp = (struct scsi_cmnd *) hostdata->disconnected_queue, prev = NULL; tmp; prev = tmp, tmp = (struct scsi_cmnd *) tmp->host_scribble)
- if ((target_mask == (1 << tmp->device->id)) && (lun == (u8)tmp->device->lun)
- ) {
- if (prev) {
- REMOVE(prev, prev->host_scribble, tmp, tmp->host_scribble);
- prev->host_scribble = tmp->host_scribble;
- } else {
- REMOVE(-1, hostdata->disconnected_queue, tmp, tmp->host_scribble);
- hostdata->disconnected_queue = (struct scsi_cmnd *) tmp->host_scribble;
- }
- tmp->host_scribble = NULL;
- break;
- }
- if (!tmp) {
- printk(KERN_ERR "scsi%d : warning : target bitmask %02x lun %d not in disconnect_queue.\n", instance->host_no, target_mask, lun);
- /*
- * Since we have an established nexus that we can't do anything with,
- * we must abort it.
- */
- abort = 1;
+ if (target_mask == (1 << scmd_id(cmd)) &&
+ lun == (u8)cmd->device->lun) {
+ list_del(&ncmd->list);
+ tmp = cmd;
+ break;
}
}
- if (abort) {
- do_abort(instance);
+ if (tmp) {
+ dsprintk(NDEBUG_RESELECTION | NDEBUG_QUEUES, instance,
+ "reselect: removed %p from disconnected queue\n", tmp);
} else {
- hostdata->connected = tmp;
- dprintk(NDEBUG_RESELECTION, "scsi%d : nexus established, target = %d, lun = %llu, tag = %d\n", instance->host_no, tmp->device->id, tmp->device->lun, tmp->tag);
+ shost_printk(KERN_ERR, instance, "target bitmask 0x%02x lun %d not in disconnected queue.\n",
+ target_mask, lun);
+ /*
+ * Since we have an established nexus that we can't do anything
+ * with, we must abort it.
+ */
+ do_abort(instance);
+ return;
}
+
+ /* Accept message by clearing ACK */
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+
+ hostdata->connected = tmp;
+ dsprintk(NDEBUG_RESELECTION, instance, "nexus established, target %d, lun %llu, tag %d\n",
+ scmd_id(tmp), tmp->device->lun, tmp->tag);
}
/*
* Function : void NCR5380_dma_complete (struct Scsi_Host *instance)
*
* Purpose : called by interrupt handler when DMA finishes or a phase
- * mismatch occurs (which would finish the DMA transfer).
+ * mismatch occurs (which would finish the DMA transfer).
*
* Inputs : instance - this instance of the NCR5380.
*
* Returns : pointer to the scsi_cmnd structure for which the I_T_L
- * nexus has been reestablished, on failure NULL is returned.
+ * nexus has been reestablished, on failure NULL is returned.
*/
#ifdef REAL_DMA
static void NCR5380_dma_complete(NCR5380_instance * instance) {
- NCR5380_local_declare();
- struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
int transferred;
- NCR5380_setup(instance);
/*
* XXX this might not be right.
*
* Wait for final byte to transfer, ie wait for ACK to go false.
*
- * We should use the Last Byte Sent bit, unfortunately this is
+ * We should use the Last Byte Sent bit, unfortunately this is
* not available on the 5380/5381 (only the various CMOS chips)
*
* FIXME: timeout, and need to handle long timeout/irq case
@@ -2543,7 +2225,6 @@ static void NCR5380_dma_complete(NCR5380_instance * instance) {
NCR5380_poll_politely(instance, BUS_AND_STATUS_REG, BASR_ACK, 0, 5*HZ);
- NCR5380_write(MODE_REG, MR_BASE);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
/*
@@ -2560,190 +2241,251 @@ static void NCR5380_dma_complete(NCR5380_instance * instance) {
}
#endif /* def REAL_DMA */
-/*
- * Function : int NCR5380_abort (struct scsi_cmnd *cmd)
- *
- * Purpose : abort a command
- *
- * Inputs : cmd - the scsi_cmnd to abort, code - code to set the
- * host byte of the result field to, if zero DID_ABORTED is
- * used.
- *
- * Returns : SUCCESS - success, FAILED on failure.
- *
- * XXX - there is no way to abort the command that is currently
- * connected, you have to wait for it to complete. If this is
- * a problem, we could implement longjmp() / setjmp(), setjmp()
- * called where the loop started in NCR5380_main().
- *
- * Locks: host lock taken by caller
+/**
+ * list_find_cmd - test for presence of a command in a linked list
+ * @haystack: list of commands
+ * @needle: command to search for
*/
-static int NCR5380_abort(struct scsi_cmnd *cmd)
+static bool list_find_cmd(struct list_head *haystack,
+ struct scsi_cmnd *needle)
{
- NCR5380_local_declare();
- struct Scsi_Host *instance = cmd->device->host;
- struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
- struct scsi_cmnd *tmp, **prev;
+ struct NCR5380_cmd *ncmd;
- scmd_printk(KERN_WARNING, cmd, "aborting command\n");
+ list_for_each_entry(ncmd, haystack, list)
+ if (NCR5380_to_scmd(ncmd) == needle)
+ return true;
+ return false;
+}
- NCR5380_print_status(instance);
+/**
+ * list_remove_cmd - remove a command from linked list
+ * @haystack: list of commands
+ * @needle: command to remove
+ */
- NCR5380_setup(instance);
+static bool list_del_cmd(struct list_head *haystack,
+ struct scsi_cmnd *needle)
+{
+ if (list_find_cmd(haystack, needle)) {
+ struct NCR5380_cmd *ncmd = scsi_cmd_priv(needle);
- dprintk(NDEBUG_ABORT, "scsi%d : abort called\n", instance->host_no);
- dprintk(NDEBUG_ABORT, " basr 0x%X, sr 0x%X\n", NCR5380_read(BUS_AND_STATUS_REG), NCR5380_read(STATUS_REG));
+ list_del(&ncmd->list);
+ return true;
+ }
+ return false;
+}
-#if 0
-/*
- * Case 1 : If the command is the currently executing command,
- * we'll set the aborted flag and return control so that
- * information transfer routine can exit cleanly.
+/**
+ * NCR5380_abort - scsi host eh_abort_handler() method
+ * @cmd: the command to be aborted
+ *
+ * Try to abort a given command by removing it from queues and/or sending
+ * the target an abort message. This may not succeed in causing a target
+ * to abort the command. Nonetheless, the low-level driver must forget about
+ * the command because the mid-layer reclaims it and it may be re-issued.
+ *
+ * The normal path taken by a command is as follows. For EH we trace this
+ * same path to locate and abort the command.
+ *
+ * unissued -> selecting -> [unissued -> selecting ->]... connected ->
+ * [disconnected -> connected ->]...
+ * [autosense -> connected ->] done
+ *
+ * If cmd is unissued then just remove it.
+ * If cmd is disconnected, try to select the target.
+ * If cmd is connected, try to send an abort message.
+ * If cmd is waiting for autosense, give it a chance to complete but check
+ * that it isn't left connected.
+ * If cmd was not found at all then presumably it has already been completed,
+ * in which case return SUCCESS to try to avoid further EH measures.
+ * If the command has not completed yet, we must not fail to find it.
*/
- if (hostdata->connected == cmd) {
- dprintk(NDEBUG_ABORT, "scsi%d : aborting connected command\n", instance->host_no);
- hostdata->aborted = 1;
-/*
- * We should perform BSY checking, and make sure we haven't slipped
- * into BUS FREE.
- */
+static int NCR5380_abort(struct scsi_cmnd *cmd)
+{
+ struct Scsi_Host *instance = cmd->device->host;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ unsigned long flags;
+ int result = SUCCESS;
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_ASSERT_ATN);
-/*
- * Since we can't change phases until we've completed the current
- * handshake, we have to source or sink a byte of data if the current
- * phase is not MSGOUT.
- */
+ spin_lock_irqsave(&hostdata->lock, flags);
-/*
- * Return control to the executing NCR drive so we can clear the
- * aborted flag and get back into our main loop.
- */
+#if (NDEBUG & NDEBUG_ANY)
+ scmd_printk(KERN_INFO, cmd, __func__);
+#endif
+ NCR5380_dprint(NDEBUG_ANY, instance);
+ NCR5380_dprint_phase(NDEBUG_ANY, instance);
- return SUCCESS;
+ if (list_del_cmd(&hostdata->unissued, cmd)) {
+ dsprintk(NDEBUG_ABORT, instance,
+ "abort: removed %p from issue queue\n", cmd);
+ cmd->result = DID_ABORT << 16;
+ cmd->scsi_done(cmd); /* No tag or busy flag to worry about */
}
-#endif
-/*
- * Case 2 : If the command hasn't been issued yet, we simply remove it
- * from the issue queue.
- */
-
- dprintk(NDEBUG_ABORT, "scsi%d : abort going into loop.\n", instance->host_no);
- for (prev = (struct scsi_cmnd **) &(hostdata->issue_queue), tmp = (struct scsi_cmnd *) hostdata->issue_queue; tmp; prev = (struct scsi_cmnd **) &(tmp->host_scribble), tmp = (struct scsi_cmnd *) tmp->host_scribble)
- if (cmd == tmp) {
- REMOVE(5, *prev, tmp, tmp->host_scribble);
- (*prev) = (struct scsi_cmnd *) tmp->host_scribble;
- tmp->host_scribble = NULL;
- tmp->result = DID_ABORT << 16;
- dprintk(NDEBUG_ABORT, "scsi%d : abort removed command from issue queue.\n", instance->host_no);
- tmp->scsi_done(tmp);
- return SUCCESS;
+ if (hostdata->selecting == cmd) {
+ dsprintk(NDEBUG_ABORT, instance,
+ "abort: cmd %p == selecting\n", cmd);
+ hostdata->selecting = NULL;
+ cmd->result = DID_ABORT << 16;
+ complete_cmd(instance, cmd);
+ goto out;
+ }
+
+ if (list_del_cmd(&hostdata->disconnected, cmd)) {
+ dsprintk(NDEBUG_ABORT, instance,
+ "abort: removed %p from disconnected list\n", cmd);
+ cmd->result = DID_ERROR << 16;
+ if (!hostdata->connected)
+ NCR5380_select(instance, cmd);
+ if (hostdata->connected != cmd) {
+ complete_cmd(instance, cmd);
+ result = FAILED;
+ goto out;
+ }
+ }
+
+ if (hostdata->connected == cmd) {
+ dsprintk(NDEBUG_ABORT, instance, "abort: cmd %p is connected\n", cmd);
+ hostdata->connected = NULL;
+ if (do_abort(instance)) {
+ set_host_byte(cmd, DID_ERROR);
+ complete_cmd(instance, cmd);
+ result = FAILED;
+ goto out;
}
-#if (NDEBUG & NDEBUG_ABORT)
- /* KLL */
- else if (prev == tmp)
- printk(KERN_ERR "scsi%d : LOOP\n", instance->host_no);
+ set_host_byte(cmd, DID_ABORT);
+#ifdef REAL_DMA
+ hostdata->dma_len = 0;
#endif
+ if (cmd->cmnd[0] == REQUEST_SENSE)
+ complete_cmd(instance, cmd);
+ else {
+ struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
-/*
- * Case 3 : If any commands are connected, we're going to fail the abort
- * and let the high level SCSI driver retry at a later time or
- * issue a reset.
- *
- * Timeouts, and therefore aborted commands, will be highly unlikely
- * and handling them cleanly in this situation would make the common
- * case of noresets less efficient, and would pollute our code. So,
- * we fail.
- */
+ /* Perform autosense for this command */
+ list_add(&ncmd->list, &hostdata->autosense);
+ }
+ }
- if (hostdata->connected) {
- dprintk(NDEBUG_ABORT, "scsi%d : abort failed, command connected.\n", instance->host_no);
- return FAILED;
+ if (list_find_cmd(&hostdata->autosense, cmd)) {
+ dsprintk(NDEBUG_ABORT, instance,
+ "abort: found %p on sense queue\n", cmd);
+ spin_unlock_irqrestore(&hostdata->lock, flags);
+ queue_work(hostdata->work_q, &hostdata->main_task);
+ msleep(1000);
+ spin_lock_irqsave(&hostdata->lock, flags);
+ if (list_del_cmd(&hostdata->autosense, cmd)) {
+ dsprintk(NDEBUG_ABORT, instance,
+ "abort: removed %p from sense queue\n", cmd);
+ set_host_byte(cmd, DID_ABORT);
+ complete_cmd(instance, cmd);
+ goto out;
+ }
}
-/*
- * Case 4: If the command is currently disconnected from the bus, and
- * there are no connected commands, we reconnect the I_T_L or
- * I_T_L_Q nexus associated with it, go into message out, and send
- * an abort message.
- *
- * This case is especially ugly. In order to reestablish the nexus, we
- * need to call NCR5380_select(). The easiest way to implement this
- * function was to abort if the bus was busy, and let the interrupt
- * handler triggered on the SEL for reselect take care of lost arbitrations
- * where necessary, meaning interrupts need to be enabled.
- *
- * When interrupts are enabled, the queues may change - so we
- * can't remove it from the disconnected queue before selecting it
- * because that could cause a failure in hashing the nexus if that
- * device reselected.
- *
- * Since the queues may change, we can't use the pointers from when we
- * first locate it.
- *
- * So, we must first locate the command, and if NCR5380_select()
- * succeeds, then issue the abort, relocate the command and remove
- * it from the disconnected queue.
- */
- for (tmp = (struct scsi_cmnd *) hostdata->disconnected_queue; tmp; tmp = (struct scsi_cmnd *) tmp->host_scribble)
- if (cmd == tmp) {
- dprintk(NDEBUG_ABORT, "scsi%d : aborting disconnected command.\n", instance->host_no);
+ if (hostdata->connected == cmd) {
+ dsprintk(NDEBUG_ABORT, instance, "abort: cmd %p is connected\n", cmd);
+ hostdata->connected = NULL;
+ if (do_abort(instance)) {
+ set_host_byte(cmd, DID_ERROR);
+ complete_cmd(instance, cmd);
+ result = FAILED;
+ goto out;
+ }
+ set_host_byte(cmd, DID_ABORT);
+#ifdef REAL_DMA
+ hostdata->dma_len = 0;
+#endif
+ complete_cmd(instance, cmd);
+ }
- if (NCR5380_select(instance, cmd))
- return FAILED;
- dprintk(NDEBUG_ABORT, "scsi%d : nexus reestablished.\n", instance->host_no);
+out:
+ if (result == FAILED)
+ dsprintk(NDEBUG_ABORT, instance, "abort: failed to abort %p\n", cmd);
+ else
+ dsprintk(NDEBUG_ABORT, instance, "abort: successfully aborted %p\n", cmd);
- do_abort(instance);
+ queue_work(hostdata->work_q, &hostdata->main_task);
+ spin_unlock_irqrestore(&hostdata->lock, flags);
- for (prev = (struct scsi_cmnd **) &(hostdata->disconnected_queue), tmp = (struct scsi_cmnd *) hostdata->disconnected_queue; tmp; prev = (struct scsi_cmnd **) &(tmp->host_scribble), tmp = (struct scsi_cmnd *) tmp->host_scribble)
- if (cmd == tmp) {
- REMOVE(5, *prev, tmp, tmp->host_scribble);
- *prev = (struct scsi_cmnd *) tmp->host_scribble;
- tmp->host_scribble = NULL;
- tmp->result = DID_ABORT << 16;
- tmp->scsi_done(tmp);
- return SUCCESS;
- }
- }
-/*
- * Case 5 : If we reached this point, the command was not found in any of
- * the queues.
- *
- * We probably reached this point because of an unlikely race condition
- * between the command completing successfully and the abortion code,
- * so we won't panic, but we will notify the user in case something really
- * broke.
- */
- printk(KERN_WARNING "scsi%d : warning : SCSI command probably completed successfully\n"
- " before abortion\n", instance->host_no);
- return FAILED;
+ return result;
}
-/*
- * Function : int NCR5380_bus_reset (struct scsi_cmnd *cmd)
- *
- * Purpose : reset the SCSI bus.
- *
- * Returns : SUCCESS
+/**
+ * NCR5380_bus_reset - reset the SCSI bus
+ * @cmd: SCSI command undergoing EH
*
- * Locks: host lock taken by caller
+ * Returns SUCCESS
*/
static int NCR5380_bus_reset(struct scsi_cmnd *cmd)
{
struct Scsi_Host *instance = cmd->device->host;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ int i;
+ unsigned long flags;
+ struct NCR5380_cmd *ncmd;
- NCR5380_local_declare();
- NCR5380_setup(instance);
- NCR5380_print_status(instance);
+ spin_lock_irqsave(&hostdata->lock, flags);
+
+#if (NDEBUG & NDEBUG_ANY)
+ scmd_printk(KERN_INFO, cmd, __func__);
+#endif
+ NCR5380_dprint(NDEBUG_ANY, instance);
+ NCR5380_dprint_phase(NDEBUG_ANY, instance);
- spin_lock_irq(instance->host_lock);
do_reset(instance);
- spin_unlock_irq(instance->host_lock);
+
+ /* reset NCR registers */
+ NCR5380_write(MODE_REG, MR_BASE);
+ NCR5380_write(TARGET_COMMAND_REG, 0);
+ NCR5380_write(SELECT_ENABLE_REG, 0);
+
+ /* After the reset, there are no more connected or disconnected commands
+ * and no busy units; so clear the low-level status here to avoid
+ * conflicts when the mid-level code tries to wake up the affected
+ * commands!
+ */
+
+ hostdata->selecting = NULL;
+
+ list_for_each_entry(ncmd, &hostdata->disconnected, list) {
+ struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd);
+
+ set_host_byte(cmd, DID_RESET);
+ cmd->scsi_done(cmd);
+ }
+
+ list_for_each_entry(ncmd, &hostdata->autosense, list) {
+ struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd);
+
+ set_host_byte(cmd, DID_RESET);
+ cmd->scsi_done(cmd);
+ }
+
+ if (hostdata->connected) {
+ set_host_byte(hostdata->connected, DID_RESET);
+ complete_cmd(instance, hostdata->connected);
+ hostdata->connected = NULL;
+ }
+
+ if (hostdata->sensing) {
+ set_host_byte(hostdata->connected, DID_RESET);
+ complete_cmd(instance, hostdata->sensing);
+ hostdata->sensing = NULL;
+ }
+
+ for (i = 0; i < 8; ++i)
+ hostdata->busy[i] = 0;
+#ifdef REAL_DMA
+ hostdata->dma_len = 0;
+#endif
+
+ queue_work(hostdata->work_q, &hostdata->main_task);
+ spin_unlock_irqrestore(&hostdata->lock, flags);
return SUCCESS;
}
diff --git a/drivers/scsi/NCR5380.h b/drivers/scsi/NCR5380.h
index 162112d..a792886 100644
--- a/drivers/scsi/NCR5380.h
+++ b/drivers/scsi/NCR5380.h
@@ -22,8 +22,13 @@
#ifndef NCR5380_H
#define NCR5380_H
+#include <linux/delay.h>
#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/workqueue.h>
+#include <scsi/scsi_dbg.h>
#include <scsi/scsi_eh.h>
+#include <scsi/scsi_transport_spi.h>
#define NDEBUG_ARBITRATION 0x1
#define NDEBUG_AUTOSENSE 0x2
@@ -158,8 +163,7 @@
/* Write any value to this register to start an ini mode DMA receive */
#define START_DMA_INITIATOR_RECEIVE_REG 7 /* wo */
-#define C400_CONTROL_STATUS_REG NCR53C400_register_offset-8 /* rw */
-
+/* NCR 53C400(A) Control Status Register bits: */
#define CSR_RESET 0x80 /* wo Resets 53c400 */
#define CSR_53C80_REG 0x80 /* ro 5380 registers busy */
#define CSR_TRANS_DIR 0x40 /* rw Data transfer direction */
@@ -176,16 +180,6 @@
#define CSR_BASE CSR_53C80_INTR
#endif
-/* Number of 128-byte blocks to be transferred */
-#define C400_BLOCK_COUNTER_REG NCR53C400_register_offset-7 /* rw */
-
-/* Resume transfer after disconnect */
-#define C400_RESUME_TRANSFER_REG NCR53C400_register_offset-6 /* wo */
-
-/* Access to host buffer stack */
-#define C400_HOST_BUFFER NCR53C400_register_offset-4 /* rw */
-
-
/* Note : PHASE_* macros are based on the values of the STATUS register */
#define PHASE_MASK (SR_MSG | SR_CD | SR_IO)
@@ -205,16 +199,6 @@
#define PHASE_SR_TO_TCR(phase) ((phase) >> 2)
-/*
- * The internal should_disconnect() function returns these based on the
- * expected length of a disconnect if a device supports disconnect/
- * reconnect.
- */
-
-#define DISCONNECT_NONE 0
-#define DISCONNECT_TIME_TO_DATA 1
-#define DISCONNECT_LONG 2
-
/*
* "Special" value for the (unsigned char) command tag, to indicate
* I_T_L nexus instead of I_T_L_Q.
@@ -236,15 +220,11 @@
#define NO_IRQ 0
#endif
-#define FLAG_HAS_LAST_BYTE_SENT 1 /* NCR53c81 or better */
-#define FLAG_CHECK_LAST_BYTE_SENT 2 /* Only test once */
-#define FLAG_NCR53C400 4 /* NCR53c400 */
+#define FLAG_NO_DMA_FIXUP 1 /* No DMA errata workarounds */
#define FLAG_NO_PSEUDO_DMA 8 /* Inhibit DMA */
-#define FLAG_DTC3181E 16 /* DTC3181E */
#define FLAG_LATE_DMA_SETUP 32 /* Setup NCR before DMA H/W */
#define FLAG_TAGGED_QUEUING 64 /* as X3T9.2 spelled it */
-
-#ifndef ASM
+#define FLAG_TOSHIBA_DELAY 128 /* Allow for borken CD-ROMs */
#ifdef SUPPORT_TAGS
struct tag_alloc {
@@ -258,33 +238,24 @@ struct NCR5380_hostdata {
NCR5380_implementation_fields; /* implementation specific */
struct Scsi_Host *host; /* Host backpointer */
unsigned char id_mask, id_higher_mask; /* 1 << id, all bits greater */
- unsigned char targets_present; /* targets we have connected
- to, so we can call a select
- failure a retryable condition */
- volatile unsigned char busy[8]; /* index = target, bit = lun */
+ unsigned char busy[8]; /* index = target, bit = lun */
#if defined(REAL_DMA) || defined(REAL_DMA_POLL)
- volatile int dma_len; /* requested length of DMA */
+ int dma_len; /* requested length of DMA */
#endif
- volatile unsigned char last_message; /* last message OUT */
- volatile struct scsi_cmnd *connected; /* currently connected command */
- volatile struct scsi_cmnd *issue_queue; /* waiting to be issued */
- volatile struct scsi_cmnd *disconnected_queue; /* waiting for reconnect */
- volatile int restart_select; /* we have disconnected,
- used to restart
- NCR5380_select() */
- volatile unsigned aborted:1; /* flag, says aborted */
+ unsigned char last_message; /* last message OUT */
+ struct scsi_cmnd *connected; /* currently connected cmnd */
+ struct scsi_cmnd *selecting; /* cmnd to be connected */
+ struct list_head unissued; /* waiting to be issued */
+ struct list_head autosense; /* priority issue queue */
+ struct list_head disconnected; /* waiting for reconnect */
+ spinlock_t lock; /* protects this struct */
int flags;
- unsigned long time_expires; /* in jiffies, set prior to sleeping */
- int select_time; /* timer in select for target response */
- volatile struct scsi_cmnd *selecting;
- struct delayed_work coroutine; /* our co-routine */
struct scsi_eh_save ses;
+ struct scsi_cmnd *sensing;
char info[256];
int read_overruns; /* number of bytes to cut from a
* transfer to handle chip overruns */
- int retain_dma_intr;
struct work_struct main_task;
- volatile int main_running;
#ifdef SUPPORT_TAGS
struct tag_alloc TagAlloc[8][8]; /* 8 targets and 8 LUNs */
#endif
@@ -292,10 +263,23 @@ struct NCR5380_hostdata {
unsigned spin_max_r;
unsigned spin_max_w;
#endif
+ struct workqueue_struct *work_q;
+ unsigned long accesses_per_ms; /* chip register accesses per ms */
};
#ifdef __KERNEL__
+struct NCR5380_cmd {
+ struct list_head list;
+};
+
+#define NCR5380_CMD_SIZE (sizeof(struct NCR5380_cmd))
+
+static inline struct scsi_cmnd *NCR5380_to_scmd(struct NCR5380_cmd *ncmd_ptr)
+{
+ return ((struct scsi_cmnd *)ncmd_ptr) - 1;
+}
+
#ifndef NDEBUG
#define NDEBUG (0)
#endif
@@ -304,6 +288,11 @@ struct NCR5380_hostdata {
do { if ((NDEBUG) & (flg)) \
printk(KERN_DEBUG fmt, ## __VA_ARGS__); } while (0)
+#define dsprintk(flg, host, fmt, ...) \
+ do { if ((NDEBUG) & (flg)) \
+ shost_printk(KERN_DEBUG, host, fmt, ## __VA_ARGS__); \
+ } while (0)
+
#if NDEBUG
#define NCR5380_dprint(flg, arg) \
do { if ((NDEBUG) & (flg)) NCR5380_print(arg); } while (0)
@@ -320,6 +309,7 @@ static void NCR5380_print(struct Scsi_Host *instance);
static int NCR5380_probe_irq(struct Scsi_Host *instance, int possible);
#endif
static int NCR5380_init(struct Scsi_Host *instance, int flags);
+static int NCR5380_maybe_reset_bus(struct Scsi_Host *);
static void NCR5380_exit(struct Scsi_Host *instance);
static void NCR5380_information_transfer(struct Scsi_Host *instance);
#ifndef DONT_USE_INTR
@@ -328,7 +318,7 @@ static irqreturn_t NCR5380_intr(int irq, void *dev_id);
static void NCR5380_main(struct work_struct *work);
static const char *NCR5380_info(struct Scsi_Host *instance);
static void NCR5380_reselect(struct Scsi_Host *instance);
-static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd);
+static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *, struct scsi_cmnd *);
#if defined(PSEUDO_DMA) || defined(REAL_DMA) || defined(REAL_DMA_POLL)
static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase, int *count, unsigned char **data);
#endif
@@ -443,5 +433,4 @@ static __inline__ int NCR5380_pc_dma_residual(struct Scsi_Host *instance)
#endif /* defined(i386) || defined(__alpha__) */
#endif /* defined(REAL_DMA) */
#endif /* __KERNEL__ */
-#endif /* ndef ASM */
#endif /* NCR5380_H */
diff --git a/drivers/scsi/arm/cumana_1.c b/drivers/scsi/arm/cumana_1.c
index d28d6c0..221f18c 100644
--- a/drivers/scsi/arm/cumana_1.c
+++ b/drivers/scsi/arm/cumana_1.c
@@ -4,9 +4,7 @@
* Copyright 1995-2002, Russell King
*/
#include <linux/module.h>
-#include <linux/signal.h>
#include <linux/ioport.h>
-#include <linux/delay.h>
#include <linux/blkdev.h>
#include <linux/init.h>
@@ -15,15 +13,14 @@
#include <scsi/scsi_host.h>
-#include <scsi/scsicam.h>
-
#define PSEUDO_DMA
#define priv(host) ((struct NCR5380_hostdata *)(host)->hostdata)
-#define NCR5380_local_declare() struct Scsi_Host *_instance
-#define NCR5380_setup(instance) _instance = instance
-#define NCR5380_read(reg) cumanascsi_read(_instance, reg)
-#define NCR5380_write(reg, value) cumanascsi_write(_instance, reg, value)
+#define NCR5380_read(reg) cumanascsi_read(instance, reg)
+#define NCR5380_write(reg, value) cumanascsi_write(instance, reg, value)
+
+#define NCR5380_dma_xfer_len(instance, cmd, phase) (cmd->transfersize)
+
#define NCR5380_intr cumanascsi_intr
#define NCR5380_queue_command cumanascsi_queue_command
#define NCR5380_info cumanascsi_info
@@ -211,6 +208,8 @@ static struct scsi_host_template cumanascsi_template = {
.cmd_per_lun = 2,
.use_clustering = DISABLE_CLUSTERING,
.proc_name = "CumanaSCSI-1",
+ .cmd_size = NCR5380_CMD_SIZE,
+ .max_sectors = 128,
};
static int cumanascsi1_probe(struct expansion_card *ec,
@@ -240,23 +239,21 @@ static int cumanascsi1_probe(struct expansion_card *ec,
host->irq = ec->irq;
- NCR5380_init(host, 0);
+ ret = NCR5380_init(host, 0);
+ if (ret)
+ goto out_unmap;
+
+ NCR5380_maybe_reset_bus(host);
priv(host)->ctrl = 0;
writeb(0, priv(host)->base + CTRL);
- host->n_io_port = 255;
- if (!(request_region(host->io_port, host->n_io_port, "CumanaSCSI-1"))) {
- ret = -EBUSY;
- goto out_unmap;
- }
-
ret = request_irq(host->irq, cumanascsi_intr, 0,
"CumanaSCSI-1", host);
if (ret) {
printk("scsi%d: IRQ%d not free: %d\n",
host->host_no, host->irq, ret);
- goto out_unmap;
+ goto out_exit;
}
ret = scsi_add_host(host, &ec->dev);
@@ -268,6 +265,8 @@ static int cumanascsi1_probe(struct expansion_card *ec,
out_free_irq:
free_irq(host->irq, host);
+ out_exit:
+ NCR5380_exit(host);
out_unmap:
iounmap(priv(host)->base);
iounmap(priv(host)->dma);
diff --git a/drivers/scsi/arm/oak.c b/drivers/scsi/arm/oak.c
index 7c6fa14..1fab1d18 100644
--- a/drivers/scsi/arm/oak.c
+++ b/drivers/scsi/arm/oak.c
@@ -5,9 +5,7 @@
*/
#include <linux/module.h>
-#include <linux/signal.h>
#include <linux/ioport.h>
-#include <linux/delay.h>
#include <linux/blkdev.h>
#include <linux/init.h>
@@ -20,14 +18,16 @@
#define DONT_USE_INTR
#define priv(host) ((struct NCR5380_hostdata *)(host)->hostdata)
-#define NCR5380_local_declare() void __iomem *_base
-#define NCR5380_setup(host) _base = priv(host)->base
-#define NCR5380_read(reg) readb(_base + ((reg) << 2))
-#define NCR5380_write(reg, value) writeb(value, _base + ((reg) << 2))
+#define NCR5380_read(reg) \
+ readb(priv(instance)->base + ((reg) << 2))
+#define NCR5380_write(reg, value) \
+ writeb(value, priv(instance)->base + ((reg) << 2))
+
+#define NCR5380_dma_xfer_len(instance, cmd, phase) (cmd->transfersize)
+
#define NCR5380_queue_command oakscsi_queue_command
#define NCR5380_info oakscsi_info
-#define NCR5380_show_info oakscsi_show_info
#define NCR5380_implementation_fields \
void __iomem *base
@@ -103,7 +103,6 @@ printk("reading %p len %d\n", addr, len);
static struct scsi_host_template oakscsi_template = {
.module = THIS_MODULE,
- .show_info = oakscsi_show_info,
.name = "Oak 16-bit SCSI",
.info = oakscsi_info,
.queuecommand = oakscsi_queue_command,
@@ -115,6 +114,8 @@ static struct scsi_host_template oakscsi_template = {
.cmd_per_lun = 2,
.use_clustering = DISABLE_CLUSTERING,
.proc_name = "oakscsi",
+ .cmd_size = NCR5380_CMD_SIZE,
+ .max_sectors = 128,
};
static int oakscsi_probe(struct expansion_card *ec, const struct ecard_id *id)
@@ -142,15 +143,21 @@ static int oakscsi_probe(struct expansion_card *ec, const struct ecard_id *id)
host->irq = NO_IRQ;
host->n_io_port = 255;
- NCR5380_init(host, 0);
+ ret = NCR5380_init(host, 0);
+ if (ret)
+ goto out_unmap;
+
+ NCR5380_maybe_reset_bus(host);
ret = scsi_add_host(host, &ec->dev);
if (ret)
- goto out_unmap;
+ goto out_exit;
scsi_scan_host(host);
goto out;
+ out_exit:
+ NCR5380_exit(host);
out_unmap:
iounmap(priv(host)->base);
unreg:
diff --git a/drivers/scsi/atari_NCR5380.c b/drivers/scsi/atari_NCR5380.c
index db87ece..e654786 100644
--- a/drivers/scsi/atari_NCR5380.c
+++ b/drivers/scsi/atari_NCR5380.c
@@ -1,15 +1,15 @@
/*
* NCR 5380 generic driver routines. These should make it *trivial*
- * to implement 5380 SCSI drivers under Linux with a non-trantor
- * architecture.
+ * to implement 5380 SCSI drivers under Linux with a non-trantor
+ * architecture.
*
- * Note that these routines also work with NR53c400 family chips.
+ * Note that these routines also work with NR53c400 family chips.
*
* Copyright 1993, Drew Eckhardt
- * Visionary Computing
- * (Unix and Linux consulting and custom programming)
- * drew@colorado.edu
- * +1 (303) 666-5836
+ * Visionary Computing
+ * (Unix and Linux consulting and custom programming)
+ * drew@colorado.edu
+ * +1 (303) 666-5836
*
* For more information, please consult
*
@@ -24,84 +24,10 @@
* 1+ (800) 334-5454
*/
-/*
- * ++roman: To port the 5380 driver to the Atari, I had to do some changes in
- * this file, too:
- *
- * - Some of the debug statements were incorrect (undefined variables and the
- * like). I fixed that.
- *
- * - In information_transfer(), I think a #ifdef was wrong. Looking at the
- * possible DMA transfer size should also happen for REAL_DMA. I added this
- * in the #if statement.
- *
- * - When using real DMA, information_transfer() should return in a DATAOUT
- * phase after starting the DMA. It has nothing more to do.
- *
- * - The interrupt service routine should run main after end of DMA, too (not
- * only after RESELECTION interrupts). Additionally, it should _not_ test
- * for more interrupts after running main, since a DMA process may have
- * been started and interrupts are turned on now. The new int could happen
- * inside the execution of NCR5380_intr(), leading to recursive
- * calls.
- *
- * - I've added a function merge_contiguous_buffers() that tries to
- * merge scatter-gather buffers that are located at contiguous
- * physical addresses and can be processed with the same DMA setup.
- * Since most scatter-gather operations work on a page (4K) of
- * 4 buffers (1K), in more than 90% of all cases three interrupts and
- * DMA setup actions are saved.
- *
- * - I've deleted all the stuff for AUTOPROBE_IRQ, REAL_DMA_POLL, PSEUDO_DMA
- * and USLEEP, because these were messing up readability and will never be
- * needed for Atari SCSI.
- *
- * - I've revised the NCR5380_main() calling scheme (relax the 'main_running'
- * stuff), and 'main' is executed in a bottom half if awoken by an
- * interrupt.
- *
- * - The code was quite cluttered up by "#if (NDEBUG & NDEBUG_*) printk..."
- * constructs. In my eyes, this made the source rather unreadable, so I
- * finally replaced that by the *_PRINTK() macros.
- *
- */
-
-/*
- * Further development / testing that should be done :
- * 1. Test linked command handling code after Eric is ready with
- * the high level code.
- */
+/* Ported to Atari by Roman Hodek and others. */
/* Adapted for the sun3 by Sam Creasey. */
-#include <scsi/scsi_dbg.h>
-#include <scsi/scsi_transport_spi.h>
-
-#if (NDEBUG & NDEBUG_LISTS)
-#define LIST(x, y) \
- do { \
- printk("LINE:%d Adding %p to %p\n", \
- __LINE__, (void*)(x), (void*)(y)); \
- if ((x) == (y)) \
- udelay(5); \
- } while (0)
-#define REMOVE(w, x, y, z) \
- do { \
- printk("LINE:%d Removing: %p->%p %p->%p \n", \
- __LINE__, (void*)(w), (void*)(x), \
- (void*)(y), (void*)(z)); \
- if ((x) == (y)) \
- udelay(5); \
- } while (0)
-#else
-#define LIST(x,y)
-#define REMOVE(w,x,y,z)
-#endif
-
-#ifndef notyet
-#undef LINKED
-#endif
-
/*
* Design
*
@@ -126,17 +52,7 @@
* piece of hardware that requires you to sit in a loop polling for
* the REQ signal as long as you are connected. Some devices are
* brain dead (ie, many TEXEL CD ROM drives) and won't disconnect
- * while doing long seek operations.
- *
- * The workaround for this is to keep track of devices that have
- * disconnected. If the device hasn't disconnected, for commands that
- * should disconnect, we do something like
- *
- * while (!REQ is asserted) { sleep for N usecs; poll for M usecs }
- *
- * Some tweaking of N and M needs to be done. An algorithm based
- * on "time to data" would give the best results as long as short time
- * to datas (ie, on the same track) were considered, however these
+ * while doing long seek operations. [...] These
* broken devices are the exception rather than the rule and I'd rather
* spend my time optimizing for the normal case.
*
@@ -177,12 +93,10 @@
*
* These macros control options :
* AUTOSENSE - if defined, REQUEST SENSE will be performed automatically
- * for commands that return with a CHECK CONDITION status.
+ * for commands that return with a CHECK CONDITION status.
*
* DIFFERENTIAL - if defined, NCR53c81 chips will use external differential
- * transceivers.
- *
- * LINKED - if defined, linked commands are supported.
+ * transceivers.
*
* REAL_DMA - if defined, REAL DMA is used during the data transfer phases.
*
@@ -195,17 +109,17 @@
* NCR5380_write(register, value) - write to the specific register
*
* NCR5380_implementation_fields - additional fields needed for this
- * specific implementation of the NCR5380
+ * specific implementation of the NCR5380
*
* Either real DMA *or* pseudo DMA may be implemented
* REAL functions :
* NCR5380_REAL_DMA should be defined if real DMA is to be used.
* Note that the DMA setup functions should return the number of bytes
- * that they were able to program the controller for.
+ * that they were able to program the controller for.
*
* Also note that generic i386/PC versions of these macros are
- * available as NCR5380_i386_dma_write_setup,
- * NCR5380_i386_dma_read_setup, and NCR5380_i386_dma_residual.
+ * available as NCR5380_i386_dma_write_setup,
+ * NCR5380_i386_dma_read_setup, and NCR5380_i386_dma_residual.
*
* NCR5380_dma_write_setup(instance, src, count) - initialize
* NCR5380_dma_read_setup(instance, dst, count) - initialize
@@ -221,18 +135,8 @@
* possible) function may be used.
*/
-/* Macros ease life... :-) */
-#define SETUP_HOSTDATA(in) \
- struct NCR5380_hostdata *hostdata = \
- (struct NCR5380_hostdata *)(in)->hostdata
-#define HOSTDATA(in) ((struct NCR5380_hostdata *)(in)->hostdata)
-
-#define NEXT(cmd) ((struct scsi_cmnd *)(cmd)->host_scribble)
-#define SET_NEXT(cmd,next) ((cmd)->host_scribble = (void *)(next))
-#define NEXTADDR(cmd) ((struct scsi_cmnd **)&(cmd)->host_scribble)
-
-#define HOSTNO instance->host_no
-#define H_NO(cmd) (cmd)->device->host->host_no
+static int do_abort(struct Scsi_Host *);
+static void do_reset(struct Scsi_Host *);
#ifdef SUPPORT_TAGS
@@ -251,9 +155,7 @@
* cannot know it in advance :-( We just see a QUEUE_FULL status being
* returned. So, in this case, the driver internal queue size assumption is
* reduced to the number of active tags if QUEUE_FULL is returned by the
- * target. The command is returned to the mid-level, but with status changed
- * to BUSY, since --as I've seen-- the mid-level can't handle QUEUE_FULL
- * correctly.
+ * target.
*
* We're also not allowed running tagged commands as long as an untagged
* command is active. And REQUEST SENSE commands after a contingent allegiance
@@ -304,7 +206,8 @@ static void __init init_tags(struct NCR5380_hostdata *hostdata)
static int is_lun_busy(struct scsi_cmnd *cmd, int should_be_tagged)
{
u8 lun = cmd->device->lun;
- SETUP_HOSTDATA(cmd->device->host);
+ struct Scsi_Host *instance = cmd->device->host;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
if (hostdata->busy[cmd->device->id] & (1 << lun))
return 1;
@@ -314,8 +217,8 @@ static int is_lun_busy(struct scsi_cmnd *cmd, int should_be_tagged)
return 0;
if (hostdata->TagAlloc[scmd_id(cmd)][lun].nr_allocated >=
hostdata->TagAlloc[scmd_id(cmd)][lun].queue_size) {
- dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %d: no free tags\n",
- H_NO(cmd), cmd->device->id, lun);
+ dsprintk(NDEBUG_TAGS, instance, "target %d lun %d: no free tags\n",
+ scmd_id(cmd), lun);
return 1;
}
return 0;
@@ -330,7 +233,8 @@ static int is_lun_busy(struct scsi_cmnd *cmd, int should_be_tagged)
static void cmd_get_tag(struct scsi_cmnd *cmd, int should_be_tagged)
{
u8 lun = cmd->device->lun;
- SETUP_HOSTDATA(cmd->device->host);
+ struct Scsi_Host *instance = cmd->device->host;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
/* If we or the target don't support tagged queuing, allocate the LUN for
* an untagged command.
@@ -340,18 +244,16 @@ static void cmd_get_tag(struct scsi_cmnd *cmd, int should_be_tagged)
!cmd->device->tagged_supported) {
cmd->tag = TAG_NONE;
hostdata->busy[cmd->device->id] |= (1 << lun);
- dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %d now allocated by untagged "
- "command\n", H_NO(cmd), cmd->device->id, lun);
+ dsprintk(NDEBUG_TAGS, instance, "target %d lun %d now allocated by untagged command\n",
+ scmd_id(cmd), lun);
} else {
struct tag_alloc *ta = &hostdata->TagAlloc[scmd_id(cmd)][lun];
cmd->tag = find_first_zero_bit(ta->allocated, MAX_TAGS);
set_bit(cmd->tag, ta->allocated);
ta->nr_allocated++;
- dprintk(NDEBUG_TAGS, "scsi%d: using tag %d for target %d lun %d "
- "(now %d tags in use)\n",
- H_NO(cmd), cmd->tag, cmd->device->id,
- lun, ta->nr_allocated);
+ dsprintk(NDEBUG_TAGS, instance, "using tag %d for target %d lun %d (%d tags allocated)\n",
+ cmd->tag, scmd_id(cmd), lun, ta->nr_allocated);
}
}
@@ -363,21 +265,22 @@ static void cmd_get_tag(struct scsi_cmnd *cmd, int should_be_tagged)
static void cmd_free_tag(struct scsi_cmnd *cmd)
{
u8 lun = cmd->device->lun;
- SETUP_HOSTDATA(cmd->device->host);
+ struct Scsi_Host *instance = cmd->device->host;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
if (cmd->tag == TAG_NONE) {
hostdata->busy[cmd->device->id] &= ~(1 << lun);
- dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %d untagged cmd finished\n",
- H_NO(cmd), cmd->device->id, lun);
+ dsprintk(NDEBUG_TAGS, instance, "target %d lun %d untagged cmd freed\n",
+ scmd_id(cmd), lun);
} else if (cmd->tag >= MAX_TAGS) {
- printk(KERN_NOTICE "scsi%d: trying to free bad tag %d!\n",
- H_NO(cmd), cmd->tag);
+ shost_printk(KERN_NOTICE, instance,
+ "trying to free bad tag %d!\n", cmd->tag);
} else {
struct tag_alloc *ta = &hostdata->TagAlloc[scmd_id(cmd)][lun];
clear_bit(cmd->tag, ta->allocated);
ta->nr_allocated--;
- dprintk(NDEBUG_TAGS, "scsi%d: freed tag %d for target %d lun %d\n",
- H_NO(cmd), cmd->tag, cmd->device->id, lun);
+ dsprintk(NDEBUG_TAGS, instance, "freed tag %d for target %d lun %d\n",
+ cmd->tag, scmd_id(cmd), lun);
}
}
@@ -401,17 +304,15 @@ static void free_all_tags(struct NCR5380_hostdata *hostdata)
#endif /* SUPPORT_TAGS */
-
-/*
- * Function: void merge_contiguous_buffers( struct scsi_cmnd *cmd )
- *
- * Purpose: Try to merge several scatter-gather requests into one DMA
- * transfer. This is possible if the scatter buffers lie on
- * physical contiguous addresses.
- *
- * Parameters: struct scsi_cmnd *cmd
- * The command to work on. The first scatter buffer's data are
- * assumed to be already transferred into ptr/this_residual.
+/**
+ * merge_contiguous_buffers - coalesce scatter-gather list entries
+ * @cmd: command requesting IO
+ *
+ * Try to merge several scatter-gather buffers into one DMA transfer.
+ * This is possible if the scatter buffers lie on physically
+ * contiguous addresses. The first scatter-gather buffer's data are
+ * assumed to be already transferred into cmd->SCp.this_residual.
+ * Every buffer merged avoids an interrupt and a DMA setup operation.
*/
static void merge_contiguous_buffers(struct scsi_cmnd *cmd)
@@ -463,9 +364,7 @@ static inline void initialize_SCp(struct scsi_cmnd *cmd)
cmd->SCp.buffers_residual = scsi_sg_count(cmd) - 1;
cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
cmd->SCp.this_residual = cmd->SCp.buffer->length;
- /* ++roman: Try to merge some scatter-buffers if they are at
- * contiguous physical addresses.
- */
+
merge_contiguous_buffers(cmd);
} else {
cmd->SCp.buffer = NULL;
@@ -473,31 +372,110 @@ static inline void initialize_SCp(struct scsi_cmnd *cmd)
cmd->SCp.ptr = NULL;
cmd->SCp.this_residual = 0;
}
+
+ cmd->SCp.Status = 0;
+ cmd->SCp.Message = 0;
+}
+
+/**
+ * NCR5380_poll_politely2 - wait for two chip register values
+ * @instance: controller to poll
+ * @reg1: 5380 register to poll
+ * @bit1: Bitmask to check
+ * @val1: Expected value
+ * @reg2: Second 5380 register to poll
+ * @bit2: Second bitmask to check
+ * @val2: Second expected value
+ * @wait: Time-out in jiffies
+ *
+ * Polls the chip in a reasonably efficient manner waiting for an
+ * event to occur. After a short quick poll we begin to yield the CPU
+ * (if possible). In irq contexts the time-out is arbitrarily limited.
+ * Callers may hold locks as long as they are held in irq mode.
+ *
+ * Returns 0 if either or both event(s) occurred otherwise -ETIMEDOUT.
+ */
+
+static int NCR5380_poll_politely2(struct Scsi_Host *instance,
+ int reg1, int bit1, int val1,
+ int reg2, int bit2, int val2, int wait)
+{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ unsigned long deadline = jiffies + wait;
+ unsigned long n;
+
+ /* Busy-wait for up to 10 ms */
+ n = min(10000U, jiffies_to_usecs(wait));
+ n *= hostdata->accesses_per_ms;
+ n /= 2000;
+ do {
+ if ((NCR5380_read(reg1) & bit1) == val1)
+ return 0;
+ if ((NCR5380_read(reg2) & bit2) == val2)
+ return 0;
+ cpu_relax();
+ } while (n--);
+
+ if (irqs_disabled() || in_interrupt())
+ return -ETIMEDOUT;
+
+ /* Repeatedly sleep for 1 ms until deadline */
+ while (time_is_after_jiffies(deadline)) {
+ schedule_timeout_uninterruptible(1);
+ if ((NCR5380_read(reg1) & bit1) == val1)
+ return 0;
+ if ((NCR5380_read(reg2) & bit2) == val2)
+ return 0;
+ }
+
+ return -ETIMEDOUT;
}
-#include <linux/delay.h>
+static inline int NCR5380_poll_politely(struct Scsi_Host *instance,
+ int reg, int bit, int val, int wait)
+{
+ return NCR5380_poll_politely2(instance, reg, bit, val,
+ reg, bit, val, wait);
+}
#if NDEBUG
static struct {
unsigned char mask;
const char *name;
} signals[] = {
- { SR_DBP, "PARITY"}, { SR_RST, "RST" }, { SR_BSY, "BSY" },
- { SR_REQ, "REQ" }, { SR_MSG, "MSG" }, { SR_CD, "CD" }, { SR_IO, "IO" },
- { SR_SEL, "SEL" }, {0, NULL}
-}, basrs[] = {
- {BASR_ATN, "ATN"}, {BASR_ACK, "ACK"}, {0, NULL}
-}, icrs[] = {
- {ICR_ASSERT_RST, "ASSERT RST"},{ICR_ASSERT_ACK, "ASSERT ACK"},
- {ICR_ASSERT_BSY, "ASSERT BSY"}, {ICR_ASSERT_SEL, "ASSERT SEL"},
- {ICR_ASSERT_ATN, "ASSERT ATN"}, {ICR_ASSERT_DATA, "ASSERT DATA"},
+ {SR_DBP, "PARITY"},
+ {SR_RST, "RST"},
+ {SR_BSY, "BSY"},
+ {SR_REQ, "REQ"},
+ {SR_MSG, "MSG"},
+ {SR_CD, "CD"},
+ {SR_IO, "IO"},
+ {SR_SEL, "SEL"},
+ {0, NULL}
+},
+basrs[] = {
+ {BASR_ATN, "ATN"},
+ {BASR_ACK, "ACK"},
{0, NULL}
-}, mrs[] = {
- {MR_BLOCK_DMA_MODE, "MODE BLOCK DMA"}, {MR_TARGET, "MODE TARGET"},
- {MR_ENABLE_PAR_CHECK, "MODE PARITY CHECK"}, {MR_ENABLE_PAR_INTR,
- "MODE PARITY INTR"}, {MR_ENABLE_EOP_INTR,"MODE EOP INTR"},
+},
+icrs[] = {
+ {ICR_ASSERT_RST, "ASSERT RST"},
+ {ICR_ASSERT_ACK, "ASSERT ACK"},
+ {ICR_ASSERT_BSY, "ASSERT BSY"},
+ {ICR_ASSERT_SEL, "ASSERT SEL"},
+ {ICR_ASSERT_ATN, "ASSERT ATN"},
+ {ICR_ASSERT_DATA, "ASSERT DATA"},
+ {0, NULL}
+},
+mrs[] = {
+ {MR_BLOCK_DMA_MODE, "MODE BLOCK DMA"},
+ {MR_TARGET, "MODE TARGET"},
+ {MR_ENABLE_PAR_CHECK, "MODE PARITY CHECK"},
+ {MR_ENABLE_PAR_INTR, "MODE PARITY INTR"},
+ {MR_ENABLE_EOP_INTR, "MODE EOP INTR"},
{MR_MONITOR_BSY, "MODE MONITOR BSY"},
- {MR_DMA_MODE, "MODE DMA"}, {MR_ARBITRATE, "MODE ARBITRATION"},
+ {MR_DMA_MODE, "MODE DMA"},
+ {MR_ARBITRATE, "MODE ARBITRATION"},
{0, NULL}
};
@@ -511,15 +489,13 @@ static struct {
static void NCR5380_print(struct Scsi_Host *instance)
{
unsigned char status, data, basr, mr, icr, i;
- unsigned long flags;
- local_irq_save(flags);
data = NCR5380_read(CURRENT_SCSI_DATA_REG);
status = NCR5380_read(STATUS_REG);
mr = NCR5380_read(MODE_REG);
icr = NCR5380_read(INITIATOR_COMMAND_REG);
basr = NCR5380_read(BUS_AND_STATUS_REG);
- local_irq_restore(flags);
+
printk("STATUS_REG: %02x ", status);
for (i = 0; signals[i].mask; ++i)
if (status & signals[i].mask)
@@ -543,8 +519,12 @@ static struct {
unsigned char value;
const char *name;
} phases[] = {
- {PHASE_DATAOUT, "DATAOUT"}, {PHASE_DATAIN, "DATAIN"}, {PHASE_CMDOUT, "CMDOUT"},
- {PHASE_STATIN, "STATIN"}, {PHASE_MSGOUT, "MSGOUT"}, {PHASE_MSGIN, "MSGIN"},
+ {PHASE_DATAOUT, "DATAOUT"},
+ {PHASE_DATAIN, "DATAIN"},
+ {PHASE_CMDOUT, "CMDOUT"},
+ {PHASE_STATIN, "STATIN"},
+ {PHASE_MSGOUT, "MSGOUT"},
+ {PHASE_MSGIN, "MSGIN"},
{PHASE_UNKNOWN, "UNKNOWN"}
};
@@ -553,8 +533,6 @@ static struct {
* @instance: adapter to dump
*
* Print the current SCSI phase for debugging purposes
- *
- * Locks: none
*/
static void NCR5380_print_phase(struct Scsi_Host *instance)
@@ -564,54 +542,21 @@ static void NCR5380_print_phase(struct Scsi_Host *instance)
status = NCR5380_read(STATUS_REG);
if (!(status & SR_REQ))
- printk(KERN_DEBUG "scsi%d: REQ not asserted, phase unknown.\n", HOSTNO);
+ shost_printk(KERN_DEBUG, instance, "REQ not asserted, phase unknown.\n");
else {
for (i = 0; (phases[i].value != PHASE_UNKNOWN) &&
(phases[i].value != (status & PHASE_MASK)); ++i)
;
- printk(KERN_DEBUG "scsi%d: phase %s\n", HOSTNO, phases[i].name);
+ shost_printk(KERN_DEBUG, instance, "phase %s\n", phases[i].name);
}
}
-
#endif
-/*
- * ++roman: New scheme of calling NCR5380_main()
- *
- * If we're not in an interrupt, we can call our main directly, it cannot be
- * already running. Else, we queue it on a task queue, if not 'main_running'
- * tells us that a lower level is already executing it. This way,
- * 'main_running' needs not be protected in a special way.
- *
- * queue_main() is a utility function for putting our main onto the task
- * queue, if main_running is false. It should be called only from a
- * interrupt or bottom half.
- */
-
-#include <linux/gfp.h>
-#include <linux/workqueue.h>
-#include <linux/interrupt.h>
-
-static inline void queue_main(struct NCR5380_hostdata *hostdata)
-{
- if (!hostdata->main_running) {
- /* If in interrupt and NCR5380_main() not already running,
- queue it on the 'immediate' task queue, to be processed
- immediately after the current interrupt processing has
- finished. */
- schedule_work(&hostdata->main_task);
- }
- /* else: nothing to do: the running NCR5380_main() will pick up
- any newly queued command. */
-}
-
/**
* NCR58380_info - report driver and host information
* @instance: relevant scsi host instance
*
* For use as the host template info() handler.
- *
- * Locks: none
*/
static const char *NCR5380_info(struct Scsi_Host *instance)
@@ -630,13 +575,14 @@ static void prepare_info(struct Scsi_Host *instance)
"base 0x%lx, irq %d, "
"can_queue %d, cmd_per_lun %d, "
"sg_tablesize %d, this_id %d, "
- "flags { %s}, "
+ "flags { %s%s}, "
"options { %s} ",
instance->hostt->name, instance->io_port, instance->n_io_port,
instance->base, instance->irq,
instance->can_queue, instance->cmd_per_lun,
instance->sg_tablesize, instance->this_id,
hostdata->flags & FLAG_TAGGED_QUEUING ? "TAGGED_QUEUING " : "",
+ hostdata->flags & FLAG_TOSHIBA_DELAY ? "TOSHIBA_DELAY " : "",
#ifdef DIFFERENTIAL
"DIFFERENTIAL "
#endif
@@ -653,102 +599,6 @@ static void prepare_info(struct Scsi_Host *instance)
}
/**
- * NCR5380_print_status - dump controller info
- * @instance: controller to dump
- *
- * Print commands in the various queues, called from NCR5380_abort
- * to aid debugging.
- */
-
-static void lprint_Scsi_Cmnd(struct scsi_cmnd *cmd)
-{
- int i, s;
- unsigned char *command;
- printk("scsi%d: destination target %d, lun %llu\n",
- H_NO(cmd), cmd->device->id, cmd->device->lun);
- printk(KERN_CONT " command = ");
- command = cmd->cmnd;
- printk(KERN_CONT "%2d (0x%02x)", command[0], command[0]);
- for (i = 1, s = COMMAND_SIZE(command[0]); i < s; ++i)
- printk(KERN_CONT " %02x", command[i]);
- printk("\n");
-}
-
-static void NCR5380_print_status(struct Scsi_Host *instance)
-{
- struct NCR5380_hostdata *hostdata;
- struct scsi_cmnd *ptr;
- unsigned long flags;
-
- NCR5380_dprint(NDEBUG_ANY, instance);
- NCR5380_dprint_phase(NDEBUG_ANY, instance);
-
- hostdata = (struct NCR5380_hostdata *)instance->hostdata;
-
- local_irq_save(flags);
- printk("NCR5380: coroutine is%s running.\n",
- hostdata->main_running ? "" : "n't");
- if (!hostdata->connected)
- printk("scsi%d: no currently connected command\n", HOSTNO);
- else
- lprint_Scsi_Cmnd((struct scsi_cmnd *) hostdata->connected);
- printk("scsi%d: issue_queue\n", HOSTNO);
- for (ptr = (struct scsi_cmnd *)hostdata->issue_queue; ptr; ptr = NEXT(ptr))
- lprint_Scsi_Cmnd(ptr);
-
- printk("scsi%d: disconnected_queue\n", HOSTNO);
- for (ptr = (struct scsi_cmnd *) hostdata->disconnected_queue; ptr;
- ptr = NEXT(ptr))
- lprint_Scsi_Cmnd(ptr);
-
- local_irq_restore(flags);
- printk("\n");
-}
-
-static void show_Scsi_Cmnd(struct scsi_cmnd *cmd, struct seq_file *m)
-{
- int i, s;
- unsigned char *command;
- seq_printf(m, "scsi%d: destination target %d, lun %llu\n",
- H_NO(cmd), cmd->device->id, cmd->device->lun);
- seq_puts(m, " command = ");
- command = cmd->cmnd;
- seq_printf(m, "%2d (0x%02x)", command[0], command[0]);
- for (i = 1, s = COMMAND_SIZE(command[0]); i < s; ++i)
- seq_printf(m, " %02x", command[i]);
- seq_putc(m, '\n');
-}
-
-static int __maybe_unused NCR5380_show_info(struct seq_file *m,
- struct Scsi_Host *instance)
-{
- struct NCR5380_hostdata *hostdata;
- struct scsi_cmnd *ptr;
- unsigned long flags;
-
- hostdata = (struct NCR5380_hostdata *)instance->hostdata;
-
- local_irq_save(flags);
- seq_printf(m, "NCR5380: coroutine is%s running.\n",
- hostdata->main_running ? "" : "n't");
- if (!hostdata->connected)
- seq_printf(m, "scsi%d: no currently connected command\n", HOSTNO);
- else
- show_Scsi_Cmnd((struct scsi_cmnd *) hostdata->connected, m);
- seq_printf(m, "scsi%d: issue_queue\n", HOSTNO);
- for (ptr = (struct scsi_cmnd *)hostdata->issue_queue; ptr; ptr = NEXT(ptr))
- show_Scsi_Cmnd(ptr, m);
-
- seq_printf(m, "scsi%d: disconnected_queue\n", HOSTNO);
- for (ptr = (struct scsi_cmnd *) hostdata->disconnected_queue; ptr;
- ptr = NEXT(ptr))
- show_Scsi_Cmnd(ptr, m);
-
- local_irq_restore(flags);
- return 0;
-}
-
-/**
* NCR5380_init - initialise an NCR5380
* @instance: adapter to configure
* @flags: control flags
@@ -764,11 +614,11 @@ static int __maybe_unused NCR5380_show_info(struct seq_file *m,
static int __init NCR5380_init(struct Scsi_Host *instance, int flags)
{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
int i;
- SETUP_HOSTDATA(instance);
+ unsigned long deadline;
hostdata->host = instance;
- hostdata->aborted = 0;
hostdata->id_mask = 1 << instance->this_id;
hostdata->id_higher_mask = 0;
for (i = hostdata->id_mask; i <= 0x80; i <<= 1)
@@ -782,13 +632,21 @@ static int __init NCR5380_init(struct Scsi_Host *instance, int flags)
#if defined (REAL_DMA)
hostdata->dma_len = 0;
#endif
- hostdata->targets_present = 0;
+ spin_lock_init(&hostdata->lock);
hostdata->connected = NULL;
- hostdata->issue_queue = NULL;
- hostdata->disconnected_queue = NULL;
+ hostdata->sensing = NULL;
+ INIT_LIST_HEAD(&hostdata->autosense);
+ INIT_LIST_HEAD(&hostdata->unissued);
+ INIT_LIST_HEAD(&hostdata->disconnected);
+
hostdata->flags = flags;
INIT_WORK(&hostdata->main_task, NCR5380_main);
+ hostdata->work_q = alloc_workqueue("ncr5380_%d",
+ WQ_UNBOUND | WQ_MEM_RECLAIM,
+ 1, instance->host_no);
+ if (!hostdata->work_q)
+ return -ENOMEM;
prepare_info(instance);
@@ -797,6 +655,72 @@ static int __init NCR5380_init(struct Scsi_Host *instance, int flags)
NCR5380_write(TARGET_COMMAND_REG, 0);
NCR5380_write(SELECT_ENABLE_REG, 0);
+ /* Calibrate register polling loop */
+ i = 0;
+ deadline = jiffies + 1;
+ do {
+ cpu_relax();
+ } while (time_is_after_jiffies(deadline));
+ deadline += msecs_to_jiffies(256);
+ do {
+ NCR5380_read(STATUS_REG);
+ ++i;
+ cpu_relax();
+ } while (time_is_after_jiffies(deadline));
+ hostdata->accesses_per_ms = i / 256;
+
+ return 0;
+}
+
+/**
+ * NCR5380_maybe_reset_bus - Detect and correct bus wedge problems.
+ * @instance: adapter to check
+ *
+ * If the system crashed, it may have crashed with a connected target and
+ * the SCSI bus busy. Check for BUS FREE phase. If not, try to abort the
+ * currently established nexus, which we know nothing about. Failing that
+ * do a bus reset.
+ *
+ * Note that a bus reset will cause the chip to assert IRQ.
+ *
+ * Returns 0 if successful, otherwise -ENXIO.
+ */
+
+static int NCR5380_maybe_reset_bus(struct Scsi_Host *instance)
+{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ int pass;
+
+ for (pass = 1; (NCR5380_read(STATUS_REG) & SR_BSY) && pass <= 6; ++pass) {
+ switch (pass) {
+ case 1:
+ case 3:
+ case 5:
+ shost_printk(KERN_ERR, instance, "SCSI bus busy, waiting up to five seconds\n");
+ NCR5380_poll_politely(instance,
+ STATUS_REG, SR_BSY, 0, 5 * HZ);
+ break;
+ case 2:
+ shost_printk(KERN_ERR, instance, "bus busy, attempting abort\n");
+ do_abort(instance);
+ break;
+ case 4:
+ shost_printk(KERN_ERR, instance, "bus busy, attempting reset\n");
+ do_reset(instance);
+ /* Wait after a reset; the SCSI standard calls for
+ * 250ms, we wait 500ms to be on the safe side.
+ * But some Toshiba CD-ROMs need ten times that.
+ */
+ if (hostdata->flags & FLAG_TOSHIBA_DELAY)
+ msleep(2500);
+ else
+ msleep(500);
+ break;
+ case 6:
+ shost_printk(KERN_ERR, instance, "bus locked solid\n");
+ return -ENXIO;
+ }
+ }
return 0;
}
@@ -812,6 +736,38 @@ static void NCR5380_exit(struct Scsi_Host *instance)
struct NCR5380_hostdata *hostdata = shost_priv(instance);
cancel_work_sync(&hostdata->main_task);
+ destroy_workqueue(hostdata->work_q);
+}
+
+/**
+ * complete_cmd - finish processing a command and return it to the SCSI ML
+ * @instance: the host instance
+ * @cmd: command to complete
+ */
+
+static void complete_cmd(struct Scsi_Host *instance,
+ struct scsi_cmnd *cmd)
+{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+
+ dsprintk(NDEBUG_QUEUES, instance, "complete_cmd: cmd %p\n", cmd);
+
+ if (hostdata->sensing == cmd) {
+ /* Autosense processing ends here */
+ if ((cmd->result & 0xff) != SAM_STAT_GOOD) {
+ scsi_eh_restore_cmnd(cmd, &hostdata->ses);
+ set_host_byte(cmd, DID_ERROR);
+ } else
+ scsi_eh_restore_cmnd(cmd, &hostdata->ses);
+ hostdata->sensing = NULL;
+ }
+
+#ifdef SUPPORT_TAGS
+ cmd_free_tag(cmd);
+#else
+ hostdata->busy[scmd_id(cmd)] &= ~(1 << cmd->device->lun);
+#endif
+ cmd->scsi_done(cmd);
}
/**
@@ -819,7 +775,7 @@ static void NCR5380_exit(struct Scsi_Host *instance)
* @instance: the relevant SCSI adapter
* @cmd: SCSI command
*
- * cmd is added to the per instance issue_queue, with minor
+ * cmd is added to the per-instance issue queue, with minor
* twiddling done to the host specific fields of cmd. If the
* main coroutine is not running, it is restarted.
*/
@@ -828,44 +784,23 @@ static int NCR5380_queue_command(struct Scsi_Host *instance,
struct scsi_cmnd *cmd)
{
struct NCR5380_hostdata *hostdata = shost_priv(instance);
- struct scsi_cmnd *tmp;
+ struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
unsigned long flags;
#if (NDEBUG & NDEBUG_NO_WRITE)
switch (cmd->cmnd[0]) {
case WRITE_6:
case WRITE_10:
- printk(KERN_NOTICE "scsi%d: WRITE attempted with NO_WRITE debugging flag set\n",
- H_NO(cmd));
+ shost_printk(KERN_DEBUG, instance, "WRITE attempted with NDEBUG_NO_WRITE set\n");
cmd->result = (DID_ERROR << 16);
cmd->scsi_done(cmd);
return 0;
}
#endif /* (NDEBUG & NDEBUG_NO_WRITE) */
- /*
- * We use the host_scribble field as a pointer to the next command
- * in a queue
- */
-
- SET_NEXT(cmd, NULL);
cmd->result = 0;
/*
- * Insert the cmd into the issue queue. Note that REQUEST SENSE
- * commands are added to the head of the queue since any command will
- * clear the contingent allegiance condition that exists and the
- * sense data is only guaranteed to be valid while the condition exists.
- */
-
- /* ++guenther: now that the issue queue is being set up, we can lock ST-DMA.
- * Otherwise a running NCR5380_main may steal the lock.
- * Lock before actually inserting due to fairness reasons explained in
- * atari_scsi.c. If we insert first, then it's impossible for this driver
- * to release the lock.
- * Stop timer for this command while waiting for the lock, or timeouts
- * may happen (and they really do), and it's no good if the command doesn't
- * appear in any of the queues.
* ++roman: Just disabling the NCR interrupt isn't sufficient here,
* because also a timer int can trigger an abort or reset, which would
* alter queues and touch the lock.
@@ -873,7 +808,7 @@ static int NCR5380_queue_command(struct Scsi_Host *instance,
if (!NCR5380_acquire_dma_irq(instance))
return SCSI_MLQUEUE_HOST_BUSY;
- local_irq_save(flags);
+ spin_lock_irqsave(&hostdata->lock, flags);
/*
* Insert the cmd into the issue queue. Note that REQUEST SENSE
@@ -882,33 +817,18 @@ static int NCR5380_queue_command(struct Scsi_Host *instance,
* sense data is only guaranteed to be valid while the condition exists.
*/
- if (!(hostdata->issue_queue) || (cmd->cmnd[0] == REQUEST_SENSE)) {
- LIST(cmd, hostdata->issue_queue);
- SET_NEXT(cmd, hostdata->issue_queue);
- hostdata->issue_queue = cmd;
- } else {
- for (tmp = (struct scsi_cmnd *)hostdata->issue_queue;
- NEXT(tmp); tmp = NEXT(tmp))
- ;
- LIST(cmd, tmp);
- SET_NEXT(tmp, cmd);
- }
- local_irq_restore(flags);
+ if (cmd->cmnd[0] == REQUEST_SENSE)
+ list_add(&ncmd->list, &hostdata->unissued);
+ else
+ list_add_tail(&ncmd->list, &hostdata->unissued);
- dprintk(NDEBUG_QUEUES, "scsi%d: command added to %s of queue\n", H_NO(cmd),
- (cmd->cmnd[0] == REQUEST_SENSE) ? "head" : "tail");
+ spin_unlock_irqrestore(&hostdata->lock, flags);
- /* If queue_command() is called from an interrupt (real one or bottom
- * half), we let queue_main() do the job of taking care about main. If it
- * is already running, this is a no-op, else main will be queued.
- *
- * If we're not in an interrupt, we can call NCR5380_main()
- * unconditionally, because it cannot be already running.
- */
- if (in_interrupt() || irqs_disabled())
- queue_main(hostdata);
- else
- NCR5380_main(&hostdata->main_task);
+ dsprintk(NDEBUG_QUEUES, instance, "command %p added to %s of queue\n",
+ cmd, (cmd->cmnd[0] == REQUEST_SENSE) ? "head" : "tail");
+
+ /* Kick off command processing */
+ queue_work(hostdata->work_q, &hostdata->main_task);
return 0;
}
@@ -917,22 +837,85 @@ static inline void maybe_release_dma_irq(struct Scsi_Host *instance)
struct NCR5380_hostdata *hostdata = shost_priv(instance);
/* Caller does the locking needed to set & test these data atomically */
- if (!hostdata->disconnected_queue &&
- !hostdata->issue_queue &&
+ if (list_empty(&hostdata->disconnected) &&
+ list_empty(&hostdata->unissued) &&
+ list_empty(&hostdata->autosense) &&
!hostdata->connected &&
- !hostdata->retain_dma_intr)
+ !hostdata->selecting)
NCR5380_release_dma_irq(instance);
}
/**
+ * dequeue_next_cmd - dequeue a command for processing
+ * @instance: the scsi host instance
+ *
+ * Priority is given to commands on the autosense queue. These commands
+ * need autosense because of a CHECK CONDITION result.
+ *
+ * Returns a command pointer if a command is found for a target that is
+ * not already busy. Otherwise returns NULL.
+ */
+
+static struct scsi_cmnd *dequeue_next_cmd(struct Scsi_Host *instance)
+{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ struct NCR5380_cmd *ncmd;
+ struct scsi_cmnd *cmd;
+
+ if (list_empty(&hostdata->autosense)) {
+ list_for_each_entry(ncmd, &hostdata->unissued, list) {
+ cmd = NCR5380_to_scmd(ncmd);
+ dsprintk(NDEBUG_QUEUES, instance, "dequeue: cmd=%p target=%d busy=0x%02x lun=%llu\n",
+ cmd, scmd_id(cmd), hostdata->busy[scmd_id(cmd)], cmd->device->lun);
+
+ if (
+#ifdef SUPPORT_TAGS
+ !is_lun_busy(cmd, 1)
+#else
+ !(hostdata->busy[scmd_id(cmd)] & (1 << cmd->device->lun))
+#endif
+ ) {
+ list_del(&ncmd->list);
+ dsprintk(NDEBUG_QUEUES, instance,
+ "dequeue: removed %p from issue queue\n", cmd);
+ return cmd;
+ }
+ }
+ } else {
+ /* Autosense processing begins here */
+ ncmd = list_first_entry(&hostdata->autosense,
+ struct NCR5380_cmd, list);
+ list_del(&ncmd->list);
+ cmd = NCR5380_to_scmd(ncmd);
+ dsprintk(NDEBUG_QUEUES, instance,
+ "dequeue: removed %p from autosense queue\n", cmd);
+ scsi_eh_prep_cmnd(cmd, &hostdata->ses, NULL, 0, ~0);
+ hostdata->sensing = cmd;
+ return cmd;
+ }
+ return NULL;
+}
+
+static void requeue_cmd(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
+{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
+
+ if (hostdata->sensing) {
+ scsi_eh_restore_cmnd(cmd, &hostdata->ses);
+ list_add(&ncmd->list, &hostdata->autosense);
+ hostdata->sensing = NULL;
+ } else
+ list_add(&ncmd->list, &hostdata->unissued);
+}
+
+/**
* NCR5380_main - NCR state machines
*
* NCR5380_main is a coroutine that runs as long as more work can
* be done on the NCR5380 host adapters in a system. Both
* NCR5380_queue_command() and NCR5380_intr() will try to start it
* in case it is not running.
- *
- * Locks: called as its own thread with no locks held.
*/
static void NCR5380_main(struct work_struct *work)
@@ -940,154 +923,69 @@ static void NCR5380_main(struct work_struct *work)
struct NCR5380_hostdata *hostdata =
container_of(work, struct NCR5380_hostdata, main_task);
struct Scsi_Host *instance = hostdata->host;
- struct scsi_cmnd *tmp, *prev;
+ struct scsi_cmnd *cmd;
int done;
- unsigned long flags;
/*
- * We run (with interrupts disabled) until we're sure that none of
- * the host adapters have anything that can be done, at which point
- * we set main_running to 0 and exit.
- *
- * Interrupts are enabled before doing various other internal
- * instructions, after we've decided that we need to run through
- * the loop again.
- *
- * this should prevent any race conditions.
- *
* ++roman: Just disabling the NCR interrupt isn't sufficient here,
* because also a timer int can trigger an abort or reset, which can
* alter queues and touch the Falcon lock.
*/
- /* Tell int handlers main() is now already executing. Note that
- no races are possible here. If an int comes in before
- 'main_running' is set here, and queues/executes main via the
- task queue, it doesn't do any harm, just this instance of main
- won't find any work left to do. */
- if (hostdata->main_running)
- return;
- hostdata->main_running = 1;
-
- local_save_flags(flags);
do {
- local_irq_disable(); /* Freeze request queues */
done = 1;
- if (!hostdata->connected) {
- dprintk(NDEBUG_MAIN, "scsi%d: not connected\n", HOSTNO);
- /*
- * Search through the issue_queue for a command destined
- * for a target that's not busy.
- */
-#if (NDEBUG & NDEBUG_LISTS)
- for (tmp = (struct scsi_cmnd *) hostdata->issue_queue, prev = NULL;
- tmp && (tmp != prev); prev = tmp, tmp = NEXT(tmp))
- ;
- /*printk("%p ", tmp);*/
- if ((tmp == prev) && tmp)
- printk(" LOOP\n");
- /* else printk("\n"); */
-#endif
- for (tmp = (struct scsi_cmnd *) hostdata->issue_queue,
- prev = NULL; tmp; prev = tmp, tmp = NEXT(tmp)) {
- u8 lun = tmp->device->lun;
-
- dprintk(NDEBUG_LISTS,
- "MAIN tmp=%p target=%d busy=%d lun=%d\n",
- tmp, scmd_id(tmp), hostdata->busy[scmd_id(tmp)],
- lun);
- /* When we find one, remove it from the issue queue. */
- /* ++guenther: possible race with Falcon locking */
- if (
-#ifdef SUPPORT_TAGS
- !is_lun_busy( tmp, tmp->cmnd[0] != REQUEST_SENSE)
-#else
- !(hostdata->busy[tmp->device->id] & (1 << lun))
-#endif
- ) {
- /* ++guenther: just to be sure, this must be atomic */
- local_irq_disable();
- if (prev) {
- REMOVE(prev, NEXT(prev), tmp, NEXT(tmp));
- SET_NEXT(prev, NEXT(tmp));
- } else {
- REMOVE(-1, hostdata->issue_queue, tmp, NEXT(tmp));
- hostdata->issue_queue = NEXT(tmp);
- }
- SET_NEXT(tmp, NULL);
- hostdata->retain_dma_intr++;
+ spin_lock_irq(&hostdata->lock);
+ while (!hostdata->connected &&
+ (cmd = dequeue_next_cmd(instance))) {
- /* reenable interrupts after finding one */
- local_irq_restore(flags);
+ dsprintk(NDEBUG_MAIN, instance, "main: dequeued %p\n", cmd);
- /*
- * Attempt to establish an I_T_L nexus here.
- * On success, instance->hostdata->connected is set.
- * On failure, we must add the command back to the
- * issue queue so we can keep trying.
- */
- dprintk(NDEBUG_MAIN, "scsi%d: main(): command for target %d "
- "lun %d removed from issue_queue\n",
- HOSTNO, tmp->device->id, lun);
- /*
- * REQUEST SENSE commands are issued without tagged
- * queueing, even on SCSI-II devices because the
- * contingent allegiance condition exists for the
- * entire unit.
- */
- /* ++roman: ...and the standard also requires that
- * REQUEST SENSE command are untagged.
- */
+ /*
+ * Attempt to establish an I_T_L nexus here.
+ * On success, instance->hostdata->connected is set.
+ * On failure, we must add the command back to the
+ * issue queue so we can keep trying.
+ */
+ /*
+ * REQUEST SENSE commands are issued without tagged
+ * queueing, even on SCSI-II devices because the
+ * contingent allegiance condition exists for the
+ * entire unit.
+ */
+ /* ++roman: ...and the standard also requires that
+ * REQUEST SENSE command are untagged.
+ */
#ifdef SUPPORT_TAGS
- cmd_get_tag(tmp, tmp->cmnd[0] != REQUEST_SENSE);
+ cmd_get_tag(cmd, cmd->cmnd[0] != REQUEST_SENSE);
#endif
- if (!NCR5380_select(instance, tmp)) {
- local_irq_disable();
- hostdata->retain_dma_intr--;
- /* release if target did not response! */
- maybe_release_dma_irq(instance);
- local_irq_restore(flags);
- break;
- } else {
- local_irq_disable();
- LIST(tmp, hostdata->issue_queue);
- SET_NEXT(tmp, hostdata->issue_queue);
- hostdata->issue_queue = tmp;
+ cmd = NCR5380_select(instance, cmd);
+ if (!cmd) {
+ dsprintk(NDEBUG_MAIN, instance, "main: select complete\n");
+ maybe_release_dma_irq(instance);
+ } else {
+ dsprintk(NDEBUG_MAIN | NDEBUG_QUEUES, instance,
+ "main: select failed, returning %p to queue\n", cmd);
+ requeue_cmd(instance, cmd);
#ifdef SUPPORT_TAGS
- cmd_free_tag(tmp);
+ cmd_free_tag(cmd);
#endif
- hostdata->retain_dma_intr--;
- local_irq_restore(flags);
- dprintk(NDEBUG_MAIN, "scsi%d: main(): select() failed, "
- "returned to issue_queue\n", HOSTNO);
- if (hostdata->connected)
- break;
- }
- } /* if target/lun/target queue is not busy */
- } /* for issue_queue */
- } /* if (!hostdata->connected) */
-
+ }
+ }
if (hostdata->connected
#ifdef REAL_DMA
&& !hostdata->dma_len
#endif
) {
- local_irq_restore(flags);
- dprintk(NDEBUG_MAIN, "scsi%d: main: performing information transfer\n",
- HOSTNO);
+ dsprintk(NDEBUG_MAIN, instance, "main: performing information transfer\n");
NCR5380_information_transfer(instance);
- dprintk(NDEBUG_MAIN, "scsi%d: main: done set false\n", HOSTNO);
done = 0;
}
+ spin_unlock_irq(&hostdata->lock);
+ if (!done)
+ cond_resched();
} while (!done);
-
- /* Better allow ints _after_ 'main_running' has been cleared, else
- an interrupt could believe we'll pick up the work it left for
- us, but we won't see it anymore here... */
- hostdata->main_running = 0;
- local_irq_restore(flags);
}
@@ -1096,27 +994,20 @@ static void NCR5380_main(struct work_struct *work)
* Function : void NCR5380_dma_complete (struct Scsi_Host *instance)
*
* Purpose : Called by interrupt handler when DMA finishes or a phase
- * mismatch occurs (which would finish the DMA transfer).
+ * mismatch occurs (which would finish the DMA transfer).
*
* Inputs : instance - this instance of the NCR5380.
- *
*/
static void NCR5380_dma_complete(struct Scsi_Host *instance)
{
- SETUP_HOSTDATA(instance);
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
int transferred;
unsigned char **data;
- volatile int *count;
+ int *count;
int saved_data = 0, overrun = 0;
unsigned char p;
- if (!hostdata->connected) {
- printk(KERN_WARNING "scsi%d: received end of DMA interrupt with "
- "no connected cmd\n", HOSTNO);
- return;
- }
-
if (hostdata->read_overruns) {
p = hostdata->connected->SCp.phase;
if (p & SR_IO) {
@@ -1126,15 +1017,11 @@ static void NCR5380_dma_complete(struct Scsi_Host *instance)
(BASR_PHASE_MATCH|BASR_ACK)) {
saved_data = NCR5380_read(INPUT_DATA_REG);
overrun = 1;
- dprintk(NDEBUG_DMA, "scsi%d: read overrun handled\n", HOSTNO);
+ dsprintk(NDEBUG_DMA, instance, "read overrun handled\n");
}
}
}
- dprintk(NDEBUG_DMA, "scsi%d: real DMA transfer complete, basr 0x%X, sr 0x%X\n",
- HOSTNO, NCR5380_read(BUS_AND_STATUS_REG),
- NCR5380_read(STATUS_REG));
-
#if defined(CONFIG_SUN3)
if ((sun3scsi_dma_finish(rq_data_dir(hostdata->connected->request)))) {
pr_err("scsi%d: overrun in UDC counter -- not prepared to deal with this!\n",
@@ -1153,9 +1040,9 @@ static void NCR5380_dma_complete(struct Scsi_Host *instance)
}
#endif
- (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
NCR5380_write(MODE_REG, MR_BASE);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+ NCR5380_read(RESET_PARITY_INTERRUPT_REG);
transferred = hostdata->dma_len - NCR5380_dma_residual(instance);
hostdata->dma_len = 0;
@@ -1194,140 +1081,160 @@ static void NCR5380_dma_complete(struct Scsi_Host *instance)
* Handle interrupts, reestablishing I_T_L or I_T_L_Q nexuses
* from the disconnected queue, and restarting NCR5380_main()
* as required.
+ *
+ * The chip can assert IRQ in any of six different conditions. The IRQ flag
+ * is then cleared by reading the Reset Parity/Interrupt Register (RPIR).
+ * Three of these six conditions are latched in the Bus and Status Register:
+ * - End of DMA (cleared by ending DMA Mode)
+ * - Parity error (cleared by reading RPIR)
+ * - Loss of BSY (cleared by reading RPIR)
+ * Two conditions have flag bits that are not latched:
+ * - Bus phase mismatch (non-maskable in DMA Mode, cleared by ending DMA Mode)
+ * - Bus reset (non-maskable)
+ * The remaining condition has no flag bit at all:
+ * - Selection/reselection
+ *
+ * Hence, establishing the cause(s) of any interrupt is partly guesswork.
+ * In "The DP8490 and DP5380 Comparison Guide", National Semiconductor
+ * claimed that "the design of the [DP8490] interrupt logic ensures
+ * interrupts will not be lost (they can be on the DP5380)."
+ * The L5380/53C80 datasheet from LOGIC Devices has more details.
+ *
+ * Checking for bus reset by reading RST is futile because of interrupt
+ * latency, but a bus reset will reset chip logic. Checking for parity error
+ * is unnecessary because that interrupt is never enabled. A Loss of BSY
+ * condition will clear DMA Mode. We can tell when this occurs because the
+ * the Busy Monitor interrupt is enabled together with DMA Mode.
*/
static irqreturn_t NCR5380_intr(int irq, void *dev_id)
{
struct Scsi_Host *instance = dev_id;
- int done = 1, handled = 0;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ int handled = 0;
unsigned char basr;
+ unsigned long flags;
- dprintk(NDEBUG_INTR, "scsi%d: NCR5380 irq triggered\n", HOSTNO);
+ spin_lock_irqsave(&hostdata->lock, flags);
- /* Look for pending interrupts */
basr = NCR5380_read(BUS_AND_STATUS_REG);
- dprintk(NDEBUG_INTR, "scsi%d: BASR=%02x\n", HOSTNO, basr);
- /* dispatch to appropriate routine if found and done=0 */
if (basr & BASR_IRQ) {
- NCR5380_dprint(NDEBUG_INTR, instance);
- if ((NCR5380_read(STATUS_REG) & (SR_SEL|SR_IO)) == (SR_SEL|SR_IO)) {
- done = 0;
- dprintk(NDEBUG_INTR, "scsi%d: SEL interrupt\n", HOSTNO);
- NCR5380_reselect(instance);
- (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
- } else if (basr & BASR_PARITY_ERROR) {
- dprintk(NDEBUG_INTR, "scsi%d: PARITY interrupt\n", HOSTNO);
- (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
- } else if ((NCR5380_read(STATUS_REG) & SR_RST) == SR_RST) {
- dprintk(NDEBUG_INTR, "scsi%d: RESET interrupt\n", HOSTNO);
- (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
- } else {
- /*
- * The rest of the interrupt conditions can occur only during a
- * DMA transfer
- */
+ unsigned char mr = NCR5380_read(MODE_REG);
+ unsigned char sr = NCR5380_read(STATUS_REG);
+
+ dsprintk(NDEBUG_INTR, instance, "IRQ %d, BASR 0x%02x, SR 0x%02x, MR 0x%02x\n",
+ irq, basr, sr, mr);
#if defined(REAL_DMA)
- /*
- * We should only get PHASE MISMATCH and EOP interrupts if we have
- * DMA enabled, so do a sanity check based on the current setting
- * of the MODE register.
+ if ((mr & MR_DMA_MODE) || (mr & MR_MONITOR_BSY)) {
+ /* Probably End of DMA, Phase Mismatch or Loss of BSY.
+ * We ack IRQ after clearing Mode Register. Workarounds
+ * for End of DMA errata need to happen in DMA Mode.
*/
- if ((NCR5380_read(MODE_REG) & MR_DMA_MODE) &&
- ((basr & BASR_END_DMA_TRANSFER) ||
- !(basr & BASR_PHASE_MATCH))) {
+ dsprintk(NDEBUG_INTR, instance, "interrupt in DMA mode\n");
- dprintk(NDEBUG_INTR, "scsi%d: PHASE MISM or EOP interrupt\n", HOSTNO);
- NCR5380_dma_complete( instance );
- done = 0;
- } else
+ if (hostdata->connected) {
+ NCR5380_dma_complete(instance);
+ queue_work(hostdata->work_q, &hostdata->main_task);
+ } else {
+ NCR5380_write(MODE_REG, MR_BASE);
+ NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+ }
+ } else
#endif /* REAL_DMA */
- {
-/* MS: Ignore unknown phase mismatch interrupts (caused by EOP interrupt) */
- if (basr & BASR_PHASE_MATCH)
- dprintk(NDEBUG_INTR, "scsi%d: unknown interrupt, "
- "BASR 0x%x, MR 0x%x, SR 0x%x\n",
- HOSTNO, basr, NCR5380_read(MODE_REG),
- NCR5380_read(STATUS_REG));
- (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+ if ((NCR5380_read(CURRENT_SCSI_DATA_REG) & hostdata->id_mask) &&
+ (sr & (SR_SEL | SR_IO | SR_BSY | SR_RST)) == (SR_SEL | SR_IO)) {
+ /* Probably reselected */
+ NCR5380_write(SELECT_ENABLE_REG, 0);
+ NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+
+ dsprintk(NDEBUG_INTR, instance, "interrupt with SEL and IO\n");
+
+ if (!hostdata->connected) {
+ NCR5380_reselect(instance);
+ queue_work(hostdata->work_q, &hostdata->main_task);
+ }
+ if (!hostdata->connected)
+ NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+ } else {
+ /* Probably Bus Reset */
+ NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+
+ dsprintk(NDEBUG_INTR, instance, "unknown interrupt\n");
#ifdef SUN3_SCSI_VME
- dregs->csr |= CSR_DMA_ENABLE;
+ dregs->csr |= CSR_DMA_ENABLE;
#endif
- }
- } /* if !(SELECTION || PARITY) */
+ }
handled = 1;
- } /* BASR & IRQ */ else {
- printk(KERN_NOTICE "scsi%d: interrupt without IRQ bit set in BASR, "
- "BASR 0x%X, MR 0x%X, SR 0x%x\n", HOSTNO, basr,
- NCR5380_read(MODE_REG), NCR5380_read(STATUS_REG));
- (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+ } else {
+ shost_printk(KERN_NOTICE, instance, "interrupt without IRQ bit\n");
#ifdef SUN3_SCSI_VME
dregs->csr |= CSR_DMA_ENABLE;
#endif
}
- if (!done) {
- dprintk(NDEBUG_INTR, "scsi%d: in int routine, calling main\n", HOSTNO);
- /* Put a call to NCR5380_main() on the queue... */
- queue_main(shost_priv(instance));
- }
+ spin_unlock_irqrestore(&hostdata->lock, flags);
+
return IRQ_RETVAL(handled);
}
/*
* Function : int NCR5380_select(struct Scsi_Host *instance,
- * struct scsi_cmnd *cmd)
+ * struct scsi_cmnd *cmd)
*
* Purpose : establishes I_T_L or I_T_L_Q nexus for new or existing command,
- * including ARBITRATION, SELECTION, and initial message out for
- * IDENTIFY and queue messages.
+ * including ARBITRATION, SELECTION, and initial message out for
+ * IDENTIFY and queue messages.
*
* Inputs : instance - instantiation of the 5380 driver on which this
- * target lives, cmd - SCSI command to execute.
+ * target lives, cmd - SCSI command to execute.
*
- * Returns : -1 if selection could not execute for some reason,
- * 0 if selection succeeded or failed because the target
- * did not respond.
+ * Returns cmd if selection failed but should be retried,
+ * NULL if selection failed and should not be retried, or
+ * NULL if selection succeeded (hostdata->connected == cmd).
*
* Side effects :
- * If bus busy, arbitration failed, etc, NCR5380_select() will exit
- * with registers as they should have been on entry - ie
- * SELECT_ENABLE will be set appropriately, the NCR5380
- * will cease to drive any SCSI bus signals.
+ * If bus busy, arbitration failed, etc, NCR5380_select() will exit
+ * with registers as they should have been on entry - ie
+ * SELECT_ENABLE will be set appropriately, the NCR5380
+ * will cease to drive any SCSI bus signals.
*
- * If successful : I_T_L or I_T_L_Q nexus will be established,
- * instance->connected will be set to cmd.
- * SELECT interrupt will be disabled.
+ * If successful : I_T_L or I_T_L_Q nexus will be established,
+ * instance->connected will be set to cmd.
+ * SELECT interrupt will be disabled.
*
- * If failed (no target) : cmd->scsi_done() will be called, and the
- * cmd->result host byte set to DID_BAD_TARGET.
+ * If failed (no target) : cmd->scsi_done() will be called, and the
+ * cmd->result host byte set to DID_BAD_TARGET.
*/
-static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
+static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *instance,
+ struct scsi_cmnd *cmd)
{
- SETUP_HOSTDATA(instance);
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
unsigned char tmp[3], phase;
unsigned char *data;
int len;
- unsigned long timeout;
- unsigned long flags;
+ int err;
- hostdata->restart_select = 0;
NCR5380_dprint(NDEBUG_ARBITRATION, instance);
- dprintk(NDEBUG_ARBITRATION, "scsi%d: starting arbitration, id = %d\n", HOSTNO,
- instance->this_id);
+ dsprintk(NDEBUG_ARBITRATION, instance, "starting arbitration, id = %d\n",
+ instance->this_id);
+
+ /*
+ * Arbitration and selection phases are slow and involve dropping the
+ * lock, so we have to watch out for EH. An exception handler may
+ * change 'selecting' to NULL. This function will then return NULL
+ * so that the caller will forget about 'cmd'. (During information
+ * transfer phases, EH may change 'connected' to NULL.)
+ */
+ hostdata->selecting = cmd;
/*
* Set the phase bits to 0, otherwise the NCR5380 won't drive the
* data bus during SELECTION.
*/
- local_irq_save(flags);
- if (hostdata->connected) {
- local_irq_restore(flags);
- return -1;
- }
NCR5380_write(TARGET_COMMAND_REG, 0);
/*
@@ -1337,96 +1244,77 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
NCR5380_write(OUTPUT_DATA_REG, hostdata->id_mask);
NCR5380_write(MODE_REG, MR_ARBITRATE);
- local_irq_restore(flags);
-
- /* Wait for arbitration logic to complete */
-#if defined(NCR_TIMEOUT)
- {
- unsigned long timeout = jiffies + 2*NCR_TIMEOUT;
+ /* The chip now waits for BUS FREE phase. Then after the 800 ns
+ * Bus Free Delay, arbitration will begin.
+ */
- while (!(NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_PROGRESS) &&
- time_before(jiffies, timeout) && !hostdata->connected)
- ;
- if (time_after_eq(jiffies, timeout)) {
- printk("scsi : arbitration timeout at %d\n", __LINE__);
- NCR5380_write(MODE_REG, MR_BASE);
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- return -1;
- }
+ spin_unlock_irq(&hostdata->lock);
+ err = NCR5380_poll_politely2(instance, MODE_REG, MR_ARBITRATE, 0,
+ INITIATOR_COMMAND_REG, ICR_ARBITRATION_PROGRESS,
+ ICR_ARBITRATION_PROGRESS, HZ);
+ spin_lock_irq(&hostdata->lock);
+ if (!(NCR5380_read(MODE_REG) & MR_ARBITRATE)) {
+ /* Reselection interrupt */
+ goto out;
}
-#else /* NCR_TIMEOUT */
- while (!(NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_PROGRESS) &&
- !hostdata->connected)
- ;
-#endif
-
- dprintk(NDEBUG_ARBITRATION, "scsi%d: arbitration complete\n", HOSTNO);
-
- if (hostdata->connected) {
+ if (err < 0) {
NCR5380_write(MODE_REG, MR_BASE);
- return -1;
+ shost_printk(KERN_ERR, instance,
+ "select: arbitration timeout\n");
+ goto out;
}
- /*
- * The arbitration delay is 2.2us, but this is a minimum and there is
- * no maximum so we can safely sleep for ceil(2.2) usecs to accommodate
- * the integral nature of udelay().
- *
- */
+ spin_unlock_irq(&hostdata->lock);
+ /* The SCSI-2 arbitration delay is 2.4 us */
udelay(3);
/* Check for lost arbitration */
if ((NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST) ||
(NCR5380_read(CURRENT_SCSI_DATA_REG) & hostdata->id_higher_mask) ||
- (NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST) ||
- hostdata->connected) {
+ (NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST)) {
NCR5380_write(MODE_REG, MR_BASE);
- dprintk(NDEBUG_ARBITRATION, "scsi%d: lost arbitration, deasserting MR_ARBITRATE\n",
- HOSTNO);
- return -1;
+ dsprintk(NDEBUG_ARBITRATION, instance, "lost arbitration, deasserting MR_ARBITRATE\n");
+ spin_lock_irq(&hostdata->lock);
+ goto out;
}
- /* after/during arbitration, BSY should be asserted.
- IBM DPES-31080 Version S31Q works now */
- /* Tnx to Thomas_Roesch@m2.maus.de for finding this! (Roman) */
+ /* After/during arbitration, BSY should be asserted.
+ * IBM DPES-31080 Version S31Q works now
+ * Tnx to Thomas_Roesch@m2.maus.de for finding this! (Roman)
+ */
NCR5380_write(INITIATOR_COMMAND_REG,
ICR_BASE | ICR_ASSERT_SEL | ICR_ASSERT_BSY);
- if ((NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST) ||
- hostdata->connected) {
- NCR5380_write(MODE_REG, MR_BASE);
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- dprintk(NDEBUG_ARBITRATION, "scsi%d: lost arbitration, deasserting ICR_ASSERT_SEL\n",
- HOSTNO);
- return -1;
- }
-
/*
* Again, bus clear + bus settle time is 1.2us, however, this is
* a minimum so we'll udelay ceil(1.2)
*/
-#ifdef CONFIG_ATARI_SCSI_TOSHIBA_DELAY
- /* ++roman: But some targets (see above :-) seem to need a bit more... */
- udelay(15);
-#else
- udelay(2);
-#endif
+ if (hostdata->flags & FLAG_TOSHIBA_DELAY)
+ udelay(15);
+ else
+ udelay(2);
- if (hostdata->connected) {
+ spin_lock_irq(&hostdata->lock);
+
+ /* NCR5380_reselect() clears MODE_REG after a reselection interrupt */
+ if (!(NCR5380_read(MODE_REG) & MR_ARBITRATE))
+ goto out;
+
+ if (!hostdata->selecting) {
NCR5380_write(MODE_REG, MR_BASE);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- return -1;
+ goto out;
}
- dprintk(NDEBUG_ARBITRATION, "scsi%d: won arbitration\n", HOSTNO);
+ dsprintk(NDEBUG_ARBITRATION, instance, "won arbitration\n");
/*
* Now that we have won arbitration, start Selection process, asserting
* the host and target ID's on the SCSI bus.
*/
- NCR5380_write(OUTPUT_DATA_REG, (hostdata->id_mask | (1 << cmd->device->id)));
+ NCR5380_write(OUTPUT_DATA_REG, hostdata->id_mask | (1 << scmd_id(cmd)));
/*
* Raise ATN while SEL is true before BSY goes false from arbitration,
@@ -1434,22 +1322,18 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
* phase immediately after selection.
*/
- NCR5380_write(INITIATOR_COMMAND_REG, (ICR_BASE | ICR_ASSERT_BSY |
- ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_SEL ));
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_BSY |
+ ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_SEL);
NCR5380_write(MODE_REG, MR_BASE);
/*
* Reselect interrupts must be turned off prior to the dropping of BSY,
* otherwise we will trigger an interrupt.
*/
-
- if (hostdata->connected) {
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- return -1;
- }
-
NCR5380_write(SELECT_ENABLE_REG, 0);
+ spin_unlock_irq(&hostdata->lock);
+
/*
* The initiator shall then wait at least two deskew delays and release
* the BSY signal.
@@ -1457,8 +1341,8 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
udelay(1); /* wingel -- wait two bus deskew delay >2*45ns */
/* Reset BSY */
- NCR5380_write(INITIATOR_COMMAND_REG, (ICR_BASE | ICR_ASSERT_DATA |
- ICR_ASSERT_ATN | ICR_ASSERT_SEL));
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA |
+ ICR_ASSERT_ATN | ICR_ASSERT_SEL);
/*
* Something weird happens when we cease to drive BSY - looks
@@ -1479,45 +1363,39 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
udelay(1);
- dprintk(NDEBUG_SELECTION, "scsi%d: selecting target %d\n", HOSTNO, cmd->device->id);
+ dsprintk(NDEBUG_SELECTION, instance, "selecting target %d\n", scmd_id(cmd));
/*
* The SCSI specification calls for a 250 ms timeout for the actual
* selection.
*/
- timeout = jiffies + msecs_to_jiffies(250);
-
- /*
- * XXX very interesting - we're seeing a bounce where the BSY we
- * asserted is being reflected / still asserted (propagation delay?)
- * and it's detecting as true. Sigh.
- */
-
-#if 0
- /* ++roman: If a target conformed to the SCSI standard, it wouldn't assert
- * IO while SEL is true. But again, there are some disks out the in the
- * world that do that nevertheless. (Somebody claimed that this announces
- * reselection capability of the target.) So we better skip that test and
- * only wait for BSY... (Famous german words: Der Klügere gibt nach :-)
- */
-
- while (time_before(jiffies, timeout) &&
- !(NCR5380_read(STATUS_REG) & (SR_BSY | SR_IO)))
- ;
+ err = NCR5380_poll_politely(instance, STATUS_REG, SR_BSY, SR_BSY,
+ msecs_to_jiffies(250));
if ((NCR5380_read(STATUS_REG) & (SR_SEL | SR_IO)) == (SR_SEL | SR_IO)) {
+ spin_lock_irq(&hostdata->lock);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
NCR5380_reselect(instance);
- printk(KERN_ERR "scsi%d: reselection after won arbitration?\n",
- HOSTNO);
+ if (!hostdata->connected)
+ NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+ shost_printk(KERN_ERR, instance, "reselection after won arbitration?\n");
+ goto out;
+ }
+
+ if (err < 0) {
+ spin_lock_irq(&hostdata->lock);
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- return -1;
+ /* Can't touch cmd if it has been reclaimed by the scsi ML */
+ if (hostdata->selecting) {
+ cmd->result = DID_BAD_TARGET << 16;
+ complete_cmd(instance, cmd);
+ dsprintk(NDEBUG_SELECTION, instance, "target did not respond within 250ms\n");
+ cmd = NULL;
+ }
+ goto out;
}
-#else
- while (time_before(jiffies, timeout) && !(NCR5380_read(STATUS_REG) & SR_BSY))
- ;
-#endif
/*
* No less than two deskew delays after the initiator detects the
@@ -1529,29 +1407,6 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
- if (!(NCR5380_read(STATUS_REG) & SR_BSY)) {
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- if (hostdata->targets_present & (1 << cmd->device->id)) {
- printk(KERN_ERR "scsi%d: weirdness\n", HOSTNO);
- if (hostdata->restart_select)
- printk(KERN_NOTICE "\trestart select\n");
- NCR5380_dprint(NDEBUG_ANY, instance);
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- return -1;
- }
- cmd->result = DID_BAD_TARGET << 16;
-#ifdef SUPPORT_TAGS
- cmd_free_tag(cmd);
-#endif
- cmd->scsi_done(cmd);
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- dprintk(NDEBUG_SELECTION, "scsi%d: target did not respond within 250ms\n", HOSTNO);
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- return 0;
- }
-
- hostdata->targets_present |= (1 << cmd->device->id);
-
/*
* Since we followed the SCSI spec, and raised ATN while SEL
* was true but before BSY was false during selection, the information
@@ -1563,16 +1418,27 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
* until it wraps back to 0.
*
* XXX - it turns out that there are some broken SCSI-II devices,
- * which claim to support tagged queuing but fail when more than
- * some number of commands are issued at once.
+ * which claim to support tagged queuing but fail when more than
+ * some number of commands are issued at once.
*/
/* Wait for start of REQ/ACK handshake */
- while (!(NCR5380_read(STATUS_REG) & SR_REQ))
- ;
- dprintk(NDEBUG_SELECTION, "scsi%d: target %d selected, going into MESSAGE OUT phase.\n",
- HOSTNO, cmd->device->id);
+ err = NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, HZ);
+ spin_lock_irq(&hostdata->lock);
+ if (err < 0) {
+ shost_printk(KERN_ERR, instance, "select: REQ timeout\n");
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+ NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+ goto out;
+ }
+ if (!hostdata->selecting) {
+ do_abort(instance);
+ goto out;
+ }
+
+ dsprintk(NDEBUG_SELECTION, instance, "target %d selected, going into MESSAGE OUT phase.\n",
+ scmd_id(cmd));
tmp[0] = IDENTIFY(1, cmd->device->lun);
#ifdef SUPPORT_TAGS
@@ -1591,11 +1457,12 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
data = tmp;
phase = PHASE_MSGOUT;
NCR5380_transfer_pio(instance, &phase, &len, &data);
- dprintk(NDEBUG_SELECTION, "scsi%d: nexus established.\n", HOSTNO);
+ dsprintk(NDEBUG_SELECTION, instance, "nexus established.\n");
/* XXX need to handle errors here */
+
hostdata->connected = cmd;
#ifndef SUPPORT_TAGS
- hostdata->busy[cmd->device->id] |= (1 << cmd->device->lun);
+ hostdata->busy[cmd->device->id] |= 1 << cmd->device->lun;
#endif
#ifdef SUN3_SCSI_VME
dregs->csr |= CSR_INTR;
@@ -1603,24 +1470,30 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
initialize_SCp(cmd);
- return 0;
+ cmd = NULL;
+
+out:
+ if (!hostdata->selecting)
+ return NULL;
+ hostdata->selecting = NULL;
+ return cmd;
}
/*
* Function : int NCR5380_transfer_pio (struct Scsi_Host *instance,
- * unsigned char *phase, int *count, unsigned char **data)
+ * unsigned char *phase, int *count, unsigned char **data)
*
* Purpose : transfers data in given phase using polled I/O
*
* Inputs : instance - instance of driver, *phase - pointer to
- * what phase is expected, *count - pointer to number of
- * bytes to transfer, **data - pointer to data pointer.
+ * what phase is expected, *count - pointer to number of
+ * bytes to transfer, **data - pointer to data pointer.
*
* Returns : -1 when different phase is entered without transferring
- * maximum number of bytes, 0 if all bytes are transferred or exit
- * is in same phase.
+ * maximum number of bytes, 0 if all bytes are transferred or exit
+ * is in same phase.
*
- * Also, *phase, *count, *data are modified in place.
+ * Also, *phase, *count, *data are modified in place.
*
* XXX Note : handling for bus free may be useful.
*/
@@ -1635,9 +1508,9 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance,
unsigned char *phase, int *count,
unsigned char **data)
{
- register unsigned char p = *phase, tmp;
- register int c = *count;
- register unsigned char *d = *data;
+ unsigned char p = *phase, tmp;
+ int c = *count;
+ unsigned char *d = *data;
/*
* The NCR5380 chip will only drive the SCSI bus when the
@@ -1652,14 +1525,15 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance,
* Wait for assertion of REQ, after which the phase bits will be
* valid
*/
- while (!((tmp = NCR5380_read(STATUS_REG)) & SR_REQ))
- ;
- dprintk(NDEBUG_HANDSHAKE, "scsi%d: REQ detected\n", HOSTNO);
+ if (NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, HZ) < 0)
+ break;
+
+ dsprintk(NDEBUG_HANDSHAKE, instance, "REQ asserted\n");
/* Check for phase mismatch */
- if ((tmp & PHASE_MASK) != p) {
- dprintk(NDEBUG_PIO, "scsi%d: phase mismatch\n", HOSTNO);
+ if ((NCR5380_read(STATUS_REG) & PHASE_MASK) != p) {
+ dsprintk(NDEBUG_PIO, instance, "phase mismatch\n");
NCR5380_dprint_phase(NDEBUG_PIO, instance);
break;
}
@@ -1684,35 +1558,36 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance,
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA);
NCR5380_dprint(NDEBUG_PIO, instance);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
- ICR_ASSERT_DATA | ICR_ASSERT_ACK);
+ ICR_ASSERT_DATA | ICR_ASSERT_ACK);
} else {
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
- ICR_ASSERT_DATA | ICR_ASSERT_ATN);
+ ICR_ASSERT_DATA | ICR_ASSERT_ATN);
NCR5380_dprint(NDEBUG_PIO, instance);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
- ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
+ ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
}
} else {
NCR5380_dprint(NDEBUG_PIO, instance);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ACK);
}
- while (NCR5380_read(STATUS_REG) & SR_REQ)
- ;
+ if (NCR5380_poll_politely(instance,
+ STATUS_REG, SR_REQ, 0, 5 * HZ) < 0)
+ break;
- dprintk(NDEBUG_HANDSHAKE, "scsi%d: req false, handshake complete\n", HOSTNO);
+ dsprintk(NDEBUG_HANDSHAKE, instance, "REQ negated, handshake complete\n");
- /*
- * We have several special cases to consider during REQ/ACK handshaking :
- * 1. We were in MSGOUT phase, and we are on the last byte of the
- * message. ATN must be dropped as ACK is dropped.
- *
- * 2. We are in a MSGIN phase, and we are on the last byte of the
- * message. We must exit with ACK asserted, so that the calling
- * code may raise ATN before dropping ACK to reject the message.
- *
- * 3. ACK and ATN are clear and the target may proceed as normal.
- */
+/*
+ * We have several special cases to consider during REQ/ACK handshaking :
+ * 1. We were in MSGOUT phase, and we are on the last byte of the
+ * message. ATN must be dropped as ACK is dropped.
+ *
+ * 2. We are in a MSGIN phase, and we are on the last byte of the
+ * message. We must exit with ACK asserted, so that the calling
+ * code may raise ATN before dropping ACK to reject the message.
+ *
+ * 3. ACK and ATN are clear and the target may proceed as normal.
+ */
if (!(p == PHASE_MSGIN && c == 1)) {
if (p == PHASE_MSGOUT && c > 1)
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
@@ -1721,16 +1596,16 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance,
}
} while (--c);
- dprintk(NDEBUG_PIO, "scsi%d: residual %d\n", HOSTNO, c);
+ dsprintk(NDEBUG_PIO, instance, "residual %d\n", c);
*count = c;
*data = d;
tmp = NCR5380_read(STATUS_REG);
/* The phase read from the bus is valid if either REQ is (already)
- * asserted or if ACK hasn't been released yet. The latter is the case if
- * we're in MSGIN and all wanted bytes have been received.
+ * asserted or if ACK hasn't been released yet. The latter applies if
+ * we're in MSG IN, DATA IN or STATUS and all bytes have been received.
*/
- if ((tmp & SR_REQ) || (p == PHASE_MSGIN && c == 0))
+ if ((tmp & SR_REQ) || ((tmp & SR_IO) && c == 0))
*phase = tmp & PHASE_MASK;
else
*phase = PHASE_UNKNOWN;
@@ -1741,19 +1616,45 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance,
return -1;
}
-/*
- * Function : do_abort (Scsi_Host *host)
+/**
+ * do_reset - issue a reset command
+ * @instance: adapter to reset
*
- * Purpose : abort the currently established nexus. Should only be
- * called from a routine which can drop into a
+ * Issue a reset sequence to the NCR5380 and try and get the bus
+ * back into sane shape.
*
- * Returns : 0 on success, -1 on failure.
+ * This clears the reset interrupt flag because there may be no handler for
+ * it. When the driver is initialized, the NCR5380_intr() handler has not yet
+ * been installed. And when in EH we may have released the ST DMA interrupt.
+ */
+
+static void do_reset(struct Scsi_Host *instance)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ NCR5380_write(TARGET_COMMAND_REG,
+ PHASE_SR_TO_TCR(NCR5380_read(STATUS_REG) & PHASE_MASK));
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST);
+ udelay(50);
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+ (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+ local_irq_restore(flags);
+}
+
+/**
+ * do_abort - abort the currently established nexus by going to
+ * MESSAGE OUT phase and sending an ABORT message.
+ * @instance: relevant scsi host instance
+ *
+ * Returns 0 on success, -1 on failure.
*/
static int do_abort(struct Scsi_Host *instance)
{
- unsigned char tmp, *msgptr, phase;
+ unsigned char *msgptr, phase, tmp;
int len;
+ int rc;
/* Request message out phase */
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
@@ -1768,16 +1669,20 @@ static int do_abort(struct Scsi_Host *instance)
* the target sees, so we just handshake.
*/
- while (!((tmp = NCR5380_read(STATUS_REG)) & SR_REQ))
- ;
+ rc = NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, 10 * HZ);
+ if (rc < 0)
+ goto timeout;
+
+ tmp = NCR5380_read(STATUS_REG) & PHASE_MASK;
NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(tmp));
- if ((tmp & PHASE_MASK) != PHASE_MSGOUT) {
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN |
- ICR_ASSERT_ACK);
- while (NCR5380_read(STATUS_REG) & SR_REQ)
- ;
+ if (tmp != PHASE_MSGOUT) {
+ NCR5380_write(INITIATOR_COMMAND_REG,
+ ICR_BASE | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
+ rc = NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, 0, 3 * HZ);
+ if (rc < 0)
+ goto timeout;
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
}
@@ -1793,26 +1698,29 @@ static int do_abort(struct Scsi_Host *instance)
*/
return len ? -1 : 0;
+
+timeout:
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+ return -1;
}
#if defined(REAL_DMA)
/*
* Function : int NCR5380_transfer_dma (struct Scsi_Host *instance,
- * unsigned char *phase, int *count, unsigned char **data)
+ * unsigned char *phase, int *count, unsigned char **data)
*
* Purpose : transfers data in given phase using either real
- * or pseudo DMA.
+ * or pseudo DMA.
*
* Inputs : instance - instance of driver, *phase - pointer to
- * what phase is expected, *count - pointer to number of
- * bytes to transfer, **data - pointer to data pointer.
+ * what phase is expected, *count - pointer to number of
+ * bytes to transfer, **data - pointer to data pointer.
*
* Returns : -1 when different phase is entered without transferring
- * maximum number of bytes, 0 if all bytes or transferred or exit
- * is in same phase.
- *
- * Also, *phase, *count, *data are modified in place.
+ * maximum number of bytes, 0 if all bytes or transferred or exit
+ * is in same phase.
*
+ * Also, *phase, *count, *data are modified in place.
*/
@@ -1820,10 +1728,9 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance,
unsigned char *phase, int *count,
unsigned char **data)
{
- SETUP_HOSTDATA(instance);
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
register int c = *count;
register unsigned char p = *phase;
- unsigned long flags;
#if defined(CONFIG_SUN3)
/* sanity check */
@@ -1834,29 +1741,22 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance,
}
hostdata->dma_len = c;
- dprintk(NDEBUG_DMA, "scsi%d: initializing DMA for %s, %d bytes %s %p\n",
- instance->host_no, (p & SR_IO) ? "reading" : "writing",
- c, (p & SR_IO) ? "to" : "from", *data);
+ dsprintk(NDEBUG_DMA, instance, "initializing DMA %s: length %d, address %p\n",
+ (p & SR_IO) ? "receive" : "send", c, *data);
/* netbsd turns off ints here, why not be safe and do it too */
- local_irq_save(flags);
/* send start chain */
sun3scsi_dma_start(c, *data);
+ NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(p));
+ NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY |
+ MR_ENABLE_EOP_INTR);
if (p & SR_IO) {
- NCR5380_write(TARGET_COMMAND_REG, 1);
- NCR5380_read(RESET_PARITY_INTERRUPT_REG);
NCR5380_write(INITIATOR_COMMAND_REG, 0);
- NCR5380_write(MODE_REG,
- (NCR5380_read(MODE_REG) | MR_DMA_MODE | MR_ENABLE_EOP_INTR));
NCR5380_write(START_DMA_INITIATOR_RECEIVE_REG, 0);
} else {
- NCR5380_write(TARGET_COMMAND_REG, 0);
- NCR5380_read(RESET_PARITY_INTERRUPT_REG);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_ASSERT_DATA);
- NCR5380_write(MODE_REG,
- (NCR5380_read(MODE_REG) | MR_DMA_MODE | MR_ENABLE_EOP_INTR));
NCR5380_write(START_DMA_SEND_REG, 0);
}
@@ -1864,8 +1764,6 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance,
dregs->csr |= CSR_DMA_ENABLE;
#endif
- local_irq_restore(flags);
-
sun3_dma_active = 1;
#else /* !defined(CONFIG_SUN3) */
@@ -1880,25 +1778,20 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance,
if (hostdata->read_overruns && (p & SR_IO))
c -= hostdata->read_overruns;
- dprintk(NDEBUG_DMA, "scsi%d: initializing DMA for %s, %d bytes %s %p\n",
- HOSTNO, (p & SR_IO) ? "reading" : "writing",
- c, (p & SR_IO) ? "to" : "from", d);
+ dsprintk(NDEBUG_DMA, instance, "initializing DMA %s: length %d, address %p\n",
+ (p & SR_IO) ? "receive" : "send", c, d);
NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(p));
-
-#ifdef REAL_DMA
- NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_ENABLE_EOP_INTR | MR_MONITOR_BSY);
-#endif /* def REAL_DMA */
+ NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY |
+ MR_ENABLE_EOP_INTR);
if (!(hostdata->flags & FLAG_LATE_DMA_SETUP)) {
/* On the Medusa, it is a must to initialize the DMA before
* starting the NCR. This is also the cleaner way for the TT.
*/
- local_irq_save(flags);
hostdata->dma_len = (p & SR_IO) ?
NCR5380_dma_read_setup(instance, d, c) :
NCR5380_dma_write_setup(instance, d, c);
- local_irq_restore(flags);
}
if (p & SR_IO)
@@ -1912,11 +1805,9 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance,
/* On the Falcon, the DMA setup must be done after the last */
/* NCR access, else the DMA setup gets trashed!
*/
- local_irq_save(flags);
hostdata->dma_len = (p & SR_IO) ?
NCR5380_dma_read_setup(instance, d, c) :
NCR5380_dma_write_setup(instance, d, c);
- local_irq_restore(flags);
}
#endif /* !defined(CONFIG_SUN3) */
@@ -1928,23 +1819,22 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance,
* Function : NCR5380_information_transfer (struct Scsi_Host *instance)
*
* Purpose : run through the various SCSI phases and do as the target
- * directs us to. Operates on the currently connected command,
- * instance->connected.
+ * directs us to. Operates on the currently connected command,
+ * instance->connected.
*
* Inputs : instance, instance for which we are doing commands
*
* Side effects : SCSI things happen, the disconnected queue will be
- * modified if a command disconnects, *instance->connected will
- * change.
+ * modified if a command disconnects, *instance->connected will
+ * change.
*
* XXX Note : we need to watch for bus free or a reset condition here
- * to recover from an unexpected bus free condition.
+ * to recover from an unexpected bus free condition.
*/
static void NCR5380_information_transfer(struct Scsi_Host *instance)
{
- SETUP_HOSTDATA(instance);
- unsigned long flags;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
unsigned char msgout = NOP;
int sink = 0;
int len;
@@ -1953,13 +1843,15 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
#endif
unsigned char *data;
unsigned char phase, tmp, extended_msg[10], old_phase = 0xff;
- struct scsi_cmnd *cmd = (struct scsi_cmnd *) hostdata->connected;
+ struct scsi_cmnd *cmd;
#ifdef SUN3_SCSI_VME
dregs->csr |= CSR_INTR;
#endif
- while (1) {
+ while ((cmd = hostdata->connected)) {
+ struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
+
tmp = NCR5380_read(STATUS_REG);
/* We only have a valid SCSI phase when REQ is asserted */
if (tmp & SR_REQ) {
@@ -1984,7 +1876,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
/* this command setup for dma yet? */
if ((count >= DMA_MIN_SIZE) && (sun3_dma_setup_done != cmd)) {
if (cmd->request->cmd_type == REQ_TYPE_FS) {
- sun3scsi_dma_setup(d, count,
+ sun3scsi_dma_setup(instance, d, count,
rq_data_dir(cmd->request));
sun3_dma_setup_done = cmd;
}
@@ -2000,11 +1892,11 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(tmp));
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN |
- ICR_ASSERT_ACK);
+ ICR_ASSERT_ACK);
while (NCR5380_read(STATUS_REG) & SR_REQ)
;
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
- ICR_ASSERT_ATN);
+ ICR_ASSERT_ATN);
sink = 0;
continue;
}
@@ -2012,12 +1904,11 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
switch (phase) {
case PHASE_DATAOUT:
#if (NDEBUG & NDEBUG_NO_DATAOUT)
- printk("scsi%d: NDEBUG_NO_DATAOUT set, attempted DATAOUT "
- "aborted\n", HOSTNO);
+ shost_printk(KERN_DEBUG, instance, "NDEBUG_NO_DATAOUT set, attempted DATAOUT aborted\n");
sink = 1;
do_abort(instance);
cmd->result = DID_ERROR << 16;
- cmd->scsi_done(cmd);
+ complete_cmd(instance, cmd);
return;
#endif
case PHASE_DATAIN:
@@ -2031,13 +1922,10 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
--cmd->SCp.buffers_residual;
cmd->SCp.this_residual = cmd->SCp.buffer->length;
cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
- /* ++roman: Try to merge some scatter-buffers if
- * they are at contiguous physical addresses.
- */
merge_contiguous_buffers(cmd);
- dprintk(NDEBUG_INFORMATION, "scsi%d: %d bytes and %d buffers left\n",
- HOSTNO, cmd->SCp.this_residual,
- cmd->SCp.buffers_residual);
+ dsprintk(NDEBUG_INFORMATION, instance, "%d bytes and %d buffers left\n",
+ cmd->SCp.this_residual,
+ cmd->SCp.buffers_residual);
}
/*
@@ -2051,16 +1939,18 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
*/
/* ++roman: I suggest, this should be
- * #if def(REAL_DMA)
+ * #if def(REAL_DMA)
* instead of leaving REAL_DMA out.
*/
#if defined(REAL_DMA)
- if (
#if !defined(CONFIG_SUN3)
- !cmd->device->borken &&
+ transfersize = 0;
+ if (!cmd->device->borken)
#endif
- (transfersize = NCR5380_dma_xfer_len(instance, cmd, phase)) >= DMA_MIN_SIZE) {
+ transfersize = NCR5380_dma_xfer_len(instance, cmd, phase);
+
+ if (transfersize >= DMA_MIN_SIZE) {
len = transfersize;
cmd->SCp.phase = phase;
if (NCR5380_transfer_dma(instance, &phase,
@@ -2068,16 +1958,15 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
/*
* If the watchdog timer fires, all future
* accesses to this device will use the
- * polled-IO. */
+ * polled-IO.
+ */
scmd_printk(KERN_INFO, cmd,
"switching to slow handshake\n");
cmd->device->borken = 1;
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
- ICR_ASSERT_ATN);
sink = 1;
do_abort(instance);
cmd->result = DID_ERROR << 16;
- cmd->scsi_done(cmd);
+ complete_cmd(instance, cmd);
/* XXX - need to source or sink data here, as appropriate */
} else {
#ifdef REAL_DMA
@@ -2093,9 +1982,13 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
}
} else
#endif /* defined(REAL_DMA) */
+ {
+ spin_unlock_irq(&hostdata->lock);
NCR5380_transfer_pio(instance, &phase,
- (int *)&cmd->SCp.this_residual,
- (unsigned char **)&cmd->SCp.ptr);
+ (int *)&cmd->SCp.this_residual,
+ (unsigned char **)&cmd->SCp.ptr);
+ spin_lock_irq(&hostdata->lock);
+ }
#if defined(CONFIG_SUN3) && defined(REAL_DMA)
/* if we had intended to dma that command clear it */
if (sun3_dma_setup_done == cmd)
@@ -2105,162 +1998,64 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
case PHASE_MSGIN:
len = 1;
data = &tmp;
- NCR5380_write(SELECT_ENABLE_REG, 0); /* disable reselects */
NCR5380_transfer_pio(instance, &phase, &len, &data);
cmd->SCp.Message = tmp;
switch (tmp) {
- /*
- * Linking lets us reduce the time required to get the
- * next command out to the device, hopefully this will
- * mean we don't waste another revolution due to the delays
- * required by ARBITRATION and another SELECTION.
- *
- * In the current implementation proposal, low level drivers
- * merely have to start the next command, pointed to by
- * next_link, done() is called as with unlinked commands.
- */
-#ifdef LINKED
- case LINKED_CMD_COMPLETE:
- case LINKED_FLG_CMD_COMPLETE:
- /* Accept message by clearing ACK */
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-
- dprintk(NDEBUG_LINKED, "scsi%d: target %d lun %llu linked command "
- "complete.\n", HOSTNO, cmd->device->id, cmd->device->lun);
-
- /* Enable reselect interrupts */
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- /*
- * Sanity check : A linked command should only terminate
- * with one of these messages if there are more linked
- * commands available.
- */
-
- if (!cmd->next_link) {
- printk(KERN_NOTICE "scsi%d: target %d lun %llu "
- "linked command complete, no next_link\n",
- HOSTNO, cmd->device->id, cmd->device->lun);
- sink = 1;
- do_abort(instance);
- return;
- }
-
- initialize_SCp(cmd->next_link);
- /* The next command is still part of this process; copy it
- * and don't free it! */
- cmd->next_link->tag = cmd->tag;
- cmd->result = cmd->SCp.Status | (cmd->SCp.Message << 8);
- dprintk(NDEBUG_LINKED, "scsi%d: target %d lun %llu linked request "
- "done, calling scsi_done().\n",
- HOSTNO, cmd->device->id, cmd->device->lun);
- cmd->scsi_done(cmd);
- cmd = hostdata->connected;
- break;
-#endif /* def LINKED */
case ABORT:
case COMMAND_COMPLETE:
/* Accept message by clearing ACK */
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- dprintk(NDEBUG_QUEUES, "scsi%d: command for target %d, lun %llu "
- "completed\n", HOSTNO, cmd->device->id, cmd->device->lun);
+ dsprintk(NDEBUG_QUEUES, instance,
+ "COMMAND COMPLETE %p target %d lun %llu\n",
+ cmd, scmd_id(cmd), cmd->device->lun);
- local_irq_save(flags);
- hostdata->retain_dma_intr++;
hostdata->connected = NULL;
#ifdef SUPPORT_TAGS
cmd_free_tag(cmd);
if (status_byte(cmd->SCp.Status) == QUEUE_FULL) {
- /* Turn a QUEUE FULL status into BUSY, I think the
- * mid level cannot handle QUEUE FULL :-( (The
- * command is retried after BUSY). Also update our
- * queue size to the number of currently issued
- * commands now.
- */
- /* ++Andreas: the mid level code knows about
- QUEUE_FULL now. */
- struct tag_alloc *ta = &hostdata->TagAlloc[scmd_id(cmd)][cmd->device->lun];
- dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %llu returned "
- "QUEUE_FULL after %d commands\n",
- HOSTNO, cmd->device->id, cmd->device->lun,
- ta->nr_allocated);
+ u8 lun = cmd->device->lun;
+ struct tag_alloc *ta = &hostdata->TagAlloc[scmd_id(cmd)][lun];
+
+ dsprintk(NDEBUG_TAGS, instance,
+ "QUEUE_FULL %p target %d lun %d nr_allocated %d\n",
+ cmd, scmd_id(cmd), lun, ta->nr_allocated);
if (ta->queue_size > ta->nr_allocated)
- ta->nr_allocated = ta->queue_size;
+ ta->queue_size = ta->nr_allocated;
}
-#else
- hostdata->busy[cmd->device->id] &= ~(1 << cmd->device->lun);
#endif
- /* Enable reselect interrupts */
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-
- /*
- * I'm not sure what the correct thing to do here is :
- *
- * If the command that just executed is NOT a request
- * sense, the obvious thing to do is to set the result
- * code to the values of the stored parameters.
- *
- * If it was a REQUEST SENSE command, we need some way to
- * differentiate between the failure code of the original
- * and the failure code of the REQUEST sense - the obvious
- * case is success, where we fall through and leave the
- * result code unchanged.
- *
- * The non-obvious place is where the REQUEST SENSE failed
- */
-
- if (cmd->cmnd[0] != REQUEST_SENSE)
- cmd->result = cmd->SCp.Status | (cmd->SCp.Message << 8);
- else if (status_byte(cmd->SCp.Status) != GOOD)
- cmd->result = (cmd->result & 0x00ffff) | (DID_ERROR << 16);
-
- if ((cmd->cmnd[0] == REQUEST_SENSE) &&
- hostdata->ses.cmd_len) {
- scsi_eh_restore_cmnd(cmd, &hostdata->ses);
- hostdata->ses.cmd_len = 0 ;
- }
-
- if ((cmd->cmnd[0] != REQUEST_SENSE) &&
- (status_byte(cmd->SCp.Status) == CHECK_CONDITION)) {
- scsi_eh_prep_cmnd(cmd, &hostdata->ses, NULL, 0, ~0);
-
- dprintk(NDEBUG_AUTOSENSE, "scsi%d: performing request sense\n", HOSTNO);
- LIST(cmd,hostdata->issue_queue);
- SET_NEXT(cmd, hostdata->issue_queue);
- hostdata->issue_queue = (struct scsi_cmnd *) cmd;
- dprintk(NDEBUG_QUEUES, "scsi%d: REQUEST SENSE added to head of "
- "issue queue\n", H_NO(cmd));
- } else {
- cmd->scsi_done(cmd);
+ cmd->result &= ~0xffff;
+ cmd->result |= cmd->SCp.Status;
+ cmd->result |= cmd->SCp.Message << 8;
+
+ if (cmd->cmnd[0] == REQUEST_SENSE)
+ complete_cmd(instance, cmd);
+ else {
+ if (cmd->SCp.Status == SAM_STAT_CHECK_CONDITION ||
+ cmd->SCp.Status == SAM_STAT_COMMAND_TERMINATED) {
+ dsprintk(NDEBUG_QUEUES, instance, "autosense: adding cmd %p to tail of autosense queue\n",
+ cmd);
+ list_add_tail(&ncmd->list,
+ &hostdata->autosense);
+ } else
+ complete_cmd(instance, cmd);
}
- local_irq_restore(flags);
-
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
/*
* Restore phase bits to 0 so an interrupted selection,
* arbitration can resume.
*/
NCR5380_write(TARGET_COMMAND_REG, 0);
- while ((NCR5380_read(STATUS_REG) & SR_BSY) && !hostdata->connected)
- barrier();
+ /* Enable reselect interrupts */
+ NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- local_irq_save(flags);
- hostdata->retain_dma_intr--;
- /* ++roman: For Falcon SCSI, release the lock on the
- * ST-DMA here if no other commands are waiting on the
- * disconnected queue.
- */
maybe_release_dma_irq(instance);
- local_irq_restore(flags);
return;
case MESSAGE_REJECT:
/* Accept message by clearing ACK */
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- /* Enable reselect interrupts */
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
switch (hostdata->last_message) {
case HEAD_OF_QUEUE_TAG:
case ORDERED_QUEUE_TAG:
@@ -2274,27 +2069,20 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
cmd->device->tagged_supported = 0;
hostdata->busy[cmd->device->id] |= (1 << cmd->device->lun);
cmd->tag = TAG_NONE;
- dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %llu rejected "
- "QUEUE_TAG message; tagged queuing "
- "disabled\n",
- HOSTNO, cmd->device->id, cmd->device->lun);
+ dsprintk(NDEBUG_TAGS, instance, "target %d lun %llu rejected QUEUE_TAG message; tagged queuing disabled\n",
+ scmd_id(cmd), cmd->device->lun);
break;
}
break;
case DISCONNECT:
/* Accept message by clearing ACK */
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- local_irq_save(flags);
- cmd->device->disconnect = 1;
- LIST(cmd,hostdata->disconnected_queue);
- SET_NEXT(cmd, hostdata->disconnected_queue);
hostdata->connected = NULL;
- hostdata->disconnected_queue = cmd;
- local_irq_restore(flags);
- dprintk(NDEBUG_QUEUES, "scsi%d: command for target %d lun %llu was "
- "moved from connected to the "
- "disconnected_queue\n", HOSTNO,
- cmd->device->id, cmd->device->lun);
+ list_add(&ncmd->list, &hostdata->disconnected);
+ dsprintk(NDEBUG_INFORMATION | NDEBUG_QUEUES,
+ instance, "connected command %p for target %d lun %llu moved to disconnected queue\n",
+ cmd, scmd_id(cmd), cmd->device->lun);
+
/*
* Restore phase bits to 0 so an interrupted selection,
* arbitration can resume.
@@ -2303,9 +2091,6 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
/* Enable reselect interrupts */
NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- /* Wait for bus free to avoid nasty timeouts */
- while ((NCR5380_read(STATUS_REG) & SR_BSY) && !hostdata->connected)
- barrier();
#ifdef SUN3_SCSI_VME
dregs->csr |= CSR_DMA_ENABLE;
#endif
@@ -2324,37 +2109,30 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
case RESTORE_POINTERS:
/* Accept message by clearing ACK */
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- /* Enable reselect interrupts */
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
break;
case EXTENDED_MESSAGE:
/*
- * Extended messages are sent in the following format :
- * Byte
- * 0 EXTENDED_MESSAGE == 1
- * 1 length (includes one byte for code, doesn't
- * include first two bytes)
- * 2 code
- * 3..length+1 arguments
- *
- * Start the extended message buffer with the EXTENDED_MESSAGE
+ * Start the message buffer with the EXTENDED_MESSAGE
* byte, since spi_print_msg() wants the whole thing.
*/
extended_msg[0] = EXTENDED_MESSAGE;
/* Accept first byte by clearing ACK */
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- dprintk(NDEBUG_EXTENDED, "scsi%d: receiving extended message\n", HOSTNO);
+ spin_unlock_irq(&hostdata->lock);
+
+ dsprintk(NDEBUG_EXTENDED, instance, "receiving extended message\n");
len = 2;
data = extended_msg + 1;
phase = PHASE_MSGIN;
NCR5380_transfer_pio(instance, &phase, &len, &data);
- dprintk(NDEBUG_EXTENDED, "scsi%d: length=%d, code=0x%02x\n", HOSTNO,
- (int)extended_msg[1], (int)extended_msg[2]);
+ dsprintk(NDEBUG_EXTENDED, instance, "length %d, code 0x%02x\n",
+ (int)extended_msg[1],
+ (int)extended_msg[2]);
- if (!len && extended_msg[1] <=
- (sizeof(extended_msg) - 1)) {
+ if (!len && extended_msg[1] > 0 &&
+ extended_msg[1] <= sizeof(extended_msg) - 2) {
/* Accept third byte by clearing ACK */
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
len = extended_msg[1] - 1;
@@ -2362,8 +2140,8 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
phase = PHASE_MSGIN;
NCR5380_transfer_pio(instance, &phase, &len, &data);
- dprintk(NDEBUG_EXTENDED, "scsi%d: message received, residual %d\n",
- HOSTNO, len);
+ dsprintk(NDEBUG_EXTENDED, instance, "message received, residual %d\n",
+ len);
switch (extended_msg[2]) {
case EXTENDED_SDTR:
@@ -2373,15 +2151,18 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
tmp = 0;
}
} else if (len) {
- printk(KERN_NOTICE "scsi%d: error receiving "
- "extended message\n", HOSTNO);
+ shost_printk(KERN_ERR, instance, "error receiving extended message\n");
tmp = 0;
} else {
- printk(KERN_NOTICE "scsi%d: extended message "
- "code %02x length %d is too long\n",
- HOSTNO, extended_msg[2], extended_msg[1]);
+ shost_printk(KERN_NOTICE, instance, "extended message code %02x length %d is too long\n",
+ extended_msg[2], extended_msg[1]);
tmp = 0;
}
+
+ spin_lock_irq(&hostdata->lock);
+ if (!hostdata->connected)
+ return;
+
/* Fall through to reject message */
/*
@@ -2390,8 +2171,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
*/
default:
if (!tmp) {
- printk(KERN_INFO "scsi%d: rejecting message ",
- instance->host_no);
+ shost_printk(KERN_ERR, instance, "rejecting message ");
spi_print_msg(extended_msg);
printk("\n");
} else if (tmp != EXTENDED_MESSAGE)
@@ -2414,18 +2194,11 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
hostdata->last_message = msgout;
NCR5380_transfer_pio(instance, &phase, &len, &data);
if (msgout == ABORT) {
- local_irq_save(flags);
-#ifdef SUPPORT_TAGS
- cmd_free_tag(cmd);
-#else
- hostdata->busy[cmd->device->id] &= ~(1 << cmd->device->lun);
-#endif
hostdata->connected = NULL;
cmd->result = DID_ERROR << 16;
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+ complete_cmd(instance, cmd);
maybe_release_dma_irq(instance);
- local_irq_restore(flags);
- cmd->scsi_done(cmd);
+ NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
return;
}
msgout = NOP;
@@ -2447,22 +2220,25 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
cmd->SCp.Status = tmp;
break;
default:
- printk("scsi%d: unknown phase\n", HOSTNO);
+ shost_printk(KERN_ERR, instance, "unknown phase\n");
NCR5380_dprint(NDEBUG_ANY, instance);
} /* switch(phase) */
- } /* if (tmp * SR_REQ) */
- } /* while (1) */
+ } else {
+ spin_unlock_irq(&hostdata->lock);
+ NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, HZ);
+ spin_lock_irq(&hostdata->lock);
+ }
+ }
}
/*
* Function : void NCR5380_reselect (struct Scsi_Host *instance)
*
* Purpose : does reselection, initializing the instance->connected
- * field to point to the scsi_cmnd for which the I_T_L or I_T_L_Q
- * nexus has been reestablished,
+ * field to point to the scsi_cmnd for which the I_T_L or I_T_L_Q
+ * nexus has been reestablished,
*
* Inputs : instance - this instance of the NCR5380.
- *
*/
@@ -2471,7 +2247,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
static void NCR5380_reselect(struct Scsi_Host *instance)
{
- SETUP_HOSTDATA(instance);
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
unsigned char target_mask;
unsigned char lun;
#ifdef SUPPORT_TAGS
@@ -2480,7 +2256,8 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
unsigned char msg[3];
int __maybe_unused len;
unsigned char __maybe_unused *data, __maybe_unused phase;
- struct scsi_cmnd *tmp = NULL, *prev;
+ struct NCR5380_cmd *ncmd;
+ struct scsi_cmnd *tmp;
/*
* Disable arbitration, etc. since the host adapter obviously
@@ -2488,11 +2265,10 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
*/
NCR5380_write(MODE_REG, MR_BASE);
- hostdata->restart_select = 1;
target_mask = NCR5380_read(CURRENT_SCSI_DATA_REG) & ~(hostdata->id_mask);
- dprintk(NDEBUG_RESELECTION, "scsi%d: reselect\n", HOSTNO);
+ dsprintk(NDEBUG_RESELECTION, instance, "reselect\n");
/*
* At this point, we have detected that our SCSI ID is on the bus,
@@ -2504,17 +2280,22 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
*/
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_BSY);
-
- while (NCR5380_read(STATUS_REG) & SR_SEL)
- ;
+ if (NCR5380_poll_politely(instance,
+ STATUS_REG, SR_SEL, 0, 2 * HZ) < 0) {
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+ return;
+ }
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
/*
* Wait for target to go into MSGIN.
*/
- while (!(NCR5380_read(STATUS_REG) & SR_REQ))
- ;
+ if (NCR5380_poll_politely(instance,
+ STATUS_REG, SR_REQ, SR_REQ, 2 * HZ) < 0) {
+ do_abort(instance);
+ return;
+ }
#if defined(CONFIG_SUN3) && defined(REAL_DMA)
/* acknowledge toggle to MSGIN */
@@ -2527,15 +2308,21 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
data = msg;
phase = PHASE_MSGIN;
NCR5380_transfer_pio(instance, &phase, &len, &data);
+
+ if (len) {
+ do_abort(instance);
+ return;
+ }
#endif
if (!(msg[0] & 0x80)) {
- printk(KERN_DEBUG "scsi%d: expecting IDENTIFY message, got ", HOSTNO);
+ shost_printk(KERN_ERR, instance, "expecting IDENTIFY message, got ");
spi_print_msg(msg);
+ printk("\n");
do_abort(instance);
return;
}
- lun = (msg[0] & 0x07);
+ lun = msg[0] & 0x07;
#if defined(SUPPORT_TAGS) && !defined(CONFIG_SUN3)
/* If the phase is still MSGIN, the target wants to send some more
@@ -2551,8 +2338,8 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
if (!NCR5380_transfer_pio(instance, &phase, &len, &data) &&
msg[1] == SIMPLE_QUEUE_TAG)
tag = msg[2];
- dprintk(NDEBUG_TAGS, "scsi%d: target mask %02x, lun %d sent tag %d at "
- "reselection\n", HOSTNO, target_mask, lun, tag);
+ dsprintk(NDEBUG_TAGS, instance, "reselect: target mask %02x, lun %d sent tag %d\n",
+ target_mask, lun, tag);
}
#endif
@@ -2561,36 +2348,34 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
* just reestablished, and remove it from the disconnected queue.
*/
- for (tmp = (struct scsi_cmnd *) hostdata->disconnected_queue, prev = NULL;
- tmp; prev = tmp, tmp = NEXT(tmp)) {
- if ((target_mask == (1 << tmp->device->id)) && (lun == tmp->device->lun)
+ tmp = NULL;
+ list_for_each_entry(ncmd, &hostdata->disconnected, list) {
+ struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd);
+
+ if (target_mask == (1 << scmd_id(cmd)) &&
+ lun == (u8)cmd->device->lun
#ifdef SUPPORT_TAGS
- && (tag == tmp->tag)
+ && (tag == cmd->tag)
#endif
) {
- if (prev) {
- REMOVE(prev, NEXT(prev), tmp, NEXT(tmp));
- SET_NEXT(prev, NEXT(tmp));
- } else {
- REMOVE(-1, hostdata->disconnected_queue, tmp, NEXT(tmp));
- hostdata->disconnected_queue = NEXT(tmp);
- }
- SET_NEXT(tmp, NULL);
+ list_del(&ncmd->list);
+ tmp = cmd;
break;
}
}
- if (!tmp) {
- printk(KERN_WARNING "scsi%d: warning: target bitmask %02x lun %d "
-#ifdef SUPPORT_TAGS
- "tag %d "
-#endif
- "not in disconnected_queue.\n",
- HOSTNO, target_mask, lun
+ if (tmp) {
+ dsprintk(NDEBUG_RESELECTION | NDEBUG_QUEUES, instance,
+ "reselect: removed %p from disconnected queue\n", tmp);
+ } else {
+
#ifdef SUPPORT_TAGS
- , tag
+ shost_printk(KERN_ERR, instance, "target bitmask 0x%02x lun %d tag %d not in disconnected queue.\n",
+ target_mask, lun, tag);
+#else
+ shost_printk(KERN_ERR, instance, "target bitmask 0x%02x lun %d not in disconnected queue.\n",
+ target_mask, lun);
#endif
- );
/*
* Since we have an established nexus that we can't do anything
* with, we must abort it.
@@ -2614,7 +2399,8 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
}
/* setup this command for dma if not already */
if ((count >= DMA_MIN_SIZE) && (sun3_dma_setup_done != tmp)) {
- sun3scsi_dma_setup(d, count, rq_data_dir(tmp->request));
+ sun3scsi_dma_setup(instance, d, count,
+ rq_data_dir(tmp->request));
sun3_dma_setup_done = tmp;
}
}
@@ -2639,235 +2425,196 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
if (!NCR5380_transfer_pio(instance, &phase, &len, &data) &&
msg[1] == SIMPLE_QUEUE_TAG)
tag = msg[2];
- dprintk(NDEBUG_TAGS, "scsi%d: target mask %02x, lun %d sent tag %d at reselection\n"
- HOSTNO, target_mask, lun, tag);
+ dsprintk(NDEBUG_TAGS, instance, "reselect: target mask %02x, lun %d sent tag %d\n"
+ target_mask, lun, tag);
}
#endif
hostdata->connected = tmp;
- dprintk(NDEBUG_RESELECTION, "scsi%d: nexus established, target = %d, lun = %llu, tag = %d\n",
- HOSTNO, tmp->device->id, tmp->device->lun, tmp->tag);
+ dsprintk(NDEBUG_RESELECTION, instance, "nexus established, target %d, lun %llu, tag %d\n",
+ scmd_id(tmp), tmp->device->lun, tmp->tag);
}
-/*
- * Function : int NCR5380_abort (struct scsi_cmnd *cmd)
- *
- * Purpose : abort a command
- *
- * Inputs : cmd - the scsi_cmnd to abort, code - code to set the
- * host byte of the result field to, if zero DID_ABORTED is
- * used.
- *
- * Returns : SUCCESS - success, FAILED on failure.
- *
- * XXX - there is no way to abort the command that is currently
- * connected, you have to wait for it to complete. If this is
- * a problem, we could implement longjmp() / setjmp(), setjmp()
- * called where the loop started in NCR5380_main().
+/**
+ * list_find_cmd - test for presence of a command in a linked list
+ * @haystack: list of commands
+ * @needle: command to search for
*/
-static
-int NCR5380_abort(struct scsi_cmnd *cmd)
+static bool list_find_cmd(struct list_head *haystack,
+ struct scsi_cmnd *needle)
{
- struct Scsi_Host *instance = cmd->device->host;
- SETUP_HOSTDATA(instance);
- struct scsi_cmnd *tmp, **prev;
- unsigned long flags;
+ struct NCR5380_cmd *ncmd;
- scmd_printk(KERN_NOTICE, cmd, "aborting command\n");
+ list_for_each_entry(ncmd, haystack, list)
+ if (NCR5380_to_scmd(ncmd) == needle)
+ return true;
+ return false;
+}
- NCR5380_print_status(instance);
+/**
+ * list_remove_cmd - remove a command from linked list
+ * @haystack: list of commands
+ * @needle: command to remove
+ */
- local_irq_save(flags);
+static bool list_del_cmd(struct list_head *haystack,
+ struct scsi_cmnd *needle)
+{
+ if (list_find_cmd(haystack, needle)) {
+ struct NCR5380_cmd *ncmd = scsi_cmd_priv(needle);
- dprintk(NDEBUG_ABORT, "scsi%d: abort called basr 0x%02x, sr 0x%02x\n", HOSTNO,
- NCR5380_read(BUS_AND_STATUS_REG),
- NCR5380_read(STATUS_REG));
+ list_del(&ncmd->list);
+ return true;
+ }
+ return false;
+}
-#if 1
- /*
- * Case 1 : If the command is the currently executing command,
- * we'll set the aborted flag and return control so that
- * information transfer routine can exit cleanly.
- */
+/**
+ * NCR5380_abort - scsi host eh_abort_handler() method
+ * @cmd: the command to be aborted
+ *
+ * Try to abort a given command by removing it from queues and/or sending
+ * the target an abort message. This may not succeed in causing a target
+ * to abort the command. Nonetheless, the low-level driver must forget about
+ * the command because the mid-layer reclaims it and it may be re-issued.
+ *
+ * The normal path taken by a command is as follows. For EH we trace this
+ * same path to locate and abort the command.
+ *
+ * unissued -> selecting -> [unissued -> selecting ->]... connected ->
+ * [disconnected -> connected ->]...
+ * [autosense -> connected ->] done
+ *
+ * If cmd is unissued then just remove it.
+ * If cmd is disconnected, try to select the target.
+ * If cmd is connected, try to send an abort message.
+ * If cmd is waiting for autosense, give it a chance to complete but check
+ * that it isn't left connected.
+ * If cmd was not found at all then presumably it has already been completed,
+ * in which case return SUCCESS to try to avoid further EH measures.
+ * If the command has not completed yet, we must not fail to find it.
+ */
- if (hostdata->connected == cmd) {
+static int NCR5380_abort(struct scsi_cmnd *cmd)
+{
+ struct Scsi_Host *instance = cmd->device->host;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ unsigned long flags;
+ int result = SUCCESS;
- dprintk(NDEBUG_ABORT, "scsi%d: aborting connected command\n", HOSTNO);
- /*
- * We should perform BSY checking, and make sure we haven't slipped
- * into BUS FREE.
- */
+ spin_lock_irqsave(&hostdata->lock, flags);
- /* NCR5380_write(INITIATOR_COMMAND_REG, ICR_ASSERT_ATN); */
- /*
- * Since we can't change phases until we've completed the current
- * handshake, we have to source or sink a byte of data if the current
- * phase is not MSGOUT.
- */
+#if (NDEBUG & NDEBUG_ANY)
+ scmd_printk(KERN_INFO, cmd, __func__);
+#endif
+ NCR5380_dprint(NDEBUG_ANY, instance);
+ NCR5380_dprint_phase(NDEBUG_ANY, instance);
- /*
- * Return control to the executing NCR drive so we can clear the
- * aborted flag and get back into our main loop.
- */
+ if (list_del_cmd(&hostdata->unissued, cmd)) {
+ dsprintk(NDEBUG_ABORT, instance,
+ "abort: removed %p from issue queue\n", cmd);
+ cmd->result = DID_ABORT << 16;
+ cmd->scsi_done(cmd); /* No tag or busy flag to worry about */
+ }
- if (do_abort(instance) == 0) {
- hostdata->aborted = 1;
- hostdata->connected = NULL;
- cmd->result = DID_ABORT << 16;
-#ifdef SUPPORT_TAGS
- cmd_free_tag(cmd);
-#else
- hostdata->busy[cmd->device->id] &= ~(1 << cmd->device->lun);
-#endif
- maybe_release_dma_irq(instance);
- local_irq_restore(flags);
- cmd->scsi_done(cmd);
- return SUCCESS;
- } else {
- local_irq_restore(flags);
- printk("scsi%d: abort of connected command failed!\n", HOSTNO);
- return FAILED;
- }
+ if (hostdata->selecting == cmd) {
+ dsprintk(NDEBUG_ABORT, instance,
+ "abort: cmd %p == selecting\n", cmd);
+ hostdata->selecting = NULL;
+ cmd->result = DID_ABORT << 16;
+ complete_cmd(instance, cmd);
+ goto out;
}
-#endif
- /*
- * Case 2 : If the command hasn't been issued yet, we simply remove it
- * from the issue queue.
- */
- for (prev = (struct scsi_cmnd **)&(hostdata->issue_queue),
- tmp = (struct scsi_cmnd *)hostdata->issue_queue;
- tmp; prev = NEXTADDR(tmp), tmp = NEXT(tmp)) {
- if (cmd == tmp) {
- REMOVE(5, *prev, tmp, NEXT(tmp));
- (*prev) = NEXT(tmp);
- SET_NEXT(tmp, NULL);
- tmp->result = DID_ABORT << 16;
- maybe_release_dma_irq(instance);
- local_irq_restore(flags);
- dprintk(NDEBUG_ABORT, "scsi%d: abort removed command from issue queue.\n",
- HOSTNO);
- /* Tagged queuing note: no tag to free here, hasn't been assigned
- * yet... */
- tmp->scsi_done(tmp);
- return SUCCESS;
+ if (list_del_cmd(&hostdata->disconnected, cmd)) {
+ dsprintk(NDEBUG_ABORT, instance,
+ "abort: removed %p from disconnected list\n", cmd);
+ cmd->result = DID_ERROR << 16;
+ if (!hostdata->connected)
+ NCR5380_select(instance, cmd);
+ if (hostdata->connected != cmd) {
+ complete_cmd(instance, cmd);
+ result = FAILED;
+ goto out;
}
}
- /*
- * Case 3 : If any commands are connected, we're going to fail the abort
- * and let the high level SCSI driver retry at a later time or
- * issue a reset.
- *
- * Timeouts, and therefore aborted commands, will be highly unlikely
- * and handling them cleanly in this situation would make the common
- * case of noresets less efficient, and would pollute our code. So,
- * we fail.
- */
+ if (hostdata->connected == cmd) {
+ dsprintk(NDEBUG_ABORT, instance, "abort: cmd %p is connected\n", cmd);
+ hostdata->connected = NULL;
+ if (do_abort(instance)) {
+ set_host_byte(cmd, DID_ERROR);
+ complete_cmd(instance, cmd);
+ result = FAILED;
+ goto out;
+ }
+ set_host_byte(cmd, DID_ABORT);
+#ifdef REAL_DMA
+ hostdata->dma_len = 0;
+#endif
+ if (cmd->cmnd[0] == REQUEST_SENSE)
+ complete_cmd(instance, cmd);
+ else {
+ struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
- if (hostdata->connected) {
- local_irq_restore(flags);
- dprintk(NDEBUG_ABORT, "scsi%d: abort failed, command connected.\n", HOSTNO);
- return FAILED;
+ /* Perform autosense for this command */
+ list_add(&ncmd->list, &hostdata->autosense);
+ }
}
- /*
- * Case 4: If the command is currently disconnected from the bus, and
- * there are no connected commands, we reconnect the I_T_L or
- * I_T_L_Q nexus associated with it, go into message out, and send
- * an abort message.
- *
- * This case is especially ugly. In order to reestablish the nexus, we
- * need to call NCR5380_select(). The easiest way to implement this
- * function was to abort if the bus was busy, and let the interrupt
- * handler triggered on the SEL for reselect take care of lost arbitrations
- * where necessary, meaning interrupts need to be enabled.
- *
- * When interrupts are enabled, the queues may change - so we
- * can't remove it from the disconnected queue before selecting it
- * because that could cause a failure in hashing the nexus if that
- * device reselected.
- *
- * Since the queues may change, we can't use the pointers from when we
- * first locate it.
- *
- * So, we must first locate the command, and if NCR5380_select()
- * succeeds, then issue the abort, relocate the command and remove
- * it from the disconnected queue.
- */
-
- for (tmp = (struct scsi_cmnd *) hostdata->disconnected_queue; tmp;
- tmp = NEXT(tmp)) {
- if (cmd == tmp) {
- local_irq_restore(flags);
- dprintk(NDEBUG_ABORT, "scsi%d: aborting disconnected command.\n", HOSTNO);
-
- if (NCR5380_select(instance, cmd))
- return FAILED;
-
- dprintk(NDEBUG_ABORT, "scsi%d: nexus reestablished.\n", HOSTNO);
-
- do_abort(instance);
-
- local_irq_save(flags);
- for (prev = (struct scsi_cmnd **)&(hostdata->disconnected_queue),
- tmp = (struct scsi_cmnd *)hostdata->disconnected_queue;
- tmp; prev = NEXTADDR(tmp), tmp = NEXT(tmp)) {
- if (cmd == tmp) {
- REMOVE(5, *prev, tmp, NEXT(tmp));
- *prev = NEXT(tmp);
- SET_NEXT(tmp, NULL);
- tmp->result = DID_ABORT << 16;
- /* We must unlock the tag/LUN immediately here, since the
- * target goes to BUS FREE and doesn't send us another
- * message (COMMAND_COMPLETE or the like)
- */
-#ifdef SUPPORT_TAGS
- cmd_free_tag(tmp);
-#else
- hostdata->busy[cmd->device->id] &= ~(1 << cmd->device->lun);
-#endif
- maybe_release_dma_irq(instance);
- local_irq_restore(flags);
- tmp->scsi_done(tmp);
- return SUCCESS;
- }
- }
+ if (list_find_cmd(&hostdata->autosense, cmd)) {
+ dsprintk(NDEBUG_ABORT, instance,
+ "abort: found %p on sense queue\n", cmd);
+ spin_unlock_irqrestore(&hostdata->lock, flags);
+ queue_work(hostdata->work_q, &hostdata->main_task);
+ msleep(1000);
+ spin_lock_irqsave(&hostdata->lock, flags);
+ if (list_del_cmd(&hostdata->autosense, cmd)) {
+ dsprintk(NDEBUG_ABORT, instance,
+ "abort: removed %p from sense queue\n", cmd);
+ set_host_byte(cmd, DID_ABORT);
+ complete_cmd(instance, cmd);
+ goto out;
}
}
- /* Maybe it is sufficient just to release the ST-DMA lock... (if
- * possible at all) At least, we should check if the lock could be
- * released after the abort, in case it is kept due to some bug.
- */
- maybe_release_dma_irq(instance);
- local_irq_restore(flags);
+ if (hostdata->connected == cmd) {
+ dsprintk(NDEBUG_ABORT, instance, "abort: cmd %p is connected\n", cmd);
+ hostdata->connected = NULL;
+ if (do_abort(instance)) {
+ set_host_byte(cmd, DID_ERROR);
+ complete_cmd(instance, cmd);
+ result = FAILED;
+ goto out;
+ }
+ set_host_byte(cmd, DID_ABORT);
+#ifdef REAL_DMA
+ hostdata->dma_len = 0;
+#endif
+ complete_cmd(instance, cmd);
+ }
- /*
- * Case 5 : If we reached this point, the command was not found in any of
- * the queues.
- *
- * We probably reached this point because of an unlikely race condition
- * between the command completing successfully and the abortion code,
- * so we won't panic, but we will notify the user in case something really
- * broke.
- */
+out:
+ if (result == FAILED)
+ dsprintk(NDEBUG_ABORT, instance, "abort: failed to abort %p\n", cmd);
+ else
+ dsprintk(NDEBUG_ABORT, instance, "abort: successfully aborted %p\n", cmd);
- printk(KERN_INFO "scsi%d: warning : SCSI command probably completed successfully before abortion\n", HOSTNO);
+ queue_work(hostdata->work_q, &hostdata->main_task);
+ maybe_release_dma_irq(instance);
+ spin_unlock_irqrestore(&hostdata->lock, flags);
- return FAILED;
+ return result;
}
-/*
- * Function : int NCR5380_reset (struct scsi_cmnd *cmd)
- *
- * Purpose : reset the SCSI bus.
- *
- * Returns : SUCCESS or FAILURE
+/**
+ * NCR5380_bus_reset - reset the SCSI bus
+ * @cmd: SCSI command undergoing EH
*
+ * Returns SUCCESS
*/
static int NCR5380_bus_reset(struct scsi_cmnd *cmd)
@@ -2876,23 +2623,22 @@ static int NCR5380_bus_reset(struct scsi_cmnd *cmd)
struct NCR5380_hostdata *hostdata = shost_priv(instance);
int i;
unsigned long flags;
+ struct NCR5380_cmd *ncmd;
- NCR5380_print_status(instance);
+ spin_lock_irqsave(&hostdata->lock, flags);
+
+#if (NDEBUG & NDEBUG_ANY)
+ scmd_printk(KERN_INFO, cmd, __func__);
+#endif
+ NCR5380_dprint(NDEBUG_ANY, instance);
+ NCR5380_dprint_phase(NDEBUG_ANY, instance);
+
+ do_reset(instance);
- /* get in phase */
- NCR5380_write(TARGET_COMMAND_REG,
- PHASE_SR_TO_TCR(NCR5380_read(STATUS_REG)));
- /* assert RST */
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST);
- udelay(40);
/* reset NCR registers */
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
NCR5380_write(MODE_REG, MR_BASE);
NCR5380_write(TARGET_COMMAND_REG, 0);
NCR5380_write(SELECT_ENABLE_REG, 0);
- /* ++roman: reset interrupt condition! otherwise no interrupts don't get
- * through anymore ... */
- (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
/* After the reset, there are no more connected or disconnected commands
* and no busy units; so clear the low-level status here to avoid
@@ -2900,17 +2646,34 @@ static int NCR5380_bus_reset(struct scsi_cmnd *cmd)
* commands!
*/
- if (hostdata->issue_queue)
- dprintk(NDEBUG_ABORT, "scsi%d: reset aborted issued command(s)\n", H_NO(cmd));
- if (hostdata->connected)
- dprintk(NDEBUG_ABORT, "scsi%d: reset aborted a connected command\n", H_NO(cmd));
- if (hostdata->disconnected_queue)
- dprintk(NDEBUG_ABORT, "scsi%d: reset aborted disconnected command(s)\n", H_NO(cmd));
+ hostdata->selecting = NULL;
+
+ list_for_each_entry(ncmd, &hostdata->disconnected, list) {
+ struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd);
+
+ set_host_byte(cmd, DID_RESET);
+ cmd->scsi_done(cmd);
+ }
+
+ list_for_each_entry(ncmd, &hostdata->autosense, list) {
+ struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd);
+
+ set_host_byte(cmd, DID_RESET);
+ cmd->scsi_done(cmd);
+ }
+
+ if (hostdata->connected) {
+ set_host_byte(hostdata->connected, DID_RESET);
+ complete_cmd(instance, hostdata->connected);
+ hostdata->connected = NULL;
+ }
+
+ if (hostdata->sensing) {
+ set_host_byte(hostdata->connected, DID_RESET);
+ complete_cmd(instance, hostdata->sensing);
+ hostdata->sensing = NULL;
+ }
- local_irq_save(flags);
- hostdata->issue_queue = NULL;
- hostdata->connected = NULL;
- hostdata->disconnected_queue = NULL;
#ifdef SUPPORT_TAGS
free_all_tags(hostdata);
#endif
@@ -2920,8 +2683,9 @@ static int NCR5380_bus_reset(struct scsi_cmnd *cmd)
hostdata->dma_len = 0;
#endif
+ queue_work(hostdata->work_q, &hostdata->main_task);
maybe_release_dma_irq(instance);
- local_irq_restore(flags);
+ spin_unlock_irqrestore(&hostdata->lock, flags);
return SUCCESS;
}
diff --git a/drivers/scsi/atari_scsi.c b/drivers/scsi/atari_scsi.c
index 5ede3da..78d1b29 100644
--- a/drivers/scsi/atari_scsi.c
+++ b/drivers/scsi/atari_scsi.c
@@ -66,7 +66,6 @@
#include <linux/module.h>
#include <linux/types.h>
-#include <linux/delay.h>
#include <linux/blkdev.h>
#include <linux/interrupt.h>
#include <linux/init.h>
@@ -98,7 +97,6 @@
#define NCR5380_queue_command atari_scsi_queue_command
#define NCR5380_abort atari_scsi_abort
-#define NCR5380_show_info atari_scsi_show_info
#define NCR5380_info atari_scsi_info
#define NCR5380_dma_read_setup(instance, data, count) \
@@ -161,23 +159,10 @@ static inline unsigned long SCSI_DMA_GETADR(void)
return adr;
}
-#define HOSTDATA_DMALEN (((struct NCR5380_hostdata *) \
- (atari_scsi_host->hostdata))->dma_len)
-
-/* Time (in jiffies) to wait after a reset; the SCSI standard calls for 250ms,
- * we usually do 0.5s to be on the safe side. But Toshiba CD-ROMs once more
- * need ten times the standard value... */
-#ifndef CONFIG_ATARI_SCSI_TOSHIBA_DELAY
-#define AFTER_RESET_DELAY (HZ/2)
-#else
-#define AFTER_RESET_DELAY (5*HZ/2)
-#endif
-
#ifdef REAL_DMA
static void atari_scsi_fetch_restbytes(void);
#endif
-static struct Scsi_Host *atari_scsi_host;
static unsigned char (*atari_scsi_reg_read)(unsigned char reg);
static void (*atari_scsi_reg_write)(unsigned char reg, unsigned char value);
@@ -208,12 +193,12 @@ static int setup_cmd_per_lun = -1;
module_param(setup_cmd_per_lun, int, 0);
static int setup_sg_tablesize = -1;
module_param(setup_sg_tablesize, int, 0);
-#ifdef SUPPORT_TAGS
static int setup_use_tagged_queuing = -1;
module_param(setup_use_tagged_queuing, int, 0);
-#endif
static int setup_hostid = -1;
module_param(setup_hostid, int, 0);
+static int setup_toshiba_delay = -1;
+module_param(setup_toshiba_delay, int, 0);
#if defined(REAL_DMA)
@@ -273,15 +258,17 @@ static void scsi_dma_buserr(int irq, void *dummy)
#endif
-static irqreturn_t scsi_tt_intr(int irq, void *dummy)
+static irqreturn_t scsi_tt_intr(int irq, void *dev)
{
#ifdef REAL_DMA
+ struct Scsi_Host *instance = dev;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
int dma_stat;
dma_stat = tt_scsi_dma.dma_ctrl;
- dprintk(NDEBUG_INTR, "scsi%d: NCR5380 interrupt, DMA status = %02x\n",
- atari_scsi_host->host_no, dma_stat & 0xff);
+ dsprintk(NDEBUG_INTR, instance, "NCR5380 interrupt, DMA status = %02x\n",
+ dma_stat & 0xff);
/* Look if it was the DMA that has interrupted: First possibility
* is that a bus error occurred...
@@ -304,7 +291,8 @@ static irqreturn_t scsi_tt_intr(int irq, void *dummy)
* data reg!
*/
if ((dma_stat & 0x02) && !(dma_stat & 0x40)) {
- atari_dma_residual = HOSTDATA_DMALEN - (SCSI_DMA_READ_P(dma_addr) - atari_dma_startaddr);
+ atari_dma_residual = hostdata->dma_len -
+ (SCSI_DMA_READ_P(dma_addr) - atari_dma_startaddr);
dprintk(NDEBUG_DMA, "SCSI DMA: There are %ld residual bytes.\n",
atari_dma_residual);
@@ -356,15 +344,17 @@ static irqreturn_t scsi_tt_intr(int irq, void *dummy)
#endif /* REAL_DMA */
- NCR5380_intr(irq, dummy);
+ NCR5380_intr(irq, dev);
return IRQ_HANDLED;
}
-static irqreturn_t scsi_falcon_intr(int irq, void *dummy)
+static irqreturn_t scsi_falcon_intr(int irq, void *dev)
{
#ifdef REAL_DMA
+ struct Scsi_Host *instance = dev;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
int dma_stat;
/* Turn off DMA and select sector counter register before
@@ -399,7 +389,7 @@ static irqreturn_t scsi_falcon_intr(int irq, void *dummy)
printk(KERN_ERR "SCSI DMA error: %ld bytes lost in "
"ST-DMA fifo\n", transferred & 15);
- atari_dma_residual = HOSTDATA_DMALEN - transferred;
+ atari_dma_residual = hostdata->dma_len - transferred;
dprintk(NDEBUG_DMA, "SCSI DMA: There are %ld residual bytes.\n",
atari_dma_residual);
} else
@@ -411,13 +401,14 @@ static irqreturn_t scsi_falcon_intr(int irq, void *dummy)
* data to the original destination address.
*/
memcpy(atari_dma_orig_addr, phys_to_virt(atari_dma_startaddr),
- HOSTDATA_DMALEN - atari_dma_residual);
+ hostdata->dma_len - atari_dma_residual);
atari_dma_orig_addr = NULL;
}
#endif /* REAL_DMA */
- NCR5380_intr(irq, dummy);
+ NCR5380_intr(irq, dev);
+
return IRQ_HANDLED;
}
@@ -488,7 +479,7 @@ static int __init atari_scsi_setup(char *str)
* Defaults depend on TT or Falcon, determined at run time.
* Negative values mean don't change.
*/
- int ints[6];
+ int ints[8];
get_options(str, ARRAY_SIZE(ints), ints);
@@ -504,10 +495,11 @@ static int __init atari_scsi_setup(char *str)
setup_sg_tablesize = ints[3];
if (ints[0] >= 4)
setup_hostid = ints[4];
-#ifdef SUPPORT_TAGS
if (ints[0] >= 5)
setup_use_tagged_queuing = ints[5];
-#endif
+ /* ints[6] (use_pdma) is ignored */
+ if (ints[0] >= 7)
+ setup_toshiba_delay = ints[7];
return 1;
}
@@ -516,38 +508,6 @@ __setup("atascsi=", atari_scsi_setup);
#endif /* !MODULE */
-#ifdef CONFIG_ATARI_SCSI_RESET_BOOT
-static void __init atari_scsi_reset_boot(void)
-{
- unsigned long end;
-
- /*
- * Do a SCSI reset to clean up the bus during initialization. No messing
- * with the queues, interrupts, or locks necessary here.
- */
-
- printk("Atari SCSI: resetting the SCSI bus...");
-
- /* get in phase */
- NCR5380_write(TARGET_COMMAND_REG,
- PHASE_SR_TO_TCR(NCR5380_read(STATUS_REG)));
-
- /* assert RST */
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST);
- /* The min. reset hold time is 25us, so 40us should be enough */
- udelay(50);
- /* reset RST and interrupt */
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-
- end = jiffies + AFTER_RESET_DELAY;
- while (time_before(jiffies, end))
- barrier();
-
- printk(" done\n");
-}
-#endif
-
#if defined(REAL_DMA)
static unsigned long atari_scsi_dma_setup(struct Scsi_Host *instance,
@@ -815,14 +775,14 @@ static int atari_scsi_bus_reset(struct scsi_cmnd *cmd)
static struct scsi_host_template atari_scsi_template = {
.module = THIS_MODULE,
.proc_name = DRV_MODULE_NAME,
- .show_info = atari_scsi_show_info,
.name = "Atari native SCSI",
.info = atari_scsi_info,
.queuecommand = atari_scsi_queue_command,
.eh_abort_handler = atari_scsi_abort,
.eh_bus_reset_handler = atari_scsi_bus_reset,
.this_id = 7,
- .use_clustering = DISABLE_CLUSTERING
+ .use_clustering = DISABLE_CLUSTERING,
+ .cmd_size = NCR5380_CMD_SIZE,
};
static int __init atari_scsi_probe(struct platform_device *pdev)
@@ -880,7 +840,7 @@ static int __init atari_scsi_probe(struct platform_device *pdev)
} else {
/* Test if a host id is set in the NVRam */
if (ATARIHW_PRESENT(TT_CLK) && nvram_check_checksum()) {
- unsigned char b = nvram_read_byte(14);
+ unsigned char b = nvram_read_byte(16);
/* Arbitration enabled? (for TOS)
* If yes, use configured host ID
@@ -915,21 +875,18 @@ static int __init atari_scsi_probe(struct platform_device *pdev)
error = -ENOMEM;
goto fail_alloc;
}
- atari_scsi_host = instance;
-
-#ifdef CONFIG_ATARI_SCSI_RESET_BOOT
- atari_scsi_reset_boot();
-#endif
instance->irq = irq->start;
host_flags |= IS_A_TT() ? 0 : FLAG_LATE_DMA_SETUP;
-
#ifdef SUPPORT_TAGS
host_flags |= setup_use_tagged_queuing > 0 ? FLAG_TAGGED_QUEUING : 0;
#endif
+ host_flags |= setup_toshiba_delay > 0 ? FLAG_TOSHIBA_DELAY : 0;
- NCR5380_init(instance, host_flags);
+ error = NCR5380_init(instance, host_flags);
+ if (error)
+ goto fail_init;
if (IS_A_TT()) {
error = request_irq(instance->irq, scsi_tt_intr, 0,
@@ -975,6 +932,8 @@ static int __init atari_scsi_probe(struct platform_device *pdev)
#endif
}
+ NCR5380_maybe_reset_bus(instance);
+
error = scsi_add_host(instance, NULL);
if (error)
goto fail_host;
@@ -989,6 +948,7 @@ fail_host:
free_irq(instance->irq, instance);
fail_irq:
NCR5380_exit(instance);
+fail_init:
scsi_host_put(instance);
fail_alloc:
if (atari_dma_buffer)
diff --git a/drivers/scsi/be2iscsi/Kconfig b/drivers/scsi/be2iscsi/Kconfig
index 4e7cad2..bad5f32 100644
--- a/drivers/scsi/be2iscsi/Kconfig
+++ b/drivers/scsi/be2iscsi/Kconfig
@@ -3,6 +3,7 @@ config BE2ISCSI
depends on PCI && SCSI && NET
select SCSI_ISCSI_ATTRS
select ISCSI_BOOT_SYSFS
+ select IRQ_POLL
help
This driver implements the iSCSI functionality for Emulex
diff --git a/drivers/scsi/be2iscsi/be.h b/drivers/scsi/be2iscsi/be.h
index 77f992e..a41c643 100644
--- a/drivers/scsi/be2iscsi/be.h
+++ b/drivers/scsi/be2iscsi/be.h
@@ -20,7 +20,7 @@
#include <linux/pci.h>
#include <linux/if_vlan.h>
-#include <linux/blk-iopoll.h>
+#include <linux/irq_poll.h>
#define FW_VER_LEN 32
#define MCC_Q_LEN 128
#define MCC_CQ_LEN 256
@@ -101,7 +101,7 @@ struct be_eq_obj {
struct beiscsi_hba *phba;
struct be_queue_info *cq;
struct work_struct work_cqs; /* Work Item */
- struct blk_iopoll iopoll;
+ struct irq_poll iopoll;
};
struct be_mcc_obj {
diff --git a/drivers/scsi/be2iscsi/be_iscsi.c b/drivers/scsi/be2iscsi/be_iscsi.c
index b7087ba..022e87b 100644
--- a/drivers/scsi/be2iscsi/be_iscsi.c
+++ b/drivers/scsi/be2iscsi/be_iscsi.c
@@ -1292,9 +1292,9 @@ static void beiscsi_flush_cq(struct beiscsi_hba *phba)
for (i = 0; i < phba->num_cpus; i++) {
pbe_eq = &phwi_context->be_eq[i];
- blk_iopoll_disable(&pbe_eq->iopoll);
+ irq_poll_disable(&pbe_eq->iopoll);
beiscsi_process_cq(pbe_eq);
- blk_iopoll_enable(&pbe_eq->iopoll);
+ irq_poll_enable(&pbe_eq->iopoll);
}
}
diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c
index fe0c514..cb9072a 100644
--- a/drivers/scsi/be2iscsi/be_main.c
+++ b/drivers/scsi/be2iscsi/be_main.c
@@ -910,8 +910,7 @@ static irqreturn_t be_isr_msix(int irq, void *dev_id)
num_eq_processed = 0;
while (eqe->dw[offsetof(struct amap_eq_entry, valid) / 32]
& EQE_VALID_MASK) {
- if (!blk_iopoll_sched_prep(&pbe_eq->iopoll))
- blk_iopoll_sched(&pbe_eq->iopoll);
+ irq_poll_sched(&pbe_eq->iopoll);
AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0);
queue_tail_inc(eq);
@@ -972,8 +971,7 @@ static irqreturn_t be_isr(int irq, void *dev_id)
spin_unlock_irqrestore(&phba->isr_lock, flags);
num_mcceq_processed++;
} else {
- if (!blk_iopoll_sched_prep(&pbe_eq->iopoll))
- blk_iopoll_sched(&pbe_eq->iopoll);
+ irq_poll_sched(&pbe_eq->iopoll);
num_ioeq_processed++;
}
AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0);
@@ -2295,7 +2293,7 @@ void beiscsi_process_all_cqs(struct work_struct *work)
hwi_ring_eq_db(phba, pbe_eq->q.id, 0, 0, 1, 1);
}
-static int be_iopoll(struct blk_iopoll *iop, int budget)
+static int be_iopoll(struct irq_poll *iop, int budget)
{
unsigned int ret;
struct beiscsi_hba *phba;
@@ -2306,7 +2304,7 @@ static int be_iopoll(struct blk_iopoll *iop, int budget)
pbe_eq->cq_count += ret;
if (ret < budget) {
phba = pbe_eq->phba;
- blk_iopoll_complete(iop);
+ irq_poll_complete(iop);
beiscsi_log(phba, KERN_INFO,
BEISCSI_LOG_CONFIG | BEISCSI_LOG_IO,
"BM_%d : rearm pbe_eq->q.id =%d\n",
@@ -5293,7 +5291,7 @@ static void beiscsi_quiesce(struct beiscsi_hba *phba,
for (i = 0; i < phba->num_cpus; i++) {
pbe_eq = &phwi_context->be_eq[i];
- blk_iopoll_disable(&pbe_eq->iopoll);
+ irq_poll_disable(&pbe_eq->iopoll);
}
if (unload_state == BEISCSI_CLEAN_UNLOAD) {
@@ -5579,9 +5577,8 @@ static void beiscsi_eeh_resume(struct pci_dev *pdev)
for (i = 0; i < phba->num_cpus; i++) {
pbe_eq = &phwi_context->be_eq[i];
- blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget,
+ irq_poll_init(&pbe_eq->iopoll, be_iopoll_budget,
be_iopoll);
- blk_iopoll_enable(&pbe_eq->iopoll);
}
i = (phba->msix_enabled) ? i : 0;
@@ -5752,9 +5749,8 @@ static int beiscsi_dev_probe(struct pci_dev *pcidev,
for (i = 0; i < phba->num_cpus; i++) {
pbe_eq = &phwi_context->be_eq[i];
- blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget,
+ irq_poll_init(&pbe_eq->iopoll, be_iopoll_budget,
be_iopoll);
- blk_iopoll_enable(&pbe_eq->iopoll);
}
i = (phba->msix_enabled) ? i : 0;
@@ -5795,7 +5791,7 @@ free_blkenbld:
destroy_workqueue(phba->wq);
for (i = 0; i < phba->num_cpus; i++) {
pbe_eq = &phwi_context->be_eq[i];
- blk_iopoll_disable(&pbe_eq->iopoll);
+ irq_poll_disable(&pbe_eq->iopoll);
}
free_twq:
beiscsi_clean_port(phba);
diff --git a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c
index 0e2bee9..e22a268 100644
--- a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c
+++ b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c
@@ -57,7 +57,7 @@ MODULE_PARM_DESC(cxgb3i_snd_win, "TCP send window in bytes (default=128KB)");
static int cxgb3i_rx_credit_thres = 10 * 1024;
module_param(cxgb3i_rx_credit_thres, int, 0644);
-MODULE_PARM_DESC(rx_credit_thres,
+MODULE_PARM_DESC(cxgb3i_rx_credit_thres,
"RX credits return threshold in bytes (default=10KB)");
static unsigned int cxgb3i_max_connect = 8 * 1024;
diff --git a/drivers/scsi/dmx3191d.c b/drivers/scsi/dmx3191d.c
index 3e08812..6c14e68 100644
--- a/drivers/scsi/dmx3191d.c
+++ b/drivers/scsi/dmx3191d.c
@@ -36,17 +36,10 @@
#define DONT_USE_INTR
-#define NCR5380_read(reg) inb(port + reg)
-#define NCR5380_write(reg, value) outb(value, port + reg)
+#define NCR5380_read(reg) inb(instance->io_port + reg)
+#define NCR5380_write(reg, value) outb(value, instance->io_port + reg)
#define NCR5380_implementation_fields /* none */
-#define NCR5380_local_declare() unsigned int port
-#define NCR5380_setup(instance) port = instance->io_port
-
-/*
- * Includes needed for NCR5380.[ch] (XXX: Move them to NCR5380.h)
- */
-#include <linux/delay.h>
#include "NCR5380.h"
#include "NCR5380.c"
@@ -56,6 +49,7 @@
static struct scsi_host_template dmx3191d_driver_template = {
+ .module = THIS_MODULE,
.proc_name = DMX3191D_DRIVER_NAME,
.name = "Domex DMX3191D",
.info = NCR5380_info,
@@ -67,6 +61,8 @@ static struct scsi_host_template dmx3191d_driver_template = {
.sg_tablesize = SG_ALL,
.cmd_per_lun = 2,
.use_clustering = DISABLE_CLUSTERING,
+ .cmd_size = NCR5380_CMD_SIZE,
+ .max_sectors = 128,
};
static int dmx3191d_probe_one(struct pci_dev *pdev,
@@ -97,17 +93,25 @@ static int dmx3191d_probe_one(struct pci_dev *pdev,
*/
shost->irq = NO_IRQ;
- NCR5380_init(shost, FLAG_NO_PSEUDO_DMA | FLAG_DTC3181E);
+ error = NCR5380_init(shost, FLAG_NO_PSEUDO_DMA);
+ if (error)
+ goto out_host_put;
+
+ NCR5380_maybe_reset_bus(shost);
pci_set_drvdata(pdev, shost);
error = scsi_add_host(shost, &pdev->dev);
if (error)
- goto out_release_region;
+ goto out_exit;
scsi_scan_host(shost);
return 0;
+out_exit:
+ NCR5380_exit(shost);
+out_host_put:
+ scsi_host_put(shost);
out_release_region:
release_region(io, DMX3191D_REGION_LEN);
out_disable_device:
@@ -119,15 +123,14 @@ static int dmx3191d_probe_one(struct pci_dev *pdev,
static void dmx3191d_remove_one(struct pci_dev *pdev)
{
struct Scsi_Host *shost = pci_get_drvdata(pdev);
+ unsigned long io = shost->io_port;
scsi_remove_host(shost);
NCR5380_exit(shost);
-
- release_region(shost->io_port, DMX3191D_REGION_LEN);
- pci_disable_device(pdev);
-
scsi_host_put(shost);
+ release_region(io, DMX3191D_REGION_LEN);
+ pci_disable_device(pdev);
}
static struct pci_device_id dmx3191d_pci_tbl[] = {
diff --git a/drivers/scsi/dtc.c b/drivers/scsi/dtc.c
index 4c74c7b..6c736b0 100644
--- a/drivers/scsi/dtc.c
+++ b/drivers/scsi/dtc.c
@@ -1,9 +1,5 @@
-
#define PSEUDO_DMA
#define DONT_USE_INTR
-#define UNSAFE /* Leave interrupts enabled during pseudo-dma I/O */
-#define DMA_WORKS_RIGHT
-
/*
* DTC 3180/3280 driver, by
@@ -50,15 +46,13 @@
#include <linux/module.h>
-#include <linux/signal.h>
#include <linux/blkdev.h>
-#include <linux/delay.h>
-#include <linux/stat.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <scsi/scsi_host.h>
+
#include "dtc.h"
#define AUTOPROBE_IRQ
#include "NCR5380.h"
@@ -150,7 +144,7 @@ static const struct signature {
static int __init dtc_setup(char *str)
{
- static int commandline_current = 0;
+ static int commandline_current;
int i;
int ints[10];
@@ -188,7 +182,7 @@ __setup("dtc=", dtc_setup);
static int __init dtc_detect(struct scsi_host_template * tpnt)
{
- static int current_override = 0, current_base = 0;
+ static int current_override, current_base;
struct Scsi_Host *instance;
unsigned int addr;
void __iomem *base;
@@ -205,9 +199,8 @@ static int __init dtc_detect(struct scsi_host_template * tpnt)
addr = 0;
} else
for (; !addr && (current_base < NO_BASES); ++current_base) {
-#if (DTCDEBUG & DTCDEBUG_INIT)
- printk(KERN_DEBUG "scsi-dtc : probing address %08x\n", bases[current_base].address);
-#endif
+ dprintk(NDEBUG_INIT, "dtc: probing address 0x%08x\n",
+ (unsigned int)bases[current_base].address);
if (bases[current_base].noauto)
continue;
base = ioremap(bases[current_base].address, 0x2000);
@@ -216,18 +209,14 @@ static int __init dtc_detect(struct scsi_host_template * tpnt)
for (sig = 0; sig < NO_SIGNATURES; ++sig) {
if (check_signature(base + signatures[sig].offset, signatures[sig].string, strlen(signatures[sig].string))) {
addr = bases[current_base].address;
-#if (DTCDEBUG & DTCDEBUG_INIT)
- printk(KERN_DEBUG "scsi-dtc : detected board.\n");
-#endif
+ dprintk(NDEBUG_INIT, "dtc: detected board\n");
goto found;
}
}
iounmap(base);
}
-#if defined(DTCDEBUG) && (DTCDEBUG & DTCDEBUG_INIT)
- printk(KERN_DEBUG "scsi-dtc : base = %08x\n", addr);
-#endif
+ dprintk(NDEBUG_INIT, "dtc: addr = 0x%08x\n", addr);
if (!addr)
break;
@@ -235,12 +224,15 @@ static int __init dtc_detect(struct scsi_host_template * tpnt)
found:
instance = scsi_register(tpnt, sizeof(struct NCR5380_hostdata));
if (instance == NULL)
- break;
+ goto out_unmap;
instance->base = addr;
((struct NCR5380_hostdata *)(instance)->hostdata)->base = base;
- NCR5380_init(instance, 0);
+ if (NCR5380_init(instance, FLAG_NO_DMA_FIXUP))
+ goto out_unregister;
+
+ NCR5380_maybe_reset_bus(instance);
NCR5380_write(DTC_CONTROL_REG, CSR_5380_INTR); /* Enable int's */
if (overrides[current_override].irq != IRQ_AUTO)
@@ -271,14 +263,19 @@ found:
printk(KERN_WARNING "scsi%d : interrupts not used. Might as well not jumper it.\n", instance->host_no);
instance->irq = NO_IRQ;
#endif
-#if defined(DTCDEBUG) && (DTCDEBUG & DTCDEBUG_INIT)
- printk("scsi%d : irq = %d\n", instance->host_no, instance->irq);
-#endif
+ dprintk(NDEBUG_INIT, "scsi%d : irq = %d\n",
+ instance->host_no, instance->irq);
++current_override;
++count;
}
return count;
+
+out_unregister:
+ scsi_unregister(instance);
+out_unmap:
+ iounmap(base);
+ return count;
}
/*
@@ -331,12 +328,8 @@ static inline int NCR5380_pread(struct Scsi_Host *instance, unsigned char *dst,
unsigned char *d = dst;
int i; /* For counting time spent in the poll-loop */
struct NCR5380_hostdata *hostdata = shost_priv(instance);
- NCR5380_local_declare();
- NCR5380_setup(instance);
i = 0;
- NCR5380_read(RESET_PARITY_INTERRUPT_REG);
- NCR5380_write(MODE_REG, MR_ENABLE_EOP_INTR | MR_DMA_MODE);
if (instance->irq == NO_IRQ)
NCR5380_write(DTC_CONTROL_REG, CSR_DIR_READ);
else
@@ -348,7 +341,7 @@ static inline int NCR5380_pread(struct Scsi_Host *instance, unsigned char *dst,
while (NCR5380_read(DTC_CONTROL_REG) & CSR_HOST_BUF_NOT_RDY)
++i;
rtrc(3);
- memcpy_fromio(d, base + DTC_DATA_BUF, 128);
+ memcpy_fromio(d, hostdata->base + DTC_DATA_BUF, 128);
d += 128;
len -= 128;
rtrc(7);
@@ -358,9 +351,7 @@ static inline int NCR5380_pread(struct Scsi_Host *instance, unsigned char *dst,
rtrc(4);
while (!(NCR5380_read(DTC_CONTROL_REG) & D_CR_ACCESS))
++i;
- NCR5380_write(MODE_REG, 0); /* Clear the operating mode */
rtrc(0);
- NCR5380_read(RESET_PARITY_INTERRUPT_REG);
if (i > hostdata->spin_max_r)
hostdata->spin_max_r = i;
return (0);
@@ -383,12 +374,7 @@ static inline int NCR5380_pwrite(struct Scsi_Host *instance, unsigned char *src,
{
int i;
struct NCR5380_hostdata *hostdata = shost_priv(instance);
- NCR5380_local_declare();
- NCR5380_setup(instance);
- NCR5380_read(RESET_PARITY_INTERRUPT_REG);
- NCR5380_write(MODE_REG, MR_ENABLE_EOP_INTR | MR_DMA_MODE);
- /* set direction (write) */
if (instance->irq == NO_IRQ)
NCR5380_write(DTC_CONTROL_REG, 0);
else
@@ -400,7 +386,7 @@ static inline int NCR5380_pwrite(struct Scsi_Host *instance, unsigned char *src,
while (NCR5380_read(DTC_CONTROL_REG) & CSR_HOST_BUF_NOT_RDY)
++i;
rtrc(3);
- memcpy_toio(base + DTC_DATA_BUF, src, 128);
+ memcpy_toio(hostdata->base + DTC_DATA_BUF, src, 128);
src += 128;
len -= 128;
}
@@ -413,47 +399,60 @@ static inline int NCR5380_pwrite(struct Scsi_Host *instance, unsigned char *src,
++i;
rtrc(7);
/* Check for parity error here. fixme. */
- NCR5380_write(MODE_REG, 0); /* Clear the operating mode */
rtrc(0);
if (i > hostdata->spin_max_w)
hostdata->spin_max_w = i;
return (0);
}
+static int dtc_dma_xfer_len(struct scsi_cmnd *cmd)
+{
+ int transfersize = cmd->transfersize;
+
+ /* Limit transfers to 32K, for xx400 & xx406
+ * pseudoDMA that transfers in 128 bytes blocks.
+ */
+ if (transfersize > 32 * 1024 && cmd->SCp.this_residual &&
+ !(cmd->SCp.this_residual % transfersize))
+ transfersize = 32 * 1024;
+
+ return transfersize;
+}
+
MODULE_LICENSE("GPL");
#include "NCR5380.c"
static int dtc_release(struct Scsi_Host *shost)
{
- NCR5380_local_declare();
- NCR5380_setup(shost);
+ struct NCR5380_hostdata *hostdata = shost_priv(shost);
+
if (shost->irq != NO_IRQ)
free_irq(shost->irq, shost);
NCR5380_exit(shost);
- if (shost->io_port && shost->n_io_port)
- release_region(shost->io_port, shost->n_io_port);
scsi_unregister(shost);
- iounmap(base);
+ iounmap(hostdata->base);
return 0;
}
static struct scsi_host_template driver_template = {
- .name = "DTC 3180/3280 ",
- .detect = dtc_detect,
- .release = dtc_release,
- .proc_name = "dtc3x80",
- .show_info = dtc_show_info,
- .write_info = dtc_write_info,
- .info = dtc_info,
- .queuecommand = dtc_queue_command,
- .eh_abort_handler = dtc_abort,
- .eh_bus_reset_handler = dtc_bus_reset,
- .bios_param = dtc_biosparam,
- .can_queue = CAN_QUEUE,
- .this_id = 7,
- .sg_tablesize = SG_ALL,
- .cmd_per_lun = CMD_PER_LUN,
- .use_clustering = DISABLE_CLUSTERING,
+ .name = "DTC 3180/3280",
+ .detect = dtc_detect,
+ .release = dtc_release,
+ .proc_name = "dtc3x80",
+ .show_info = dtc_show_info,
+ .write_info = dtc_write_info,
+ .info = dtc_info,
+ .queuecommand = dtc_queue_command,
+ .eh_abort_handler = dtc_abort,
+ .eh_bus_reset_handler = dtc_bus_reset,
+ .bios_param = dtc_biosparam,
+ .can_queue = 32,
+ .this_id = 7,
+ .sg_tablesize = SG_ALL,
+ .cmd_per_lun = 2,
+ .use_clustering = DISABLE_CLUSTERING,
+ .cmd_size = NCR5380_CMD_SIZE,
+ .max_sectors = 128,
};
#include "scsi_module.c"
diff --git a/drivers/scsi/dtc.h b/drivers/scsi/dtc.h
index 78a2332..56732cb 100644
--- a/drivers/scsi/dtc.h
+++ b/drivers/scsi/dtc.h
@@ -10,54 +10,17 @@
#ifndef DTC3280_H
#define DTC3280_H
-#define DTCDEBUG 0
-#define DTCDEBUG_INIT 0x1
-#define DTCDEBUG_TRANSFER 0x2
-
-#ifndef CMD_PER_LUN
-#define CMD_PER_LUN 2
-#endif
-
-#ifndef CAN_QUEUE
-#define CAN_QUEUE 32
-#endif
-
#define NCR5380_implementation_fields \
void __iomem *base
-#define NCR5380_local_declare() \
- void __iomem *base
-
-#define NCR5380_setup(instance) \
- base = ((struct NCR5380_hostdata *)(instance)->hostdata)->base
+#define DTC_address(reg) \
+ (((struct NCR5380_hostdata *)shost_priv(instance))->base + DTC_5380_OFFSET + reg)
-#define DTC_address(reg) (base + DTC_5380_OFFSET + reg)
-
-#define dbNCR5380_read(reg) \
- (rval=readb(DTC_address(reg)), \
- (((unsigned char) printk("DTC : read register %d at addr %p is: %02x\n"\
- , (reg), DTC_address(reg), rval)), rval ) )
-
-#define dbNCR5380_write(reg, value) do { \
- printk("DTC : write %02x to register %d at address %p\n", \
- (value), (reg), DTC_address(reg)); \
- writeb(value, DTC_address(reg));} while(0)
-
-
-#if !(DTCDEBUG & DTCDEBUG_TRANSFER)
#define NCR5380_read(reg) (readb(DTC_address(reg)))
#define NCR5380_write(reg, value) (writeb(value, DTC_address(reg)))
-#else
-#define NCR5380_read(reg) (readb(DTC_address(reg)))
-#define xNCR5380_read(reg) \
- (((unsigned char) printk("DTC : read register %d at address %p\n"\
- , (reg), DTC_address(reg))), readb(DTC_address(reg)))
-#define NCR5380_write(reg, value) do { \
- printk("DTC : write %02x to register %d at address %p\n", \
- (value), (reg), DTC_address(reg)); \
- writeb(value, DTC_address(reg));} while(0)
-#endif
+#define NCR5380_dma_xfer_len(instance, cmd, phase) \
+ dtc_dma_xfer_len(cmd)
#define NCR5380_intr dtc_intr
#define NCR5380_queue_command dtc_queue_command
diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c
index f8d2478..90091e6 100644
--- a/drivers/scsi/g_NCR5380.c
+++ b/drivers/scsi/g_NCR5380.c
@@ -56,40 +56,31 @@
*
*/
-/* settings for DTC3181E card with only Mustek scanner attached */
-#define USLEEP_POLL msecs_to_jiffies(10)
-#define USLEEP_SLEEP msecs_to_jiffies(200)
-#define USLEEP_WAITLONG msecs_to_jiffies(5000)
-
#define AUTOPROBE_IRQ
#ifdef CONFIG_SCSI_GENERIC_NCR53C400
-#define NCR53C400_PSEUDO_DMA 1
#define PSEUDO_DMA
-#define NCR53C400
#endif
#include <asm/io.h>
-#include <linux/signal.h>
#include <linux/blkdev.h>
+#include <linux/module.h>
#include <scsi/scsi_host.h>
#include "g_NCR5380.h"
#include "NCR5380.h"
-#include <linux/stat.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/isapnp.h>
-#include <linux/delay.h>
#include <linux/interrupt.h>
-#define NCR_NOT_SET 0
-static int ncr_irq = NCR_NOT_SET;
-static int ncr_dma = NCR_NOT_SET;
-static int ncr_addr = NCR_NOT_SET;
-static int ncr_5380 = NCR_NOT_SET;
-static int ncr_53c400 = NCR_NOT_SET;
-static int ncr_53c400a = NCR_NOT_SET;
-static int dtc_3181e = NCR_NOT_SET;
+static int ncr_irq;
+static int ncr_dma;
+static int ncr_addr;
+static int ncr_5380;
+static int ncr_53c400;
+static int ncr_53c400a;
+static int dtc_3181e;
+static int hp_c2502;
static struct override {
NCR5380_map_type NCR5380_map_name;
@@ -121,7 +112,7 @@ static struct override {
static void __init internal_setup(int board, char *str, int *ints)
{
- static int commandline_current = 0;
+ static int commandline_current;
switch (board) {
case BOARD_NCR5380:
if (ints[0] != 2 && ints[0] != 3) {
@@ -235,6 +226,30 @@ static int __init do_DTC3181E_setup(char *str)
#endif
+#ifndef SCSI_G_NCR5380_MEM
+/*
+ * Configure I/O address of 53C400A or DTC436 by writing magic numbers
+ * to ports 0x779 and 0x379.
+ */
+static void magic_configure(int idx, u8 irq, u8 magic[])
+{
+ u8 cfg = 0;
+
+ outb(magic[0], 0x779);
+ outb(magic[1], 0x379);
+ outb(magic[2], 0x379);
+ outb(magic[3], 0x379);
+ outb(magic[4], 0x379);
+
+ /* allowed IRQs for HP C2502 */
+ if (irq != 2 && irq != 3 && irq != 4 && irq != 5 && irq != 7)
+ irq = 0;
+ if (idx >= 0 && idx <= 7)
+ cfg = 0x80 | idx | (irq << 4);
+ outb(cfg, 0x379);
+}
+#endif
+
/**
* generic_NCR5380_detect - look for NCR5380 controllers
* @tpnt: the scsi template
@@ -243,19 +258,18 @@ static int __init do_DTC3181E_setup(char *str)
* and DTC436(ISAPnP) controllers. If overrides have been set we use
* them.
*
- * The caller supplied NCR5380_init function is invoked from here, before
- * the interrupt line is taken.
- *
* Locks: none
*/
static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
{
- static int current_override = 0;
+ static int current_override;
int count;
unsigned int *ports;
+ u8 *magic = NULL;
#ifndef SCSI_G_NCR5380_MEM
int i;
+ int port_idx = -1;
unsigned long region_size = 16;
#endif
static unsigned int __initdata ncr_53c400a_ports[] = {
@@ -264,27 +278,36 @@ static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
static unsigned int __initdata dtc_3181e_ports[] = {
0x220, 0x240, 0x280, 0x2a0, 0x2c0, 0x300, 0x320, 0x340, 0
};
- int flags = 0;
+ static u8 ncr_53c400a_magic[] __initdata = { /* 53C400A & DTC436 */
+ 0x59, 0xb9, 0xc5, 0xae, 0xa6
+ };
+ static u8 hp_c2502_magic[] __initdata = { /* HP C2502 */
+ 0x0f, 0x22, 0xf0, 0x20, 0x80
+ };
+ int flags;
struct Scsi_Host *instance;
+ struct NCR5380_hostdata *hostdata;
#ifdef SCSI_G_NCR5380_MEM
unsigned long base;
void __iomem *iomem;
#endif
- if (ncr_irq != NCR_NOT_SET)
+ if (ncr_irq)
overrides[0].irq = ncr_irq;
- if (ncr_dma != NCR_NOT_SET)
+ if (ncr_dma)
overrides[0].dma = ncr_dma;
- if (ncr_addr != NCR_NOT_SET)
+ if (ncr_addr)
overrides[0].NCR5380_map_name = (NCR5380_map_type) ncr_addr;
- if (ncr_5380 != NCR_NOT_SET)
+ if (ncr_5380)
overrides[0].board = BOARD_NCR5380;
- else if (ncr_53c400 != NCR_NOT_SET)
+ else if (ncr_53c400)
overrides[0].board = BOARD_NCR53C400;
- else if (ncr_53c400a != NCR_NOT_SET)
+ else if (ncr_53c400a)
overrides[0].board = BOARD_NCR53C400A;
- else if (dtc_3181e != NCR_NOT_SET)
+ else if (dtc_3181e)
overrides[0].board = BOARD_DTC3181E;
+ else if (hp_c2502)
+ overrides[0].board = BOARD_HP_C2502;
#ifndef SCSI_G_NCR5380_MEM
if (!current_override && isapnp_present()) {
struct pnp_dev *dev = NULL;
@@ -318,41 +341,45 @@ static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
}
}
#endif
- tpnt->proc_name = "g_NCR5380";
for (count = 0; current_override < NO_OVERRIDES; ++current_override) {
if (!(overrides[current_override].NCR5380_map_name))
continue;
ports = NULL;
+ flags = 0;
switch (overrides[current_override].board) {
case BOARD_NCR5380:
flags = FLAG_NO_PSEUDO_DMA;
break;
case BOARD_NCR53C400:
- flags = FLAG_NCR53C400;
+#ifdef PSEUDO_DMA
+ flags = FLAG_NO_DMA_FIXUP;
+#endif
break;
case BOARD_NCR53C400A:
- flags = FLAG_NO_PSEUDO_DMA;
+ flags = FLAG_NO_DMA_FIXUP;
+ ports = ncr_53c400a_ports;
+ magic = ncr_53c400a_magic;
+ break;
+ case BOARD_HP_C2502:
+ flags = FLAG_NO_DMA_FIXUP;
ports = ncr_53c400a_ports;
+ magic = hp_c2502_magic;
break;
case BOARD_DTC3181E:
- flags = FLAG_NO_PSEUDO_DMA | FLAG_DTC3181E;
+ flags = FLAG_NO_DMA_FIXUP;
ports = dtc_3181e_ports;
+ magic = ncr_53c400a_magic;
break;
}
#ifndef SCSI_G_NCR5380_MEM
- if (ports) {
+ if (ports && magic) {
/* wakeup sequence for the NCR53C400A and DTC3181E */
/* Disable the adapter and look for a free io port */
- outb(0x59, 0x779);
- outb(0xb9, 0x379);
- outb(0xc5, 0x379);
- outb(0xae, 0x379);
- outb(0xa6, 0x379);
- outb(0x00, 0x379);
+ magic_configure(-1, 0, magic);
if (overrides[current_override].NCR5380_map_name != PORT_AUTO)
for (i = 0; ports[i]; i++) {
@@ -371,17 +398,12 @@ static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
}
if (ports[i]) {
/* At this point we have our region reserved */
- outb(0x59, 0x779);
- outb(0xb9, 0x379);
- outb(0xc5, 0x379);
- outb(0xae, 0x379);
- outb(0xa6, 0x379);
- outb(0x80 | i, 0x379); /* set io port to be used */
+ magic_configure(i, 0, magic); /* no IRQ yet */
outb(0xc0, ports[i] + 9);
if (inb(ports[i] + 9) != 0x80)
continue;
- else
- overrides[current_override].NCR5380_map_name = ports[i];
+ overrides[current_override].NCR5380_map_name = ports[i];
+ port_idx = i;
} else
continue;
}
@@ -403,24 +425,65 @@ static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
}
#endif
instance = scsi_register(tpnt, sizeof(struct NCR5380_hostdata));
- if (instance == NULL) {
-#ifndef SCSI_G_NCR5380_MEM
- release_region(overrides[current_override].NCR5380_map_name, region_size);
-#else
- iounmap(iomem);
- release_mem_region(base, NCR5380_region_size);
-#endif
- continue;
- }
+ if (instance == NULL)
+ goto out_release;
+ hostdata = shost_priv(instance);
- instance->NCR5380_instance_name = overrides[current_override].NCR5380_map_name;
#ifndef SCSI_G_NCR5380_MEM
+ instance->io_port = overrides[current_override].NCR5380_map_name;
instance->n_io_port = region_size;
+ hostdata->io_width = 1; /* 8-bit PDMA by default */
+
+ /*
+ * On NCR53C400 boards, NCR5380 registers are mapped 8 past
+ * the base address.
+ */
+ switch (overrides[current_override].board) {
+ case BOARD_NCR53C400:
+ instance->io_port += 8;
+ hostdata->c400_ctl_status = 0;
+ hostdata->c400_blk_cnt = 1;
+ hostdata->c400_host_buf = 4;
+ break;
+ case BOARD_DTC3181E:
+ hostdata->io_width = 2; /* 16-bit PDMA */
+ /* fall through */
+ case BOARD_NCR53C400A:
+ case BOARD_HP_C2502:
+ hostdata->c400_ctl_status = 9;
+ hostdata->c400_blk_cnt = 10;
+ hostdata->c400_host_buf = 8;
+ break;
+ }
#else
- ((struct NCR5380_hostdata *)instance->hostdata)->iomem = iomem;
+ instance->base = overrides[current_override].NCR5380_map_name;
+ hostdata->iomem = iomem;
+ switch (overrides[current_override].board) {
+ case BOARD_NCR53C400:
+ hostdata->c400_ctl_status = 0x100;
+ hostdata->c400_blk_cnt = 0x101;
+ hostdata->c400_host_buf = 0x104;
+ break;
+ case BOARD_DTC3181E:
+ case BOARD_NCR53C400A:
+ case BOARD_HP_C2502:
+ pr_err(DRV_MODULE_NAME ": unknown register offsets\n");
+ goto out_unregister;
+ }
#endif
- NCR5380_init(instance, flags);
+ if (NCR5380_init(instance, flags))
+ goto out_unregister;
+
+ switch (overrides[current_override].board) {
+ case BOARD_NCR53C400:
+ case BOARD_DTC3181E:
+ case BOARD_NCR53C400A:
+ case BOARD_HP_C2502:
+ NCR5380_write(hostdata->c400_ctl_status, CSR_BASE);
+ }
+
+ NCR5380_maybe_reset_bus(instance);
if (overrides[current_override].irq != IRQ_AUTO)
instance->irq = overrides[current_override].irq;
@@ -431,12 +494,18 @@ static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
if (instance->irq == 255)
instance->irq = NO_IRQ;
- if (instance->irq != NO_IRQ)
+ if (instance->irq != NO_IRQ) {
+#ifndef SCSI_G_NCR5380_MEM
+ /* set IRQ for HP C2502 */
+ if (overrides[current_override].board == BOARD_HP_C2502)
+ magic_configure(port_idx, instance->irq, magic);
+#endif
if (request_irq(instance->irq, generic_NCR5380_intr,
0, "NCR5380", instance)) {
printk(KERN_WARNING "scsi%d : IRQ%d not free, interrupts disabled\n", instance->host_no, instance->irq);
instance->irq = NO_IRQ;
}
+ }
if (instance->irq == NO_IRQ) {
printk(KERN_INFO "scsi%d : interrupts not enabled. for better interactive performance,\n", instance->host_no);
@@ -447,6 +516,17 @@ static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
++count;
}
return count;
+
+out_unregister:
+ scsi_unregister(instance);
+out_release:
+#ifndef SCSI_G_NCR5380_MEM
+ release_region(overrides[current_override].NCR5380_map_name, region_size);
+#else
+ iounmap(iomem);
+ release_mem_region(base, NCR5380_region_size);
+#endif
+ return count;
}
/**
@@ -460,21 +540,15 @@ static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
static int generic_NCR5380_release_resources(struct Scsi_Host *instance)
{
- NCR5380_local_declare();
- NCR5380_setup(instance);
-
if (instance->irq != NO_IRQ)
free_irq(instance->irq, instance);
NCR5380_exit(instance);
-
#ifndef SCSI_G_NCR5380_MEM
- release_region(instance->NCR5380_instance_name, instance->n_io_port);
+ release_region(instance->io_port, instance->n_io_port);
#else
iounmap(((struct NCR5380_hostdata *)instance->hostdata)->iomem);
- release_mem_region(instance->NCR5380_instance_name, NCR5380_region_size);
+ release_mem_region(instance->base, NCR5380_region_size);
#endif
-
-
return 0;
}
@@ -507,7 +581,7 @@ generic_NCR5380_biosparam(struct scsi_device *sdev, struct block_device *bdev,
}
#endif
-#ifdef NCR53C400_PSEUDO_DMA
+#ifdef PSEUDO_DMA
/**
* NCR5380_pread - pseudo DMA read
@@ -521,75 +595,68 @@ generic_NCR5380_biosparam(struct scsi_device *sdev, struct block_device *bdev,
static inline int NCR5380_pread(struct Scsi_Host *instance, unsigned char *dst, int len)
{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
int blocks = len / 128;
int start = 0;
- int bl;
-
- NCR5380_local_declare();
- NCR5380_setup(instance);
- NCR5380_write(C400_CONTROL_STATUS_REG, CSR_BASE | CSR_TRANS_DIR);
- NCR5380_write(C400_BLOCK_COUNTER_REG, blocks);
+ NCR5380_write(hostdata->c400_ctl_status, CSR_BASE | CSR_TRANS_DIR);
+ NCR5380_write(hostdata->c400_blk_cnt, blocks);
while (1) {
- if ((bl = NCR5380_read(C400_BLOCK_COUNTER_REG)) == 0) {
+ if (NCR5380_read(hostdata->c400_blk_cnt) == 0)
break;
- }
- if (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_GATED_53C80_IRQ) {
+ if (NCR5380_read(hostdata->c400_ctl_status) & CSR_GATED_53C80_IRQ) {
printk(KERN_ERR "53C400r: Got 53C80_IRQ start=%d, blocks=%d\n", start, blocks);
return -1;
}
- while (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_HOST_BUF_NOT_RDY);
+ while (NCR5380_read(hostdata->c400_ctl_status) & CSR_HOST_BUF_NOT_RDY)
+ ; /* FIXME - no timeout */
#ifndef SCSI_G_NCR5380_MEM
- {
- int i;
- for (i = 0; i < 128; i++)
- dst[start + i] = NCR5380_read(C400_HOST_BUFFER);
- }
+ if (hostdata->io_width == 2)
+ insw(instance->io_port + hostdata->c400_host_buf,
+ dst + start, 64);
+ else
+ insb(instance->io_port + hostdata->c400_host_buf,
+ dst + start, 128);
#else
/* implies SCSI_G_NCR5380_MEM */
- memcpy_fromio(dst + start, iomem + NCR53C400_host_buffer, 128);
+ memcpy_fromio(dst + start,
+ hostdata->iomem + NCR53C400_host_buffer, 128);
#endif
start += 128;
blocks--;
}
if (blocks) {
- while (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_HOST_BUF_NOT_RDY)
- {
- // FIXME - no timeout
- }
+ while (NCR5380_read(hostdata->c400_ctl_status) & CSR_HOST_BUF_NOT_RDY)
+ ; /* FIXME - no timeout */
#ifndef SCSI_G_NCR5380_MEM
- {
- int i;
- for (i = 0; i < 128; i++)
- dst[start + i] = NCR5380_read(C400_HOST_BUFFER);
- }
+ if (hostdata->io_width == 2)
+ insw(instance->io_port + hostdata->c400_host_buf,
+ dst + start, 64);
+ else
+ insb(instance->io_port + hostdata->c400_host_buf,
+ dst + start, 128);
#else
/* implies SCSI_G_NCR5380_MEM */
- memcpy_fromio(dst + start, iomem + NCR53C400_host_buffer, 128);
+ memcpy_fromio(dst + start,
+ hostdata->iomem + NCR53C400_host_buffer, 128);
#endif
start += 128;
blocks--;
}
- if (!(NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_GATED_53C80_IRQ))
+ if (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_GATED_53C80_IRQ))
printk("53C400r: no 53C80 gated irq after transfer");
-#if 0
- /*
- * DON'T DO THIS - THEY NEVER ARRIVE!
- */
- printk("53C400r: Waiting for 53C80 registers\n");
- while (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_53C80_REG)
+ /* wait for 53C80 registers to be available */
+ while (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_53C80_REG))
;
-#endif
+
if (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_END_DMA_TRANSFER))
printk(KERN_ERR "53C400r: no end dma signal\n");
- NCR5380_write(MODE_REG, MR_BASE);
- NCR5380_read(RESET_PARITY_INTERRUPT_REG);
return 0;
}
@@ -605,89 +672,91 @@ static inline int NCR5380_pread(struct Scsi_Host *instance, unsigned char *dst,
static inline int NCR5380_pwrite(struct Scsi_Host *instance, unsigned char *src, int len)
{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
int blocks = len / 128;
int start = 0;
- int bl;
- int i;
- NCR5380_local_declare();
- NCR5380_setup(instance);
-
- NCR5380_write(C400_CONTROL_STATUS_REG, CSR_BASE);
- NCR5380_write(C400_BLOCK_COUNTER_REG, blocks);
+ NCR5380_write(hostdata->c400_ctl_status, CSR_BASE);
+ NCR5380_write(hostdata->c400_blk_cnt, blocks);
while (1) {
- if (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_GATED_53C80_IRQ) {
+ if (NCR5380_read(hostdata->c400_ctl_status) & CSR_GATED_53C80_IRQ) {
printk(KERN_ERR "53C400w: Got 53C80_IRQ start=%d, blocks=%d\n", start, blocks);
return -1;
}
- if ((bl = NCR5380_read(C400_BLOCK_COUNTER_REG)) == 0) {
+ if (NCR5380_read(hostdata->c400_blk_cnt) == 0)
break;
- }
- while (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_HOST_BUF_NOT_RDY)
+ while (NCR5380_read(hostdata->c400_ctl_status) & CSR_HOST_BUF_NOT_RDY)
; // FIXME - timeout
#ifndef SCSI_G_NCR5380_MEM
- {
- for (i = 0; i < 128; i++)
- NCR5380_write(C400_HOST_BUFFER, src[start + i]);
- }
+ if (hostdata->io_width == 2)
+ outsw(instance->io_port + hostdata->c400_host_buf,
+ src + start, 64);
+ else
+ outsb(instance->io_port + hostdata->c400_host_buf,
+ src + start, 128);
#else
/* implies SCSI_G_NCR5380_MEM */
- memcpy_toio(iomem + NCR53C400_host_buffer, src + start, 128);
+ memcpy_toio(hostdata->iomem + NCR53C400_host_buffer,
+ src + start, 128);
#endif
start += 128;
blocks--;
}
if (blocks) {
- while (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_HOST_BUF_NOT_RDY)
+ while (NCR5380_read(hostdata->c400_ctl_status) & CSR_HOST_BUF_NOT_RDY)
; // FIXME - no timeout
#ifndef SCSI_G_NCR5380_MEM
- {
- for (i = 0; i < 128; i++)
- NCR5380_write(C400_HOST_BUFFER, src[start + i]);
- }
+ if (hostdata->io_width == 2)
+ outsw(instance->io_port + hostdata->c400_host_buf,
+ src + start, 64);
+ else
+ outsb(instance->io_port + hostdata->c400_host_buf,
+ src + start, 128);
#else
/* implies SCSI_G_NCR5380_MEM */
- memcpy_toio(iomem + NCR53C400_host_buffer, src + start, 128);
+ memcpy_toio(hostdata->iomem + NCR53C400_host_buffer,
+ src + start, 128);
#endif
start += 128;
blocks--;
}
-#if 0
- printk("53C400w: waiting for registers to be available\n");
- THEY NEVER DO ! while (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_53C80_REG);
- printk("53C400w: Got em\n");
-#endif
-
- /* Let's wait for this instead - could be ugly */
- /* All documentation says to check for this. Maybe my hardware is too
- * fast. Waiting for it seems to work fine! KLL
- */
- while (!(i = NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_GATED_53C80_IRQ))
- ; // FIXME - no timeout
-
- /*
- * I know. i is certainly != 0 here but the loop is new. See previous
- * comment.
- */
- if (i) {
- if (!((i = NCR5380_read(BUS_AND_STATUS_REG)) & BASR_END_DMA_TRANSFER))
- printk(KERN_ERR "53C400w: No END OF DMA bit - WHOOPS! BASR=%0x\n", i);
- } else
- printk(KERN_ERR "53C400w: no 53C80 gated irq after transfer (last block)\n");
+ /* wait for 53C80 registers to be available */
+ while (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_53C80_REG)) {
+ udelay(4); /* DTC436 chip hangs without this */
+ /* FIXME - no timeout */
+ }
-#if 0
if (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_END_DMA_TRANSFER)) {
printk(KERN_ERR "53C400w: no end dma signal\n");
}
-#endif
+
while (!(NCR5380_read(TARGET_COMMAND_REG) & TCR_LAST_BYTE_SENT))
; // TIMEOUT
return 0;
}
-#endif /* PSEUDO_DMA */
+
+static int generic_NCR5380_dma_xfer_len(struct scsi_cmnd *cmd)
+{
+ int transfersize = cmd->transfersize;
+
+ /* Limit transfers to 32K, for xx400 & xx406
+ * pseudoDMA that transfers in 128 bytes blocks.
+ */
+ if (transfersize > 32 * 1024 && cmd->SCp.this_residual &&
+ !(cmd->SCp.this_residual % transfersize))
+ transfersize = 32 * 1024;
+
+ /* 53C400 datasheet: non-modulo-128-byte transfers should use PIO */
+ if (transfersize % 128)
+ transfersize = 0;
+
+ return transfersize;
+}
+
+#endif /* PSEUDO_DMA */
/*
* Include the NCR5380 core code that we build our driver around
@@ -696,22 +765,24 @@ static inline int NCR5380_pwrite(struct Scsi_Host *instance, unsigned char *src,
#include "NCR5380.c"
static struct scsi_host_template driver_template = {
- .show_info = generic_NCR5380_show_info,
- .name = "Generic NCR5380/NCR53C400 SCSI",
- .detect = generic_NCR5380_detect,
- .release = generic_NCR5380_release_resources,
- .info = generic_NCR5380_info,
- .queuecommand = generic_NCR5380_queue_command,
+ .proc_name = DRV_MODULE_NAME,
+ .name = "Generic NCR5380/NCR53C400 SCSI",
+ .detect = generic_NCR5380_detect,
+ .release = generic_NCR5380_release_resources,
+ .info = generic_NCR5380_info,
+ .queuecommand = generic_NCR5380_queue_command,
.eh_abort_handler = generic_NCR5380_abort,
.eh_bus_reset_handler = generic_NCR5380_bus_reset,
- .bios_param = NCR5380_BIOSPARAM,
- .can_queue = CAN_QUEUE,
- .this_id = 7,
- .sg_tablesize = SG_ALL,
- .cmd_per_lun = CMD_PER_LUN,
- .use_clustering = DISABLE_CLUSTERING,
+ .bios_param = NCR5380_BIOSPARAM,
+ .can_queue = 16,
+ .this_id = 7,
+ .sg_tablesize = SG_ALL,
+ .cmd_per_lun = 2,
+ .use_clustering = DISABLE_CLUSTERING,
+ .cmd_size = NCR5380_CMD_SIZE,
+ .max_sectors = 128,
};
-#include <linux/module.h>
+
#include "scsi_module.c"
module_param(ncr_irq, int, 0);
@@ -721,6 +792,7 @@ module_param(ncr_5380, int, 0);
module_param(ncr_53c400, int, 0);
module_param(ncr_53c400a, int, 0);
module_param(dtc_3181e, int, 0);
+module_param(hp_c2502, int, 0);
MODULE_LICENSE("GPL");
#if !defined(SCSI_G_NCR5380_MEM) && defined(MODULE)
diff --git a/drivers/scsi/g_NCR5380.h b/drivers/scsi/g_NCR5380.h
index bea1a3b..6f3d2ac 100644
--- a/drivers/scsi/g_NCR5380.h
+++ b/drivers/scsi/g_NCR5380.h
@@ -14,81 +14,67 @@
#ifndef GENERIC_NCR5380_H
#define GENERIC_NCR5380_H
-#ifdef NCR53C400
+#ifdef CONFIG_SCSI_GENERIC_NCR53C400
#define BIOSPARAM
#define NCR5380_BIOSPARAM generic_NCR5380_biosparam
#else
#define NCR5380_BIOSPARAM NULL
#endif
-#ifndef ASM
-
-#ifndef CMD_PER_LUN
-#define CMD_PER_LUN 2
-#endif
-
-#ifndef CAN_QUEUE
-#define CAN_QUEUE 16
-#endif
-
#define __STRVAL(x) #x
#define STRVAL(x) __STRVAL(x)
#ifndef SCSI_G_NCR5380_MEM
+#define DRV_MODULE_NAME "g_NCR5380"
-#define NCR5380_map_config port
#define NCR5380_map_type int
#define NCR5380_map_name port
-#define NCR5380_instance_name io_port
-#define NCR53C400_register_offset 0
-#define NCR53C400_address_adjust 8
-#ifdef NCR53C400
+#ifdef CONFIG_SCSI_GENERIC_NCR53C400
#define NCR5380_region_size 16
#else
#define NCR5380_region_size 8
#endif
-#define NCR5380_read(reg) (inb(NCR5380_map_name + (reg)))
-#define NCR5380_write(reg, value) (outb((value), (NCR5380_map_name + (reg))))
+#define NCR5380_read(reg) \
+ inb(instance->io_port + (reg))
+#define NCR5380_write(reg, value) \
+ outb(value, instance->io_port + (reg))
#define NCR5380_implementation_fields \
- NCR5380_map_type NCR5380_map_name
-
-#define NCR5380_local_declare() \
- register NCR5380_implementation_fields
-
-#define NCR5380_setup(instance) \
- NCR5380_map_name = (NCR5380_map_type)((instance)->NCR5380_instance_name)
+ int c400_ctl_status; \
+ int c400_blk_cnt; \
+ int c400_host_buf; \
+ int io_width;
#else
/* therefore SCSI_G_NCR5380_MEM */
+#define DRV_MODULE_NAME "g_NCR5380_mmio"
-#define NCR5380_map_config memory
#define NCR5380_map_type unsigned long
#define NCR5380_map_name base
-#define NCR5380_instance_name base
-#define NCR53C400_register_offset 0x108
-#define NCR53C400_address_adjust 0
#define NCR53C400_mem_base 0x3880
#define NCR53C400_host_buffer 0x3900
#define NCR5380_region_size 0x3a00
-#define NCR5380_read(reg) readb(iomem + NCR53C400_mem_base + (reg))
-#define NCR5380_write(reg, value) writeb(value, iomem + NCR53C400_mem_base + (reg))
+#define NCR5380_read(reg) \
+ readb(((struct NCR5380_hostdata *)shost_priv(instance))->iomem + \
+ NCR53C400_mem_base + (reg))
+#define NCR5380_write(reg, value) \
+ writeb(value, ((struct NCR5380_hostdata *)shost_priv(instance))->iomem + \
+ NCR53C400_mem_base + (reg))
#define NCR5380_implementation_fields \
- NCR5380_map_type NCR5380_map_name; \
- void __iomem *iomem;
-
-#define NCR5380_local_declare() \
- register void __iomem *iomem
-
-#define NCR5380_setup(instance) \
- iomem = (((struct NCR5380_hostdata *)(instance)->hostdata)->iomem)
+ void __iomem *iomem; \
+ int c400_ctl_status; \
+ int c400_blk_cnt; \
+ int c400_host_buf;
#endif
+#define NCR5380_dma_xfer_len(instance, cmd, phase) \
+ generic_NCR5380_dma_xfer_len(cmd)
+
#define NCR5380_intr generic_NCR5380_intr
#define NCR5380_queue_command generic_NCR5380_queue_command
#define NCR5380_abort generic_NCR5380_abort
@@ -102,7 +88,7 @@
#define BOARD_NCR53C400 1
#define BOARD_NCR53C400A 2
#define BOARD_DTC3181E 3
+#define BOARD_HP_C2502 4
-#endif /* ndef ASM */
#endif /* GENERIC_NCR5380_H */
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c
index d543811..057fdeb 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c
@@ -247,41 +247,36 @@
/* ITCT header */
/* qw0 */
#define ITCT_HDR_DEV_TYPE_OFF 0
-#define ITCT_HDR_DEV_TYPE_MSK (0x3 << ITCT_HDR_DEV_TYPE_OFF)
+#define ITCT_HDR_DEV_TYPE_MSK (0x3ULL << ITCT_HDR_DEV_TYPE_OFF)
#define ITCT_HDR_VALID_OFF 2
-#define ITCT_HDR_VALID_MSK (0x1 << ITCT_HDR_VALID_OFF)
-#define ITCT_HDR_BREAK_REPLY_ENA_OFF 3
-#define ITCT_HDR_BREAK_REPLY_ENA_MSK (0x1 << ITCT_HDR_BREAK_REPLY_ENA_OFF)
+#define ITCT_HDR_VALID_MSK (0x1ULL << ITCT_HDR_VALID_OFF)
#define ITCT_HDR_AWT_CONTROL_OFF 4
-#define ITCT_HDR_AWT_CONTROL_MSK (0x1 << ITCT_HDR_AWT_CONTROL_OFF)
+#define ITCT_HDR_AWT_CONTROL_MSK (0x1ULL << ITCT_HDR_AWT_CONTROL_OFF)
#define ITCT_HDR_MAX_CONN_RATE_OFF 5
-#define ITCT_HDR_MAX_CONN_RATE_MSK (0xf << ITCT_HDR_MAX_CONN_RATE_OFF)
+#define ITCT_HDR_MAX_CONN_RATE_MSK (0xfULL << ITCT_HDR_MAX_CONN_RATE_OFF)
#define ITCT_HDR_VALID_LINK_NUM_OFF 9
-#define ITCT_HDR_VALID_LINK_NUM_MSK (0xf << ITCT_HDR_VALID_LINK_NUM_OFF)
+#define ITCT_HDR_VALID_LINK_NUM_MSK (0xfULL << ITCT_HDR_VALID_LINK_NUM_OFF)
#define ITCT_HDR_PORT_ID_OFF 13
-#define ITCT_HDR_PORT_ID_MSK (0x7 << ITCT_HDR_PORT_ID_OFF)
+#define ITCT_HDR_PORT_ID_MSK (0x7ULL << ITCT_HDR_PORT_ID_OFF)
#define ITCT_HDR_SMP_TIMEOUT_OFF 16
-#define ITCT_HDR_SMP_TIMEOUT_MSK (0xffff << ITCT_HDR_SMP_TIMEOUT_OFF)
-#define ITCT_HDR_MAX_BURST_BYTES_OFF 16
-#define ITCT_HDR_MAX_BURST_BYTES_MSK (0xffffffff << \
- ITCT_MAX_BURST_BYTES_OFF)
+#define ITCT_HDR_SMP_TIMEOUT_MSK (0xffffULL << ITCT_HDR_SMP_TIMEOUT_OFF)
/* qw1 */
#define ITCT_HDR_MAX_SAS_ADDR_OFF 0
#define ITCT_HDR_MAX_SAS_ADDR_MSK (0xffffffffffffffff << \
ITCT_HDR_MAX_SAS_ADDR_OFF)
/* qw2 */
#define ITCT_HDR_IT_NEXUS_LOSS_TL_OFF 0
-#define ITCT_HDR_IT_NEXUS_LOSS_TL_MSK (0xffff << \
+#define ITCT_HDR_IT_NEXUS_LOSS_TL_MSK (0xffffULL << \
ITCT_HDR_IT_NEXUS_LOSS_TL_OFF)
#define ITCT_HDR_BUS_INACTIVE_TL_OFF 16
-#define ITCT_HDR_BUS_INACTIVE_TL_MSK (0xffff << \
+#define ITCT_HDR_BUS_INACTIVE_TL_MSK (0xffffULL << \
ITCT_HDR_BUS_INACTIVE_TL_OFF)
#define ITCT_HDR_MAX_CONN_TL_OFF 32
-#define ITCT_HDR_MAX_CONN_TL_MSK (0xffff << \
+#define ITCT_HDR_MAX_CONN_TL_MSK (0xffffULL << \
ITCT_HDR_MAX_CONN_TL_OFF)
#define ITCT_HDR_REJ_OPEN_TL_OFF 48
-#define ITCT_HDR_REJ_OPEN_TL_MSK (0xffff << \
- ITCT_REJ_OPEN_TL_OFF)
+#define ITCT_HDR_REJ_OPEN_TL_MSK (0xffffULL << \
+ ITCT_HDR_REJ_OPEN_TL_OFF)
/* Err record header */
#define ERR_HDR_DMA_TX_ERR_TYPE_OFF 0
@@ -533,10 +528,10 @@ static void setup_itct_v1_hw(struct hisi_hba *hisi_hba,
itct->sas_addr = __swab64(itct->sas_addr);
/* qw2 */
- itct->qw2 = cpu_to_le64((500 < ITCT_HDR_IT_NEXUS_LOSS_TL_OFF) |
- (0xff00 < ITCT_HDR_BUS_INACTIVE_TL_OFF) |
- (0xff00 < ITCT_HDR_MAX_CONN_TL_OFF) |
- (0xff00 < ITCT_HDR_REJ_OPEN_TL_OFF));
+ itct->qw2 = cpu_to_le64((500ULL << ITCT_HDR_IT_NEXUS_LOSS_TL_OFF) |
+ (0xff00ULL << ITCT_HDR_BUS_INACTIVE_TL_OFF) |
+ (0xff00ULL << ITCT_HDR_MAX_CONN_TL_OFF) |
+ (0xff00ULL << ITCT_HDR_REJ_OPEN_TL_OFF));
}
static void free_device_v1_hw(struct hisi_hba *hisi_hba,
@@ -544,7 +539,8 @@ static void free_device_v1_hw(struct hisi_hba *hisi_hba,
{
u64 dev_id = sas_dev->device_id;
struct hisi_sas_itct *itct = &hisi_hba->itct[dev_id];
- u32 qw0, reg_val = hisi_sas_read32(hisi_hba, CFG_AGING_TIME);
+ u64 qw0;
+ u32 reg_val = hisi_sas_read32(hisi_hba, CFG_AGING_TIME);
reg_val |= CFG_AGING_TIME_ITCT_REL_MSK;
hisi_sas_write32(hisi_hba, CFG_AGING_TIME, reg_val);
diff --git a/drivers/scsi/imm.c b/drivers/scsi/imm.c
index 4e1a632..f8b88fa 100644
--- a/drivers/scsi/imm.c
+++ b/drivers/scsi/imm.c
@@ -43,6 +43,7 @@ typedef struct {
unsigned dp:1; /* Data phase present */
unsigned rd:1; /* Read data in data phase */
unsigned wanted:1; /* Parport sharing busy flag */
+ unsigned int dev_no; /* Device number */
wait_queue_head_t *waiting;
struct Scsi_Host *host;
struct list_head list;
@@ -1120,15 +1121,40 @@ static struct scsi_host_template imm_template = {
static LIST_HEAD(imm_hosts);
+/*
+ * Finds the first available device number that can be alloted to the
+ * new imm device and returns the address of the previous node so that
+ * we can add to the tail and have a list in the ascending order.
+ */
+
+static inline imm_struct *find_parent(void)
+{
+ imm_struct *dev, *par = NULL;
+ unsigned int cnt = 0;
+
+ if (list_empty(&imm_hosts))
+ return NULL;
+
+ list_for_each_entry(dev, &imm_hosts, list) {
+ if (dev->dev_no != cnt)
+ return par;
+ cnt++;
+ par = dev;
+ }
+
+ return par;
+}
+
static int __imm_attach(struct parport *pb)
{
struct Scsi_Host *host;
- imm_struct *dev;
+ imm_struct *dev, *temp;
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(waiting);
DEFINE_WAIT(wait);
int ports;
int modes, ppb;
int err = -ENOMEM;
+ struct pardev_cb imm_cb;
init_waitqueue_head(&waiting);
@@ -1141,9 +1167,15 @@ static int __imm_attach(struct parport *pb)
dev->mode = IMM_AUTODETECT;
INIT_LIST_HEAD(&dev->list);
- dev->dev = parport_register_device(pb, "imm", NULL, imm_wakeup,
- NULL, 0, dev);
+ temp = find_parent();
+ if (temp)
+ dev->dev_no = temp->dev_no + 1;
+
+ memset(&imm_cb, 0, sizeof(imm_cb));
+ imm_cb.private = dev;
+ imm_cb.wakeup = imm_wakeup;
+ dev->dev = parport_register_dev_model(pb, "imm", &imm_cb, dev->dev_no);
if (!dev->dev)
goto out;
@@ -1207,7 +1239,10 @@ static int __imm_attach(struct parport *pb)
host->unique_id = pb->number;
*(imm_struct **)&host->hostdata = dev;
dev->host = host;
- list_add_tail(&dev->list, &imm_hosts);
+ if (!temp)
+ list_add_tail(&dev->list, &imm_hosts);
+ else
+ list_add_tail(&dev->list, &temp->list);
err = scsi_add_host(host, NULL);
if (err)
goto out2;
@@ -1245,9 +1280,10 @@ static void imm_detach(struct parport *pb)
}
static struct parport_driver imm_driver = {
- .name = "imm",
- .attach = imm_attach,
- .detach = imm_detach,
+ .name = "imm",
+ .match_port = imm_attach,
+ .detach = imm_detach,
+ .devmodel = true,
};
static int __init imm_driver_init(void)
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 536cd5a..3b3e099 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -3638,7 +3638,7 @@ static struct device_attribute ipr_ioa_reset_attr = {
.store = ipr_store_reset_adapter
};
-static int ipr_iopoll(struct blk_iopoll *iop, int budget);
+static int ipr_iopoll(struct irq_poll *iop, int budget);
/**
* ipr_show_iopoll_weight - Show ipr polling mode
* @dev: class device struct
@@ -3681,34 +3681,33 @@ static ssize_t ipr_store_iopoll_weight(struct device *dev,
int i;
if (!ioa_cfg->sis64) {
- dev_info(&ioa_cfg->pdev->dev, "blk-iopoll not supported on this adapter\n");
+ dev_info(&ioa_cfg->pdev->dev, "irq_poll not supported on this adapter\n");
return -EINVAL;
}
if (kstrtoul(buf, 10, &user_iopoll_weight))
return -EINVAL;
if (user_iopoll_weight > 256) {
- dev_info(&ioa_cfg->pdev->dev, "Invalid blk-iopoll weight. It must be less than 256\n");
+ dev_info(&ioa_cfg->pdev->dev, "Invalid irq_poll weight. It must be less than 256\n");
return -EINVAL;
}
if (user_iopoll_weight == ioa_cfg->iopoll_weight) {
- dev_info(&ioa_cfg->pdev->dev, "Current blk-iopoll weight has the same weight\n");
+ dev_info(&ioa_cfg->pdev->dev, "Current irq_poll weight has the same weight\n");
return strlen(buf);
}
if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
for (i = 1; i < ioa_cfg->hrrq_num; i++)
- blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll);
+ irq_poll_disable(&ioa_cfg->hrrq[i].iopoll);
}
spin_lock_irqsave(shost->host_lock, lock_flags);
ioa_cfg->iopoll_weight = user_iopoll_weight;
if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
for (i = 1; i < ioa_cfg->hrrq_num; i++) {
- blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll,
+ irq_poll_init(&ioa_cfg->hrrq[i].iopoll,
ioa_cfg->iopoll_weight, ipr_iopoll);
- blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll);
}
}
spin_unlock_irqrestore(shost->host_lock, lock_flags);
@@ -4003,13 +4002,12 @@ static ssize_t ipr_store_update_fw(struct device *dev,
struct ipr_sglist *sglist;
char fname[100];
char *src;
- int len, result, dnld_size;
+ int result, dnld_size;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
- len = snprintf(fname, 99, "%s", buf);
- fname[len-1] = '\0';
+ snprintf(fname, sizeof(fname), "%s", buf);
if (request_firmware(&fw_entry, fname, &ioa_cfg->pdev->dev)) {
dev_err(&ioa_cfg->pdev->dev, "Firmware file %s not found\n", fname);
@@ -5569,7 +5567,7 @@ static int ipr_process_hrrq(struct ipr_hrr_queue *hrr_queue, int budget,
return num_hrrq;
}
-static int ipr_iopoll(struct blk_iopoll *iop, int budget)
+static int ipr_iopoll(struct irq_poll *iop, int budget)
{
struct ipr_ioa_cfg *ioa_cfg;
struct ipr_hrr_queue *hrrq;
@@ -5585,7 +5583,7 @@ static int ipr_iopoll(struct blk_iopoll *iop, int budget)
completed_ops = ipr_process_hrrq(hrrq, budget, &doneq);
if (completed_ops < budget)
- blk_iopoll_complete(iop);
+ irq_poll_complete(iop);
spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) {
@@ -5693,8 +5691,7 @@ static irqreturn_t ipr_isr_mhrrq(int irq, void *devp)
if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) ==
hrrq->toggle_bit) {
- if (!blk_iopoll_sched_prep(&hrrq->iopoll))
- blk_iopoll_sched(&hrrq->iopoll);
+ irq_poll_sched(&hrrq->iopoll);
spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
return IRQ_HANDLED;
}
@@ -10405,9 +10402,8 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id)
if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
for (i = 1; i < ioa_cfg->hrrq_num; i++) {
- blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll,
+ irq_poll_init(&ioa_cfg->hrrq[i].iopoll,
ioa_cfg->iopoll_weight, ipr_iopoll);
- blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll);
}
}
@@ -10436,7 +10432,7 @@ static void ipr_shutdown(struct pci_dev *pdev)
if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
ioa_cfg->iopoll_weight = 0;
for (i = 1; i < ioa_cfg->hrrq_num; i++)
- blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll);
+ irq_poll_disable(&ioa_cfg->hrrq[i].iopoll);
}
while (ioa_cfg->in_reset_reload) {
diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h
index a34c7a5..56c5706 100644
--- a/drivers/scsi/ipr.h
+++ b/drivers/scsi/ipr.h
@@ -32,7 +32,7 @@
#include <linux/libata.h>
#include <linux/list.h>
#include <linux/kref.h>
-#include <linux/blk-iopoll.h>
+#include <linux/irq_poll.h>
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
@@ -517,7 +517,7 @@ struct ipr_hrr_queue {
u8 allow_cmds:1;
u8 removing_ioa:1;
- struct blk_iopoll iopoll;
+ struct irq_poll iopoll;
};
/* Command packet structure */
diff --git a/drivers/scsi/mac_scsi.c b/drivers/scsi/mac_scsi.c
index d64a769..bb23813 100644
--- a/drivers/scsi/mac_scsi.c
+++ b/drivers/scsi/mac_scsi.c
@@ -12,7 +12,6 @@
*/
#include <linux/types.h>
-#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ioport.h>
#include <linux/init.h>
@@ -32,14 +31,13 @@
#define PSEUDO_DMA
#define NCR5380_implementation_fields unsigned char *pdma_base
-#define NCR5380_local_declare() struct Scsi_Host *_instance
-#define NCR5380_setup(instance) _instance = instance
-#define NCR5380_read(reg) macscsi_read(_instance, reg)
-#define NCR5380_write(reg, value) macscsi_write(_instance, reg, value)
+#define NCR5380_read(reg) macscsi_read(instance, reg)
+#define NCR5380_write(reg, value) macscsi_write(instance, reg, value)
#define NCR5380_pread macscsi_pread
#define NCR5380_pwrite macscsi_pwrite
+#define NCR5380_dma_xfer_len(instance, cmd, phase) (cmd->transfersize)
#define NCR5380_intr macscsi_intr
#define NCR5380_queue_command macscsi_queue_command
@@ -51,8 +49,6 @@
#include "NCR5380.h"
-#define RESET_BOOT
-
static int setup_can_queue = -1;
module_param(setup_can_queue, int, 0);
static int setup_cmd_per_lun = -1;
@@ -65,17 +61,8 @@ static int setup_use_tagged_queuing = -1;
module_param(setup_use_tagged_queuing, int, 0);
static int setup_hostid = -1;
module_param(setup_hostid, int, 0);
-
-/* Time (in jiffies) to wait after a reset; the SCSI standard calls for 250ms,
- * we usually do 0.5s to be on the safe side. But Toshiba CD-ROMs once more
- * need ten times the standard value... */
-#define TOSHIBA_DELAY
-
-#ifdef TOSHIBA_DELAY
-#define AFTER_RESET_DELAY (5*HZ/2)
-#else
-#define AFTER_RESET_DELAY (HZ/2)
-#endif
+static int setup_toshiba_delay = -1;
+module_param(setup_toshiba_delay, int, 0);
/*
* NCR 5380 register access functions
@@ -94,12 +81,12 @@ static inline void macscsi_write(struct Scsi_Host *instance, int reg, int value)
#ifndef MODULE
static int __init mac_scsi_setup(char *str)
{
- int ints[7];
+ int ints[8];
(void)get_options(str, ARRAY_SIZE(ints), ints);
- if (ints[0] < 1 || ints[0] > 6) {
- pr_err("Usage: mac5380=<can_queue>[,<cmd_per_lun>[,<sg_tablesize>[,<hostid>[,<use_tags>[,<use_pdma>]]]]]\n");
+ if (ints[0] < 1) {
+ pr_err("Usage: mac5380=<can_queue>[,<cmd_per_lun>[,<sg_tablesize>[,<hostid>[,<use_tags>[,<use_pdma>[,<toshiba_delay>]]]]]]\n");
return 0;
}
if (ints[0] >= 1)
@@ -114,50 +101,14 @@ static int __init mac_scsi_setup(char *str)
setup_use_tagged_queuing = ints[5];
if (ints[0] >= 6)
setup_use_pdma = ints[6];
+ if (ints[0] >= 7)
+ setup_toshiba_delay = ints[7];
return 1;
}
__setup("mac5380=", mac_scsi_setup);
#endif /* !MODULE */
-#ifdef RESET_BOOT
-/*
- * Our 'bus reset on boot' function
- */
-
-static void mac_scsi_reset_boot(struct Scsi_Host *instance)
-{
- unsigned long end;
-
- NCR5380_local_declare();
- NCR5380_setup(instance);
-
- /*
- * Do a SCSI reset to clean up the bus during initialization. No messing
- * with the queues, interrupts, or locks necessary here.
- */
-
- printk(KERN_INFO "Macintosh SCSI: resetting the SCSI bus..." );
-
- /* get in phase */
- NCR5380_write( TARGET_COMMAND_REG,
- PHASE_SR_TO_TCR( NCR5380_read(STATUS_REG) ));
-
- /* assert RST */
- NCR5380_write( INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST );
- /* The min. reset hold time is 25us, so 40us should be enough */
- udelay( 50 );
- /* reset RST and interrupt */
- NCR5380_write( INITIATOR_COMMAND_REG, ICR_BASE );
- NCR5380_read( RESET_PARITY_INTERRUPT_REG );
-
- for( end = jiffies + AFTER_RESET_DELAY; time_before(jiffies, end); )
- barrier();
-
- printk(KERN_INFO " done\n" );
-}
-#endif
-
#ifdef PSEUDO_DMA
/*
Pseudo-DMA: (Ove Edlund)
@@ -235,9 +186,6 @@ static int macscsi_pread(struct Scsi_Host *instance,
unsigned char *d;
unsigned char *s;
- NCR5380_local_declare();
- NCR5380_setup(instance);
-
s = hostdata->pdma_base + (INPUT_DATA_REG << 4);
d = dst;
@@ -329,9 +277,6 @@ static int macscsi_pwrite(struct Scsi_Host *instance,
unsigned char *s;
unsigned char *d;
- NCR5380_local_declare();
- NCR5380_setup(instance);
-
s = src;
d = hostdata->pdma_base + (OUTPUT_DATA_REG << 4);
@@ -364,20 +309,22 @@ static int macscsi_pwrite(struct Scsi_Host *instance,
#define PFX DRV_MODULE_NAME ": "
static struct scsi_host_template mac_scsi_template = {
- .module = THIS_MODULE,
- .proc_name = DRV_MODULE_NAME,
- .show_info = macscsi_show_info,
- .write_info = macscsi_write_info,
- .name = "Macintosh NCR5380 SCSI",
- .info = macscsi_info,
- .queuecommand = macscsi_queue_command,
- .eh_abort_handler = macscsi_abort,
- .eh_bus_reset_handler = macscsi_bus_reset,
- .can_queue = 16,
- .this_id = 7,
- .sg_tablesize = SG_ALL,
- .cmd_per_lun = 2,
- .use_clustering = DISABLE_CLUSTERING
+ .module = THIS_MODULE,
+ .proc_name = DRV_MODULE_NAME,
+ .show_info = macscsi_show_info,
+ .write_info = macscsi_write_info,
+ .name = "Macintosh NCR5380 SCSI",
+ .info = macscsi_info,
+ .queuecommand = macscsi_queue_command,
+ .eh_abort_handler = macscsi_abort,
+ .eh_bus_reset_handler = macscsi_bus_reset,
+ .can_queue = 16,
+ .this_id = 7,
+ .sg_tablesize = SG_ALL,
+ .cmd_per_lun = 2,
+ .use_clustering = DISABLE_CLUSTERING,
+ .cmd_size = NCR5380_CMD_SIZE,
+ .max_sectors = 128,
};
static int __init mac_scsi_probe(struct platform_device *pdev)
@@ -432,15 +379,14 @@ static int __init mac_scsi_probe(struct platform_device *pdev)
} else
host_flags |= FLAG_NO_PSEUDO_DMA;
-#ifdef RESET_BOOT
- mac_scsi_reset_boot(instance);
-#endif
-
#ifdef SUPPORT_TAGS
host_flags |= setup_use_tagged_queuing > 0 ? FLAG_TAGGED_QUEUING : 0;
#endif
+ host_flags |= setup_toshiba_delay > 0 ? FLAG_TOSHIBA_DELAY : 0;
- NCR5380_init(instance, host_flags);
+ error = NCR5380_init(instance, host_flags);
+ if (error)
+ goto fail_init;
if (instance->irq != NO_IRQ) {
error = request_irq(instance->irq, macscsi_intr, IRQF_SHARED,
@@ -449,6 +395,8 @@ static int __init mac_scsi_probe(struct platform_device *pdev)
goto fail_irq;
}
+ NCR5380_maybe_reset_bus(instance);
+
error = scsi_add_host(instance, NULL);
if (error)
goto fail_host;
@@ -463,6 +411,7 @@ fail_host:
free_irq(instance->irq, instance);
fail_irq:
NCR5380_exit(instance);
+fail_init:
scsi_host_put(instance);
return error;
}
diff --git a/drivers/scsi/megaraid/megaraid_mm.c b/drivers/scsi/megaraid/megaraid_mm.c
index a706927..4cf9ed9 100644
--- a/drivers/scsi/megaraid/megaraid_mm.c
+++ b/drivers/scsi/megaraid/megaraid_mm.c
@@ -179,8 +179,12 @@ mraid_mm_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
/*
* The following call will block till a kioc is available
+ * or return NULL if the list head is empty for the pointer
+ * of type mraid_mmapt passed to mraid_mm_alloc_kioc
*/
kioc = mraid_mm_alloc_kioc(adp);
+ if (!kioc)
+ return -ENXIO;
/*
* User sent the old mimd_t ioctl packet. Convert it to uioc_t.
diff --git a/drivers/scsi/pas16.c b/drivers/scsi/pas16.c
index e81eadd..512037e 100644
--- a/drivers/scsi/pas16.c
+++ b/drivers/scsi/pas16.c
@@ -1,6 +1,4 @@
#define PSEUDO_DMA
-#define UNSAFE /* Not unsafe for PAS16 -- use it */
-#define PDEBUG 0
/*
* This driver adapted from Drew Eckhardt's Trantor T128 driver
@@ -71,14 +69,10 @@
#include <linux/module.h>
-#include <linux/signal.h>
-#include <linux/proc_fs.h>
#include <asm/io.h>
#include <asm/dma.h>
#include <linux/blkdev.h>
-#include <linux/delay.h>
#include <linux/interrupt.h>
-#include <linux/stat.h>
#include <linux/init.h>
#include <scsi/scsi_host.h>
@@ -87,8 +81,8 @@
#include "NCR5380.h"
-static unsigned short pas16_addr = 0;
-static int pas16_irq = 0;
+static unsigned short pas16_addr;
+static int pas16_irq;
static const int scsi_irq_translate[] =
@@ -146,22 +140,6 @@ static const unsigned short pas16_offset[ 8 ] =
* START_DMA_INITIATOR_RECEIVE_REG wo
*/
};
-/*----------------------------------------------------------------*/
-/* the following will set the monitor border color (useful to find
- where something crashed or gets stuck at */
-/* 1 = blue
- 2 = green
- 3 = cyan
- 4 = red
- 5 = magenta
- 6 = yellow
- 7 = white
-*/
-#if 1
-#define rtrc(i) {inb(0x3da); outb(0x31, 0x3c0); outb((i), 0x3c0);}
-#else
-#define rtrc(i) {}
-#endif
/*
@@ -205,7 +183,7 @@ static void __init
outb( 0x01, io_port + P_TIMEOUT_STATUS_REG_OFFSET ); /* Reset TC */
outb( 0x01, io_port + WAIT_STATE ); /* 1 Wait state */
- NCR5380_read( RESET_PARITY_INTERRUPT_REG );
+ inb(io_port + pas16_offset[RESET_PARITY_INTERRUPT_REG]);
/* Set the SCSI interrupt pointer without mucking up the sound
* interrupt pointer in the same byte.
@@ -280,13 +258,13 @@ static int __init
* put in an additional test to try to weed them out.
*/
- outb( 0x01, io_port + WAIT_STATE ); /* 1 Wait state */
- NCR5380_write( MODE_REG, 0x20 ); /* Is it really SCSI? */
- if( NCR5380_read( MODE_REG ) != 0x20 ) /* Write to a reg. */
- return 0; /* and try to read */
- NCR5380_write( MODE_REG, 0x00 ); /* it back. */
- if( NCR5380_read( MODE_REG ) != 0x00 )
- return 0;
+ outb(0x01, io_port + WAIT_STATE); /* 1 Wait state */
+ outb(0x20, io_port + pas16_offset[MODE_REG]); /* Is it really SCSI? */
+ if (inb(io_port + pas16_offset[MODE_REG]) != 0x20) /* Write to a reg. */
+ return 0; /* and try to read */
+ outb(0x00, io_port + pas16_offset[MODE_REG]); /* it back. */
+ if (inb(io_port + pas16_offset[MODE_REG]) != 0x00)
+ return 0;
return 1;
}
@@ -305,7 +283,7 @@ static int __init
static int __init pas16_setup(char *str)
{
- static int commandline_current = 0;
+ static int commandline_current;
int i;
int ints[10];
@@ -344,8 +322,8 @@ __setup("pas16=", pas16_setup);
static int __init pas16_detect(struct scsi_host_template *tpnt)
{
- static int current_override = 0;
- static unsigned short current_base = 0;
+ static int current_override;
+ static unsigned short current_base;
struct Scsi_Host *instance;
unsigned short io_port;
int count;
@@ -377,34 +355,32 @@ static int __init pas16_detect(struct scsi_host_template *tpnt)
}
else
for (; !io_port && (current_base < NO_BASES); ++current_base) {
-#if (PDEBUG & PDEBUG_INIT)
- printk("scsi-pas16 : probing io_port %04x\n", (unsigned int) bases[current_base].io_port);
-#endif
+ dprintk(NDEBUG_INIT, "pas16: probing io_port 0x%04x\n",
+ (unsigned int)bases[current_base].io_port);
if ( !bases[current_base].noauto &&
pas16_hw_detect( current_base ) ){
io_port = bases[current_base].io_port;
init_board( io_port, default_irqs[ current_base ], 0 );
-#if (PDEBUG & PDEBUG_INIT)
- printk("scsi-pas16 : detected board.\n");
-#endif
+ dprintk(NDEBUG_INIT, "pas16: detected board\n");
}
}
-
-#if defined(PDEBUG) && (PDEBUG & PDEBUG_INIT)
- printk("scsi-pas16 : io_port = %04x\n", (unsigned int) io_port);
-#endif
+ dprintk(NDEBUG_INIT, "pas16: io_port = 0x%04x\n",
+ (unsigned int)io_port);
if (!io_port)
break;
instance = scsi_register (tpnt, sizeof(struct NCR5380_hostdata));
if(instance == NULL)
- break;
+ goto out;
instance->io_port = io_port;
- NCR5380_init(instance, 0);
+ if (NCR5380_init(instance, 0))
+ goto out_unregister;
+
+ NCR5380_maybe_reset_bus(instance);
if (overrides[current_override].irq != IRQ_AUTO)
instance->irq = overrides[current_override].irq;
@@ -431,14 +407,18 @@ static int __init pas16_detect(struct scsi_host_template *tpnt)
outb( (inb(io_port + IO_CONFIG_3) & 0x0f), io_port + IO_CONFIG_3 );
}
-#if defined(PDEBUG) && (PDEBUG & PDEBUG_INIT)
- printk("scsi%d : irq = %d\n", instance->host_no, instance->irq);
-#endif
+ dprintk(NDEBUG_INIT, "scsi%d : irq = %d\n",
+ instance->host_no, instance->irq);
++current_override;
++count;
}
return count;
+
+out_unregister:
+ scsi_unregister(instance);
+out:
+ return count;
}
/*
@@ -561,29 +541,29 @@ static int pas16_release(struct Scsi_Host *shost)
if (shost->irq != NO_IRQ)
free_irq(shost->irq, shost);
NCR5380_exit(shost);
- if (shost->io_port && shost->n_io_port)
- release_region(shost->io_port, shost->n_io_port);
scsi_unregister(shost);
return 0;
}
static struct scsi_host_template driver_template = {
- .name = "Pro Audio Spectrum-16 SCSI",
- .detect = pas16_detect,
- .release = pas16_release,
- .proc_name = "pas16",
- .show_info = pas16_show_info,
- .write_info = pas16_write_info,
- .info = pas16_info,
- .queuecommand = pas16_queue_command,
- .eh_abort_handler = pas16_abort,
- .eh_bus_reset_handler = pas16_bus_reset,
- .bios_param = pas16_biosparam,
- .can_queue = CAN_QUEUE,
- .this_id = 7,
- .sg_tablesize = SG_ALL,
- .cmd_per_lun = CMD_PER_LUN,
- .use_clustering = DISABLE_CLUSTERING,
+ .name = "Pro Audio Spectrum-16 SCSI",
+ .detect = pas16_detect,
+ .release = pas16_release,
+ .proc_name = "pas16",
+ .show_info = pas16_show_info,
+ .write_info = pas16_write_info,
+ .info = pas16_info,
+ .queuecommand = pas16_queue_command,
+ .eh_abort_handler = pas16_abort,
+ .eh_bus_reset_handler = pas16_bus_reset,
+ .bios_param = pas16_biosparam,
+ .can_queue = 32,
+ .this_id = 7,
+ .sg_tablesize = SG_ALL,
+ .cmd_per_lun = 2,
+ .use_clustering = DISABLE_CLUSTERING,
+ .cmd_size = NCR5380_CMD_SIZE,
+ .max_sectors = 128,
};
#include "scsi_module.c"
diff --git a/drivers/scsi/pas16.h b/drivers/scsi/pas16.h
index c6109c8..d375277 100644
--- a/drivers/scsi/pas16.h
+++ b/drivers/scsi/pas16.h
@@ -24,9 +24,6 @@
#ifndef PAS16_H
#define PAS16_H
-#define PDEBUG_INIT 0x1
-#define PDEBUG_TRANSFER 0x2
-
#define PAS16_DEFAULT_BASE_1 0x388
#define PAS16_DEFAULT_BASE_2 0x384
#define PAS16_DEFAULT_BASE_3 0x38c
@@ -98,46 +95,16 @@
#define OPERATION_MODE_1 0xec03
#define IO_CONFIG_3 0xf002
+#define NCR5380_implementation_fields /* none */
-#ifndef ASM
-
-#ifndef CMD_PER_LUN
-#define CMD_PER_LUN 2
-#endif
-
-#ifndef CAN_QUEUE
-#define CAN_QUEUE 32
-#endif
-
-#define NCR5380_implementation_fields \
- volatile unsigned short io_port
-
-#define NCR5380_local_declare() \
- volatile unsigned short io_port
+#define PAS16_io_port(reg) (instance->io_port + pas16_offset[(reg)])
-#define NCR5380_setup(instance) \
- io_port = (instance)->io_port
-
-#define PAS16_io_port(reg) ( io_port + pas16_offset[(reg)] )
-
-#if !(PDEBUG & PDEBUG_TRANSFER)
#define NCR5380_read(reg) ( inb(PAS16_io_port(reg)) )
#define NCR5380_write(reg, value) ( outb((value),PAS16_io_port(reg)) )
-#else
-#define NCR5380_read(reg) \
- (((unsigned char) printk("scsi%d : read register %d at io_port %04x\n"\
- , instance->hostno, (reg), PAS16_io_port(reg))), inb( PAS16_io_port(reg)) )
-
-#define NCR5380_write(reg, value) \
- (printk("scsi%d : write %02x to register %d at io_port %04x\n", \
- instance->hostno, (value), (reg), PAS16_io_port(reg)), \
- outb( (value),PAS16_io_port(reg) ) )
-
-#endif
+#define NCR5380_dma_xfer_len(instance, cmd, phase) (cmd->transfersize)
#define NCR5380_intr pas16_intr
-#define do_NCR5380_intr do_pas16_intr
#define NCR5380_queue_command pas16_queue_command
#define NCR5380_abort pas16_abort
#define NCR5380_bus_reset pas16_bus_reset
@@ -150,5 +117,4 @@
#define PAS16_IRQS 0xd4a8
-#endif /* ndef ASM */
#endif /* PAS16_H */
diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c
index 2c1160c7..47b9d13 100644
--- a/drivers/scsi/scsi_devinfo.c
+++ b/drivers/scsi/scsi_devinfo.c
@@ -227,6 +227,7 @@ static struct {
{"Promise", "VTrak E610f", NULL, BLIST_SPARSELUN | BLIST_NO_RSOC},
{"Promise", "", NULL, BLIST_SPARSELUN},
{"QNAP", "iSCSI Storage", NULL, BLIST_MAX_1024},
+ {"SYNOLOGY", "iSCSI Storage", NULL, BLIST_MAX_1024},
{"QUANTUM", "XP34301", "1071", BLIST_NOTQ},
{"REGAL", "CDC-4X", NULL, BLIST_MAX5LUN | BLIST_SINGLELUN},
{"SanDisk", "ImageMate CF-SD1", NULL, BLIST_FORCELUN},
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 41c115c..55627d0 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -390,7 +390,7 @@ module_param(storvsc_ringbuffer_size, int, S_IRUGO);
MODULE_PARM_DESC(storvsc_ringbuffer_size, "Ring buffer size (bytes)");
module_param(storvsc_vcpus_per_sub_channel, int, S_IRUGO);
-MODULE_PARM_DESC(vcpus_per_sub_channel, "Ratio of VCPUs to subchannels");
+MODULE_PARM_DESC(storvsc_vcpus_per_sub_channel, "Ratio of VCPUs to subchannels");
/*
* Timeout in seconds for all devices managed by this driver.
*/
diff --git a/drivers/scsi/sun3_scsi.c b/drivers/scsi/sun3_scsi.c
index 22a4283..b9de487 100644
--- a/drivers/scsi/sun3_scsi.c
+++ b/drivers/scsi/sun3_scsi.c
@@ -53,13 +53,12 @@
#define NCR5380_queue_command sun3scsi_queue_command
#define NCR5380_bus_reset sun3scsi_bus_reset
#define NCR5380_abort sun3scsi_abort
-#define NCR5380_show_info sun3scsi_show_info
#define NCR5380_info sun3scsi_info
#define NCR5380_dma_read_setup(instance, data, count) \
- sun3scsi_dma_setup(data, count, 0)
+ sun3scsi_dma_setup(instance, data, count, 0)
#define NCR5380_dma_write_setup(instance, data, count) \
- sun3scsi_dma_setup(data, count, 1)
+ sun3scsi_dma_setup(instance, data, count, 1)
#define NCR5380_dma_residual(instance) \
sun3scsi_dma_residual(instance)
#define NCR5380_dma_xfer_len(instance, cmd, phase) \
@@ -86,10 +85,6 @@ module_param(setup_use_tagged_queuing, int, 0);
static int setup_hostid = -1;
module_param(setup_hostid, int, 0);
-/* #define RESET_BOOT */
-
-#define AFTER_RESET_DELAY (HZ/2)
-
/* ms to wait after hitting dma regs */
#define SUN3_DMA_DELAY 10
@@ -100,11 +95,10 @@ static struct scsi_cmnd *sun3_dma_setup_done;
static unsigned char *sun3_scsi_regp;
static volatile struct sun3_dma_regs *dregs;
static struct sun3_udc_regs *udc_regs;
-static unsigned char *sun3_dma_orig_addr = NULL;
-static unsigned long sun3_dma_orig_count = 0;
-static int sun3_dma_active = 0;
-static unsigned long last_residual = 0;
-static struct Scsi_Host *default_instance;
+static unsigned char *sun3_dma_orig_addr;
+static unsigned long sun3_dma_orig_count;
+static int sun3_dma_active;
+static unsigned long last_residual;
/*
* NCR 5380 register access functions
@@ -144,50 +138,12 @@ static inline void sun3_udc_write(unsigned short val, unsigned char reg)
}
#endif
-#ifdef RESET_BOOT
-static void sun3_scsi_reset_boot(struct Scsi_Host *instance)
-{
- unsigned long end;
-
- /*
- * Do a SCSI reset to clean up the bus during initialization. No
- * messing with the queues, interrupts, or locks necessary here.
- */
-
- printk( "Sun3 SCSI: resetting the SCSI bus..." );
-
- /* switch off SCSI IRQ - catch an interrupt without IRQ bit set else */
-// sun3_disable_irq( IRQ_SUN3_SCSI );
-
- /* get in phase */
- NCR5380_write( TARGET_COMMAND_REG,
- PHASE_SR_TO_TCR( NCR5380_read(STATUS_REG) ));
-
- /* assert RST */
- NCR5380_write( INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST );
-
- /* The min. reset hold time is 25us, so 40us should be enough */
- udelay( 50 );
-
- /* reset RST and interrupt */
- NCR5380_write( INITIATOR_COMMAND_REG, ICR_BASE );
- NCR5380_read( RESET_PARITY_INTERRUPT_REG );
-
- for( end = jiffies + AFTER_RESET_DELAY; time_before(jiffies, end); )
- barrier();
-
- /* switch on SCSI IRQ again */
-// sun3_enable_irq( IRQ_SUN3_SCSI );
-
- printk( " done\n" );
-}
-#endif
-
// safe bits for the CSR
#define CSR_GOOD 0x060f
-static irqreturn_t scsi_sun3_intr(int irq, void *dummy)
+static irqreturn_t scsi_sun3_intr(int irq, void *dev)
{
+ struct Scsi_Host *instance = dev;
unsigned short csr = dregs->csr;
int handled = 0;
@@ -196,46 +152,24 @@ static irqreturn_t scsi_sun3_intr(int irq, void *dummy)
#endif
if(csr & ~CSR_GOOD) {
- if(csr & CSR_DMA_BUSERR) {
- printk("scsi%d: bus error in dma\n", default_instance->host_no);
- }
-
- if(csr & CSR_DMA_CONFLICT) {
- printk("scsi%d: dma conflict\n", default_instance->host_no);
- }
+ if (csr & CSR_DMA_BUSERR)
+ shost_printk(KERN_ERR, instance, "bus error in DMA\n");
+ if (csr & CSR_DMA_CONFLICT)
+ shost_printk(KERN_ERR, instance, "DMA conflict\n");
handled = 1;
}
if(csr & (CSR_SDB_INT | CSR_DMA_INT)) {
- NCR5380_intr(irq, dummy);
+ NCR5380_intr(irq, dev);
handled = 1;
}
return IRQ_RETVAL(handled);
}
-/*
- * Debug stuff - to be called on NMI, or sysrq key. Use at your own risk;
- * reentering NCR5380_print_status seems to have ugly side effects
- */
-
-/* this doesn't seem to get used at all -- sam */
-#if 0
-void sun3_sun3_debug (void)
-{
- unsigned long flags;
-
- if (default_instance) {
- local_irq_save(flags);
- NCR5380_print_status(default_instance);
- local_irq_restore(flags);
- }
-}
-#endif
-
-
/* sun3scsi_dma_setup() -- initialize the dma controller for a read/write */
-static unsigned long sun3scsi_dma_setup(void *data, unsigned long count, int write_flag)
+static unsigned long sun3scsi_dma_setup(struct Scsi_Host *instance,
+ void *data, unsigned long count, int write_flag)
{
void *addr;
@@ -287,10 +221,9 @@ static unsigned long sun3scsi_dma_setup(void *data, unsigned long count, int wri
dregs->csr |= CSR_FIFO;
if(dregs->fifo_count != count) {
- printk("scsi%d: fifo_mismatch %04x not %04x\n",
- default_instance->host_no, dregs->fifo_count,
- (unsigned int) count);
- NCR5380_dprint(NDEBUG_DMA, default_instance);
+ shost_printk(KERN_ERR, instance, "FIFO mismatch %04x not %04x\n",
+ dregs->fifo_count, (unsigned int) count);
+ NCR5380_dprint(NDEBUG_DMA, instance);
}
/* setup udc */
@@ -325,21 +258,6 @@ static unsigned long sun3scsi_dma_setup(void *data, unsigned long count, int wri
}
-#ifndef SUN3_SCSI_VME
-static inline unsigned long sun3scsi_dma_count(struct Scsi_Host *instance)
-{
- unsigned short resid;
-
- dregs->udc_addr = 0x32;
- udelay(SUN3_DMA_DELAY);
- resid = dregs->udc_data;
- udelay(SUN3_DMA_DELAY);
- resid *= 2;
-
- return (unsigned long) resid;
-}
-#endif
-
static inline unsigned long sun3scsi_dma_residual(struct Scsi_Host *instance)
{
return last_residual;
@@ -437,7 +355,10 @@ static int sun3scsi_dma_finish(int write_flag)
}
}
- count = sun3scsi_dma_count(default_instance);
+ dregs->udc_addr = 0x32;
+ udelay(SUN3_DMA_DELAY);
+ count = 2 * dregs->udc_data;
+ udelay(SUN3_DMA_DELAY);
fifo = dregs->fifo_count;
last_residual = fifo;
@@ -502,17 +423,17 @@ static int sun3scsi_dma_finish(int write_flag)
static struct scsi_host_template sun3_scsi_template = {
.module = THIS_MODULE,
.proc_name = DRV_MODULE_NAME,
- .show_info = sun3scsi_show_info,
.name = SUN3_SCSI_NAME,
.info = sun3scsi_info,
.queuecommand = sun3scsi_queue_command,
- .eh_abort_handler = sun3scsi_abort,
- .eh_bus_reset_handler = sun3scsi_bus_reset,
+ .eh_abort_handler = sun3scsi_abort,
+ .eh_bus_reset_handler = sun3scsi_bus_reset,
.can_queue = 16,
.this_id = 7,
.sg_tablesize = SG_NONE,
.cmd_per_lun = 2,
- .use_clustering = DISABLE_CLUSTERING
+ .use_clustering = DISABLE_CLUSTERING,
+ .cmd_size = NCR5380_CMD_SIZE,
};
static int __init sun3_scsi_probe(struct platform_device *pdev)
@@ -591,7 +512,6 @@ static int __init sun3_scsi_probe(struct platform_device *pdev)
error = -ENOMEM;
goto fail_alloc;
}
- default_instance = instance;
instance->io_port = (unsigned long)ioaddr;
instance->irq = irq->start;
@@ -600,7 +520,9 @@ static int __init sun3_scsi_probe(struct platform_device *pdev)
host_flags |= setup_use_tagged_queuing > 0 ? FLAG_TAGGED_QUEUING : 0;
#endif
- NCR5380_init(instance, host_flags);
+ error = NCR5380_init(instance, host_flags);
+ if (error)
+ goto fail_init;
error = request_irq(instance->irq, scsi_sun3_intr, 0,
"NCR5380", instance);
@@ -631,9 +553,7 @@ static int __init sun3_scsi_probe(struct platform_device *pdev)
dregs->ivect = VME_DATA24 | (instance->irq & 0xff);
#endif
-#ifdef RESET_BOOT
- sun3_scsi_reset_boot(instance);
-#endif
+ NCR5380_maybe_reset_bus(instance);
error = scsi_add_host(instance, NULL);
if (error)
@@ -649,6 +569,7 @@ fail_host:
free_irq(instance->irq, instance);
fail_irq:
NCR5380_exit(instance);
+fail_init:
scsi_host_put(instance);
fail_alloc:
if (udc_regs)
diff --git a/drivers/scsi/t128.c b/drivers/scsi/t128.c
index 87828ac..4615fda 100644
--- a/drivers/scsi/t128.c
+++ b/drivers/scsi/t128.c
@@ -68,14 +68,11 @@
* 15 9-11
*/
-#include <linux/signal.h>
#include <linux/io.h>
#include <linux/blkdev.h>
#include <linux/interrupt.h>
-#include <linux/stat.h>
#include <linux/init.h>
#include <linux/module.h>
-#include <linux/delay.h>
#include <scsi/scsi_host.h>
#include "t128.h"
@@ -126,7 +123,7 @@ static struct signature {
static int __init t128_setup(char *str)
{
- static int commandline_current = 0;
+ static int commandline_current;
int i;
int ints[10];
@@ -165,7 +162,7 @@ __setup("t128=", t128_setup);
static int __init t128_detect(struct scsi_host_template *tpnt)
{
- static int current_override = 0, current_base = 0;
+ static int current_override, current_base;
struct Scsi_Host *instance;
unsigned long base;
void __iomem *p;
@@ -182,9 +179,8 @@ static int __init t128_detect(struct scsi_host_template *tpnt)
base = 0;
} else
for (; !base && (current_base < NO_BASES); ++current_base) {
-#if (TDEBUG & TDEBUG_INIT)
- printk("scsi-t128 : probing address %08x\n", bases[current_base].address);
-#endif
+ dprintk(NDEBUG_INIT, "t128: probing address 0x%08x\n",
+ bases[current_base].address);
if (bases[current_base].noauto)
continue;
p = ioremap(bases[current_base].address, 0x2000);
@@ -195,17 +191,13 @@ static int __init t128_detect(struct scsi_host_template *tpnt)
signatures[sig].string,
strlen(signatures[sig].string))) {
base = bases[current_base].address;
-#if (TDEBUG & TDEBUG_INIT)
- printk("scsi-t128 : detected board.\n");
-#endif
+ dprintk(NDEBUG_INIT, "t128: detected board\n");
goto found;
}
iounmap(p);
}
-#if defined(TDEBUG) && (TDEBUG & TDEBUG_INIT)
- printk("scsi-t128 : base = %08x\n", (unsigned int) base);
-#endif
+ dprintk(NDEBUG_INIT, "t128: base = 0x%08x\n", (unsigned int)base);
if (!base)
break;
@@ -213,12 +205,15 @@ static int __init t128_detect(struct scsi_host_template *tpnt)
found:
instance = scsi_register (tpnt, sizeof(struct NCR5380_hostdata));
if(instance == NULL)
- break;
-
+ goto out_unmap;
+
instance->base = base;
((struct NCR5380_hostdata *)instance->hostdata)->base = p;
- NCR5380_init(instance, 0);
+ if (NCR5380_init(instance, 0))
+ goto out_unregister;
+
+ NCR5380_maybe_reset_bus(instance);
if (overrides[current_override].irq != IRQ_AUTO)
instance->irq = overrides[current_override].irq;
@@ -242,27 +237,30 @@ found:
printk("scsi%d : please jumper the board for a free IRQ.\n", instance->host_no);
}
-#if defined(TDEBUG) && (TDEBUG & TDEBUG_INIT)
- printk("scsi%d : irq = %d\n", instance->host_no, instance->irq);
-#endif
+ dprintk(NDEBUG_INIT, "scsi%d: irq = %d\n",
+ instance->host_no, instance->irq);
++current_override;
++count;
}
return count;
+
+out_unregister:
+ scsi_unregister(instance);
+out_unmap:
+ iounmap(p);
+ return count;
}
static int t128_release(struct Scsi_Host *shost)
{
- NCR5380_local_declare();
- NCR5380_setup(shost);
+ struct NCR5380_hostdata *hostdata = shost_priv(shost);
+
if (shost->irq != NO_IRQ)
free_irq(shost->irq, shost);
NCR5380_exit(shost);
- if (shost->io_port && shost->n_io_port)
- release_region(shost->io_port, shost->n_io_port);
scsi_unregister(shost);
- iounmap(base);
+ iounmap(hostdata->base);
return 0;
}
@@ -308,14 +306,14 @@ static int t128_biosparam(struct scsi_device *sdev, struct block_device *bdev,
* timeout.
*/
-static inline int NCR5380_pread (struct Scsi_Host *instance, unsigned char *dst,
- int len) {
- NCR5380_local_declare();
- void __iomem *reg;
+static inline int
+NCR5380_pread(struct Scsi_Host *instance, unsigned char *dst, int len)
+{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ void __iomem *reg, *base = hostdata->base;
unsigned char *d = dst;
register int i = len;
- NCR5380_setup(instance);
reg = base + T_DATA_REG_OFFSET;
#if 0
@@ -354,14 +352,14 @@ static inline int NCR5380_pread (struct Scsi_Host *instance, unsigned char *dst,
* timeout.
*/
-static inline int NCR5380_pwrite (struct Scsi_Host *instance, unsigned char *src,
- int len) {
- NCR5380_local_declare();
- void __iomem *reg;
+static inline int
+NCR5380_pwrite(struct Scsi_Host *instance, unsigned char *src, int len)
+{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ void __iomem *reg, *base = hostdata->base;
unsigned char *s = src;
register int i = len;
- NCR5380_setup(instance);
reg = base + T_DATA_REG_OFFSET;
#if 0
@@ -392,21 +390,23 @@ MODULE_LICENSE("GPL");
#include "NCR5380.c"
static struct scsi_host_template driver_template = {
- .name = "Trantor T128/T128F/T228",
- .detect = t128_detect,
- .release = t128_release,
- .proc_name = "t128",
- .show_info = t128_show_info,
- .write_info = t128_write_info,
- .info = t128_info,
- .queuecommand = t128_queue_command,
- .eh_abort_handler = t128_abort,
- .eh_bus_reset_handler = t128_bus_reset,
- .bios_param = t128_biosparam,
- .can_queue = CAN_QUEUE,
- .this_id = 7,
- .sg_tablesize = SG_ALL,
- .cmd_per_lun = CMD_PER_LUN,
- .use_clustering = DISABLE_CLUSTERING,
+ .name = "Trantor T128/T128F/T228",
+ .detect = t128_detect,
+ .release = t128_release,
+ .proc_name = "t128",
+ .show_info = t128_show_info,
+ .write_info = t128_write_info,
+ .info = t128_info,
+ .queuecommand = t128_queue_command,
+ .eh_abort_handler = t128_abort,
+ .eh_bus_reset_handler = t128_bus_reset,
+ .bios_param = t128_biosparam,
+ .can_queue = 32,
+ .this_id = 7,
+ .sg_tablesize = SG_ALL,
+ .cmd_per_lun = 2,
+ .use_clustering = DISABLE_CLUSTERING,
+ .cmd_size = NCR5380_CMD_SIZE,
+ .max_sectors = 128,
};
#include "scsi_module.c"
diff --git a/drivers/scsi/t128.h b/drivers/scsi/t128.h
index 2c73714..dd16d85 100644
--- a/drivers/scsi/t128.h
+++ b/drivers/scsi/t128.h
@@ -23,10 +23,6 @@
#ifndef T128_H
#define T128_H
-#define TDEBUG 0
-#define TDEBUG_INIT 0x1
-#define TDEBUG_TRANSFER 0x2
-
/*
* The trantor boards are memory mapped. They use an NCR5380 or
* equivalent (my sample board had part second sourced from ZILOG).
@@ -71,44 +67,18 @@
#define T_DATA_REG_OFFSET 0x1e00 /* rw 512 bytes long */
-#ifndef ASM
-
-#ifndef CMD_PER_LUN
-#define CMD_PER_LUN 2
-#endif
-
-#ifndef CAN_QUEUE
-#define CAN_QUEUE 32
-#endif
-
#define NCR5380_implementation_fields \
void __iomem *base
-#define NCR5380_local_declare() \
- void __iomem *base
-
-#define NCR5380_setup(instance) \
- base = ((struct NCR5380_hostdata *)(instance->hostdata))->base
+#define T128_address(reg) \
+ (((struct NCR5380_hostdata *)shost_priv(instance))->base + T_5380_OFFSET + ((reg) * 0x20))
-#define T128_address(reg) (base + T_5380_OFFSET + ((reg) * 0x20))
-
-#if !(TDEBUG & TDEBUG_TRANSFER)
#define NCR5380_read(reg) readb(T128_address(reg))
#define NCR5380_write(reg, value) writeb((value),(T128_address(reg)))
-#else
-#define NCR5380_read(reg) \
- (((unsigned char) printk("scsi%d : read register %d at address %08x\n"\
- , instance->hostno, (reg), T128_address(reg))), readb(T128_address(reg)))
-
-#define NCR5380_write(reg, value) { \
- printk("scsi%d : write %02x to register %d at address %08x\n", \
- instance->hostno, (value), (reg), T128_address(reg)); \
- writeb((value), (T128_address(reg))); \
-}
-#endif
+
+#define NCR5380_dma_xfer_len(instance, cmd, phase) (cmd->transfersize)
#define NCR5380_intr t128_intr
-#define do_NCR5380_intr do_t128_intr
#define NCR5380_queue_command t128_queue_command
#define NCR5380_abort t128_abort
#define NCR5380_bus_reset t128_bus_reset
@@ -121,5 +91,4 @@
#define T128_IRQS 0xc4a8
-#endif /* ndef ASM */
#endif /* T128_H */
diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index fb2b393..8826020 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -7,6 +7,7 @@ source "drivers/soc/mediatek/Kconfig"
source "drivers/soc/qcom/Kconfig"
source "drivers/soc/rockchip/Kconfig"
source "drivers/soc/sunxi/Kconfig"
+source "drivers/soc/tegra/Kconfig"
source "drivers/soc/ti/Kconfig"
source "drivers/soc/versatile/Kconfig"
diff --git a/drivers/soc/qcom/spm.c b/drivers/soc/qcom/spm.c
index 0ad66fa..5548a31 100644
--- a/drivers/soc/qcom/spm.c
+++ b/drivers/soc/qcom/spm.c
@@ -288,7 +288,7 @@ static struct spm_driver_data *spm_get_drv(struct platform_device *pdev,
struct spm_driver_data *drv = NULL;
struct device_node *cpu_node, *saw_node;
int cpu;
- bool found;
+ bool found = 0;
for_each_possible_cpu(cpu) {
cpu_node = of_cpu_device_node_get(cpu);
diff --git a/drivers/soc/tegra/Kconfig b/drivers/soc/tegra/Kconfig
new file mode 100644
index 0000000..d0c3c3e
--- /dev/null
+++ b/drivers/soc/tegra/Kconfig
@@ -0,0 +1,83 @@
+if ARCH_TEGRA
+
+# 32-bit ARM SoCs
+if ARM
+
+config ARCH_TEGRA_2x_SOC
+ bool "Enable support for Tegra20 family"
+ select ARCH_NEEDS_CPU_IDLE_COUPLED if SMP
+ select ARM_ERRATA_720789
+ select ARM_ERRATA_754327 if SMP
+ select ARM_ERRATA_764369 if SMP
+ select PINCTRL_TEGRA20
+ select PL310_ERRATA_727915 if CACHE_L2X0
+ select PL310_ERRATA_769419 if CACHE_L2X0
+ select TEGRA_TIMER
+ help
+ Support for NVIDIA Tegra AP20 and T20 processors, based on the
+ ARM CortexA9MP CPU and the ARM PL310 L2 cache controller
+
+config ARCH_TEGRA_3x_SOC
+ bool "Enable support for Tegra30 family"
+ select ARM_ERRATA_754322
+ select ARM_ERRATA_764369 if SMP
+ select PINCTRL_TEGRA30
+ select PL310_ERRATA_769419 if CACHE_L2X0
+ select TEGRA_TIMER
+ help
+ Support for NVIDIA Tegra T30 processor family, based on the
+ ARM CortexA9MP CPU and the ARM PL310 L2 cache controller
+
+config ARCH_TEGRA_114_SOC
+ bool "Enable support for Tegra114 family"
+ select ARM_ERRATA_798181 if SMP
+ select ARM_L1_CACHE_SHIFT_6
+ select HAVE_ARM_ARCH_TIMER
+ select PINCTRL_TEGRA114
+ select TEGRA_TIMER
+ help
+ Support for NVIDIA Tegra T114 processor family, based on the
+ ARM CortexA15MP CPU
+
+config ARCH_TEGRA_124_SOC
+ bool "Enable support for Tegra124 family"
+ select ARM_L1_CACHE_SHIFT_6
+ select HAVE_ARM_ARCH_TIMER
+ select PINCTRL_TEGRA124
+ select TEGRA_TIMER
+ help
+ Support for NVIDIA Tegra T124 processor family, based on the
+ ARM CortexA15MP CPU
+
+endif
+
+# 64-bit ARM SoCs
+if ARM64
+
+config ARCH_TEGRA_132_SOC
+ bool "NVIDIA Tegra132 SoC"
+ select PINCTRL_TEGRA124
+ help
+ Enable support for NVIDIA Tegra132 SoC, based on the Denver
+ ARMv8 CPU. The Tegra132 SoC is similar to the Tegra124 SoC,
+ but contains an NVIDIA Denver CPU complex in place of
+ Tegra124's "4+1" Cortex-A15 CPU complex.
+
+config ARCH_TEGRA_210_SOC
+ bool "NVIDIA Tegra210 SoC"
+ select PINCTRL_TEGRA210
+ help
+ Enable support for the NVIDIA Tegra210 SoC. Also known as Tegra X1,
+ the Tegra210 has four Cortex-A57 cores paired with four Cortex-A53
+ cores in a switched configuration. It features a GPU of the Maxwell
+ architecture with support for DX11, SM4, OpenGL 4.5, OpenGL ES 3.1
+ and providing 256 CUDA cores. It supports hardware-accelerated en-
+ and decoding of various video standards including H.265, H.264 and
+ VP8 at 4K resolution and up to 60 fps.
+
+ Besides the multimedia features it also comes with a variety of I/O
+ controllers, such as GPIO, I2C, SPI, SDHCI, PCIe, SATA and XHCI, to
+ name only a few.
+
+endif
+endif
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
index d6273e1..a80d993 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
@@ -151,16 +151,12 @@ do { \
#define LIBCFS_FREE(ptr, size) \
do { \
- int s = (size); \
if (unlikely((ptr) == NULL)) { \
CERROR("LIBCFS: free NULL '" #ptr "' (%d bytes) at " \
- "%s:%d\n", s, __FILE__, __LINE__); \
+ "%s:%d\n", (int)(size), __FILE__, __LINE__); \
break; \
} \
- if (unlikely(s > LIBCFS_VMALLOC_SIZE)) \
- vfree(ptr); \
- else \
- kfree(ptr); \
+ kvfree(ptr); \
} while (0)
/******************************************************************************/
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
index 72af486..cb74ae7 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
@@ -2070,32 +2070,13 @@ static int kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts)
static int kiblnd_hdev_get_attr(kib_hca_dev_t *hdev)
{
- struct ib_device_attr *attr;
- int rc;
-
/* It's safe to assume a HCA can handle a page size
* matching that of the native system */
hdev->ibh_page_shift = PAGE_SHIFT;
hdev->ibh_page_size = 1 << PAGE_SHIFT;
hdev->ibh_page_mask = ~((__u64)hdev->ibh_page_size - 1);
- LIBCFS_ALLOC(attr, sizeof(*attr));
- if (attr == NULL) {
- CERROR("Out of memory\n");
- return -ENOMEM;
- }
-
- rc = ib_query_device(hdev->ibh_ibdev, attr);
- if (rc == 0)
- hdev->ibh_mr_size = attr->max_mr_size;
-
- LIBCFS_FREE(attr, sizeof(*attr));
-
- if (rc != 0) {
- CERROR("Failed to query IB device: %d\n", rc);
- return rc;
- }
-
+ hdev->ibh_mr_size = hdev->ibh_ibdev->attrs.max_mr_size;
if (hdev->ibh_mr_size == ~0ULL) {
hdev->ibh_mr_shift = 64;
return 0;
diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c
index 7b35531..8982f7d 100644
--- a/drivers/staging/lustre/lustre/llite/dir.c
+++ b/drivers/staging/lustre/lustre/llite/dir.c
@@ -1858,7 +1858,7 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
int api32 = ll_need_32bit_api(sbi);
loff_t ret = -EINVAL;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
switch (origin) {
case SEEK_SET:
break;
@@ -1896,7 +1896,7 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
goto out;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c
index c92d58b..39e2ffd 100644
--- a/drivers/staging/lustre/lustre/llite/file.c
+++ b/drivers/staging/lustre/lustre/llite/file.c
@@ -2082,17 +2082,17 @@ putgl:
/* update time if requested */
rc = 0;
if (llss->ia2.ia_valid != 0) {
- mutex_lock(&llss->inode1->i_mutex);
+ inode_lock(llss->inode1);
rc = ll_setattr(file1->f_path.dentry, &llss->ia2);
- mutex_unlock(&llss->inode1->i_mutex);
+ inode_unlock(llss->inode1);
}
if (llss->ia1.ia_valid != 0) {
int rc1;
- mutex_lock(&llss->inode2->i_mutex);
+ inode_lock(llss->inode2);
rc1 = ll_setattr(file2->f_path.dentry, &llss->ia1);
- mutex_unlock(&llss->inode2->i_mutex);
+ inode_unlock(llss->inode2);
if (rc == 0)
rc = rc1;
}
@@ -2179,13 +2179,13 @@ static int ll_hsm_import(struct inode *inode, struct file *file,
ATTR_MTIME | ATTR_MTIME_SET |
ATTR_ATIME | ATTR_ATIME_SET;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
rc = ll_setattr_raw(file->f_path.dentry, attr, true);
if (rc == -ENODATA)
rc = 0;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
kfree(attr);
free_hss:
@@ -2609,7 +2609,7 @@ int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* catch async errors that were recorded back when async writeback
* failed for pages in this mapping. */
@@ -2641,7 +2641,7 @@ int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
fd->fd_write_failed = false;
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return rc;
}
diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h
index ee8a1d6..845e992 100644
--- a/drivers/staging/lustre/lustre/llite/llite_internal.h
+++ b/drivers/staging/lustre/lustre/llite/llite_internal.h
@@ -631,8 +631,6 @@ struct ll_file_data {
struct lov_stripe_md;
-extern spinlock_t inode_lock;
-
extern struct dentry *llite_root;
extern struct kset *llite_kset;
diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c
index 1db93af..b2fc5b3 100644
--- a/drivers/staging/lustre/lustre/llite/llite_lib.c
+++ b/drivers/staging/lustre/lustre/llite/llite_lib.c
@@ -1277,7 +1277,7 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
return -ENOMEM;
if (!S_ISDIR(inode->i_mode))
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
memcpy(&op_data->op_attr, attr, sizeof(*attr));
@@ -1358,7 +1358,7 @@ out:
ll_finish_md_op_data(op_data);
if (!S_ISDIR(inode->i_mode)) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if ((attr->ia_valid & ATTR_SIZE) && !hsm_import)
inode_dio_wait(inode);
}
diff --git a/drivers/staging/lustre/lustre/llite/llite_nfs.c b/drivers/staging/lustre/lustre/llite/llite_nfs.c
index e578a11..18aab25 100644
--- a/drivers/staging/lustre/lustre/llite/llite_nfs.c
+++ b/drivers/staging/lustre/lustre/llite/llite_nfs.c
@@ -245,9 +245,9 @@ static int ll_get_name(struct dentry *dentry, char *name,
goto out;
}
- mutex_lock(&dir->i_mutex);
+ inode_lock(dir);
rc = ll_dir_read(dir, &lgd.ctx);
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
if (!rc && !lgd.lgd_found)
rc = -ENOENT;
out:
diff --git a/drivers/staging/lustre/lustre/llite/lloop.c b/drivers/staging/lustre/lustre/llite/lloop.c
index 420d391..871924b 100644
--- a/drivers/staging/lustre/lustre/llite/lloop.c
+++ b/drivers/staging/lustre/lustre/llite/lloop.c
@@ -257,9 +257,9 @@ static int do_bio_lustrebacked(struct lloop_device *lo, struct bio *head)
* be asked to write less pages once, this purely depends on
* implementation. Anyway, we should be careful to avoid deadlocking.
*/
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
bytes = ll_direct_rw_pages(env, io, rw, inode, pvec);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
cl_io_fini(env, io);
return (bytes == pvec->ldp_size) ? 0 : (int)bytes;
}
diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c
index 95cdb0c..f355474 100644
--- a/drivers/staging/lustre/lustre/llite/rw.c
+++ b/drivers/staging/lustre/lustre/llite/rw.c
@@ -115,8 +115,8 @@ static struct ll_cl_context *ll_cl_init(struct file *file,
struct inode *inode = vmpage->mapping->host;
loff_t pos;
- if (mutex_trylock(&inode->i_mutex)) {
- mutex_unlock(&(inode)->i_mutex);
+ if (inode_trylock(inode)) {
+ inode_unlock((inode));
/* this is too bad. Someone is trying to write the
* page w/o holding inode mutex. This means we can
diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c
index 39fa13b..711fda9 100644
--- a/drivers/staging/lustre/lustre/llite/rw26.c
+++ b/drivers/staging/lustre/lustre/llite/rw26.c
@@ -403,7 +403,7 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
* 1. Need inode mutex to operate transient pages.
*/
if (iov_iter_rw(iter) == READ)
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
LASSERT(obj->cob_transient_pages == 0);
while (iov_iter_count(iter)) {
@@ -454,7 +454,7 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
out:
LASSERT(obj->cob_transient_pages == 0);
if (iov_iter_rw(iter) == READ)
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (tot_bytes > 0) {
if (iov_iter_rw(iter) == WRITE) {
diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c
index f68e972..0920ac6 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_io.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_io.c
@@ -439,7 +439,7 @@ static int vvp_io_setattr_start(const struct lu_env *env,
struct inode *inode = ccc_object_inode(io->ci_obj);
int result = 0;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (cl_io_is_trunc(io))
result = vvp_io_setattr_trunc(env, ios, inode,
io->u.ci_setattr.sa_attr.lvb_size);
@@ -459,7 +459,7 @@ static void vvp_io_setattr_end(const struct lu_env *env,
* because osc has already notified to destroy osc_extents. */
vvp_do_vmtruncate(inode, io->u.ci_setattr.sa_attr.lvb_size);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
static void vvp_io_setattr_fini(const struct lu_env *env,
diff --git a/drivers/staging/lustre/lustre/llite/vvp_page.c b/drivers/staging/lustre/lustre/llite/vvp_page.c
index 99c0d7a..a133475 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_page.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_page.c
@@ -428,7 +428,7 @@ static void vvp_transient_page_verify(const struct cl_page *page)
{
struct inode *inode = ccc_object_inode(page->cp_obj);
- LASSERT(!mutex_trylock(&inode->i_mutex));
+ LASSERT(!inode_trylock(inode));
}
static int vvp_transient_page_own(const struct lu_env *env,
@@ -480,9 +480,9 @@ static int vvp_transient_page_is_vmlocked(const struct lu_env *env,
struct inode *inode = ccc_object_inode(slice->cpl_obj);
int locked;
- locked = !mutex_trylock(&inode->i_mutex);
+ locked = !inode_trylock(inode);
if (!locked)
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return locked ? -EBUSY : -ENODATA;
}
@@ -502,7 +502,7 @@ static void vvp_transient_page_fini(const struct lu_env *env,
struct ccc_object *clobj = cl2ccc(clp->cp_obj);
vvp_page_fini_common(cp);
- LASSERT(!mutex_trylock(&clobj->cob_inode->i_mutex));
+ LASSERT(!inode_trylock(clobj->cob_inode));
clobj->cob_transient_pages--;
}
@@ -548,7 +548,7 @@ int vvp_page_init(const struct lu_env *env, struct cl_object *obj,
} else {
struct ccc_object *clobj = cl2ccc(obj);
- LASSERT(!mutex_trylock(&clobj->cob_inode->i_mutex));
+ LASSERT(!inode_trylock(clobj->cob_inode));
cl_page_slice_add(page, &cpg->cpg_cl, obj,
&vvp_transient_page_ops);
clobj->cob_transient_pages++;
diff --git a/drivers/staging/rdma/amso1100/c2_cq.c b/drivers/staging/rdma/amso1100/c2_cq.c
index 3ef881f..7ad0c08 100644
--- a/drivers/staging/rdma/amso1100/c2_cq.c
+++ b/drivers/staging/rdma/amso1100/c2_cq.c
@@ -173,9 +173,6 @@ static inline int c2_poll_one(struct c2_dev *c2dev,
case C2_WR_TYPE_RDMA_READ:
entry->opcode = IB_WC_RDMA_READ;
break;
- case C2_WR_TYPE_BIND_MW:
- entry->opcode = IB_WC_BIND_MW;
- break;
case C2_WR_TYPE_RECV:
entry->byte_len = be32_to_cpu(ce->bytes_rcvd);
entry->opcode = IB_WC_RECV;
diff --git a/drivers/staging/rdma/amso1100/c2_provider.c b/drivers/staging/rdma/amso1100/c2_provider.c
index a092ac7..de8d10e 100644
--- a/drivers/staging/rdma/amso1100/c2_provider.c
+++ b/drivers/staging/rdma/amso1100/c2_provider.c
@@ -337,43 +337,21 @@ static inline u32 c2_convert_access(int acc)
C2_ACF_LOCAL_READ | C2_ACF_WINDOW_BIND;
}
-static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 * iova_start)
+static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
{
struct c2_mr *mr;
u64 *page_list;
- u32 total_len;
- int err, i, j, k, page_shift, pbl_depth;
+ const u32 total_len = 0xffffffff; /* AMSO1100 limit */
+ int err, page_shift, pbl_depth, i;
+ u64 kva = 0;
- pbl_depth = 0;
- total_len = 0;
+ pr_debug("%s:%u\n", __func__, __LINE__);
- page_shift = PAGE_SHIFT;
/*
- * If there is only 1 buffer we assume this could
- * be a map of all phy mem...use a 32k page_shift.
+ * This is a map of all phy mem...use a 32k page_shift.
*/
- if (num_phys_buf == 1)
- page_shift += 3;
-
- for (i = 0; i < num_phys_buf; i++) {
-
- if (offset_in_page(buffer_list[i].addr)) {
- pr_debug("Unaligned Memory Buffer: 0x%x\n",
- (unsigned int) buffer_list[i].addr);
- return ERR_PTR(-EINVAL);
- }
-
- if (!buffer_list[i].size) {
- pr_debug("Invalid Buffer Size\n");
- return ERR_PTR(-EINVAL);
- }
-
- total_len += buffer_list[i].size;
- pbl_depth += ALIGN(buffer_list[i].size,
- BIT(page_shift)) >> page_shift;
- }
+ page_shift = PAGE_SHIFT + 3;
+ pbl_depth = ALIGN(total_len, BIT(page_shift)) >> page_shift;
page_list = vmalloc(sizeof(u64) * pbl_depth);
if (!page_list) {
@@ -382,16 +360,8 @@ static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
return ERR_PTR(-ENOMEM);
}
- for (i = 0, j = 0; i < num_phys_buf; i++) {
-
- int naddrs;
-
- naddrs = ALIGN(buffer_list[i].size,
- BIT(page_shift)) >> page_shift;
- for (k = 0; k < naddrs; k++)
- page_list[j++] = (buffer_list[i].addr +
- (k << page_shift));
- }
+ for (i = 0; i < pbl_depth; i++)
+ page_list[i] = (i << page_shift);
mr = kmalloc(sizeof(*mr), GFP_KERNEL);
if (!mr) {
@@ -399,17 +369,17 @@ static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
return ERR_PTR(-ENOMEM);
}
- mr->pd = to_c2pd(ib_pd);
+ mr->pd = to_c2pd(pd);
mr->umem = NULL;
pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
"*iova_start %llx, first pa %llx, last pa %llx\n",
__func__, page_shift, pbl_depth, total_len,
- (unsigned long long) *iova_start,
+ (unsigned long long) kva,
(unsigned long long) page_list[0],
(unsigned long long) page_list[pbl_depth-1]);
- err = c2_nsmr_register_phys_kern(to_c2dev(ib_pd->device), page_list,
+ err = c2_nsmr_register_phys_kern(to_c2dev(pd->device), page_list,
BIT(page_shift), pbl_depth,
- total_len, 0, iova_start,
+ total_len, 0, &kva,
c2_convert_access(acc), mr);
vfree(page_list);
if (err) {
@@ -420,19 +390,6 @@ static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
return &mr->ibmr;
}
-static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
-{
- struct ib_phys_buf bl;
- u64 kva = 0;
-
- pr_debug("%s:%u\n", __func__, __LINE__);
-
- /* AMSO1100 limit */
- bl.size = 0xffffffff;
- bl.addr = 0;
- return c2_reg_phys_mr(pd, &bl, 1, acc, &kva);
-}
-
static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *udata)
{
@@ -840,7 +797,6 @@ int c2_register_device(struct c2_dev *dev)
dev->ibdev.destroy_cq = c2_destroy_cq;
dev->ibdev.poll_cq = c2_poll_cq;
dev->ibdev.get_dma_mr = c2_get_dma_mr;
- dev->ibdev.reg_phys_mr = c2_reg_phys_mr;
dev->ibdev.reg_user_mr = c2_reg_user_mr;
dev->ibdev.dereg_mr = c2_dereg_mr;
dev->ibdev.get_port_immutable = c2_port_immutable;
diff --git a/drivers/staging/rdma/ehca/ehca_classes.h b/drivers/staging/rdma/ehca/ehca_classes.h
index bd45e0f..e8c3387 100644
--- a/drivers/staging/rdma/ehca/ehca_classes.h
+++ b/drivers/staging/rdma/ehca/ehca_classes.h
@@ -316,9 +316,8 @@ struct ehca_mr_pginfo {
union {
struct { /* type EHCA_MR_PGI_PHYS section */
- int num_phys_buf;
- struct ib_phys_buf *phys_buf_array;
- u64 next_buf;
+ u64 addr;
+ u16 size;
} phy;
struct { /* type EHCA_MR_PGI_USER section */
struct ib_umem *region;
diff --git a/drivers/staging/rdma/ehca/ehca_iverbs.h b/drivers/staging/rdma/ehca/ehca_iverbs.h
index 80e6a3d..cca5933 100644
--- a/drivers/staging/rdma/ehca/ehca_iverbs.h
+++ b/drivers/staging/rdma/ehca/ehca_iverbs.h
@@ -80,30 +80,14 @@ int ehca_destroy_ah(struct ib_ah *ah);
struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
-struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *phys_buf_array,
- int num_phys_buf,
- int mr_access_flags, u64 *iova_start);
-
struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int mr_access_flags,
struct ib_udata *udata);
-int ehca_rereg_phys_mr(struct ib_mr *mr,
- int mr_rereg_mask,
- struct ib_pd *pd,
- struct ib_phys_buf *phys_buf_array,
- int num_phys_buf, int mr_access_flags, u64 *iova_start);
-
-int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
-
int ehca_dereg_mr(struct ib_mr *mr);
struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
-int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
- struct ib_mw_bind *mw_bind);
-
int ehca_dealloc_mw(struct ib_mw *mw);
struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
diff --git a/drivers/staging/rdma/ehca/ehca_main.c b/drivers/staging/rdma/ehca/ehca_main.c
index 860b974..832f22f 100644
--- a/drivers/staging/rdma/ehca/ehca_main.c
+++ b/drivers/staging/rdma/ehca/ehca_main.c
@@ -511,13 +511,9 @@ static int ehca_init_device(struct ehca_shca *shca)
shca->ib_device.req_notify_cq = ehca_req_notify_cq;
/* shca->ib_device.req_ncomp_notif = ehca_req_ncomp_notif; */
shca->ib_device.get_dma_mr = ehca_get_dma_mr;
- shca->ib_device.reg_phys_mr = ehca_reg_phys_mr;
shca->ib_device.reg_user_mr = ehca_reg_user_mr;
- shca->ib_device.query_mr = ehca_query_mr;
shca->ib_device.dereg_mr = ehca_dereg_mr;
- shca->ib_device.rereg_phys_mr = ehca_rereg_phys_mr;
shca->ib_device.alloc_mw = ehca_alloc_mw;
- shca->ib_device.bind_mw = ehca_bind_mw;
shca->ib_device.dealloc_mw = ehca_dealloc_mw;
shca->ib_device.alloc_fmr = ehca_alloc_fmr;
shca->ib_device.map_phys_fmr = ehca_map_phys_fmr;
diff --git a/drivers/staging/rdma/ehca/ehca_mrmw.c b/drivers/staging/rdma/ehca/ehca_mrmw.c
index 553e883..3367205 100644
--- a/drivers/staging/rdma/ehca/ehca_mrmw.c
+++ b/drivers/staging/rdma/ehca/ehca_mrmw.c
@@ -196,120 +196,6 @@ get_dma_mr_exit0:
/*----------------------------------------------------------------------*/
-struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *phys_buf_array,
- int num_phys_buf,
- int mr_access_flags,
- u64 *iova_start)
-{
- struct ib_mr *ib_mr;
- int ret;
- struct ehca_mr *e_mr;
- struct ehca_shca *shca =
- container_of(pd->device, struct ehca_shca, ib_device);
- struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
-
- u64 size;
-
- if ((num_phys_buf <= 0) || !phys_buf_array) {
- ehca_err(pd->device, "bad input values: num_phys_buf=%x "
- "phys_buf_array=%p", num_phys_buf, phys_buf_array);
- ib_mr = ERR_PTR(-EINVAL);
- goto reg_phys_mr_exit0;
- }
- if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
- !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
- ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
- !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
- /*
- * Remote Write Access requires Local Write Access
- * Remote Atomic Access requires Local Write Access
- */
- ehca_err(pd->device, "bad input values: mr_access_flags=%x",
- mr_access_flags);
- ib_mr = ERR_PTR(-EINVAL);
- goto reg_phys_mr_exit0;
- }
-
- /* check physical buffer list and calculate size */
- ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf,
- iova_start, &size);
- if (ret) {
- ib_mr = ERR_PTR(ret);
- goto reg_phys_mr_exit0;
- }
- if ((size == 0) ||
- (((u64)iova_start + size) < (u64)iova_start)) {
- ehca_err(pd->device, "bad input values: size=%llx iova_start=%p",
- size, iova_start);
- ib_mr = ERR_PTR(-EINVAL);
- goto reg_phys_mr_exit0;
- }
-
- e_mr = ehca_mr_new();
- if (!e_mr) {
- ehca_err(pd->device, "out of memory");
- ib_mr = ERR_PTR(-ENOMEM);
- goto reg_phys_mr_exit0;
- }
-
- /* register MR on HCA */
- if (ehca_mr_is_maxmr(size, iova_start)) {
- e_mr->flags |= EHCA_MR_FLAG_MAXMR;
- ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags,
- e_pd, &e_mr->ib.ib_mr.lkey,
- &e_mr->ib.ib_mr.rkey);
- if (ret) {
- ib_mr = ERR_PTR(ret);
- goto reg_phys_mr_exit1;
- }
- } else {
- struct ehca_mr_pginfo pginfo;
- u32 num_kpages;
- u32 num_hwpages;
- u64 hw_pgsize;
-
- num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size,
- PAGE_SIZE);
- /* for kernel space we try most possible pgsize */
- hw_pgsize = ehca_get_max_hwpage_size(shca);
- num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size,
- hw_pgsize);
- memset(&pginfo, 0, sizeof(pginfo));
- pginfo.type = EHCA_MR_PGI_PHYS;
- pginfo.num_kpages = num_kpages;
- pginfo.hwpage_size = hw_pgsize;
- pginfo.num_hwpages = num_hwpages;
- pginfo.u.phy.num_phys_buf = num_phys_buf;
- pginfo.u.phy.phys_buf_array = phys_buf_array;
- pginfo.next_hwpage =
- ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
-
- ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags,
- e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
- &e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
- if (ret) {
- ib_mr = ERR_PTR(ret);
- goto reg_phys_mr_exit1;
- }
- }
-
- /* successful registration of all pages */
- return &e_mr->ib.ib_mr;
-
-reg_phys_mr_exit1:
- ehca_mr_delete(e_mr);
-reg_phys_mr_exit0:
- if (IS_ERR(ib_mr))
- ehca_err(pd->device, "h_ret=%li pd=%p phys_buf_array=%p "
- "num_phys_buf=%x mr_access_flags=%x iova_start=%p",
- PTR_ERR(ib_mr), pd, phys_buf_array,
- num_phys_buf, mr_access_flags, iova_start);
- return ib_mr;
-} /* end ehca_reg_phys_mr() */
-
-/*----------------------------------------------------------------------*/
-
struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int mr_access_flags,
struct ib_udata *udata)
@@ -437,207 +323,6 @@ reg_user_mr_exit0:
/*----------------------------------------------------------------------*/
-int ehca_rereg_phys_mr(struct ib_mr *mr,
- int mr_rereg_mask,
- struct ib_pd *pd,
- struct ib_phys_buf *phys_buf_array,
- int num_phys_buf,
- int mr_access_flags,
- u64 *iova_start)
-{
- int ret;
-
- struct ehca_shca *shca =
- container_of(mr->device, struct ehca_shca, ib_device);
- struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
- u64 new_size;
- u64 *new_start;
- u32 new_acl;
- struct ehca_pd *new_pd;
- u32 tmp_lkey, tmp_rkey;
- unsigned long sl_flags;
- u32 num_kpages = 0;
- u32 num_hwpages = 0;
- struct ehca_mr_pginfo pginfo;
-
- if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) {
- /* TODO not supported, because PHYP rereg hCall needs pages */
- ehca_err(mr->device, "rereg without IB_MR_REREG_TRANS not "
- "supported yet, mr_rereg_mask=%x", mr_rereg_mask);
- ret = -EINVAL;
- goto rereg_phys_mr_exit0;
- }
-
- if (mr_rereg_mask & IB_MR_REREG_PD) {
- if (!pd) {
- ehca_err(mr->device, "rereg with bad pd, pd=%p "
- "mr_rereg_mask=%x", pd, mr_rereg_mask);
- ret = -EINVAL;
- goto rereg_phys_mr_exit0;
- }
- }
-
- if ((mr_rereg_mask &
- ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) ||
- (mr_rereg_mask == 0)) {
- ret = -EINVAL;
- goto rereg_phys_mr_exit0;
- }
-
- /* check other parameters */
- if (e_mr == shca->maxmr) {
- /* should be impossible, however reject to be sure */
- ehca_err(mr->device, "rereg internal max-MR impossible, mr=%p "
- "shca->maxmr=%p mr->lkey=%x",
- mr, shca->maxmr, mr->lkey);
- ret = -EINVAL;
- goto rereg_phys_mr_exit0;
- }
- if (mr_rereg_mask & IB_MR_REREG_TRANS) { /* transl., i.e. addr/size */
- if (e_mr->flags & EHCA_MR_FLAG_FMR) {
- ehca_err(mr->device, "not supported for FMR, mr=%p "
- "flags=%x", mr, e_mr->flags);
- ret = -EINVAL;
- goto rereg_phys_mr_exit0;
- }
- if (!phys_buf_array || num_phys_buf <= 0) {
- ehca_err(mr->device, "bad input values mr_rereg_mask=%x"
- " phys_buf_array=%p num_phys_buf=%x",
- mr_rereg_mask, phys_buf_array, num_phys_buf);
- ret = -EINVAL;
- goto rereg_phys_mr_exit0;
- }
- }
- if ((mr_rereg_mask & IB_MR_REREG_ACCESS) && /* change ACL */
- (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
- !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
- ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
- !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)))) {
- /*
- * Remote Write Access requires Local Write Access
- * Remote Atomic Access requires Local Write Access
- */
- ehca_err(mr->device, "bad input values: mr_rereg_mask=%x "
- "mr_access_flags=%x", mr_rereg_mask, mr_access_flags);
- ret = -EINVAL;
- goto rereg_phys_mr_exit0;
- }
-
- /* set requested values dependent on rereg request */
- spin_lock_irqsave(&e_mr->mrlock, sl_flags);
- new_start = e_mr->start;
- new_size = e_mr->size;
- new_acl = e_mr->acl;
- new_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
-
- if (mr_rereg_mask & IB_MR_REREG_TRANS) {
- u64 hw_pgsize = ehca_get_max_hwpage_size(shca);
-
- new_start = iova_start; /* change address */
- /* check physical buffer list and calculate size */
- ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array,
- num_phys_buf, iova_start,
- &new_size);
- if (ret)
- goto rereg_phys_mr_exit1;
- if ((new_size == 0) ||
- (((u64)iova_start + new_size) < (u64)iova_start)) {
- ehca_err(mr->device, "bad input values: new_size=%llx "
- "iova_start=%p", new_size, iova_start);
- ret = -EINVAL;
- goto rereg_phys_mr_exit1;
- }
- num_kpages = NUM_CHUNKS(((u64)new_start % PAGE_SIZE) +
- new_size, PAGE_SIZE);
- num_hwpages = NUM_CHUNKS(((u64)new_start % hw_pgsize) +
- new_size, hw_pgsize);
- memset(&pginfo, 0, sizeof(pginfo));
- pginfo.type = EHCA_MR_PGI_PHYS;
- pginfo.num_kpages = num_kpages;
- pginfo.hwpage_size = hw_pgsize;
- pginfo.num_hwpages = num_hwpages;
- pginfo.u.phy.num_phys_buf = num_phys_buf;
- pginfo.u.phy.phys_buf_array = phys_buf_array;
- pginfo.next_hwpage =
- ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
- }
- if (mr_rereg_mask & IB_MR_REREG_ACCESS)
- new_acl = mr_access_flags;
- if (mr_rereg_mask & IB_MR_REREG_PD)
- new_pd = container_of(pd, struct ehca_pd, ib_pd);
-
- ret = ehca_rereg_mr(shca, e_mr, new_start, new_size, new_acl,
- new_pd, &pginfo, &tmp_lkey, &tmp_rkey);
- if (ret)
- goto rereg_phys_mr_exit1;
-
- /* successful reregistration */
- if (mr_rereg_mask & IB_MR_REREG_PD)
- mr->pd = pd;
- mr->lkey = tmp_lkey;
- mr->rkey = tmp_rkey;
-
-rereg_phys_mr_exit1:
- spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
-rereg_phys_mr_exit0:
- if (ret)
- ehca_err(mr->device, "ret=%i mr=%p mr_rereg_mask=%x pd=%p "
- "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x "
- "iova_start=%p",
- ret, mr, mr_rereg_mask, pd, phys_buf_array,
- num_phys_buf, mr_access_flags, iova_start);
- return ret;
-} /* end ehca_rereg_phys_mr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
-{
- int ret = 0;
- u64 h_ret;
- struct ehca_shca *shca =
- container_of(mr->device, struct ehca_shca, ib_device);
- struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
- unsigned long sl_flags;
- struct ehca_mr_hipzout_parms hipzout;
-
- if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
- ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
- "e_mr->flags=%x", mr, e_mr, e_mr->flags);
- ret = -EINVAL;
- goto query_mr_exit0;
- }
-
- memset(mr_attr, 0, sizeof(struct ib_mr_attr));
- spin_lock_irqsave(&e_mr->mrlock, sl_flags);
-
- h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout);
- if (h_ret != H_SUCCESS) {
- ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lli mr=%p "
- "hca_hndl=%llx mr_hndl=%llx lkey=%x",
- h_ret, mr, shca->ipz_hca_handle.handle,
- e_mr->ipz_mr_handle.handle, mr->lkey);
- ret = ehca2ib_return_code(h_ret);
- goto query_mr_exit1;
- }
- mr_attr->pd = mr->pd;
- mr_attr->device_virt_addr = hipzout.vaddr;
- mr_attr->size = hipzout.len;
- mr_attr->lkey = hipzout.lkey;
- mr_attr->rkey = hipzout.rkey;
- ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags);
-
-query_mr_exit1:
- spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
-query_mr_exit0:
- if (ret)
- ehca_err(mr->device, "ret=%i mr=%p mr_attr=%p",
- ret, mr, mr_attr);
- return ret;
-} /* end ehca_query_mr() */
-
-/*----------------------------------------------------------------------*/
-
int ehca_dereg_mr(struct ib_mr *mr)
{
int ret = 0;
@@ -728,18 +413,6 @@ alloc_mw_exit0:
/*----------------------------------------------------------------------*/
-int ehca_bind_mw(struct ib_qp *qp,
- struct ib_mw *mw,
- struct ib_mw_bind *mw_bind)
-{
- /* TODO: not supported up to now */
- ehca_gen_err("bind MW currently not supported by HCAD");
-
- return -EPERM;
-} /* end ehca_bind_mw() */
-
-/*----------------------------------------------------------------------*/
-
int ehca_dealloc_mw(struct ib_mw *mw)
{
u64 h_ret;
@@ -1616,7 +1289,6 @@ int ehca_reg_internal_maxmr(
u64 *iova_start;
u64 size_maxmr;
struct ehca_mr_pginfo pginfo;
- struct ib_phys_buf ib_pbuf;
u32 num_kpages;
u32 num_hwpages;
u64 hw_pgsize;
@@ -1637,8 +1309,6 @@ int ehca_reg_internal_maxmr(
/* register internal max-MR on HCA */
size_maxmr = ehca_mr_len;
iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START));
- ib_pbuf.addr = 0;
- ib_pbuf.size = size_maxmr;
num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr,
PAGE_SIZE);
hw_pgsize = ehca_get_max_hwpage_size(shca);
@@ -1650,8 +1320,8 @@ int ehca_reg_internal_maxmr(
pginfo.num_kpages = num_kpages;
pginfo.num_hwpages = num_hwpages;
pginfo.hwpage_size = hw_pgsize;
- pginfo.u.phy.num_phys_buf = 1;
- pginfo.u.phy.phys_buf_array = &ib_pbuf;
+ pginfo.u.phy.addr = 0;
+ pginfo.u.phy.size = size_maxmr;
ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd,
&pginfo, &e_mr->ib.ib_mr.lkey,
@@ -1669,7 +1339,6 @@ int ehca_reg_internal_maxmr(
e_mr->ib.ib_mr.pd = &e_pd->ib_pd;
e_mr->ib.ib_mr.uobject = NULL;
atomic_inc(&(e_pd->ib_pd.usecnt));
- atomic_set(&(e_mr->ib.ib_mr.usecnt), 0);
*e_maxmr = e_mr;
return 0;
@@ -1762,61 +1431,6 @@ ehca_dereg_internal_maxmr_exit0:
/*----------------------------------------------------------------------*/
-/*
- * check physical buffer array of MR verbs for validness and
- * calculates MR size
- */
-int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
- int num_phys_buf,
- u64 *iova_start,
- u64 *size)
-{
- struct ib_phys_buf *pbuf = phys_buf_array;
- u64 size_count = 0;
- u32 i;
-
- if (num_phys_buf == 0) {
- ehca_gen_err("bad phys buf array len, num_phys_buf=0");
- return -EINVAL;
- }
- /* check first buffer */
- if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & ~PAGE_MASK)) {
- ehca_gen_err("iova_start/addr mismatch, iova_start=%p "
- "pbuf->addr=%llx pbuf->size=%llx",
- iova_start, pbuf->addr, pbuf->size);
- return -EINVAL;
- }
- if (((pbuf->addr + pbuf->size) % PAGE_SIZE) &&
- (num_phys_buf > 1)) {
- ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%llx "
- "pbuf->size=%llx", pbuf->addr, pbuf->size);
- return -EINVAL;
- }
-
- for (i = 0; i < num_phys_buf; i++) {
- if ((i > 0) && (pbuf->addr % PAGE_SIZE)) {
- ehca_gen_err("bad address, i=%x pbuf->addr=%llx "
- "pbuf->size=%llx",
- i, pbuf->addr, pbuf->size);
- return -EINVAL;
- }
- if (((i > 0) && /* not 1st */
- (i < (num_phys_buf - 1)) && /* not last */
- (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) {
- ehca_gen_err("bad size, i=%x pbuf->size=%llx",
- i, pbuf->size);
- return -EINVAL;
- }
- size_count += pbuf->size;
- pbuf++;
- }
-
- *size = size_count;
- return 0;
-} /* end ehca_mr_chk_buf_and_calc_size() */
-
-/*----------------------------------------------------------------------*/
-
/* check page list of map FMR verb for validness */
int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
u64 *page_list,
@@ -2002,57 +1616,54 @@ static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo,
u32 number, u64 *kpage)
{
int ret = 0;
- struct ib_phys_buf *pbuf;
+ u64 addr = pginfo->u.phy.addr;
+ u64 size = pginfo->u.phy.size;
u64 num_hw, offs_hw;
u32 i = 0;
- /* loop over desired phys_buf_array entries */
- while (i < number) {
- pbuf = pginfo->u.phy.phys_buf_array + pginfo->u.phy.next_buf;
- num_hw = NUM_CHUNKS((pbuf->addr % pginfo->hwpage_size) +
- pbuf->size, pginfo->hwpage_size);
- offs_hw = (pbuf->addr & ~(pginfo->hwpage_size - 1)) /
- pginfo->hwpage_size;
- while (pginfo->next_hwpage < offs_hw + num_hw) {
- /* sanity check */
- if ((pginfo->kpage_cnt >= pginfo->num_kpages) ||
- (pginfo->hwpage_cnt >= pginfo->num_hwpages)) {
- ehca_gen_err("kpage_cnt >= num_kpages, "
- "kpage_cnt=%llx num_kpages=%llx "
- "hwpage_cnt=%llx "
- "num_hwpages=%llx i=%x",
- pginfo->kpage_cnt,
- pginfo->num_kpages,
- pginfo->hwpage_cnt,
- pginfo->num_hwpages, i);
- return -EFAULT;
- }
- *kpage = (pbuf->addr & ~(pginfo->hwpage_size - 1)) +
- (pginfo->next_hwpage * pginfo->hwpage_size);
- if ( !(*kpage) && pbuf->addr ) {
- ehca_gen_err("pbuf->addr=%llx pbuf->size=%llx "
- "next_hwpage=%llx", pbuf->addr,
- pbuf->size, pginfo->next_hwpage);
- return -EFAULT;
- }
- (pginfo->hwpage_cnt)++;
- (pginfo->next_hwpage)++;
- if (PAGE_SIZE >= pginfo->hwpage_size) {
- if (pginfo->next_hwpage %
- (PAGE_SIZE / pginfo->hwpage_size) == 0)
- (pginfo->kpage_cnt)++;
- } else
- pginfo->kpage_cnt += pginfo->hwpage_size /
- PAGE_SIZE;
- kpage++;
- i++;
- if (i >= number) break;
+ num_hw = NUM_CHUNKS((addr % pginfo->hwpage_size) + size,
+ pginfo->hwpage_size);
+ offs_hw = (addr & ~(pginfo->hwpage_size - 1)) / pginfo->hwpage_size;
+
+ while (pginfo->next_hwpage < offs_hw + num_hw) {
+ /* sanity check */
+ if ((pginfo->kpage_cnt >= pginfo->num_kpages) ||
+ (pginfo->hwpage_cnt >= pginfo->num_hwpages)) {
+ ehca_gen_err("kpage_cnt >= num_kpages, "
+ "kpage_cnt=%llx num_kpages=%llx "
+ "hwpage_cnt=%llx "
+ "num_hwpages=%llx i=%x",
+ pginfo->kpage_cnt,
+ pginfo->num_kpages,
+ pginfo->hwpage_cnt,
+ pginfo->num_hwpages, i);
+ return -EFAULT;
}
- if (pginfo->next_hwpage >= offs_hw + num_hw) {
- (pginfo->u.phy.next_buf)++;
- pginfo->next_hwpage = 0;
+ *kpage = (addr & ~(pginfo->hwpage_size - 1)) +
+ (pginfo->next_hwpage * pginfo->hwpage_size);
+ if ( !(*kpage) && addr ) {
+ ehca_gen_err("addr=%llx size=%llx "
+ "next_hwpage=%llx", addr,
+ size, pginfo->next_hwpage);
+ return -EFAULT;
}
+ (pginfo->hwpage_cnt)++;
+ (pginfo->next_hwpage)++;
+ if (PAGE_SIZE >= pginfo->hwpage_size) {
+ if (pginfo->next_hwpage %
+ (PAGE_SIZE / pginfo->hwpage_size) == 0)
+ (pginfo->kpage_cnt)++;
+ } else
+ pginfo->kpage_cnt += pginfo->hwpage_size /
+ PAGE_SIZE;
+ kpage++;
+ i++;
+ if (i >= number) break;
}
+ if (pginfo->next_hwpage >= offs_hw + num_hw) {
+ pginfo->next_hwpage = 0;
+ }
+
return ret;
}
diff --git a/drivers/staging/rdma/ehca/ehca_mrmw.h b/drivers/staging/rdma/ehca/ehca_mrmw.h
index 50d8b51..52bfa95 100644
--- a/drivers/staging/rdma/ehca/ehca_mrmw.h
+++ b/drivers/staging/rdma/ehca/ehca_mrmw.h
@@ -98,11 +98,6 @@ int ehca_reg_maxmr(struct ehca_shca *shca,
int ehca_dereg_internal_maxmr(struct ehca_shca *shca);
-int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
- int num_phys_buf,
- u64 *iova_start,
- u64 *size);
-
int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
u64 *page_list,
int list_len);
diff --git a/drivers/staging/rdma/ehca/ehca_reqs.c b/drivers/staging/rdma/ehca/ehca_reqs.c
index 10e2074..11813b8 100644
--- a/drivers/staging/rdma/ehca/ehca_reqs.c
+++ b/drivers/staging/rdma/ehca/ehca_reqs.c
@@ -614,7 +614,6 @@ int ehca_post_srq_recv(struct ib_srq *srq,
static const u8 ib_wc_opcode[255] = {
[0x01] = IB_WC_RECV+1,
[0x02] = IB_WC_RECV_RDMA_WITH_IMM+1,
- [0x04] = IB_WC_BIND_MW+1,
[0x08] = IB_WC_FETCH_ADD+1,
[0x10] = IB_WC_COMP_SWAP+1,
[0x20] = IB_WC_RDMA_WRITE+1,
diff --git a/drivers/staging/rdma/hfi1/mr.c b/drivers/staging/rdma/hfi1/mr.c
index 568f185..a3f8b88 100644
--- a/drivers/staging/rdma/hfi1/mr.c
+++ b/drivers/staging/rdma/hfi1/mr.c
@@ -167,10 +167,7 @@ static struct hfi1_mr *alloc_mr(int count, struct ib_pd *pd)
rval = init_mregion(&mr->mr, pd, count);
if (rval)
goto bail;
- /*
- * ib_reg_phys_mr() will initialize mr->ibmr except for
- * lkey and rkey.
- */
+
rval = hfi1_alloc_lkey(&mr->mr, 0);
if (rval)
goto bail_mregion;
@@ -188,52 +185,6 @@ bail:
}
/**
- * hfi1_reg_phys_mr - register a physical memory region
- * @pd: protection domain for this memory region
- * @buffer_list: pointer to the list of physical buffers to register
- * @num_phys_buf: the number of physical buffers to register
- * @iova_start: the starting address passed over IB which maps to this MR
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *hfi1_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start)
-{
- struct hfi1_mr *mr;
- int n, m, i;
- struct ib_mr *ret;
-
- mr = alloc_mr(num_phys_buf, pd);
- if (IS_ERR(mr)) {
- ret = (struct ib_mr *)mr;
- goto bail;
- }
-
- mr->mr.user_base = *iova_start;
- mr->mr.iova = *iova_start;
- mr->mr.access_flags = acc;
-
- m = 0;
- n = 0;
- for (i = 0; i < num_phys_buf; i++) {
- mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr;
- mr->mr.map[m]->segs[n].length = buffer_list[i].size;
- mr->mr.length += buffer_list[i].size;
- n++;
- if (n == HFI1_SEGSZ) {
- m++;
- n = 0;
- }
- }
-
- ret = &mr->ibmr;
-
-bail:
- return ret;
-}
-
-/**
* hfi1_reg_user_mr - register a userspace memory region
* @pd: protection domain for this memory region
* @start: starting userspace address
diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c
index ef0feaa..09b8d41 100644
--- a/drivers/staging/rdma/hfi1/verbs.c
+++ b/drivers/staging/rdma/hfi1/verbs.c
@@ -2052,7 +2052,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
ibdev->poll_cq = hfi1_poll_cq;
ibdev->req_notify_cq = hfi1_req_notify_cq;
ibdev->get_dma_mr = hfi1_get_dma_mr;
- ibdev->reg_phys_mr = hfi1_reg_phys_mr;
ibdev->reg_user_mr = hfi1_reg_user_mr;
ibdev->dereg_mr = hfi1_dereg_mr;
ibdev->alloc_mr = hfi1_alloc_mr;
diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h
index 72106e5..286e468 100644
--- a/drivers/staging/rdma/hfi1/verbs.h
+++ b/drivers/staging/rdma/hfi1/verbs.h
@@ -1024,10 +1024,6 @@ int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
struct ib_mr *hfi1_get_dma_mr(struct ib_pd *pd, int acc);
-struct ib_mr *hfi1_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start);
-
struct ib_mr *hfi1_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
struct ib_udata *udata);
diff --git a/drivers/staging/rdma/ipath/ipath_fs.c b/drivers/staging/rdma/ipath/ipath_fs.c
index 796af686..476fcdf 100644
--- a/drivers/staging/rdma/ipath/ipath_fs.c
+++ b/drivers/staging/rdma/ipath/ipath_fs.c
@@ -82,14 +82,14 @@ static int create_file(const char *name, umode_t mode,
{
int error;
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
*dentry = lookup_one_len(name, parent, strlen(name));
if (!IS_ERR(*dentry))
error = ipathfs_mknod(d_inode(parent), *dentry,
mode, fops, data);
else
error = PTR_ERR(*dentry);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
return error;
}
@@ -295,7 +295,7 @@ static int remove_device_files(struct super_block *sb,
int ret;
root = dget(sb->s_root);
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
snprintf(unit, sizeof unit, "%02d", dd->ipath_unit);
dir = lookup_one_len(unit, root, strlen(unit));
@@ -311,7 +311,7 @@ static int remove_device_files(struct super_block *sb,
ret = simple_rmdir(d_inode(root), dir);
bail:
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
dput(root);
return ret;
}
diff --git a/drivers/staging/rdma/ipath/ipath_mr.c b/drivers/staging/rdma/ipath/ipath_mr.c
index c7278f6..b76b0ce 100644
--- a/drivers/staging/rdma/ipath/ipath_mr.c
+++ b/drivers/staging/rdma/ipath/ipath_mr.c
@@ -98,10 +98,6 @@ static struct ipath_mr *alloc_mr(int count,
}
mr->mr.mapsz = m;
- /*
- * ib_reg_phys_mr() will initialize mr->ibmr except for
- * lkey and rkey.
- */
if (!ipath_alloc_lkey(lk_table, &mr->mr))
goto bail;
mr->ibmr.rkey = mr->ibmr.lkey = mr->mr.lkey;
@@ -121,57 +117,6 @@ done:
}
/**
- * ipath_reg_phys_mr - register a physical memory region
- * @pd: protection domain for this memory region
- * @buffer_list: pointer to the list of physical buffers to register
- * @num_phys_buf: the number of physical buffers to register
- * @iova_start: the starting address passed over IB which maps to this MR
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start)
-{
- struct ipath_mr *mr;
- int n, m, i;
- struct ib_mr *ret;
-
- mr = alloc_mr(num_phys_buf, &to_idev(pd->device)->lk_table);
- if (mr == NULL) {
- ret = ERR_PTR(-ENOMEM);
- goto bail;
- }
-
- mr->mr.pd = pd;
- mr->mr.user_base = *iova_start;
- mr->mr.iova = *iova_start;
- mr->mr.length = 0;
- mr->mr.offset = 0;
- mr->mr.access_flags = acc;
- mr->mr.max_segs = num_phys_buf;
- mr->umem = NULL;
-
- m = 0;
- n = 0;
- for (i = 0; i < num_phys_buf; i++) {
- mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr;
- mr->mr.map[m]->segs[n].length = buffer_list[i].size;
- mr->mr.length += buffer_list[i].size;
- n++;
- if (n == IPATH_SEGSZ) {
- m++;
- n = 0;
- }
- }
-
- ret = &mr->ibmr;
-
-bail:
- return ret;
-}
-
-/**
* ipath_reg_user_mr - register a userspace memory region
* @pd: protection domain for this memory region
* @start: starting userspace address
diff --git a/drivers/staging/rdma/ipath/ipath_verbs.c b/drivers/staging/rdma/ipath/ipath_verbs.c
index 1778dee..53f9dca 100644
--- a/drivers/staging/rdma/ipath/ipath_verbs.c
+++ b/drivers/staging/rdma/ipath/ipath_verbs.c
@@ -2201,7 +2201,6 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
dev->poll_cq = ipath_poll_cq;
dev->req_notify_cq = ipath_req_notify_cq;
dev->get_dma_mr = ipath_get_dma_mr;
- dev->reg_phys_mr = ipath_reg_phys_mr;
dev->reg_user_mr = ipath_reg_user_mr;
dev->dereg_mr = ipath_dereg_mr;
dev->alloc_fmr = ipath_alloc_fmr;
diff --git a/drivers/staging/rdma/ipath/ipath_verbs.h b/drivers/staging/rdma/ipath/ipath_verbs.h
index 0a90a56..6c70a89 100644
--- a/drivers/staging/rdma/ipath/ipath_verbs.h
+++ b/drivers/staging/rdma/ipath/ipath_verbs.h
@@ -828,10 +828,6 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc);
-struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start);
-
struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
struct ib_udata *udata);
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index e77d150..5a2899f 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -615,9 +615,9 @@ iblock_alloc_bip(struct se_cmd *cmd, struct bio *bio)
}
bip = bio_integrity_alloc(bio, GFP_NOIO, cmd->t_prot_nents);
- if (!bip) {
+ if (IS_ERR(bip)) {
pr_err("Unable to allocate bio_integrity_payload\n");
- return -ENOMEM;
+ return PTR_ERR(bip);
}
bip->bip_iter.bi_size = (cmd->data_length / dev->dev_attrib.block_size) *
diff --git a/drivers/thermal/int340x_thermal/processor_thermal_device.c b/drivers/thermal/int340x_thermal/processor_thermal_device.c
index ccc0ad0..36fa724 100644
--- a/drivers/thermal/int340x_thermal/processor_thermal_device.c
+++ b/drivers/thermal/int340x_thermal/processor_thermal_device.c
@@ -33,6 +33,12 @@
/* Braswell thermal reporting device */
#define PCI_DEVICE_ID_PROC_BSW_THERMAL 0x22DC
+/* Broxton thermal reporting device */
+#define PCI_DEVICE_ID_PROC_BXT0_THERMAL 0x0A8C
+#define PCI_DEVICE_ID_PROC_BXT1_THERMAL 0x1A8C
+#define PCI_DEVICE_ID_PROC_BXTX_THERMAL 0x4A8C
+#define PCI_DEVICE_ID_PROC_BXTP_THERMAL 0x5A8C
+
struct power_config {
u32 index;
u32 min_uw;
@@ -404,6 +410,10 @@ static const struct pci_device_id proc_thermal_pci_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_HSB_THERMAL)},
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_SKL_THERMAL)},
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BSW_THERMAL)},
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BXT0_THERMAL)},
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BXT1_THERMAL)},
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BXTX_THERMAL)},
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BXTP_THERMAL)},
{ 0, },
};
diff --git a/drivers/thermal/intel_pch_thermal.c b/drivers/thermal/intel_pch_thermal.c
index 50c7da7..00d81af 100644
--- a/drivers/thermal/intel_pch_thermal.c
+++ b/drivers/thermal/intel_pch_thermal.c
@@ -136,7 +136,7 @@ struct pch_dev_ops {
/* dev ops for Wildcat Point */
-static struct pch_dev_ops pch_dev_ops_wpt = {
+static const struct pch_dev_ops pch_dev_ops_wpt = {
.hw_init = pch_wpt_init,
.get_temp = pch_wpt_get_temp,
};
diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c
index 13d01ed..44b9c48 100644
--- a/drivers/thermal/rcar_thermal.c
+++ b/drivers/thermal/rcar_thermal.c
@@ -75,11 +75,11 @@ struct rcar_thermal_priv {
#define rcar_has_irq_support(priv) ((priv)->common->base)
#define rcar_id_to_shift(priv) ((priv)->id * 8)
-#ifdef DEBUG
-# define rcar_force_update_temp(priv) 1
-#else
-# define rcar_force_update_temp(priv) 0
-#endif
+static const struct of_device_id rcar_thermal_dt_ids[] = {
+ { .compatible = "renesas,rcar-thermal", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, rcar_thermal_dt_ids);
/*
* basic functions
@@ -203,14 +203,26 @@ err_out_unlock:
static int rcar_thermal_get_temp(struct thermal_zone_device *zone, int *temp)
{
struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone);
+ int tmp;
+ int ret;
- if (!rcar_has_irq_support(priv) || rcar_force_update_temp(priv))
- rcar_thermal_update_temp(priv);
+ ret = rcar_thermal_update_temp(priv);
+ if (ret < 0)
+ return ret;
mutex_lock(&priv->lock);
- *temp = MCELSIUS((priv->ctemp * 5) - 65);
+ tmp = MCELSIUS((priv->ctemp * 5) - 65);
mutex_unlock(&priv->lock);
+ if ((tmp < MCELSIUS(-45)) || (tmp > MCELSIUS(125))) {
+ struct device *dev = rcar_priv_to_dev(priv);
+
+ dev_err(dev, "it couldn't measure temperature correctly\n");
+ return -EIO;
+ }
+
+ *temp = tmp;
+
return 0;
}
@@ -288,6 +300,9 @@ static void _rcar_thermal_irq_ctrl(struct rcar_thermal_priv *priv, int enable)
unsigned long flags;
u32 mask = 0x3 << rcar_id_to_shift(priv); /* enable Rising/Falling */
+ if (!rcar_has_irq_support(priv))
+ return;
+
spin_lock_irqsave(&common->lock, flags);
rcar_thermal_common_bset(common, INTMSK, mask, enable ? 0 : mask);
@@ -299,11 +314,15 @@ static void rcar_thermal_work(struct work_struct *work)
{
struct rcar_thermal_priv *priv;
int cctemp, nctemp;
+ int ret;
priv = container_of(work, struct rcar_thermal_priv, work.work);
rcar_thermal_get_temp(priv->zone, &cctemp);
- rcar_thermal_update_temp(priv);
+ ret = rcar_thermal_update_temp(priv);
+ if (ret < 0)
+ return;
+
rcar_thermal_irq_enable(priv);
rcar_thermal_get_temp(priv->zone, &nctemp);
@@ -368,8 +387,7 @@ static int rcar_thermal_remove(struct platform_device *pdev)
struct rcar_thermal_priv *priv;
rcar_thermal_for_each_priv(priv, common) {
- if (rcar_has_irq_support(priv))
- rcar_thermal_irq_disable(priv);
+ rcar_thermal_irq_disable(priv);
thermal_zone_device_unregister(priv->zone);
}
@@ -441,7 +459,9 @@ static int rcar_thermal_probe(struct platform_device *pdev)
mutex_init(&priv->lock);
INIT_LIST_HEAD(&priv->list);
INIT_DELAYED_WORK(&priv->work, rcar_thermal_work);
- rcar_thermal_update_temp(priv);
+ ret = rcar_thermal_update_temp(priv);
+ if (ret < 0)
+ goto error_unregister;
priv->zone = thermal_zone_device_register("rcar_thermal",
1, 0, priv,
@@ -453,8 +473,7 @@ static int rcar_thermal_probe(struct platform_device *pdev)
goto error_unregister;
}
- if (rcar_has_irq_support(priv))
- rcar_thermal_irq_enable(priv);
+ rcar_thermal_irq_enable(priv);
list_move_tail(&priv->list, &common->head);
@@ -484,12 +503,6 @@ error_unregister:
return ret;
}
-static const struct of_device_id rcar_thermal_dt_ids[] = {
- { .compatible = "renesas,rcar-thermal", },
- {},
-};
-MODULE_DEVICE_TABLE(of, rcar_thermal_dt_ids);
-
static struct platform_driver rcar_thermal_driver = {
.driver = {
.name = "rcar_thermal",
diff --git a/drivers/thermal/rockchip_thermal.c b/drivers/thermal/rockchip_thermal.c
index e845841..b58e3fb 100644
--- a/drivers/thermal/rockchip_thermal.c
+++ b/drivers/thermal/rockchip_thermal.c
@@ -38,7 +38,7 @@ enum tshut_mode {
};
/**
- * the system Temperature Sensors tshut(tshut) polarity
+ * The system Temperature Sensors tshut(tshut) polarity
* the bit 8 is tshut polarity.
* 0: low active, 1: high active
*/
@@ -57,10 +57,10 @@ enum sensor_id {
};
/**
-* The conversion table has the adc value and temperature.
-* ADC_DECREMENT is the adc value decremnet.(e.g. v2_code_table)
-* ADC_INCREMNET is the adc value incremnet.(e.g. v3_code_table)
-*/
+ * The conversion table has the adc value and temperature.
+ * ADC_DECREMENT: the adc value is of diminishing.(e.g. v2_code_table)
+ * ADC_INCREMENT: the adc value is incremental.(e.g. v3_code_table)
+ */
enum adc_sort_mode {
ADC_DECREMENT = 0,
ADC_INCREMENT,
@@ -72,16 +72,17 @@ enum adc_sort_mode {
*/
#define SOC_MAX_SENSORS 2
+/**
+ * struct chip_tsadc_table: hold information about chip-specific differences
+ * @id: conversion table
+ * @length: size of conversion table
+ * @data_mask: mask to apply on data inputs
+ * @mode: sort mode of this adc variant (incrementing or decrementing)
+ */
struct chip_tsadc_table {
const struct tsadc_table *id;
-
- /* the array table size*/
unsigned int length;
-
- /* that analogic mask data */
u32 data_mask;
-
- /* the sort mode is adc value that increment or decrement in table */
enum adc_sort_mode mode;
};
@@ -153,6 +154,7 @@ struct rockchip_thermal_data {
#define TSADCV2_SHUT_2GPIO_SRC_EN(chn) BIT(4 + (chn))
#define TSADCV2_SHUT_2CRU_SRC_EN(chn) BIT(8 + (chn))
+#define TSADCV1_INT_PD_CLEAR_MASK ~BIT(16)
#define TSADCV2_INT_PD_CLEAR_MASK ~BIT(8)
#define TSADCV2_DATA_MASK 0xfff
@@ -168,6 +170,51 @@ struct tsadc_table {
int temp;
};
+/**
+ * Note:
+ * Code to Temperature mapping of the Temperature sensor is a piece wise linear
+ * curve.Any temperature, code faling between to 2 give temperatures can be
+ * linearly interpolated.
+ * Code to Temperature mapping should be updated based on sillcon results.
+ */
+static const struct tsadc_table v1_code_table[] = {
+ {TSADCV3_DATA_MASK, -40000},
+ {436, -40000},
+ {431, -35000},
+ {426, -30000},
+ {421, -25000},
+ {416, -20000},
+ {411, -15000},
+ {406, -10000},
+ {401, -5000},
+ {395, 0},
+ {390, 5000},
+ {385, 10000},
+ {380, 15000},
+ {375, 20000},
+ {370, 25000},
+ {364, 30000},
+ {359, 35000},
+ {354, 40000},
+ {349, 45000},
+ {343, 50000},
+ {338, 55000},
+ {333, 60000},
+ {328, 65000},
+ {322, 70000},
+ {317, 75000},
+ {312, 80000},
+ {307, 85000},
+ {301, 90000},
+ {296, 95000},
+ {291, 100000},
+ {286, 105000},
+ {280, 110000},
+ {275, 115000},
+ {270, 120000},
+ {264, 125000},
+};
+
static const struct tsadc_table v2_code_table[] = {
{TSADCV2_DATA_MASK, -40000},
{3800, -40000},
@@ -245,6 +292,44 @@ static const struct tsadc_table v3_code_table[] = {
{TSADCV3_DATA_MASK, 125000},
};
+static const struct tsadc_table v4_code_table[] = {
+ {TSADCV3_DATA_MASK, -40000},
+ {431, -40000},
+ {426, -35000},
+ {421, -30000},
+ {415, -25000},
+ {410, -20000},
+ {405, -15000},
+ {399, -10000},
+ {394, -5000},
+ {389, 0},
+ {383, 5000},
+ {378, 10000},
+ {373, 15000},
+ {367, 20000},
+ {362, 25000},
+ {357, 30000},
+ {351, 35000},
+ {346, 40000},
+ {340, 45000},
+ {335, 50000},
+ {330, 55000},
+ {324, 60000},
+ {319, 65000},
+ {313, 70000},
+ {308, 75000},
+ {302, 80000},
+ {297, 85000},
+ {291, 90000},
+ {286, 95000},
+ {281, 100000},
+ {275, 105000},
+ {270, 110000},
+ {264, 115000},
+ {259, 120000},
+ {253, 125000},
+};
+
static u32 rk_tsadcv2_temp_to_code(struct chip_tsadc_table table,
int temp)
{
@@ -368,6 +453,14 @@ static void rk_tsadcv2_initialize(void __iomem *regs,
regs + TSADCV2_HIGHT_TSHUT_DEBOUNCE);
}
+static void rk_tsadcv1_irq_ack(void __iomem *regs)
+{
+ u32 val;
+
+ val = readl_relaxed(regs + TSADCV2_INT_PD);
+ writel_relaxed(val & TSADCV1_INT_PD_CLEAR_MASK, regs + TSADCV2_INT_PD);
+}
+
static void rk_tsadcv2_irq_ack(void __iomem *regs)
{
u32 val;
@@ -429,6 +522,29 @@ static void rk_tsadcv2_tshut_mode(int chn, void __iomem *regs,
writel_relaxed(val, regs + TSADCV2_INT_EN);
}
+static const struct rockchip_tsadc_chip rk3228_tsadc_data = {
+ .chn_id[SENSOR_CPU] = 0, /* cpu sensor is channel 0 */
+ .chn_num = 1, /* one channel for tsadc */
+
+ .tshut_mode = TSHUT_MODE_GPIO, /* default TSHUT via GPIO give PMIC */
+ .tshut_polarity = TSHUT_LOW_ACTIVE, /* default TSHUT LOW ACTIVE */
+ .tshut_temp = 95000,
+
+ .initialize = rk_tsadcv2_initialize,
+ .irq_ack = rk_tsadcv1_irq_ack,
+ .control = rk_tsadcv2_control,
+ .get_temp = rk_tsadcv2_get_temp,
+ .set_tshut_temp = rk_tsadcv2_tshut_temp,
+ .set_tshut_mode = rk_tsadcv2_tshut_mode,
+
+ .table = {
+ .id = v1_code_table,
+ .length = ARRAY_SIZE(v1_code_table),
+ .data_mask = TSADCV3_DATA_MASK,
+ .mode = ADC_DECREMENT,
+ },
+};
+
static const struct rockchip_tsadc_chip rk3288_tsadc_data = {
.chn_id[SENSOR_CPU] = 1, /* cpu sensor is channel 1 */
.chn_id[SENSOR_GPU] = 2, /* gpu sensor is channel 2 */
@@ -477,8 +593,36 @@ static const struct rockchip_tsadc_chip rk3368_tsadc_data = {
},
};
+static const struct rockchip_tsadc_chip rk3399_tsadc_data = {
+ .chn_id[SENSOR_CPU] = 0, /* cpu sensor is channel 0 */
+ .chn_id[SENSOR_GPU] = 1, /* gpu sensor is channel 1 */
+ .chn_num = 2, /* two channels for tsadc */
+
+ .tshut_mode = TSHUT_MODE_GPIO, /* default TSHUT via GPIO give PMIC */
+ .tshut_polarity = TSHUT_LOW_ACTIVE, /* default TSHUT LOW ACTIVE */
+ .tshut_temp = 95000,
+
+ .initialize = rk_tsadcv2_initialize,
+ .irq_ack = rk_tsadcv1_irq_ack,
+ .control = rk_tsadcv2_control,
+ .get_temp = rk_tsadcv2_get_temp,
+ .set_tshut_temp = rk_tsadcv2_tshut_temp,
+ .set_tshut_mode = rk_tsadcv2_tshut_mode,
+
+ .table = {
+ .id = v4_code_table,
+ .length = ARRAY_SIZE(v4_code_table),
+ .data_mask = TSADCV3_DATA_MASK,
+ .mode = ADC_DECREMENT,
+ },
+};
+
static const struct of_device_id of_rockchip_thermal_match[] = {
{
+ .compatible = "rockchip,rk3228-tsadc",
+ .data = (void *)&rk3228_tsadc_data,
+ },
+ {
.compatible = "rockchip,rk3288-tsadc",
.data = (void *)&rk3288_tsadc_data,
},
@@ -486,6 +630,10 @@ static const struct of_device_id of_rockchip_thermal_match[] = {
.compatible = "rockchip,rk3368-tsadc",
.data = (void *)&rk3368_tsadc_data,
},
+ {
+ .compatible = "rockchip,rk3399-tsadc",
+ .data = (void *)&rk3399_tsadc_data,
+ },
{ /* end */ },
};
MODULE_DEVICE_TABLE(of, of_rockchip_thermal_match);
@@ -617,7 +765,7 @@ rockchip_thermal_register_sensor(struct platform_device *pdev,
return 0;
}
-/*
+/**
* Reset TSADC Controller, reset all tsadc registers.
*/
static void rockchip_thermal_reset_controller(struct reset_control *reset)
diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c
index 2f9f708..ea9366a 100644
--- a/drivers/thermal/step_wise.c
+++ b/drivers/thermal/step_wise.c
@@ -63,6 +63,19 @@ static unsigned long get_target_state(struct thermal_instance *instance,
next_target = instance->target;
dev_dbg(&cdev->device, "cur_state=%ld\n", cur_state);
+ if (!instance->initialized) {
+ if (throttle) {
+ next_target = (cur_state + 1) >= instance->upper ?
+ instance->upper :
+ ((cur_state + 1) < instance->lower ?
+ instance->lower : (cur_state + 1));
+ } else {
+ next_target = THERMAL_NO_TARGET;
+ }
+
+ return next_target;
+ }
+
switch (trend) {
case THERMAL_TREND_RAISING:
if (throttle) {
@@ -149,7 +162,7 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
dev_dbg(&instance->cdev->device, "old_target=%d, target=%d\n",
old_target, (int)instance->target);
- if (old_target == instance->target)
+ if (instance->initialized && old_target == instance->target)
continue;
/* Activate a passive thermal instance */
@@ -161,7 +174,7 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
instance->target == THERMAL_NO_TARGET)
update_passive_instance(tz, trip_type, -1);
-
+ instance->initialized = true;
instance->cdev->updated = false; /* cdev needs update */
}
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index d9e525c..a0a8fd1 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -37,6 +37,7 @@
#include <linux/of.h>
#include <net/netlink.h>
#include <net/genetlink.h>
+#include <linux/suspend.h>
#define CREATE_TRACE_POINTS
#include <trace/events/thermal.h>
@@ -59,6 +60,8 @@ static LIST_HEAD(thermal_governor_list);
static DEFINE_MUTEX(thermal_list_lock);
static DEFINE_MUTEX(thermal_governor_lock);
+static atomic_t in_suspend;
+
static struct thermal_governor *def_governor;
static struct thermal_governor *__find_governor(const char *name)
@@ -532,14 +535,31 @@ static void update_temperature(struct thermal_zone_device *tz)
mutex_unlock(&tz->lock);
trace_thermal_temperature(tz);
- dev_dbg(&tz->device, "last_temperature=%d, current_temperature=%d\n",
- tz->last_temperature, tz->temperature);
+ if (tz->last_temperature == THERMAL_TEMP_INVALID)
+ dev_dbg(&tz->device, "last_temperature N/A, current_temperature=%d\n",
+ tz->temperature);
+ else
+ dev_dbg(&tz->device, "last_temperature=%d, current_temperature=%d\n",
+ tz->last_temperature, tz->temperature);
+}
+
+static void thermal_zone_device_reset(struct thermal_zone_device *tz)
+{
+ struct thermal_instance *pos;
+
+ tz->temperature = THERMAL_TEMP_INVALID;
+ tz->passive = 0;
+ list_for_each_entry(pos, &tz->thermal_instances, tz_node)
+ pos->initialized = false;
}
void thermal_zone_device_update(struct thermal_zone_device *tz)
{
int count;
+ if (atomic_read(&in_suspend))
+ return;
+
if (!tz->ops->get_temp)
return;
@@ -676,8 +696,12 @@ trip_point_temp_store(struct device *dev, struct device_attribute *attr,
return -EINVAL;
ret = tz->ops->set_trip_temp(tz, trip, temperature);
+ if (ret)
+ return ret;
- return ret ? ret : count;
+ thermal_zone_device_update(tz);
+
+ return count;
}
static ssize_t
@@ -1321,6 +1345,7 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
if (!result) {
list_add_tail(&dev->tz_node, &tz->thermal_instances);
list_add_tail(&dev->cdev_node, &cdev->thermal_instances);
+ atomic_set(&tz->need_update, 1);
}
mutex_unlock(&cdev->lock);
mutex_unlock(&tz->lock);
@@ -1430,6 +1455,7 @@ __thermal_cooling_device_register(struct device_node *np,
const struct thermal_cooling_device_ops *ops)
{
struct thermal_cooling_device *cdev;
+ struct thermal_zone_device *pos = NULL;
int result;
if (type && strlen(type) >= THERMAL_NAME_LENGTH)
@@ -1474,6 +1500,12 @@ __thermal_cooling_device_register(struct device_node *np,
/* Update binding information for 'this' new cdev */
bind_cdev(cdev);
+ mutex_lock(&thermal_list_lock);
+ list_for_each_entry(pos, &thermal_tz_list, node)
+ if (atomic_cmpxchg(&pos->need_update, 1, 0))
+ thermal_zone_device_update(pos);
+ mutex_unlock(&thermal_list_lock);
+
return cdev;
}
@@ -1806,6 +1838,8 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
tz->trips = trips;
tz->passive_delay = passive_delay;
tz->polling_delay = polling_delay;
+ /* A new thermal zone needs to be updated anyway. */
+ atomic_set(&tz->need_update, 1);
dev_set_name(&tz->device, "thermal_zone%d", tz->id);
result = device_register(&tz->device);
@@ -1900,7 +1934,10 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
INIT_DELAYED_WORK(&(tz->poll_queue), thermal_zone_device_check);
- thermal_zone_device_update(tz);
+ thermal_zone_device_reset(tz);
+ /* Update the new thermal zone and mark it as already updated. */
+ if (atomic_cmpxchg(&tz->need_update, 1, 0))
+ thermal_zone_device_update(tz);
return tz;
@@ -2140,6 +2177,36 @@ static void thermal_unregister_governors(void)
thermal_gov_power_allocator_unregister();
}
+static int thermal_pm_notify(struct notifier_block *nb,
+ unsigned long mode, void *_unused)
+{
+ struct thermal_zone_device *tz;
+
+ switch (mode) {
+ case PM_HIBERNATION_PREPARE:
+ case PM_RESTORE_PREPARE:
+ case PM_SUSPEND_PREPARE:
+ atomic_set(&in_suspend, 1);
+ break;
+ case PM_POST_HIBERNATION:
+ case PM_POST_RESTORE:
+ case PM_POST_SUSPEND:
+ atomic_set(&in_suspend, 0);
+ list_for_each_entry(tz, &thermal_tz_list, node) {
+ thermal_zone_device_reset(tz);
+ thermal_zone_device_update(tz);
+ }
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static struct notifier_block thermal_pm_nb = {
+ .notifier_call = thermal_pm_notify,
+};
+
static int __init thermal_init(void)
{
int result;
@@ -2160,6 +2227,11 @@ static int __init thermal_init(void)
if (result)
goto exit_netlink;
+ result = register_pm_notifier(&thermal_pm_nb);
+ if (result)
+ pr_warn("Thermal: Can not register suspend notifier, return %d\n",
+ result);
+
return 0;
exit_netlink:
@@ -2179,6 +2251,7 @@ error:
static void __exit thermal_exit(void)
{
+ unregister_pm_notifier(&thermal_pm_nb);
of_thermal_destroy_zones();
genetlink_exit();
class_unregister(&thermal_class);
diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
index d7ac1fc..749d41a 100644
--- a/drivers/thermal/thermal_core.h
+++ b/drivers/thermal/thermal_core.h
@@ -41,6 +41,7 @@ struct thermal_instance {
struct thermal_zone_device *tz;
struct thermal_cooling_device *cdev;
int trip;
+ bool initialized;
unsigned long upper; /* Highest cooling state for this trip point */
unsigned long lower; /* Lowest cooling state for this trip point */
unsigned long target; /* expected cooling state */
diff --git a/drivers/usb/gadget/function/f_printer.c b/drivers/usb/gadget/function/f_printer.c
index 0fbfb2b..26ccad5 100644
--- a/drivers/usb/gadget/function/f_printer.c
+++ b/drivers/usb/gadget/function/f_printer.c
@@ -673,7 +673,7 @@ printer_fsync(struct file *fd, loff_t start, loff_t end, int datasync)
unsigned long flags;
int tx_list_empty;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
spin_lock_irqsave(&dev->lock, flags);
tx_list_empty = (likely(list_empty(&dev->tx_reqs)));
spin_unlock_irqrestore(&dev->lock, flags);
@@ -683,7 +683,7 @@ printer_fsync(struct file *fd, loff_t start, loff_t end, int datasync)
wait_event_interruptible(dev->tx_flush_wait,
(likely(list_empty(&dev->tx_reqs_active))));
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return 0;
}
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index 365afd7..7e179f8 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -1521,10 +1521,10 @@ static void destroy_ep_files (struct dev_data *dev)
spin_unlock_irq (&dev->lock);
/* break link to dcache */
- mutex_lock (&parent->i_mutex);
+ inode_lock(parent);
d_delete (dentry);
dput (dentry);
- mutex_unlock (&parent->i_mutex);
+ inode_unlock(parent);
spin_lock_irq (&dev->lock);
}
diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c
index f92f5af..8755b2c 100644
--- a/drivers/usb/gadget/udc/atmel_usba_udc.c
+++ b/drivers/usb/gadget/udc/atmel_usba_udc.c
@@ -91,7 +91,7 @@ static ssize_t queue_dbg_read(struct file *file, char __user *buf,
if (!access_ok(VERIFY_WRITE, buf, nbytes))
return -EFAULT;
- mutex_lock(&file_inode(file)->i_mutex);
+ inode_lock(file_inode(file));
list_for_each_entry_safe(req, tmp_req, queue, queue) {
len = snprintf(tmpbuf, sizeof(tmpbuf),
"%8p %08x %c%c%c %5d %c%c%c\n",
@@ -118,7 +118,7 @@ static ssize_t queue_dbg_read(struct file *file, char __user *buf,
nbytes -= len;
buf += len;
}
- mutex_unlock(&file_inode(file)->i_mutex);
+ inode_unlock(file_inode(file));
return actual;
}
@@ -143,7 +143,7 @@ static int regs_dbg_open(struct inode *inode, struct file *file)
u32 *data;
int ret = -ENOMEM;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
udc = inode->i_private;
data = kmalloc(inode->i_size, GFP_KERNEL);
if (!data)
@@ -158,7 +158,7 @@ static int regs_dbg_open(struct inode *inode, struct file *file)
ret = 0;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
@@ -169,11 +169,11 @@ static ssize_t regs_dbg_read(struct file *file, char __user *buf,
struct inode *inode = file_inode(file);
int ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = simple_read_from_buffer(buf, nbytes, ppos,
file->private_data,
file_inode(file)->i_size);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig
index daa563f..1f117c3 100644
--- a/drivers/usb/host/Kconfig
+++ b/drivers/usb/host/Kconfig
@@ -229,6 +229,8 @@ config USB_EHCI_TEGRA
depends on ARCH_TEGRA
select USB_EHCI_ROOT_HUB_TT
select USB_PHY
+ select USB_ULPI
+ select USB_ULPI_VIEWPORT
help
This driver enables support for the internal USB Host Controllers
found in NVIDIA Tegra SoCs. The controllers are EHCI compliant.
diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c
index 3fc63c2..57721c7 100644
--- a/drivers/video/fbdev/core/fb_defio.c
+++ b/drivers/video/fbdev/core/fb_defio.c
@@ -78,13 +78,13 @@ int fb_deferred_io_fsync(struct file *file, loff_t start, loff_t end, int datasy
if (!info->fbdefio)
return 0;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* Kill off the delayed work */
cancel_delayed_work_sync(&info->deferred_work);
/* Run it immediately */
schedule_delayed_work(&info->deferred_work, 0);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return 0;
}